Skip to content

Commit

Permalink
rename to sanetise
Browse files (browse the repository at this point in the history)
  • Loading branch information
b8raoult committed Sep 30, 2024
1 parent f6f8bb9 commit 65715a7
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 52 deletions.
22 changes: 11 additions & 11 deletions src/anemoi/utils/anonymize.py → src/anemoi/utils/sanetise.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,41 @@
RE2 = re.compile(r"\(([^}]*)\)")


def anonymize(obj):
"""Anonymize an object:
def sanetise(obj):
"""sanetise an object:
- by replacing all full paths with shortened versions.
- by replacing URL passwords with '***'.
"""

if isinstance(obj, dict):
return {anonymize(k): anonymize(v) for k, v in obj.items()}
return {sanetise(k): sanetise(v) for k, v in obj.items()}

if isinstance(obj, list):
return [anonymize(v) for v in obj]
return [sanetise(v) for v in obj]

if isinstance(obj, tuple):
return tuple(anonymize(v) for v in obj)
return tuple(sanetise(v) for v in obj)

if isinstance(obj, str):
return _anonymize_string(obj)
return _sanetise_string(obj)

return obj


def _anonymize_string(obj):
def _sanetise_string(obj):

parsed = urlparse(obj, allow_fragments=True)

if parsed.scheme:
return _anonymize_url(parsed)
return _sanetise_url(parsed)

if obj.startswith("/") or obj.startswith("~"):
return _anonymize_path(obj)
return _sanetise_path(obj)

return obj


def _anonymize_url(parsed):
def _sanetise_url(parsed):

LIST = [
"pass",
Expand Down Expand Up @@ -98,7 +98,7 @@ def _anonymize_url(parsed):
return urlunparse([scheme, netloc, path, params, query, fragment])


def _anonymize_path(path):
def _sanetise_path(path):
bits = list(reversed(Path(path).parts))
result = [bits.pop(0)]
for bit in bits:
Expand Down
10 changes: 10 additions & 0 deletions src/anemoi/utils/sanetize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

from .sanetise import sanetise as sanetize

__all__ = ["sanetize"]
82 changes: 41 additions & 41 deletions tests/test_anonymize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,60 +6,60 @@
# nor does it submit to any jurisdiction.


from anemoi.utils.anonymize import anonymize
from anemoi.utils.sanetise import sanetise


def test_anonymize_urls():
assert anonymize("http://johndoe:password@host:port/path") == "http://user:***@host:port/path"
def test_sanetise_urls():
assert sanetise("http://johndoe:password@host:port/path") == "http://user:***@host:port/path"

assert anonymize("http://www.example.com/path?pass=secret") == "http://www.example.com/path?pass=hidden"
assert anonymize("http://www.example.com/path?password=secret") == "http://www.example.com/path?password=hidden"
assert anonymize("http://www.example.com/path?token=secret") == "http://www.example.com/path?token=hidden"
assert anonymize("http://www.example.com/path?user=secret") == "http://www.example.com/path?user=hidden"
assert anonymize("http://www.example.com/path?key=secret") == "http://www.example.com/path?key=hidden"
assert anonymize("http://www.example.com/path?pwd=secret") == "http://www.example.com/path?pwd=hidden"
assert anonymize("http://www.example.com/path?_key=secret") == "http://www.example.com/path?_key=hidden"
assert anonymize("http://www.example.com/path?_token=secret") == "http://www.example.com/path?_token=hidden"
assert anonymize("http://www.example.com/path?apikey=secret") == "http://www.example.com/path?apikey=hidden"
assert anonymize("http://www.example.com/path?api_key=secret") == "http://www.example.com/path?api_key=hidden"
assert anonymize("http://www.example.com/path?api_token=secret") == "http://www.example.com/path?api_token=hidden"
assert anonymize("http://www.example.com/path?_api_token=secret") == "http://www.example.com/path?_api_token=hidden"
assert anonymize("http://www.example.com/path?_api_key=secret") == "http://www.example.com/path?_api_key=hidden"
assert anonymize("http://www.example.com/path?username=secret") == "http://www.example.com/path?username=hidden"
assert anonymize("http://www.example.com/path?login=secret") == "http://www.example.com/path?login=hidden"
assert sanetise("http://www.example.com/path?pass=secret") == "http://www.example.com/path?pass=hidden"
assert sanetise("http://www.example.com/path?password=secret") == "http://www.example.com/path?password=hidden"
assert sanetise("http://www.example.com/path?token=secret") == "http://www.example.com/path?token=hidden"
assert sanetise("http://www.example.com/path?user=secret") == "http://www.example.com/path?user=hidden"
assert sanetise("http://www.example.com/path?key=secret") == "http://www.example.com/path?key=hidden"
assert sanetise("http://www.example.com/path?pwd=secret") == "http://www.example.com/path?pwd=hidden"
assert sanetise("http://www.example.com/path?_key=secret") == "http://www.example.com/path?_key=hidden"
assert sanetise("http://www.example.com/path?_token=secret") == "http://www.example.com/path?_token=hidden"
assert sanetise("http://www.example.com/path?apikey=secret") == "http://www.example.com/path?apikey=hidden"
assert sanetise("http://www.example.com/path?api_key=secret") == "http://www.example.com/path?api_key=hidden"
assert sanetise("http://www.example.com/path?api_token=secret") == "http://www.example.com/path?api_token=hidden"
assert sanetise("http://www.example.com/path?_api_token=secret") == "http://www.example.com/path?_api_token=hidden"
assert sanetise("http://www.example.com/path?_api_key=secret") == "http://www.example.com/path?_api_key=hidden"
assert sanetise("http://www.example.com/path?username=secret") == "http://www.example.com/path?username=hidden"
assert sanetise("http://www.example.com/path?login=secret") == "http://www.example.com/path?login=hidden"

assert anonymize("http://www.example.com/path;pass=secret") == "http://www.example.com/path;pass=hidden"
assert anonymize("http://www.example.com/path;password=secret") == "http://www.example.com/path;password=hidden"
assert anonymize("http://www.example.com/path;token=secret") == "http://www.example.com/path;token=hidden"
assert anonymize("http://www.example.com/path;user=secret") == "http://www.example.com/path;user=hidden"
assert anonymize("http://www.example.com/path;key=secret") == "http://www.example.com/path;key=hidden"
assert anonymize("http://www.example.com/path;pwd=secret") == "http://www.example.com/path;pwd=hidden"
assert anonymize("http://www.example.com/path;_key=secret") == "http://www.example.com/path;_key=hidden"
assert anonymize("http://www.example.com/path;_token=secret") == "http://www.example.com/path;_token=hidden"
assert anonymize("http://www.example.com/path;apikey=secret") == "http://www.example.com/path;apikey=hidden"
assert anonymize("http://www.example.com/path;api_key=secret") == "http://www.example.com/path;api_key=hidden"
assert anonymize("http://www.example.com/path;api_token=secret") == "http://www.example.com/path;api_token=hidden"
assert anonymize("http://www.example.com/path;_api_token=secret") == "http://www.example.com/path;_api_token=hidden"
assert anonymize("http://www.example.com/path;_api_key=secret") == "http://www.example.com/path;_api_key=hidden"
assert anonymize("http://www.example.com/path;username=secret") == "http://www.example.com/path;username=hidden"
assert anonymize("http://www.example.com/path;login=secret") == "http://www.example.com/path;login=hidden"
assert sanetise("http://www.example.com/path;pass=secret") == "http://www.example.com/path;pass=hidden"
assert sanetise("http://www.example.com/path;password=secret") == "http://www.example.com/path;password=hidden"
assert sanetise("http://www.example.com/path;token=secret") == "http://www.example.com/path;token=hidden"
assert sanetise("http://www.example.com/path;user=secret") == "http://www.example.com/path;user=hidden"
assert sanetise("http://www.example.com/path;key=secret") == "http://www.example.com/path;key=hidden"
assert sanetise("http://www.example.com/path;pwd=secret") == "http://www.example.com/path;pwd=hidden"
assert sanetise("http://www.example.com/path;_key=secret") == "http://www.example.com/path;_key=hidden"
assert sanetise("http://www.example.com/path;_token=secret") == "http://www.example.com/path;_token=hidden"
assert sanetise("http://www.example.com/path;apikey=secret") == "http://www.example.com/path;apikey=hidden"
assert sanetise("http://www.example.com/path;api_key=secret") == "http://www.example.com/path;api_key=hidden"
assert sanetise("http://www.example.com/path;api_token=secret") == "http://www.example.com/path;api_token=hidden"
assert sanetise("http://www.example.com/path;_api_token=secret") == "http://www.example.com/path;_api_token=hidden"
assert sanetise("http://www.example.com/path;_api_key=secret") == "http://www.example.com/path;_api_key=hidden"
assert sanetise("http://www.example.com/path;username=secret") == "http://www.example.com/path;username=hidden"
assert sanetise("http://www.example.com/path;login=secret") == "http://www.example.com/path;login=hidden"


def test_anonymize_paths():
def test_sanetise_paths():
# We want to keep earthkit-data's url and path pattern

assert anonymize("/home/johndoe/.ssh/id_rsa") == "/.../id_rsa"
assert sanetise("/home/johndoe/.ssh/id_rsa") == "/.../id_rsa"

assert (
anonymize("/data/model/{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%d)}/analysis.grib")
sanetise("/data/model/{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%d)}/analysis.grib")
== "/.../{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%d)}/analysis.grib"
)

assert anonymize("test.grib") == "test.grib"
assert anonymize("../test.grib") == "../test.grib"
assert anonymize("./test.grib") == "./test.grib"
assert anonymize("sub/folder/test.grib") == "sub/folder/test.grib"
assert anonymize("./folder/test.grib") == "./folder/test.grib"
assert sanetise("test.grib") == "test.grib"
assert sanetise("../test.grib") == "../test.grib"
assert sanetise("./test.grib") == "./test.grib"
assert sanetise("sub/folder/test.grib") == "sub/folder/test.grib"
assert sanetise("./folder/test.grib") == "./folder/test.grib"


if __name__ == "__main__":
Expand Down

0 comments on commit 65715a7

Please sign in to comment.