diff --git a/src/anemoi/utils/anonymize.py b/src/anemoi/utils/sanetise.py similarity index 86% rename from src/anemoi/utils/anonymize.py rename to src/anemoi/utils/sanetise.py index 2388e98..81d2575 100644 --- a/src/anemoi/utils/anonymize.py +++ b/src/anemoi/utils/sanetise.py @@ -20,41 +20,41 @@ RE2 = re.compile(r"\(([^}]*)\)") -def anonymize(obj): - """Anonymize an object: +def sanetise(obj): + """sanetise an object: - by replacing all full paths with shortened versions. - by replacing URL passwords with '***'. """ if isinstance(obj, dict): - return {anonymize(k): anonymize(v) for k, v in obj.items()} + return {sanetise(k): sanetise(v) for k, v in obj.items()} if isinstance(obj, list): - return [anonymize(v) for v in obj] + return [sanetise(v) for v in obj] if isinstance(obj, tuple): - return tuple(anonymize(v) for v in obj) + return tuple(sanetise(v) for v in obj) if isinstance(obj, str): - return _anonymize_string(obj) + return _sanetise_string(obj) return obj -def _anonymize_string(obj): +def _sanetise_string(obj): parsed = urlparse(obj, allow_fragments=True) if parsed.scheme: - return _anonymize_url(parsed) + return _sanetise_url(parsed) if obj.startswith("/") or obj.startswith("~"): - return _anonymize_path(obj) + return _sanetise_path(obj) return obj -def _anonymize_url(parsed): +def _sanetise_url(parsed): LIST = [ "pass", @@ -98,7 +98,7 @@ def _anonymize_url(parsed): return urlunparse([scheme, netloc, path, params, query, fragment]) -def _anonymize_path(path): +def _sanetise_path(path): bits = list(reversed(Path(path).parts)) result = [bits.pop(0)] for bit in bits: diff --git a/src/anemoi/utils/sanetize.py b/src/anemoi/utils/sanetize.py new file mode 100644 index 0000000..a707d36 --- /dev/null +++ b/src/anemoi/utils/sanetize.py @@ -0,0 +1,10 @@ +# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from .sanetise import sanetise as sanetize + +__all__ = ["sanetize"] diff --git a/tests/test_anonymize.py b/tests/test_anonymize.py index 7ef94be..05b9d28 100644 --- a/tests/test_anonymize.py +++ b/tests/test_anonymize.py @@ -6,60 +6,60 @@ # nor does it submit to any jurisdiction. -from anemoi.utils.anonymize import anonymize +from anemoi.utils.sanetise import sanetise -def test_anonymize_urls(): - assert anonymize("http://johndoe:password@host:port/path") == "http://user:***@host:port/path" +def test_sanetise_urls(): + assert sanetise("http://johndoe:password@host:port/path") == "http://user:***@host:port/path" - assert anonymize("http://www.example.com/path?pass=secret") == "http://www.example.com/path?pass=hidden" - assert anonymize("http://www.example.com/path?password=secret") == "http://www.example.com/path?password=hidden" - assert anonymize("http://www.example.com/path?token=secret") == "http://www.example.com/path?token=hidden" - assert anonymize("http://www.example.com/path?user=secret") == "http://www.example.com/path?user=hidden" - assert anonymize("http://www.example.com/path?key=secret") == "http://www.example.com/path?key=hidden" - assert anonymize("http://www.example.com/path?pwd=secret") == "http://www.example.com/path?pwd=hidden" - assert anonymize("http://www.example.com/path?_key=secret") == "http://www.example.com/path?_key=hidden" - assert anonymize("http://www.example.com/path?_token=secret") == "http://www.example.com/path?_token=hidden" - assert anonymize("http://www.example.com/path?apikey=secret") == "http://www.example.com/path?apikey=hidden" - assert anonymize("http://www.example.com/path?api_key=secret") == "http://www.example.com/path?api_key=hidden" - assert anonymize("http://www.example.com/path?api_token=secret") == "http://www.example.com/path?api_token=hidden" - assert anonymize("http://www.example.com/path?_api_token=secret") == "http://www.example.com/path?_api_token=hidden" - assert anonymize("http://www.example.com/path?_api_key=secret") == "http://www.example.com/path?_api_key=hidden" - assert anonymize("http://www.example.com/path?username=secret") == "http://www.example.com/path?username=hidden" - assert anonymize("http://www.example.com/path?login=secret") == "http://www.example.com/path?login=hidden" + assert sanetise("http://www.example.com/path?pass=secret") == "http://www.example.com/path?pass=hidden" + assert sanetise("http://www.example.com/path?password=secret") == "http://www.example.com/path?password=hidden" + assert sanetise("http://www.example.com/path?token=secret") == "http://www.example.com/path?token=hidden" + assert sanetise("http://www.example.com/path?user=secret") == "http://www.example.com/path?user=hidden" + assert sanetise("http://www.example.com/path?key=secret") == "http://www.example.com/path?key=hidden" + assert sanetise("http://www.example.com/path?pwd=secret") == "http://www.example.com/path?pwd=hidden" + assert sanetise("http://www.example.com/path?_key=secret") == "http://www.example.com/path?_key=hidden" + assert sanetise("http://www.example.com/path?_token=secret") == "http://www.example.com/path?_token=hidden" + assert sanetise("http://www.example.com/path?apikey=secret") == "http://www.example.com/path?apikey=hidden" + assert sanetise("http://www.example.com/path?api_key=secret") == "http://www.example.com/path?api_key=hidden" + assert sanetise("http://www.example.com/path?api_token=secret") == "http://www.example.com/path?api_token=hidden" + assert sanetise("http://www.example.com/path?_api_token=secret") == "http://www.example.com/path?_api_token=hidden" + assert sanetise("http://www.example.com/path?_api_key=secret") == "http://www.example.com/path?_api_key=hidden" + assert sanetise("http://www.example.com/path?username=secret") == "http://www.example.com/path?username=hidden" + assert sanetise("http://www.example.com/path?login=secret") == "http://www.example.com/path?login=hidden" - assert anonymize("http://www.example.com/path;pass=secret") == "http://www.example.com/path;pass=hidden" - assert anonymize("http://www.example.com/path;password=secret") == "http://www.example.com/path;password=hidden" - assert anonymize("http://www.example.com/path;token=secret") == "http://www.example.com/path;token=hidden" - assert anonymize("http://www.example.com/path;user=secret") == "http://www.example.com/path;user=hidden" - assert anonymize("http://www.example.com/path;key=secret") == "http://www.example.com/path;key=hidden" - assert anonymize("http://www.example.com/path;pwd=secret") == "http://www.example.com/path;pwd=hidden" - assert anonymize("http://www.example.com/path;_key=secret") == "http://www.example.com/path;_key=hidden" - assert anonymize("http://www.example.com/path;_token=secret") == "http://www.example.com/path;_token=hidden" - assert anonymize("http://www.example.com/path;apikey=secret") == "http://www.example.com/path;apikey=hidden" - assert anonymize("http://www.example.com/path;api_key=secret") == "http://www.example.com/path;api_key=hidden" - assert anonymize("http://www.example.com/path;api_token=secret") == "http://www.example.com/path;api_token=hidden" - assert anonymize("http://www.example.com/path;_api_token=secret") == "http://www.example.com/path;_api_token=hidden" - assert anonymize("http://www.example.com/path;_api_key=secret") == "http://www.example.com/path;_api_key=hidden" - assert anonymize("http://www.example.com/path;username=secret") == "http://www.example.com/path;username=hidden" - assert anonymize("http://www.example.com/path;login=secret") == "http://www.example.com/path;login=hidden" + assert sanetise("http://www.example.com/path;pass=secret") == "http://www.example.com/path;pass=hidden" + assert sanetise("http://www.example.com/path;password=secret") == "http://www.example.com/path;password=hidden" + assert sanetise("http://www.example.com/path;token=secret") == "http://www.example.com/path;token=hidden" + assert sanetise("http://www.example.com/path;user=secret") == "http://www.example.com/path;user=hidden" + assert sanetise("http://www.example.com/path;key=secret") == "http://www.example.com/path;key=hidden" + assert sanetise("http://www.example.com/path;pwd=secret") == "http://www.example.com/path;pwd=hidden" + assert sanetise("http://www.example.com/path;_key=secret") == "http://www.example.com/path;_key=hidden" + assert sanetise("http://www.example.com/path;_token=secret") == "http://www.example.com/path;_token=hidden" + assert sanetise("http://www.example.com/path;apikey=secret") == "http://www.example.com/path;apikey=hidden" + assert sanetise("http://www.example.com/path;api_key=secret") == "http://www.example.com/path;api_key=hidden" + assert sanetise("http://www.example.com/path;api_token=secret") == "http://www.example.com/path;api_token=hidden" + assert sanetise("http://www.example.com/path;_api_token=secret") == "http://www.example.com/path;_api_token=hidden" + assert sanetise("http://www.example.com/path;_api_key=secret") == "http://www.example.com/path;_api_key=hidden" + assert sanetise("http://www.example.com/path;username=secret") == "http://www.example.com/path;username=hidden" + assert sanetise("http://www.example.com/path;login=secret") == "http://www.example.com/path;login=hidden" -def test_anonymize_paths(): +def test_sanetise_paths(): # We want to keep earthkit-data's url and path pattern - assert anonymize("/home/johndoe/.ssh/id_rsa") == "/.../id_rsa" + assert sanetise("/home/johndoe/.ssh/id_rsa") == "/.../id_rsa" assert ( - anonymize("/data/model/{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%d)}/analysis.grib") + sanetise("/data/model/{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%d)}/analysis.grib") == "/.../{date:strftime(%Y)}/{date:strftime(%m)}/{date:strftime(%d)}/analysis.grib" ) - assert anonymize("test.grib") == "test.grib" - assert anonymize("../test.grib") == "../test.grib" - assert anonymize("./test.grib") == "./test.grib" - assert anonymize("sub/folder/test.grib") == "sub/folder/test.grib" - assert anonymize("./folder/test.grib") == "./folder/test.grib" + assert sanetise("test.grib") == "test.grib" + assert sanetise("../test.grib") == "../test.grib" + assert sanetise("./test.grib") == "./test.grib" + assert sanetise("sub/folder/test.grib") == "sub/folder/test.grib" + assert sanetise("./folder/test.grib") == "./folder/test.grib" if __name__ == "__main__":