From ffa9485aef5974c345f1404daf44a06deb7fb9cd Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Mon, 14 Oct 2024 17:00:21 +0200 Subject: [PATCH] fix(config): Fix XDG_X_HOME env vars, add OPENML_CACHE_DIR env var --- openml/config.py | 114 ++++++++++++++++++++++++++++--- tests/test_openml/test_config.py | 37 ++++++++-- 2 files changed, 136 insertions(+), 15 deletions(-) diff --git a/openml/config.py b/openml/config.py index b21c981e2..3ba1275b4 100644 --- a/openml/config.py +++ b/openml/config.py @@ -8,6 +8,7 @@ import logging.handlers import os import platform +import shutil import warnings from io import StringIO from pathlib import Path @@ -20,6 +21,8 @@ console_handler: logging.StreamHandler | None = None file_handler: logging.handlers.RotatingFileHandler | None = None +OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" + class _Config(TypedDict): apikey: str @@ -101,14 +104,50 @@ def set_file_log_level(file_output_level: int) -> None: # Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) _user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() == "linux": + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunatly had a problem where we used XDG_CACHE_HOME/org, + # we check hueristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + hueristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not hueristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) + + return _user_path / ".openml" + + _defaults: _Config = { "apikey": "", "server": "https://www.openml.org/api/v1/xml", - "cachedir": ( - Path(os.environ.get("XDG_CACHE_HOME", _user_path / ".cache" / "openml")) - if platform.system() == "Linux" - else _user_path / ".openml" - ), + "cachedir": _resolve_default_cache_dir(), "avoid_duplicate_runs": True, "retry_policy": "human", "connection_n_retries": 5, @@ -218,11 +257,66 @@ def stop_using_configuration_for_example(cls) -> None: cls._start_last_called = False +def _handle_xdg_config_home_backwards_compatibility( + xdg_home: str, +) -> Path: + # NOTE(eddiebergman): A previous bug results in the config + # file being located at `${XDG_CONFIG_HOME}/config` instead + # of `${XDG_CONFIG_HOME}/openml/config`. As to maintain backwards + # compatibility, where users may already may have had a configuration, + # we copy it over an issue a warning until it's deleted. + # As a huerisitic to ensure that it's "our" config file, we try + # parse it first. + config_dir = Path(xdg_home) / "openml" + backwards_compat_config_file = config_dir / "config" + if not backwards_compat_config_file.exists(): + return config_dir + + correct_config_location = config_dir / "config" + # If it errors, that's a good sign it's not ours and we can + # safely ignore it, jumping out of this block. This is a huerisitc + try: + _parse_config(backwards_compat_config_file) + except Exception: # noqa: BLE001 + return config_dir + + # Looks like it's ours, lets try copy it to the correct place + try: + # We copy and return the new copied location + shutil.copy(backwards_compat_config_file, correct_config_location) + openml_logger.warning( + "An openml configuration file was found at the old location " + f"at {backwards_compat_config_file}. We have copied it to the new " + f"location at {correct_config_location}. " + "\nTo silence this warning please verify that the configuration file " + f"at {correct_config_location} is correct and delete the file at " + f"{backwards_compat_config_file}." + ) + return config_dir + except Exception as e: # noqa: BLE001 + # We failed to copy and its ours, return the old one. + openml_logger.warning( + "While attempting to perform a backwards compatible fix, we " + f"failed to copy the openml config file at " + f"{backwards_compat_config_file}' to {correct_config_location}" + f"\n{type(e)}: {e}", + "\n\nTo silence this warning, please copy the file " + "to the new location and delete the old file at " + f"{backwards_compat_config_file}.", + ) + return backwards_compat_config_file + + def determine_config_file_path() -> Path: - if platform.system() == "Linux": - config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml")) + if platform.system().lower() == "linux": + xdg_home = os.environ.get("XDG_CONFIG_HOME") + if xdg_home is not None: + config_dir = _handle_xdg_config_home_backwards_compatibility(xdg_home) + else: + config_dir = Path("~", ".config", "openml") else: config_dir = Path("~") / ".openml" + # Still use os.path.expanduser to trigger the mock in the unit test config_dir = Path(config_dir).expanduser().resolve() return config_dir / "config" @@ -260,11 +354,15 @@ def _setup(config: _Config | None = None) -> None: apikey = config["apikey"] server = config["server"] show_progress = config["show_progress"] - short_cache_dir = Path(config["cachedir"]) n_retries = int(config["connection_n_retries"]) set_retry_policy(config["retry_policy"], n_retries) + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + short_cache_dir = Path(user_defined_cache_dir) + else: + short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() try: diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index a92cd0cfd..a3b7dfd72 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -29,15 +29,29 @@ def test_non_writable_home(self, log_handler_mock, warnings_mock): assert not log_handler_mock.call_args_list[0][1]["create_file_handler"] assert openml.config._root_cache_directory == Path(td) / "something-else" - @unittest.mock.patch("os.path.expanduser") - def test_XDG_directories_do_not_exist(self, expanduser_mock): + def test_XDG_directories_do_not_exist(self): with tempfile.TemporaryDirectory(dir=self.workdir) as td: + # Save previous state + _prev = os.environ.get("XDG_CONFIG_HOME") - def side_effect(path_): - return os.path.join(td, str(path_).replace("~/", "")) + os.environ["XDG_CONFIG_HOME"] = str(Path(td) / "fake_xdg_cache_home") - expanduser_mock.side_effect = side_effect + expected_config_dir = Path(td) / "fake_xdg_cache_home" / "openml" + expected_determined_config_file_path = expected_config_dir / "config" + + # Ensure that it correctly determines the path to the config file + determined_config_file_path = openml.config.determine_config_file_path() + assert determined_config_file_path == expected_determined_config_file_path + + # Ensure that setup will create the config folder as the configuration + # will be written to that location. openml.config._setup() + assert expected_config_dir.exists() + + # Reset it + os.environ.pop("XDG_CONFIG_HOME") + if _prev is not None: + os.environ["XDG_CONFIG_HOME"] = _prev def test_get_config_as_dict(self): """Checks if the current configuration is returned accurately as a dict.""" @@ -121,7 +135,7 @@ def test_example_configuration_start_twice(self): def test_configuration_file_not_overwritten_on_load(): - """ Regression test for #1337 """ + """Regression test for #1337""" config_file_content = "apikey = abcd" with tempfile.TemporaryDirectory() as tmpdir: config_file_path = Path(tmpdir) / "config" @@ -136,12 +150,21 @@ def test_configuration_file_not_overwritten_on_load(): assert config_file_content == new_file_content assert "abcd" == read_config["apikey"] + def test_configuration_loads_booleans(tmp_path): config_file_content = "avoid_duplicate_runs=true\nshow_progress=false" - with (tmp_path/"config").open("w") as config_file: + with (tmp_path / "config").open("w") as config_file: config_file.write(config_file_content) read_config = openml.config._parse_config(tmp_path) # Explicit test to avoid truthy/falsy modes of other types assert True == read_config["avoid_duplicate_runs"] assert False == read_config["show_progress"] + + +def test_openml_cache_dir_env_var(tmp_path: Path) -> None: + expected_path = tmp_path / "test-cache" + with unittest.mock.patch.dict(os.environ, {"OPENML_CACHE_DIR": str(expected_path)}): + openml.config._setup() + assert openml.config._root_cache_directory == expected_path + assert openml.config.get_cache_directory() == str(expected_path / "org" / "openml" / "www")