From 89f169556f21599fa37cb7e70a991df4c1f50704 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:02:37 -0500 Subject: [PATCH 1/4] overload the fetch argument of nimbus to add a User-Agent --- xclim/testing/utils.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 25a626bd7..182a39adf 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -13,13 +13,14 @@ import sys import time import warnings -from collections.abc import Sequence +from collections.abc import Callable, Sequence from datetime import datetime as dt +from functools import wraps from importlib import import_module from io import StringIO from pathlib import Path from shutil import copytree -from typing import TextIO +from typing import IO, TextIO from urllib.error import HTTPError, URLError from urllib.parse import urljoin, urlparse from urllib.request import urlretrieve @@ -519,7 +520,8 @@ def nimbus( # noqa: PR01 remote = audit_url( urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data") ) - return pooch.create( + + _nimbus = pooch.create( path=cache_dir, base_url=remote, version=default_testdata_version, @@ -528,6 +530,35 @@ def nimbus( # noqa: PR01 registry=load_registry(branch=branch, repo=repo), ) + # Add a custom fetch method to the Pooch instance + # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763 + # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202) + _nimbus.fetch_diversion = _nimbus.fetch + + # Overload the fetch method to add user-agent headers + @wraps(_nimbus.fetch_diversion) + def _fetch(*args: str, **kwargs: bool | Callable) -> str: # numpydoc ignore=GL08 + + def _downloader( + url: str, + output_file: str | IO, + poocher: pooch.Pooch, + check_only: bool | None = False, + ) -> None: + """Download the file from the 
URL and save it to the save_path.""" + headers = {"User-Agent": f"xclim ({__xclim_version__})"} + downloader = pooch.HTTPDownloader(headers=headers) + return downloader(url, output_file, poocher, check_only=check_only) + + # default to our http/s downloader with user-agent headers + kwargs.setdefault("downloader", _downloader) + return _nimbus.fetch_diversion(*args, **kwargs) + + # Replace the fetch method with the custom fetch method + _nimbus.fetch = _fetch + + return _nimbus + # idea copied from raven that it borrowed from xclim that borrowed it from xarray that was borrowed from Seaborn def open_dataset( From fc14674640853dc176ae1ed8ff262954717d57ef Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:36:20 -0500 Subject: [PATCH 2/4] better support of external testdata repositories --- xclim/testing/utils.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 182a39adf..7de871a76 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -436,6 +436,8 @@ def load_registry( dict Dictionary of filenames and hashes. 
""" + if not repo.endswith("/"): + repo = f"{repo}/" remote_registry = audit_url( urljoin( urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), @@ -443,7 +445,7 @@ def load_registry( ) ) - if branch != default_testdata_version: + if repo == default_testdata_repo_url and branch != default_testdata_version: custom_registry_folder = Path( str(ilr.files("xclim").joinpath(f"testing/{branch}")) ) @@ -452,10 +454,20 @@ def load_registry( urlretrieve(remote_registry, registry_file) # noqa: S310 elif repo != default_testdata_repo_url: - registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt"))) + external_repo_name = urlparse(repo).path.split("/")[-2] + external_branch_name = branch.split("/")[-1] + registry_file = Path( + str( + ilr.files("xclim").joinpath( + f"testing/registry.{external_repo_name}.{external_branch_name}.txt" + ) + ) + ) urlretrieve(remote_registry, registry_file) # noqa: S310 - registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt"))) + else: + registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt"))) + if not registry_file.exists(): raise FileNotFoundError(f"Registry file not found: {registry_file}") @@ -517,6 +529,8 @@ def nimbus( # noqa: PR01 "The `pooch` package is required to fetch the xclim testing data. " "You can install it with `pip install pooch` or `pip install xclim[dev]`." 
) + if not repo.endswith("/"): + repo = f"{repo}/" remote = audit_url( urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data") ) From 412219fc71157f0abfce88a473b0da165633d911 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 20 Nov 2024 15:10:03 -0500 Subject: [PATCH 3/4] cleaner --- xclim/testing/utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xclim/testing/utils.py b/xclim/testing/utils.py index 7de871a76..1969b6519 100644 --- a/xclim/testing/utils.py +++ b/xclim/testing/utils.py @@ -445,15 +445,7 @@ def load_registry( ) ) - if repo == default_testdata_repo_url and branch != default_testdata_version: - custom_registry_folder = Path( - str(ilr.files("xclim").joinpath(f"testing/{branch}")) - ) - custom_registry_folder.mkdir(parents=True, exist_ok=True) - registry_file = custom_registry_folder.joinpath("registry.txt") - urlretrieve(remote_registry, registry_file) # noqa: S310 - - elif repo != default_testdata_repo_url: + if repo != default_testdata_repo_url: external_repo_name = urlparse(repo).path.split("/")[-2] external_branch_name = branch.split("/")[-1] registry_file = Path( @@ -465,6 +457,14 @@ def load_registry( ) urlretrieve(remote_registry, registry_file) # noqa: S310 + elif branch != default_testdata_version: + custom_registry_folder = Path( + str(ilr.files("xclim").joinpath(f"testing/{branch}")) + ) + custom_registry_folder.mkdir(parents=True, exist_ok=True) + registry_file = custom_registry_folder.joinpath("registry.txt") + urlretrieve(remote_registry, registry_file) # noqa: S310 + else: registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt"))) From 2f57a894c6c316f118e4e9eb455d82af1f7f6cdd Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:33:14 -0500 Subject: [PATCH 4/4] update CHANGELOG.rst --- CHANGELOG.rst | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index aed32045a..223085d08 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,10 +15,12 @@ Bug fixes * Fixed pickling issue with ``xclim.sdba.Grouper`` and other classes for usage with `dask>=2024.11`. (:issue:`1992`, :pull:`1993`). * Fixed an issue with ``nimbus`` that was causing URL path components to be improperly joined. (:pull:`1997`). * `base_kws_vars` in `MBCn` is now copied inside the `adjust` function so that in-place changes do not change the dict globally. (:pull:`1999`). +* Fixed a bug in the logic of ``xclim.testing.utils.load_registry`` that impacted the ability to load a `registry.txt` from a non-default repository. (:pull:`2001`). Internal changes ^^^^^^^^^^^^^^^^ * Changed french translations with word "pluvieux" to "avec précipitations". (:issue:`1960`, :pull:`1994`). +* In order to address 403 (forbidden) request errors when retrieving data from GitHub via ReadTheDocs, the ``nimbus`` function has been modified to use an overloaded `fetch` method that appends a User-Agent header to the request. (:pull:`2001`). v0.53.2 (2024-10-31) --------------------