From 3bd3ecfae32ad9669bf24d60462dc70e8693e27a Mon Sep 17 00:00:00 2001
From: Luca Fabbri
Date: Mon, 16 Sep 2024 15:53:28 +0200
Subject: [PATCH] Redirect from /doi/xxx/yyy to proper dataset, query by DOI value (#86)

* Redirect from /doi/xxx/yyy to proper dataset, query by DOI value

* Now redirecting to dataset page
---
 cads_catalogue_api_service/doi.py  |  89 ++++++++++++++++++++++++
 cads_catalogue_api_service/main.py |   2 +
 tests/test_40_doi.py               | 104 +++++++++++++++++++++++++++++
 3 files changed, 195 insertions(+)
 create mode 100644 cads_catalogue_api_service/doi.py
 create mode 100644 tests/test_40_doi.py

diff --git a/cads_catalogue_api_service/doi.py b/cads_catalogue_api_service/doi.py
new file mode 100644
index 0000000..1e44a32
--- /dev/null
+++ b/cads_catalogue_api_service/doi.py
@@ -0,0 +1,89 @@
+"""DOI compatibility routes for CADS."""
+
+# Copyright 2024, European Union.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# mypy: ignore-errors
+
+import cads_catalogue
+import fastapi
+import sqlalchemy as sa
+import stac_fastapi.types
+import stac_fastapi.types.core
+import structlog
+
+from . import client, dependencies
+
+logger = structlog.getLogger(__name__)
+
+router = fastapi.APIRouter(
+    prefix="/doi",
+    tags=["doi"],
+    responses={fastapi.status.HTTP_404_NOT_FOUND: {"description": "Not found"}},
+)
+
+
+def query_collection(
+    session: sa.orm.Session,
+    doi: str,
+    request: fastapi.Request,
+) -> stac_fastapi.types.stac.Collection:
+    """Load the STAC collection of the resource whose DOI equals ``doi``.
+
+    The lookup uses ``Query.one()``, so SQLAlchemy raises ``NoResultFound`` when
+    no resource matches and ``MultipleResultsFound`` when several do.
+    """
+    return client.collection_serializer(
+        session.query(cads_catalogue.database.Resource)
+        .filter(cads_catalogue.database.Resource.doi == doi)
+        .one(),
+        session=session,
+        request=request,
+    )
+
+
+@router.get("/{doi_prefix}/{doi_suffix}")
+def redirect_by_doi(
+    doi_prefix: str,
+    doi_suffix: str,
+    request: fastapi.Request,
+    session=fastapi.Depends(dependencies.get_session),
+):
+    """Permalink service to redirect to dataset page by querying using DOI.
+
+    Required for keeping DOI compatibility from the old CDS.
+
+    Responds with 404 when no dataset has the requested DOI and with 500 when
+    the DOI is not unique; otherwise with a 301 redirect to ``/datasets/<id>``.
+    """
+    doi = f"{doi_prefix}/{doi_suffix}"
+    try:
+        collection = query_collection(session, doi, request)
+    except sa.orm.exc.NoResultFound as exc:
+        raise fastapi.HTTPException(
+            status_code=fastapi.status.HTTP_404_NOT_FOUND,
+            detail="Dataset not found",
+        ) from exc
+    except sa.orm.exc.MultipleResultsFound as exc:
+        # Keep the event text constant: the DOI is already logged as a key-value.
+        logger.error("Search by DOI led to multiple results", doi=doi)
+        raise fastapi.HTTPException(
+            status_code=fastapi.status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Error while searching for this DOI",
+        ) from exc
+
+    # FIXME: not optimal, we are hardcoding web portal logic (URL layout) here.
+    return fastapi.responses.RedirectResponse(
+        url=f"/datasets/{collection['id']}",
+        status_code=fastapi.status.HTTP_301_MOVED_PERMANENTLY,
+    )
diff --git a/cads_catalogue_api_service/main.py b/cads_catalogue_api_service/main.py
index 2fd89d1..def319a 100644
--- a/cads_catalogue_api_service/main.py
+++ b/cads_catalogue_api_service/main.py
@@ -38,6 +38,7 @@
     client,
     collection_ext,
     config,
+    doi,
     exceptions,
     extensions,
     messages,
@@ -85,6 +86,7 @@ async def lifespan(application: fastapi.FastAPI):
 app.include_router(messages.router)
 app.include_router(schema_org.router)
 app.include_router(collection_ext.router)
+app.include_router(doi.router)
 
 
 def catalogue_openapi() -> dict[str, Any]:
diff --git a/tests/test_40_doi.py b/tests/test_40_doi.py
new file mode 100644
index 0000000..d2130b5
--- /dev/null
+++ b/tests/test_40_doi.py
@@ -0,0 +1,104 @@
+# Copyright 2024, European Union.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+import fastapi
+import fastapi.testclient
+import sqlalchemy as sa
+import stac_fastapi.types
+import stac_fastapi.types.core
+
+from cads_catalogue_api_service.main import app
+
+client = fastapi.testclient.TestClient(app)
+
+
+def static_collection(
+    session: Any,
+    request: Any,
+    collection_id: str | None = None,
+) -> stac_fastapi.types.stac.Collection:
+    """Return a fixed STAC collection, standing in for ``query_collection``."""
+    return {
+        "type": "Collection",
+        "id": "era5-something",
+        "stac_version": "1.0.0",
+        "title": "Era5 name",
+        "description": "This dataset provides a modelled time series of gridded river discharge.",
+        "keywords": [
+            "Temporal coverage: Past",
+        ],
+        "links": [
+            {
+                "rel": "self",
+                "type": "application/json",
+                "href": "http://localhost:8080/api/catalogue/v1/collections/era5-something",
+            },
+        ],
+    }
+
+
+def error_static_collection_query(error: Exception):
+    """Build a ``query_collection`` replacement that always raises ``error``."""
+    def query_collection(
+        session: sa.orm.Session,
+        doi: str,
+        request: fastapi.Request,
+    ) -> stac_fastapi.types.stac.Collection:
+        raise error
+
+    return query_collection
+
+
+def test_doi_error(monkeypatch) -> None:
+    """Lookup failures are reported as 404 (no match) and 500 (ambiguous DOI)."""
+    monkeypatch.setattr(
+        "cads_catalogue_api_service.doi.query_collection",
+        error_static_collection_query(sa.orm.exc.NoResultFound("foo bar not found")),
+    )
+
+    response = client.get(
+        "/doi/11111/doi",
+    )
+
+    assert response.status_code == 404
+    assert response.json()["title"] == "Dataset not found"
+
+    monkeypatch.setattr(
+        "cads_catalogue_api_service.doi.query_collection",
+        error_static_collection_query(
+            sa.orm.exc.MultipleResultsFound("multiple foo ba")
+        ),
+    )
+
+    response = client.get(
+        "/doi/11111/doi",
+    )
+
+    assert response.status_code == 500
+    assert response.json()["title"] == "Error while searching for this DOI"
+
+
+def test_doi_redirect(monkeypatch) -> None:
+    """A resolvable DOI yields a 301 redirect to the dataset page."""
+    monkeypatch.setattr(
+        "cads_catalogue_api_service.doi.query_collection",
+        static_collection,
+    )
+
+    response = client.get("/doi/11111/22222", allow_redirects=False)
+
+    assert response.status_code == 301
+    assert response.headers["location"] == "/datasets/era5-something"