From d72f7d29b20be11ddde08364bab1aba39dabc039 Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Wed, 28 Feb 2024 18:09:25 +0100 Subject: [PATCH 1/7] WIP for canonical pagination --- cads_catalogue_api_service/client.py | 180 +++++++---------------- cads_catalogue_api_service/extensions.py | 14 +- tests/test_10_sorting_paging.py | 2 +- 3 files changed, 60 insertions(+), 136 deletions(-) diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index 403a9cd..7b31db6 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -41,27 +41,6 @@ def decode_base64(encoded: str) -> str: return decoded_str -def encode_base64(decoded: str) -> str: - decoded_bytes = decoded.encode("ascii") - encoded_bytes = base64.b64encode(decoded_bytes) - encoded_str = encoded_bytes.decode("ascii") - return encoded_str - - -def encode_cursor(plain_cursor: Any) -> str: - """Encode the cursor. - - Encoding is based on the type of the plain cursor value - """ - encoded = None - match type(plain_cursor): # noqa: E999 - case datetime.datetime: - encoded = encode_base64(plain_cursor.astimezone().isoformat()) - case _: - encoded = encode_base64(str(plain_cursor)) - return encoded - - def decode_cursor(encoded_cursor: str, sortby: str) -> Any: """Decode the cursor to the original entity.""" decoded: str | datetime.datetime | None = None @@ -74,101 +53,58 @@ def decode_cursor(encoded_cursor: str, sortby: str) -> Any: def get_sorting_clause( - model: type[cads_catalogue.database.Resource], sort: str, inverse: bool + model: type[cads_catalogue.database.Resource], sort: str ) -> dict | tuple: """Get the sorting clause.""" supported_sorts = { "update": ( model.resource_update, - sqlalchemy.desc if not inverse else sqlalchemy.asc, + sqlalchemy.desc, ), - "title": (model.title, sqlalchemy.asc if not inverse else sqlalchemy.desc), - "id": (model.resource_uid, sqlalchemy.asc if not inverse else sqlalchemy.desc), + "title": (model.title, sqlalchemy.asc), + "id": (model.resource_uid, sqlalchemy.asc), } return supported_sorts.get(sort) or supported_sorts["update"] -DEFINED_SORT_CRITERIA = { - "update": ("__le__", "__gt__"), - "title": ("__le__", "__gt__"), - "id": ("__ge__", "__lt__"), -} - - -def get_cursor_compare_criteria(sortby: str, back: bool = False) -> str: - """Generate the proper cursor based on sorting criteria.""" - compare_criteria = DEFINED_SORT_CRITERIA.get(sortby) - if not compare_criteria: - return "__ge__" if not back else "__lt__" - return compare_criteria[0 if not back else 1] - - -def apply_sorting( +def apply_sorting_and_limit( search: sqlalchemy.orm.Query, sortby: str, - cursor: str | None, + page: int, limit: int, - inverse: bool = False, ): - """Apply sortby to the running query. - - The sorting algorithm influences how pagination is build. - Pagination is based on cursor: see https://use-the-index-luke.com/no-offset - """ - sorting_clause = get_sorting_clause( - cads_catalogue.database.Resource, sortby, inverse - ) + """Apply sortby and limit to the running query.""" + sorting_clause = get_sorting_clause(cads_catalogue.database.Resource, sortby) sort_by, sort_order_fn = sorting_clause if sortby != "relevance": search = search.order_by(sort_order_fn(sort_by)) - get_cursor_direction = get_cursor_compare_criteria(sortby, inverse) - # cursor meaning is based on the sorting criteria - if cursor: - sort_expr = getattr(sort_by, get_cursor_direction)( - decode_cursor(cursor, sortby) - ) - search = search.filter(*(sort_expr,)) - # limit is +1 for getting the next page - search = search.limit(limit + 1) + search = search.offset(page * limit).limit(limit) return search, sort_by def get_next_prev_links( - collections: list, - sort_by, - cursor: str | None, + sortby: str, + page: int, limit: int, - back: bool = False, + count: int, ) -> dict[str, Any]: """Generate a prev/next links array. # See https://github.com/radiantearth/stac-api-spec/tree/main/item-search#pagination """ + print(page, limit, count, sortby) links = {} - if len(collections) <= limit: - results = collections - else: - results = collections[:-1] - # Next - if len(collections) > limit or back: + if page * limit + limit < count: # We need a next link, as we have more records to explore - next_cursor = cursor if back else getattr(collections[-1], sort_by.key) - links["next"] = {"cursor": encode_cursor(next_cursor)} + links["next"] = dict(page=page + 1, limit=limit, sortby=sortby.value) # Prev - if cursor: - # We have a cursor, so we provide a back link - # NOTE: this is not perfect - # The back link is always present because we don't know anything about the previous page - back_cursor = getattr((results[0] if not back else results[-1]), sort_by.key) - links["prev"] = { - "cursor": encode_cursor(back_cursor), - "back": "true", - } + if page > 0: + links["next"] = dict(page=page - 1, limit=limit, sortby=sortby.value) return links @@ -355,10 +291,10 @@ def lookup_id( # avoid loading datasets from other portals, to block URL manipulation/pollution search = search.filter(record.portal.in_(portals)) row = search.one() - except sqlalchemy.orm.exc.NoResultFound: + except sqlalchemy.orm.exc.NoResultFound as exc: raise stac_fastapi.types.errors.NotFoundError( f"{record.__name__} {id} not found" - ) + ) from exc return row @@ -368,7 +304,7 @@ def get_active_message( filter_types=["warning", "critical"], ) -> models.Message | None: """Return the latest active message for a dataset.""" - messages = ( + message = ( session.query(cads_catalogue.database.Message) .join(cads_catalogue.database.Message.resources) .where( @@ -377,17 +313,17 @@ def get_active_message( cads_catalogue.database.Message.severity.in_(filter_types), ) .order_by(cads_catalogue.database.Message.date.desc()) - .all() + .first() ) - if messages: + if message: return models.Message( - id=messages[0].message_uid, + id=message.message_uid, date=None, - summary=messages[0].summary, - url=messages[0].url, - severity=messages[0].severity, - content=messages[0].content, - live=messages[0].live, + summary=message.summary, + url=message.url, + severity=message.severity, + content=message.content, + live=message.live, ) return None @@ -398,6 +334,8 @@ def collection_serializer( request: fastapi.Request, preview: bool = False, schema_org: bool = False, + with_message: bool = True, + with_keywords: bool = True, ) -> stac_fastapi.types.stac.Collection: """Transform database model to stac collection.""" collection_links = generate_collection_links( @@ -408,7 +346,7 @@ def collection_serializer( model=db_model, base_url=config.settings.document_storage_url ) - active_message = get_active_message(db_model, session) + active_message = get_active_message(db_model, session) if with_message else None additional_properties = { **({"assets": assets} if assets else {}), @@ -449,7 +387,11 @@ def collection_serializer( title=db_model.title, description=db_model.abstract, # FIXME: this is triggering a long list of subqueries - keywords=[keyword.keyword_name for keyword in db_model.keywords], + keywords=( + [keyword.keyword_name for keyword in db_model.keywords] + if with_keywords + else [] + ), # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license # note that this small check, evenif correct, is triggering a lot of subrequests license=( @@ -538,9 +480,8 @@ def all_datasets( q: str | None = None, kw: list[str] | None = [], sortby: str = "relevance", - cursor: str | None = None, + page: int = 0, limit: int = 999, - back: bool = False, route_name="Get Collections", search_stats: bool = False, ) -> models.CADSCollections: @@ -550,27 +491,22 @@ def all_datasets( ) route_ref = str(request.url_for(route_name)) - base_url = str(request.base_url) + with self.reader.context_session() as session: search = session.query(self.collection_table).options( *database.deferred_columns ) search = search_utils.apply_filters(session, search, q, kw, portals=portals) count = search.count() - search, sort_by = apply_sorting( - search=search, sortby=sortby, cursor=cursor, limit=limit, inverse=back + search, sort_by = apply_sorting_and_limit( + search=search, sortby=sortby, page=page, limit=limit ) + print(sort_by) collections = search.all() - # Filter function always returns an item more than the limit to know if there is a next/prev page - # But response is build on effective page size - if len(collections) <= limit: - results = collections - else: - results = collections[:-1] - - if len(results) == 0: + if len(collections) == 0 and route_name != "Get Collections": + # For canonical STAC requests to /collections, we don't want to raise a 404 raise stac_fastapi.types.errors.NotFoundError( "Search does not match any dataset" ) @@ -579,7 +515,7 @@ def all_datasets( collection_serializer( collection, session=session, request=request, preview=True ) - for collection in (results if not back else reversed(results)) + for collection in collections ] links = [ @@ -601,41 +537,37 @@ def all_datasets( ] next_prev_links = get_next_prev_links( - collections=collections, - sort_by=sort_by, - cursor=cursor, + sortby=sortby, + page=page, limit=limit, - back=back, + count=count, ) - if next_prev_links.get("next"): + print(next_prev_links) + + if next_prev_links.get("prev"): qs = urllib.parse.urlencode( { - **{ - k: v - for (k, v) in request.query_params.items() - if k != "back" - }, - "cursor": next_prev_links["next"]["cursor"], + **{k: v for (k, v) in request.query_params.items()}, + **next_prev_links["prev"], } ) links.append( { - "rel": "next", + "rel": "prev", "href": f"{route_ref}?{qs}", "type": stac_pydantic.shared.MimeTypes.json, } ) - if next_prev_links.get("prev"): + if next_prev_links.get("next"): qs = urllib.parse.urlencode( { **{k: v for (k, v) in request.query_params.items()}, - "cursor": next_prev_links["prev"]["cursor"], - "back": "true", + **next_prev_links["next"], } ) links.append( { - "rel": "prev", + "rel": "next", "href": f"{route_ref}?{qs}", "type": stac_pydantic.shared.MimeTypes.json, } diff --git a/cads_catalogue_api_service/extensions.py b/cads_catalogue_api_service/extensions.py index 69bedf5..7f4e523 100644 --- a/cads_catalogue_api_service/extensions.py +++ b/cads_catalogue_api_service/extensions.py @@ -39,15 +39,8 @@ def datasets_search( sortby: CatalogueSortCriterion = fastapi.Query( default=CatalogueSortCriterion.update_desc ), - # FIXME: remove this deprecated parameter - sorting: CatalogueSortCriterion = fastapi.Query( - default=None, - deprecated=True, - description="Deprecated, use sortby instead.", - ), - cursor: str = fastapi.Query(default=None, include_in_schema=False), + page: int = fastapi.Query(default=0, ge=0), limit: int = fastapi.Query(default=config.MAX_LIMIT, ge=1, le=config.MAX_LIMIT), - back: bool = fastapi.Query(default=False, include_in_schema=False), search_stats: bool = fastapi.Query(default=True), ) -> dict[str, Any]: """Filter datasets based on search parameters.""" @@ -55,10 +48,9 @@ def datasets_search( request=request, q=q, kw=kw, - sortby=sorting or sortby, - cursor=cursor, + sortby=sortby, + page=page, limit=limit, - back=back, route_name="Datasets Search", search_stats=search_stats, ) diff --git a/tests/test_10_sorting_paging.py b/tests/test_10_sorting_paging.py index ace84be..facd14b 100644 --- a/tests/test_10_sorting_paging.py +++ b/tests/test_10_sorting_paging.py @@ -92,7 +92,7 @@ def test_get_cursor_compare_criteria() -> None: def test_apply_sorting() -> None: query = FakeQuery() - search, sort_by = cads_catalogue_api_service.client.apply_sorting( + search, sort_by = cads_catalogue_api_service.client.apply_sorting_and_limit( query, sortby="id", cursor=None, limit=10 ) assert sort_by.key == "resource_uid" From ddfcb439c2e531704bae6f4985462895209d1526 Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Thu, 29 Feb 2024 09:37:12 +0100 Subject: [PATCH 2/7] Fixed prev link --- cads_catalogue_api_service/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index 7b31db6..7182a7a 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -104,7 +104,7 @@ def get_next_prev_links( links["next"] = dict(page=page + 1, limit=limit, sortby=sortby.value) # Prev if page > 0: - links["next"] = dict(page=page - 1, limit=limit, sortby=sortby.value) + links["prev"] = dict(page=page - 1, limit=limit, sortby=sortby.value) return links From 5950ddc0a42426c3e61fa04848ef666205742123 Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Thu, 29 Feb 2024 11:22:33 +0100 Subject: [PATCH 3/7] Properly apply relevance sorting if we are sorting by relevance --- cads_catalogue_api_service/client.py | 48 +++++++----------- cads_catalogue_api_service/search_utils.py | 59 +++++++++++++++------- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index 7182a7a..df4ef04 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import base64 -import datetime import urllib from typing import Any, Type @@ -28,30 +26,19 @@ import stac_fastapi.types import stac_fastapi.types.core import stac_pydantic -from dateutil import parser -from . import config, database, dependencies, exceptions, models, search_utils +from . import ( + config, + database, + dependencies, + exceptions, + extensions, + models, + search_utils, +) from .fastapisessionmaker import FastAPISessionMaker -def decode_base64(encoded: str) -> str: - encoded_bytes = encoded.encode("ascii") - decoded_bytes = base64.b64decode(encoded_bytes) - decoded_str = decoded_bytes.decode("ascii") - return decoded_str - - -def decode_cursor(encoded_cursor: str, sortby: str) -> Any: - """Decode the cursor to the original entity.""" - decoded: str | datetime.datetime | None = None - match sortby: - case "update": - decoded = parser.parse(decode_base64(encoded_cursor)) - case _: - decoded = decode_base64(encoded_cursor) - return decoded - - def get_sorting_clause( model: type[cads_catalogue.database.Resource], sort: str ) -> dict | tuple: @@ -69,6 +56,7 @@ def get_sorting_clause( def apply_sorting_and_limit( search: sqlalchemy.orm.Query, + q: str, sortby: str, page: int, limit: int, @@ -77,12 +65,15 @@ def apply_sorting_and_limit( sorting_clause = get_sorting_clause(cads_catalogue.database.Resource, sortby) sort_by, sort_order_fn = sorting_clause - if sortby != "relevance": + if sortby == "relevance" and q: + # generate sorting by relevance based on input + search = search.order_by(search_utils.fulltext_order_by(q)) + else: search = search.order_by(sort_order_fn(sort_by)) search = search.offset(page * limit).limit(limit) - return search, sort_by + return search def get_next_prev_links( @@ -95,7 +86,6 @@ def get_next_prev_links( # See https://github.com/radiantearth/stac-api-spec/tree/main/item-search#pagination """ - print(page, limit, count, sortby) links = {} # Next @@ -479,7 +469,7 @@ def all_datasets( request: fastapi.Request, q: str | None = None, kw: list[str] | None = [], - sortby: str = "relevance", + sortby: str = extensions.CatalogueSortCriterion.update_desc, page: int = 0, limit: int = 999, route_name="Get Collections", @@ -499,10 +489,9 @@ def all_datasets( ) search = search_utils.apply_filters(session, search, q, kw, portals=portals) count = search.count() - search, sort_by = apply_sorting_and_limit( - search=search, sortby=sortby, page=page, limit=limit + search = apply_sorting_and_limit( + search=search, q=q, sortby=sortby, page=page, limit=limit ) - print(sort_by) collections = search.all() if len(collections) == 0 and route_name != "Get Collections": @@ -542,7 +531,6 @@ def all_datasets( limit=limit, count=count, ) - print(next_prev_links) if next_prev_links.get("prev"): qs = urllib.parse.urlencode( diff --git a/cads_catalogue_api_service/search_utils.py b/cads_catalogue_api_service/search_utils.py index acb756b..8de45db 100644 --- a/cads_catalogue_api_service/search_utils.py +++ b/cads_catalogue_api_service/search_utils.py @@ -20,6 +20,12 @@ import sqlalchemy as sa import stac_fastapi.types +# TODO: this should be placed in a configuration file +WEIGHT_HIGH_PRIORITY_TERMS = 1.0 +WEIGHT_TITLE = 0.8 +WEIGHT_DESCRIPTION = 0.5 +WEIGHT_FULLTEXT = 0.3 + def split_by_category(keywords: list) -> list: """Given a list of keywords composed by a "category: value", split them in multiple lists. @@ -35,6 +41,39 @@ def split_by_category(keywords: list) -> list: return list(categories.values()) +def generate_ts_query(q: str = ""): + """Generate the string tokenizer from query string. + + Parameters + ---------- + q : str, optional + query string, as read from the API request + + Returns + ------- + SQL Function expression + Tokenized expression used for the full text search or sorting + """ + return sa.func.to_tsquery("english", "|".join(q.split())) + + +def fulltext_order_by(q: str): + """Generate the full text search order by clause.""" + tsquery = generate_ts_query(q) + return sa.func.ts_rank( + "{%s,%s,%s,%s}" + % ( + # NOTE: order of weights follows {D,C,B,A} labelling of 'search_field' of table resources + WEIGHT_HIGH_PRIORITY_TERMS, + WEIGHT_FULLTEXT, + WEIGHT_DESCRIPTION, + WEIGHT_TITLE, + ), + cads_catalogue.database.Resource.search_field, + tsquery, + ).desc() + + def apply_filters( session: sa.orm.Session, search: sa.orm.Query, @@ -90,28 +129,10 @@ def apply_filters( # FT search if q: - weight_high_priority_terms = 1.0 - weight_title = 0.8 - weight_description = 0.5 - weight_fulltext = 0.3 - tsquery = sa.func.to_tsquery("english", "|".join(q.split())) + tsquery = generate_ts_query(q) search = search.filter( cads_catalogue.database.Resource.search_field.bool_op("@@")(tsquery) - ).order_by( - sa.func.ts_rank( - "{%s,%s,%s,%s}" - % ( - # NOTE: order of weights follows {D,C,B,A} labelling of 'search_field' of table resources - weight_high_priority_terms, - weight_fulltext, - weight_description, - weight_title, - ), - cads_catalogue.database.Resource.search_field, - tsquery, - ).desc() ) - return search From 82eae84abfb6008e92cb9f97a47b6fd2a5861523 Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Thu, 29 Feb 2024 13:02:21 +0100 Subject: [PATCH 4/7] Fixed tests --- cads_catalogue_api_service/client.py | 2 +- tests/test_10_serializers.py | 21 +++- tests/test_10_sorting_paging.py | 171 +++++++++++++-------------- tests/test_50_dependencies.py | 28 +++++ tests/testing.py | 45 ++++++- 5 files changed, 170 insertions(+), 97 deletions(-) create mode 100644 tests/test_50_dependencies.py diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index df4ef04..29ec2df 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -77,7 +77,7 @@ def apply_sorting_and_limit( def get_next_prev_links( - sortby: str, + sortby: extensions.CatalogueSortCriterion, page: int, limit: int, count: int, diff --git a/tests/test_10_serializers.py b/tests/test_10_serializers.py index e4fbd35..8c77b46 100644 --- a/tests/test_10_serializers.py +++ b/tests/test_10_serializers.py @@ -12,20 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime + from testing import Request, generate_expected, get_record import cads_catalogue_api_service.client +import cads_catalogue_api_service.models -def empty_get_active_message(*args, **kwargs) -> None: - return None +def fake_get_active_message(*args, **kwargs) -> None: + return cads_catalogue_api_service.models.Message( + id="message-2", + date=datetime.datetime(2024, 1, 1, 12, 15, 34), + content="Message 2", + severity="warning", + live=True, + ) def test_collection_serializer(monkeypatch) -> None: """Test serialization from db record to STAC.""" monkeypatch.setattr( "cads_catalogue_api_service.client.get_active_message", - empty_get_active_message, + fake_get_active_message, ) request = Request("https://mycatalogue.org/") # note the final slash! record = get_record("era5-something") @@ -40,3 +49,9 @@ def test_collection_serializer(monkeypatch) -> None: ) assert stac_record == generate_expected(request.base_url, preview=True) + + stac_record = cads_catalogue_api_service.client.collection_serializer( + record, session=object(), request=request, schema_org=True + ) + + assert stac_record == generate_expected(request.base_url, schema_org=True) diff --git a/tests/test_10_sorting_paging.py b/tests/test_10_sorting_paging.py index facd14b..75a19d4 100644 --- a/tests/test_10_sorting_paging.py +++ b/tests/test_10_sorting_paging.py @@ -14,9 +14,11 @@ # type: ignore -from typing import Any +import sqlalchemy as sa +from testing import get_record import cads_catalogue_api_service.client +import cads_catalogue_api_service.extensions class FakeQuery: @@ -28,125 +30,110 @@ def limit(self, limit: int): self.limit = limit return self - -class MockObj: - def __init__(self, value: int): - self.value = str(value) - - def __getattribute__(self, __name: str) -> Any: - if __name != "value": - global called_key - called_key = self.value - return self.value - return super().__getattribute__(__name) - - -called_key = None - - -class MockSortBy: - def __init__(self, value: str): - self.value = value - - @property - def key(self): - return self.value + def offset(self, offset: int): + self.offset = offset + return self -def test_get_cursor_compare_criteria() -> None: - # unknown criteria - assert ( - cads_catalogue_api_service.client.get_cursor_compare_criteria(sortby="foo") - == "__ge__" - ) - assert ( - cads_catalogue_api_service.client.get_cursor_compare_criteria( - sortby="foo", back=True - ) - == "__lt__" - ) - # asc criteria - assert ( - cads_catalogue_api_service.client.get_cursor_compare_criteria(sortby="id") - == "__ge__" - ) - assert ( - cads_catalogue_api_service.client.get_cursor_compare_criteria( - sortby="id", back=True - ) - == "__lt__" - ) - # desc criteria - assert ( - cads_catalogue_api_service.client.get_cursor_compare_criteria(sortby="update") - == "__le__" - ) - assert ( - cads_catalogue_api_service.client.get_cursor_compare_criteria( - sortby="update", back=True - ) - == "__gt__" +def test_apply_sorting() -> None: + query = FakeQuery() + search = cads_catalogue_api_service.client.apply_sorting_and_limit( + query, q="", sortby="id", page=2, limit=10 ) + assert search.limit == 10 + assert search.offset == 20 + assert search.order_by.element.key == "resource_uid" -def test_apply_sorting() -> None: query = FakeQuery() - - search, sort_by = cads_catalogue_api_service.client.apply_sorting_and_limit( - query, sortby="id", cursor=None, limit=10 + search = cads_catalogue_api_service.client.apply_sorting_and_limit( + query, q="foo", sortby="relevance", page=0, limit=10 ) - assert sort_by.key == "resource_uid" - assert search.limit == 11 + assert search.limit == 10 + assert search.offset == 0 + assert search.order_by.element.name == "ts_rank" -def test_get_next_prev_links() -> None: - collections = [MockObj(v) for v in range(1, 17)] +def test_get_next_prev_links() -> None: next_prev_links = cads_catalogue_api_service.client.get_next_prev_links( - collections=collections[0:11], sort_by=MockSortBy("id"), cursor=None, limit=10 + sortby=cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc, + page=0, + limit=10, + count=100, ) - assert next_prev_links.get("next") == {"cursor": "MTE="} + assert next_prev_links.get("next") == { + "limit": 10, + "page": 1, + "sortby": cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc.value, + } assert next_prev_links.get("prev") is None - assert called_key == "11" next_prev_links = cads_catalogue_api_service.client.get_next_prev_links( - collections=collections[2:13], sort_by=MockSortBy("id"), cursor="2", limit=10 + sortby=cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc, + page=2, + limit=10, + count=100, ) - assert next_prev_links.get("next") == {"cursor": "MTM="} - assert next_prev_links.get("prev") == {"back": "true", "cursor": "Mw=="} - assert called_key == "3" + assert next_prev_links.get("prev") == { + "limit": 10, + "page": 1, + "sortby": cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc.value, + } + assert next_prev_links.get("next") == { + "limit": 10, + "page": 3, + "sortby": cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc.value, + } next_prev_links = cads_catalogue_api_service.client.get_next_prev_links( - collections=collections[2:13], - sort_by=MockSortBy("id"), - cursor="2", + sortby=cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc, + page=9, limit=10, - back=True, + count=100, ) - assert next_prev_links.get("next") == {"cursor": "Mg=="} - assert next_prev_links.get("prev") == {"back": "true", "cursor": "MTI="} - assert called_key == "12" + assert next_prev_links.get("prev") == { + "limit": 10, + "page": 8, + "sortby": cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc.value, + } + assert next_prev_links.get("next") is None next_prev_links = cads_catalogue_api_service.client.get_next_prev_links( - collections=collections[0:4], - sort_by=MockSortBy("id"), - cursor=None, + sortby=cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc, + page=9, limit=10, + count=96, ) + assert next_prev_links.get("prev") == { + "limit": 10, + "page": 8, + "sortby": cads_catalogue_api_service.extensions.CatalogueSortCriterion.id_asc.value, + } assert next_prev_links.get("next") is None - assert next_prev_links.get("prev") is None - next_prev_links = cads_catalogue_api_service.client.get_next_prev_links( - collections=collections[2:6], - sort_by=MockSortBy("id"), - cursor="2", - limit=10, - back=True, - ) - assert next_prev_links.get("next") == {"cursor": "Mg=="} - assert next_prev_links.get("prev") == {"back": "true", "cursor": "Ng=="} +def test_get_sorting_clause(): + record = get_record(id="foo-bar") + + assert cads_catalogue_api_service.client.get_sorting_clause(record, sort="id") == ( + record.resource_uid, + sa.asc, + ) + assert cads_catalogue_api_service.client.get_sorting_clause( + record, sort="updated" + ) == ( + record.resource_update, + sa.desc, + ) + # Default + assert cads_catalogue_api_service.client.get_sorting_clause( + record, sort="foobarbaz" + ) == ( + record.resource_update, + sa.desc, + ) diff --git a/tests/test_50_dependencies.py b/tests/test_50_dependencies.py new file mode 100644 index 0000000..685858d --- /dev/null +++ b/tests/test_50_dependencies.py @@ -0,0 +1,28 @@ +# Copyright 2024, European Union. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cads_catalogue_api_service + + +def test_get_portals_values(): + assert cads_catalogue_api_service.dependencies.get_portals_values( + portal="foo,bar" + ) == [ + "foo", + "bar", + ] + + assert ( + cads_catalogue_api_service.dependencies.get_portals_values(portal=None) is None + ) diff --git a/tests/testing.py b/tests/testing.py index d19ea86..7c77de6 100644 --- a/tests/testing.py +++ b/tests/testing.py @@ -71,13 +71,33 @@ def get_record(id: str) -> cads_catalogue.database.Resource: resource_uid="another-dataset", title="Yet another dataset" ) ], + messages=[ + cads_catalogue.database.Message( + message_uid="message-1", + date=datetime.datetime.now(), + content="Message 1", + severity="info", + live=True, + ), + cads_catalogue.database.Message( + message_uid="message-2", + date=datetime.datetime.now(), + content="Message 2", + severity="warning", + live=True, + ), + ], qa_flag=True, disabled_reason="Disabled because of a reason", + layout="resouces/reanalysis-era5-pressure-levels/layout.json", ) def generate_expected( - base_url="http://foo.org/", document_storage_url="/document-storage/", preview=False + base_url="http://foo.org/", + document_storage_url="/document-storage/", + preview=False, + schema_org=False, ) -> dict: expected = { "type": "Collection", @@ -91,6 +111,17 @@ def generate_expected( "spatial": {"bbox": [[-0.5, 45.0, 50.0, 15.0]]}, "temporal": {"interval": [["1980-01-01T00:00:00Z", None]]}, }, + **( + { + "creator_contact_email": None, + "creator_name": None, + "creator_type": None, + "creator_url": None, + "file_format": None, + } + if schema_org + else {} + ), "links": [ { "rel": "self", @@ -156,6 +187,11 @@ def generate_expected( ), "type": "application/json", }, + { + "href": "https://mycatalogue.org/document-storage/resouces/reanalysis-era5-pressure-levels/layout.json", + "rel": "layout", + "type": "application/json", + }, { "rel": "related", "href": urllib.parse.urljoin( @@ -176,6 +212,13 @@ def generate_expected( "updated": "2020-02-05T00:00:00Z", "sci:doi": "11.2222/cads.12345", "cads:disabled_reason": "Disabled because of a reason", + "cads:message": { + "content": "Message 2", + "date": datetime.datetime(2024, 1, 1, 12, 15, 34), + "id": "message-2", + "live": True, + "severity": "warning", + }, } if not preview: expected = { From 899a58a17235dba7bef4f1e50c3a8288fcfe7cd7 Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Thu, 29 Feb 2024 13:09:40 +0100 Subject: [PATCH 5/7] Test coverage for split_by_category --- cads_catalogue_api_service/client.py | 8 ++++---- tests/test_10_search_utils.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index 29ec2df..ad1f4dd 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -56,10 +56,10 @@ def get_sorting_clause( def apply_sorting_and_limit( search: sqlalchemy.orm.Query, - q: str, sortby: str, page: int, limit: int, + q: str | None = "", ): """Apply sortby and limit to the running query.""" sorting_clause = get_sorting_clause(cads_catalogue.database.Resource, sortby) @@ -77,7 +77,7 @@ def apply_sorting_and_limit( def get_next_prev_links( - sortby: extensions.CatalogueSortCriterion, + sortby: str, page: int, limit: int, count: int, @@ -91,10 +91,10 @@ def get_next_prev_links( # Next if page * limit + limit < count: # We need a next link, as we have more records to explore - links["next"] = dict(page=page + 1, limit=limit, sortby=sortby.value) + links["next"] = dict(page=page + 1, limit=limit, sortby=sortby) # Prev if page > 0: - links["prev"] = dict(page=page - 1, limit=limit, sortby=sortby.value) + links["prev"] = dict(page=page - 1, limit=limit, sortby=sortby) return links diff --git a/tests/test_10_search_utils.py b/tests/test_10_search_utils.py index f5c02cd..2fe360b 100644 --- a/tests/test_10_search_utils.py +++ b/tests/test_10_search_utils.py @@ -19,6 +19,7 @@ from cads_catalogue_api_service.main import app from cads_catalogue_api_service.search_utils import ( populate_facets, + split_by_category, ) client = fastapi.testclient.TestClient(app) @@ -68,3 +69,13 @@ def test_populate_facets(monkeypatch): {"category": "cat2", "groups": {"kw1": 2}}, ] } + + +def test_split_by_category(): + assert split_by_category(["cat1: kw1", "cat1: kw2", "cat2: kw1"]) == [ + [ + "cat1: kw1", + "cat1: kw2", + ], + ["cat2: kw1"], + ] From 35ff4c5ac0e17b79869baf8efd3877c6c039caed Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Thu, 29 Feb 2024 13:17:28 +0100 Subject: [PATCH 6/7] Fixes to link building --- cads_catalogue_api_service/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index ad1f4dd..abafb41 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -526,7 +526,7 @@ def all_datasets( ] next_prev_links = get_next_prev_links( - sortby=sortby, + sortby=sortby.value, page=page, limit=limit, count=count, From 60ee33eee426344a5dcc41edd2e0d773ad8812cd Mon Sep 17 00:00:00 2001 From: Luca Fabbri Date: Thu, 29 Feb 2024 15:49:48 +0100 Subject: [PATCH 7/7] type fixes --- cads_catalogue_api_service/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cads_catalogue_api_service/client.py b/cads_catalogue_api_service/client.py index abafb41..d35157c 100644 --- a/cads_catalogue_api_service/client.py +++ b/cads_catalogue_api_service/client.py @@ -469,7 +469,7 @@ def all_datasets( request: fastapi.Request, q: str | None = None, kw: list[str] | None = [], - sortby: str = extensions.CatalogueSortCriterion.update_desc, + sortby: extensions.CatalogueSortCriterion = extensions.CatalogueSortCriterion.update_desc, page: int = 0, limit: int = 999, route_name="Get Collections",