From b98f091fda1f0047bf620b028fc086c1fa44377e Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 23 Nov 2023 09:49:15 -0800 Subject: [PATCH] fix: Fix bugs in the "get tags" REST API, make it more consistent (#119) * fix: Fix bugs in the "get tags" REST API, make it more consistent * fix: shallow search was not returning parent tags with children that match * docs: update the ADR that describes the "get tags" endpoint * fix: don't throw an error when retrieving tags below a leaf tag * feat: return the number of matching children only with ?search_term=... * chore: Version bump --- .../0014-single-taxonomy-view-api.rst | 115 +++------- openedx_learning/__init__.py | 2 +- openedx_tagging/core/tagging/models/base.py | 4 +- .../core/tagging/rest_api/paginators.py | 9 +- .../core/tagging/rest_api/v1/serializers.py | 10 +- .../core/tagging/rest_api/v1/views.py | 76 ++++--- .../openedx_tagging/core/tagging/test_api.py | 34 +-- .../core/tagging/test_models.py | 26 ++- .../core/tagging/test_views.py | 214 ++++++++++++++---- 9 files changed, 305 insertions(+), 185 deletions(-) diff --git a/docs/decisions/0014-single-taxonomy-view-api.rst b/docs/decisions/0014-single-taxonomy-view-api.rst index ddd0de26..c984aa0f 100644 --- a/docs/decisions/0014-single-taxonomy-view-api.rst +++ b/docs/decisions/0014-single-taxonomy-view-api.rst @@ -4,10 +4,7 @@ Context -------- -This view returns tags of a closed taxonomy (for MVP has not been implemented yet -for open taxonomies). It is necessary to make a decision about what structure the tags are going -to have, how the pagination is going to work and how will the search for tags be implemented. -It was taken into account that taxonomies commonly have the following characteristics: +This view returns tags of a taxonomy (works with closed, open, and system-defined). It is necessary to make a decision about what structure the tags are going to have, how the pagination is going to work and how will the search for tags be implemented. It was taken into account that taxonomies commonly have the following characteristics: - It has few root tags. - It may a very large number of children for each tag. @@ -17,16 +14,18 @@ For the decisions, the following use cases were taken into account: - As a taxonomy administrator, I want to see all the tags available for use with a closed taxonomy, so that I can see the taxonomy's structure in the management interface. - - As a taxonomy administrator, I want to see the available tags as a lits of root tags - that can be expanded to show children tags. - - As a taxonomy administrator, I want to sort the list of root tags alphabetically: A-Z (default) and Z-A. - - As a taxonomy administrator, I want to expand all root tags to see all children tags. - - As a taxonomy administrator, I want to search for tags, so I can find root and children tags more easily. + + - As a taxonomy administrator, I want to see the available tags as a list of root tags + that can be expanded to show children tags. + - As a taxonomy administrator, I want to sort the list of root tags alphabetically: A-Z (default) and Z-A. + - As a taxonomy administrator, I want to expand all root tags to see all children tags. + - As a taxonomy administrator, I want to search for tags, so I can find root and children tags more easily. - As a course author, when I am editing the tags of a component, I want to see all the tags available from a particular taxonomy that I can use. - - As a course author, I want to see the available tags as a lits of root tags - that can be expanded to show children tags. - - As a course author, I want to search for tags, so I can find root and children tags more easily. + + - As a course author, I want to see the available tags as a list of root tags + that can be expanded to show children tags. + - As a course author, I want to search for tags, so I can find root and children tags more easily. Excluded use cases: @@ -41,107 +40,59 @@ Decision Views & Pagination ~~~~~~~~~~~~~~~~~~~ -Make one view: +We will have one REST API endpoint that can handle these use cases: -**get_matching_tags(parent_tag_id: str = None, search_term: str = None)** +**/tagging/rest_api/v1/taxonomies/:id/tags/?parent_tag=...** that can handle this cases: -- Get the root tags of the taxonomy. If ``parent_tag_id`` is ``None``. +- Get the root tags of the taxonomy. If ``parent_tag`` is omitted. - Get the children of a tag. Called each time the user expands a parent tag to see its children. - If ``parent_tag_id`` is not ``None``. + In this case, ``parent_tag`` is set to the value of the parent tag. In both cases the results are paginated. In addition to the common pagination metadata, it is necessary to return: - Total number of pages. -- Total number of root/children tags. +- Total number of tags in the result. - Range index of current page, Ex. Page 1: 1-12, Page 2: 13-24. - Total number of children of each tag. The pagination of root tags and child tags are independent. -In order to be able to fulfill the functionality of "Expand-all" in a scalable way, -the following has been agreed: -- Create a ``TAGS_THRESHOLD`` (default: 1000). -- If ``taxonomy.tags.count < TAGS_THRESHOLD``, then ``get_matching_tags()`` will return all tags on the taxonomy, - roots and children. -- Otherwise, ``get_matching_tags()`` will only return paginated root tags, and it will be necessary - to use ``get_matching_tags()`` to return paginated children. Also the "Expand-all" functionality will be disabled. +**Optional full-depth response** -For search you can see the next section (Search tags) +In order to be able to fulfill the functionality of "Expand-all" in a scalable way, and allow users to quickly browse taxonomies that have lots of small branches, the API will accept an optional parameter ``?full_depth_threshold``. If specified (e.g. ``?full_depth_threshold=1000``) and there are fewer results than this threshold, the full tree of tags will be returned a a single giant page, including all tags up to three levels deep. **Pros** -- It is the simplest way. +- This approach is simple and flexible. - Paging both root tags and children mitigates the huge number of tags that can be found in large taxonomies. Search tags ~~~~~~~~~~~~ -Support tag search on the backend. Return a subset of matching tags. -We will use the same view to perform a search with the same logic: - -**get_matching_tags(parent_tag_id: str = None, search_term: str = None)** - -We can use ``search_term`` to perform a search on all taxonomy tags or children tags depending of ``parent_tag_id``. -The result will be a pruned tree with the necessary tags to be able to reach the results from a root tag. -Ex. if in the result there may be a child tag of ``depth=2``, but the parents are not found in the result. -In this case, it is necessary to add the parent and the parent of the parent (root tag) to be able to show -the child tag that is in the result. - -For the search, ``SEARCH_TAGS_THRESHOLD`` will be used. (It is recommended that it be 20% of ``TAGS_THRESHOLD``). -It will work in the following way: - -- If ``search_result.count() < SEARCH_TAGS_THRESHOLD``, then it will return all tags on the result tree without pagination. -- Otherwise, it will return the roots of the result tree with pagination. Each root will have the entire pruned branch. +The same API endpoint will support an optional ``?search_term=...`` parameter to support searching/filtering tags by keyword. -It will work in the same way of ``TAGS_THRESHOLD`` (see Views & Pagination) - -**Pros** - -- It is the most scalable way. +The API endpoint will work exactly as described above (returning a single level of tags by default, paginated, optionally listing only the tags below a specific parent tag, optionally returning a deep tree if the results are small enough) - BUT only tags that match the keyword OR their ancestor tags will be returned. We return their ancestor tags (even if the ancestors themselves don't match the keyword) so that the tree of tags that do match can be displayed correctly. This also allows the UI to load the matching tags one layer at a time, paginated, if desired. Tag representation ~~~~~~~~~~~~~~~~~~~ -Return a list of root tags and within a link to obtain the children tags -or the complete list of children tags depending of ``TAGS_THRESHOLD`` or ``SEARCH_TAGS_THRESHOLD``. -The list of root tags will be ordered alphabetically. If it has child tags, they must also -be ordered alphabetically. - -**(taxonomy.tags.count < *_THRESHOLD)**:: - - { - "count": 100, - "tags": [ - { - "id": "tag_1", - "value": "Tag 1", - "taxonomy_id": "1", - "sub_tags": [ - { - "id": "tag_2", - "value": "Tag 2", - "taxonomy_id": "1", - "sub_tags": [ - (....) - ] - }, - (....) - ] - } +Return a list of root tags and within a link to obtain the children tags or the complete list of children tags depending on the requested ``?full_depth_threshold`` and the number of results. +The list of tags will be ordered in tree order (and alphabetically). If it has child tags, they must also be ordered alphabetically. - -**Otherwise**:: +**Example**:: { "count": 100, "tags": [ { - "id": "tag_1", "value": "Tag 1", - "taxonomy_id": "1", - "sub_tags_link": "http//api-call-to-get-children.com" + "depth": 0, + "external_id": None, + "child_count": 15, + "parent_value": None, + "sub_tags_url": "http//api-call-to-get-children.com" }, (....) ] @@ -155,6 +106,12 @@ be ordered alphabetically. - It is kept as a simple implementation. +Backend Python API +~~~~~~~~~~~~~~~~~~ + +On the backend, a very flexible API is available as ``Taxonomy.get_filtered_tags()`` which can cover all of the same use cases as the REST API endpoint (listing tags, shallow or deep, filtering on search term). However, the Python API returns a ``QuerySet`` of tag data dictionaries, rather than a JSON response. + + Rejected Options ----------------- @@ -199,4 +156,4 @@ can return all the tags in one page. So we can perform the tag search on the fro **Cons:** - It is not scalable. -- Sets limits of tags that can be created in the taxonomy. \ No newline at end of file +- Sets limits of tags that can be created in the taxonomy. diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index d59f3d1e..d458a9e2 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.3.5" +__version__ = "0.3.6" diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index f79bb252..981cffc3 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -493,13 +493,15 @@ def _get_filtered_tags_deep( if pk is not None: matching_ids.append(pk) qs = qs.filter(pk__in=matching_ids) + qs = qs.annotate(child_count=models.Count("children", filter=Q(children__pk__in=matching_ids))) elif excluded_values: raise NotImplementedError("Using excluded_values without search_term is not currently supported.") # We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether. # It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the # frontend. + else: + qs = qs.annotate(child_count=models.Count("children")) - qs = qs.annotate(child_count=models.Count("children")) # Add the "depth" to each tag: qs = Tag.annotate_depth(qs) # Add the "lineage" as a field called "sort_key" to sort them in order correctly: diff --git a/openedx_tagging/core/tagging/rest_api/paginators.py b/openedx_tagging/core/tagging/rest_api/paginators.py index 8848e537..2ebce12d 100644 --- a/openedx_tagging/core/tagging/rest_api/paginators.py +++ b/openedx_tagging/core/tagging/rest_api/paginators.py @@ -5,10 +5,7 @@ from edx_rest_framework_extensions.paginators import DefaultPagination # type: ignore[import] # From this point, the tags begin to be paginated -TAGS_THRESHOLD = 1000 - -# From this point, search tags begin to be paginated -SEARCH_TAGS_THRESHOLD = 200 +MAX_FULL_DEPTH_THRESHOLD = 10_000 class TagsPagination(DefaultPagination): @@ -29,5 +26,5 @@ class DisabledTagsPagination(DefaultPagination): It should be used if the number of tags within the taxonomy does not exceed `TAGS_THRESHOLD`. """ - page_size = TAGS_THRESHOLD - max_page_size = TAGS_THRESHOLD + 1 + page_size = MAX_FULL_DEPTH_THRESHOLD + max_page_size = MAX_FULL_DEPTH_THRESHOLD + 1 diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py index 62d4ef3e..ca95c91c 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/serializers.py +++ b/openedx_tagging/core/tagging/rest_api/v1/serializers.py @@ -4,6 +4,7 @@ from __future__ import annotations from typing import Any +from urllib.parse import urlencode from rest_framework import serializers from rest_framework.reverse import reverse @@ -162,12 +163,17 @@ def get_sub_tags_url(self, obj: TagData | Tag): child_count = obj.child_count if isinstance(obj, Tag) else obj["child_count"] if child_count > 0 and "taxonomy_id" in self.context: value = obj.value if isinstance(obj, Tag) else obj["value"] - query_params = f"?parent_tag={value}" request = self.context["request"] + query_params = request.query_params + new_query_params = {"parent_tag": value} + if "full_depth_threshold" in query_params: + new_query_params["full_depth_threshold"] = query_params["full_depth_threshold"] + if "search_term" in query_params: + new_query_params["search_term"] = query_params["search_term"] url_namespace = request.resolver_match.namespace # get the namespace, usually "oel_tagging" url = ( reverse(f"{url_namespace}:taxonomy-tags", args=[str(self.context["taxonomy_id"])]) - + query_params + + "?" + urlencode(new_query_params) ) return request.build_absolute_uri(url) return None diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index aa77bf64..591e95e1 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -30,7 +30,7 @@ from ...import_export.parsers import ParserFormat from ...models import Taxonomy from ...rules import ObjectTagPermissionItem -from ..paginators import TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination +from ..paginators import MAX_FULL_DEPTH_THRESHOLD, DisabledTagsPagination, TagsPagination from .permissions import ObjectTagObjectPermissions, TagObjectPermissions, TaxonomyObjectPermissions from .serializers import ( ObjectTagListQueryParamsSerializer, @@ -528,24 +528,28 @@ class TaxonomyTagsView(ListAPIView, RetrieveUpdateDestroyAPIView): """ View to list/create/update/delete tags of a taxonomy. - If you specify ?root_only or ?parent_tag_value=..., only one "level" of the - hierachy will be returned. Otherwise, several levels will be returned, in - tree order, up to the maximum supported depth. Additional levels/depth can - be retrieved by using ?parent_tag_value to load more data. + **List Request Details** - Note: If the taxonomy is particularly large (> 1,000 tags), ?root_only is - automatically set true by default and cannot be disabled. This way, users - can more easily select which tags they want to expand in the tree, and load - just that subset of the tree as needed. This may be changed in the future. + By default, only one "level" of the hierachy will be returned. But for more efficient handling of small taxonomies, + you can set ?full_depth_threshold=1000 which means that if there are fewer than 1,000 tags in the result set, they + will be returned in a single page of results that has all the tags in tree order up to the maximum supported depth. + + Additional levels/depth can be retrieved recursively by using the "sub_tags_url" field of any returned tag, or + using the ?parent_tag=value parameter manually. **List Query Parameters** * id (required) - The ID of the taxonomy to retrieve tags. + * search_term (optional) - Only return tags matching this term, plus their ancestors. Case insensitive. * parent_tag (optional) - Retrieve children of the tag with this value. - * root_only (optional) - If specified, only root tags are returned. - * include_counts (optional) - Include the count of how many times each - tag has been used. + * full_depth_threshold (optional) - If there are fewer than this many results, return the full (sub)tree of + results, up to maximum supported depth, in a single giant page of results. By default this is disabled + (equivalent to full_depth_threshold=0), which provides a more consistent and predictable response. + Using full_depth_threshold=1000 is recommended in general, but use lower values during development to ensure + compatibility with both large and small result sizes. + * include_counts (optional) - Include the count of how many times each tag has been used. * page (optional) - Page number (default: 1) - * page_size (optional) - Number of items per page (default: 10) + * page_size (optional) - Number of items per page (default: 30). Ignored when there are fewer tags than + specified by ?full_depth_threshold. **List Example Requests** GET api/tagging/v1/taxonomy/:id/tags - Get tags of taxonomy @@ -655,34 +659,42 @@ def get_queryset(self) -> TagDataQuerySet: taxonomy_id = int(self.kwargs.get("pk")) taxonomy = self.get_taxonomy(taxonomy_id) parent_tag_value = self.request.query_params.get("parent_tag", None) - root_only = "root_only" in self.request.query_params include_counts = "include_counts" in self.request.query_params search_term = self.request.query_params.get("search_term", None) - - if parent_tag_value: - # Fetching tags below a certain parent is always paginated and only returns the direct children - depth = 1 - if root_only: - raise ValidationError("?root_only and ?parent_tag cannot be used together") + try: + full_depth_threshold = int(self.request.query_params.get("full_depth_threshold", 0)) + except ValueError: + full_depth_threshold = -1 + if full_depth_threshold < 0 or full_depth_threshold > MAX_FULL_DEPTH_THRESHOLD: + raise ValidationError("Invalid full_depth_threshold") + + if full_depth_threshold or search_term: + # If full_depth_threshold is set, default to maximum depth and later prune to depth=1 if needed. + # We also need to do a deep search if there is a search term, to ensure we return parent items with children + # that match. + depth = None else: - if root_only: - depth = 1 # User Explicitly requested to load only the root tags for now - elif search_term: - depth = None # For search, default to maximum depth but use normal pagination - elif taxonomy.tag_set.count() > TAGS_THRESHOLD: - # This is a very large taxonomy. Only load the root tags at first, so users can choose what to load. - depth = 1 - else: - # We can load and display all the tags in the taxonomy at once: - self.pagination_class = DisabledTagsPagination - depth = None # Maximum depth + depth = 1 # Otherwise (default), load a single level of results only. - return taxonomy.get_filtered_tags( + results = taxonomy.get_filtered_tags( parent_tag_value=parent_tag_value, search_term=search_term, depth=depth, include_counts=include_counts, ) + if depth == 1: + # We're already returning just a single level. It will be paginated normally. + return results + elif full_depth_threshold and len(results) < full_depth_threshold: + # We can load and display all the tags in this (sub)tree at once: + self.pagination_class = DisabledTagsPagination + return results + else: + # We had to do a deep query, but we will only return one level of results. + # This is because the user did not request a deep response (via full_depth_threshold) or the result was too + # large (larger than the threshold). + # It will be paginated normally. + return results.filter(parent_value=parent_tag_value) def post(self, request, *args, **kwargs): """ diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index 4173c5a1..5ad7e3fa 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -193,11 +193,11 @@ def test_get_root_language_tags(self): def test_search_tags(self) -> None: result = tagging_api.search_tags(self.taxonomy, search_term='eU') assert pretty_format_tags(result, parent=False) == [ - 'Archaea (children: 3)', # Doesn't match 'eU' but is included because a child is included + 'Archaea (children: 1)', # Doesn't match 'eU' but is included because a child is included ' Euryarchaeida (children: 0)', - 'Bacteria (children: 2)', # Doesn't match 'eU' but is included because a child is included + 'Bacteria (children: 1)', # Doesn't match 'eU' but is included because a child is included ' Eubacteria (children: 0)', - 'Eukaryota (children: 5)', + 'Eukaryota (children: 0)', ] @override_settings(LANGUAGES=test_languages) @@ -603,30 +603,30 @@ def get_object_tags(): @ddt.data( ("ChA", [ - "Archaea (used: 1, children: 3)", + "Archaea (used: 1, children: 2)", " Euryarchaeida (used: 0, children: 0)", " Proteoarchaeota (used: 0, children: 0)", - "Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does + "Bacteria (used: 0, children: 1)", # does not contain "cha" but a child does " Archaebacteria (used: 1, children: 0)", ]), ("ar", [ - "Archaea (used: 1, children: 3)", + "Archaea (used: 1, children: 2)", " Euryarchaeida (used: 0, children: 0)", " Proteoarchaeota (used: 0, children: 0)", - "Bacteria (used: 0, children: 2)", # does not contain "ar" but a child does + "Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does " Archaebacteria (used: 1, children: 0)", - "Eukaryota (used: 0, children: 5)", - " Animalia (used: 1, children: 7)", # does not contain "ar" but a child does + "Eukaryota (used: 0, children: 1)", + " Animalia (used: 1, children: 2)", # does not contain "ar" but a child does " Arthropoda (used: 1, children: 0)", " Cnidaria (used: 0, children: 0)", ]), ("aE", [ - "Archaea (used: 1, children: 3)", + "Archaea (used: 1, children: 2)", " Euryarchaeida (used: 0, children: 0)", " Proteoarchaeota (used: 0, children: 0)", - "Bacteria (used: 0, children: 2)", # does not contain "ae" but a child does + "Bacteria (used: 0, children: 1)", # does not contain "ae" but a child does " Archaebacteria (used: 1, children: 0)", - "Eukaryota (used: 0, children: 5)", # does not contain "ae" but a child does + "Eukaryota (used: 0, children: 1)", # does not contain "ae" but a child does " Plantae (used: 1, children: 0)", ]), ("a", [ @@ -637,11 +637,11 @@ def get_object_tags(): "Bacteria (used: 0, children: 2)", " Archaebacteria (used: 1, children: 0)", " Eubacteria (used: 0, children: 0)", - "Eukaryota (used: 0, children: 5)", + "Eukaryota (used: 0, children: 4)", " Animalia (used: 1, children: 7)", " Arthropoda (used: 1, children: 0)", - " Chordata (used: 0, children: 1)", - " Cnidaria (used: 0, children: 0)", + " Chordata (used: 0, children: 0)", # <<< Chordata has a matching child but we only support searching + " Cnidaria (used: 0, children: 0)", # 3 levels deep at once for now. " Ctenophora (used: 0, children: 0)", " Gastrotrich (used: 1, children: 0)", " Placozoa (used: 1, children: 0)", @@ -678,11 +678,11 @@ def test_autocomplete_tags_closed_omit_object(self) -> None: tagging_api.tag_object(object_id=object_id, taxonomy=self.taxonomy, tags=["Archaebacteria"]) result = tagging_api.search_tags(self.taxonomy, "ChA", exclude_object_id=object_id) assert pretty_format_tags(result, parent=False) == [ - "Archaea (children: 3)", + "Archaea (children: 2)", " Euryarchaeida (children: 0)", " Proteoarchaeota (children: 0)", # These results are no longer included because of exclude_object_id: - # "Bacteria (children: 2)", # does not contain "cha" but a child does + # "Bacteria (children: 1)", # does not contain "cha" but a child does # " Archaebacteria (children: 0)", ] diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index 367b735b..c730b16b 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -417,10 +417,10 @@ def test_search(self) -> None: """ result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="ARCH")) assert result == [ - "Archaea (None) (children: 3)", # Matches the value of this root tag, ARCHaea + "Archaea (None) (children: 2)", # Matches the value of this root tag, ARCHaea " Euryarchaeida (Archaea) (children: 0)", # Matches the value of this child tag " Proteoarchaeota (Archaea) (children: 0)", # Matches the value of this child tag - "Bacteria (None) (children: 2)", # Does not match this tag but matches a descendant: + "Bacteria (None) (children: 1)", # Does not match this tag but matches a descendant: " Archaebacteria (Bacteria) (children: 0)", # Matches the value of this child tag ] @@ -431,9 +431,25 @@ def test_search_2(self) -> None: """ result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata")) assert result == [ - "Eukaryota (None) (children: 5)", - " Animalia (Eukaryota) (children: 7)", - " Chordata (Animalia) (children: 1)", # this is the matching tag. + "Eukaryota (None) (children: 1)", # Has one child that matches + " Animalia (Eukaryota) (children: 1)", + " Chordata (Animalia) (children: 0)", # this is the matching tag. + ] + + def test_search_3(self) -> None: + """ + Another search test, that matches a tag deeper in the taxonomy to check + that the correct child_count is returned by the search. + """ + result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="RO")) + assert result == [ + "Archaea (None) (children: 1)", + " Proteoarchaeota (Archaea) (children: 0)", + "Eukaryota (None) (children: 2)", # Note the "children: 2" is correct - 2 direct children are in the result + " Animalia (Eukaryota) (children: 2)", + " Arthropoda (Animalia) (children: 0)", # match + " Gastrotrich (Animalia) (children: 0)", # match + " Protista (Eukaryota) (children: 0)", # match ] def test_tags_deep(self) -> None: diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index 882de3df..f3f88208 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -4,7 +4,7 @@ from __future__ import annotations import json -from urllib.parse import parse_qs, quote, quote_plus, urlparse +from urllib.parse import parse_qs, quote_plus, urlparse import ddt # type: ignore[import] # typing support in rules depends on https://github.com/dfunckt/django-rules/pull/177 @@ -1105,7 +1105,7 @@ def test_small_taxonomy_root(self): Test explicitly requesting only the root tags of a small taxonomy. """ self.client.force_authenticate(user=self.staff) - response = self.client.get(self.small_taxonomy_url + "?root_only&include_counts") + response = self.client.get(self.small_taxonomy_url + "?include_counts") assert response.status_code == status.HTTP_200_OK data = response.data @@ -1142,7 +1142,7 @@ def test_small_taxonomy(self): Test loading all the tags of a small taxonomy at once. """ self.client.force_authenticate(user=self.staff) - response = self.client.get(self.small_taxonomy_url) + response = self.client.get(self.small_taxonomy_url + "?full_depth_threshold=1000") assert response.status_code == status.HTTP_200_OK data = response.data @@ -1182,22 +1182,20 @@ def test_small_taxonomy_paged(self): Test loading only the first few of the tags of a small taxonomy. """ self.client.force_authenticate(user=self.staff) - response = self.client.get(self.small_taxonomy_url + "?page_size=5") + response = self.client.get(self.small_taxonomy_url + "?page_size=2") assert response.status_code == status.HTTP_200_OK data = response.data + # When pagination is active, we only load a single "layer" at a time: assert pretty_format_tags(data["results"]) == [ "Archaea (None) (children: 3)", - " DPANN (Archaea) (children: 0)", - " Euryarchaeida (Archaea) (children: 0)", - " Proteoarchaeota (Archaea) (children: 0)", "Bacteria (None) (children: 2)", ] # Checking pagination values assert data.get("next") is not None assert data.get("previous") is None - assert data.get("count") == 20 - assert data.get("num_pages") == 4 + assert data.get("count") == 3 + assert data.get("num_pages") == 2 assert data.get("current_page") == 1 # Get the next page: @@ -1205,11 +1203,7 @@ def test_small_taxonomy_paged(self): assert next_response.status_code == status.HTTP_200_OK next_data = next_response.data assert pretty_format_tags(next_data["results"]) == [ - " Archaebacteria (Bacteria) (children: 0)", - " Eubacteria (Bacteria) (children: 0)", "Eukaryota (None) (children: 5)", - " Animalia (Eukaryota) (children: 7)", - " Arthropoda (Animalia) (children: 0)", ] assert next_data.get("current_page") == 2 @@ -1218,18 +1212,18 @@ def test_small_search(self): Test performing a search """ search_term = 'eU' - url = f"{self.small_taxonomy_url}?search_term={search_term}" + url = f"{self.small_taxonomy_url}?search_term={search_term}&full_depth_threshold=100" self.client.force_authenticate(user=self.staff) response = self.client.get(url) assert response.status_code == status.HTTP_200_OK data = response.data assert pretty_format_tags(data["results"], parent=False) == [ - "Archaea (children: 3)", # No match in this tag, but a child matches so it's included + "Archaea (children: 1)", # No match in this tag, but a child matches so it's included " Euryarchaeida (children: 0)", - "Bacteria (children: 2)", # No match in this tag, but a child matches so it's included + "Bacteria (children: 1)", # No match in this tag, but a child matches so it's included " Eubacteria (children: 0)", - "Eukaryota (children: 5)", + "Eukaryota (children: 0)", ] # Checking pagination values @@ -1239,6 +1233,61 @@ def test_small_search(self): assert data.get("num_pages") == 1 assert data.get("current_page") == 1 + def test_small_search_shallow(self): + """ + Test performing a search without full_depth_threshold + """ + search_term = 'eU' + url = f"{self.small_taxonomy_url}?search_term={search_term}" + self.client.force_authenticate(user=self.staff) + response = self.client.get(url) + assert response.status_code == status.HTTP_200_OK + + data = response.data + assert pretty_format_tags(data["results"], parent=False) == [ + "Archaea (children: 1)", # No match in this tag, but a child matches so it's included + "Bacteria (children: 1)", # No match in this tag, but a child matches so it's included + "Eukaryota (children: 0)", + ] + + # Checking pagination values + assert data.get("next") is None + assert data.get("previous") is None + assert data.get("count") == 3 + assert data.get("num_pages") == 1 + assert data.get("current_page") == 1 + + # And we can load the sub_tags_url to "drill down" into the search: + sub_tags_response = self.client.get(data["results"][0]["sub_tags_url"]) + assert sub_tags_response.status_code == status.HTTP_200_OK + + assert pretty_format_tags(sub_tags_response.data["results"], parent=False) == [ + # This tag maches our search results and is a child of the previously returned Archaea tag: + " Euryarchaeida (children: 0)", + ] + + def test_empty_results(self): + """ + Test that various queries return an empty list + """ + self.client.force_authenticate(user=self.staff) + + def assert_empty(url): + response = self.client.get(url) + assert response.status_code == status.HTTP_200_OK + assert response.data["results"] == [] + assert response.data["count"] == 0 + + # Search terms that won't match any tags: + assert_empty(f"{self.small_taxonomy_url}?search_term=foobar") + assert_empty(f"{self.small_taxonomy_url}?search_term=foobar&full_depth_threshold=1000") + # Requesting children of leaf tags is always an empty result. + # Prior versions of the code would sometimes throw an exception when trying to handle these. + assert_empty(f"{self.small_taxonomy_url}?parent_tag=Fungi") + assert_empty(f"{self.small_taxonomy_url}?parent_tag=Fungi&full_depth_threshold=1000") + assert_empty(f"{self.small_taxonomy_url}?search_term=eu&parent_tag=Euryarchaeida") + assert_empty(f"{self.small_taxonomy_url}?search_term=eu&parent_tag=Euryarchaeida&full_depth_threshold=1000") + def test_large_taxonomy(self): """ Test listing the tags in a large taxonomy (~7,000 tags). @@ -1251,8 +1300,6 @@ def test_large_taxonomy(self): data = response.data results = data["results"] - # Even though we didn't specify root_only, only the root tags will have - # been returned, because of the taxonomy's size. assert pretty_format_tags(results) == [ "Tag 0 (None) (used: 0, children: 12)", "Tag 1099 (None) (used: 0, children: 12)", @@ -1276,7 +1323,7 @@ def test_large_taxonomy(self): assert results[0].get("sub_tags_url") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?parent_tag={quote(results[0]['value'])}" + f"/tags/?parent_tag={quote_plus(results[0]['value'])}" ) # Checking pagination values @@ -1320,44 +1367,91 @@ def test_large_search(self): Test searching in a large taxonomy """ self._build_large_taxonomy() - search_term = '11' - url = f"{self.large_taxonomy_url}?search_term={search_term}" self.client.force_authenticate(user=self.staff) - response = self.client.get(url) + + # Perform the search with full_depth_threshold=1000, which will give us the full tree of results, since + # there are less than 1000 matches: + search_term = '11' + response = self.client.get(f"{self.large_taxonomy_url}?search_term={search_term}&full_depth_threshold=1000") assert response.status_code == status.HTTP_200_OK data = response.data results = data["results"] - assert pretty_format_tags(results) == [ - "Tag 0 (None) (children: 12)", # First 2 results don't match but have children that match - " Tag 1 (Tag 0) (children: 12)", + assert pretty_format_tags(results)[:20] == [ + "Tag 0 (None) (children: 3)", # First 2 results don't match but have children that match + # Note the count here ---^ is not the total number of matching descendants, just the number of children + # once we filter the tree to include only matches and their ancestors. + " Tag 1 (Tag 0) (children: 1)", " Tag 11 (Tag 1) (children: 0)", - " Tag 105 (Tag 0) (children: 12)", # Non-match but children match + " Tag 105 (Tag 0) (children: 8)", # Non-match but children match " Tag 110 (Tag 105) (children: 0)", " Tag 111 (Tag 105) (children: 0)", " Tag 112 (Tag 105) (children: 0)", " Tag 113 (Tag 105) (children: 0)", " Tag 114 (Tag 105) (children: 0)", " Tag 115 (Tag 105) (children: 0)", - ] - assert data.get("count") == 362 - assert data.get("num_pages") == 37 - assert data.get("current_page") == 1 - # Get the next page: - next_response = self.client.get(response.data.get("next")) - assert next_response.status_code == status.HTTP_200_OK - assert pretty_format_tags(next_response.data["results"]) == [ " Tag 116 (Tag 105) (children: 0)", " Tag 117 (Tag 105) (children: 0)", - " Tag 118 (Tag 0) (children: 12)", + " Tag 118 (Tag 0) (children: 1)", " Tag 119 (Tag 118) (children: 0)", - "Tag 1099 (None) (children: 12)", + "Tag 1099 (None) (children: 9)", " Tag 1100 (Tag 1099) (children: 12)", " Tag 1101 (Tag 1100) (children: 0)", " Tag 1102 (Tag 1100) (children: 0)", " Tag 1103 (Tag 1100) (children: 0)", " Tag 1104 (Tag 1100) (children: 0)", ] + expected_num_results = 362 + assert data.get("count") == expected_num_results + assert len(results) == expected_num_results + assert data.get("num_pages") == 1 + assert data.get("current_page") == 1 + + # Now, perform the search with full_depth_threshold=100, which will give us paginated results, since there are + # more than 100 matches: + response2 = self.client.get(f"{self.large_taxonomy_url}?search_term={search_term}&full_depth_threshold=100") + assert response2.status_code == status.HTTP_200_OK + + data2 = response2.data + results2 = data2["results"] + assert pretty_format_tags(results2) == [ + # Notice that none of these root tags directly match the search query, but their children/grandchildren do + "Tag 0 (None) (children: 3)", + "Tag 1099 (None) (children: 9)", + "Tag 1256 (None) (children: 2)", + "Tag 1413 (None) (children: 1)", + "Tag 157 (None) (children: 2)", + "Tag 1570 (None) (children: 2)", + "Tag 1727 (None) (children: 1)", + "Tag 1884 (None) (children: 2)", + "Tag 2041 (None) (children: 1)", + "Tag 2198 (None) (children: 2)", + ] + assert data2.get("count") == 51 + assert data2.get("num_pages") == 6 + assert data2.get("current_page") == 1 + + # Now load the results that are in the subtree of the root tag 'Tag 0' + tag_0_subtags_url = results2[0]["sub_tags_url"] + assert "full_depth_threshold=100" in tag_0_subtags_url + response3 = self.client.get(tag_0_subtags_url) + data3 = response3.data + # Now the number of results is below our threshold (100), so the subtree gets returned as a single page: + assert pretty_format_tags(data3["results"]) == [ + " Tag 1 (Tag 0) (children: 1)", # Non-match but children match + " Tag 11 (Tag 1) (children: 0)", # Matches '11' + " Tag 105 (Tag 0) (children: 8)", # Non-match but children match + " Tag 110 (Tag 105) (children: 0)", # Matches '11' + " Tag 111 (Tag 105) (children: 0)", + " Tag 112 (Tag 105) (children: 0)", + " Tag 113 (Tag 105) (children: 0)", + " Tag 114 (Tag 105) (children: 0)", + " Tag 115 (Tag 105) (children: 0)", + " Tag 116 (Tag 105) (children: 0)", + " Tag 117 (Tag 105) (children: 0)", + " Tag 118 (Tag 0) (children: 1)", + " Tag 119 (Tag 118) (children: 0)", + ] def test_get_children(self): self._build_large_taxonomy() @@ -1385,7 +1479,7 @@ def test_get_children(self): assert results[0].get("sub_tags_url") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?parent_tag={quote(tag.value)}" + f"/tags/?parent_tag={quote_plus(tag.value)}" ) # Checking pagination values @@ -1400,23 +1494,59 @@ def test_get_children(self): assert data.get("current_page") == 1 def test_get_leaves(self): - # Get tags depth=2 + """ + Test getting the tags at depth=2, using "full_depth_threshold=1000" to + load the whole subtree. + """ + # Get tags at depth=2 self.client.force_authenticate(user=self.staff) parent_tag = Tag.objects.get(value="Animalia") # Build url to get tags depth=2 - url = f"{self.small_taxonomy_url}?parent_tag={parent_tag.value}" + url = f"{self.small_taxonomy_url}?parent_tag={parent_tag.value}&full_depth_threshold=1000" response = self.client.get(url) results = response.data["results"] - # Even though we didn't specify root_only, only the root tags will have - # been returned, because of the taxonomy's size. + # Because the result is small, the result includes the complete tree below this one. assert pretty_format_tags(results) == [ " Arthropoda (Animalia) (children: 0)", " Chordata (Animalia) (children: 1)", + " Mammalia (Chordata) (children: 0)", + " Cnidaria (Animalia) (children: 0)", + " Ctenophora (Animalia) (children: 0)", + " Gastrotrich (Animalia) (children: 0)", + " Placozoa (Animalia) (children: 0)", + " Porifera (Animalia) (children: 0)", + ] + assert response.data.get("next") is None + + def test_get_leaves_paginated(self): + """ + Test getting depth=2 entries, disabling the feature to return the whole + subtree if the result is small enough. + """ + # Get tags at depth=2 + self.client.force_authenticate(user=self.staff) + parent_tag = Tag.objects.get(value="Animalia") + + # Build url to get tags depth=2 + url = f"{self.small_taxonomy_url}?parent_tag={parent_tag.value}&page_size=5" + response = self.client.get(url) + results = response.data["results"] + + # Because the result is small, the result includes the complete tree below this one. + assert pretty_format_tags(results) == [ + " Arthropoda (Animalia) (children: 0)", + " Chordata (Animalia) (children: 1)", # Note the child is not included " Cnidaria (Animalia) (children: 0)", " Ctenophora (Animalia) (children: 0)", " Gastrotrich (Animalia) (children: 0)", + ] + next_url = response.data.get("next") + assert next_url is not None + response2 = self.client.get(next_url) + results2 = response2.data["results"] + assert pretty_format_tags(results2) == [ " Placozoa (Animalia) (children: 0)", " Porifera (Animalia) (children: 0)", ]