diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index 64d5c6a8..9cde39a4 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -21,7 +21,7 @@ from .data import TagDataQuerySet from .models import ObjectTag, Tag, Taxonomy -from .models.utils import ConcatNull +from .models.utils import ConcatNull, StringAgg # Export this as part of the API TagDoesNotExist = Tag.DoesNotExist @@ -196,7 +196,7 @@ def get_object_tags( return tags -def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]: +def get_object_tag_counts(object_id_pattern: str, count_implicit=False) -> dict[str, int]: """ Given an object ID, a "starts with" glob pattern like "course-v1:foo+bar+baz@*", or a list of "comma,separated,IDs", return a @@ -217,8 +217,36 @@ def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]: qs = qs.exclude(taxonomy_id=None) # The whole taxonomy was deleted qs = qs.exclude(taxonomy__enabled=False) # The whole taxonomy is disabled qs = qs.exclude(tag_id=None, taxonomy__allow_free_text=False) # The taxonomy exists but the tag is deleted - qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id") - return {row["object_id"]: row["num_tags"] for row in qs} + if count_implicit: + # Counting the implicit tags is tricky, because if two "grandchild" tags have the same implicit parent tag, we + # need to count that parent tag only once. To do that, we collect all the ancestor tag IDs into an aggregate + # string, and then count the unique values using python + qs = qs.values("object_id").annotate( + num_tags=models.Count("id"), + tag_ids_str_1=StringAgg("tag_id"), + tag_ids_str_2=StringAgg("tag__parent_id"), + tag_ids_str_3=StringAgg("tag__parent__parent_id"), + tag_ids_str_4=StringAgg("tag__parent__parent__parent_id"), + ).order_by("object_id") + result = {} + for row in qs: + # ObjectTags for free text taxonomies will be included in "num_tags" count, but not "tag_ids_str_1" since + # they have no tag ID. We can compute how many free text tags each object has now: + if row["tag_ids_str_1"]: + num_free_text_tags = row["num_tags"] - len(row["tag_ids_str_1"].split(",")) + else: + num_free_text_tags = row["num_tags"] + # Then we count the total number of *unique* Tags for this object, both implicit and explicit: + other_tag_ids = set() + for field in ("tag_ids_str_1", "tag_ids_str_2", "tag_ids_str_3", "tag_ids_str_4"): + if row[field] is not None: + for tag_id in row[field].split(","): + other_tag_ids.add(int(tag_id)) + result[row["object_id"]] = num_free_text_tags + len(other_tag_ids) + return result + else: + qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id") + return {row["object_id"]: row["num_tags"] for row in qs} def delete_object_tags(object_id: str): diff --git a/openedx_tagging/core/tagging/models/utils.py b/openedx_tagging/core/tagging/models/utils.py index 1f7dcb92..c2e2201e 100644 --- a/openedx_tagging/core/tagging/models/utils.py +++ b/openedx_tagging/core/tagging/models/utils.py @@ -1,7 +1,7 @@ """ Utilities for tagging and taxonomy models """ - +from django.db.models import Aggregate, CharField from django.db.models.expressions import Func @@ -22,3 +22,23 @@ def as_sqlite(self, compiler, connection, **extra_context): arg_joiner=" || ", **extra_context, ) + + +class StringAgg(Aggregate): # pylint: disable=abstract-method + """ + Aggregate function that collects the values of some column across all rows, + and creates a string by concatenating those values, with "," as a separator. + + This is the same as Django's django.contrib.postgres.aggregates.StringAgg, + but this version works with MySQL and SQLite. + """ + function = 'GROUP_CONCAT' + template = '%(function)s(%(distinct)s%(expressions)s)' + + def __init__(self, expression, distinct=False, **extra): + super().__init__( + expression, + distinct='DISTINCT ' if distinct else '', + output_field=CharField(), + **extra, + ) diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index 591e95e1..99b6a59b 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -496,9 +496,11 @@ class ObjectTagCountsView( **Retrieve Parameters** * object_id_pattern (required): - The Object ID to retrieve ObjectTags for. Can contain '*' at the end for wildcard matching, or use ',' to separate multiple object IDs. + * count_implicit (optional): If present, implicit parent/grandparent tags will be included in the counts **Retrieve Example Requests** GET api/tagging/v1/object_tag_counts/:object_id_pattern + GET api/tagging/v1/object_tag_counts/:object_id_pattern?count_implicit **Retrieve Query Returns** * 200 - Success @@ -517,8 +519,9 @@ def retrieve(self, request, *args, **kwargs) -> Response: """ # This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information. object_id_pattern = self.kwargs["object_id_pattern"] + count_implicit = "count_implicit" in request.query_params try: - return Response(get_object_tag_counts(object_id_pattern)) + return Response(get_object_tag_counts(object_id_pattern, count_implicit=count_implicit)) except ValueError as err: raise ValidationError(err.args[0]) from err diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index 5ad7e3fa..5befef5c 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -704,6 +704,36 @@ def test_get_object_tag_counts(self) -> None: assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}") == {obj1: 1, obj2: 2} assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2} + def test_get_object_tag_counts_implicit(self) -> None: + """ + Basic test of get_object_tag_counts, including implicit (parent) tags + + Note that: + - "DPANN" is "Archaea > DPANN" (2 tags, 1 implicit), and + - "Chordata" is "Eukaryota > Animalia > Chordata" (3 tags, 2 implicit) + - "Arthropoda" is "Eukaryota > Animalia > Arthropoda" (same) + """ + self.taxonomy.allow_multiple = True + self.taxonomy.save() + obj1, obj2, obj3 = "object_id1", "object_id2", "object_id3" + other = "other_object" + # Give each object 1-2 tags: + tagging_api.tag_object(object_id=obj1, taxonomy=self.taxonomy, tags=["DPANN"]) + tagging_api.tag_object(object_id=obj2, taxonomy=self.taxonomy, tags=["Chordata"]) + tagging_api.tag_object(object_id=obj2, taxonomy=self.free_text_taxonomy, tags=["has a notochord"]) + tagging_api.tag_object(object_id=obj3, taxonomy=self.taxonomy, tags=["Chordata", "Arthropoda"]) + tagging_api.tag_object(object_id=other, taxonomy=self.free_text_taxonomy, tags=["other"]) + + assert tagging_api.get_object_tag_counts(obj1, count_implicit=True) == {obj1: 2} + assert tagging_api.get_object_tag_counts(obj2, count_implicit=True) == {obj2: 4} + assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}", count_implicit=True) == {obj1: 2, obj2: 4} + assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == { + obj1: 2, + obj2: 4, + obj3: 4, # obj3 has 2 explicit tags and 2 implicit tags (not 4 because the implicit tags are the same) + } + assert tagging_api.get_object_tag_counts(other, count_implicit=True) == {other: 1} + def test_get_object_tag_counts_deleted_disabled(self) -> None: """ Test that get_object_tag_counts doesn't "count" disabled taxonomies or @@ -726,6 +756,9 @@ def test_get_object_tag_counts_deleted_disabled(self) -> None: self.free_text_taxonomy.enabled = False self.free_text_taxonomy.save() assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 1} + # Also check the result with count_implicit: + # "English" has no implicit tags but "Chordata" has two, so we expect these totals: + assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == {obj1: 1, obj2: 3} # But, by the way, if we re-enable the taxonomy and restore the tag, the counts return: self.free_text_taxonomy.enabled = True diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index 00151f30..ae1eef4f 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -1028,10 +1028,14 @@ def test_get_counts(self): # Course 7 Unit 2 api.tag_object(object_id="course07-unit02-problem01", taxonomy=self.free_text_taxonomy, tags=["b"]) api.tag_object(object_id="course07-unit02-problem02", taxonomy=self.free_text_taxonomy, tags=["c", "d"]) - api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M", "x"]) - - def check(object_id_pattern: str): - result = self.client.get(OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern)) + api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M"]) + api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.taxonomy, tags=["Mammalia"]) + + def check(object_id_pattern: str, count_implicit=False): + url = OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern) + if count_implicit: + url += "?count_implicit" + result = self.client.get(url) assert result.status_code == status.HTTP_200_OK return result.data @@ -1044,6 +1048,20 @@ def check(object_id_pattern: str): "course07-unit01-problem01": 3, "course07-unit01-problem02": 2, } + with self.assertNumQueries(1): + assert check(object_id_pattern="course07-unit02-*") == { + "course07-unit02-problem01": 1, + "course07-unit02-problem02": 2, + "course07-unit02-problem03": 3, + } + with self.assertNumQueries(1): + assert check(object_id_pattern="course07-unit02-*", count_implicit=True) == { + "course07-unit02-problem01": 1, + "course07-unit02-problem02": 2, + # "Mammalia" includes 1 explicit + 3 implicit tags: "Eukaryota > Animalia > Chordata > Mammalia" + # so problem03 has 2 free text tags and "4" life on earth tags: + "course07-unit02-problem03": 6, + } with self.assertNumQueries(1): assert check(object_id_pattern="course07-unit*") == { "course07-unit01-problem01": 3,