Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Count implicit tags #133

Merged
merged 2 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions openedx_tagging/core/tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from .data import TagDataQuerySet
from .models import ObjectTag, Tag, Taxonomy
from .models.utils import ConcatNull
from .models.utils import ConcatNull, StringAgg

# Export this as part of the API
TagDoesNotExist = Tag.DoesNotExist
Expand Down Expand Up @@ -196,7 +196,7 @@ def get_object_tags(
return tags


def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]:
def get_object_tag_counts(object_id_pattern: str, count_implicit=False) -> dict[str, int]:
"""
Given an object ID, a "starts with" glob pattern like
"course-v1:foo+bar+baz@*", or a list of "comma,separated,IDs", return a
Expand All @@ -217,8 +217,36 @@ def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]:
qs = qs.exclude(taxonomy_id=None) # The whole taxonomy was deleted
qs = qs.exclude(taxonomy__enabled=False) # The whole taxonomy is disabled
qs = qs.exclude(tag_id=None, taxonomy__allow_free_text=False) # The taxonomy exists but the tag is deleted
qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
return {row["object_id"]: row["num_tags"] for row in qs}
if count_implicit:
# Counting the implicit tags is tricky, because if two "grandchild" tags have the same implicit parent tag, we
# need to count that parent tag only once. To do that, we collect all the ancestor tag IDs into an aggregate
# string, and then count the unique values using python
qs = qs.values("object_id").annotate(
num_tags=models.Count("id"),
tag_ids_str_1=StringAgg("tag_id"),
tag_ids_str_2=StringAgg("tag__parent_id"),
tag_ids_str_3=StringAgg("tag__parent__parent_id"),
tag_ids_str_4=StringAgg("tag__parent__parent__parent_id"),
).order_by("object_id")
result = {}
for row in qs:
# ObjectTags for free text taxonomies will be included in "num_tags" count, but not "tag_ids_str_1" since
# they have no tag ID. We can compute how many free text tags each object has now:
if row["tag_ids_str_1"]:
num_free_text_tags = row["num_tags"] - len(row["tag_ids_str_1"].split(","))
else:
num_free_text_tags = row["num_tags"]
# Then we count the total number of *unique* Tags for this object, both implicit and explicit:
other_tag_ids = set()
for field in ("tag_ids_str_1", "tag_ids_str_2", "tag_ids_str_3", "tag_ids_str_4"):
if row[field] is not None:
for tag_id in row[field].split(","):
other_tag_ids.add(int(tag_id))
result[row["object_id"]] = num_free_text_tags + len(other_tag_ids)
return result
else:
qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
return {row["object_id"]: row["num_tags"] for row in qs}


def delete_object_tags(object_id: str):
Expand Down
22 changes: 21 additions & 1 deletion openedx_tagging/core/tagging/models/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Utilities for tagging and taxonomy models
"""

from django.db.models import Aggregate, CharField
from django.db.models.expressions import Func


Expand All @@ -22,3 +22,23 @@ def as_sqlite(self, compiler, connection, **extra_context):
arg_joiner=" || ",
**extra_context,
)


class StringAgg(Aggregate): # pylint: disable=abstract-method
"""
Aggregate function that collects the values of some column across all rows,
and creates a string by concatenating those values, with "," as a separator.

This is the same as Django's django.contrib.postgres.aggregates.StringAgg,
but this version works with MySQL and SQLite.
"""
function = 'GROUP_CONCAT'
template = '%(function)s(%(distinct)s%(expressions)s)'

def __init__(self, expression, distinct=False, **extra):
super().__init__(
expression,
distinct='DISTINCT ' if distinct else '',
output_field=CharField(),
**extra,
)
5 changes: 4 additions & 1 deletion openedx_tagging/core/tagging/rest_api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,11 @@ class ObjectTagCountsView(
**Retrieve Parameters**
* object_id_pattern (required): - The Object ID to retrieve ObjectTags for. Can contain '*' at the end
for wildcard matching, or use ',' to separate multiple object IDs.
* count_implicit (optional): If present, implicit parent/grandparent tags will be included in the counts

**Retrieve Example Requests**
GET api/tagging/v1/object_tag_counts/:object_id_pattern
GET api/tagging/v1/object_tag_counts/:object_id_pattern?count_implicit

**Retrieve Query Returns**
* 200 - Success
Expand All @@ -517,8 +519,9 @@ def retrieve(self, request, *args, **kwargs) -> Response:
"""
# This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information.
object_id_pattern = self.kwargs["object_id_pattern"]
count_implicit = "count_implicit" in request.query_params
try:
return Response(get_object_tag_counts(object_id_pattern))
return Response(get_object_tag_counts(object_id_pattern, count_implicit=count_implicit))
except ValueError as err:
raise ValidationError(err.args[0]) from err

Expand Down
33 changes: 33 additions & 0 deletions tests/openedx_tagging/core/tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,36 @@ def test_get_object_tag_counts(self) -> None:
assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}") == {obj1: 1, obj2: 2}
assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2}

def test_get_object_tag_counts_implicit(self) -> None:
"""
Basic test of get_object_tag_counts, including implicit (parent) tags

Note that:
- "DPANN" is "Archaea > DPANN" (2 tags, 1 implicit), and
- "Chordata" is "Eukaryota > Animalia > Chordata" (3 tags, 2 implicit)
- "Arthropoda" is "Eukaryota > Animalia > Arthropoda" (same)
"""
self.taxonomy.allow_multiple = True
self.taxonomy.save()
obj1, obj2, obj3 = "object_id1", "object_id2", "object_id3"
other = "other_object"
# Give each object 1-2 tags:
tagging_api.tag_object(object_id=obj1, taxonomy=self.taxonomy, tags=["DPANN"])
tagging_api.tag_object(object_id=obj2, taxonomy=self.taxonomy, tags=["Chordata"])
tagging_api.tag_object(object_id=obj2, taxonomy=self.free_text_taxonomy, tags=["has a notochord"])
tagging_api.tag_object(object_id=obj3, taxonomy=self.taxonomy, tags=["Chordata", "Arthropoda"])
tagging_api.tag_object(object_id=other, taxonomy=self.free_text_taxonomy, tags=["other"])

assert tagging_api.get_object_tag_counts(obj1, count_implicit=True) == {obj1: 2}
assert tagging_api.get_object_tag_counts(obj2, count_implicit=True) == {obj2: 4}
assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}", count_implicit=True) == {obj1: 2, obj2: 4}
assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == {
obj1: 2,
obj2: 4,
obj3: 4, # obj3 has 2 explicit tags and 2 implicit tags (not 4 because the implicit tags are the same)
}
assert tagging_api.get_object_tag_counts(other, count_implicit=True) == {other: 1}

def test_get_object_tag_counts_deleted_disabled(self) -> None:
"""
Test that get_object_tag_counts doesn't "count" disabled taxonomies or
Expand All @@ -726,6 +756,9 @@ def test_get_object_tag_counts_deleted_disabled(self) -> None:
self.free_text_taxonomy.enabled = False
self.free_text_taxonomy.save()
assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 1}
# Also check the result with count_implicit:
# "English" has no implicit tags but "Chordata" has two, so we expect these totals:
assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == {obj1: 1, obj2: 3}

# But, by the way, if we re-enable the taxonomy and restore the tag, the counts return:
self.free_text_taxonomy.enabled = True
Expand Down
26 changes: 22 additions & 4 deletions tests/openedx_tagging/core/tagging/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1028,10 +1028,14 @@ def test_get_counts(self):
# Course 7 Unit 2
api.tag_object(object_id="course07-unit02-problem01", taxonomy=self.free_text_taxonomy, tags=["b"])
api.tag_object(object_id="course07-unit02-problem02", taxonomy=self.free_text_taxonomy, tags=["c", "d"])
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M", "x"])

def check(object_id_pattern: str):
result = self.client.get(OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern))
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M"])
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.taxonomy, tags=["Mammalia"])

def check(object_id_pattern: str, count_implicit=False):
url = OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern)
if count_implicit:
url += "?count_implicit"
result = self.client.get(url)
assert result.status_code == status.HTTP_200_OK
return result.data

Expand All @@ -1044,6 +1048,20 @@ def check(object_id_pattern: str):
"course07-unit01-problem01": 3,
"course07-unit01-problem02": 2,
}
with self.assertNumQueries(1):
assert check(object_id_pattern="course07-unit02-*") == {
"course07-unit02-problem01": 1,
"course07-unit02-problem02": 2,
"course07-unit02-problem03": 3,
}
with self.assertNumQueries(1):
assert check(object_id_pattern="course07-unit02-*", count_implicit=True) == {
"course07-unit02-problem01": 1,
"course07-unit02-problem02": 2,
# "Mammalia" includes 1 explicit + 3 implicit tags: "Eukaryota > Animalia > Chordata > Mammalia"
# so problem03 has 2 free text tags and "4" life on earth tags:
"course07-unit02-problem03": 6,
}
with self.assertNumQueries(1):
assert check(object_id_pattern="course07-unit*") == {
"course07-unit01-problem01": 3,
Expand Down
Loading