Skip to content

Commit

Permalink
feat: Add total descendant count to tag list APIs (#156)
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenmacdonald authored Feb 8, 2024
1 parent 3355189 commit c827081
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 61 deletions.
2 changes: 1 addition & 1 deletion openedx_learning/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
Open edX Learning ("Learning Core").
"""
__version__ = "0.5.1"
__version__ = "0.5.2"
1 change: 1 addition & 0 deletions openedx_tagging/core/tagging/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class TagData(TypedDict):
value: str
external_id: str | None
child_count: int
descendant_count: int
depth: int
parent_value: str | None
# Note: usage_count may or may not be present, depending on the request.
Expand Down
41 changes: 36 additions & 5 deletions openedx_tagging/core/tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,18 @@ def child_count(self) -> int:
return self.taxonomy.tag_set.filter(parent=self).count()
return 0

@cached_property
def descendant_count(self) -> int:
"""
How many descendant tags this tag has in the taxonomy.
"""
if self.taxonomy and not self.taxonomy.allow_free_text:
return self.taxonomy.tag_set.filter(
Q(parent__parent=self) |
Q(parent__parent__parent=self)
).count() + self.child_count
return 0

def clean(self):
"""
Validate this tag before saving
Expand Down Expand Up @@ -436,6 +448,7 @@ def _get_filtered_tags_free_text(
qs = qs.annotate(
depth=Value(0),
child_count=Value(0),
descendant_count=Value(0),
external_id=Value(None, output_field=models.CharField()),
parent_value=Value(None, output_field=models.CharField()),
_id=Value(None, output_field=models.CharField()),
Expand Down Expand Up @@ -467,12 +480,16 @@ def _get_filtered_tags_one_level(
else:
qs = self.tag_set.filter(parent=None).annotate(depth=Value(0))
qs = qs.annotate(parent_value=Value(None, output_field=models.CharField()))
qs = qs.annotate(child_count=models.Count("children"))
qs = qs.annotate(child_count=models.Count("children", distinct=True))
qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
# Filter by search term:
if search_term:
qs = qs.filter(value__icontains=search_term)
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value")
qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("value")
if include_counts:
# We need to include the count of how many times this tag is used to tag objects.
# You'd think we could just use:
Expand Down Expand Up @@ -524,14 +541,27 @@ def _get_filtered_tags_deep(
if pk is not None:
matching_ids.append(pk)
qs = qs.filter(pk__in=matching_ids)
qs = qs.annotate(child_count=models.Count("children", filter=Q(children__pk__in=matching_ids)))
qs = qs.annotate(
child_count=models.Count("children", filter=Q(children__pk__in=matching_ids), distinct=True),
grandchild_count=models.Count(
"children__children", filter=Q(children__children__pk__in=matching_ids), distinct=True,
),
great_grandchild_count=models.Count(
"children__children__children",
filter=Q(children__children__children__pk__in=matching_ids),
),
)
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))
elif excluded_values:
raise NotImplementedError("Using excluded_values without search_term is not currently supported.")
# We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether.
# It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the
# frontend.
else:
qs = qs.annotate(child_count=models.Count("children"))
qs = qs.annotate(child_count=models.Count("children", distinct=True))
qs = qs.annotate(grandchild_count=models.Count("children__children", distinct=True))
qs = qs.annotate(great_grandchild_count=models.Count("children__children__children"))
qs = qs.annotate(descendant_count=F("child_count") + F("grandchild_count") + F("great_grandchild_count"))

# Add the "depth" to each tag:
qs = Tag.annotate_depth(qs)
Expand All @@ -550,7 +580,8 @@ def _get_filtered_tags_deep(
# Add the parent value
qs = qs.annotate(parent_value=F("parent__value"))
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("sort_key")
qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("sort_key")
if include_counts:
# Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level()
obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate(
Expand Down
1 change: 1 addition & 0 deletions openedx_tagging/core/tagging/rest_api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class TagDataSerializer(UserPermissionsSerializerMixin, serializers.Serializer):
value = serializers.CharField()
external_id = serializers.CharField(allow_null=True)
child_count = serializers.IntegerField()
descendant_count = serializers.IntegerField()
depth = serializers.IntegerField()
parent_value = serializers.CharField(allow_null=True)
usage_count = serializers.IntegerField(required=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,27 +53,27 @@ def test_import_template(self, template_file, parser_format):
'Electronic instruments (ELECTRIC) (None) (children: 2)',
' Synthesizer (SYNTH) (Electronic instruments) (children: 0)',
' Theramin (THERAMIN) (Electronic instruments) (children: 0)',
'Percussion instruments (PERCUSS) (None) (children: 3)',
'Percussion instruments (PERCUSS) (None) (children: 3 + 6)',
' Chordophone (CHORD) (Percussion instruments) (children: 1)',
' Piano (PIANO) (Chordophone) (children: 0)',
' Idiophone (BELLS) (Percussion instruments) (children: 2)',
' Celesta (CELESTA) (Idiophone) (children: 0)',
' Hi-hat (HI-HAT) (Idiophone) (children: 0)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)',
' Cajón (CAJÓN) (Membranophone) (children: 1)',
# This tag is present in the import files, but it will be omitted from get_tags()
# because it sits beyond TAXONOMY_MAX_DEPTH.
# Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)
' Tabla (TABLA) (Membranophone) (children: 0)',
'String instruments (STRINGS) (None) (children: 2)',
'String instruments (STRINGS) (None) (children: 2 + 5)',
' Bowed strings (BOW) (String instruments) (children: 2)',
' Cello (CELLO) (Bowed strings) (children: 0)',
' Violin (VIOLIN) (Bowed strings) (children: 0)',
' Plucked strings (PLUCK) (String instruments) (children: 3)',
' Banjo (BANJO) (Plucked strings) (children: 0)',
' Harp (HARP) (Plucked strings) (children: 0)',
' Mandolin (MANDOLIN) (Plucked strings) (children: 0)',
'Wind instruments (WINDS) (None) (children: 2)',
'Wind instruments (WINDS) (None) (children: 2 + 5)',
' Brass (BRASS) (Wind instruments) (children: 2)',
' Trumpet (TRUMPET) (Brass) (children: 0)',
' Tuba (TUBA) (Brass) (children: 0)',
Expand Down
10 changes: 5 additions & 5 deletions tests/openedx_tagging/core/tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ def test_get_tags(self) -> None:
"Bacteria (children: 2)",
" Archaebacteria (children: 0)",
" Eubacteria (children: 0)",
"Eukaryota (children: 5)",
" Animalia (children: 7)",
"Eukaryota (children: 5 + 8)",
" Animalia (children: 7 + 1)",
" Arthropoda (children: 0)",
" Chordata (children: 1)", # The child of this is excluded due to depth limit
" Cnidaria (children: 0)",
Expand Down Expand Up @@ -166,7 +166,7 @@ def test_get_root_tags(self):
assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [
'Archaea (children: 3)',
'Bacteria (children: 2)',
'Eukaryota (children: 5)',
'Eukaryota (children: 5 + 8)',
]

@override_settings(LANGUAGES=test_languages)
Expand Down Expand Up @@ -633,7 +633,7 @@ def get_object_tags():
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1)",
"Eukaryota (used: 0, children: 1 + 2)",
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
" Arthropoda (used: 1, children: 0)",
" Cnidaria (used: 0, children: 0)",
Expand All @@ -655,7 +655,7 @@ def get_object_tags():
"Bacteria (used: 0, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4)",
"Eukaryota (used: 0, children: 4 + 7)",
" Animalia (used: 1, children: 7)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 0)", # <<< Chordata has a matching child but we only support searching
Expand Down
74 changes: 54 additions & 20 deletions tests/openedx_tagging/core/tagging/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,9 @@ def test_get_root(self) -> None:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
# These are the root tags, in alphabetical order:
{"value": "Archaea", "child_count": 3, **common_fields},
{"value": "Bacteria", "child_count": 2, **common_fields},
{"value": "Eukaryota", "child_count": 5, **common_fields},
{"value": "Archaea", "child_count": 3, "descendant_count": 3, **common_fields},
{"value": "Bacteria", "child_count": 2, "descendant_count": 2, **common_fields},
{"value": "Eukaryota", "child_count": 5, "descendant_count": 13, **common_fields},
]

def test_get_child_tags_one_level(self) -> None:
Expand All @@ -345,11 +345,11 @@ def test_get_child_tags_one_level(self) -> None:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
# These are the Eukaryota tags, in alphabetical order:
{"value": "Animalia", "child_count": 7, **common_fields},
{"value": "Fungi", "child_count": 0, **common_fields},
{"value": "Monera", "child_count": 0, **common_fields},
{"value": "Plantae", "child_count": 0, **common_fields},
{"value": "Protista", "child_count": 0, **common_fields},
{"value": "Animalia", "child_count": 7, "descendant_count": 8, **common_fields},
{"value": "Fungi", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Monera", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Plantae", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Protista", "child_count": 0, "descendant_count": 0, **common_fields},
]

def test_get_grandchild_tags_one_level(self) -> None:
Expand All @@ -363,13 +363,13 @@ def test_get_grandchild_tags_one_level(self) -> None:
del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them
assert result == [
# These are the Eukaryota tags, in alphabetical order:
{"value": "Arthropoda", "child_count": 0, **common_fields},
{"value": "Chordata", "child_count": 1, **common_fields},
{"value": "Cnidaria", "child_count": 0, **common_fields},
{"value": "Ctenophora", "child_count": 0, **common_fields},
{"value": "Gastrotrich", "child_count": 0, **common_fields},
{"value": "Placozoa", "child_count": 0, **common_fields},
{"value": "Porifera", "child_count": 0, **common_fields},
{"value": "Arthropoda", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Chordata", "child_count": 1, "descendant_count": 1, **common_fields},
{"value": "Cnidaria", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Ctenophora", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Gastrotrich", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Placozoa", "child_count": 0, "descendant_count": 0, **common_fields},
{"value": "Porifera", "child_count": 0, "descendant_count": 0, **common_fields},
]

def test_get_depth_1_search_term(self) -> None:
Expand All @@ -381,6 +381,7 @@ def test_get_depth_1_search_term(self) -> None:
{
"value": "Archaea",
"child_count": 3,
"descendant_count": 3,
"depth": 0,
"usage_count": 0,
"parent_value": None,
Expand All @@ -399,6 +400,7 @@ def test_get_depth_1_child_search_term(self) -> None:
{
"value": "Archaebacteria",
"child_count": 0,
"descendant_count": 0,
"depth": 1,
"parent_value": "Bacteria",
"external_id": None,
Expand Down Expand Up @@ -439,8 +441,8 @@ def test_get_all(self) -> None:
"Bacteria (None) (children: 2)",
" Archaebacteria (Bacteria) (children: 0)",
" Eubacteria (Bacteria) (children: 0)",
"Eukaryota (None) (children: 5)",
" Animalia (Eukaryota) (children: 7)",
"Eukaryota (None) (children: 5 + 8)",
" Animalia (Eukaryota) (children: 7 + 1)",
" Arthropoda (Animalia) (children: 0)",
" Chordata (Animalia) (children: 1)", # note this has a child but the child is not included
" Cnidaria (Animalia) (children: 0)",
Expand Down Expand Up @@ -475,7 +477,7 @@ def test_search_2(self) -> None:
"""
result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata"))
assert result == [
"Eukaryota (None) (children: 1)", # Has one child that matches
"Eukaryota (None) (children: 1 + 1)", # Has one child that matches, plus one additional matching descendant
" Animalia (Eukaryota) (children: 1)",
" Chordata (Animalia) (children: 0)", # this is the matching tag.
]
Expand All @@ -489,7 +491,7 @@ def test_search_3(self) -> None:
assert result == [
"Archaea (None) (children: 1)",
" Proteoarchaeota (Archaea) (children: 0)",
"Eukaryota (None) (children: 2)", # Note the "children: 2" is correct - 2 direct children are in the result
"Eukaryota (None) (children: 2 + 2)", # 2 direct matching children, 2 additional matching descendants
" Animalia (Eukaryota) (children: 2)",
" Arthropoda (Animalia) (children: 0)", # match
" Gastrotrich (Animalia) (children: 0)", # match
Expand All @@ -508,6 +510,7 @@ def test_tags_deep(self) -> None:
"depth": 3,
"usage_count": 0,
"child_count": 0,
"descendant_count": 0,
"external_id": None,
"_id": 21, # These IDs are hard-coded in the test fixture file
}
Expand Down Expand Up @@ -571,7 +574,7 @@ def test_tree_sort(self) -> None:
taxonomy = self.create_sort_test_taxonomy()
result = pretty_format_tags(taxonomy.get_filtered_tags())
assert result == [
"1 (None) (children: 4)",
"1 (None) (children: 4 + 1)",
" 1 A (1) (children: 0)",
" 11 (1) (children: 0)",
" 11111 (1) (children: 1)",
Expand All @@ -591,6 +594,37 @@ def test_tree_sort(self) -> None:
" azure (ALPHABET) (children: 0)",
]

def test_descendant_counts(self) -> None:
"""
Test getting the descendant count on a taxonomy known to cause aggregation
bugs unless the aggregations are correctly specified with distinct=True
https://docs.djangoproject.com/en/5.0/topics/db/aggregation/#combining-multiple-aggregations
"""
taxonomy = api.create_taxonomy("ESDC Subset")
api.add_tag_to_taxonomy(taxonomy, "Interests") # root tag
api.add_tag_to_taxonomy(taxonomy, "Holland Codes", parent_tag_value="Interests") # child tag
# Create the grandchild tag:
g_tag = api.add_tag_to_taxonomy(taxonomy, "Interests - Holland Codes", parent_tag_value="Holland Codes")
# Create the 6 great-grandchild tags:
api.add_tag_to_taxonomy(taxonomy, "Artistic", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Conventional", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Enterprising", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Investigative", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Realistic", parent_tag_value=g_tag.value)
api.add_tag_to_taxonomy(taxonomy, "Social", parent_tag_value=g_tag.value)

result = pretty_format_tags(taxonomy.get_filtered_tags(depth=1, include_counts=True))
assert result == [
"Interests (None) (used: 0, children: 1 + 7)", # 1 child + (1 grandchild and 6 great grandchild tags)
]
result2 = pretty_format_tags(taxonomy.get_filtered_tags(depth=None, include_counts=True))
assert result2 == [
"Interests (None) (used: 0, children: 1 + 7)",
" Holland Codes (Interests) (used: 0, children: 1 + 6)",
" Interests - Holland Codes (Holland Codes) (used: 0, children: 6)",
]


class TestFilteredTagsFreeTextTaxonomy(TestCase):
"""
Expand Down
Loading

0 comments on commit c827081

Please sign in to comment.