From 78251961cac1b75eb59da6cae5184a6aeb443929 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Wed, 7 Jun 2023 13:18:50 -0500 Subject: [PATCH 01/17] feat: add support for Avro dict --- openedx_events/analytics/data.py | 2 ++ .../event_bus/avro/custom_serializers.py | 23 +++++++++++++++++++ openedx_events/event_bus/avro/deserializer.py | 11 +++++++++ openedx_events/event_bus/avro/schema.py | 15 ++++++++++++ openedx_events/event_bus/avro/serializer.py | 1 + 5 files changed, 52 insertions(+) diff --git a/openedx_events/analytics/data.py b/openedx_events/analytics/data.py index 801ebdd5..568eafff 100644 --- a/openedx_events/analytics/data.py +++ b/openedx_events/analytics/data.py @@ -9,6 +9,8 @@ import attr +from typing import Dict + @attr.s(frozen=True) class TrackingLogData: diff --git a/openedx_events/event_bus/avro/custom_serializers.py b/openedx_events/event_bus/avro/custom_serializers.py index d3503011..d1e5e273 100644 --- a/openedx_events/event_bus/avro/custom_serializers.py +++ b/openedx_events/event_bus/avro/custom_serializers.py @@ -13,6 +13,8 @@ from openedx_events.event_bus.avro.types import PYTHON_TYPE_TO_AVRO_MAPPING +import json + class BaseCustomTypeAvroSerializer(ABC): """ Used by openedx_events.avro_utilities class to serialize/deserialize custom types. @@ -93,6 +95,26 @@ def deserialize(data: str): return datetime.fromisoformat(data) +class DictionaryAvroSerializer(BaseCustomTypeAvroSerializer): + """ + CustomTypeAvroSerializer for dictionary class. + """ + + cls = dict + field_type = PYTHON_TYPE_TO_AVRO_MAPPING[dict] + + @staticmethod + def serialize(obj) -> str: + """Serialize obj into str.""" + return obj + + @staticmethod + def deserialize(data: str): + """Deserialize dict into obj.""" + return json.loads(data) + + + class UsageKeyAvroSerializer(BaseCustomTypeAvroSerializer): """ CustomTypeAvroSerializer for UsageKey class. @@ -179,4 +201,5 @@ def deserialize(data: str): LibraryUsageLocatorV2AvroSerializer, UsageKeyAvroSerializer, UuidAvroSerializer, + DictionaryAvroSerializer, ] diff --git a/openedx_events/event_bus/avro/deserializer.py b/openedx_events/event_bus/avro/deserializer.py index 442a9780..16fe21b5 100644 --- a/openedx_events/event_bus/avro/deserializer.py +++ b/openedx_events/event_bus/avro/deserializer.py @@ -51,6 +51,17 @@ def _deserialized_avro_record_dict_to_object(data: dict, data_type, deserializer # check whether list items type is in basic types. if arg_data_type[0] in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING: return data + elif data_type_origin == dict: + # returns types of dict contents + # if data_type == Dict[str, int], arg_data_type = (str, int) + arg_data_type = get_args(data_type) + if not arg_data_type: + raise TypeError( + "Dict without annotation type is not supported. The argument should be a type, for eg., Dict[str, int]" + ) + # check whether dict items type is in basic types. + if arg_data_type[1] in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING: + return data elif hasattr(data_type, "__attrs_attrs__"): transformed = {} for attribute in data_type.__attrs_attrs__: diff --git a/openedx_events/event_bus/avro/schema.py b/openedx_events/event_bus/avro/schema.py index afb03086..e16ee405 100644 --- a/openedx_events/event_bus/avro/schema.py +++ b/openedx_events/event_bus/avro/schema.py @@ -83,6 +83,21 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, f" {set(SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.keys())}" ) field["type"] = {"type": PYTHON_TYPE_TO_AVRO_MAPPING[data_type_origin], "items": avro_type} + elif data_type_origin == dict: + # returns types of dict contents + # if data_type == Dict[str, int], arg_data_type = (str, int) + arg_data_type = get_args(data_type) + if not arg_data_type: + raise TypeError( + "Dict without annotation type is not supported. The argument should be a type, for eg., Dict[str, int]" + ) + avro_type = SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.get(arg_data_type[1]) + if avro_type is None: + raise TypeError( + "Only following types are supported for dict arguments:" + f" {set(SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING.keys())}" + ) + field["type"] = {"type": PYTHON_TYPE_TO_AVRO_MAPPING[data_type_origin], "values": avro_type} # Case 3: data_type is an attrs class elif hasattr(data_type, "__attrs_attrs__"): # Inner Attrs Class diff --git a/openedx_events/event_bus/avro/serializer.py b/openedx_events/event_bus/avro/serializer.py index bd57183c..12e09720 100644 --- a/openedx_events/event_bus/avro/serializer.py +++ b/openedx_events/event_bus/avro/serializer.py @@ -69,6 +69,7 @@ def _event_data_to_avro_record_dict(event_data, serializers=None): def value_to_dict(value): # Case 1: Value is an instance of an attrs-decorated class if hasattr(value, "__attrs_attrs__"): + print("\n\n MY VALUE IN VALUE TO DICT", value, "\n\n") return attr.asdict(value, value_serializer=_get_non_attrs_serializer(serializers)) return _get_non_attrs_serializer(serializers)(None, None, value) From 9aaa04e3bfe84a1d8e63cae9dc3039a365260587 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Thu, 12 Dec 2024 19:10:44 +0100 Subject: [PATCH 02/17] refactor: add support for dicts as avro maps instead of record --- .../event_bus/avro/custom_serializers.py | 21 ------------------- openedx_events/event_bus/avro/schema.py | 2 +- .../event_bus/avro/tests/test_avro.py | 14 +++++++++++++ openedx_events/event_bus/avro/types.py | 2 +- openedx_events/learning/data.py | 4 ++-- openedx_events/tooling.py | 6 +++--- 6 files changed, 21 insertions(+), 28 deletions(-) diff --git a/openedx_events/event_bus/avro/custom_serializers.py b/openedx_events/event_bus/avro/custom_serializers.py index d1e5e273..ad596c91 100644 --- a/openedx_events/event_bus/avro/custom_serializers.py +++ b/openedx_events/event_bus/avro/custom_serializers.py @@ -95,26 +95,6 @@ def deserialize(data: str): return datetime.fromisoformat(data) -class DictionaryAvroSerializer(BaseCustomTypeAvroSerializer): - """ - CustomTypeAvroSerializer for dictionary class. - """ - - cls = dict - field_type = PYTHON_TYPE_TO_AVRO_MAPPING[dict] - - @staticmethod - def serialize(obj) -> str: - """Serialize obj into str.""" - return obj - - @staticmethod - def deserialize(data: str): - """Deserialize dict into obj.""" - return json.loads(data) - - - class UsageKeyAvroSerializer(BaseCustomTypeAvroSerializer): """ CustomTypeAvroSerializer for UsageKey class. @@ -201,5 +181,4 @@ def deserialize(data: str): LibraryUsageLocatorV2AvroSerializer, UsageKeyAvroSerializer, UuidAvroSerializer, - DictionaryAvroSerializer, ] diff --git a/openedx_events/event_bus/avro/schema.py b/openedx_events/event_bus/avro/schema.py index e16ee405..ae178857 100644 --- a/openedx_events/event_bus/avro/schema.py +++ b/openedx_events/event_bus/avro/schema.py @@ -63,7 +63,7 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, field["type"] = field_type # Case 2: data_type is a simple type that can be converted directly to an Avro type elif data_type in PYTHON_TYPE_TO_AVRO_MAPPING: - if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["record", "array"]: + if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["map", "array"]: # pylint: disable-next=broad-exception-raised raise Exception("Unable to generate Avro schema for dict or array fields without annotation types.") avro_type = PYTHON_TYPE_TO_AVRO_MAPPING[data_type] diff --git a/openedx_events/event_bus/avro/tests/test_avro.py b/openedx_events/event_bus/avro/tests/test_avro.py index 87634067..f5cd4311 100644 --- a/openedx_events/event_bus/avro/tests/test_avro.py +++ b/openedx_events/event_bus/avro/tests/test_avro.py @@ -43,6 +43,7 @@ def generate_test_data_for_schema(schema): # pragma: no cover 'string': "default", 'double': 1.0, 'null': None, + 'map': {'key': 'value'}, } def get_default_value_or_raise(schema_field_type): @@ -71,6 +72,9 @@ def get_default_value_or_raise(schema_field_type): elif sub_field_type == "record": # if we're dealing with a record, recurse into the record data_dict.update({key: generate_test_data_for_schema(field_type)}) + elif sub_field_type == "map": + # if we're dealing with a map, "values" will be the type of values in the map + data_dict.update({key: {"key": get_default_value_or_raise(field_type["values"])}}) else: raise Exception(f"Unsupported type {field_type}") # pylint: disable=broad-exception-raised @@ -112,6 +116,16 @@ def generate_test_event_data_for_data_type(data_type): # pragma: no cover datetime: datetime.now(), CCXLocator: CCXLocator(org='edx', course='DemoX', run='Demo_course', ccx='1'), UUID: uuid4(), + dict[str, str]: {'key': 'value'}, + dict[str, int]: {'key': 1}, + dict[str, float]: {'key': 1.0}, + dict[str, bool]: {'key': True}, + dict[str, CourseKey]: {'key': CourseKey.from_string("course-v1:edX+DemoX.1+2014")}, + dict[str, UsageKey]: {'key': UsageKey.from_string( + "block-v1:edx+DemoX+Demo_course+type@video+block@UaEBjyMjcLW65gaTXggB93WmvoxGAJa0JeHRrDThk", + )}, + dict[str, LibraryLocatorV2]: {'key': LibraryLocatorV2.from_string('lib:MITx:reallyhardproblems')}, + dict[str, LibraryUsageLocatorV2]: {'key': LibraryUsageLocatorV2.from_string('lb:MITx:reallyhardproblems:problem:problem1')}, } data_dict = {} for attribute in data_type.__attrs_attrs__: diff --git a/openedx_events/event_bus/avro/types.py b/openedx_events/event_bus/avro/types.py index f3bc2536..b757a899 100644 --- a/openedx_events/event_bus/avro/types.py +++ b/openedx_events/event_bus/avro/types.py @@ -9,6 +9,6 @@ PYTHON_TYPE_TO_AVRO_MAPPING = { **SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING, None: "null", - dict: "record", + dict: "map", list: "array", } diff --git a/openedx_events/learning/data.py b/openedx_events/learning/data.py index 962dda35..06624b73 100644 --- a/openedx_events/learning/data.py +++ b/openedx_events/learning/data.py @@ -422,10 +422,10 @@ class DiscussionThreadData: url = attr.ib(type=str) user = attr.ib(type=UserData) course_id = attr.ib(type=CourseKey) - discussion = attr.ib(type=dict, factory=dict) + discussion = attr.ib(type=dict[str, str], factory=dict) user_course_roles = attr.ib(type=List[str], factory=list) user_forums_roles = attr.ib(type=List[str], factory=list) - options = attr.ib(type=dict, factory=dict) + options = attr.ib(type=dict[str, str], factory=dict) @attr.s(frozen=True) diff --git a/openedx_events/tooling.py b/openedx_events/tooling.py index 1ca53306..c6fa1bbf 100644 --- a/openedx_events/tooling.py +++ b/openedx_events/tooling.py @@ -24,9 +24,9 @@ "org.openedx.content_authoring.course.certificate_config.changed.v1", "org.openedx.content_authoring.course.certificate_config.deleted.v1", "org.openedx.learning.user.notification.requested.v1", - "org.openedx.learning.thread.created.v1", - "org.openedx.learning.response.created.v1", - "org.openedx.learning.comment.created.v1", + #"org.openedx.learning.thread.created.v1", + #"org.openedx.learning.response.created.v1", + #"org.openedx.learning.comment.created.v1", "org.openedx.learning.course.notification.requested.v1", "org.openedx.learning.ora.submission.created.v1", ] From 743bcc461d83198406692114fe6bf05f87ef0a8f Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Fri, 13 Dec 2024 16:08:38 +0100 Subject: [PATCH 03/17] test: add more test cases for serializing dicts --- openedx_events/event_bus/avro/tests/test_avro.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/openedx_events/event_bus/avro/tests/test_avro.py b/openedx_events/event_bus/avro/tests/test_avro.py index f5cd4311..53e01a25 100644 --- a/openedx_events/event_bus/avro/tests/test_avro.py +++ b/openedx_events/event_bus/avro/tests/test_avro.py @@ -2,7 +2,7 @@ import io import os from datetime import datetime -from typing import List +from typing import List, Union from unittest import TestCase from uuid import UUID, uuid4 @@ -126,6 +126,12 @@ def generate_test_event_data_for_data_type(data_type): # pragma: no cover )}, dict[str, LibraryLocatorV2]: {'key': LibraryLocatorV2.from_string('lib:MITx:reallyhardproblems')}, dict[str, LibraryUsageLocatorV2]: {'key': LibraryUsageLocatorV2.from_string('lb:MITx:reallyhardproblems:problem:problem1')}, + dict[str, List[int]]: {'key': [1, 2, 3]}, + dict[str, List[str]]: {'key': ["hi", "there"]}, + dict[str, dict[str, str]]: {'key': {'key': 'value'}}, + dict[str, dict[str, int]]: {'key': {'key': 1}}, + dict[str, Union[str, int]]: {'key': 'value'}, + dict[str, Union[str, int, float]]: {'key': 1.0}, } data_dict = {} for attribute in data_type.__attrs_attrs__: From f4aa824742b954e337e62e82767100bcb055d7c5 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Fri, 13 Dec 2024 17:18:48 +0100 Subject: [PATCH 04/17] refactor: address quality issues from latest commits --- openedx_events/analytics/data.py | 2 -- openedx_events/event_bus/avro/custom_serializers.py | 2 -- openedx_events/event_bus/avro/tests/test_avro.py | 4 +++- openedx_events/tooling.py | 3 --- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/openedx_events/analytics/data.py b/openedx_events/analytics/data.py index 568eafff..801ebdd5 100644 --- a/openedx_events/analytics/data.py +++ b/openedx_events/analytics/data.py @@ -9,8 +9,6 @@ import attr -from typing import Dict - @attr.s(frozen=True) class TrackingLogData: diff --git a/openedx_events/event_bus/avro/custom_serializers.py b/openedx_events/event_bus/avro/custom_serializers.py index ad596c91..d3503011 100644 --- a/openedx_events/event_bus/avro/custom_serializers.py +++ b/openedx_events/event_bus/avro/custom_serializers.py @@ -13,8 +13,6 @@ from openedx_events.event_bus.avro.types import PYTHON_TYPE_TO_AVRO_MAPPING -import json - class BaseCustomTypeAvroSerializer(ABC): """ Used by openedx_events.avro_utilities class to serialize/deserialize custom types. diff --git a/openedx_events/event_bus/avro/tests/test_avro.py b/openedx_events/event_bus/avro/tests/test_avro.py index 53e01a25..67c806bd 100644 --- a/openedx_events/event_bus/avro/tests/test_avro.py +++ b/openedx_events/event_bus/avro/tests/test_avro.py @@ -125,7 +125,9 @@ def generate_test_event_data_for_data_type(data_type): # pragma: no cover "block-v1:edx+DemoX+Demo_course+type@video+block@UaEBjyMjcLW65gaTXggB93WmvoxGAJa0JeHRrDThk", )}, dict[str, LibraryLocatorV2]: {'key': LibraryLocatorV2.from_string('lib:MITx:reallyhardproblems')}, - dict[str, LibraryUsageLocatorV2]: {'key': LibraryUsageLocatorV2.from_string('lb:MITx:reallyhardproblems:problem:problem1')}, + dict[str, LibraryUsageLocatorV2]: { + 'key': LibraryUsageLocatorV2.from_string('lb:MITx:reallyhardproblems:problem:problem1'), + }, dict[str, List[int]]: {'key': [1, 2, 3]}, dict[str, List[str]]: {'key': ["hi", "there"]}, dict[str, dict[str, str]]: {'key': {'key': 'value'}}, diff --git a/openedx_events/tooling.py b/openedx_events/tooling.py index c6fa1bbf..1beb55a0 100644 --- a/openedx_events/tooling.py +++ b/openedx_events/tooling.py @@ -24,9 +24,6 @@ "org.openedx.content_authoring.course.certificate_config.changed.v1", "org.openedx.content_authoring.course.certificate_config.deleted.v1", "org.openedx.learning.user.notification.requested.v1", - #"org.openedx.learning.thread.created.v1", - #"org.openedx.learning.response.created.v1", - #"org.openedx.learning.comment.created.v1", "org.openedx.learning.course.notification.requested.v1", "org.openedx.learning.ora.submission.created.v1", ] From f7beb63e6b2354ea974ee0dbdecc554c559ea962 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Mon, 16 Dec 2024 10:29:23 +0100 Subject: [PATCH 05/17] refactor: address PR reviews --- openedx_events/event_bus/avro/serializer.py | 1 - openedx_events/event_bus/avro/tests/test_schema.py | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/openedx_events/event_bus/avro/serializer.py b/openedx_events/event_bus/avro/serializer.py index 12e09720..bd57183c 100644 --- a/openedx_events/event_bus/avro/serializer.py +++ b/openedx_events/event_bus/avro/serializer.py @@ -69,7 +69,6 @@ def _event_data_to_avro_record_dict(event_data, serializers=None): def value_to_dict(value): # Case 1: Value is an instance of an attrs-decorated class if hasattr(value, "__attrs_attrs__"): - print("\n\n MY VALUE IN VALUE TO DICT", value, "\n\n") return attr.asdict(value, value_serializer=_get_non_attrs_serializer(serializers)) return _get_non_attrs_serializer(serializers)(None, None, value) diff --git a/openedx_events/event_bus/avro/tests/test_schema.py b/openedx_events/event_bus/avro/tests/test_schema.py index 8ad6245b..ff5899e2 100644 --- a/openedx_events/event_bus/avro/tests/test_schema.py +++ b/openedx_events/event_bus/avro/tests/test_schema.py @@ -1,7 +1,7 @@ """ Tests for event_bus.avro.schema module """ -from typing import List +from typing import List, Dict from unittest import TestCase from openedx_events.event_bus.avro.schema import schema_from_signal @@ -245,8 +245,9 @@ class UnextendedClass: def test_throw_exception_to_list_or_dict_types_without_annotation(self): LIST_SIGNAL = create_simple_signal({"list_input": list}) - DICT_SIGNAL = create_simple_signal({"list_input": dict}) + DICT_SIGNAL = create_simple_signal({"dict_input": dict}) LIST_WITHOUT_ANNOTATION_SIGNAL = create_simple_signal({"list_input": List}) + DICT_WITHOUT_ANNOTATION_SIGNAL = create_simple_signal({"dict_input": Dict}) with self.assertRaises(Exception): schema_from_signal(LIST_SIGNAL) @@ -256,6 +257,9 @@ def test_throw_exception_to_list_or_dict_types_without_annotation(self): with self.assertRaises(TypeError): schema_from_signal(LIST_WITHOUT_ANNOTATION_SIGNAL) + with self.assertRaises(TypeError): + schema_from_signal(DICT_WITHOUT_ANNOTATION_SIGNAL) + def test_list_with_annotation_works(self): LIST_SIGNAL = create_simple_signal({"list_input": List[int]}) expected_dict = { From 482c8d9fd2968442a0adb6dfa4594bda5188558b Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Mon, 16 Dec 2024 10:31:51 +0100 Subject: [PATCH 06/17] fix: address quality issues --- openedx_events/event_bus/avro/tests/test_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_events/event_bus/avro/tests/test_schema.py b/openedx_events/event_bus/avro/tests/test_schema.py index ff5899e2..10793878 100644 --- a/openedx_events/event_bus/avro/tests/test_schema.py +++ b/openedx_events/event_bus/avro/tests/test_schema.py @@ -1,7 +1,7 @@ """ Tests for event_bus.avro.schema module """ -from typing import List, Dict +from typing import Dict, List from unittest import TestCase from openedx_events.event_bus.avro.schema import schema_from_signal From 7d9b8b8072ce520d29fc58db7115d2905202ab1e Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Mon, 16 Dec 2024 10:57:30 +0100 Subject: [PATCH 07/17] test: add deserializer tests for dicts --- .../event_bus/avro/tests/test_deserializer.py | 81 +++++++++++++------ .../event_bus/avro/tests/test_utilities.py | 7 ++ 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/openedx_events/event_bus/avro/tests/test_deserializer.py b/openedx_events/event_bus/avro/tests/test_deserializer.py index e7037998..f1c26bf6 100644 --- a/openedx_events/event_bus/avro/tests/test_deserializer.py +++ b/openedx_events/event_bus/avro/tests/test_deserializer.py @@ -1,4 +1,5 @@ """Tests for avro.deserializer""" +import ddt import json from datetime import datetime from typing import List @@ -19,10 +20,12 @@ SubTestData0, SubTestData1, create_simple_signal, + ComplexAttrs ) from openedx_events.tests.utils import FreezeSignalCacheMixin +@ddt.ddt class TestAvroSignalDeserializerCache(TestCase, FreezeSignalCacheMixin): """Test AvroSignalDeserializer""" @@ -30,36 +33,66 @@ def setUp(self) -> None: super().setUp() self.maxDiff = None - def test_schema_string(self): + @ddt.data( + ( + SimpleAttrs, + { + "name": "CloudEvent", + "type": "record", + "doc": "Avro Event Format for CloudEvents created with openedx_events/schema", + "namespace": "simple.signal", + "fields": [ + { + "name": "data", + "type": { + "name": "SimpleAttrs", + "type": "record", + "fields": [ + {"name": "boolean_field", "type": "boolean"}, + {"name": "int_field", "type": "long"}, + {"name": "float_field", "type": "double"}, + {"name": "bytes_field", "type": "bytes"}, + {"name": "string_field", "type": "string"}, + ], + }, + }, + ], + } + ), + ( + ComplexAttrs, + { + "name": "CloudEvent", + "type": "record", + "doc": "Avro Event Format for CloudEvents created with openedx_events/schema", + "namespace": "simple.signal", + "fields": [ + { + "name": "data", + "type": { + "name": "ComplexAttrs", + "type": "record", + "fields": [ + {"name": "list_field", "type": {"type": "array", "items": "long"}}, + {"name": "dict_field", "type": {"type": "map", "values": "long"}}, + ], + }, + }, + ], + } + ) + ) + @ddt.unpack + def test_schema_string(self, data_cls, expected_schema): """ Test JSON round-trip; schema creation is tested more fully in test_schema.py. """ SIGNAL = create_simple_signal({ - "data": SimpleAttrs + "data": data_cls }) + actual_schema = json.loads(AvroSignalDeserializer(SIGNAL).schema_string()) - expected_schema = { - 'name': 'CloudEvent', - 'type': 'record', - 'doc': 'Avro Event Format for CloudEvents created with openedx_events/schema', - 'namespace': 'simple.signal', - 'fields': [ - { - 'name': 'data', - 'type': { - 'name': 'SimpleAttrs', - 'type': 'record', - 'fields': [ - {'name': 'boolean_field', 'type': 'boolean'}, - {'name': 'int_field', 'type': 'long'}, - {'name': 'float_field', 'type': 'double'}, - {'name': 'bytes_field', 'type': 'bytes'}, - {'name': 'string_field', 'type': 'string'}, - ] - } - } - ] - } + assert actual_schema == expected_schema def test_convert_dict_to_event_data(self): diff --git a/openedx_events/event_bus/avro/tests/test_utilities.py b/openedx_events/event_bus/avro/tests/test_utilities.py index 1644c3e4..05b1b9c7 100644 --- a/openedx_events/event_bus/avro/tests/test_utilities.py +++ b/openedx_events/event_bus/avro/tests/test_utilities.py @@ -39,6 +39,13 @@ class SimpleAttrs: string_field: str +@attr.s(auto_attribs=True) +class ComplexAttrs: + """Class with all complex type fields""" + list_field: list[int] + dict_field: dict[str, int] + + @attr.s(auto_attribs=True) class SubTestData0: """Subclass for testing nested attrs""" From c76b3d133540aeeaf8a801388874d655cd80b26a Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Mon, 16 Dec 2024 11:18:16 +0100 Subject: [PATCH 08/17] test: add tests for deserialization errors --- .../event_bus/avro/tests/test_deserializer.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/openedx_events/event_bus/avro/tests/test_deserializer.py b/openedx_events/event_bus/avro/tests/test_deserializer.py index f1c26bf6..2e93cf6e 100644 --- a/openedx_events/event_bus/avro/tests/test_deserializer.py +++ b/openedx_events/event_bus/avro/tests/test_deserializer.py @@ -2,7 +2,7 @@ import ddt import json from datetime import datetime -from typing import List +from typing import Dict, List from unittest import TestCase from opaque_keys.edx.keys import CourseKey, UsageKey @@ -266,6 +266,38 @@ def test_deserialization_of_list_without_annotation(self): with self.assertRaises(TypeError): deserializer.from_dict(initial_dict) + def test_deserialization_of_dict_with_annotation(self): + """ + Check that deserialization works as expected when dict data is annotated. + """ + DICT_SIGNAL = create_simple_signal({"dict_input": Dict[str, int]}) + initial_dict = {"dict_input": {"key1": 1, "key2": 3}} + + deserializer = AvroSignalDeserializer(DICT_SIGNAL) + event_data = deserializer.from_dict(initial_dict) + expected_event_data = {"key1": 1, "key2": 3} + test_data = event_data["dict_input"] + + self.assertIsInstance(test_data, dict) + self.assertEqual(test_data, expected_event_data) + + def test_deserialization_of_dict_without_annotation(self): + """ + Check that deserialization raises error when dict data is not annotated. + + Create dummy signal to bypass schema check while initializing deserializer. Then, + update signal with incomplete type info to test whether correct exceptions are raised while deserializing data. + """ + SIGNAL = create_simple_signal({"dict_input": Dict[str, int]}) + DICT_SIGNAL = create_simple_signal({"dict_input": Dict}) + initial_dict = {"dict_input": {"key1": 1, "key2": 3}} + + deserializer = AvroSignalDeserializer(SIGNAL) + deserializer.signal = DICT_SIGNAL + + with self.assertRaises(TypeError): + deserializer.from_dict(initial_dict) + def test_deserialization_of_nested_list_fails(self): """ Check that deserialization raises error when nested list data is passed. From ec1fdeca8aca4d9fb24bb2e6e35d4982b28504de Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Mon, 16 Dec 2024 11:47:04 +0100 Subject: [PATCH 09/17] test: add test cases for schema generation with dict types --- .../event_bus/avro/tests/test_schema.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/openedx_events/event_bus/avro/tests/test_schema.py b/openedx_events/event_bus/avro/tests/test_schema.py index 10793878..a3410643 100644 --- a/openedx_events/event_bus/avro/tests/test_schema.py +++ b/openedx_events/event_bus/avro/tests/test_schema.py @@ -260,6 +260,11 @@ def test_throw_exception_to_list_or_dict_types_without_annotation(self): with self.assertRaises(TypeError): schema_from_signal(DICT_WITHOUT_ANNOTATION_SIGNAL) + def test_throw_exception_invalid_dict_annotation(self): + INVALID_DICT_SIGNAL = create_simple_signal({"dict_input": Dict[str, NestedAttrsWithDefaults]}) + with self.assertRaises(TypeError): + schema_from_signal(INVALID_DICT_SIGNAL) + def test_list_with_annotation_works(self): LIST_SIGNAL = create_simple_signal({"list_input": List[int]}) expected_dict = { @@ -274,3 +279,18 @@ def test_list_with_annotation_works(self): } schema = schema_from_signal(LIST_SIGNAL) self.assertDictEqual(schema, expected_dict) + + def test_dict_with_annotation_works(self): + DICT_SIGNAL = create_simple_signal({"dict_input": Dict[str, int]}) + expected_dict = { + 'name': 'CloudEvent', + 'type': 'record', + 'doc': 'Avro Event Format for CloudEvents created with openedx_events/schema', + 'namespace': 'simple.signal', + 'fields': [{ + 'name': 'dict_input', + 'type': {'type': 'map', 'values': 'long'}, + }], + } + schema = schema_from_signal(DICT_SIGNAL) + self.assertDictEqual(schema, expected_dict) From 2d225b93eb8e4fb7927da5ea6b99c4ead37c8de5 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Mon, 16 Dec 2024 11:56:11 +0100 Subject: [PATCH 10/17] fix: address quality issues --- openedx_events/event_bus/avro/tests/test_deserializer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openedx_events/event_bus/avro/tests/test_deserializer.py b/openedx_events/event_bus/avro/tests/test_deserializer.py index 2e93cf6e..9e3437f1 100644 --- a/openedx_events/event_bus/avro/tests/test_deserializer.py +++ b/openedx_events/event_bus/avro/tests/test_deserializer.py @@ -1,15 +1,16 @@ """Tests for avro.deserializer""" -import ddt import json from datetime import datetime from typing import Dict, List from unittest import TestCase +import ddt from opaque_keys.edx.keys import CourseKey, UsageKey from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 from openedx_events.event_bus.avro.deserializer import AvroSignalDeserializer, deserialize_bytes_to_event_data from openedx_events.event_bus.avro.tests.test_utilities import ( + ComplexAttrs, EventData, NestedAttrsWithDefaults, NestedNonAttrs, @@ -20,7 +21,6 @@ SubTestData0, SubTestData1, create_simple_signal, - ComplexAttrs ) from openedx_events.tests.utils import FreezeSignalCacheMixin From 2839ae79ab73d6fb07796e2ef01b4a7bef0ef7b1 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Wed, 8 Jan 2025 10:35:20 +0100 Subject: [PATCH 11/17] refactor: rewrite comment with suggestion --- openedx_events/event_bus/avro/schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openedx_events/event_bus/avro/schema.py b/openedx_events/event_bus/avro/schema.py index ae178857..3da601b5 100644 --- a/openedx_events/event_bus/avro/schema.py +++ b/openedx_events/event_bus/avro/schema.py @@ -69,8 +69,8 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, avro_type = PYTHON_TYPE_TO_AVRO_MAPPING[data_type] field["type"] = avro_type elif data_type_origin == list: - # returns types of list contents - # if data_type == List[int], arg_data_type = (int,) + # Returns types of list contents. + # Example: if data_type == List[int], arg_data_type = (int,) arg_data_type = get_args(data_type) if not arg_data_type: raise TypeError( @@ -84,8 +84,8 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, ) field["type"] = {"type": PYTHON_TYPE_TO_AVRO_MAPPING[data_type_origin], "items": avro_type} elif data_type_origin == dict: - # returns types of dict contents - # if data_type == Dict[str, int], arg_data_type = (str, int) + # Returns types of dict contents. + # Example: if data_type == Dict[str, int], arg_data_type = (str, int) arg_data_type = get_args(data_type) if not arg_data_type: raise TypeError( From 4389cfb52be72220dc3d7d1a7c5d4a64795ed5e1 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Wed, 8 Jan 2025 10:56:48 +0100 Subject: [PATCH 12/17] refactor: include map type instead of replacing record --- openedx_events/event_bus/avro/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_events/event_bus/avro/schema.py b/openedx_events/event_bus/avro/schema.py index 3da601b5..e46aded3 100644 --- a/openedx_events/event_bus/avro/schema.py +++ b/openedx_events/event_bus/avro/schema.py @@ -63,7 +63,7 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, field["type"] = field_type # Case 2: data_type is a simple type that can be converted directly to an Avro type elif data_type in PYTHON_TYPE_TO_AVRO_MAPPING: - if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["map", "array"]: + if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["record", "map", "array"]: # pylint: disable-next=broad-exception-raised raise Exception("Unable to generate Avro schema for dict or array fields without annotation types.") avro_type = PYTHON_TYPE_TO_AVRO_MAPPING[data_type] From 8f67ea2d48819cb5d2a817f0df8c3301dfbbf97e Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Wed, 8 Jan 2025 11:08:32 +0100 Subject: [PATCH 13/17] refactor: drop changes in forum events --- openedx_events/learning/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openedx_events/learning/data.py b/openedx_events/learning/data.py index 06624b73..962dda35 100644 --- a/openedx_events/learning/data.py +++ b/openedx_events/learning/data.py @@ -422,10 +422,10 @@ class DiscussionThreadData: url = attr.ib(type=str) user = attr.ib(type=UserData) course_id = attr.ib(type=CourseKey) - discussion = attr.ib(type=dict[str, str], factory=dict) + discussion = attr.ib(type=dict, factory=dict) user_course_roles = attr.ib(type=List[str], factory=list) user_forums_roles = attr.ib(type=List[str], factory=list) - options = attr.ib(type=dict[str, str], factory=dict) + options = attr.ib(type=dict, factory=dict) @attr.s(frozen=True) From 9ee35f268c8281de3b81a1c10bb725e3c50e8d17 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Wed, 8 Jan 2025 11:11:09 +0100 Subject: [PATCH 14/17] refactor: drop changes in forum events --- openedx_events/tooling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openedx_events/tooling.py b/openedx_events/tooling.py index 1beb55a0..1ca53306 100644 --- a/openedx_events/tooling.py +++ b/openedx_events/tooling.py @@ -24,6 +24,9 @@ "org.openedx.content_authoring.course.certificate_config.changed.v1", "org.openedx.content_authoring.course.certificate_config.deleted.v1", "org.openedx.learning.user.notification.requested.v1", + "org.openedx.learning.thread.created.v1", + "org.openedx.learning.response.created.v1", + "org.openedx.learning.comment.created.v1", "org.openedx.learning.course.notification.requested.v1", "org.openedx.learning.ora.submission.created.v1", ] From a32ac768aca68f528d8d50fad0a39182d8973c0f Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Fri, 10 Jan 2025 11:12:53 +0100 Subject: [PATCH 15/17] refactor: address PR reviews --- openedx_events/event_bus/avro/deserializer.py | 12 ++++++------ openedx_events/event_bus/avro/schema.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/openedx_events/event_bus/avro/deserializer.py b/openedx_events/event_bus/avro/deserializer.py index 16fe21b5..43e303c6 100644 --- a/openedx_events/event_bus/avro/deserializer.py +++ b/openedx_events/event_bus/avro/deserializer.py @@ -41,25 +41,25 @@ def _deserialized_avro_record_dict_to_object(data: dict, data_type, deserializer elif data_type in PYTHON_TYPE_TO_AVRO_MAPPING: return data elif data_type_origin == list: - # returns types of list contents - # if data_type == List[int], arg_data_type = (int,) + # Returns types of list contents. + # Example: if data_type == List[int], arg_data_type = (int,) arg_data_type = get_args(data_type) if not arg_data_type: raise TypeError( "List without annotation type is not supported. The argument should be a type, for eg., List[int]" ) - # check whether list items type is in basic types. + # Check whether list items type is in basic types. if arg_data_type[0] in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING: return data elif data_type_origin == dict: - # returns types of dict contents - # if data_type == Dict[str, int], arg_data_type = (str, int) + # Returns types of dict contents. + # Example: if data_type == Dict[str, int], arg_data_type = (str, int) arg_data_type = get_args(data_type) if not arg_data_type: raise TypeError( "Dict without annotation type is not supported. The argument should be a type, for eg., Dict[str, int]" ) - # check whether dict items type is in basic types. + # Check whether dict items type is in basic types. if arg_data_type[1] in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING: return data elif hasattr(data_type, "__attrs_attrs__"): diff --git a/openedx_events/event_bus/avro/schema.py b/openedx_events/event_bus/avro/schema.py index e46aded3..3da601b5 100644 --- a/openedx_events/event_bus/avro/schema.py +++ b/openedx_events/event_bus/avro/schema.py @@ -63,7 +63,7 @@ def _create_avro_field_definition(data_key, data_type, previously_seen_types, field["type"] = field_type # Case 2: data_type is a simple type that can be converted directly to an Avro type elif data_type in PYTHON_TYPE_TO_AVRO_MAPPING: - if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["record", "map", "array"]: + if PYTHON_TYPE_TO_AVRO_MAPPING[data_type] in ["map", "array"]: # pylint: disable-next=broad-exception-raised raise Exception("Unable to generate Avro schema for dict or array fields without annotation types.") avro_type = PYTHON_TYPE_TO_AVRO_MAPPING[data_type] From 2980a9391f7ea84f4cef272730ba1dd17e429413 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Fri, 10 Jan 2025 11:26:16 +0100 Subject: [PATCH 16/17] test: add missing test for de-serializing complex types --- .../event_bus/avro/tests/test_deserializer.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/openedx_events/event_bus/avro/tests/test_deserializer.py b/openedx_events/event_bus/avro/tests/test_deserializer.py index 9e3437f1..b2398022 100644 --- a/openedx_events/event_bus/avro/tests/test_deserializer.py +++ b/openedx_events/event_bus/avro/tests/test_deserializer.py @@ -298,6 +298,20 @@ def test_deserialization_of_dict_without_annotation(self): with self.assertRaises(TypeError): deserializer.from_dict(initial_dict) + def test_deserialization_of_dict_with_complex_types_fails(self): + SIGNAL = create_simple_signal({"dict_input": Dict[str, list]}) + with self.assertRaises(TypeError): + AvroSignalDeserializer(SIGNAL) + initial_dict = {"dict_input": {"key1": [1, 3], "key2": [4, 5]}} + # create dummy signal to bypass schema check while initializing deserializer + # This allows us to test whether correct exceptions are raised while deserializing data + DUMMY_SIGNAL = create_simple_signal({"dict_input": Dict[str, int]}) + deserializer = AvroSignalDeserializer(DUMMY_SIGNAL) + # Update signal with incorrect type info + deserializer.signal = SIGNAL + with self.assertRaises(TypeError): + deserializer.from_dict(initial_dict) + def test_deserialization_of_nested_list_fails(self): """ Check that deserialization raises error when nested list data is passed. From 7eabdaa2304e4b143a86e88db7619c20f50a7f22 Mon Sep 17 00:00:00 2001 From: Maria Grimaldi Date: Fri, 10 Jan 2025 12:15:56 +0100 Subject: [PATCH 17/17] refactor: check for keys and values simple types while de-serializing --- openedx_events/event_bus/avro/deserializer.py | 2 +- openedx_events/event_bus/avro/tests/test_deserializer.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/openedx_events/event_bus/avro/deserializer.py b/openedx_events/event_bus/avro/deserializer.py index 43e303c6..65767ee3 100644 --- a/openedx_events/event_bus/avro/deserializer.py +++ b/openedx_events/event_bus/avro/deserializer.py @@ -60,7 +60,7 @@ def _deserialized_avro_record_dict_to_object(data: dict, data_type, deserializer "Dict without annotation type is not supported. The argument should be a type, for eg., Dict[str, int]" ) # Check whether dict items type is in basic types. - if arg_data_type[1] in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING: + if all(arg in SIMPLE_PYTHON_TYPE_TO_AVRO_MAPPING for arg in arg_data_type): return data elif hasattr(data_type, "__attrs_attrs__"): transformed = {} diff --git a/openedx_events/event_bus/avro/tests/test_deserializer.py b/openedx_events/event_bus/avro/tests/test_deserializer.py index b2398022..f8522b03 100644 --- a/openedx_events/event_bus/avro/tests/test_deserializer.py +++ b/openedx_events/event_bus/avro/tests/test_deserializer.py @@ -312,6 +312,13 @@ def test_deserialization_of_dict_with_complex_types_fails(self): with self.assertRaises(TypeError): deserializer.from_dict(initial_dict) + def test_deserialization_of_dicts_with_keys_of_complex_types_fails(self): + SIGNAL = create_simple_signal({"dict_input": Dict[CourseKey, int]}) + deserializer = AvroSignalDeserializer(SIGNAL) + initial_dict = {"dict_input": {CourseKey.from_string("course-v1:edX+DemoX.1+2014"): 1}} + with self.assertRaises(TypeError): + deserializer.from_dict(initial_dict) + def test_deserialization_of_nested_list_fails(self): """ Check that deserialization raises error when nested list data is passed.