From 216d2de31697b1a26f69b5eda5abd5d6e0829f75 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 27 Jan 2023 08:34:58 -0800 Subject: [PATCH 01/11] [#3931] Fix regression in node diff endpoint, add tests and reorganize existing --- .../contentcuration/tests/test_asynctask.py | 2 +- .../test_nodes.py} | 43 +++++++++++++------ .../contentcuration/utils/celery/tasks.py | 10 ++--- .../contentcuration/views/nodes.py | 3 +- 4 files changed, 37 insertions(+), 21 deletions(-) rename contentcuration/contentcuration/tests/{test_node_views.py => views/test_nodes.py} (51%) diff --git a/contentcuration/contentcuration/tests/test_asynctask.py b/contentcuration/contentcuration/tests/test_asynctask.py index bbd2714f47..92bb3fdb4f 100644 --- a/contentcuration/contentcuration/tests/test_asynctask.py +++ b/contentcuration/contentcuration/tests/test_asynctask.py @@ -234,7 +234,7 @@ def test_fetch_or_enqueue_task__channel_id__uuid_then_hex(self): self.assertEqual(expected_task.task_id, async_result.task_id) def test_requeue_task(self): - signature = requeue_test_task._generate_signature({}) + signature = requeue_test_task.generate_signature({}) existing_task_ids = requeue_test_task.find_ids(signature) self.assertEqual(len(existing_task_ids), 0) diff --git a/contentcuration/contentcuration/tests/test_node_views.py b/contentcuration/contentcuration/tests/views/test_nodes.py similarity index 51% rename from contentcuration/contentcuration/tests/test_node_views.py rename to contentcuration/contentcuration/tests/views/test_nodes.py index 922675c721..2367263275 100644 --- a/contentcuration/contentcuration/tests/test_node_views.py +++ b/contentcuration/contentcuration/tests/views/test_nodes.py @@ -1,22 +1,39 @@ -from __future__ import absolute_import - import datetime import json import pytz from django.conf import settings from django.core.cache import cache +from django.urls import reverse +from mock import Mock from mock import patch -from rest_framework.reverse import reverse -from .base import BaseAPITestCase -from .testdata import tree -from contentcuration.models import Channel +from contentcuration.tasks import generatenodediff_task +from contentcuration.tests.base import BaseAPITestCase + + +class NodesViewsTestCase(BaseAPITestCase): + def test_get_node_diff__missing_contentnode(self): + response = self.get(reverse("get_node_diff", kwargs=dict(updated_id="abc123", original_id="def456"))) + self.assertEqual(response.status_code, 404) + + def test_get_node_diff__no_task_processing(self): + pk = self.channel.main_tree.pk + response = self.get(reverse("get_node_diff", kwargs=dict(updated_id=pk, original_id=pk))) + self.assertEqual(response.status_code, 404) + + @patch.object(generatenodediff_task, 'find_incomplete_ids') + def test_get_node_diff__task_processing(self, mock_find_incomplete_ids): + qs = Mock(spec="django.db.models.query.QuerySet") + mock_find_incomplete_ids.return_value = qs() + mock_find_incomplete_ids.return_value.exists.return_value = True + pk = self.channel.main_tree.pk + response = self.get(reverse("get_node_diff", kwargs=dict(updated_id=pk, original_id=pk))) + self.assertEqual(response.status_code, 302) -class NodeViewsUtilityTestCase(BaseAPITestCase): def test_get_channel_details(self): - url = reverse('get_channel_details', [self.channel.id]) + url = reverse('get_channel_details', kwargs={"channel_id": self.channel.id}) response = self.get(url) details = json.loads(response.content) @@ -33,13 +50,13 @@ def test_get_channel_details_cached(self): cache.set(cache_key, json.dumps(data)) with patch("contentcuration.views.nodes.getnodedetails_task") as task_mock: - url = reverse('get_channel_details', [self.channel.id]) + url = reverse('get_channel_details', kwargs={"channel_id": self.channel.id}) self.get(url) # Check that the outdated cache prompts an asynchronous cache update task_mock.enqueue.assert_called_once_with(self.user, node_id=self.channel.main_tree.id) -class GetTopicDetailsEndpointTestCase(BaseAPITestCase): +class ChannelDetailsEndpointTestCase(BaseAPITestCase): def test_200_post(self): response = self.get( reverse("get_channel_details", kwargs={"channel_id": self.channel.id}) @@ -47,10 +64,8 @@ def test_200_post(self): self.assertEqual(response.status_code, 200) def test_404_no_permission(self): - new_channel = Channel.objects.create() - new_channel.main_tree = tree() - new_channel.save() + self.channel.editors.remove(self.user) response = self.get( - reverse("get_channel_details", kwargs={"channel_id": new_channel.id}), + reverse("get_channel_details", kwargs={"channel_id": self.channel.id}), ) self.assertEqual(response.status_code, 404) diff --git a/contentcuration/contentcuration/utils/celery/tasks.py b/contentcuration/contentcuration/utils/celery/tasks.py index 5b55d83bd9..3ff3a9bfe8 100644 --- a/contentcuration/contentcuration/utils/celery/tasks.py +++ b/contentcuration/contentcuration/utils/celery/tasks.py @@ -139,7 +139,7 @@ def _prepare_kwargs(self, kwargs): for key, value in kwargs.items() ) - def _generate_signature(self, kwargs): + def generate_signature(self, kwargs): """ :param kwargs: A dictionary of task kwargs :return: An hex string representing an md5 hash of task metadata @@ -207,7 +207,7 @@ def enqueue(self, user, **kwargs): signature = kwargs.pop('signature', None) if signature is None: - signature = self._generate_signature(kwargs) + signature = self.generate_signature(kwargs) task_id = uuid.uuid4().hex prepared_kwargs = self._prepare_kwargs(kwargs) @@ -249,7 +249,7 @@ def fetch_or_enqueue(self, user, **kwargs): if self.app.conf.task_always_eager: return self.enqueue(user, **kwargs) - signature = self._generate_signature(kwargs) + signature = self.generate_signature(kwargs) # create an advisory lock to obtain exclusive control on preventing task duplicates with self._lock_signature(signature): @@ -278,7 +278,7 @@ def requeue(self, **kwargs): task_result = get_task_model(self, request.id) task_kwargs = request.kwargs.copy() task_kwargs.update(kwargs) - signature = self._generate_signature(kwargs) + signature = self.generate_signature(kwargs) logging.info(f"Re-queuing task {self.name} for user {task_result.user.pk} from {request.id} | {signature}") return self.enqueue(task_result.user, signature=signature, **task_kwargs) @@ -289,7 +289,7 @@ def revoke(self, exclude_task_ids=None, **kwargs): :param kwargs: Task keyword arguments that will be used to match against tasks :return: The number of tasks revoked """ - signature = self._generate_signature(kwargs) + signature = self.generate_signature(kwargs) task_ids = self.find_incomplete_ids(signature) if exclude_task_ids is not None: diff --git a/contentcuration/contentcuration/views/nodes.py b/contentcuration/contentcuration/views/nodes.py index 0d83b29dd4..a1e924c0c7 100644 --- a/contentcuration/contentcuration/views/nodes.py +++ b/contentcuration/contentcuration/views/nodes.py @@ -98,8 +98,9 @@ def get_node_diff(request, updated_id, original_id): if data: return Response(data) + signature = generatenodediff_task.generate_signature(dict(updated_id=updated_id, original_id=original_id)) # See if there's already a staging task in progress - if generatenodediff_task.find_incomplete_ids(updated_id=updated_id, original_id=original_id).exists(): + if generatenodediff_task.find_incomplete_ids(signature).exists(): return Response('Diff is being generated', status=status.HTTP_302_FOUND) except ContentNode.DoesNotExist: pass From 0f39b3315910be040fc0518f9410a9479ae74420 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 27 Jan 2023 08:38:25 -0800 Subject: [PATCH 02/11] [#3921] Remove signature insurance on task result model --- contentcuration/contentcuration/models.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 6aa8ca4eef..f9f5852f31 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -75,7 +75,6 @@ from contentcuration.db.models.manager import CustomManager from contentcuration.statistics import record_channel_stats from contentcuration.utils.cache import delete_public_channel_cache_keys -from contentcuration.utils.celery.tasks import generate_task_signature from contentcuration.utils.parser import load_json_string from contentcuration.viewsets.sync.constants import ALL_CHANGES from contentcuration.viewsets.sync.constants import ALL_TABLES @@ -2451,7 +2450,6 @@ class TaskResultCustom(object): signature = models.CharField(null=True, blank=False, max_length=32) super_as_dict = TaskResult.as_dict - super_save = TaskResult.save def as_dict(self): """ @@ -2465,22 +2463,6 @@ def as_dict(self): ) return super_dict - def set_signature(self): - """ - Generates and sets the signature for the task if it isn't set - """ - if self.signature is not None: - # nothing to do - return - self.signature = generate_task_signature(self.task_name, task_kwargs=self.task_kwargs, channel_id=self.channel_id) - - def save(self, *args, **kwargs): - """ - Override save to ensure signature is generated - """ - self.set_signature() - return self.super_save(*args, **kwargs) - @classmethod def contribute_to_class(cls, model_class=TaskResult): """ From 2bc958bf31c536ca68d2aba6419a7165f9f4117e Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 27 Jan 2023 08:50:30 -0800 Subject: [PATCH 03/11] [#3836] Retry saving of task model --- .../contentcuration/utils/celery/tasks.py | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/contentcuration/contentcuration/utils/celery/tasks.py b/contentcuration/contentcuration/utils/celery/tasks.py index 3ff3a9bfe8..1045be52f2 100644 --- a/contentcuration/contentcuration/utils/celery/tasks.py +++ b/contentcuration/contentcuration/utils/celery/tasks.py @@ -10,6 +10,7 @@ from celery.app.task import Task from celery.result import AsyncResult from django.db import transaction +from django.db.utils import IntegrityError from contentcuration.constants.locking import TASK_LOCK from contentcuration.db.advisory_lock import advisory_lock @@ -224,14 +225,24 @@ def enqueue(self, user, **kwargs): # ensure the result is saved to the backend (database) self.backend.add_pending_result(async_result) - # after calling apply, we should have task result model, so get it and set our custom fields - task_result = get_task_model(self, task_id) - task_result.task_name = self.name - task_result.task_kwargs = self.backend.encode(prepared_kwargs) - task_result.user = user - task_result.channel_id = channel_id - task_result.signature = signature - task_result.save() + saved = False + tries = 0 + while not saved: + # after calling apply, we should ideally have a task result model saved to the DB, but it relies on celery's + # event consumption, and we might try to retrieve it before it has actually saved, so we retry + try: + task_result = get_task_model(self, task_id) + task_result.task_name = self.name + task_result.task_kwargs = self.backend.encode(prepared_kwargs) + task_result.user = user + task_result.channel_id = channel_id + task_result.signature = signature + task_result.save() + saved = True + except IntegrityError as e: + tries += 1 + if tries > 3: + raise e return async_result def fetch_or_enqueue(self, user, **kwargs): From 6c290209e5c911e48d01194a185a8ee9739bd924 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 27 Jan 2023 08:51:51 -0800 Subject: [PATCH 04/11] [#3925] Ensure debounce sync returns promise, with some delay for logging out --- .../contentcuration/frontend/shared/data/serverSync.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contentcuration/contentcuration/frontend/shared/data/serverSync.js b/contentcuration/contentcuration/frontend/shared/data/serverSync.js index 68b0e7f02b..ad814f9349 100644 --- a/contentcuration/contentcuration/frontend/shared/data/serverSync.js +++ b/contentcuration/contentcuration/frontend/shared/data/serverSync.js @@ -331,6 +331,8 @@ const debouncedSyncChanges = debounce(() => { if (!syncActive) { return syncChanges(); } + // TODO: actually return promise that resolves when active sync completes + return new Promise(resolve => setTimeout(resolve, 1000)); }, SYNC_IF_NO_CHANGES_FOR * 1000); if (process.env.NODE_ENV !== 'production' && typeof window !== 'undefined') { @@ -391,6 +393,9 @@ export function stopSyncing() { db.on('changes').unsubscribe(handleChanges); } +/** + * @return {Promise} + */ export function forceServerSync() { debouncedSyncChanges(); return debouncedSyncChanges.flush(); From 012094d5242a8978ac459c070734ecb131c28d30 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 19 Apr 2023 23:11:17 +0530 Subject: [PATCH 05/11] Defensive file duration check --- contentcuration/contentcuration/viewsets/file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py index 132bfe0769..922d6f276d 100644 --- a/contentcuration/contentcuration/viewsets/file.py +++ b/contentcuration/contentcuration/viewsets/file.py @@ -148,6 +148,8 @@ def upload_url(self, request): if not isinstance(duration, (int, float)): return HttpResponseBadRequest(reason="File duration must be a number") duration = math.floor(duration) + if duration <= 0: + return HttpResponseBadRequest(reason="File duration is equal to or less than 0") try: request.user.check_space(float(size), checksum) From 87b522edc3cdfcc3eae182e9cfecba0f21077a89 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 26 Apr 2023 11:54:54 -0700 Subject: [PATCH 06/11] Add dependency types to MEDIA_PRESETS --- contentcuration/contentcuration/models.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index f9f5852f31..0baabfb7ce 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -2076,7 +2076,13 @@ class StagedFile(models.Model): FILE_DISTINCT_INDEX_NAME = "file_checksum_file_size_idx" FILE_MODIFIED_DESC_INDEX_NAME = "file_modified_desc_idx" FILE_DURATION_CONSTRAINT = "file_media_duration_int" -MEDIA_PRESETS = [format_presets.AUDIO, format_presets.VIDEO_HIGH_RES, format_presets.VIDEO_LOW_RES] +MEDIA_PRESETS = [ + format_presets.AUDIO, + format_presets.AUDIO_DEPENDENCY, + format_presets.VIDEO_HIGH_RES, + format_presets.VIDEO_LOW_RES, + format_presets.VIDEO_DEPENDENCY, +] class File(models.Model): From 91cf944ad68fbcf56eef503175027add6c36ccc8 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 26 Apr 2023 11:57:36 -0700 Subject: [PATCH 07/11] Temporarily remove check constraint --- .../0142_remove_file_file_media_duration_int.py | 16 ++++++++++++++++ contentcuration/contentcuration/models.py | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 contentcuration/contentcuration/migrations/0142_remove_file_file_media_duration_int.py diff --git a/contentcuration/contentcuration/migrations/0142_remove_file_file_media_duration_int.py b/contentcuration/contentcuration/migrations/0142_remove_file_file_media_duration_int.py new file mode 100644 index 0000000000..e497fbd398 --- /dev/null +++ b/contentcuration/contentcuration/migrations/0142_remove_file_file_media_duration_int.py @@ -0,0 +1,16 @@ +# Generated by Django 3.2.18 on 2023-04-26 18:55 +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0141_add_task_signature'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='file', + name='file_media_duration_int', + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 0baabfb7ce..8bf951e5a3 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -2219,7 +2219,9 @@ class Meta: models.Index(fields=["-modified"], name=FILE_MODIFIED_DESC_INDEX_NAME), ] constraints = [ - models.CheckConstraint(check=(Q(preset__in=MEDIA_PRESETS, duration__gt=0) | Q(duration__isnull=True)), name=FILE_DURATION_CONSTRAINT) + # Commented out to temporarily remove the constraint until we can re-run `set_file_duration` + # on all media files with the dependency preset in the database. + # models.CheckConstraint(check=(Q(preset__in=MEDIA_PRESETS, duration__gt=0) | Q(duration__isnull=True)), name=FILE_DURATION_CONSTRAINT) ] From 9eebeff174bdb8c3e687dce3d9bd6dbae667135c Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Wed, 12 Apr 2023 10:02:14 -0700 Subject: [PATCH 08/11] Remove deprecated and no longer used codecov library. --- requirements-dev.in | 1 - requirements-dev.txt | 4 ---- 2 files changed, 5 deletions(-) diff --git a/requirements-dev.in b/requirements-dev.in index 6c3b1033f8..b2bfb0931c 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -14,7 +14,6 @@ pytest-pythonpath pytest-timeout pytest-watch pre-commit==1.15.1 -codecov coverage pytest-cov nodeenv diff --git a/requirements-dev.txt b/requirements-dev.txt index 383ee77801..07d875e98f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -35,8 +35,6 @@ click==8.1.3 # -c requirements.txt # flask # pip-tools -codecov==2.1.12 - # via -r requirements-dev.in colorama==0.4.4 # via pytest-watch configargparse==1.5.3 @@ -50,7 +48,6 @@ coreschema==0.0.4 coverage[toml]==6.2 # via # -r requirements-dev.in - # codecov # pytest-cov customizable-django-profiler @ git+https://github.com/someshchaturvedi/customizable-django-profiler.git # via -r requirements-dev.in @@ -220,7 +217,6 @@ pyzmq==23.1.0 requests==2.25.1 # via # -c requirements.txt - # codecov # coreapi # locust roundrobin==0.0.2 From 3cc1292ee4db3d4c11d04531aa144fc304148e89 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 26 Apr 2023 12:56:53 -0700 Subject: [PATCH 09/11] Skip tests for now --- contentcuration/contentcuration/tests/test_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 97d3359ce7..b8dfada4f8 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -651,6 +651,7 @@ def test_duration_check_constraint__acceptable(self): duration=1123123, ) + @pytest.mark.skip(reason="Temporarily removed constraint") def test_duration_check_constraint__negative(self): channel = testdata.channel() with self.assertRaises(IntegrityError, msg=FILE_DURATION_CONSTRAINT): @@ -660,6 +661,7 @@ def test_duration_check_constraint__negative(self): duration=-10, ) + @pytest.mark.skip(reason="Temporarily removed constraint") def test_duration_check_constraint__not_media(self): channel = testdata.channel() with self.assertRaises(IntegrityError, msg=FILE_DURATION_CONSTRAINT): From 2f2c28ef8e88776112ad82380a0596276ae1b335 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 3 May 2023 08:29:35 -0700 Subject: [PATCH 10/11] Re-add constraint for file-duration --- .../0143_file_file_media_duration_int.py | 17 +++++++++++++++++ contentcuration/contentcuration/models.py | 9 ++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 contentcuration/contentcuration/migrations/0143_file_file_media_duration_int.py diff --git a/contentcuration/contentcuration/migrations/0143_file_file_media_duration_int.py b/contentcuration/contentcuration/migrations/0143_file_file_media_duration_int.py new file mode 100644 index 0000000000..3a7dbae1a0 --- /dev/null +++ b/contentcuration/contentcuration/migrations/0143_file_file_media_duration_int.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.18 on 2023-05-03 15:28 +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0142_remove_file_file_media_duration_int'), + ] + + operations = [ + migrations.AddConstraint( + model_name='file', + constraint=models.CheckConstraint(check=models.Q(models.Q(('duration__gt', 0), ('preset__in', ['audio', 'audio_dependency', 'high_res_video', 'low_res_video', 'video_dependency'])), ('duration__isnull', True), _connector='OR'), name='file_media_duration_int'), + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 8bf951e5a3..133aebcb0c 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -2219,9 +2219,12 @@ class Meta: models.Index(fields=["-modified"], name=FILE_MODIFIED_DESC_INDEX_NAME), ] constraints = [ - # Commented out to temporarily remove the constraint until we can re-run `set_file_duration` - # on all media files with the dependency preset in the database. - # models.CheckConstraint(check=(Q(preset__in=MEDIA_PRESETS, duration__gt=0) | Q(duration__isnull=True)), name=FILE_DURATION_CONSTRAINT) + # enforces that duration is null when not a media preset, but the duration may be null for media presets + # but if not-null, should be greater than 0 + models.CheckConstraint( + check=(Q(preset__in=MEDIA_PRESETS, duration__gt=0) | Q(duration__isnull=True)), + name=FILE_DURATION_CONSTRAINT + ) ] From 5c6183626c8ca088f230901356042cb39e55d50f Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 3 May 2023 08:30:28 -0700 Subject: [PATCH 11/11] Revert "Skip tests for now" This reverts commit 3cc1292ee4db3d4c11d04531aa144fc304148e89. --- contentcuration/contentcuration/tests/test_models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index b8dfada4f8..97d3359ce7 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -651,7 +651,6 @@ def test_duration_check_constraint__acceptable(self): duration=1123123, ) - @pytest.mark.skip(reason="Temporarily removed constraint") def test_duration_check_constraint__negative(self): channel = testdata.channel() with self.assertRaises(IntegrityError, msg=FILE_DURATION_CONSTRAINT): @@ -661,7 +660,6 @@ def test_duration_check_constraint__negative(self): duration=-10, ) - @pytest.mark.skip(reason="Temporarily removed constraint") def test_duration_check_constraint__not_media(self): channel = testdata.channel() with self.assertRaises(IntegrityError, msg=FILE_DURATION_CONSTRAINT):