diff --git a/rdmo/core/tests/utils.py b/rdmo/core/tests/utils.py
index 3bf4db8c4..1fce8f674 100644
--- a/rdmo/core/tests/utils.py
+++ b/rdmo/core/tests/utils.py
@@ -1,3 +1,5 @@
+import hashlib
+
 from rdmo.core.models import Model
 from rdmo.core.tests.constants import multisite_status_map, status_map_object_permissions
 
@@ -30,3 +32,14 @@ def get_obj_perms_status_code(instance, username, method):
     except KeyError:
         # not all users are defined in the method_instance_perms_map
         return multisite_status_map[method][username]
+
+
+def compute_checksum(string):
+    """Return the hexadecimal SHA-1 digest of ``string``.
+
+    ``string`` is usually the ``bytes`` content of a file read in binary mode;
+    a ``str`` is also accepted and is encoded as UTF-8 before hashing.
+    """
+    if isinstance(string, str):
+        string = string.encode()
+    return hashlib.sha1(string).hexdigest()
diff --git a/rdmo/projects/tests/test_utils.py b/rdmo/projects/tests/test_utils.py
index cbb2f4af0..db2bfe2c5 100644
--- a/rdmo/projects/tests/test_utils.py
+++ b/rdmo/projects/tests/test_utils.py
@@ -4,6 +4,8 @@
 from django.contrib.sites.models import Site
 from django.http import QueryDict
 
+from rdmo.core.tests.utils import compute_checksum
+
 from ..filters import ProjectFilter
 from ..models import Project
 from ..utils import copy_project, set_context_querystring_with_filter_and_page
@@ -96,11 +98,14 @@ def test_copy_project(db, files):
             assert getattr(value_copy, field) == getattr(value, field), field
 
         if value_copy.file:
+            assert value_copy.file.path != value.file.path
             assert value_copy.file.path == value_copy.file.path.replace(
                 f'/projects/{project.id}/values/{value.id}/',
                 f'/projects/{project_copy.id}/values/{value_copy.id}/'
             )
-            assert value_copy.file.size == value_copy.file.size
+            assert value_copy.file.size == value.file.size
+            with value_copy.file.open('rb') as copied_file, value.file.open('rb') as original_file:
+                assert compute_checksum(copied_file.read()) == compute_checksum(original_file.read())
         else:
             assert not value.file
 
@@ -113,10 +118,13 @@ def test_copy_project(db, files):
                 assert getattr(value_copy, field) == getattr(value, field)
 
             if value_copy.file:
+                assert value_copy.file.path != value.file.path
                 assert value_copy.file.path == value_copy.file.path.replace(
                     f'/projects/{project.id}/snapshot/{snapshot.id}/values/{value.id}/',
                     f'/projects/{project_copy.id}/snapshot/{snapshot.id}/values/{value_copy.id}/'
                 )
-                assert value_copy.file.open('rb').read() == value_copy.file.open('rb').read()
+                assert value_copy.file.size == value.file.size
+                with value_copy.file.open('rb') as copied_file, value.file.open('rb') as original_file:
+                    assert compute_checksum(copied_file.read()) == compute_checksum(original_file.read())
             else:
                 assert not value.file
diff --git a/rdmo/projects/utils.py b/rdmo/projects/utils.py
index f9a4ec2c0..ab619b54b 100644
--- a/rdmo/projects/utils.py
+++ b/rdmo/projects/utils.py
@@ -3,7 +3,6 @@
 
 from django.conf import settings
 from django.contrib.sites.models import Site
-from django.db import connection
 from django.template.loader import render_to_string
 from django.urls import reverse
 from django.utils.timezone import now
@@ -54,9 +53,6 @@ def copy_project(project, site, owners):
         for snapshot in project.snapshots.all()
     }
 
-    # create a temporary buffer for all values with files
-    file_values = []
-
     # unset the id, set current site and update timestamps
     project.id = None
     project.site = site
@@ -81,9 +77,12 @@ def copy_project(project, site, owners):
         value.created = timestamp
 
         if value.file:
-            file_values.append((value, value.file_name, value.file))
-
-        project_values.append(value)
+            # file values cannot be bulk created since we need their id and only postgres provides that (reliably)
+            # https://docs.djangoproject.com/en/4.2/ref/models/querysets/#bulk-create
+            value.save()
+            value.copy_file(value.file_name, value.file)
+        else:
+            project_values.append(value)
 
     # insert the new values using bulk_create
     Value.objects.bulk_create(project_values)
@@ -103,32 +102,14 @@ def copy_project(project, site, owners):
             value.created = timestamp
 
             if value.file:
-                file_values.append((value, value.file_name, value.file))
-
-            project_snapshot_values.append(value)
+                value.save()
+                value.copy_file(value.file_name, value.file)
+            else:
+                project_snapshot_values.append(value)
 
     # insert the new snapshot values using bulk_create
     Value.objects.bulk_create(project_snapshot_values)
 
-    for value, file_name, file_content in file_values:
-        if connection.vendor == 'postgres':
-            # bulk_create will only set the primary key on cool databases
-            # https://docs.djangoproject.com/en/4.2/ref/models/querysets/#bulk-create
-            value.copy_file(file_name, file_content)
-        else:
-            # refetch the value from the database, we use filter and first here to be more
-            # stable against weird cases, where collection_index is not unique
-            db_value = Value.objects.filter(
-                project=value.project,
-                snapshot=value.snapshot,
-                attribute=value.attribute,
-                set_prefix=value.set_prefix,
-                set_index=value.set_index,
-                collection_index=value.collection_index
-            ).first()
-            if db_value and db_value.file_name == file_name:
-                db_value.copy_file(file_name, file_content)
-
     for owner in owners:
         membership = Membership(project=project, user=owner, role='owner')
         membership.save()