From 758e08fa7ad6d03509b1e7b474ac640122a357fb Mon Sep 17 00:00:00 2001 From: nwoodward Date: Tue, 10 Sep 2024 15:55:56 -0500 Subject: [PATCH] update BagIt creation processes to include a timestamp for creation, modify and access time metadata in files in the bag --- pom.xml | 22 ++++++++------- .../org/dspace/pack/bagit/BagItAipWriter.java | 27 ++++++++++++++++--- .../dspace/pack/bagit/CollectionPacker.java | 2 ++ .../dspace/pack/bagit/CommunityPacker.java | 5 ++-- .../org/dspace/pack/bagit/ItemPacker.java | 11 ++++++-- .../org/dspace/pack/bagit/SitePacker.java | 2 ++ .../dspace/pack/bagit/BagItAipWriterTest.java | 4 ++- 7 files changed, 55 insertions(+), 18 deletions(-) diff --git a/pom.xml b/pom.xml index fac3f40..e51b602 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ 8.1.0 - 1.1.0 + 1.2.0 4.0.2 4.0.5 @@ -260,21 +260,25 @@ provided - org.slf4j - slf4j-log4j12 - - - log4j - log4j + commons-io + commons-io javax.xml.bind jaxb-api + + log4j + log4j + org.glassfish.jaxb jaxb-runtime + + org.slf4j + slf4j-log4j12 + @@ -319,12 +323,12 @@ org.apache.logging.log4j log4j-api - 2.23.1 + 2.24.0 org.apache.logging.log4j log4j-core - 2.23.1 + 2.24.0 test diff --git a/src/main/java/org/dspace/pack/bagit/BagItAipWriter.java b/src/main/java/org/dspace/pack/bagit/BagItAipWriter.java index 1b86f24..88c5ec1 100644 --- a/src/main/java/org/dspace/pack/bagit/BagItAipWriter.java +++ b/src/main/java/org/dspace/pack/bagit/BagItAipWriter.java @@ -23,9 +23,11 @@ import java.sql.SQLException; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; + import jakarta.xml.bind.JAXBContext; import jakarta.xml.bind.JAXBException; import jakarta.xml.bind.Marshaller; @@ -35,6 +37,7 @@ import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.Item; import 
org.dspace.content.service.BitstreamService; import org.dspace.core.Context; import org.dspace.core.Utils; @@ -53,7 +56,6 @@ import org.joda.time.LocalDate; import org.joda.time.format.ISODateTimeFormat; -import jakarta.xml.bind.JAXBException; /** * The BagItAipWriter handles the packaging of DSpaceObjects into their respective bags. It processes the metadata and @@ -81,12 +83,14 @@ public class BagItAipWriter { public static final String TEMPLATE_XML = "template-metadata.xml"; private static final String BITSTREAM_PREFIX = "bitstream_"; + protected static final long DEFAULT_MODIFIED_DATE = 1036368000L * 1000; + private final BitstreamService bitstreamService = ContentServiceFactory.getInstance().getBitstreamService(); - // Fields used for book keeping + // Fields used for bookkeeping private final AtomicLong successBytes = new AtomicLong(); private final AtomicLong successFiles = new AtomicLong(); - private final Map checksums = new HashMap<>(); + private final LinkedHashMap checksums = new LinkedHashMap<>(); /** * The context to use @@ -138,6 +142,11 @@ public class BagItAipWriter { */ private List bitstreams; + /** + * Last modified time of DSpace object + */ + private Long lastModifiedTime; + /** * Constructor for a {@link BagItAipWriter}. Takes a minimal set of information needed in order to write an AIP as a * BagIt bag for dspace consumption. 
@@ -153,6 +162,7 @@ public BagItAipWriter(final Context context, final File directory, final String this.logo = null; this.policies = null; this.metadata = null; + this.lastModifiedTime = DEFAULT_MODIFIED_DATE; this.archFmt = checkNotNull(archFmt); this.directory = checkNotNull(directory); this.properties = checkNotNull(properties); @@ -213,6 +223,15 @@ public BagItAipWriter withBitstreams(final List bitstreams) { return this; } + /** + * @param lastModifiedTime the timestamp, in milliseconds since the epoch, passed to the bag serializer as the last-modified time when writing AIP files + * @return the {@link BagItAipWriter} used for creating the aip + */ + public BagItAipWriter withLastModifiedTime(final long lastModifiedTime) { + this.lastModifiedTime = lastModifiedTime; + return this; + } + /** * Create a serialized BagIt bag using the parameters the BagItAipWriter was instantiated with * @@ -354,7 +373,7 @@ public File packageAip() throws IOException, SQLException, AuthorizeException { bag.write(); final BagSerializer serializer = SerializationSupport.serializerFor(archFmt, profile); - final Path serializedBag = serializer.serialize(directory.toPath()); + final Path serializedBag = serializer.serializeWithTimestamp(directory.toPath(), lastModifiedTime); delete(directory); return serializedBag.toFile(); diff --git a/src/main/java/org/dspace/pack/bagit/CollectionPacker.java b/src/main/java/org/dspace/pack/bagit/CollectionPacker.java index a70d898..2affd3b 100644 --- a/src/main/java/org/dspace/pack/bagit/CollectionPacker.java +++ b/src/main/java/org/dspace/pack/bagit/CollectionPacker.java @@ -13,6 +13,7 @@ import static org.dspace.pack.PackerFactory.OBJFILE; import static org.dspace.pack.PackerFactory.OWNER_ID; import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP; +import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE; import static org.dspace.pack.bagit.BagItAipWriter.OBJ_TYPE_COLLECTION; import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER; @@ -143,6 +144,7 @@ public File pack(File packDir) throws
AuthorizeException, IOException, SQLExcept .withMetadata(metadata) .withItemTemplate(templateMd) .withDSpaceRoles(dSpaceRoles) + .withLastModifiedTime(DEFAULT_MODIFIED_DATE) .packageAip(); } diff --git a/src/main/java/org/dspace/pack/bagit/CommunityPacker.java b/src/main/java/org/dspace/pack/bagit/CommunityPacker.java index c477e32..65c0d27 100644 --- a/src/main/java/org/dspace/pack/bagit/CommunityPacker.java +++ b/src/main/java/org/dspace/pack/bagit/CommunityPacker.java @@ -13,6 +13,7 @@ import static org.dspace.pack.PackerFactory.OBJFILE; import static org.dspace.pack.PackerFactory.OWNER_ID; import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP; +import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE; import static org.dspace.pack.bagit.BagItAipWriter.OBJ_TYPE_COMMUNITY; import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER; @@ -49,8 +50,7 @@ * * @author richardrodgers */ -public class CommunityPacker implements Packer -{ +public class CommunityPacker implements Packer { private CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService(); // NB - these values must remain synchronized with DB schema - @@ -123,6 +123,7 @@ public File pack(File packDir) throws AuthorizeException, SQLException, IOExcept .withPolicies(policy) .withMetadata(metadata) .withDSpaceRoles(dSpaceRoles) + .withLastModifiedTime(DEFAULT_MODIFIED_DATE) .packageAip(); } diff --git a/src/main/java/org/dspace/pack/bagit/ItemPacker.java b/src/main/java/org/dspace/pack/bagit/ItemPacker.java index 0315b70..3ae4679 100644 --- a/src/main/java/org/dspace/pack/bagit/ItemPacker.java +++ b/src/main/java/org/dspace/pack/bagit/ItemPacker.java @@ -16,6 +16,7 @@ import static org.dspace.pack.PackerFactory.OWNER_ID; import static org.dspace.pack.PackerFactory.WITHDRAWN; import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP; +import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE; import static 
org.dspace.pack.bagit.BagItAipWriter.OBJ_TYPE_ITEM; import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER; @@ -94,6 +95,12 @@ public File pack(final File packDir) throws AuthorizeException, IOException, SQL objectProperties.add(OBJECT_TYPE + PROPERTIES_DELIMITER + OBJ_TYPE_ITEM); objectProperties.add(OBJECT_ID + PROPERTIES_DELIMITER + item.getHandle()); + // Use item's lastModifiedDate when creating the BagIt bag + long lmTime = item.getLastModified().getTime(); + if (lmTime <= 0) { + lmTime = DEFAULT_MODIFIED_DATE; + } + final StringBuilder linked = new StringBuilder(); for (Collection coll : item.getCollections()) { if (itemService.isOwningCollection(item, coll)) { @@ -120,7 +127,7 @@ public File pack(final File packDir) throws AuthorizeException, IOException, SQL // policy.xml final Policies policy = BagItPolicyUtil.getPolicy(context, item); - // proceed to bundles, in sub-directories, filtering + // proceed to bundles, in subdirectories, filtering final List bitstreams = new ArrayList<>(); for (Bundle bundle : item.getBundles()) { final String bundleName = bundle.getName(); @@ -159,6 +166,7 @@ public File pack(final File packDir) throws AuthorizeException, IOException, SQL .withPolicies(policy) .withMetadata(metadata) .withBitstreams(bitstreams) + .withLastModifiedTime(lmTime) .packageAip(); } @@ -308,5 +316,4 @@ public RefFilter(String filter) url = parts[2]; } } - } diff --git a/src/main/java/org/dspace/pack/bagit/SitePacker.java b/src/main/java/org/dspace/pack/bagit/SitePacker.java index 44cdf37..a79afea 100644 --- a/src/main/java/org/dspace/pack/bagit/SitePacker.java +++ b/src/main/java/org/dspace/pack/bagit/SitePacker.java @@ -12,6 +12,7 @@ import static org.dspace.pack.PackerFactory.OBJECT_TYPE; import static org.dspace.pack.PackerFactory.OBJFILE; import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP; +import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE; import static 
org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER; import java.io.File; @@ -96,6 +97,7 @@ public File pack(File packDir) throws AuthorizeException, IOException, SQLExcept return new BagItAipWriter(context, packDir, archFmt, properties) .withDSpaceRoles(dSpaceRoles) + .withLastModifiedTime(DEFAULT_MODIFIED_DATE) .packageAip(); } diff --git a/src/test/java/org/dspace/pack/bagit/BagItAipWriterTest.java b/src/test/java/org/dspace/pack/bagit/BagItAipWriterTest.java index 8c086e4..9a6a5cd 100644 --- a/src/test/java/org/dspace/pack/bagit/BagItAipWriterTest.java +++ b/src/test/java/org/dspace/pack/bagit/BagItAipWriterTest.java @@ -8,6 +8,7 @@ package org.dspace.pack.bagit; import static org.assertj.core.api.Assertions.assertThat; +import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.times; @@ -100,7 +101,8 @@ public void testWriteAip() throws Exception { .withLogo(logo) .withMetadata(metadata) .withPolicies(policies) - .withBitstreams(bitstreams); + .withBitstreams(bitstreams) + .withLastModifiedTime(DEFAULT_MODIFIED_DATE); when(bitstreamService.retrieve(any(Context.class), eq(logo))) .thenReturn(new ByteArrayInputStream("logo".getBytes()));