Skip to content

Commit

Permalink
update BagIt creation processes to include a timestamp for creation, …
Browse files Browse the repository at this point in the history
…modify and access time metadata in files in the bag
  • Loading branch information
nwoodward committed Sep 10, 2024
1 parent a2dec7a commit 758e08f
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 18 deletions.
22 changes: 13 additions & 9 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<!-- DuraCloud Version Information (supported version of DuraCloud) -->
<duracloud.version>8.1.0</duracloud.version>
<!-- DuraSpace BagIt Support Library -->
<bagit-support.version>1.1.0</bagit-support.version>
<bagit-support.version>1.2.0</bagit-support.version>
<jaxb-api.version>4.0.2</jaxb-api.version>
<jaxb-runtime.version>4.0.5</jaxb-runtime.version>
<!-- Replication Task Suite requires Java 17 because DSpace 8 requires it -->
Expand Down Expand Up @@ -260,21 +260,25 @@
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</exclusion>
<exclusion>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand Down Expand Up @@ -319,12 +323,12 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.23.1</version>
<version>2.24.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.23.1</version>
<version>2.24.0</version>
<scope>test</scope>
</dependency>

Expand Down
27 changes: 23 additions & 4 deletions src/main/java/org/dspace/pack/bagit/BagItAipWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import java.sql.SQLException;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.JAXBException;
import jakarta.xml.bind.Marshaller;
Expand All @@ -35,6 +37,7 @@
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.Item;
import org.dspace.content.service.BitstreamService;
import org.dspace.core.Context;
import org.dspace.core.Utils;
Expand All @@ -53,7 +56,6 @@
import org.joda.time.LocalDate;
import org.joda.time.format.ISODateTimeFormat;

import jakarta.xml.bind.JAXBException;

/**
* The BagItAipWriter handles the packaging of DSpaceObjects into their respective bags. It processes the metadata and
Expand Down Expand Up @@ -81,12 +83,14 @@ public class BagItAipWriter {
public static final String TEMPLATE_XML = "template-metadata.xml";
private static final String BITSTREAM_PREFIX = "bitstream_";

/**
* Fallback last-modified timestamp, in milliseconds since the Unix epoch. The literal is expressed in seconds
* (1036368000 s = 2002-11-04T00:00:00Z) and multiplied by 1000 to convert it to milliseconds. It is the initial
* value of {@code lastModifiedTime} and is what the packers pass when they have no object-specific time.
* NOTE(review): the reason this particular date was chosen is not stated here — confirm and document.
*/
protected static final long DEFAULT_MODIFIED_DATE = 1036368000L * 1000;

private final BitstreamService bitstreamService = ContentServiceFactory.getInstance().getBitstreamService();

// Fields used for book keeping
// Fields used for bookkeeping
private final AtomicLong successBytes = new AtomicLong();
private final AtomicLong successFiles = new AtomicLong();
private final Map<File, String> checksums = new HashMap<>();
private final LinkedHashMap<File, String> checksums = new LinkedHashMap<>();

/**
* The context to use
Expand Down Expand Up @@ -138,6 +142,11 @@ public class BagItAipWriter {
*/
private List<BagBitstream> bitstreams;

/**
* Last modified time of the DSpace object being packaged, in milliseconds since the Unix epoch. Initialised to
* {@link #DEFAULT_MODIFIED_DATE} by the constructor and handed to the bag serializer when the AIP is packaged.
*/
private Long lastModifiedTime;

/**
* Constructor for a {@link BagItAipWriter}. Takes a minimal set of information needed in order to write an AIP as a
* BagIt bag for dspace consumption.
Expand All @@ -153,6 +162,7 @@ public BagItAipWriter(final Context context, final File directory, final String
this.logo = null;
this.policies = null;
this.metadata = null;
this.lastModifiedTime = DEFAULT_MODIFIED_DATE;
this.archFmt = checkNotNull(archFmt);
this.directory = checkNotNull(directory);
this.properties = checkNotNull(properties);
Expand Down Expand Up @@ -213,6 +223,15 @@ public BagItAipWriter withBitstreams(final List<BagBitstream> bitstreams) {
return this;
}

/**
* Set the last-modified timestamp that is passed to the bag serializer when the AIP is packaged. If this method
* is never called, {@link #DEFAULT_MODIFIED_DATE} is used.
*
* @param lastModifiedTime the timestamp in milliseconds since the Unix epoch, for example the value obtained
*                         from {@link Item#getLastModified()} for the item being packaged
* @return the {@link BagItAipWriter} used for creating the aip
*/
public BagItAipWriter withLastModifiedTime(final long lastModifiedTime) {
this.lastModifiedTime = lastModifiedTime;
return this;
}

/**
* Create a serialized BagIt bag using the parameters the BagItAipWriter was instantiated with
*
Expand Down Expand Up @@ -354,7 +373,7 @@ public File packageAip() throws IOException, SQLException, AuthorizeException {
bag.write();

final BagSerializer serializer = SerializationSupport.serializerFor(archFmt, profile);
final Path serializedBag = serializer.serialize(directory.toPath());
final Path serializedBag = serializer.serializeWithTimestamp(directory.toPath(), lastModifiedTime);
delete(directory);

return serializedBag.toFile();
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/dspace/pack/bagit/CollectionPacker.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import static org.dspace.pack.PackerFactory.OBJFILE;
import static org.dspace.pack.PackerFactory.OWNER_ID;
import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP;
import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE;
import static org.dspace.pack.bagit.BagItAipWriter.OBJ_TYPE_COLLECTION;
import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER;

Expand Down Expand Up @@ -143,6 +144,7 @@ public File pack(File packDir) throws AuthorizeException, IOException, SQLExcept
.withMetadata(metadata)
.withItemTemplate(templateMd)
.withDSpaceRoles(dSpaceRoles)
.withLastModifiedTime(DEFAULT_MODIFIED_DATE)
.packageAip();
}

Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/dspace/pack/bagit/CommunityPacker.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import static org.dspace.pack.PackerFactory.OBJFILE;
import static org.dspace.pack.PackerFactory.OWNER_ID;
import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP;
import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE;
import static org.dspace.pack.bagit.BagItAipWriter.OBJ_TYPE_COMMUNITY;
import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER;

Expand Down Expand Up @@ -49,8 +50,7 @@
*
* @author richardrodgers
*/
public class CommunityPacker implements Packer
{
public class CommunityPacker implements Packer {
private CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService();

// NB - these values must remain synchronized with DB schema -
Expand Down Expand Up @@ -123,6 +123,7 @@ public File pack(File packDir) throws AuthorizeException, SQLException, IOExcept
.withPolicies(policy)
.withMetadata(metadata)
.withDSpaceRoles(dSpaceRoles)
.withLastModifiedTime(DEFAULT_MODIFIED_DATE)
.packageAip();
}

Expand Down
11 changes: 9 additions & 2 deletions src/main/java/org/dspace/pack/bagit/ItemPacker.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import static org.dspace.pack.PackerFactory.OWNER_ID;
import static org.dspace.pack.PackerFactory.WITHDRAWN;
import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP;
import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE;
import static org.dspace.pack.bagit.BagItAipWriter.OBJ_TYPE_ITEM;
import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER;

Expand Down Expand Up @@ -94,6 +95,12 @@ public File pack(final File packDir) throws AuthorizeException, IOException, SQL
objectProperties.add(OBJECT_TYPE + PROPERTIES_DELIMITER + OBJ_TYPE_ITEM);
objectProperties.add(OBJECT_ID + PROPERTIES_DELIMITER + item.getHandle());

// Use item's lastModifiedDate when creating the BagIt bag
long lmTime = item.getLastModified().getTime();
if (lmTime <= 0) {
lmTime = DEFAULT_MODIFIED_DATE;
}

final StringBuilder linked = new StringBuilder();
for (Collection coll : item.getCollections()) {
if (itemService.isOwningCollection(item, coll)) {
Expand All @@ -120,7 +127,7 @@ public File pack(final File packDir) throws AuthorizeException, IOException, SQL
// policy.xml
final Policies policy = BagItPolicyUtil.getPolicy(context, item);

// proceed to bundles, in sub-directories, filtering
// proceed to bundles, in subdirectories, filtering
final List<BagBitstream> bitstreams = new ArrayList<>();
for (Bundle bundle : item.getBundles()) {
final String bundleName = bundle.getName();
Expand Down Expand Up @@ -159,6 +166,7 @@ public File pack(final File packDir) throws AuthorizeException, IOException, SQL
.withPolicies(policy)
.withMetadata(metadata)
.withBitstreams(bitstreams)
.withLastModifiedTime(lmTime)
.packageAip();
}

Expand Down Expand Up @@ -308,5 +316,4 @@ public RefFilter(String filter)
url = parts[2];
}
}

}
2 changes: 2 additions & 0 deletions src/main/java/org/dspace/pack/bagit/SitePacker.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import static org.dspace.pack.PackerFactory.OBJECT_TYPE;
import static org.dspace.pack.PackerFactory.OBJFILE;
import static org.dspace.pack.bagit.BagItAipWriter.BAG_AIP;
import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE;
import static org.dspace.pack.bagit.BagItAipWriter.PROPERTIES_DELIMITER;

import java.io.File;
Expand Down Expand Up @@ -96,6 +97,7 @@ public File pack(File packDir) throws AuthorizeException, IOException, SQLExcept

return new BagItAipWriter(context, packDir, archFmt, properties)
.withDSpaceRoles(dSpaceRoles)
.withLastModifiedTime(DEFAULT_MODIFIED_DATE)
.packageAip();
}

Expand Down
4 changes: 3 additions & 1 deletion src/test/java/org/dspace/pack/bagit/BagItAipWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package org.dspace.pack.bagit;

import static org.assertj.core.api.Assertions.assertThat;
import static org.dspace.pack.bagit.BagItAipWriter.DEFAULT_MODIFIED_DATE;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.times;
Expand Down Expand Up @@ -100,7 +101,8 @@ public void testWriteAip() throws Exception {
.withLogo(logo)
.withMetadata(metadata)
.withPolicies(policies)
.withBitstreams(bitstreams);
.withBitstreams(bitstreams)
.withLastModifiedTime(DEFAULT_MODIFIED_DATE);

when(bitstreamService.retrieve(any(Context.class), eq(logo)))
.thenReturn(new ByteArrayInputStream("logo".getBytes()));
Expand Down

0 comments on commit 758e08f

Please sign in to comment.