Skip to content

Commit

Permalink
Prefix DC identifiers with type if no scheme part is present
Browse files Browse the repository at this point in the history
With this change, a PPN identifier appears as `ppn:12345678` in <dc:identifier> elements.
  • Loading branch information
claussni committed Sep 18, 2015
1 parent 6e22d5a commit f306c1f
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,22 @@
import java.security.NoSuchAlgorithmException;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.purl.sword.server.fedora.fedoraObjects.State.INACTIVE;

public class METSContainer {

/**
 * Matches, case-insensitively, a string that starts with a scheme-like prefix:
 * one letter, followed by any number of letters, digits, '+', '.' or '-',
 * then a colon, then arbitrary remaining characters (line terminators excluded,
 * since DOTALL is not set).
 */
public static final Pattern PATTERN = Pattern.compile("^[a-z][a-z0-9\\+\\.\\-]*\\:.*", Pattern.CASE_INSENSITIVE);

// ID/label pairs; presumably datastream identifiers and their display labels
// (their usage is outside the visible part of this file -- TODO confirm).
private static final String DS_ID_SLUBINFO = "SLUB-INFO";
private static final String DS_ID_SLUBINFO_LABEL = "SLUB Administrative Metadata";

private static final String DS_ID_QUCOSAXML = "QUCOSA-XML";
private static final String DS_ID_QUCOSAXML_LABEL = "Pristine Qucosa XML Metadata";

private static final String DS_MODS_MIME_TYPE = "application/mods+xml";
private static final String DS_ID_MODS = "MODS";
private static final String DS_ID_MODS_LABEL = "Object Bibliographic Metadata";

// XPath fragments: the METS descriptive metadata section, and the mods:mods
// element wrapped inside an mdWrap whose MDTYPE attribute is 'MODS'.
private static final String METS_DMDSEC_PREFIX = "/mets:mets/mets:dmdSec";
private static final String MODS_PREFIX = METS_DMDSEC_PREFIX + "/mets:mdWrap[@MDTYPE='MODS']/mets:xmlData/mods:mods";

Expand Down Expand Up @@ -196,12 +197,24 @@ private String getPrimaryTitle() {

/**
 * Collects the identifiers selected by {@code XPATH_IDENTIFIERS} from the METS document.
 * <p>
 * The trimmed text of each selected element is used as the identifier. If that text
 * does not already start with a scheme part (see {@code hasProtocol}), it is prefixed
 * with the value of the element's {@code type} attribute and a colon, e.g.
 * {@code ppn:12345678}. Elements without a usable {@code type} attribute contribute
 * their text unchanged instead of a misleading {@code "null:..."} value.
 *
 * @return the identifiers in selection order, or {@code null} if the XPath
 *         evaluation fails with a {@code JDOMException}
 */
private List<String> getIdentifiers() {
    try {
        final List<Element> elements = XPATH_IDENTIFIERS.selectNodes(metsDocument);
        final List<String> identifiers = new LinkedList<>();
        for (Element element : elements) {
            final String id = element.getTextTrim();
            final String type = element.getAttributeValue("type");
            // Prefix only when there is a type to prefix with and the text
            // carries no scheme of its own.
            final boolean needsPrefix = type != null && !type.isEmpty() && !hasProtocol(id);
            identifiers.add(needsPrefix ? type + ":" + id : id);
        }
        return identifiers;
    } catch (JDOMException e) {
        // Kept as null for compatibility with existing callers.
        return null;
    }
}

/**
 * Tells whether the whole of the given identifier matches {@code PATTERN},
 * i.e. whether it begins with a scheme-like prefix terminated by a colon.
 *
 * @param id the identifier text to test
 * @return {@code true} if {@code id} matches {@code PATTERN} in its entirety
 */
private boolean hasProtocol(String id) {
    return PATTERN.matcher(id).matches();
}

/**
 * Convenience overload: forwards to the four-argument {@code getDatastream},
 * passing {@code null} as the fourth argument.
 *
 * @param query           passed through unchanged
 * @param datastreamID    passed through unchanged
 * @param datastreamLabel passed through unchanged
 * @return whatever the four-argument overload returns
 * @throws SWORDException declared here; presumably propagated from the
 *                        four-argument overload (not visible here)
 */
private Datastream getDatastream(XPathQuery query, String datastreamID, String datastreamLabel) throws SWORDException {
return getDatastream(query, datastreamID, datastreamLabel, null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,6 @@ public String selectValue(Document doc) throws JDOMException {
}
}

/**
 * Returns the trimmed text of every element selected by this query.
 *
 * @param doc the document to evaluate the query against
 * @return a new mutable list holding one trimmed text value per selected
 *         element, in selection order
 * @throws JDOMException if selecting the nodes fails
 */
public List<String> selectValues(Document doc) throws JDOMException {
    // A plain list rather than double-brace initialization: no anonymous
    // LinkedList subclass and no hidden reference to this query object.
    final List<String> values = new LinkedList<String>();
    for (Element e : selectNodes(doc)) {
        values.add(e.getTextTrim());
    }
    return values;
}

/**
 * Evaluates the wrapped XPath against the document and returns its single
 * result cast to {@code Element}.
 *
 * @param doc the document to evaluate the XPath against
 * @return the result of {@code xpath.selectSingleNode(doc)} as an {@code Element}
 * @throws JDOMException declared for the XPath evaluation
 */
public Element selectNode(Document doc) throws JDOMException {
    final Object match = xpath.selectSingleNode(doc);
    return (Element) match;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ public void dcDatastreamHasIdentifiers() throws Exception {

verify(mockFedoraRepository).ingest(argument.capture());
DublinCore result = argument.getValue().getDc();
assertTrue("Should have identifier", result.getIdentifier().contains("urn:nbn:de:bsz:14-qucosa-32992"));
assertTrue("Should have identifier", result.getIdentifier().contains("322202922"));

assertTrue("Should have URN identifier", result.getIdentifier().contains("urn:nbn:de:bsz:14-qucosa-32992"));
assertTrue("Should have PPN identifier", result.getIdentifier().contains("ppn:322202922"));
}

@Test
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/mets_all_references.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<mods:subTitle>Das EFRE-Projekt Sächsischer Dokumentenserver</mods:subTitle>
</mods:titleInfo>
<mods:identifier type="urn">urn:nbn:de:bsz:14-qucosa-32992</mods:identifier>
<mods:identifier type="swb-ppn">322202922</mods:identifier>
<mods:identifier type="ppn">322202922</mods:identifier>

<mods:relatedItem
type="series"
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/mets_invalid_file.xml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
<mods:accessCondition>Dieser Beitrag ist mit Zustimmung des Rechteinhabers frei zugänglich.
</mods:accessCondition>
<mods:identifier type="urn">urn:nbn:de:bsz:14-qucosa-32992</mods:identifier>
<mods:identifier type="swb-ppn">322202922</mods:identifier>
<mods:identifier type="ppn">322202922</mods:identifier>
<mods:relatedItem type="series">
<mods:titleInfo>
<mods:title>BIS - Das Magazin der Bibliotheken in Sachsen</mods:title>
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/mets_ok.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
<mods:accessCondition>Dieser Beitrag ist mit Zustimmung des Rechteinhabers frei zugänglich.
</mods:accessCondition>
<mods:identifier type="urn">urn:nbn:de:bsz:14-qucosa-32992</mods:identifier>
<mods:identifier type="swb-ppn">322202922</mods:identifier>
<mods:identifier type="ppn">322202922</mods:identifier>
<mods:relatedItem type="constituent"
xlink:href="http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-32825">
<mods:identifier type="urn">urn:nbn:de:bsz:14-qucosa-32825</mods:identifier>
Expand Down

0 comments on commit f306c1f

Please sign in to comment.