From e1bf3c35f2c085d6831d8f3859a123da80745741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konrad=20Per=C5=82owski?= Date: Mon, 15 Jan 2024 12:45:02 +0100 Subject: [PATCH 001/486] Add CrossRef provider logic --- .../source/installation/config.rst | 9 + pom.xml | 5 + .../iq/dataverse/CrossRefRESTfullClient.java | 118 +++++++ .../dataverse/DOICrossRefRegisterService.java | 313 ++++++++++++++++++ .../iq/dataverse/DOICrossRefServiceBean.java | 117 +++++++ .../iq/dataverse/EjbDataverseEngine.java | 8 + .../iq/dataverse/GlobalIdServiceBean.java | 3 +- .../engine/command/CommandContext.java | 24 +- .../iq/dataverse/settings/JvmSettings.java | 11 +- .../dataverse/crossref_metadata_template.xml | 29 ++ .../dataverse/engine/TestCommandContext.java | 5 + 11 files changed, 619 insertions(+), 23 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java create mode 100644 src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a7d7905ca4a..e7ba624222e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -245,6 +245,15 @@ this provider. 
- :ref:`dataverse.pid.ezid.username` - :ref:`dataverse.pid.ezid.password` +**JVM Options for CrossRef:** + +- :ref:`dataverse.pid.crossref.url` +- :ref:`dataverse.pid.crossref.rest-api-url` +- :ref:`dataverse.pid.crossref.username` +- :ref:`dataverse.pid.crossref.password` +- :ref:`dataverse.pid.crossref.depositor` +- :ref:`dataverse.pid.crossref.depositor-email` + **Database Settings:** - :ref:`:DoiProvider <:DoiProvider>` diff --git a/pom.xml b/pom.xml index 7c12a45135c..72ef3391524 100644 --- a/pom.xml +++ b/pom.xml @@ -653,6 +653,11 @@ 3.2.0 test + + org.apache.httpcomponents + fluent-hc + 4.5.14 + diff --git a/src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java new file mode 100644 index 00000000000..4b6728eca57 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/CrossRefRESTfullClient.java @@ -0,0 +1,118 @@ +package edu.harvard.iq.dataverse; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.fluent.Request; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.mime.HttpMultipartMode; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class CrossRefRESTfullClient implements Closeable { + + private static final Logger logger = 
Logger.getLogger(CrossRefRESTfullClient.class.getCanonicalName()); + + private final String url; + private final String apiUrl; + private final String username; + private final String password; + private final CloseableHttpClient httpClient; + private final HttpClientContext context; + private final String encoding = "utf-8"; + + public CrossRefRESTfullClient(String url, String apiUrl, String username, String password) { + this.url = url; + this.apiUrl = apiUrl; + this.username = username; + this.password = password; + try { + context = HttpClientContext.create(); + CredentialsProvider credsProvider = new BasicCredentialsProvider(); + credsProvider.setCredentials(new AuthScope(null, -1), + new UsernamePasswordCredentials(username, password)); + context.setCredentialsProvider(credsProvider); + + httpClient = HttpClients.createDefault(); + } catch (Exception ioe) { + close(); + logger.log(Level.SEVERE,"Fail to init Client",ioe); + throw new RuntimeException("Fail to init Client", ioe); + } + } + + public void close() { + if (this.httpClient != null) { + try { + httpClient.close(); + } catch (IOException io) { + logger.warning("IOException closing hhtpClient: " + io.getMessage()); + } + } + } + + public String getMetadata(String doi) { + HttpGet httpGet = new HttpGet(this.apiUrl + "/works/" + doi); + httpGet.setHeader("Accept", "application/json"); + try { + HttpResponse response = httpClient.execute(httpGet); + String data = EntityUtils.toString(response.getEntity(), encoding); + if (response.getStatusLine().getStatusCode() != 200) { + String errMsg = "Response from getMetadata: " + response.getStatusLine().getStatusCode() + ", " + data; + logger.info(errMsg); + throw new RuntimeException(errMsg); + } + return data; + } catch (IOException ioe) { + logger.info("IOException when get metadata"); + throw new RuntimeException("IOException when get metadata", ioe); + } + } + + public String postMetadata(String xml) throws IOException { + HttpEntity entity = 
MultipartEntityBuilder.create() + .addTextBody("operation", "doMDUpload") + .addTextBody("login_id", username) + .addTextBody("login_passwd", password) + .addBinaryBody("fname", xml.getBytes(StandardCharsets.UTF_8), ContentType.APPLICATION_XML, "metadata.xml") + .setMode(HttpMultipartMode.BROWSER_COMPATIBLE) + .build(); + HttpResponse response = Request.Post(url + "/servlet/deposit") + .body(entity) + .setHeader("Accept", "*/*") + .execute().returnResponse(); + + String data = EntityUtils.toString(response.getEntity(), encoding); + if (response.getStatusLine().getStatusCode() != 200) { + String errMsg = "Response from postMetadata: " + response.getStatusLine().getStatusCode() + ", " + data; + logger.info(errMsg); + throw new IOException(errMsg); + } + return data; + } + + public boolean testDOIExists(String doi) throws IOException { + HttpGet httpGet = new HttpGet(this.apiUrl + "/works/" + doi); + httpGet.setHeader("Accept", "application/json"); + HttpResponse response = httpClient.execute(httpGet); + if (response.getStatusLine().getStatusCode() != 200) { + EntityUtils.consumeQuietly(response.getEntity()); + return false; + } + EntityUtils.consumeQuietly(response.getEntity()); + return true; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java new file mode 100644 index 00000000000..6e8ed20d570 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefRegisterService.java @@ -0,0 +1,313 @@ +package edu.harvard.iq.dataverse; + +import com.fasterxml.jackson.databind.ObjectMapper; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import org.apache.commons.text.StringEscapeUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.List; 
+import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.SystemConfig.getDataverseSiteUrlStatic; + +@Stateless +public class DOICrossRefRegisterService { + private static final Logger logger = Logger.getLogger(DOICrossRefRegisterService.class.getCanonicalName()); + + @EJB + DataverseServiceBean dataverseService; + + private CrossRefRESTfullClient client = null; + + private CrossRefRESTfullClient getClient() { + if (client == null) { + client = new CrossRefRESTfullClient( + JvmSettings.CROSSREF_URL.lookup(), + JvmSettings.CROSSREF_REST_API_URL.lookup(), + JvmSettings.CROSSREF_USERNAME.lookup(), + JvmSettings.CROSSREF_PASSWORD.lookup() + ); + } + return client; + } + + public boolean testDOIExists(String identifier) { + boolean doiExists; + try { + CrossRefRESTfullClient client = getClient(); + doiExists = client.testDOIExists(identifier.substring(identifier.indexOf(":") + 1)); + } catch (Exception e) { + logger.log(Level.INFO, identifier, e); + return false; + } + return doiExists; + } + + public HashMap getMetadata(String identifier) throws IOException { + HashMap metadata = new HashMap<>(); + try { + CrossRefRESTfullClient client = getClient(); + String jsonMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); + Map mappedJson = new ObjectMapper().readValue(jsonMetadata, HashMap.class); + logger.log(Level.FINE, jsonMetadata); + metadata.put("_status", mappedJson.get("status").toString()); + } catch (RuntimeException e) { + logger.log(Level.INFO, identifier, e); + } + return metadata; + } + + public String reserveIdentifier(String identifier, DvObject dvObject) throws IOException { + logger.info("Crossref reserveIdentifier"); + String xmlMetadata = getMetadataFromDvObject(identifier, dvObject); + + CrossRefRESTfullClient client = getClient(); + return client.postMetadata(xmlMetadata); + } + + public void modifyIdentifier(String identifier, DvObject dvObject) 
throws IOException { + logger.info("Crossref modifyIdentifier"); + String xmlMetadata = getMetadataFromDvObject(identifier, dvObject); + + CrossRefRESTfullClient client = getClient(); + client.postMetadata(xmlMetadata); + } + + public String getMetadataFromDvObject(String identifier, DvObject dvObject) { + Dataset dataset; + + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else { + dataset = (Dataset) dvObject.getOwner(); + } + + CrossRefMetadataTemplate metadataTemplate = new CrossRefMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + metadataTemplate.setDepositor(JvmSettings.CROSSREF_DEPOSITOR.lookup()); + metadataTemplate.setDepositorEmail(JvmSettings.CROSSREF_DEPOSITOR_EMAIL.lookup()); + metadataTemplate.setInstitution(dataverseService.getRootDataverseName()); + + String title = dvObject.getCurrentName(); + if (dvObject.isInstanceofDataFile()) { + //Note file title is not currently escaped the way the dataset title is, so adding it here. 
+ title = StringEscapeUtils.escapeXml10(title); + } + + if (title.isEmpty() || title.equals(DatasetField.NA_VALUE)) { + title = AbstractGlobalIdServiceBean.UNAVAILABLE; + } + + metadataTemplate.setTitle(title); + + return metadataTemplate.generateXML(); + } +} + +class CrossRefMetadataTemplate { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.edu.harvard.iq.dataverse.CrossRefMetadataTemplate"); + private static String template; + + static { + try (InputStream in = CrossRefMetadataTemplate.class.getResourceAsStream("crossref_metadata_template.xml")) { + template = CrossRefFileUtil.readAndClose(in, "utf-8"); + } catch (Exception e) { + logger.log(Level.SEVERE, "crossref metadata template load error"); + logger.log(Level.SEVERE, "String " + e); + logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); + logger.log(Level.SEVERE, "cause " + e.getCause()); + logger.log(Level.SEVERE, "message " + e.getMessage()); + } + } + + private final String timestamp = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + private String institution; + private String depositor; + private String depositorEmail; + private String databaseTitle; + private String identifier; + private String title; + private final String baseUrl = getDataverseSiteUrlStatic(); + private List authors; + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + public CrossRefMetadataTemplate() { + } + + public String generateXML() { + String xmlMetadata = template.replace("${depositor}", depositor) + .replace("${depositorEmail}", depositorEmail) + .replace("${title}", title) + .replace("${institution}", institution) + .replace("${batchId}", identifier + " " + timestamp) + .replace("${timestamp}", timestamp); + + StringBuilder datasetElement = new StringBuilder(); + datasetElement.append(""); + + StringBuilder contributorsElement = new StringBuilder(); + if (authors != null && 
!authors.isEmpty()) { + contributorsElement.append(""); + for (DatasetAuthor author : authors) { + contributorsElement.append(""); + contributorsElement.append(author.getName().getDisplayValue()); + contributorsElement.append(""); + contributorsElement.append(author.getName().getDisplayValue()); + contributorsElement.append(""); + + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + contributorsElement.append("") + .append(author.getAffiliation().getDisplayValue()) + .append(""); + } + + if (author.getIdType() != null && + author.getIdValue() != null && + !author.getIdType().isEmpty() && + !author.getIdValue().isEmpty() && + author.getAffiliation() != null && + !author.getAffiliation().getDisplayValue().isEmpty()) { + if (author.getIdType().equals("ORCID")) { + contributorsElement.append("").append("https://orcid.org/").append(author.getIdValue()).append(""); + } + if (author.getIdType().equals("ISNI")) { + contributorsElement.append("").append(author.getIdValue()).append(""); + } + if (author.getIdType().equals("LCNA")) { + contributorsElement.append("").append(author.getIdValue()).append(""); + } + } + + contributorsElement.append(""); + } + contributorsElement.append(""); + + } else { + contributorsElement.append("") + .append(AbstractGlobalIdServiceBean.UNAVAILABLE) + .append(""); + } + + datasetElement.append(contributorsElement); + + datasetElement.append("") + .append(this.title) + .append(""); + + datasetElement.append("") + .append(this.identifier) + .append("") + .append(this.baseUrl).append("/dataset.xhtml?persistentId=doi:").append(this.identifier) + .append(""); + + datasetElement.append(""); + xmlMetadata = xmlMetadata.replace("${datasets}", datasetElement.toString()); + return xmlMetadata; + } + + public static String getTemplate() { + return template; + } + + public static void setTemplate(String template) { + CrossRefMetadataTemplate.template = template; + } + + public String getIdentifier() { + return 
identifier; + } + + public String getDepositor() { + return depositor; + } + + public void setDepositor(String depositor) { + this.depositor = depositor; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getInstitution() { + return institution; + } + + public void setInstitution(String institution) { + this.institution = institution; + } + + public String getDepositorEmail() { + return depositorEmail; + } + + public void setDepositorEmail(String depositorEmail) { + this.depositorEmail = depositorEmail; + } + + public String getDatabaseTitle() { + return databaseTitle; + } + + public void setDatabaseTitle(String databaseTitle) { + this.databaseTitle = databaseTitle; + } +} + +class CrossRefFileUtil { + + public static void close(InputStream in) { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + throw new RuntimeException("Fail to close InputStream"); + } + } + } + + public static String readAndClose(InputStream inStream, String encoding) { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + byte[] buf = new byte[128]; + String data; + try { + int cnt; + while ((cnt = inStream.read(buf)) >= 0) { + outStream.write(buf, 0, cnt); + } + data = outStream.toString(encoding); + } catch (IOException ioe) { + throw new RuntimeException("IOException"); + } finally { + close(inStream); + } + return data; + } +} + diff --git a/src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java new file mode 100644 index 00000000000..50fafc5f036 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DOICrossRefServiceBean.java @@ -0,0 +1,117 @@ +package edu.harvard.iq.dataverse; + +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; + +import java.util.HashMap; +import java.util.List; +import 
java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +@Stateless +public class DOICrossRefServiceBean extends DOIServiceBean { + private static final Logger logger = Logger.getLogger(DOICrossRefServiceBean.class.getCanonicalName()); + + @EJB + DOICrossRefRegisterService doiCrossRefRegisterService; + + + @Override + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { + logger.info("CrossRef alreadyRegistered"); + if (pid == null || pid.asString().isEmpty()) { + logger.fine("No identifier sent."); + return false; + } + boolean alreadyExists; + String identifier = pid.asString(); + try { + alreadyExists = doiCrossRefRegisterService.testDOIExists(identifier); + } catch (Exception e) { + logger.log(Level.WARNING, "alreadyExists failed"); + return false; + } + return alreadyExists; + } + + @Override + public boolean registerWhenPublished() { + return true; + } + + @Override + public List getProviderInformation() { + return List.of("CrossRef", "https://status.crossref.org/"); + } + + @Override + protected String getProviderKeyName() { + return "CrossRef"; + } + + @Override + public String createIdentifier(DvObject dvObject) throws Throwable { + logger.info("CrossRef createIdentifier"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generateIdentifier(dvObject); + } + String identifier = getIdentifier(dvObject); + try { + String retString = doiCrossRefRegisterService.reserveIdentifier(identifier, dvObject); + logger.log(Level.FINE, "CrossRef create DOI identifier retString : " + retString); + return retString; + } catch (Exception e) { + logger.log(Level.WARNING, "CrossRef Identifier not created: create failed", e); + throw e; + } + } + + @Override + public Map getIdentifierMetadata(DvObject dvObject) { + logger.info("CrossRef getIdentifierMetadata"); + String identifier = getIdentifier(dvObject); + Map metadata = new HashMap<>(); + try { + metadata = 
doiCrossRefRegisterService.getMetadata(identifier); + } catch (Exception e) { + logger.log(Level.WARNING, "getIdentifierMetadata failed", e); + } + return metadata; + } + + @Override + public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { + logger.info("CrossRef modifyIdentifier"); + String identifier = getIdentifier(dvObject); + try { + doiCrossRefRegisterService.modifyIdentifier(identifier, dvObject); + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed", e); + throw e; + } + return identifier; + } + + @Override + public void deleteIdentifier(DvObject dvo) throws Exception { + logger.info("CrossRef deleteIdentifier"); + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + logger.info("CrossRef updateIdentifierStatus"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generateIdentifier(dvObject); + } + String identifier = getIdentifier(dvObject); + + try { + doiCrossRefRegisterService.reserveIdentifier(identifier, dvObject); + return true; + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e); + return false; + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index 5a689c06019..cfb17caf5a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -119,6 +119,9 @@ public class EjbDataverseEngine { @EJB DOIDataCiteServiceBean doiDataCite; + @EJB + DOICrossRefServiceBean doiCrossRef; + @EJB FakePidProviderServiceBean fakePidProvider; @@ -493,6 +496,11 @@ public DOIDataCiteServiceBean doiDataCite() { return doiDataCite; } + @Override + public DOICrossRefServiceBean doiCrossRef() { + return doiCrossRef; + } + @Override public FakePidProviderServiceBean fakePidProvider() { return fakePidProvider; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java index aebf13778c3..ec96bcf603f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java @@ -204,8 +204,9 @@ class BeanDispatcher { switch ( doiProvider ) { case "EZID": return ctxt.doiEZId(); case "DataCite": return ctxt.doiDataCite(); + case "CrossRef": return ctxt.doiCrossRef(); case "FAKE": return ctxt.fakePidProvider(); - default: + default: logger.log(Level.SEVERE, "Unknown doiProvider: {0}", doiProvider); return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java index f74c1222bb0..596a76be172 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java @@ -1,29 +1,9 @@ package edu.harvard.iq.dataverse.engine.command; -import edu.harvard.iq.dataverse.DOIDataCiteServiceBean; -import edu.harvard.iq.dataverse.DOIEZIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.DataverseFacetServiceBean; -import edu.harvard.iq.dataverse.DataverseFieldTypeInputLevelServiceBean; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import 
edu.harvard.iq.dataverse.FeaturedDataverseServiceBean; -import edu.harvard.iq.dataverse.FileDownloadServiceBean; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.GuestbookServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.search.SearchServiceBean; -import edu.harvard.iq.dataverse.TemplateServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; @@ -104,6 +84,8 @@ public interface CommandContext { public DOIDataCiteServiceBean doiDataCite(); + public DOICrossRefServiceBean doiCrossRef(); + public FakePidProviderServiceBean fakePidProvider(); public HandlenetServiceBean handleNet(); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 3bc06738a7e..0600175deb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -104,7 +104,16 @@ public enum JvmSettings { DATACITE_REST_API_URL(SCOPE_PID_DATACITE, "rest-api-url", "doi.dataciterestapiurlstring", "doi.mdcbaseurlstring"), DATACITE_USERNAME(SCOPE_PID_DATACITE, "username", "doi.username"), DATACITE_PASSWORD(SCOPE_PID_DATACITE, "password", "doi.password"), - + + // PROVIDER CROSSREF + SCOPE_PID_CROSSREF(SCOPE_PID, "crossref"), + CROSSREF_URL(SCOPE_PID_CROSSREF, "url"), + CROSSREF_REST_API_URL(SCOPE_PID_CROSSREF, "rest-api-url"), + CROSSREF_USERNAME(SCOPE_PID_CROSSREF, "username", "doi.username"), + CROSSREF_PASSWORD(SCOPE_PID_CROSSREF, "password", "doi.password"), + 
CROSSREF_DEPOSITOR(SCOPE_PID_CROSSREF, "depositor"), + CROSSREF_DEPOSITOR_EMAIL(SCOPE_PID_CROSSREF, "depositor-email"), + // PROVIDER PERMALINK SCOPE_PID_PERMALINK(SCOPE_PID, "permalink"), PERMALINK_BASEURL(SCOPE_PID_PERMALINK, "base-url", "perma.baseurlstring"), diff --git a/src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml new file mode 100644 index 00000000000..f37ed63cfff --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/crossref_metadata_template.xml @@ -0,0 +1,29 @@ + + + + ${batchId} + ${timestamp} + + ${depositor} + ${depositorEmail} + + Crossref + + + + + + ${title} + + + ${institution} + + + ${datasets} + + + diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java index a80adb33b8d..15810f85d39 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java @@ -131,6 +131,11 @@ public DOIDataCiteServiceBean doiDataCite() { return null; } + @Override + public DOICrossRefServiceBean doiCrossRef() { + return null; + } + @Override public FakePidProviderServiceBean fakePidProvider() { return null; From a08fbe2b345dd9f10998b758eb962f899bd6653e Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 16:13:02 +0100 Subject: [PATCH 002/486] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index c36c7d1e963..a97adf4149b 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -286,5 +286,8 @@ GitHub documents how to make 
changes to a fork at https://help.github.com/articl git push OdumInstitute 4709-postgresql_96 ---- +Develop branch and pull request +-------------------------------------------- +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `. Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 8277ac5adac8511efa9bb92d3f51dcf99973e67b Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 16:15:10 +0100 Subject: [PATCH 003/486] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index a97adf4149b..11ce0c0d144 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -286,8 +286,9 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl git push OdumInstitute 4709-postgresql_96 ---- + Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. 
Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From be8c167a6454bc86535ce77e66dedb1d76ca0b08 Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 16:53:13 +0100 Subject: [PATCH 004/486] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 11ce0c0d144..5b809782f76 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -289,6 +289,6 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. 
Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 8c5113b9c3f4e8380df45d91c84fdc3acdaf7f1a Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:04:48 +0100 Subject: [PATCH 005/486] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 5b809782f76..35e37ac0471 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -289,6 +289,6 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. 
Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 1edb8bac22046dffd5c7c21197e013bb3be80a60 Mon Sep 17 00:00:00 2001 From: Martin Amouzou <85512093+martinAmouzou@users.noreply.github.com> Date: Wed, 24 Jan 2024 17:07:58 +0100 Subject: [PATCH 006/486] Update version-control.rst --- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 35e37ac0471..99e33164417 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -289,6 +289,6 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl Develop branch and pull request -------------------------------------------- -Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide `_. +Please do not use your forked develop branch to push a PR and follow the `1st scenario: preparing the first pull request of Version Control Guide <#summary-of-git-commands>`_. 
Previous: :doc:`troubleshooting` | Next: :doc:`sql-upgrade-scripts` From 6df751346325948935582836a69b6e5378fb1b06 Mon Sep 17 00:00:00 2001 From: konradperlowski Date: Wed, 31 Jan 2024 10:31:03 +0100 Subject: [PATCH 007/486] Add CrossRef config properties description --- .../source/installation/config.rst | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index e7ba624222e..24f1d0cce12 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2370,6 +2370,50 @@ should delete the old JVM option and the wrapped password alias, then recreate as shown for :ref:`dataverse.pid.datacite.password` but with the EZID alias name. +.. _dataverse.pid.crossref.url: + +dataverse.pid.crossref.url +++++++++++++++++++++++++++ + +CrossRef url used to post metadata. + +.. _dataverse.pid.crossref.rest-api-url: + +dataverse.pid.crossref.rest-api-url ++++++++++++++++++++++++++++++++++++ + +CrossRef API url to retrieve metadata information + +.. _dataverse.pid.crossref.username: + +dataverse.pid.crossref.username ++++++++++++++++++++++++++++++++ + +CrossRef uses `HTTP Basic authentication `_ +for their APIs. +- Used in conjunction with :ref:`dataverse.pid.crossref.url` and :ref:`dataverse.pid.crossref.password`. + +.. _dataverse.pid.crossref.password: + +dataverse.pid.crossref.password ++++++++++++++++++++++++++++++++ + +- Used in conjunction with :ref:`dataverse.pid.crossref.url` and :ref:`dataverse.pid.crossref.username`. + +.. _dataverse.pid.crossref.depositor: + +dataverse.pid.crossref.depositor +++++++++++++++++++++++++++++++++ + +The entity, such as a person or organization, that deposited the Dataset in the repository + +.. _dataverse.pid.crossref.depositor-email: + +dataverse.pid.crossref.depositor-email +++++++++++++++++++++++++++++++++++++++ + +Contact email to the indicated Depositor + .. 
_dataverse.timerServer: dataverse.timerServer From 376f31de215a677ad874adebe4c179be675c6ec3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 10 Apr 2024 23:57:38 +0200 Subject: [PATCH 008/486] ci(ct): do no longer run base push workflow in PRs --- .github/workflows/container_base_push.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index b938851f816..20083c90f33 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -3,17 +3,11 @@ name: Base Container Image on: push: + tags: + - 'v[6-9].*' branches: - 'develop' - - 'master' - paths: - - 'modules/container-base/**' - - 'modules/dataverse-parent/pom.xml' - - '.github/workflows/container_base_push.yml' - pull_request: - branches: - - 'develop' - - 'master' + - '10478-version-base-img' paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' From b8b95a995b3cebab201577429622ba086a81634e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:05:25 +0200 Subject: [PATCH 009/486] ci(ct): add a job for base images to discover branches to work on For scheduled maintenance, we will provide updates to the last three base images as well as the develop branch. 
--- .github/workflows/container_base_push.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 20083c90f33..41935a025a8 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -20,6 +20,25 @@ env: PLATFORMS: linux/amd64,linux/arm64 jobs: + discover: + name: Discover Release Matrix + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + # Only run in upstream repo - avoid unnecessary runs in forks and only for scheduled + if: ${{ github.repository_owner == 'IQSS' }} + steps: + - name: Build branch matrix options + id: matrix + run: | + # Get last three releases and include develop branch as matrix elements + if [[ "${{ github.event_name }}" == "schedule" ]]; then + echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tee -a "$GITHUB_OUTPUT" + else + echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" + fi + build: name: Build image runs-on: ubuntu-latest From 6f046208086bc7f3bb4078c92b6ae8723f78b030 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:06:24 +0200 Subject: [PATCH 010/486] ci(ct): setup base image build job as matrix of discovered branches Checkout the branch/tag we receive from the discovery job --- .github/workflows/container_base_push.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 41935a025a8..b424936383c 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -45,15 +45,18 @@ jobs: permissions: contents: read packages: read + needs: discover strategy: matrix: - jdk: [ '17' ] + branch: ${{ fromJson(needs.discover.outputs.branches) }} # Only run in upstream repo - avoid unnecessary runs in forks if: ${{ 
github.repository_owner == 'IQSS' }} steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 + with: + ref: ${{ matrix.branch }} - name: Set up JDK ${{ matrix.jdk }} uses: actions/setup-java@v3 From 4fc0267296944a7dec92e1a70a651649cf9941db Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:07:40 +0200 Subject: [PATCH 011/486] ci(ct): upgrade java action in base image and determine java version from Maven Using Maven properties, we find out which is the projects preferred Java version. We don't need a build matrix here. --- .github/workflows/container_base_push.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index b424936383c..970e67794c2 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -57,14 +57,19 @@ jobs: uses: actions/checkout@v4 with: ref: ${{ matrix.branch }} + - name: Determine Java version from package + run: | + echo "JAVA_VERSION=$(mvn -f modules/container-base -Pct help:evaluate -Dexpression=target.java.version -q -DforceStdout)" >> ${GITHUB_ENV} - - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v3 + - name: Set up JDK ${{ env.JAVA_VERSION }} + uses: actions/setup-java@v4 with: - java-version: ${{ matrix.jdk }} - distribution: 'adopt' - - name: Cache Maven packages - uses: actions/cache@v3 + java-version: ${{ env.JAVA_VERSION }} + distribution: 'temurin' + cache: 'maven' + cache-dependency-path: | + modules/container-base/pom.xml + with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} From 8d3097181296f70e1948adf43c2823862e8cb1fd Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:08:57 +0200 Subject: [PATCH 012/486] ci(ct): remove pull request safeguards from base image workflow We no longer work on PRs in this workflow, so we don't need to safeguard against not 
running steps in this case. --- .github/workflows/container_base_push.yml | 33 +++++++++-------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 970e67794c2..31a044ef02a 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -78,8 +78,18 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - # Run anything below only if this is not a pull request. - # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. + # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and + # on events in context of upstream because secrets. PRs run in context of forks by default! + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up QEMU for multi-arch builds + uses: docker/setup-qemu-action@v2 + - name: Deploy multi-arch base container image to Docker Hub + run: mvn -f modules/container-base -Pct deploy -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub @@ -90,23 +100,6 @@ jobs: repository: gdcc/base short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" readme-filepath: ./modules/container-base/README.md - - - if: ${{ github.event_name != 'pull_request' }} - name: Log in to the Container registry - uses: docker/login-action@v2 - with: - registry: ${{ env.REGISTRY }} - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name != 'pull_request' }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v2 - - name: Re-set image tag based on 
branch - if: ${{ github.ref_name == 'master' }} - run: echo "IMAGE_TAG=alpha" >> $GITHUB_ENV - - if: ${{ github.event_name != 'pull_request' }} - name: Deploy multi-arch base container image to Docker Hub - run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.platforms=${{ env.PLATFORMS }} push-app-img: name: "Rebase & Publish App Image" permissions: @@ -115,6 +108,6 @@ jobs: pull-requests: write needs: build # We do not release a new base image for pull requests, so do not trigger. - if: ${{ github.event_name != 'pull_request' }} + # if: ${{ github.event_name != 'pull_request' }} uses: ./.github/workflows/container_app_push.yml secrets: inherit From bb06a94e27e6edf6f9ccd11dbf6209d90b7d4dc2 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:09:43 +0200 Subject: [PATCH 013/486] ci(ct): remove pull request safeguards from base image workflow We no longer work on PRs in this workflow, so we don't need to safeguard against not running steps in this case. Also we no longer use the static image names, but rely on the truth Maven has in the container-base module about the version of the image. 
--- .github/workflows/container_base_push.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 31a044ef02a..4295280b639 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -16,7 +16,6 @@ on: - cron: '23 3 * * 0' # Run for 'develop' every Sunday at 03:23 UTC env: - IMAGE_TAG: unstable PLATFORMS: linux/amd64,linux/arm64 jobs: From 0ba86b2e4f68709d49c72a99580ec5c5b1ac4d5a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:10:23 +0200 Subject: [PATCH 014/486] ci(ct): stop on purpose to test the feature branch before we actually push to Docker Hub --- .github/workflows/container_base_push.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 4295280b639..bfccefaf9eb 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -69,13 +69,11 @@ jobs: cache-dependency-path: | modules/container-base/pom.xml + - name: Stop on purpose for testing + uses: actions/github-script@v3 with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - - name: Build base container image with local architecture - run: mvn -f modules/container-base -Pct package + script: | + core.setFailed('Stopped on purpose.') # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! 
From 37f4a51b2ff0341f7bb6d930614f04c2238c213b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:11:00 +0200 Subject: [PATCH 015/486] feat(ct): make the container base image follow sequential, incrementing versions --- modules/container-base/pom.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index fc672696df4..2754b081986 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -12,6 +12,8 @@ io.gdcc container-base + + 1 ${packaging.type} Container Base Image This module provides an application server base image to be decorated with the Dataverse app. @@ -40,7 +42,7 @@ docker-build gdcc/base:${base.image.tag} - unstable + R${project.version} eclipse-temurin:${target.java.version}-jre 1000 1000 From 20c6a58915711efd9e18d8454813623ead9dbb9b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:17:47 +0200 Subject: [PATCH 016/486] ci(ct): add missing output definitions to base image workflow matrix discovery --- .github/workflows/container_base_push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index bfccefaf9eb..29abab230e3 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -27,6 +27,8 @@ jobs: packages: read # Only run in upstream repo - avoid unnecessary runs in forks and only for scheduled if: ${{ github.repository_owner == 'IQSS' }} + outputs: + branches: ${{ steps.matrix.outputs.branches }} steps: - name: Build branch matrix options id: matrix From e9236638eb2921b46318c3a4fb51e663ec021a56 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:20:41 +0200 Subject: [PATCH 017/486] test(ct): for testing, determine if matrix discovery works for schedule event --- .github/workflows/container_base_push.yml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 29abab230e3..ab91252efc6 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -34,11 +34,11 @@ jobs: id: matrix run: | # Get last three releases and include develop branch as matrix elements - if [[ "${{ github.event_name }}" == "schedule" ]]; then + #if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tee -a "$GITHUB_OUTPUT" - else - echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" - fi + #else + # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" + #fi build: name: Build image From def5d9e2842c09c3d647619f0cea266528c71a82 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:23:56 +0200 Subject: [PATCH 018/486] fix(ct): base image build matrix output must be on a single line Fix jq output by removing newlines and superfluous whitespace. 
--- .github/workflows/container_base_push.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index ab91252efc6..705b152d1bc 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -35,7 +35,9 @@ jobs: run: | # Get last three releases and include develop branch as matrix elements #if [[ "${{ github.event_name }}" == "schedule" ]]; then - echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tee -a "$GITHUB_OUTPUT" + echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ + jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tr -d "\n" | tr -s " " | \ + tee -a "$GITHUB_OUTPUT" #else # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" #fi From b028c2610db5688bf443f436e1bbdb1c7c8fcb6c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:27:06 +0200 Subject: [PATCH 019/486] fix(ct): do not stop building other base images if one build fails --- .github/workflows/container_base_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 705b152d1bc..f36c5a8de2d 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -50,6 +50,7 @@ jobs: packages: read needs: discover strategy: + fail-fast: false matrix: branch: ${{ fromJson(needs.discover.outputs.branches) }} # Only run in upstream repo - avoid unnecessary runs in forks From b453a12b780c4149a3ca2ffb5469d6cfb85c5290 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:45:07 +0200 Subject: [PATCH 020/486] fix(ct): avoid costly lookup of Java version to be used via Maven, grep from XML instead --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index f36c5a8de2d..d6af36702f6 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -61,9 +61,9 @@ jobs: uses: actions/checkout@v4 with: ref: ${{ matrix.branch }} - - name: Determine Java version from package + - name: Determine Java version from Parent POM run: | - echo "JAVA_VERSION=$(mvn -f modules/container-base -Pct help:evaluate -Dexpression=target.java.version -q -DforceStdout)" >> ${GITHUB_ENV} + echo "JAVA_VERSION=$(grep '' modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" >> ${GITHUB_ENV} - name: Set up JDK ${{ env.JAVA_VERSION }} uses: actions/setup-java@v4 From cb2fac02fb9370f565ef613c0f2e57f5ce7957a0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 00:48:19 +0200 Subject: [PATCH 021/486] test(ct): no longer stop before Docker, but tell DMP to skip pushing --- .github/workflows/container_base_push.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index d6af36702f6..24b2640f3e3 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -74,12 +74,6 @@ jobs: cache-dependency-path: | modules/container-base/pom.xml - - name: Stop on purpose for testing - uses: actions/github-script@v3 - with: - script: | - core.setFailed('Stopped on purpose.') - # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! 
@@ -91,7 +85,7 @@ jobs: - name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Deploy multi-arch base container image to Docker Hub - run: mvn -f modules/container-base -Pct deploy -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From afa51c78327748d14a1e9bb6f1d6eac78e357f48 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:12:13 +0200 Subject: [PATCH 022/486] fix(ct): skip building and pushing the base image for v6.0 and v6.1, which is compatible with the v6.2 image --- .github/workflows/container_base_push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 24b2640f3e3..cb83a2799bb 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -85,6 +85,8 @@ jobs: - name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Deploy multi-arch base container image to Docker Hub + # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. 
+ if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} From 8afac921361fcb76752c9425f0ce2e2a5708244e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:12:47 +0200 Subject: [PATCH 023/486] test(ct): try to limit QEMU architectures, maybe shaving off a few seconds of setup --- .github/workflows/container_base_push.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index cb83a2799bb..ed9e2a1cc1e 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -83,7 +83,10 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 + with: + platforms: ${{ env.PLATFORMS }} + - name: Deploy multi-arch base container image to Docker Hub # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. 
if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} From 7c4242ea384e06b70664bffde4c5d1828ac70b1a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:18:27 +0200 Subject: [PATCH 024/486] feat(ct): add additional tags for base images --- .github/workflows/container_base_push.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index ed9e2a1cc1e..21a15e02c1d 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -86,11 +86,20 @@ jobs: uses: docker/setup-qemu-action@v3 with: platforms: ${{ env.PLATFORMS }} + - name: Add additional tags as options + run: | + # For the development branch, update the latest tag in addition + if [[ "${{ matrix.branch }}" == "develop" ]]; then + echo "DOCKER_TAGS=-Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" + # In case of releases <=6.2, we still need to provide backward compatible names "alpha" and "unstable" + elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then + echo "DOCKER_TAGS=-Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" + fi - name: Deploy multi-arch base container image to Docker Hub # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} - run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From c08220dd4e40895973206d178f9cdeb7c7b5bf1c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:35:45 +0200 Subject: [PATCH 025/486] test(ct): add feature branch to matrix and build for everything but it. 
--- .github/workflows/container_base_push.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 21a15e02c1d..af22b6b1a55 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -32,11 +32,12 @@ jobs: steps: - name: Build branch matrix options id: matrix + # TODO: remove the feature branch and re-enable the if/else! run: | # Get last three releases and include develop branch as matrix elements #if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ - jq '[ .[0:3] | .[].tag_name, "develop" ]')" | tr -d "\n" | tr -s " " | \ + jq '[ .[0:3] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ tee -a "$GITHUB_OUTPUT" #else # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" @@ -87,19 +88,28 @@ jobs: with: platforms: ${{ env.PLATFORMS }} - name: Add additional tags as options + # TODO: remove the feature branch and re-enable the if/else! 
run: | # For the development branch, update the latest tag in addition - if [[ "${{ matrix.branch }}" == "develop" ]]; then + if [[ "${{ matrix.branch }}" == "develop" || "${{ matrix.branch }}" == "10478-version-base-img" ]]; then echo "DOCKER_TAGS=-Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" # In case of releases <=6.2, we still need to provide backward compatible names "alpha" and "unstable" elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then echo "DOCKER_TAGS=-Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi + # TODO: remove when feature branch is done + - name: Skip all but feature-branch + if: ${{ matrix.branch != '10478-version-base-img' }} + uses: actions/github-script@v3 + with: + script: | + core.setFailed('Stopping on purpose to avoid mayhem') + - name: Deploy multi-arch base container image to Docker Hub # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} - run: mvn -f modules/container-base -Pct deploy -Ddocker.skip.push ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From c4cbc576ef3dba974e8bffa338ad7a74d3962960 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:56:10 +0200 Subject: [PATCH 026/486] chore,test(ct): update script action to v7 --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index af22b6b1a55..f7f8e889d56 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -100,7 +100,7 @@ jobs: # TODO: remove when feature branch is done - name: Skip all but feature-branch if: ${{ matrix.branch != 
'10478-version-base-img' }} - uses: actions/github-script@v3 + uses: actions/github-script@v7 with: script: | core.setFailed('Stopping on purpose to avoid mayhem') From 05e48875387f769fb9cc966868699cbd4edce8c1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 01:56:44 +0200 Subject: [PATCH 027/486] fix(ct): add missing option for DMP to add additional tags to base image --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index f7f8e889d56..cefa63786c9 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -92,10 +92,10 @@ jobs: run: | # For the development branch, update the latest tag in addition if [[ "${{ matrix.branch }}" == "develop" || "${{ matrix.branch }}" == "10478-version-base-img" ]]; then - echo "DOCKER_TAGS=-Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" + echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.develop=latest" | tee -a "${GITHUB_ENV}" # In case of releases <=6.2, we still need to provide backward compatible names "alpha" and "unstable" elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then - echo "DOCKER_TAGS=-Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" + echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi # TODO: remove when feature branch is done - name: Skip all but feature-branch From be5b9c98cbcab64085ec8a5ffc5b8275a4fec335 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:09:53 +0200 Subject: [PATCH 028/486] feat(ct): trigger build of app image when all builds of base image succeed We use the branch name from the matrix as input argument. It's not being used yet. 
--- .github/workflows/container_app_push.yml | 5 +++++ .github/workflows/container_base_push.yml | 22 ++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index b3e247e376c..fba693eee05 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -5,6 +5,11 @@ on: # We are deliberately *not* running on push events here to avoid double runs. # Instead, push events will trigger from the base image and maven unit tests via workflow_call. workflow_call: + inputs: + branch: + type: string + description: "A tag or branch to checkout for building the image" + required: true pull_request: branches: - develop diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index cefa63786c9..4e80caa8586 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -107,6 +107,7 @@ jobs: - name: Deploy multi-arch base container image to Docker Hub + id: build # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. 
if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} run: mvn -f modules/container-base -Pct deploy ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} @@ -120,14 +121,27 @@ jobs: repository: gdcc/base short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" readme-filepath: ./modules/container-base/README.md + + # - if: always() + # name: Save status (workaround for matrix outputs) + # run: | + # # steps.build.outcome is the status BEFORE continue-on-error + # echo "STATUS_$( echo "${{ matrix.branch }}" | tr ".:;,-/ " "_" )=${{ steps.build.outcome }}" | tee -a "${GITHUB_ENV}" + push-app-img: name: "Rebase & Publish App Image" permissions: contents: read packages: write pull-requests: write - needs: build - # We do not release a new base image for pull requests, so do not trigger. - # if: ${{ github.event_name != 'pull_request' }} + needs: + - discover + - build + strategy: + fail-fast: false + matrix: + branch: ${{ fromJson(needs.discover.outputs.branches) }} uses: ./.github/workflows/container_app_push.yml - secrets: inherit + with: + branch: ${{ matrix.branch }} + From d4a196b8a6247542051bf3d0daf24789121d6b94 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:12:00 +0200 Subject: [PATCH 029/486] test(ct): let's skip the build for anything but our feature branch --- .github/workflows/container_app_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index fba693eee05..a92ca4aecbd 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -35,7 +35,7 @@ jobs: packages: write pull-requests: write # Only run in upstream repo - avoid unnecessary runs in forks - if: ${{ github.repository_owner == 'IQSS' }} + if: ${{ github.repository_owner == 'IQSS' && inputs.branch == '10478-version-base-img' }} steps: - name: Checkout 
repository From 77aec0d62dcb0aad1e3e1bf7abdf740fc7bed540 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:19:35 +0200 Subject: [PATCH 030/486] style(ct): fix wording to test CI pipeline --- modules/container-base/src/main/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index f093ced37c1..0905ebb62a1 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -221,7 +221,7 @@ RUN < Date: Thu, 11 Apr 2024 14:22:08 +0200 Subject: [PATCH 031/486] test(ct): try what happens when removing the on purpose failure of base image workflow --- .github/workflows/container_base_push.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 4e80caa8586..8c15c0a7ff7 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -97,13 +97,14 @@ jobs: elif [[ "${{ matrix.branch }}" == "v6.2" ]]; then echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi + # TODO: remove when feature branch is done - - name: Skip all but feature-branch - if: ${{ matrix.branch != '10478-version-base-img' }} - uses: actions/github-script@v7 - with: - script: | - core.setFailed('Stopping on purpose to avoid mayhem') + #- name: Skip all but feature-branch + # if: ${{ matrix.branch != '10478-version-base-img' }} + # uses: actions/github-script@v7 + # with: + # script: | + # core.setFailed('Stopping on purpose to avoid mayhem') - name: Deploy multi-arch base container image to Docker Hub From 2887d13ca93103420ad78973e4e975dd0da90b41 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Apr 2024 14:42:40 +0200 Subject: [PATCH 032/486] fix(ct): make secrets 
available to app workflow again Deleted by accident when working on alternativ matrix way to build app image after base. --- .github/workflows/container_base_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 8c15c0a7ff7..1631c08f10e 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -135,6 +135,7 @@ jobs: contents: read packages: write pull-requests: write + secrets: inherit needs: - discover - build From bbe527d4646b09fd41fa4e57c52e2759183d2042 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:44:06 +0200 Subject: [PATCH 033/486] chore(ct,build): upgrade to DMP plugin v0.44.0 - Enable -Ddocker.noCache for BuildX - Hopefully straightens out some bugs on Darwin with M1/M2 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 612902b47a4..d03d3e242fc 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -199,7 +199,7 @@ 1.7.0 - 0.43.4 + 0.44.0 From c496ef31304f8ced339aace0b903a52d69991b71 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:45:35 +0200 Subject: [PATCH 034/486] feat(ct): pin down name of the builder for base image This way we have a distinct path and name to inject a BuildX builder instance configuration --- modules/container-base/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 2754b081986..72811e34e84 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -100,6 +100,7 @@ ${docker.platforms} + dataverse ${project.build.directory}/buildx-state Dockerfile From 82c8e725365c72bbc4a2a906779bc1c61a71ff89 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:46:05 +0200 Subject: [PATCH 
035/486] ci(ct): make number of past release configurable via env var --- .github/workflows/container_base_push.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 1631c08f10e..fe3b38d0284 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -17,6 +17,7 @@ on: env: PLATFORMS: linux/amd64,linux/arm64 + NUM_PAST_RELEASES: 3 jobs: discover: @@ -37,7 +38,7 @@ jobs: # Get last three releases and include develop branch as matrix elements #if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ - jq '[ .[0:3] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ + jq '[ .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ tee -a "$GITHUB_OUTPUT" #else # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" From 0c91541f5c11c3daead61f657dbdee6af4addc43 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:54:49 +0200 Subject: [PATCH 036/486] fix(ct): correction of tags specification for base image workflow Per example at https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-only-when-a-push-of-specific-tags-occurs it should be two asterisks. 
--- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index fe3b38d0284..838321b10f6 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -4,7 +4,7 @@ name: Base Container Image on: push: tags: - - 'v[6-9].*' + - 'v[6-9].**' branches: - 'develop' - '10478-version-base-img' From eef60f295ced438a7c3e91fbef61edc273ec51d9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 08:56:21 +0200 Subject: [PATCH 037/486] refactor(ct): when pushes for a tag or branch occur, build for it Before, we would have run the workflow for develop only. In case develop has merges before the run starts because of a tag being created, this may result in undesired side effects. Keep in mind that pushes of tags here will also trigger releasing a new application image! --- .github/workflows/container_base_push.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 838321b10f6..ec8881b3ef5 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -41,7 +41,8 @@ jobs: jq '[ .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, "develop", "10478-version-base-img" ]')" | tr -d "\n" | tr -s " " | \ tee -a "$GITHUB_OUTPUT" #else - # echo "branches=['develop']" | tee -a "$GITHUB_OUTPUT" + # # Note: github.ref_name will be the name of the branch or the tag pushed + # echo "branches=['${{ github.ref_name }}']" | tee -a "$GITHUB_OUTPUT" #fi build: From 90cb9ce129ff1b3b9de9e0585232418b820d7dbb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 09:59:01 +0200 Subject: [PATCH 038/486] feat(ct): replace QEMU with remote builder for ARM64 in base image --- .github/workflows/container_base_push.yml | 35 +++++++++++++++++++++-- 1 
file changed, 32 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index ec8881b3ef5..324f8134a64 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -85,10 +85,39 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v3 + + # This is replaced by adding a remote ARM64 build host in addition to the local AMD64 + # - name: Set up QEMU for multi-arch builds + # uses: docker/setup-qemu-action@v3 + # with: + # platforms: ${{ env.PLATFORMS }} + # Setup SSH access to ARM64 builder node + - name: Setup SSH agent + uses: webfactory/ssh-agent@v0.9.0 with: - platforms: ${{ env.PLATFORMS }} + ssh-private-key: ${{ secrets.BUILDER_ARM64_SSH_PRIVATE_KEY }} + - name: Provide the known hosts key and the builder config + run: | + echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts + mkdir -p modules/container-base/target/buildx-state/buildx/instances + cat modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF + { "Name": "dataverse", + "Driver": "docker-container", + "Dynamic": false, + "Nodes": [{"Name": "dataverse0", + "Endpoint": "unix:///var/run/docker.sock", + "Platforms": [{"os": "linux", "architecture": "amd64"}], + "DriverOpts": null, + "Flags": ["--allow-insecure-entitlement=network.host"], + "Files": null}, + {"Name": "dataverse1", + "Endpoint": "ssh://${{ secret.BUILDER_ARM64_SSH_CONNECTION }}", + "Platforms": [{"os": "linux", "architecture": "arm64"}], + "DriverOpts": null, + "Flags": ["--allow-insecure-entitlement=network.host"], + "Files": null}]} + EOF + - name: Add additional tags as options # TODO: remove the feature branch and re-enable the if/else! 
run: | From d7fbf871cffe8685e75533280e4ee2fa1f1add60 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 10:00:17 +0200 Subject: [PATCH 039/486] feat(ct): disable caching during base image build The ARM64 remote build host will otherwise reuse the cache with potentially stale content --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 324f8134a64..aaf20397f9d 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -142,7 +142,7 @@ jobs: id: build # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. if: ${{ matrix.branch != 'v6.0' && matrix.branch != 'v6.1' }} - run: mvn -f modules/container-base -Pct deploy ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} + run: mvn -f modules/container-base -Pct deploy -Ddocker.noCache ${DOCKER_TAGS} -Ddocker.platforms=${{ env.PLATFORMS }} - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub From b6fb1224b9ca6fd0f345efc6ad73ebddea962450 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 10:02:31 +0200 Subject: [PATCH 040/486] style(ct): fix typo in secrets reference --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index aaf20397f9d..9243c2725e1 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -111,7 +111,7 @@ jobs: "Flags": ["--allow-insecure-entitlement=network.host"], "Files": null}, {"Name": "dataverse1", - "Endpoint": "ssh://${{ secret.BUILDER_ARM64_SSH_CONNECTION }}", + "Endpoint": "ssh://${{ secrets.BUILDER_ARM64_SSH_CONNECTION }}", "Platforms": [{"os": "linux", "architecture": "arm64"}], "DriverOpts": null, 
"Flags": ["--allow-insecure-entitlement=network.host"], From b3ac714f98971ef21cd47650e04a370226334fc8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 10:04:08 +0200 Subject: [PATCH 041/486] fix(ct): add missing pipe redirection for builder config --- .github/workflows/container_base_push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 9243c2725e1..69cd31afcf4 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -100,7 +100,7 @@ jobs: run: | echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts mkdir -p modules/container-base/target/buildx-state/buildx/instances - cat modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF + cat > modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF { "Name": "dataverse", "Driver": "docker-container", "Dynamic": false, From 898d9053becaa64151c06ec645080f6544292776 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 11:06:32 +0200 Subject: [PATCH 042/486] refactor(ct): switch back to "maven" as builder name Using "maven" (the default name) again allows us to use the ARM64 builder for previously released images as well as develop before we merge the feature branch --- .github/workflows/container_base_push.yml | 8 ++++---- modules/container-base/pom.xml | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 69cd31afcf4..e4733cca027 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -100,17 +100,17 @@ jobs: run: | echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts mkdir -p modules/container-base/target/buildx-state/buildx/instances - cat > 
modules/container-base/target/buildx-state/buildx/instances/dataverse << EOF - { "Name": "dataverse", + cat > modules/container-base/target/buildx-state/buildx/instances/maven << EOF + { "Name": "maven", "Driver": "docker-container", "Dynamic": false, - "Nodes": [{"Name": "dataverse0", + "Nodes": [{"Name": "maven0", "Endpoint": "unix:///var/run/docker.sock", "Platforms": [{"os": "linux", "architecture": "amd64"}], "DriverOpts": null, "Flags": ["--allow-insecure-entitlement=network.host"], "Files": null}, - {"Name": "dataverse1", + {"Name": "maven1", "Endpoint": "ssh://${{ secrets.BUILDER_ARM64_SSH_CONNECTION }}", "Platforms": [{"os": "linux", "architecture": "arm64"}], "DriverOpts": null, diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 72811e34e84..2754b081986 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -100,7 +100,6 @@ ${docker.platforms} - dataverse ${project.build.directory}/buildx-state Dockerfile From 577804b3776847131b6e5f05727c9f0f69232d63 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 13:02:15 +0200 Subject: [PATCH 043/486] refactor(ct): use remote ARM64 builder only for pushes, but not scheduled maintenance --- .github/workflows/container_base_push.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index e4733cca027..36741ccd211 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -86,17 +86,21 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - # This is replaced by adding a remote ARM64 build host in addition to the local AMD64 - # - name: Set up QEMU for multi-arch builds - # uses: docker/setup-qemu-action@v3 - # with: - # platforms: ${{ env.PLATFORMS }} - # Setup SSH access to ARM64 builder node + # In case of scheduled maintenance, we don't 
care about buildtime: use QEMU for AMD64 + ARM64 + - name: Set up QEMU for multi-arch builds + if: ${{ github.event_name == 'schedule' }} + uses: docker/setup-qemu-action@v3 + with: + platforms: ${{ env.PLATFORMS }} + # In case this is a push to develop, we care about buildtime. + # Configure a remote ARM64 build host in addition to the local AMD64 in two steps. - name: Setup SSH agent + if: ${{ github.event_name != 'schedule' }} uses: webfactory/ssh-agent@v0.9.0 with: ssh-private-key: ${{ secrets.BUILDER_ARM64_SSH_PRIVATE_KEY }} - name: Provide the known hosts key and the builder config + if: ${{ github.event_name != 'schedule' }} run: | echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts mkdir -p modules/container-base/target/buildx-state/buildx/instances From 25cbdd7b19478f9110ceb55436f9f84bcbb3aff5 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 14:19:35 +0200 Subject: [PATCH 044/486] test(ct): use DMP v0.45-SNAPSHOT to see if pushing images that have a ARG in their ref works with it --- modules/dataverse-parent/pom.xml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index d03d3e242fc..473c143296a 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -199,7 +199,7 @@ 1.7.0 - 0.44.0 + 0.45-SNAPSHOT @@ -215,6 +215,14 @@ never + + oss.sonatype.org + https://oss.sonatype.org/content/repositories/snapshots + + true + always + + From 2de674f6d80249ea2e446275533f4e3b6daa630f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:23:21 +0200 Subject: [PATCH 045/486] feat(build,ci): workflow pre-seeding Maven cache from develop #10428 Contains the feature branch for now, too. Will be deleted later before merging it to develop. 
--- .github/workflows/maven_cache_seed.yaml | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/maven_cache_seed.yaml diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml new file mode 100644 index 00000000000..e82ce36f829 --- /dev/null +++ b/.github/workflows/maven_cache_seed.yaml @@ -0,0 +1,37 @@ +name: Maven Cache Seeding + +on: + push: + branches: + - develop + - 10478-version-base-img + +env: + COMMON_CACHE_NAME: dataverse-maven-cache + +jobs: + preseed: + name: Drop and Re-Seed Local Repository + runs-on: ubuntu-latest + if: ${{ github.event_name == 'push' }} + steps: + - name: Drop common cache + uses: prantlf/delete-cache-action@v2 + with: + key: ${{ env.COMMON_CACHE_NAME }} + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: "17" + distribution: temurin + - name: Re-Seed common cache + run: | + mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins + - name: Save the cache + uses: actions/cache/save@v4 + with: + path: ~/.m2/repository + key: ${{ env.COMMON_CACHE_NAME }} + From b1943c114110405a613c4093fb2d7eee5fa904d7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:35:38 +0200 Subject: [PATCH 046/486] fix(build,ci): ignore not found when deleting common cache --- .github/workflows/maven_cache_seed.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index e82ce36f829..4650cfcf930 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -16,6 +16,7 @@ jobs: if: ${{ github.event_name == 'push' }} steps: - name: Drop common cache + continue-on-error: true # we don't care if the cache is not around uses: prantlf/delete-cache-action@v2 with: key: ${{ env.COMMON_CACHE_NAME }} From 
941fbc904d5700cebcff9d64b0a8a1f1c2268d36 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:51:33 +0200 Subject: [PATCH 047/486] style(ct): remove some stale comments --- .github/workflows/container_base_push.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 36741ccd211..7f2f194cbcd 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -123,7 +123,7 @@ jobs: EOF - name: Add additional tags as options - # TODO: remove the feature branch and re-enable the if/else! + # TODO: remove the feature branch run: | # For the development branch, update the latest tag in addition if [[ "${{ matrix.branch }}" == "develop" || "${{ matrix.branch }}" == "10478-version-base-img" ]]; then @@ -133,15 +133,6 @@ jobs: echo "DOCKER_TAGS=-Ddocker.imagePropertyConfiguration=override -Ddocker.tags.additional=alpha" | tee -a "${GITHUB_ENV}" fi - # TODO: remove when feature branch is done - #- name: Skip all but feature-branch - # if: ${{ matrix.branch != '10478-version-base-img' }} - # uses: actions/github-script@v7 - # with: - # script: | - # core.setFailed('Stopping on purpose to avoid mayhem') - - - name: Deploy multi-arch base container image to Docker Hub id: build # Do not build for v6.0 and v6.1. We can simply reuse the one from v6.2. 
From b64b6aff9ce38009fdf90c451493b257690472f7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:52:28 +0200 Subject: [PATCH 048/486] feat(ct): make use of common cache in app container builds #10428 --- .github/workflows/container_app_push.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index a92ca4aecbd..347fcf06b86 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -42,11 +42,18 @@ jobs: uses: actions/checkout@v3 - name: Set up JDK + id: setup-java uses: actions/setup-java@v3 with: java-version: "17" distribution: temurin cache: maven + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache + - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests) run: > @@ -111,11 +118,19 @@ jobs: if: needs.check-secrets.outputs.available == 'true' && ( github.event_name != 'push' || ( github.event_name == 'push' && contains(fromJSON('["develop", "master"]'), github.ref_name))) steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 + - uses: actions/checkout@v4 + - name: Set up JDK + id: setup-java + uses: actions/setup-java@v3 with: java-version: "17" distribution: temurin + cache: maven + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache # Depending on context, we push to different targets. Login accordingly. 
- if: github.event_name != 'pull_request' From 4f48123f73674eb56f3f687cc2e7df55908e13db Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 15:52:49 +0200 Subject: [PATCH 049/486] feat(ci): remove feature branch from maven seed workflow --- .github/workflows/maven_cache_seed.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 4650cfcf930..d002a446b99 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,7 +4,6 @@ on: push: branches: - develop - - 10478-version-base-img env: COMMON_CACHE_NAME: dataverse-maven-cache From 721814a62672a8483d1fd0708071444bc239e806 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:01:47 +0200 Subject: [PATCH 050/486] style(ct): rename base image revision from RX to revX It's just much more readable --- modules/container-base/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 2754b081986..e5de8841530 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -42,7 +42,7 @@ docker-build gdcc/base:${base.image.tag} - R${project.version} + rev${project.version} eclipse-temurin:${target.java.version}-jre 1000 1000 From 3c0650f6710bcda94c0e83278654a454466b5a41 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:02:12 +0200 Subject: [PATCH 051/486] feat(ct): make dataverse app image use base rev1 image --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8f9d06b8744..fa267b6fbee 100644 --- a/pom.xml +++ b/pom.xml @@ -956,7 +956,8 @@ unstable false gdcc/base:${base.image.tag} - unstable + + rev1 gdcc/configbaker:${conf.image.tag} ${app.image.tag} From 06a21612347e0a64cfe0adfa05d1823c4927f5b2 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:15:00 +0200 Subject: 
[PATCH 052/486] refactor(ci): delete common cache after we downloaded the old one #10428 --- .github/workflows/maven_cache_seed.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index d002a446b99..5a38abed767 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -14,11 +14,6 @@ jobs: runs-on: ubuntu-latest if: ${{ github.event_name == 'push' }} steps: - - name: Drop common cache - continue-on-error: true # we don't care if the cache is not around - uses: prantlf/delete-cache-action@v2 - with: - key: ${{ env.COMMON_CACHE_NAME }} - name: Checkout repository uses: actions/checkout@v4 - name: Set up JDK @@ -26,9 +21,16 @@ jobs: with: java-version: "17" distribution: temurin - - name: Re-Seed common cache + - name: Seed common cache run: | mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins + # This non-obvious order is due to the fact that the download via Maven will take a very long time. + # Jobs should not be left without a cache. Deleting and saving in one go leaves only a small chance for a cache miss. 
+ - name: Drop common cache + continue-on-error: true # we don't care if the cache is not around + uses: prantlf/delete-cache-action@v2 + with: + key: ${{ env.COMMON_CACHE_NAME }} - name: Save the cache uses: actions/cache/save@v4 with: From 818bfd4ec32fa0d67f39611f6f3235af730fbfb9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:15:20 +0200 Subject: [PATCH 053/486] test(ci): readd feature branch to test cache handling logic --- .github/workflows/maven_cache_seed.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 5a38abed767..f4b13725e70 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,6 +4,7 @@ on: push: branches: - develop + - 10478-version-base-img env: COMMON_CACHE_NAME: dataverse-maven-cache From a28997c815685b0527e3250c7526d9a03521ddae Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:16:33 +0200 Subject: [PATCH 054/486] style(ct): add common cache download to base image Without accurate measuring it seems like we use ~1 minute to download Maven artifacts for this workflow. Lets try to cut that down some by reusing the common cache. 
--- .github/workflows/container_base_push.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 7f2f194cbcd..667808d4d08 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -69,6 +69,7 @@ jobs: echo "JAVA_VERSION=$(grep '' modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" >> ${GITHUB_ENV} - name: Set up JDK ${{ env.JAVA_VERSION }} + id: setup-java uses: actions/setup-java@v4 with: java-version: ${{ env.JAVA_VERSION }} @@ -76,6 +77,12 @@ jobs: cache: 'maven' cache-dependency-path: | modules/container-base/pom.xml + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache + path: ~/.m2/repository # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! From 085e41e9c1bd7c550e76ad47b5450ac5fe197450 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:19:59 +0200 Subject: [PATCH 055/486] style(ct): remove stale comment about platforms empty in base image --- modules/container-base/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index e5de8841530..60be420bd9a 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -97,7 +97,6 @@ - ${docker.platforms} ${project.build.directory}/buildx-state From 2fe0ca39ac5c9cf044ad833b7e66de449d61efa3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 16:52:12 +0200 Subject: [PATCH 056/486] fix(build,ci): downgrade to cache action v2 Per https://github.com/actions/cache/issues/1361 restores in v3 and v4 were not working, maybe saving is botched, too? 
--- .github/workflows/maven_cache_seed.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index f4b13725e70..148808a4b04 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -33,7 +33,7 @@ jobs: with: key: ${{ env.COMMON_CACHE_NAME }} - name: Save the cache - uses: actions/cache/save@v4 + uses: actions/cache@v2 with: path: ~/.m2/repository key: ${{ env.COMMON_CACHE_NAME }} From afad97e4160d60e31b01611e8a1e28ad0192631f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:09:50 +0200 Subject: [PATCH 057/486] fix(build,ci,ct): fix missing path for cache restore --- .github/workflows/container_app_push.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index 347fcf06b86..3344a8e2d0d 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -52,6 +52,7 @@ jobs: if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} uses: actions/cache/restore@v4 with: + path: ~/.m2/repository key: dataverse-maven-cache @@ -130,6 +131,7 @@ jobs: if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} uses: actions/cache/restore@v4 with: + path: ~/.m2/repository key: dataverse-maven-cache # Depending on context, we push to different targets. Login accordingly. 
From 1c1c30e8b1d11f830354535f5164a92112eb3ca3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:11:51 +0200 Subject: [PATCH 058/486] test(ci): fix a comment to trigger base image workflow --- modules/container-base/src/main/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 0905ebb62a1..a8bd3a32ca3 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -203,7 +203,7 @@ RUN < Date: Fri, 12 Apr 2024 17:17:49 +0200 Subject: [PATCH 059/486] style(ci): remove feature branch from maven cache seeding again --- .github/workflows/maven_cache_seed.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 148808a4b04..85f1381c789 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,7 +4,6 @@ on: push: branches: - develop - - 10478-version-base-img env: COMMON_CACHE_NAME: dataverse-maven-cache From 43402435ae4d619b872ac9bd14704f54306fc49e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:20:08 +0200 Subject: [PATCH 060/486] doc(ci): add note about cache availability to seeding workflow --- .github/workflows/maven_cache_seed.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index 85f1381c789..e11d8c91c0e 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -31,6 +31,8 @@ jobs: uses: prantlf/delete-cache-action@v2 with: key: ${{ env.COMMON_CACHE_NAME }} + # NOTE: It is vital here to remember that only caches with the scope of the default branch are + # available to other branches. We use the v2 action here to save it anyway. 
- name: Save the cache uses: actions/cache@v2 with: From 899e8aa6548d8b6076c266807cc265d4c0bb049b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Apr 2024 17:31:16 +0200 Subject: [PATCH 061/486] feat(ci): make sure to rejuvenate the common cache every 7 days --- .github/workflows/maven_cache_seed.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml index e11d8c91c0e..d31559138b8 100644 --- a/.github/workflows/maven_cache_seed.yaml +++ b/.github/workflows/maven_cache_seed.yaml @@ -4,6 +4,10 @@ on: push: branches: - develop + # According to https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy + # all caches are depleted after 7 days of no access. Make sure we rejuvenate every 7 days to keep it available. + schedule: + - cron: '23 2 * * 0' # Run for 'develop' every Sunday at 02:23 UTC env: COMMON_CACHE_NAME: dataverse-maven-cache @@ -12,7 +16,6 @@ jobs: preseed: name: Drop and Re-Seed Local Repository runs-on: ubuntu-latest - if: ${{ github.event_name == 'push' }} steps: - name: Checkout repository uses: actions/checkout@v4 From f538896971022cc4fa011a38aca41936125a30dc Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:14:56 +0200 Subject: [PATCH 062/486] chore(ct): replace wait-for with wait4x Aligning configbaker and base image with same tool. wait4x has much more features to wait for different services. 
--- doc/sphinx-guides/source/container/base-image.rst | 2 +- modules/container-base/src/main/docker/Dockerfile | 12 ++++++------ .../docker/scripts/init_3_wait_dataverse_db_host.sh | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index c41250d48c5..29c357b91f6 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -46,7 +46,7 @@ The base image provides: - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on - `Jattach `__ (attach to running JVM) -- `wait-for `__ (tool to "wait for" a service to be available) +- `wait4x `__ (tool to "wait for" a service to be available) - `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", see :ref:`below `. 
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index a8bd3a32ca3..e3fa6e477a9 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -92,8 +92,7 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG WAIT_FOR_VERSION="v2.2.3" -ARG WAIT_FOR_CHECKSUM="70271181be69cd2c7265b2746f97fccfd7e8aa1059894138a775369c23589ff4" +ARG WAIT4X_VERSION="v2.14.0" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching @@ -108,10 +107,11 @@ RUN < Date: Mon, 15 Apr 2024 14:17:55 +0200 Subject: [PATCH 063/486] build(ct): make target architecture available in base image build As per https://docs.docker.com/reference/dockerfile/#automatic-platform-args-in-the-global-scope BuildKit / buildx will expose the target architecture. It requires adding an ARG in the Dockerfile to inject the data. 
--- modules/container-base/src/main/docker/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index e3fa6e477a9..256d9159d2d 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -69,6 +69,9 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ ### PART 1: SYSTEM ### ARG UID=1000 ARG GID=1000 +# Auto-populated by BuildKit / buildx +#ARG TARGETARCH="amd64" +ARG TARGETARCH USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] From e86133468a0086f0701efc69ad1549e21102f284 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:20:18 +0200 Subject: [PATCH 064/486] chore(ct): upgrade base image with jattach v2.2 jattach binary is now available for ARM64 and AMD64, but requires special handling with download URLs and checksums. --- .../container-base/src/main/docker/Dockerfile | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 256d9159d2d..fbd43604eac 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -93,8 +93,9 @@ RUN < Date: Tue, 16 Apr 2024 17:42:18 +0200 Subject: [PATCH 065/486] chore: remove obsolete Maven Cache seed workflow --- .github/workflows/maven_cache_seed.yaml | 44 ------------------------- 1 file changed, 44 deletions(-) delete mode 100644 .github/workflows/maven_cache_seed.yaml diff --git a/.github/workflows/maven_cache_seed.yaml b/.github/workflows/maven_cache_seed.yaml deleted file mode 100644 index d31559138b8..00000000000 --- a/.github/workflows/maven_cache_seed.yaml +++ /dev/null @@ -1,44 +0,0 @@ -name: Maven Cache Seeding - -on: - push: - branches: - - develop - # According to 
https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy - # all caches are depleted after 7 days of no access. Make sure we rejuvenate every 7 days to keep it available. - schedule: - - cron: '23 2 * * 0' # Run for 'develop' every Sunday at 02:23 UTC - -env: - COMMON_CACHE_NAME: dataverse-maven-cache - -jobs: - preseed: - name: Drop and Re-Seed Local Repository - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Set up JDK - uses: actions/setup-java@v4 - with: - java-version: "17" - distribution: temurin - - name: Seed common cache - run: | - mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins - # This non-obvious order is due to the fact that the download via Maven will take a very long time. - # Jobs should not be left without a cache. Deleting and saving in one go leaves only a small chance for a cache miss. - - name: Drop common cache - continue-on-error: true # we don't care if the cache is not around - uses: prantlf/delete-cache-action@v2 - with: - key: ${{ env.COMMON_CACHE_NAME }} - # NOTE: It is vital here to remember that only caches with the scope of the default branch are - # available to other branches. We use the v2 action here to save it anyway. 
- - name: Save the cache - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ env.COMMON_CACHE_NAME }} - From 54fe365964f8c726ec2a3e42bee5b28b41f952d0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 16 Apr 2024 17:43:42 +0200 Subject: [PATCH 066/486] style(ci): remove superfluous empty line --- .github/workflows/container_base_push.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 667808d4d08..660f9bdd861 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -86,7 +86,6 @@ jobs: # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and # on events in context of upstream because secrets. PRs run in context of forks by default! - - name: Log in to the Container registry uses: docker/login-action@v3 with: From fdb5932a1a72e57b3660a123cdc386f8145e70d4 Mon Sep 17 00:00:00 2001 From: paulboon Date: Thu, 18 Apr 2024 11:55:03 +0200 Subject: [PATCH 067/486] Use support email in the system email message 'closing' text --- src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 1eee9c65501..49b09cf98ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -117,10 +117,11 @@ public boolean sendSystemEmail(String to, String subject, String messageText, bo return false; } InternetAddress systemAddress = optionalAddress.get(); + InternetAddress supportAddress = getSupportAddress().orElse(systemAddress); String body = messageText + BundleUtil.getStringFromBundle(isHtmlContent ? 
"notification.email.closing.html" : "notification.email.closing", - List.of(BrandingUtil.getSupportTeamEmailAddress(systemAddress), BrandingUtil.getSupportTeamName(systemAddress))); + List.of(BrandingUtil.getSupportTeamEmailAddress(supportAddress), BrandingUtil.getSupportTeamName(supportAddress))); logger.fine(() -> "Sending email to %s. Subject: <<<%s>>>. Body: %s".formatted(to, subject, body)); try { From 2f61d699b6dd1f7d3b700d06d3436e44e399d0d5 Mon Sep 17 00:00:00 2001 From: paulboon Date: Tue, 30 Apr 2024 14:02:01 +0200 Subject: [PATCH 068/486] Added to the documentation and release notes for the use of the support email address in the closing text of the system emails --- .../10287-use-support-address-in-system-email-text.md | 4 ++++ doc/sphinx-guides/source/installation/config.rst | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 doc/release-notes/10287-use-support-address-in-system-email-text.md diff --git a/doc/release-notes/10287-use-support-address-in-system-email-text.md b/doc/release-notes/10287-use-support-address-in-system-email-text.md new file mode 100644 index 00000000000..7375fda68e2 --- /dev/null +++ b/doc/release-notes/10287-use-support-address-in-system-email-text.md @@ -0,0 +1,4 @@ +### Notification Email Improvement + +The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; 'contact us for support at', instead of the default system email address. +Using the system email address here was particularly problematic when it was a 'noreply' address. 
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 065277c06ee..b6555397f75 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2947,6 +2947,8 @@ If not set, the :ref:`systemEmail` is used for the feedback API/contact form ema Note that only the email address is required, which you can supply without the ``<`` and ``>`` signs, but if you include the text, it's the way to customize the name of your support team, which appears in the "from" address in emails as well as in help text in the UI. If you don't include the text, the installation name (see :ref:`Branding Your Installation`) will appear in the "from" address. +Also note that the support email address is used at the end of notification mails where it states; 'contact us for support at', followed by the support mail address if configured and the system email otherwise. + Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_MAIL_SUPPORT_EMAIL``. See also :ref:`smtp-config`. 
From 1d394ea901df7cadb7277e47e51eb25716461a2f Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Wed, 8 May 2024 15:29:32 +0200 Subject: [PATCH 069/486] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index b6555397f75..034e91187cc 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2947,7 +2947,7 @@ If not set, the :ref:`systemEmail` is used for the feedback API/contact form ema Note that only the email address is required, which you can supply without the ``<`` and ``>`` signs, but if you include the text, it's the way to customize the name of your support team, which appears in the "from" address in emails as well as in help text in the UI. If you don't include the text, the installation name (see :ref:`Branding Your Installation`) will appear in the "from" address. -Also note that the support email address is used at the end of notification mails where it states; 'contact us for support at', followed by the support mail address if configured and the system email otherwise. +Also note that the support email address is used at the end of notification mails where it states; "contact us for support at", followed by the support mail address if configured and the system email otherwise. Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_MAIL_SUPPORT_EMAIL``. 
From ca4202f4d85857368d29cf16a2e2ca4d4f7d6933 Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Wed, 8 May 2024 15:29:52 +0200 Subject: [PATCH 070/486] Update doc/release-notes/10287-use-support-address-in-system-email-text.md Co-authored-by: Philip Durbin --- .../10287-use-support-address-in-system-email-text.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10287-use-support-address-in-system-email-text.md b/doc/release-notes/10287-use-support-address-in-system-email-text.md index 7375fda68e2..4c294404298 100644 --- a/doc/release-notes/10287-use-support-address-in-system-email-text.md +++ b/doc/release-notes/10287-use-support-address-in-system-email-text.md @@ -1,4 +1,4 @@ ### Notification Email Improvement -The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; 'contact us for support at', instead of the default system email address. +The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; "contact us for support at", instead of the default system email address. Using the system email address here was particularly problematic when it was a 'noreply' address. 
From 4878cfe47a284f029b2d98adb64d02dafdb540b6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:31:06 -0400 Subject: [PATCH 071/486] separate metadata parsing/params from XML generation code --- .../pidproviders/doi/AbstractDOIProvider.java | 25 ++-- .../pidproviders/doi/DoiMetadata.java | 138 ++++++++++++++++++ .../datacite/DOIDataCiteRegisterService.java | 57 ++++---- 3 files changed, 180 insertions(+), 40 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 43e34e74c59..02a7dedce47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -91,31 +91,30 @@ public String getMetadataFromDvObject(String identifier, Map met } else { dataset = (Dataset) dvObject.getOwner(); } - - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + doiMetadata.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + doiMetadata.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; String fileDescription = 
df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + doiMetadata.setDescription(fileDescription == null ? "" : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setTitle(dvObject.getCurrentName()); String producerString = pidProviderService.getProducer(); if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { producerString = UNAVAILABLE; } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java new file mode 100644 index 00000000000..ffd24747bc2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java @@ -0,0 +1,138 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.ArrayList; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DatasetAuthor; + + +//Parses some specific parts of a DataCite XML metadata file +public class DoiMetadata { + + private String identifier; + 
private List creators; + private String title; + private String publisher; + private String publisherYear; + private List datafileIdentifiers; + private List authors; + private String description; + private List contacts; + private List producers; + + + public DoiMetadata() { + } + + public void parseDataCiteXML(String xmlMetaData) { + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = 
producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java index 0e322eace05..bc69275ac1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; /** @@ -90,28 +91,28 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractPidProvider.UNAVAILABLE; } - metadataTemplate.setDescription(description); + doiMetadata.setDescription(description); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; //Note: File 
metadata is not escaped like dataset metadata is, so adding an xml escape here. //This could/should be removed if the datafile methods add escaping String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); + doiMetadata.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); String title = dvObject.getCurrentName(); if(dvObject.isInstanceofDataFile()) { //Note file title is not currently escaped the way the dataset title is, so adding it here. @@ -122,40 +123,41 @@ public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + DoiMetadata doiMetadata = new DoiMetadata(); + + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + doiMetadata.setDescription(AbstractPidProvider.UNAVAILABLE); String title =metadata.get("datacite.title"); System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - metadataTemplate.setAuthors(null); + doiMetadata.setAuthors(null); - metadataTemplate.setTitle(title); + doiMetadata.setTitle(title); String producerString = AbstractPidProvider.UNAVAILABLE; - 
metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } @@ -209,11 +211,12 @@ Map getMetadata(String identifier) throws IOException { Map metadata = new HashMap<>(); try { String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", String.join("; ", template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.parseDataCiteXML(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", doiMetadata.getCreators())); + metadata.put("datacite.title", doiMetadata.getTitle()); + metadata.put("datacite.publisher", doiMetadata.getPublisher()); + metadata.put("datacite.publicationyear", doiMetadata.getPublisherYear()); } catch (RuntimeException e) { logger.log(Level.INFO, identifier, e); } From 68792c2f92c90f716f39caaa5f76b652592186c0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:31:37 -0400 Subject: [PATCH 072/486] extract some common xml writing util code --- .../dataverse/export/ddi/DdiExportUtil.java | 486 ++++++------------ .../iq/dataverse/util/xml/XmlWriterUtil.java | 174 +++++++ 2 files changed, 340 insertions(+), 320 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java diff --git 
a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 9a689f7a4ed..0c861cb6c09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -24,6 +24,8 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -111,9 +113,9 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles()); @@ -133,9 +135,9 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); 
if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createFileDscr(xmlw, fileDetails); @@ -186,15 +188,15 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "subTitl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.subTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); } xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(pid); @@ -218,23 +220,23 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) boolean excludeRepository = settingsService.isTrueForKey(SettingsServiceBean.Key.ExportInstallationAsDistributorOnlyWhenNotSet, false); if (!StringUtils.isEmpty(datasetDto.getPublisher()) && !(excludeRepository && distributorSet)) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); //distrbtr } writeDistributorsElement(xmlw, version, datasetDto.getMetadataLanguage()); writeContactsElement(xmlw, version); /* per SCHEMA, 
depositr comes before depDate! - L.A. */ - writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + XmlWriterUtil.writeFullElement(xmlw, "depositr", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.depositor)); /* ... and depDate comes before distDate - L.A. */ - writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); - writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + XmlWriterUtil.writeFullElement(xmlw, "depDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + XmlWriterUtil.writeFullElement(xmlw, "distDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.distributionDate)); xmlw.writeEndElement(); // diststmt writeSeriesElement(xmlw, version); xmlw.writeStartElement("holdings"); - writeAttribute(xmlw, "URI", pidUri); + XmlWriterUtil.writeAttribute(xmlw, "URI", pidUri); xmlw.writeEndElement(); //holdings xmlw.writeEndElement(); // citation @@ -247,7 +249,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeSubjectElement(xmlw, version, datasetDto.getMetadataLanguage()); //Subject and Keywords writeAbstractElement(xmlw, version, datasetDto.getMetadataLanguage()); // Description writeSummaryDescriptionElement(xmlw, version, datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.notesText)); //////// xmlw.writeEndElement(); // stdyInfo @@ -255,7 +257,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeDataAccess(xmlw , version); writeOtherStudyMaterial(xmlw , version); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeFullElement(xmlw, "notes", 
XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); xmlw.writeEndElement(); // stdyDscr @@ -274,10 +276,10 @@ private static void writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersio return; } xmlw.writeStartElement("othrStdyMat"); - writeFullElementList(xmlw, "relMat", relMaterials); - writeFullElementList(xmlw, "relStdy", relDatasets); + XmlWriterUtil.writeFullElementList(xmlw, "relMat", relMaterials); + XmlWriterUtil.writeFullElementList(xmlw, "relStdy", relDatasets); writeRelPublElement(xmlw, version); - writeFullElementList(xmlw, "othRefs", relReferences); + XmlWriterUtil.writeFullElementList(xmlw, "othRefs", relReferences); xmlw.writeEndElement(); //othrStdyMat } @@ -292,29 +294,29 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver xmlw.writeStartElement("dataAccs"); xmlw.writeStartElement("setAvail"); - writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); - writeFullElement(xmlw, "origArch", version.getOriginalArchive()); - writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); - writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); - writeFullElement(xmlw, "complete", version.getStudyCompletion()); + XmlWriterUtil.writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); + XmlWriterUtil.writeFullElement(xmlw, "origArch", version.getOriginalArchive()); + XmlWriterUtil.writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); + XmlWriterUtil.writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); + XmlWriterUtil.writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); //setAvail xmlw.writeStartElement("useStmt"); - writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); - writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); - writeFullElement(xmlw, "restrctn", version.getRestrictions()); - writeFullElement(xmlw, "contact", 
version.getContactForAccess()); - writeFullElement(xmlw, "citReq", version.getCitationRequirements()); - writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); - writeFullElement(xmlw, "conditions", version.getConditions()); - writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); + XmlWriterUtil.writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); + XmlWriterUtil.writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); + XmlWriterUtil.writeFullElement(xmlw, "restrctn", version.getRestrictions()); + XmlWriterUtil.writeFullElement(xmlw, "contact", version.getContactForAccess()); + XmlWriterUtil.writeFullElement(xmlw, "citReq", version.getCitationRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "conditions", version.getConditions()); + XmlWriterUtil.writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); xmlw.writeEndElement(); //useStmt /* any s: */ if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); - writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeAttribute("type", NOTE_TYPE_TERMS_OF_ACCESS); + xmlw.writeAttribute("level", LEVEL_DV); xmlw.writeCharacters(version.getTermsOfAccess()); xmlw.writeEndElement(); //notes } @@ -341,9 +343,9 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + 
XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt @@ -351,11 +353,11 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase //The doc is always published by the Dataverse Repository if (!StringUtils.isEmpty(datasetDto.getPublisher())) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); // distrbtr } - writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); + XmlWriterUtil.writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt writeVersionStatement(xmlw, version); @@ -369,10 +371,10 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - writeAttribute(xmlw,"source","archive"); + XmlWriterUtil.writeAttribute(xmlw,"source","archive"); xmlw.writeStartElement("version"); - writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); - writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); + XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); + XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt @@ -523,14 +525,14 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset * "" entries, then all the "" ones: */ for (String nationEntry : nationList) { - writeFullElement(xmlw, "nation", 
nationEntry); + XmlWriterUtil.writeFullElement(xmlw, "nation", nationEntry); } for (String geogCoverEntry : geogCoverList) { - writeFullElement(xmlw, "geogCover", geogCoverEntry); + XmlWriterUtil.writeFullElement(xmlw, "geogCover", geogCoverEntry); } } - writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); + XmlWriterUtil.writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); /* Only 1 geoBndBox is allowed in the DDI. So, I'm just going to arbitrarily use the first one, and ignore the rest! -L.A. */ @@ -563,16 +565,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset */ if (geoBndBoxMap.get("westBL") != null) { - writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); + XmlWriterUtil.writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); } if (geoBndBoxMap.get("eastBL") != null) { - writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); + XmlWriterUtil.writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); } if (geoBndBoxMap.get("southBL") != null) { - writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); + XmlWriterUtil.writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); } if (geoBndBoxMap.get("northBL") != null) { - writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); + XmlWriterUtil.writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); } xmlw.writeEndElement(); @@ -580,7 +582,7 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset /* analyUnit: */ if (unitOfAnalysisDTO != null) { - writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); } @@ -600,16 
+602,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset private static void writeMultipleElement(XMLStreamWriter xmlw, String element, FieldDTO fieldDTO, String lang) throws XMLStreamException { for (String value : fieldDTO.getMultiplePrimitive()) { //Write multiple lang vals for controlled vocab, otherwise don't include any lang tag - writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); + XmlWriterUtil.writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); } } private static void writeDateElement(XMLStreamWriter xmlw, String element, String cycle, String event, String dateIn) throws XMLStreamException { xmlw.writeStartElement(element); - writeAttribute(xmlw, "cycle", cycle); - writeAttribute(xmlw, "event", event); - writeAttribute(xmlw, "date", dateIn); + XmlWriterUtil.writeAttribute(xmlw, "cycle", cycle); + XmlWriterUtil.writeAttribute(xmlw, "event", event); + XmlWriterUtil.writeAttribute(xmlw, "date", dateIn); xmlw.writeCharacters(dateIn); xmlw.writeEndElement(); @@ -641,15 +643,15 @@ private static void writeDateElement(XMLStreamWriter xmlw, String element, Strin private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version, String lang) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); - writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); - writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); - writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); - writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); - writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); + XmlWriterUtil.writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); + 
XmlWriterUtil.writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); + XmlWriterUtil.writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); + XmlWriterUtil.writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); writeTargetSampleElement(xmlw, version); - writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); + XmlWriterUtil.writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); /* comes before : */ FieldDTO collModeFieldDTO = dto2FieldDTO(version, DatasetFieldConstant.collectionMode, "socialscience"); @@ -658,37 +660,37 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO // Below is a backward compatibility check allowing export to work in // an instance where the metadata block has not been updated yet. 
if (collModeFieldDTO.getMultiple()) { - writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); } else { - writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); } } /* and so does : */ - writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); xmlw.writeStartElement("sources"); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); - writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); - writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); + XmlWriterUtil.writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + XmlWriterUtil.writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); xmlw.writeEndElement(); //sources - writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); - writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collSitu", version, 
DatasetFieldConstant.dataCollectionSituation, lang); + XmlWriterUtil.writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); /* "" has the uppercase C: */ - writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); - writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); - writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); + XmlWriterUtil.writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl /* before : */ writeNotesElement(xmlw, version); xmlw.writeStartElement("anlyInfo"); - //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); - writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); - writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); - writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); + //XmlWriterUtil.writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); + XmlWriterUtil.writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo xmlw.writeEndElement();//method @@ -705,7 +707,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO if (CITATION_BLOCK_NAME.equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if 
(DatasetFieldConstant.subject.equals(fieldDTO.getTypeName())) { - writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", + XmlWriterUtil.writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", fieldDTO.getTypeClass(), "citation", lang); } @@ -732,14 +734,10 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!keywordValue.isEmpty()) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue(keywordValue, DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -753,13 +751,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // Keyword } @@ -792,14 +786,10 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), } if (!topicClassificationValue.isEmpty()) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if 
(!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue( topicClassificationValue, DatasetFieldConstant.topicClassValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -813,13 +803,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // topcClas } @@ -857,7 +843,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); if(!authorAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",authorAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); } xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty @@ -880,7 +866,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); if(!contributorType.isEmpty()){ - writeAttribute(xmlw,"role", contributorType); + 
XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); } xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId @@ -922,10 +908,10 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); if(!datasetContactAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); } if(!datasetContactEmail.isEmpty()){ - writeAttribute(xmlw,"email",datasetContactEmail); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); } xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty @@ -969,14 +955,10 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); - if (!producerAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", producerAffiliation); - } - if (!producerAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", producerAbbreviation); - } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); /*if (!producerLogo.isEmpty()) { - writeAttribute(xmlw, "role", producerLogo); + XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); }*/ xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty @@ -987,7 +969,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } } - writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); + XmlWriterUtil.writeFullElement(xmlw, "prodDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.productionDate)); // productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the method for details) @@ -1033,17 +1015,11 @@ private static void 
writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - if (!distributorAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", distributorAffiliation); - } - if (!distributorAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", distributorAbbreviation); - } - if (!distributorURL.isEmpty()) { - writeAttribute(xmlw, "URI", distributorURL); + xmlw.writeAttribute("xml:lang", lang); } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", distributorAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", distributorAbbreviation); + XmlWriterUtil.writeAttribute(xmlw, "URI", distributorURL); xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } @@ -1102,7 +1078,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO (In other words - titlStmt is mandatory! -L.A.) 
*/ xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", citation); + XmlWriterUtil.writeFullElement(xmlw, "titl", citation); if (IDNo != null && !IDNo.trim().equals("")) { xmlw.writeStartElement("IDNo"); @@ -1115,7 +1091,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO xmlw.writeEndElement(); // titlStmt - writeFullElement(xmlw,"biblCit",citation); + XmlWriterUtil.writeFullElement(xmlw,"biblCit",citation); xmlw.writeEndElement(); //citation if (url != null && !url.trim().equals("") ) { xmlw.writeStartElement("ExtLink"); @@ -1164,10 +1140,10 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); if(!descriptionDate.isEmpty()){ - writeAttribute(xmlw,"date",descriptionDate); + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); } if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); + xmlw.writeAttribute("xml:lang", lang); } xmlw.writeCharacters(descriptionText); xmlw.writeEndElement(); //abstract @@ -1201,7 +1177,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); if(!grantAgency.isEmpty()){ - writeAttribute(xmlw,"agency",grantAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); } xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno @@ -1235,7 +1211,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); if(!otherIdAgency.isEmpty()){ - writeAttribute(xmlw,"agency",otherIdAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); } xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo @@ -1269,7 +1245,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!softwareName.isEmpty()){ 
xmlw.writeStartElement("software"); if(!softwareVersion.isEmpty()){ - writeAttribute(xmlw,"version",softwareVersion); + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); } xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software @@ -1384,10 +1360,10 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); if(!notesType.isEmpty()){ - writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); } if(!notesSubject.isEmpty()){ - writeAttribute(xmlw,"subject",notesSubject); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); } xmlw.writeCharacters(notesText); xmlw.writeEndElement(); @@ -1412,14 +1388,14 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos // and observations, etc.) if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); + XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); String pidURL = fileDTo.getDataFile().getPidURL(); if (pidURL != null && !pidURL.isEmpty()){ - writeAttribute(xmlw, "URI", pidURL); + xmlw.writeAttribute("URI", pidURL); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileDTo.getDataFile().getFilename()); xmlw.writeEndElement(); // labl @@ -1430,9 +1406,9 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos String contentType = fileDTo.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, 
"level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } @@ -1460,14 +1436,14 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // and observations, etc.) if (!fileJson.containsKey("dataTables")) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileJson.getJsonNumber(("id").toString())); + xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString())); if (fileJson.containsKey("pidUrl")){ - writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); + XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileJson.getString("filename")); xmlw.writeEndElement(); // labl @@ -1482,9 +1458,9 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // specially formatted notes section: if (fileJson.containsKey("contentType")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(fileJson.getString("contentType")); xmlw.writeEndElement(); // notes } @@ -1502,33 +1478,7 
@@ private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) xmlw.writeEndElement(); // txt } - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - return fieldDTO.getSinglePrimitive(); - } - } - } - return null; - } - - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - String rawVal = fieldDTO.getSinglePrimitive(); - if (fieldDTO.isControlledVocabularyField()) { - return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), - locale, false); - } - } - } - } - return null; - } + private static List dto2PrimitiveList(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { @@ -1562,104 +1512,6 @@ private static FieldDTO dto2FieldDTO(DatasetVersionDTO datasetVersionDTO, String return null; } - private static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { - //For the simplest Elements we can - if (values != null && !values.isEmpty()) { - for (String value : values) { - xmlw.writeStartElement(name); - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - } - - private static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, - String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) - throws XMLStreamException { - - if (values != 
null && !values.isEmpty()) { - Locale defaultLocale = Locale.getDefault(); - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); - if (localeVal != null) { - - value = localeVal; - writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); - } else { - writeFullElement(xmlw, name, value); - } - } else { - writeFullElement(xmlw, name, value); - } - } - if (lang != null && !defaultLocale.getLanguage().equals(lang)) { - // Get values in dataset metadata language - // Loop before testing fieldTypeClass to be ready for external CVV - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); - if (localeVal != null) { - writeFullElement(xmlw, name, localeVal, lang); - } - } - } - } - } - } - - private static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, - String fieldTypeName, String lang) throws XMLStreamException { - // Get the default value - String val = dto2Primitive(version, fieldTypeName); - Locale defaultLocale = Locale.getDefault(); - // Get the language-specific value for the default language - // A null value is returned if this is not a CVV field - String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); - String requestedLocaleVal = null; - if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { - // Also get the value in the requested locale/lang if that's not the default - // lang. 
- requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); - } - // FWIW locale-specific vals will only be non-null for CVV values (at present) - if (localeVal == null && requestedLocaleVal == null) { - // Not CVV/no translations so print without lang tag - writeFullElement(xmlw, name, val); - } else { - // Print in either/both languages if we have values - if (localeVal != null) { - // Print the value for the default locale with it's own lang tag - writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); - } - // Also print in the request lang (i.e. the metadata language for the dataset) if a value exists, print it with a lang tag - if (requestedLocaleVal != null) { - writeFullElement(xmlw, name, requestedLocaleVal, lang); - } - } - } - - private static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - writeFullElement(xmlw, name, value, null); - } - - private static void writeFullElement (XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { - //For the simplest Elements we can - if (!StringUtilisEmpty(value)) { - xmlw.writeStartElement(name); - if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - - private static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - if (!StringUtilisEmpty(value)) { - xmlw.writeAttribute(name, value); - } - } private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { @@ -1747,14 +1599,14 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t } private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException { xmlw.writeStartElement("varGrp"); - writeAttribute(xmlw, "ID", "VG" + varGrp.getJsonNumber("id").toString()); + 
xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString()); String vars = ""; JsonArray varsInGroup = varGrp.getJsonArray("dataVariableIds"); for (int j=0;j sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { xmlw.writeStartElement("sumStat"); - writeAttribute(xmlw, "type", sumStat.getKey()); + XmlWriterUtil.writeAttribute(xmlw, "type", sumStat.getKey()); xmlw.writeCharacters(((JsonString)sumStat.getValue()).getString()); xmlw.writeEndElement(); // sumStat } @@ -1917,7 +1769,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject varCat = varCats.getJsonObject(i); xmlw.writeStartElement("catgry"); if (varCat.getBoolean("isMissing")) { - writeAttribute(xmlw, "missing", "Y"); + xmlw.writeAttribute("missing", "Y"); } // catValu @@ -1928,7 +1780,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // label if (varCat.containsKey("label")) { xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "category"); + xmlw.writeAttribute("level", "category"); xmlw.writeCharacters(varCat.getString("label")); xmlw.writeEndElement(); // labl } @@ -1936,7 +1788,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // catStat if (varCat.containsKey("frequency")) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("type", "freq"); Double freq = varCat.getJsonNumber("frequency").doubleValue(); // if frequency is actually a long value, we want to write "100" instead of // "100.0" @@ -1955,8 +1807,8 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject cm = catMetas.getJsonObject(j); if (cm.getString("categoryValue").equals(varCat.getString("value"))) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "wgtd", "wgtd"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("wgtd", "wgtd"); + xmlw.writeAttribute("type", "freq"); 
xmlw.writeCharacters(cm.getJsonNumber("wFreq").toString()); xmlw.writeEndElement(); // catStat break; @@ -1972,24 +1824,24 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // varFormat xmlw.writeEmptyElement("varFormat"); if(dvar.containsKey("variableFormatType")) { - writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); + XmlWriterUtil.writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); } else { throw new XMLStreamException("Illegal Variable Format Type!"); } if(dvar.containsKey("format")) { - writeAttribute(xmlw, "formatname", dvar.getString("format")); + XmlWriterUtil.writeAttribute(xmlw, "formatname", dvar.getString("format")); } //experiment writeAttribute(xmlw, "schema", dv.getFormatSchema()); if(dvar.containsKey("formatCategory")) { - writeAttribute(xmlw, "category", dvar.getString("formatCategory")); + XmlWriterUtil.writeAttribute(xmlw, "category", dvar.getString("formatCategory")); } // notes if (dvar.containsKey("UNF") && !dvar.getString("UNF").isBlank()) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "subject", "Universal Numeric Fingerprint"); - writeAttribute(xmlw, "level", "variable"); - writeAttribute(xmlw, "type", "Dataverse:UNF"); + xmlw.writeAttribute("subject", "Universal Numeric Fingerprint"); + xmlw.writeAttribute("level", "variable"); + xmlw.writeAttribute("type", "Dataverse:UNF"); xmlw.writeCharacters(dvar.getString("UNF")); xmlw.writeEndElement(); //notes } @@ -2020,8 +1872,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) } xmlw.writeStartElement("fileDscr"); String fileId = fileJson.getJsonNumber("id").toString(); - writeAttribute(xmlw, "ID", "f" + fileId); - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileId); + xmlw.writeAttribute("ID", "f" + fileId); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileId); xmlw.writeStartElement("fileTxt"); 
xmlw.writeStartElement("fileName"); @@ -2064,9 +1916,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) // (Universal Numeric Fingerprint) signature: if ((dt!=null) && (dt.containsKey("UNF") && !dt.getString("UNF").isBlank())) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_UNF); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_UNF); + xmlw.writeAttribute("subject", NOTE_SUBJECT_UNF); xmlw.writeCharacters(dt.getString("UNF")); xmlw.writeEndElement(); // notes } @@ -2075,9 +1927,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) JsonArray tags = fileJson.getJsonArray("tabularTags"); for (int j = 0; j < tags.size(); j++) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_TAG); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_TAG); + xmlw.writeAttribute("subject", NOTE_SUBJECT_TAG); xmlw.writeCharacters(tags.getString(j)); xmlw.writeEndElement(); // notes } @@ -2091,13 +1943,7 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) - private static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - return true; - } public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java new file mode 100644 index 00000000000..e932307d3d0 --- /dev/null +++ 
b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -0,0 +1,174 @@ +package edu.harvard.iq.dataverse.util.xml; + +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; + +public class XmlWriterUtil { + + public static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { + // For the simplest Elements we can + if (values != null && !values.isEmpty()) { + for (String value : values) { + xmlw.writeStartElement(name); + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + } + + public static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, + String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) + throws XMLStreamException { + + if (values != null && !values.isEmpty()) { + Locale defaultLocale = Locale.getDefault(); + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); + if (localeVal != null) { + + value = localeVal; + writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); + } else { + writeFullElement(xmlw, name, value); + } + } else { + writeFullElement(xmlw, name, value); + } + } + if (lang != null && !defaultLocale.getLanguage().equals(lang)) { + // Get values in dataset metadata language + // Loop before testing fieldTypeClass to be ready for external CVV + for (String value : values) { + if 
(fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); + if (localeVal != null) { + writeFullElement(xmlw, name, localeVal, lang); + } + } + } + } + } + } + + public static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, + String fieldTypeName, String lang) throws XMLStreamException { + // Get the default value + String val = dto2Primitive(version, fieldTypeName); + Locale defaultLocale = Locale.getDefault(); + // Get the language-specific value for the default language + // A null value is returned if this is not a CVV field + String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); + String requestedLocaleVal = null; + if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { + // Also get the value in the requested locale/lang if that's not the default + // lang. + requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); + } + // FWIW locale-specific vals will only be non-null for CVV values (at present) + if (localeVal == null && requestedLocaleVal == null) { + // Not CVV/no translations so print without lang tag + writeFullElement(xmlw, name, val); + } else { + // Print in either/both languages if we have values + if (localeVal != null) { + // Print the value for the default locale with it's own lang tag + writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); + } + // Also print in the request lang (i.e. 
the metadata language for the dataset) + // if a value exists, print it with a lang tag + if (requestedLocaleVal != null) { + writeFullElement(xmlw, name, requestedLocaleVal, lang); + } + } + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + return fieldDTO.getSinglePrimitive(); + } + } + } + return null; + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + String rawVal = fieldDTO.getSinglePrimitive(); + if (fieldDTO.isControlledVocabularyField()) { + return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), + locale, false); + } + } + } + } + return null; + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + writeFullElement(xmlw, name, value, null); + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { + // For the simplest Elements we can + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + if (DvObjectContainer.isMetadataLanguageSet(lang)) { + writeAttribute(xmlw, "xml:lang", lang); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeAttribute(name, value); + } + } + + public static 
boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { + if (!elementAdded) { + xmlw.writeStartElement(elementName); + } + + return true; + } + + public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + for (String key : attributeMap.keySet()) { + writeAttribute(xmlw, key, attributeMap.get(key)); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static boolean writeOpenTagIfNeeded(XMLStreamWriter xmlw, String tag, boolean element_check) throws XMLStreamException { + // check if the current tag isn't opened + if (!element_check) { + xmlw.writeStartElement(tag); // + } + return true; + } +} From 1a46155a5ed37545455a194650301cbee5691358 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:32:21 -0400 Subject: [PATCH 073/486] note duplicate method --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49ceabc5900..820ced3d6c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -1428,6 +1428,8 @@ public static void writeFundingReferencesElement(XMLStreamWriter xmlw, DatasetVe writeEndTag(xmlw, fundingReference_check); } + + //Duplicates XmlWriterUtil.dto2Primitive private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { // give the single value of the given metadata for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { From ace656ce890d6bd4ecb1b7000995e0934a2c214e Mon Sep 17 00:00:00 2001 
From: Jim Myers Date: Fri, 3 May 2024 10:33:13 -0400 Subject: [PATCH 074/486] remove xml template doc, refactor to generate xml, adding OA fields --- .../pidproviders/doi/XmlMetadataTemplate.java | 819 +++++++++++++----- .../doi/datacite_metadata_template.xml | 2 +- 2 files changed, 617 insertions(+), 204 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 30e4dfd79cc..8a5fe9f9d32 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1,208 +1,599 @@ package edu.harvard.iq.dataverse.pidproviders.doi; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URL; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; -import java.util.logging.Level; +import java.util.Map; +import java.util.Optional; import java.util.logging.Logger; +import java.util.stream.Collectors; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.ocpsoft.common.util.Strings; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import 
edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.api.dto.DatasetDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; +import jakarta.json.JsonObject; public class XmlMetadataTemplate { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; - - static { - try (InputStream in = XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } + private static final Logger logger = Logger.getLogger(XmlMetadataTemplate.class.getName()); - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public 
List getProducers() { - return producers; - } + public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; + public static final String XML_SCHEMA_VERSION = "4.5"; - public void setProducers(List producers) { - this.producers = producers; - } + private DoiMetadata doiMetadata; - public List getContacts() { - return contacts; + public XmlMetadataTemplate() { } - public void setContacts(List contacts) { - this.contacts = contacts; + public XmlMetadataTemplate(DoiMetadata doiMetadata) { + this.doiMetadata = doiMetadata; } - public String getDescription() { - return description; + public String generateXML(DvObject dvObject) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + generateXML(dvObject, outputStream); + + String xml = outputStream.toString(); + return XmlPrinter.prettyPrintXml(xml); + } catch (XMLStreamException | IOException e) { + logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); + e.printStackTrace(); + } + return null; } - public void setDescription(String description) { - this.description = description; - } + private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { + // Could/should use dataset metadata language for metadata from DvObject itself? + String language = null; // machine locale? e.g. for Publisher which is global + String metadataLanguage = null; // when set, otherwise = language? 
+ XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); + xmlw.writeStartElement("resource"); + xmlw.writeDefaultNamespace(XML_NAMESPACE); + xmlw.writeAttribute("xmlns:xsi", XML_XSI); + xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); + + writeIdentifier(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors()); + writeTitles(xmlw, dvObject, language); + writePublisher(xmlw, dvObject); + writePublicationYear(xmlw, dvObject); + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + writeResourceType(xmlw, dvObject); + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + writeDescriptions(xmlw, dvObject); + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); - public List getAuthors() { - return authors; - } + StringBuilder contributorsElement = new StringBuilder(); + if (doiMetadata.getContacts() != null) { + for (String[] contact : doiMetadata.getContacts()) { + if (!contact[0].isEmpty()) { + contributorsElement.append("" + + StringEscapeUtils.escapeXml10(contact[0]) + ""); + if (!contact[1].isEmpty()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(contact[1]) + ""); + } + contributorsElement.append(""); + } + } + } - public void setAuthors(List authors) { - this.authors = authors; - } + if (doiMetadata.getProducers() != null) { + for (String[] producer : doiMetadata.getProducers()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[0]) + + ""); + if (!producer[1].isEmpty()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[1]) + ""); + } + contributorsElement.append(""); + } + } - public XmlMetadataTemplate() { - } + String relIdentifiers = 
generateRelatedIdentifiers(dvObject); - public List getDatafileIdentifiers() { - return datafileIdentifiers; } - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - public XmlMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); + /** + * 3, Title(s) (with optional type sub-properties) (M) + * + * @param xmlw + * The Stream writer + * @param dvObject + * The dataset/file + * @param language + * the metadata language + * @return + * @throws XMLStreamException + */ + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { + String title = doiMetadata.getTitle(); + String subTitle = null; + List altTitles = null; + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersion(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); + + if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && 
!String.join("", altTitles).isBlank())) { + xmlw.writeStartElement("titles"); + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + + Map attributes = new HashMap(); + attributes.put("titleType", "Subtitle"); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, title); + + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } + + xmlw.writeEndElement(); } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); + } + + /** + * 1, Identifier (with mandatory type sub-property) (M) Note DataCite expects + * identifierType="DOI" but OpenAire allows several others (see + * https://guidelines.readthedocs.io/en/latest/data/field_identifier.html#d-identifiertype) + * Dataverse is currently only capable of creating DOI, Handle, or URL types + * from the OpenAire list (the last from PermaLinks) ToDo - If we add,e.g., an + * ARK or PURL provider, this code has to change or we'll need to refactor so + * that the identifiertype and id value can be sent via the JSON/ORE + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset or file with the PID + * @throws XMLStreamException + */ + private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + GlobalId pid = dvObject.getGlobalId(); + // identifier with identifierType attribute + Map identifier_map = new HashMap(); + String identifierType = null; + String identifier = null; + switch (pid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = pid.asRawIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = pid.asRawIdentifier(); + break; + case 
PermaLinkPidProvider.PERMA_PROTOCOL: + identifierType = "URL"; + identifier = pid.asURL(); + break; } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); + Map attributeMap = new HashMap(); + attributeMap.put("identifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "identifier", attributeMap, identifier); + } + + /** + * 2, Creator (with optional given name, family name, name identifier and + * affiliation sub-properties) (M) + * + * @param xmlw + * The stream writer + * @param authorList + * - the list of authors + * @throws XMLStreamException + */ + public void writeCreators(XMLStreamWriter xmlw, List authorList) throws XMLStreamException { + // creators -> creator -> creatorName with nameType attribute, givenName, + // familyName, nameIdentifier + // write all creators + xmlw.writeStartElement("creators"); // + + if (authorList != null && !authorList.isEmpty()) { + for (DatasetAuthor author : authorList) { + String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); + String affiliation = null; + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + affiliation = StringEscapeUtils.escapeXml10(author.getAffiliation().getDisplayValue()); + } + String nameIdentifier = null; + String nameIdentifierScheme = null; + if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { + nameIdentifier = author.getIdValue(); + if(nameIdentifier != null) { + // Normalizes to the URL form of the identifier, returns null if the identifier + // is not valid given the type + nameIdentifier = author.getIdentifierAsUrl(); + } + nameIdentifierScheme = author.getIdType(); + } + + if (StringUtils.isNotBlank(creatorName)) { + xmlw.writeStartElement("creator"); // + JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, 
false, + StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + + writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); + xmlw.writeEndElement(); // + } + + else { + // Authors unavailable + XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + } + } } + xmlw.writeEndElement(); // } - public String generateXML(DvObject dvObject) { + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // publisher should already be non null - :unav if it wasn't available + XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); + } + + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; + String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (this.publisherYear != null) { + if (doiMetadata.getPublisherYear() != null) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. 
- publisherYearFinal = this.publisherYear; + pubYear = doiMetadata.getPublisherYear(); } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) - .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors != null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() - && !author.getIdValue().isEmpty() && author.getAffiliation() != null - && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement - .append("" + author.getAffiliation().getDisplayValue() + ""); + XmlWriterUtil.writeFullElement(xmlw, "publicationYear", String.valueOf(pubYear)); + } + + /** + * 6, Subject (with scheme sub-property) R + * + * @param xmlw + * The Steam writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // subjects -> subject with subjectScheme and schemeURI attributes when + // available + boolean subjectsCreated = false; + List subjects = null; + List compoundKeywords = null; + List compoundTopics = null; + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // 
fields + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + dv.getDatasetSubjects(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { + compoundKeywords = dsf.getDatasetFieldCompoundValues(); + } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { + compoundTopics = dsf.getDatasetFieldCompoundValues(); } - creatorsElement.append(""); } - } else { - creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) - .append(""); + } else if (dvObject instanceof DataFile df) { + subjects = df.getTagLabels(); + } + for (String subject : subjects) { + if (StringUtils.isNotBlank(subject)) { + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElement(xmlw, "subject", StringEscapeUtils.escapeXml10(subject)); + } } + for (DatasetFieldCompoundValue keywordFieldValue : compoundKeywords) { + String keyword = null; + String scheme = null; + String schemeUri = null; + + for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.keyword: + keyword = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(keyword)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); + } + } + for 
(DatasetFieldCompoundValue topicFieldValue : compoundTopics) { + String topic = null; + String scheme = null; + String schemeUri = null; + + for (DatasetField subField : topicFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.topicClassValue: + topic = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(topic)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(topic)); + } + } + if (subjectsCreated) { + xmlw.writeEndElement(); + } + } - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + /** + * 7, Contributor (with optional given name, family name, name identifier + * and affiliation sub-properties) + * + * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, java.lang.String) + * + * @param xmlw The stream writer + * @param dvObject The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean contributorsCreated = false; + List compoundProducers = null; + List compoundDistributors = null; + List compoundContacts = null; + List compoundContributors = null; + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + //ToDo Include for files? 
+ /*if(dvObject instanceof DataFile df) { + dvObject = df.getOwner(); + }*/ + + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producer: + compoundProducers = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.distributor: + compoundDistributors = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contact: + compoundContacts = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contributor: + compoundContributors = dsf.getDatasetFieldCompoundValues(); + } + } + } + + + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { + String producer = null; + String affiliation = null; + + for (DatasetField subField : producerFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producerName: + producer = subField.getValue(); + break; + case DatasetFieldConstant.producerAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(producer)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(producer, false, false); + writeEntityElements(xmlw, "contributor", "Producer", entityObject, affiliation, null, null); + } - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); + } + + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { + String distributor = null; + String affiliation = null; + + 
for (DatasetField subField : distributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributorName: + distributor = subField.getValue(); + break; + case DatasetFieldConstant.distributorAffiliation: + affiliation = subField.getValue(); + break; } } + if (StringUtils.isNotBlank(distributor)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(distributor, false, false); + writeEntityElements(xmlw, "contributor", "Distributor", entityObject, affiliation, null, null); + } + } + for (DatasetFieldCompoundValue contactFieldValue : compoundContacts) { + String contact = null; + String affiliation = null; + + for (DatasetField subField : contactFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.datasetContactName: + contact = subField.getValue(); + break; + case DatasetFieldConstant.datasetContactAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(contact)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contact, false, false); + writeEntityElements(xmlw, "contributor", "ContactPerson", entityObject, affiliation, null, null); + } - if (this.getProducers() != null) { - for (String[] producer : this.getProducers()) { - contributorsElement.append("" + producer[0] - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); + } + for (DatasetFieldCompoundValue contributorFieldValue : compoundContributors) { + String contributor = null; + String contributorType = null; + + for (DatasetField subField : contributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case 
DatasetFieldConstant.contributorName: + contributor = subField.getValue(); + break; + case DatasetFieldConstant.contributorType: + contributorType = subField.getValue().replace(" ", ""); + break; } - contributorsElement.append(""); } + // QDR - doesn't have Funder in the contributor type list. + // Using a string isn't i18n + if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contributor, false, false); + writeEntityElements(xmlw, "contributor", contributorType, entityObject, null, null, null); + } + + } + + if (contributorsCreated) { + xmlw.writeEndElement(); } + } - String relIdentifiers = generateRelatedIdentifiers(dvObject); + private void writeEntityElements(XMLStreamWriter xmlw, String elementName, String type, JsonObject entityObject, String affiliation, String nameIdentifier, String nameIdentifierScheme) throws XMLStreamException { + xmlw.writeStartElement(elementName); + Map attributeMap = new HashMap(); + if (StringUtils.isNotBlank(type)) { + attributeMap.put("contributorType", type); + } + // person name=, + if (entityObject.getBoolean("isPerson")) { + attributeMap.put("nameType", "Personal"); + } else { + attributeMap.put("nameType", "Organizational"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, elementName + "Name", attributeMap, + StringEscapeUtils.escapeXml10(entityObject.getString("fullName"))); + if (entityObject.containsKey("givenName")) { + XmlWriterUtil.writeFullElement(xmlw, "givenName", StringEscapeUtils.escapeXml10(entityObject.getString("givenName"))); + } + if (entityObject.containsKey("familyName")) { + XmlWriterUtil.writeFullElement(xmlw, "familyName", StringEscapeUtils.escapeXml10(entityObject.getString("familyName"))); + } - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + if 
(nameIdentifier != null) { + attributeMap.clear(); + URL url; + try { + url = new URL(nameIdentifier); + String protocol = url.getProtocol(); + String authority = url.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + attributeMap.put("schemeURI", site); + attributeMap.put("nameIdentifierScheme", nameIdentifierScheme); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "nameIdentifier", attributeMap, nameIdentifier); + } catch (MalformedURLException e) { + logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); + } + } + + if (StringUtils.isNotBlank(affiliation)) { + attributeMap.clear(); + if (affiliation.startsWith("https://ror.org/")) { - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("affiliationIdentifierScheme", "ROR"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "affiliation", attributeMap, StringEscapeUtils.escapeXml10(affiliation)); + } + xmlw.writeEndElement(); } private String generateRelatedIdentifiers(DvObject dvObject) { @@ -210,9 +601,67 @@ private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); if (dvObject.isInstanceofDataset()) { Dataset dataset = (Dataset) dvObject; + + List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); + if (!relatedPublications.isEmpty()) { + for (DatasetRelPublication relatedPub : relatedPublications) { + String pubIdType = relatedPub.getIdType(); + String identifier = relatedPub.getIdNumber(); + /* + * Note - with identifier and url fields, it's not clear that there's a single + * way those two fields are used for all identifier types In QDR, at this time, + * doi and isbn types always have the raw number in the identifier field, + * whereas there are examples where URLs are in the identifier or url fields. 
+ * The code here addresses those practices and is not generic. + */ + if (pubIdType != null) { + switch (pubIdType) { + case "doi": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "DOI", "IsSupplementTo", "doi:" + identifier); + } + break; + case "isbn": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "ISBN", "IsSupplementTo", "ISBN:" + identifier); + } + break; + case "url": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "URL", "IsSupplementTo", identifier); + } else { + String pubUrl = relatedPub.getUrl(); + if (pubUrl != null && pubUrl.length() > 0) { + appendIdentifier(sb, "URL", "IsSupplementTo", pubUrl); + } + } + break; + default: + if (identifier != null && identifier.length() != 0) { + if (pubIdType.equalsIgnoreCase("arXiv")) { + pubIdType = "arXiv"; + } else if (pubIdType.equalsIgnoreCase("handle")) { + // Initial cap required for handle + pubIdType = "Handle"; + } else if (!pubIdType.equals("bibcode")) { + pubIdType = pubIdType.toUpperCase(); + } + // For all others, do a generic attempt to match the identifier type to the + // datacite schema and send the raw identifier as the value + appendIdentifier(sb, pubIdType, "IsSupplementTo", identifier); + } + break; + } + + } else { + logger.info(relatedPub.getIdNumber() + relatedPub.getUrl() + relatedPub.getTitle()); + } + } + } + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - datafileIdentifiers = new ArrayList<>(); + List datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { if (dataFile.getGlobalId() != null) { if (sb.toString().isEmpty()) { @@ -229,14 +678,23 @@ private String generateRelatedIdentifiers(DvObject dvObject) { } } else if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" - + df.getOwner().getGlobalId() + ""); - sb.append(""); + appendIdentifier(sb, "DOI", 
"IsPartOf", df.getOwner().getGlobalId().asString()); + if (sb.length() != 0) { + // Should always be true + sb.append(""); + } } return sb.toString(); } + + private void appendIdentifier(StringBuilder sb, String idType, String relationType, String identifier) { + if (sb.toString().isEmpty()) { + sb.append(""); + } + sb.append("" + identifier + ""); + } + public void generateFileIdentifiers(DvObject dvObject) { if (dvObject.isInstanceofDataset()) { @@ -244,71 +702,26 @@ public void generateFileIdentifiers(DvObject dvObject) { if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - datafileIdentifiers = new ArrayList<>(); + List datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - + template.substring(x, template.length() - 1); + // int x = xmlMetadata.indexOf("") - 1; + // xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", + // dataFile.getIdentifier()); + // xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + // + template.substring(x, template.length() - 1); } } else { - xmlMetadata = xmlMetadata.replace( - "${relatedIdentifier}", - ""); + // xmlMetadata = xmlMetadata.replace( + // "${relatedIdentifier}", + // ""); } } } - public static String getTemplate() { - return template; - } - - public static void setTemplate(String template) { - XmlMetadataTemplate.template = template; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void 
setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } - } \ No newline at end of file diff --git a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml index abe7ce79972..8348691d4c7 100644 --- a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml +++ b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml @@ -1,5 +1,5 @@ - ${identifier} From dba03e2bb1597d4e01317139d950e305d0d9dec5 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:33:40 -0400 Subject: [PATCH 075/486] refactor source of XML info --- .../iq/dataverse/export/DataCiteExporter.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java index 8caf32b2df0..c21d6b5cd1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java @@ -7,6 +7,7 @@ import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import io.gdcc.spi.export.XMLExporter; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.OutputStream; @@ -20,11 +21,7 @@ */ @AutoService(Exporter.class) public class DataCiteExporter implements XMLExporter { - - private static String DEFAULT_XML_NAMESPACE = "http://datacite.org/schema/kernel-3"; - 
private static String DEFAULT_XML_SCHEMALOCATION = "http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"; - private static String DEFAULT_XML_VERSION = "3.0"; - + public static final String NAME = "Datacite"; @Override @@ -60,17 +57,17 @@ public Boolean isAvailableToUsers() { @Override public String getXMLNameSpace() { - return DataCiteExporter.DEFAULT_XML_NAMESPACE; + return XmlMetadataTemplate.XML_NAMESPACE; } @Override public String getXMLSchemaLocation() { - return DataCiteExporter.DEFAULT_XML_SCHEMALOCATION; + return XmlMetadataTemplate.XML_SCHEMA_LOCATION; } @Override public String getXMLSchemaVersion() { - return DataCiteExporter.DEFAULT_XML_VERSION; + return XmlMetadataTemplate.XML_SCHEMA_VERSION; } } From af3e24b0b7bc1bff2c378f2a682455fe6aef0ee2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:34:09 -0400 Subject: [PATCH 076/486] add code to get raw alphanumeric pid value --- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index a542cb52ac0..1c8783c5bd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -100,6 +100,13 @@ public String asURL() { } return null; } + + public String asRawIdentifier() { + if (protocol == null || authority == null || identifier == null) { + return ""; + } + return authority + separator + identifier; + } From fa23884647c893285e456d749a741d6d36ac90eb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 13:58:32 -0400 Subject: [PATCH 077/486] remove duplicate method --- .../edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java index 
e932307d3d0..8ec426ead1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -145,13 +145,6 @@ public static void writeAttribute(XMLStreamWriter xmlw, String name, String valu } } - public static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - - return true; - } public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { if (!StringUtils.isEmpty(value)) { From 0d22d6c580df4aa689b019dfdc88321a59e02e4d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 13:59:22 -0400 Subject: [PATCH 078/486] dates, resourceType, alternate Ids --- .../pidproviders/doi/XmlMetadataTemplate.java | 224 +++++++++++++++++- 1 file changed, 215 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8a5fe9f9d32..92bf7afd273 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -6,12 +6,14 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -26,6 +28,8 @@ import org.jsoup.select.Elements; import org.ocpsoft.common.util.Strings; +import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; +import edu.harvard.iq.dataverse.ControlledVocabularyValue; import 
edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; @@ -33,9 +37,11 @@ import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldValue; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; @@ -207,8 +213,6 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag */ private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { GlobalId pid = dvObject.getGlobalId(); - // identifier with identifierType attribute - Map identifier_map = new HashMap(); String identifierType = null; String identifier = null; switch (pid.getProtocol()) { @@ -315,9 +319,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt // subjects -> subject with subjectScheme and schemeURI attributes when // available boolean subjectsCreated = false; - List subjects = null; - List compoundKeywords = null; - List compoundTopics = null; + List subjects = new ArrayList(); + List compoundKeywords = new ArrayList(); + List compoundTopics = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields if (dvObject instanceof Dataset d) { @@ -419,10 +423,10 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt */ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { boolean contributorsCreated = false; - List compoundProducers = null; - List compoundDistributors = null; - List compoundContacts = null; - List 
compoundContributors = null; + List compoundProducers = new ArrayList(); + List compoundDistributors = new ArrayList(); + List compoundContacts = new ArrayList(); + List compoundContributors = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields //ToDo Include for files? @@ -596,6 +600,208 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin xmlw.writeEndElement(); } + /** + * 8, Date (with type sub-property) (R) + * + * @param xmlw The Steam writer + * @param dvObject The dataset/datafile + * @throws XMLStreamException + */ + private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean datesWritten = false; + String dateOfDistribution = null; + String dateOfProduction = null; + String dateOfDeposit = null; + Date releaseDate = null; + List datesOfCollection = new ArrayList(); + + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + releaseDate = dv.getReleaseTime(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributionDate: + dateOfDistribution = dsf.getValue(); + break; + case DatasetFieldConstant.productionDate: + dateOfProduction = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfDeposit: + dateOfDeposit = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfCollection: + datesOfCollection = dsf.getDatasetFieldCompoundValues(); + } + } + } + Map attributes = new HashMap(); + if (StringUtils.isNotBlank(dateOfDistribution)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Issued"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDistribution); + } + // dates -> date with dateType attribute + + if (StringUtils.isNotBlank(dateOfProduction)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", 
datesWritten); + attributes.put("dateType", "Created"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfProduction); + } + if (StringUtils.isNotBlank(dateOfDeposit)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Submitted"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDeposit); + } + + if (releaseDate != null) { + String date = Util.getDateTimeFormat().format(releaseDate); + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Available"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, date); + } + if (datesOfCollection != null) { + for (DatasetFieldCompoundValue collectionDateFieldValue : datesOfCollection) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.dateOfCollectionStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.dateOfCollectionEnd: + endDate = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Collected"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (datesWritten) { + xmlw.writeEndElement(); + } + } + + + // 9, Language (MA), language + private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + //Currently not supported. Spec indicates one 'primary' language. Could send the first entry in DatasetFieldConstant.language or send iff there is only one entry, and/or default to the machine's default lang? 
+ return; + } + + // 10, ResourceType (with mandatory general type + // description sub- property) (M) + private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List kindOfDataValues = new ArrayList(); + Map attributes = new HashMap(); + + attributes.put("resourceTypeGeneral", "Dataset"); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.kindOfData: + kindOfDataValues = dsf.getControlledVocabularyValues(); + break; + } + + if (kindOfDataValues.isEmpty()) { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } else { + for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { + String resourceType = kindOfDataValue.getStrValue(); + if (StringUtils.isNotBlank(resourceType)) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); + } + } + } + } + } + } + + /** + * 11 AlternateIdentifier (with type sub-property) (O) + * + * @param xmlw The Steam writer + * @param dvObject The dataset/datafile + * @throws XMLStreamException + */ + private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List otherIdentifiers = new ArrayList(); + Set altPids = dvObject.getAlternativePersistentIndentifiers(); + + boolean alternatesWritten = false; + + Map attributes = new HashMap(); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (DatasetFieldConstant.otherId.equals(dsf.getDatasetFieldType().getName())) { + otherIdentifiers = dsf.getDatasetFieldCompoundValues(); + break; + } + } + } + if 
(!altPids.isEmpty()) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + } + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. + identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternativeIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + + } + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { + String identifierType = null; + String identifier = null; + for (DatasetField subField : otherIdentifier.getChildDatasetFields()) { + identifierType = ":unav"; + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.otherIdAgency: + identifierType = subField.getValue(); + break; + case DatasetFieldConstant.otherIdValue: + identifier = subField.getValue(); + break; + } + } + attributes.put("alternativeIdentifierType", identifierType); + if (!StringUtils.isBlank(identifier)) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } + } + if (alternatesWritten) { + xmlw.writeEndElement(); + } + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new 
StringBuilder(); From d69bf414f3cc3b5fc8e0214b0c5c4fc6f7ec155e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 8 May 2024 13:19:56 -0400 Subject: [PATCH 079/486] more methods --- .../pidproviders/doi/XmlMetadataTemplate.java | 313 +++++++++++++----- 1 file changed, 223 insertions(+), 90 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 92bf7afd273..eb2465257a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -47,6 +47,7 @@ import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; @@ -146,7 +147,6 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM } - /** * 3, Title(s) (with optional type sub-properties) (M) * @@ -802,132 +802,265 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } - private String generateRelatedIdentifiers(DvObject dvObject) { + /** + * 12, RelatedIdentifier (with type and relation type sub-properties) (R) + * + * @param xmlw The Steam writer + * @param dvObject the dataset/datafile + * @throws XMLStreamException + */ + private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean relatedIdentifiersWritten = false; + + Map attributes = new HashMap(); - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) 
dvObject; + if (dvObject instanceof Dataset dataset) { List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); if (!relatedPublications.isEmpty()) { for (DatasetRelPublication relatedPub : relatedPublications) { + attributes.clear(); + String pubIdType = relatedPub.getIdType(); String identifier = relatedPub.getIdNumber(); + String url = relatedPub.getUrl(); /* * Note - with identifier and url fields, it's not clear that there's a single - * way those two fields are used for all identifier types In QDR, at this time, - * doi and isbn types always have the raw number in the identifier field, - * whereas there are examples where URLs are in the identifier or url fields. - * The code here addresses those practices and is not generic. + * way those two fields are used for all identifier types. The code here is + * ~best effort to interpret those fields. */ - if (pubIdType != null) { - switch (pubIdType) { - case "doi": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "DOI", "IsSupplementTo", "doi:" + identifier); - } - break; - case "isbn": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "ISBN", "IsSupplementTo", "ISBN:" + identifier); - } - break; - case "url": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "URL", "IsSupplementTo", identifier); - } else { - String pubUrl = relatedPub.getUrl(); - if (pubUrl != null && pubUrl.length() > 0) { - appendIdentifier(sb, "URL", "IsSupplementTo", pubUrl); - } - } - break; - default: - if (identifier != null && identifier.length() != 0) { - if (pubIdType.equalsIgnoreCase("arXiv")) { - pubIdType = "arXiv"; - } else if (pubIdType.equalsIgnoreCase("handle")) { - // Initial cap required for handle - pubIdType = "Handle"; - } else if (!pubIdType.equals("bibcode")) { - pubIdType = pubIdType.toUpperCase(); - } - // For all others, do a generic attempt to match the identifier type to the - // datacite schema and 
send the raw identifier as the value - appendIdentifier(sb, pubIdType, "IsSupplementTo", identifier); - } - break; + pubIdType = getCanonicalPublicationType(pubIdType); + + // Prefer url if set, otherwise check identifier + String relatedIdentifier = url; + if (StringUtils.isBlank(relatedIdentifier)) { + relatedIdentifier = identifier; + } + // For types where we understand the protocol, get the canonical form + switch (pubIdType) { + case "DOI": + if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; } + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + break; + default: + + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URL(relatedIdentifier); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (MalformedURLException e) { + // Just an identifier + } + } - } else { - logger.info(relatedPub.getIdNumber() + relatedPub.getUrl() + relatedPub.getTitle()); + if (StringUtils.isNotBlank(relatedIdentifier)) { + // Still have a valid entry + attributes.put("relatedIdentifierType", pubIdType); + attributes.put("relationType", "IsSupplementTo"); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, 
"relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); } } } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - List datafileIdentifiers = new ArrayList<>(); + attributes.clear(); + attributes.put("relationType", "HasPart"); for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append(""); + GlobalId pid = dataFile.getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); } - sb.append("" - + dataFile.getGlobalId() + ""); } } - - if (!sb.toString().isEmpty()) { - sb.append(""); - } } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - appendIdentifier(sb, "DOI", "IsPartOf", df.getOwner().getGlobalId().asString()); - if (sb.length() != 0) { - // Should always be true - sb.append(""); + } else if (dvObject instanceof DataFile df) { + GlobalId pid = df.getOwner().getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + + attributes.clear(); + attributes.put("relationType", "IsPartOf"); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); + } } } - return sb.toString(); + if (relatedIdentifiersWritten) { + xmlw.writeEndElement(); + } } - private void appendIdentifier(StringBuilder sb, String idType, String 
relationType, String identifier) { - if (sb.toString().isEmpty()) { - sb.append(""); + static HashMap relatedIdentifierTypeMap = new HashMap(); + + private static String getCanonicalPublicationType(String pubIdType) { + if (relatedIdentifierTypeMap.isEmpty()) { + relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); + relatedIdentifierTypeMap.put("arXiv", "arXiv"); + relatedIdentifierTypeMap.put("bibcode".toLowerCase(), "bibcode"); + relatedIdentifierTypeMap.put("DOI".toLowerCase(), "DOI"); + relatedIdentifierTypeMap.put("EAN13".toLowerCase(), "EAN13"); + relatedIdentifierTypeMap.put("EISSN".toLowerCase(), "EISSN"); + relatedIdentifierTypeMap.put("Handle".toLowerCase(), "Handle"); + relatedIdentifierTypeMap.put("IGSN".toLowerCase(), "IGSN"); + relatedIdentifierTypeMap.put("ISBN".toLowerCase(), "ISBN"); + relatedIdentifierTypeMap.put("ISSN".toLowerCase(), "ISSN"); + relatedIdentifierTypeMap.put("ISTC".toLowerCase(), "ISTC"); + relatedIdentifierTypeMap.put("LISSN".toLowerCase(), "LISSN"); + relatedIdentifierTypeMap.put("LSID".toLowerCase(), "LSID"); + relatedIdentifierTypeMap.put("PISSN".toLowerCase(), "PISSN"); + relatedIdentifierTypeMap.put("PMID".toLowerCase(), "PMID"); + relatedIdentifierTypeMap.put("PURL".toLowerCase(), "PURL"); + relatedIdentifierTypeMap.put("UPC".toLowerCase(), "UPC"); + relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); + relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); + relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); + // Add entry for Handle protocol so this can be used with GlobalId/getProtocol() + relatedIdentifierTypeMap.put("hdl".toLowerCase(), "Handle"); } - sb.append("" + identifier + ""); + return relatedIdentifierTypeMap.get(pubIdType); } - public void generateFileIdentifiers(DvObject dvObject) { + private void writeSize(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // sizes -> size + boolean sizesWritten = false; + List dataFiles = new ArrayList(); - if 
(dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + Long size = dataFile.getFilesize(); + if (size != -1) { + sizesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "sizes", sizesWritten); + XmlWriterUtil.writeFullElement(xmlw, "size", size.toString()); + } + } + } + if (sizesWritten) { + xmlw.writeEndElement(); + } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + } - List datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - // int x = xmlMetadata.indexOf("") - 1; - // xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", - // dataFile.getIdentifier()); - // xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - // + template.substring(x, template.length() - 1); + private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean formatsWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + String format = dataFile.getContentType(); + if (StringUtils.isNotBlank(format)) { + formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); + XmlWriterUtil.writeFullElement(xmlw, "format", format); } + /* Should original formats be sent? What about original sizes above? 
+ if(dataFile.isTabularData()) { + String originalFormat = dataFile.getOriginalFileFormat(); + if(StringUtils.isNotBlank(originalFormat)) { + XmlWriterUtil.writeFullElement(xmlw, "format", format); + } + }*/ + } + } + if (formatsWritten) { + xmlw.writeEndElement(); + } + + } - } else { - // xmlMetadata = xmlMetadata.replace( - // "${relatedIdentifier}", - // ""); + private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + Dataset d = null; + if(dvObject instanceof Dataset) { + d = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + d = ((DataFile) dvObject).getOwner(); + } + if(d !=null) { + DatasetVersion dv = d.getLatestVersionForCopy(); + String version = dv.getFriendlyVersionNumber(); + if (StringUtils.isNotBlank(version)) { + XmlWriterUtil.writeFullElement(xmlw, "version", version); } } + } + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) { + // rightsList -> rights with rightsURI attribute + xmlw.writeStartElement("rightsList"); // + + // set terms from the info:eu-repo-Access-Terms vocabulary + writeRightsHeader(xmlw, language); + boolean restrict = false; + boolean closed = false; + + if (datasetVersionDTO.isFileAccessRequest()) { + restrict = true; + } + if (datasetVersionDTO.getFiles() != null) { + for (int i = 0; i < datasetVersionDTO.getFiles().size(); i++) { + if (datasetVersionDTO.getFiles().get(i).isRestricted()) { + closed = true; + break; + } + } + } + + if (restrict && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); + } else if (!restrict && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); + } else { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess"); + } + xmlw.writeEndElement(); // + + writeRightsHeader(xmlw, language); + if (datasetVersionDTO.getLicense() != null) { + xmlw.writeAttribute("rightsURI", datasetVersionDTO.getLicense().getUri()); + 
xmlw.writeCharacters(datasetVersionDTO.getLicense().getName()); + } + xmlw.writeEndElement(); // + xmlw.writeEndElement(); // + } } \ No newline at end of file From 04b367f641fe8e8da77c8eceafd7a012985f9a1f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:41:07 -0400 Subject: [PATCH 080/486] only one field to look for --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 943693355a3..d723cf3d528 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1344,6 +1344,7 @@ public List getGeographicCoverage() { } } + break; } return geoCoverages; } From 003431dde79bc7b80077c1aa6d0998329e85f4e3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:41:30 -0400 Subject: [PATCH 081/486] use common util method --- .../edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 0c861cb6c09..c0e3057696a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1715,12 +1715,12 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // range.getBeginValueType().getName().equals(DB_VAR_RANGE_TYPE_POINT)) { if (range.getBoolean("hasBeginValueType") && range.getBoolean("isBeginValueTypePoint")) { if (range.containsKey("beginValue")) { - invalrngAdded = XmlWriterUtil.checkParentElement(xmlw, "invalrng", invalrngAdded); + invalrngAdded = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("item"); 
XmlWriterUtil.writeAttribute(xmlw, "VALUE", range.getString("beginValue")); } } else { - invalrngAdded = XmlWriterUtil.checkParentElement(xmlw, "invalrng", invalrngAdded); + invalrngAdded = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("range"); if (range.getBoolean("hasBeginValueType") && range.containsKey("beginValue")) { if (range.getBoolean("isBeginValueTypeMin")) { From fea2f5e01d9a9e3d37f1714e3aaba8dc32f84ab0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:42:29 -0400 Subject: [PATCH 082/486] access rights descriptions, geolocations, funding refs --- .../pidproviders/doi/XmlMetadataTemplate.java | 297 +++++++++++++++--- 1 file changed, 249 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index eb2465257a1..be55b7a4837 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -41,16 +41,20 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.export.DDIExporter; +import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import 
edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; @@ -117,34 +121,6 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM writeDescriptions(xmlw, dvObject); writeGeoLocations(xmlw, dvObject); writeFundingReferences(xmlw, dvObject); - - StringBuilder contributorsElement = new StringBuilder(); - if (doiMetadata.getContacts() != null) { - for (String[] contact : doiMetadata.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + StringEscapeUtils.escapeXml10(contact[0]) + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(contact[1]) + ""); - } - contributorsElement.append(""); - } - } - } - - if (doiMetadata.getProducers() != null) { - for (String[] producer : doiMetadata.getProducers()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[0]) - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[1]) + ""); - } - contributorsElement.append(""); - } - } - - String relIdentifiers = generateRelatedIdentifiers(dvObject); - } /** @@ -1025,42 +1001,267 @@ private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr } - private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) { + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // rightsList -> rights with rightsURI attribute xmlw.writeStartElement("rightsList"); // // set terms from the info:eu-repo-Access-Terms vocabulary - writeRightsHeader(xmlw, language); - boolean restrict = false; + xmlw.writeStartElement("rights"); // + DatasetVersion dv = null; boolean closed = false; + if (dvObject instanceof Dataset d) { + dv = 
d.getLatestVersionForCopy(); + closed = dv.isHasRestrictedFile(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); - if (datasetVersionDTO.isFileAccessRequest()) { - restrict = true; - } - if (datasetVersionDTO.getFiles() != null) { - for (int i = 0; i < datasetVersionDTO.getFiles().size(); i++) { - if (datasetVersionDTO.getFiles().get(i).isRestricted()) { - closed = true; - break; - } - } + closed = df.isRestricted(); } + TermsOfUseAndAccess terms = dv.getTermsOfUseAndAccess(); + boolean requestsAllowed = terms.isFileAccessRequest(); + License license = terms.getLicense(); - if (restrict && closed) { + if (requestsAllowed && closed) { xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); - } else if (!restrict && closed) { + } else if (!requestsAllowed && closed) { xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); } else { xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess"); } xmlw.writeEndElement(); // + xmlw.writeStartElement("rights"); // - writeRightsHeader(xmlw, language); - if (datasetVersionDTO.getLicense() != null) { - xmlw.writeAttribute("rightsURI", datasetVersionDTO.getLicense().getUri()); - xmlw.writeCharacters(datasetVersionDTO.getLicense().getName()); + if (license != null) { + xmlw.writeAttribute("rightsURI", license.getUri().toString()); + xmlw.writeCharacters(license.getName()); + } else { + xmlw.writeAttribute("rightsURI", DatasetUtil.getLicenseURI(dv)); + xmlw.writeCharacters(BundleUtil.getStringFromBundle("license.custom.description")); + ; } xmlw.writeEndElement(); // - xmlw.writeEndElement(); // + xmlw.writeEndElement(); // + } + + private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // descriptions -> description with descriptionType attribute + boolean descriptionsWritten = false; + List descriptions = null; + DatasetVersion dv = null; + + if (dvObject instanceof Dataset d) { + dv = 
d.getLatestVersionForCopy(); + dv.getDescriptions(); + } else if (dvObject instanceof DataFile df) { + String description = df.getDescription(); + if (description != null) { + descriptions = new ArrayList(); + descriptions.add(description); + } + } + Map attributes = new HashMap(); + attributes.put("descriptionType", "Abstract"); + for (String description : descriptions) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + ; + } + + if (dv != null) { + List dsfs = dv.getDatasetFields(); + + for (DatasetField dsf : dsfs) { + + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.software: + attributes.clear(); + attributes.put("descriptionType", "TechnicalInfo"); + List dsfcvs = dsf.getDatasetFieldCompoundValues(); + for (DatasetFieldCompoundValue dsfcv : dsfcvs) { + + String softwareName = null; + String softwareVersion = null; + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + if (DatasetFieldConstant.softwareName.equals(childDsf.getDatasetFieldType().getName())) { + softwareName = childDsf.getValue(); + } else if (DatasetFieldConstant.softwareVersion.equals(childDsf.getDatasetFieldType().getName())) { + softwareVersion = childDsf.getValue(); + } + } + if (StringUtils.isNotBlank(softwareName)) { + if (StringUtils.isNotBlank(softwareVersion)) { + } + softwareName = softwareName + ", " + softwareVersion; + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName); + } + } + break; + case DatasetFieldConstant.originOfSources: + case DatasetFieldConstant.characteristicOfSources: + case DatasetFieldConstant.accessToSources: + attributes.clear(); + attributes.put("descriptionType", "Methods"); + String method = 
dsf.getValue(); + if (StringUtils.isNotBlank(method)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, method); + + } + break; + case DatasetFieldConstant.series: + attributes.clear(); + attributes.put("descriptionType", "SeriesInformation"); + dsfcvs = dsf.getDatasetFieldCompoundValues(); + for (DatasetFieldCompoundValue dsfcv : dsfcvs) { + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + + if (DatasetFieldConstant.seriesInformation.equals(childDsf.getDatasetFieldType().getName())) { + String seriesInformation = childDsf.getValue(); + if (StringUtils.isNotBlank(seriesInformation)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, seriesInformation); + } + break; + } + } + } + break; + case DatasetFieldConstant.notesText: + attributes.clear(); + attributes.put("descriptionType", "Other"); + String notesText = dsf.getValue(); + if (StringUtils.isNotBlank(notesText)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, notesText); + } + break; + + } + } + + } + + if (descriptionsWritten) { + xmlw.writeEndElement(); // + } + } + + private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + if (dvObject instanceof Dataset d) { + boolean geoLocationsWritten = false; + DatasetVersion dv = d.getLatestVersionForCopy(); + + List places = dv.getGeographicCoverage(); + if (places != null && !places.isEmpty()) { + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + + for (String[] place : places) { + ArrayList placeList = new 
ArrayList(); + for (String placePart : place) { + placeList.add(placePart); + } + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + } + } + boolean boundingBoxFound = false; + boolean productionPlaceFound = false; + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.geographicBoundingBox: + boundingBoxFound = true; + for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) { + List childDsfs = dsfcv.getChildDatasetFields(); + String nLatitude = null; + String sLatitude = null; + String eLongitude = null; + String wLongitude = null; + for (DatasetField childDsf : childDsfs) { + switch (childDsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.northLatitude: + nLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.southLatitude: + sLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.eastLongitude: + eLongitude = childDsf.getValue(); + break; + case DatasetFieldConstant.westLongitude: + wLongitude = childDsf.getValue(); + + } + } + if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { + // A point + xmlw.writeStartElement("geoLocationPoint"); + XmlWriterUtil.writeFullElement(xmlw, "pointLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "pointLatitude", sLatitude); + xmlw.writeEndElement(); + } else { + // A box + xmlw.writeStartElement("geoLocationBox"); + XmlWriterUtil.writeFullElement(xmlw, "westBoundLongitude", wLongitude); + XmlWriterUtil.writeFullElement(xmlw, "eastBoundLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "southBoundLatitude", sLatitude); + XmlWriterUtil.writeFullElement(xmlw, "northBoundLatitude", nLatitude); + xmlw.writeEndElement(); + + } + } + } + 
case DatasetFieldConstant.productionPlace: + productionPlaceFound = true; + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + List prodPlaces = dsf.getValues(); + for (String prodPlace : prodPlaces) { + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + } + break; + } + if (boundingBoxFound && productionPlaceFound) { + break; + } + } + if (geoLocationsWritten) { + xmlw.writeEndElement(); // + } + } + + } + + + private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // fundingReferences -> fundingReference -> funderName, awardNumber + boolean fundingReferenceWritten = false; + DatasetVersion dv = null; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + } + if (dv != null) { + List funders = dv.getFunders(); + if (!funders.isEmpty()) { + + for (String funder : funders) { + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + xmlw.writeEndElement(); // + } + } + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + } + } } } \ No newline at end of file From 3c52b6a2031a55c8840948681930a1824b02820b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 17 May 2024 15:36:18 -0400 Subject: [PATCH 083/486] altTitles npe --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index be55b7a4837..a2c744be2ed 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -138,7 +138,7 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { String title = doiMetadata.getTitle(); String subTitle = null; - List altTitles = null; + List altTitles = new ArrayList<>(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersion(); From bab2a0d270b766916dfd9578fd25ab05332f0958 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 18 May 2024 12:25:07 -0400 Subject: [PATCH 084/486] fixes and test --- .../pidproviders/doi/XmlMetadataTemplate.java | 64 ++++++----- .../doi/datacite/XmlMetadataTemplateTest.java | 108 ++++++++++++++++++ 2 files changed, 144 insertions(+), 28 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a2c744be2ed..e9b7b0faa26 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -21,6 +21,7 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; + import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -84,6 +85,7 @@ public String generateXML(DvObject dvObject) { generateXML(dvObject, outputStream); String xml = outputStream.toString(); + logger.info(xml); return XmlPrinter.prettyPrintXml(xml); } catch (XMLStreamException | IOException e) { logger.severe("Unable to generate DataCite 
XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); @@ -98,10 +100,11 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM String metadataLanguage = null; // when set, otherwise = language? XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); + xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - + writeIdentifier(xmlw, dvObject); writeCreators(xmlw, doiMetadata.getAuthors()); writeTitles(xmlw, dvObject, language); @@ -121,6 +124,8 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM writeDescriptions(xmlw, dvObject); writeGeoLocations(xmlw, dvObject); writeFundingReferences(xmlw, dvObject); + xmlw.writeEndElement(); + xmlw.flush(); } /** @@ -726,32 +731,34 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } } - if (!altPids.isEmpty()) { + + if (altPids != null && !altPids.isEmpty()) { alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); - } - for (AlternativePersistentIdentifier altPid : altPids) { - String identifierType = null; - String identifier = null; - switch (altPid.getProtocol()) { - case AbstractDOIProvider.DOI_PROTOCOL: - identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); - identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); - break; - case HandlePidProvider.HDL_PROTOCOL: - identifierType = "Handle"; - identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); - break; - default: - // The AlternativePersistentIdentifier class isn't really ready for anything but - // doi or handle pids, but will add this as a default. 
- identifierType = ":unav"; - identifier = altPid.getAuthority() + altPid.getIdentifier(); - break; - } - attributes.put("alternativeIdentifierType", identifierType); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. + identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternativeIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } } + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { String identifierType = null; String identifier = null; @@ -1061,10 +1068,11 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X } Map attributes = new HashMap(); attributes.put("descriptionType", "Abstract"); - for (String description : descriptions) { - descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); - ; + if (descriptions != null) { + for (String description : descriptions) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, 
description); + } } if (dv != null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java new file mode 100644 index 00000000000..e576398a474 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -0,0 +1,108 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.ExtendWith; + +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.*; +import static 
org.mockito.ArgumentMatchers.any; + +@ExtendWith(MockitoExtension.class) +@LocalJvmSettings +@JvmSetting(key = JvmSettings.SITE_URL, value = "https://example.com") + +public class XmlMetadataTemplateTest { + + static DataverseServiceBean dataverseSvc; + static SettingsServiceBean settingsSvc; + static PidProviderFactoryBean pidService; + static final String DEFAULT_NAME = "LibraScholar"; + + @BeforeAll + public static void setupMocks() { + dataverseSvc = Mockito.mock(DataverseServiceBean.class); + settingsSvc = Mockito.mock(SettingsServiceBean.class); + BrandingUtil.injectServices(dataverseSvc, settingsSvc); + + // initial values (needed here for other tests where this method is reused!) + Mockito.when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(DEFAULT_NAME); + Mockito.when(dataverseSvc.getRootDataverseName()).thenReturn(DEFAULT_NAME); + + pidService = Mockito.mock(PidProviderFactoryBean.class); + Mockito.when(pidService.isGlobalIdLocallyUnique(any(GlobalId.class))).thenReturn(true); + Mockito.when(pidService.getProducer()).thenReturn("RootDataverse"); + + } + + /** + */ + @Test + public void testDataCiteXMLCreation() throws IOException { + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setTitle("A Title"); + List creators = new ArrayList(); + creators.add("Alice"); + creators.add("Bob"); + doiMetadata.setCreators(creators); + doiMetadata.setPublisher("Dataverse"); + XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); + + Dataset d = new Dataset(); + GlobalId doi = new GlobalId("doi", "10.5072", "FK2/ABCDEF", null, null, null); + d.setGlobalId(doi); + DatasetVersion dv = new DatasetVersion(); + TermsOfUseAndAccess toa = new TermsOfUseAndAccess(); + toa.setTermsOfUse("Some terms"); + dv.setTermsOfUseAndAccess(toa); + dv.setDataset(d); + DatasetFieldType primitiveDSFType = new DatasetFieldType(DatasetFieldConstant.title, + DatasetFieldType.FieldType.TEXT, false); + DatasetField testDatasetField = new 
DatasetField(); + + dv.setVersionState(VersionState.DRAFT); + + testDatasetField.setDatasetVersion(dv); + testDatasetField.setDatasetFieldType(primitiveDSFType); + testDatasetField.setSingleValue("First Title"); + List fields = new ArrayList<>(); + fields.add(testDatasetField); + dv.setDatasetFields(fields); + ArrayList dsvs = new ArrayList<>(); + dsvs.add(0, dv); + d.setVersions(dsvs); + + String xml = template.generateXML(d); + System.out.println("Output is " + xml); + + } + +} From 3cca63d2f6ff4052852876d9ccfe52424d2da615 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 14:55:46 -0400 Subject: [PATCH 085/486] fix for empty rel pub entry --- .../pidproviders/doi/XmlMetadataTemplate.java | 179 +++++++++--------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e9b7b0faa26..8725feca546 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -21,7 +21,6 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; - import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -97,14 +96,14 @@ public String generateXML(DvObject dvObject) { private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { // Could/should use dataset metadata language for metadata from DvObject itself? String language = null; // machine locale? e.g. for Publisher which is global - String metadataLanguage = null; // when set, otherwise = language? + String metadataLanguage = null; // when set, otherwise = language? 
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); - + xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - + writeIdentifier(xmlw, dvObject); writeCreators(xmlw, doiMetadata.getAuthors()); writeTitles(xmlw, dvObject, language); @@ -242,7 +241,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) String nameIdentifierScheme = null; if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { nameIdentifier = author.getIdValue(); - if(nameIdentifier != null) { + if (nameIdentifier != null) { // Normalizes to the URL form of the identifier, returns null if the identifier // is not valid given the type nameIdentifier = author.getIdentifierAsUrl(); @@ -392,14 +391,16 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt } /** - * 7, Contributor (with optional given name, family name, name identifier - * and affiliation sub-properties) + * 7, Contributor (with optional given name, family name, name identifier and + * affiliation sub-properties) * * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, - * java.lang.String, java.lang.String, java.lang.String) + * java.lang.String, java.lang.String, java.lang.String) * - * @param xmlw The stream writer - * @param dvObject The Dataset/DataFile + * @param xmlw + * The stream writer + * @param dvObject + * The Dataset/DataFile * @throws XMLStreamException */ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -410,11 +411,11 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X List compoundContributors = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields - //ToDo Include for files? 
- /*if(dvObject instanceof DataFile df) { - dvObject = df.getOwner(); - }*/ - + // ToDo Include for files? + /* + * if(dvObject instanceof DataFile df) { dvObject = df.getOwner(); } + */ + if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersionForCopy(); for (DatasetField dsf : dv.getDatasetFields()) { @@ -433,8 +434,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } } - - + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { String producer = null; String affiliation = null; @@ -457,7 +457,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } - + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { String distributor = null; String affiliation = null; @@ -517,7 +517,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X break; } } - // QDR - doesn't have Funder in the contributor type list. + // QDR - doesn't have Funder in the contributor type list. 
// Using a string isn't i18n if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); @@ -526,7 +526,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } - + if (contributorsCreated) { xmlw.writeEndElement(); } @@ -568,7 +568,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); } } - + if (StringUtils.isNotBlank(affiliation)) { attributeMap.clear(); if (affiliation.startsWith("https://ror.org/")) { @@ -584,8 +584,10 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin /** * 8, Date (with type sub-property) (R) * - * @param xmlw The Steam writer - * @param dvObject The dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile * @throws XMLStreamException */ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -668,15 +670,16 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } - // 9, Language (MA), language private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - //Currently not supported. Spec indicates one 'primary' language. Could send the first entry in DatasetFieldConstant.language or send iff there is only one entry, and/or default to the machine's default lang? + // Currently not supported. Spec indicates one 'primary' language. Could send + // the first entry in DatasetFieldConstant.language or send iff there is only + // one entry, and/or default to the machine's default lang? 
return; } - - // 10, ResourceType (with mandatory general type - // description sub- property) (M) + + // 10, ResourceType (with mandatory general type + // description sub- property) (M) private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { List kindOfDataValues = new ArrayList(); Map attributes = new HashMap(); @@ -711,8 +714,10 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X /** * 11 AlternateIdentifier (with type sub-property) (O) * - * @param xmlw The Steam writer - * @param dvObject The dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile * @throws XMLStreamException */ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -731,7 +736,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } } - + if (altPids != null && !altPids.isEmpty()) { alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); for (AlternativePersistentIdentifier altPid : altPids) { @@ -788,8 +793,10 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) /** * 12, RelatedIdentifier (with type and relation type sub-properties) (R) * - * @param xmlw The Steam writer - * @param dvObject the dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * the dataset/datafile * @throws XMLStreamException */ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -813,6 +820,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th * way those two fields are used for all identifier types. The code here is * ~best effort to interpret those fields. 
*/ + pubIdType = getCanonicalPublicationType(pubIdType); // Prefer url if set, otherwise check identifier @@ -821,49 +829,52 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th relatedIdentifier = identifier; } // For types where we understand the protocol, get the canonical form - switch (pubIdType) { - case "DOI": - if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { - relatedIdentifier = "doi:" + relatedIdentifier; - } - try { - GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); - relatedIdentifier = pid.asRawIdentifier(); - } catch (IllegalArgumentException e) { - relatedIdentifier = null; - } - break; - case "Handle": - if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { - relatedIdentifier = "hdl:" + relatedIdentifier; - } - try { - GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); - relatedIdentifier = pid.asRawIdentifier(); - } catch (IllegalArgumentException e) { - relatedIdentifier = null; - } - break; - case "URL": - break; - default: - - // For non-URL types, if a URL is given, split the string to get a schemeUri - try { - URL relatedUrl = new URL(relatedIdentifier); - String protocol = relatedUrl.getProtocol(); - String authority = relatedUrl.getAuthority(); - String site = String.format("%s://%s", protocol, authority); - relatedIdentifier = relatedIdentifier.substring(site.length()); - attributes.put("schemeURI", site); - } catch (MalformedURLException e) { - // Just an identifier + if (pubIdType != null) { + switch (pubIdType) { + case "DOI": + if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || 
relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + break; + default: + + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URL(relatedIdentifier); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (MalformedURLException e) { + // Just an identifier + } } } - if (StringUtils.isNotBlank(relatedIdentifier)) { // Still have a valid entry - attributes.put("relatedIdentifierType", pubIdType); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + } attributes.put("relationType", "IsSupplementTo"); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); @@ -905,7 +916,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th static HashMap relatedIdentifierTypeMap = new HashMap(); - + private static String getCanonicalPublicationType(String pubIdType) { if (relatedIdentifierTypeMap.isEmpty()) { relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); @@ -976,13 +987,12 @@ private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); XmlWriterUtil.writeFullElement(xmlw, "format", format); } - /* Should original formats be sent? What about original sizes above? 
- if(dataFile.isTabularData()) { - String originalFormat = dataFile.getOriginalFileFormat(); - if(StringUtils.isNotBlank(originalFormat)) { - XmlWriterUtil.writeFullElement(xmlw, "format", format); - } - }*/ + /* + * Should original formats be sent? What about original sizes above? + * if(dataFile.isTabularData()) { String originalFormat = + * dataFile.getOriginalFileFormat(); if(StringUtils.isNotBlank(originalFormat)) + * { XmlWriterUtil.writeFullElement(xmlw, "format", format); } } + */ } } if (formatsWritten) { @@ -993,19 +1003,19 @@ private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { Dataset d = null; - if(dvObject instanceof Dataset) { + if (dvObject instanceof Dataset) { d = (Dataset) dvObject; } else if (dvObject instanceof DataFile) { d = ((DataFile) dvObject).getOwner(); } - if(d !=null) { + if (d != null) { DatasetVersion dv = d.getLatestVersionForCopy(); - String version = dv.getFriendlyVersionNumber(); + String version = dv.getFriendlyVersionNumber(); if (StringUtils.isNotBlank(version)) { XmlWriterUtil.writeFullElement(xmlw, "version", version); } } - + } private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -1244,7 +1254,6 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } - private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // fundingReferences -> fundingReference -> funderName, awardNumber boolean fundingReferenceWritten = false; From 30c80a9a5a27c51d8ca8130375d358aadb447a4f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 15:32:13 -0400 Subject: [PATCH 086/486] bugs: remove bad nesting, dupe values --- .../pidproviders/doi/XmlMetadataTemplate.java | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8725feca546..74da57094c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -158,20 +158,22 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { xmlw.writeStartElement("titles"); - XmlWriterUtil.writeFullElement(xmlw, "title", title, language); - + if (StringUtils.isNotBlank(title)) { + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + } Map attributes = new HashMap(); - attributes.put("titleType", "Subtitle"); - - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, title); - - attributes.clear(); - attributes.put("titleType", "AlternativeTitle"); - for (String altTitle : altTitles) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + if (StringUtils.isNotBlank(subTitle)) { + attributes.put("titleType", "Subtitle"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, subTitle); + } + if ((altTitles != null && !String.join("", altTitles).isBlank())) { + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } } - xmlw.writeEndElement(); } } @@ -250,12 +252,9 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) } if (StringUtils.isNotBlank(creatorName)) { - xmlw.writeStartElement("creator"); // JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); - 
writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); - xmlw.writeEndElement(); // } else { @@ -693,21 +692,21 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X kindOfDataValues = dsf.getControlledVocabularyValues(); break; } - - if (kindOfDataValues.isEmpty()) { - // Write an attribute only element if there are no kindOfData values. - xmlw.writeStartElement("resourceType"); - xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); - xmlw.writeEndElement(); - } else { - for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { - String resourceType = kindOfDataValue.getStrValue(); - if (StringUtils.isNotBlank(resourceType)) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); - } + } + if (kindOfDataValues.isEmpty()) { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } else { + for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { + String resourceType = kindOfDataValue.getStrValue(); + if (StringUtils.isNotBlank(resourceType)) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); } } } + } } From a2acdebbac758317b5d2d07fc1af01859f8bfa85 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 16:54:02 -0400 Subject: [PATCH 087/486] add XML Validation to test --- .../iq/dataverse/util/xml/XmlValidator.java | 5 +++ .../doi/datacite/XmlMetadataTemplateTest.java | 45 +++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java index 586ca50b6fd..cec64ab95b7 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java @@ -24,7 +24,12 @@ public class XmlValidator { private static final Logger logger = Logger.getLogger(XmlValidator.class.getCanonicalName()); public static boolean validateXmlSchema(String fileToValidate, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { + Source xmlFile = new StreamSource(new File(fileToValidate)); + return validateXmlSchema(xmlFile, schemaToValidateAgainst); + } + + public static boolean validateXmlSchema(Source xmlFile, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = schemaFactory.newSchema(schemaToValidateAgainst); Validator validator = schema.newValidator(); diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index e576398a474..c1bbc3bebc1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.pidproviders.doi.datacite; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetAuthor; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.DataverseServiceBean; @@ -19,12 +21,17 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import 
edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import edu.harvard.iq.dataverse.util.xml.XmlValidator; import java.io.IOException; +import java.io.StringReader; +import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; +import javax.xml.transform.stream.StreamSource; + import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; @@ -32,6 +39,7 @@ import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; @@ -69,10 +77,32 @@ public static void setupMocks() { public void testDataCiteXMLCreation() throws IOException { DoiMetadata doiMetadata = new DoiMetadata(); doiMetadata.setTitle("A Title"); - List creators = new ArrayList(); - creators.add("Alice"); - creators.add("Bob"); - doiMetadata.setCreators(creators); + DatasetFieldType dft = new DatasetFieldType(DatasetFieldConstant.authorName, FieldType.TEXT, false); + dft.setDisplayFormat("#VALUE"); + DatasetFieldType dft2 = new DatasetFieldType(DatasetFieldConstant.authorAffiliation, FieldType.TEXT, false); + dft2.setDisplayFormat("#VALUE"); + DatasetAuthor alice = new DatasetAuthor(); + DatasetField df1 = new DatasetField(); + df1.setDatasetFieldType(dft); + df1.setSingleValue("Alice"); + alice.setName(df1); + DatasetField df2 = new DatasetField(); + df2.setDatasetFieldType(dft2); + df2.setSingleValue("Harvard University"); + alice.setAffiliation(df2); + DatasetAuthor bob = new DatasetAuthor(); + DatasetField df3 = new DatasetField(); + df3.setDatasetFieldType(dft); + df3.setSingleValue("Bob"); + bob.setName(df3); + DatasetField df4 = new DatasetField(); + df4.setDatasetFieldType(dft2); + df4.setSingleValue("QDR"); + bob.setAffiliation(df4); + List authors = new ArrayList<>(); + authors.add(alice); + 
authors.add(bob); + doiMetadata.setAuthors(authors); doiMetadata.setPublisher("Dataverse"); XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); @@ -102,6 +132,13 @@ public void testDataCiteXMLCreation() throws IOException { String xml = template.generateXML(d); System.out.println("Output is " + xml); + try { + StreamSource source = new StreamSource(new StringReader(xml)); + source.setSystemId("DataCite XML for test dataset"); + assertTrue(XmlValidator.validateXmlSchema(source, new URL("https://schema.datacite.org/meta/kernel-4/metadata.xsd"))); + } catch (SAXException e) { + System.out.println("Invalid schema: " + e.getMessage()); + } } From 3ec7a0b680ec5f04d650e830bb391c6be1f176f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 16:24:09 -0400 Subject: [PATCH 088/486] fix contributorType --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 74da57094c4..6e4d81d6248 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -535,7 +535,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin xmlw.writeStartElement(elementName); Map attributeMap = new HashMap(); if (StringUtils.isNotBlank(type)) { - attributeMap.put("contributorType", type); + xmlw.writeAttribute("contributorType", type); } // person name=, if (entityObject.getBoolean("isPerson")) { From 842dee678530391264b2869ec71ab70258901189 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 17:21:57 -0400 Subject: [PATCH 089/486] add geolocations element and multiple geolocation --- .../pidproviders/doi/XmlMetadataTemplate.java | 15 +++++++++++---- 1 file changed, 11 
insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 6e4d81d6248..d0986616bb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1174,15 +1174,18 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X List places = dv.getGeographicCoverage(); if (places != null && !places.isEmpty()) { // geoLocationPlace - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); - + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); for (String[] place : places) { + xmlw.startElement("geoLocation"); // + ArrayList placeList = new ArrayList(); for (String placePart : place) { placeList.add(placePart); } XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + xmlw.endElement(); // } + } boolean boundingBoxFound = false; boolean productionPlaceFound = false; @@ -1213,7 +1216,8 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } } if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + xmlw.startElement("geoLocation"); // if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { // A point xmlw.writeStartElement("geoLocationPoint"); @@ -1230,15 +1234,18 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); } + xmlw.endElement(); // } } case DatasetFieldConstant.productionPlace: productionPlaceFound = true; // 
geoLocationPlace - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); List prodPlaces = dsf.getValues(); for (String prodPlace : prodPlaces) { + xmlw.startElement("geoLocation"); // XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + xmlw.endElement(); // } break; } From 81a7c4a946ee4e54ae91913c6de6857fb6a553ba Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 17:29:19 -0400 Subject: [PATCH 090/486] typos --- .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index d0986616bb4..127a1930860 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1176,14 +1176,14 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X // geoLocationPlace geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); for (String[] place : places) { - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // ArrayList placeList = new ArrayList(); for (String placePart : place) { placeList.add(placePart); } XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); - xmlw.endElement(); // + xmlw.writeEndElement(); // } } @@ -1217,7 +1217,7 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); - 
xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { // A point xmlw.writeStartElement("geoLocationPoint"); @@ -1234,7 +1234,7 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); } - xmlw.endElement(); // + xmlw.writeEndElement(); // } } case DatasetFieldConstant.productionPlace: @@ -1243,9 +1243,9 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); List prodPlaces = dsf.getValues(); for (String prodPlace : prodPlaces) { - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); - xmlw.endElement(); // + xmlw.writeEndElement(); // } break; } From ed5eab0deb487ebfbb53157a40e2cf409d5f40ab Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 09:53:12 -0400 Subject: [PATCH 091/486] try execute inside the main method trying to avoid a separate tx boundary --- .../command/impl/CuratePublishedDatasetVersionCommand.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index f83041d87bd..fbff40a9c80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -167,9 +167,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { DeleteDatasetVersionCommand cmd; cmd = new DeleteDatasetVersionCommand(getRequest(), savedDataset); - ctxt.engine().submit(cmd); - // Running the command above reindexes 
the dataset, so we don't need to do it - // again in here. + cmd.execute(ctxt); // And update metadata at PID provider ctxt.engine().submit( From 39673f05e6d4394d3549c58a5d487a9c732113c2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 10:41:42 -0400 Subject: [PATCH 092/486] Fix subject, keyword --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 127a1930860..85e28670cfc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -305,8 +305,10 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt // fields if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersionForCopy(); - dv.getDatasetSubjects(); for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.subject)) { + subjects.addAll(dsf.getValues()); + } if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { compoundKeywords = dsf.getDatasetFieldCompoundValues(); } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { @@ -330,7 +332,7 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { - case DatasetFieldConstant.keyword: + case DatasetFieldConstant.keywordValue: keyword = subField.getValue(); break; case DatasetFieldConstant.keywordVocab: From 36097d61bbf0c92aab48db01ff02e1c23b86be1a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 10:41:51 -0400 Subject: [PATCH 093/486] fix geo 
coverage --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index d723cf3d528..6648419216d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1342,9 +1342,8 @@ public List getGeographicCoverage() { } geoCoverages.add(coverageItem); } - + break; } - break; } return geoCoverages; } From a5d3b3e5a40b049176a6c3a205b1199c2117694e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:47:04 -0400 Subject: [PATCH 094/486] adjust funders to include grant number, add xml escaping for description --- .../pidproviders/doi/XmlMetadataTemplate.java | 70 +++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 85e28670cfc..3b6a5cb2906 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1082,7 +1082,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X if (descriptions != null) { for (String description : descriptions) { descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, StringEscapeUtils.escapeXml10(description)); } } @@ -1272,21 +1272,67 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr dv = df.getOwner().getLatestVersionForCopy(); } if (dv != null) { - List funders = 
dv.getFunders(); - if (!funders.isEmpty()) { - - for (String funder : funders) { - if (!StringUtils.isBlank(funder)) { - fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); - xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); - xmlw.writeEndElement(); // + List retList = new ArrayList<>(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributor)) { + boolean addFunder = false; + for (DatasetFieldCompoundValue contributorValue : dsf.getDatasetFieldCompoundValues()) { + String contributorName = null; + String contributorType = null; + for (DatasetField subField : contributorValue.getChildDatasetFields()) { + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorName)) { + contributorName = subField.getDisplayValue(); + } + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorType)) { + contributorType = subField.getRawValue(); + } + } + // SEK 02/12/2019 move outside loop to prevent contrib type to carry over to + // next contributor + // TODO: Consider how this will work in French, Chinese, etc. 
+ if ("Funder".equals(contributorType)) { + if (!StringUtils.isBlank(contributorName)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", contributorName); + xmlw.writeEndElement(); // + } + } } } - if (fundingReferenceWritten) { - xmlw.writeEndElement(); // + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumber)) { + for (DatasetFieldCompoundValue grantObject : dsf.getDatasetFieldCompoundValues()) { + String funder = null; + String awardNumber = null; + for (DatasetField subField : grantObject.getChildDatasetFields()) { + // It would be nice to do something with grantNumberValue (the actual number) + // but schema.org doesn't support it. + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberAgency)) { + String grantAgency = subField.getDisplayValue(); + funder = grantAgency; + } else if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberValue)) { + String grantNumberValue = subField.getDisplayValue(); + awardNumber = grantNumberValue; + } + } + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + if (StringUtils.isNotBlank(awardNumber)) { + writeFullElement(xmlw, null, "awardNumber", null, awardNumber); + } + xmlw.writeEndElement(); // + } + + } } } + + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + } } } \ No newline at end of file From 8a12444d3b835a1df989bc674337897b7feaf1d2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:49:03 -0400 Subject: [PATCH 095/486] bug: add dataset descriptions --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 3b6a5cb2906..564768991cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1069,7 +1069,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X if (dvObject instanceof Dataset d) { dv = d.getLatestVersionForCopy(); - dv.getDescriptions(); + descriptions = dv.getDescriptions(); } else if (dvObject instanceof DataFile df) { String description = df.getDescription(); if (description != null) { From f3e5dc1d00e1d68a734b4c593ae5b874bb5d14a2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:54:50 -0400 Subject: [PATCH 096/486] typo, add xml escape for funder --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 564768991cb..a4fd4585028 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1294,7 +1294,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr if (!StringUtils.isBlank(contributorName)) { fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", contributorName); + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(contributorName)); xmlw.writeEndElement(); // } } @@ -1318,9 
+1318,9 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr if (!StringUtils.isBlank(funder)) { fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (StringUtils.isNotBlank(awardNumber)) { - writeFullElement(xmlw, null, "awardNumber", null, awardNumber); + writeFullElement(xmlw, null, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); } xmlw.writeEndElement(); // } From 5610c950212f2d3d80d7144c37c98e6cd0b71c5e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 12:00:09 -0400 Subject: [PATCH 097/486] still typo --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a4fd4585028..e2883cad1f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1320,7 +1320,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr xmlw.writeStartElement("fundingReference"); // XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (StringUtils.isNotBlank(awardNumber)) { - writeFullElement(xmlw, null, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); + XmlWriterUtil.writeFullElement(xmlw, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); } xmlw.writeEndElement(); // } From 7148b03360b00363f6550aed5d5d851ab7c2c356 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:14:48 -0400 Subject: [PATCH 098/486] 
mark contact as deprecated - unused --- .../java/edu/harvard/iq/dataverse/DatasetFieldConstant.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 22bad42df96..c3e385dcff2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -156,6 +156,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String confidentialityDeclaration="confidentialityDeclaration"; public final static String specialPermissions="specialPermissions"; public final static String restrictions="restrictions"; + @Deprecated + //Doesn't appear to be used and is not datasetContact public final static String contact="contact"; public final static String citationRequirements="citationRequirements"; public final static String depositorRequirements="depositorRequirements"; From 0470459316b3338bd940ea2f9afcf9ec1430eab1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:14:59 -0400 Subject: [PATCH 099/486] more fixes --- .../pidproviders/doi/XmlMetadataTemplate.java | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e2883cad1f9..fd5a4ecf7fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -427,7 +427,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X case DatasetFieldConstant.distributor: compoundDistributors = dsf.getDatasetFieldCompoundValues(); break; - case DatasetFieldConstant.contact: + case DatasetFieldConstant.datasetContact: 
compoundContacts = dsf.getDatasetFieldCompoundValues(); break; case DatasetFieldConstant.contributor: @@ -638,7 +638,7 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } if (releaseDate != null) { - String date = Util.getDateTimeFormat().format(releaseDate); + String date = Util.getDateFormat().format(releaseDate); datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Available"); @@ -660,6 +660,14 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + if(StringUtils.isNotBlank(startDate)) { + Date start = Util.getDateTimeFormat().parse(startDate); + startDate = Util.getDateFormat().format(start); + } + if(StringUtils.isNotBlank(endDate)) { + Date end = Util.getDateTimeFormat().parse(endDate); + endDate = Util.getDateFormat().format(end); + } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); @@ -675,14 +683,14 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // Currently not supported. Spec indicates one 'primary' language. Could send // the first entry in DatasetFieldConstant.language or send iff there is only - // one entry, and/or default to the machine's default lang? + // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? 
return; } // 10, ResourceType (with mandatory general type // description sub- property) (M) private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - List kindOfDataValues = new ArrayList(); + List kindOfDataValues = new ArrayList(); Map attributes = new HashMap(); attributes.put("resourceTypeGeneral", "Dataset"); @@ -691,7 +699,7 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X for (DatasetField dsf : dv.getDatasetFields()) { switch (dsf.getDatasetFieldType().getName()) { case DatasetFieldConstant.kindOfData: - kindOfDataValues = dsf.getControlledVocabularyValues(); + kindOfDataValues.addAll(dsf.getValues()); break; } } @@ -701,8 +709,7 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); xmlw.writeEndElement(); } else { - for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { - String resourceType = kindOfDataValue.getStrValue(); + for (String resourceType : kindOfDataValues) { if (StringUtils.isNotBlank(resourceType)) { XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); } @@ -821,14 +828,16 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th * way those two fields are used for all identifier types. The code here is * ~best effort to interpret those fields. 
*/ + logger.info("Found relpub: " + pubIdType + " " + identifier + " " + url); pubIdType = getCanonicalPublicationType(pubIdType); - +logger.info("Canonical type: " + pubIdType); // Prefer url if set, otherwise check identifier String relatedIdentifier = url; if (StringUtils.isBlank(relatedIdentifier)) { relatedIdentifier = identifier; } + logger.info("Related identifier: " + relatedIdentifier); // For types where we understand the protocol, get the canonical form if (pubIdType != null) { switch (pubIdType) { @@ -836,12 +845,15 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { relatedIdentifier = "doi:" + relatedIdentifier; } + logger.info("Intermediate Related identifier: " + relatedIdentifier); try { GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); relatedIdentifier = pid.asRawIdentifier(); } catch (IllegalArgumentException e) { + logger.warning("Invalid DOI: " + e.getLocalizedMessage()); relatedIdentifier = null; } + logger.info("Final Related identifier: " + relatedIdentifier); break; case "Handle": if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { From c0265da5324c6f68e9356a44261ce6b166ded6b8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:22:29 -0400 Subject: [PATCH 100/486] catch parseexception --- .../pidproviders/doi/XmlMetadataTemplate.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index fd5a4ecf7fb..9ed417e77ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -5,6 +5,7 @@ import java.io.OutputStream; import java.net.MalformedURLException; import 
java.net.URL; +import java.text.ParseException; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -661,12 +662,20 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { if(StringUtils.isNotBlank(startDate)) { + try { Date start = Util.getDateTimeFormat().parse(startDate); startDate = Util.getDateFormat().format(start); + } catch (ParseException e) { + logger.warning("Could not parse date: " + startDate); + } } if(StringUtils.isNotBlank(endDate)) { + try { Date end = Util.getDateTimeFormat().parse(endDate); - endDate = Util.getDateFormat().format(end); + endDate = Util.getDateFormat().format(end); + } catch (ParseException e) { + logger.warning("Could not parse date: " + endDate); + }; } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); From 2ff867850500aa9b2eb5712348b65cf48ed4b917 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:52:42 -0400 Subject: [PATCH 101/486] fix alternateIdentifier, related PID parsing, series --- .../pidproviders/doi/XmlMetadataTemplate.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 9ed417e77ce..7f861b3e42d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -755,7 +755,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } if (altPids != null && !altPids.isEmpty()) { - alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, 
"alternateIdentifiers", alternatesWritten); for (AlternativePersistentIdentifier altPid : altPids) { String identifierType = null; String identifier = null; @@ -775,7 +775,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) identifier = altPid.getAuthority() + altPid.getIdentifier(); break; } - attributes.put("alternativeIdentifierType", identifierType); + attributes.put("alternateIdentifierType", identifierType); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); } @@ -795,9 +795,9 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) break; } } - attributes.put("alternativeIdentifierType", identifierType); + attributes.put("alternateIdentifierType", identifierType); if (!StringUtils.isBlank(identifier)) { - alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); } @@ -851,7 +851,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { switch (pubIdType) { case "DOI": - if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + if (!relatedIdentifier.startsWith("doi:") || !relatedIdentifier.startsWith("http")) { relatedIdentifier = "doi:" + relatedIdentifier; } logger.info("Intermediate Related identifier: " + relatedIdentifier); @@ -865,7 +865,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th logger.info("Final Related identifier: " + relatedIdentifier); break; case "Handle": - if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + if (!relatedIdentifier.startsWith("hdl:") || !relatedIdentifier.startsWith("http")) { relatedIdentifier = "hdl:" + 
relatedIdentifier; } try { @@ -1158,7 +1158,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X List childDsfs = dsfcv.getChildDatasetFields(); for (DatasetField childDsf : childDsfs) { - if (DatasetFieldConstant.seriesInformation.equals(childDsf.getDatasetFieldType().getName())) { + if (DatasetFieldConstant.seriesName.equals(childDsf.getDatasetFieldType().getName())) { String seriesInformation = childDsf.getValue(); if (StringUtils.isNotBlank(seriesInformation)) { descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); From 182f3d7bca310c54eb44f0452c76671b26b03824 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:53:47 -0400 Subject: [PATCH 102/486] catch PID update exception to avoid corrupt dataset --- .../CuratePublishedDatasetVersionCommand.java | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index fbff40a9c80..dd8b19e0c3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -151,7 +152,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { tempDataset.setThumbnailFile(publishedFmd.getDataFile()); } } - + if(logger.isLoggable(Level.FINE)) { + for(FileMetadata fmd: 
updateVersion.getFileMetadatas()) { + logger.fine("Id: " + fmd.getId() + " label: " + fmd.getLabel()); + } + } // Update modification time on the published version and the dataset updateVersion.setLastUpdateTime(getTimestamp()); tempDataset.setModificationTime(getTimestamp()); @@ -170,28 +175,38 @@ public Dataset execute(CommandContext ctxt) throws CommandException { cmd.execute(ctxt); // And update metadata at PID provider - ctxt.engine().submit( - new UpdateDvObjectPIDMetadataCommand(savedDataset, getRequest())); - - //And the exported metadata files try { - ExportService instance = ExportService.getInstance(); - instance.exportAllFormats(getDataset()); - } catch (ExportException ex) { - // Just like with indexing, a failure to export is not a fatal condition. - logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while exporting metadata files:{0}", ex.getMessage()); + ctxt.engine().submit( + new UpdateDvObjectPIDMetadataCommand(savedDataset, getRequest())); + } catch (CommandException ex) { + //Make this non-fatal as after the DeleteDatasetVersionCommand, we can't roll back - for some reason no datasetfields remain in the DB + //(The old version doesn't need them and the new version doesn't get updated to include them?) + logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while updating PID metadata:{0}", ex.getMessage()); } - - // Update so that getDataset() in updateDatasetUser will get the up-to-date copy // (with no draft version) setDataset(savedDataset); updateDatasetUser(ctxt); - - - return savedDataset; } + @Override + public boolean onSuccess(CommandContext ctxt, Object r) { + boolean retVal = true; + Dataset d = (Dataset) r; + + ctxt.index().asyncIndexDataset(d, true); + + // And the exported metadata files + try { + ExportService instance = ExportService.getInstance(); + instance.exportAllFormats(d); + } catch (ExportException ex) { + // Just like with indexing, a failure to export is not a fatal condition. 
+ retVal = false; + logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while exporting metadata files:{0}", ex.getMessage()); + } + return retVal; + } } From be903555bcc3c21169b3a7343a076c331d308bf6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 16:24:40 -0400 Subject: [PATCH 103/486] try long sleep --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dab0ff43fcf..29de42f3578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -944,9 +944,9 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont // portion of the PublishDatasetCommand. I'm going to leave the 1 second // sleep below, for just in case reasons: -- L.A. try { - Thread.sleep(1000); + Thread.sleep(5000); } catch (Exception ex) { - logger.warning("Failed to sleep for a second."); + logger.warning("Failed to sleep for five seconds."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); From e458e8ca5c14f48d8da94d83c2cb40e76e9198d6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 16:53:28 -0400 Subject: [PATCH 104/486] set dv released before pid publicize, go back to short time --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- .../command/impl/FinalizeDatasetPublicationCommand.java | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 29de42f3578..18bd6dc74ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -944,9 +944,9 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont // portion of the PublishDatasetCommand. I'm going to leave the 1 second // sleep below, for just in case reasons: -- L.A. try { - Thread.sleep(5000); + Thread.sleep(1000); } catch (Exception ex) { - logger.warning("Failed to sleep for five seconds."); + logger.warning("Failed to sleep for one second."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 287e877f6e0..299bb3168de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -211,7 +211,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getLatestVersion().getVersionState() != RELEASED) { // some imported datasets may already be released. - + theDataset.getLatestVersion().setVersionState(RELEASED); if (!datasetExternallyReleased) { publicizeExternalIdentifier(theDataset, ctxt); // Will throw a CommandException, unless successful. @@ -220,7 +220,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // a failure - it will remove any locks, and it will send a // proper notification to the user(s). 
} - theDataset.getLatestVersion().setVersionState(RELEASED); } final Dataset ds = ctxt.em().merge(theDataset); From 27fe7b4d0dad6124d5e036c8ec5d36b31a371a9b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 17:03:13 -0400 Subject: [PATCH 105/486] always use latest version for copy --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 7f861b3e42d..0adc9984b3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -146,7 +146,7 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag List altTitles = new ArrayList<>(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { - DatasetVersion dv = d.getLatestVersion(); + DatasetVersion dv = d.getLatestVersionForCopy(); Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); if (subTitleField.isPresent()) { subTitle = subTitleField.get().getValue(); From 00a383007686b49b938f88415b476e377d054f98 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 09:53:40 -0400 Subject: [PATCH 106/486] handle deaccession, fix relatedIDtype for files --- .../edu/harvard/iq/dataverse/DataFile.java | 19 +++ .../pidproviders/doi/XmlMetadataTemplate.java | 117 +++++++++++------- 2 files changed, 90 insertions(+), 46 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 29a4a14c021..1a610d9ea6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1123,4 +1123,23 @@ private boolean tagExists(String tagLabel) { } return false; } + + public boolean isDeaccessioned() { + // return true, if all published versions were deaccessioned + boolean inDeaccessionedVersions = false; + for (FileMetadata fmd : getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + return false; + } + // Also check for draft version + if (testDsv.isDraft()) { + return false; + } + if (testDsv.isDeaccessioned()) { + inDeaccessionedVersions = true; + } + } + return inDeaccessionedVersions; // since any published version would have already returned + } } // end of class diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 0adc9984b3d..96ee84fe13b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -100,30 +100,41 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM String metadataLanguage = null; // when set, otherwise = language? 
XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); - + boolean deaccessioned=false; + if(dvObject instanceof Dataset d) { + deaccessioned=d.isDeaccessioned(); + } else if (dvObject instanceof DataFile df) { + deaccessioned = df.isDeaccessioned(); + } xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); writeIdentifier(xmlw, dvObject); - writeCreators(xmlw, doiMetadata.getAuthors()); - writeTitles(xmlw, dvObject, language); - writePublisher(xmlw, dvObject); - writePublicationYear(xmlw, dvObject); - writeSubjects(xmlw, dvObject); - writeContributors(xmlw, dvObject); - writeDates(xmlw, dvObject); - writeLanguage(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors(), deaccessioned); + writeTitles(xmlw, dvObject, language, deaccessioned); + writePublisher(xmlw, dvObject, deaccessioned); + writePublicationYear(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + } writeResourceType(xmlw, dvObject); - writeAlternateIdentifiers(xmlw, dvObject); - writeRelatedIdentifiers(xmlw, dvObject); - writeSize(xmlw, dvObject); - writeFormats(xmlw, dvObject); - writeVersion(xmlw, dvObject); - writeAccessRights(xmlw, dvObject); - writeDescriptions(xmlw, dvObject); - writeGeoLocations(xmlw, dvObject); - writeFundingReferences(xmlw, dvObject); + if (!deaccessioned) { + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + } + writeDescriptions(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); + } xmlw.writeEndElement(); 
xmlw.flush(); } @@ -140,23 +151,29 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM * @return * @throws XMLStreamException */ - private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { - String title = doiMetadata.getTitle(); + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language, boolean deaccessioned) throws XMLStreamException { + String title = null; String subTitle = null; List altTitles = new ArrayList<>(); - // Only Datasets can have a subtitle or alternative titles - if (dvObject instanceof Dataset d) { - DatasetVersion dv = d.getLatestVersionForCopy(); - Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); - if (subTitleField.isPresent()) { - subTitle = subTitleField.get().getValue(); - } - Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); - if (altTitleField.isPresent()) { - altTitles = altTitleField.get().getValues(); + + if (!deaccessioned) { + doiMetadata.getTitle(); + + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } } + } else { + title = AbstractDOIProvider.UNAVAILABLE; } - if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || 
(altTitles != null && !String.join("", altTitles).isBlank())) { xmlw.writeStartElement("titles"); if (StringUtils.isNotBlank(title)) { @@ -227,13 +244,13 @@ private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XML * - the list of authors * @throws XMLStreamException */ - public void writeCreators(XMLStreamWriter xmlw, List authorList) throws XMLStreamException { + public void writeCreators(XMLStreamWriter xmlw, List authorList, boolean deaccessioned) throws XMLStreamException { // creators -> creator -> creatorName with nameType attribute, givenName, // familyName, nameIdentifier // write all creators xmlw.writeStartElement("creators"); // - if (authorList != null && !authorList.isEmpty()) { + if (!deaccessioned && authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); String affiliation = null; @@ -267,18 +284,21 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) xmlw.writeEndElement(); // } - private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // publisher should already be non null - :unav if it wasn't available + if(deaccessioned) { + doiMetadata.setPublisher(AbstractPidProvider.UNAVAILABLE); + } XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); } - private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null 
now that pull // request #4606 has been merged. - if (doiMetadata.getPublisherYear() != null) { + if (! deaccessioned && (doiMetadata.getPublisherYear() != null)) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. pubYear = doiMetadata.getPublisherYear(); @@ -926,6 +946,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th attributes.clear(); attributes.put("relationType", "IsPartOf"); + attributes.put("relatedIdentifierType", pubIdType); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); } @@ -1082,20 +1103,24 @@ private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); // } - private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // descriptions -> description with descriptionType attribute boolean descriptionsWritten = false; List descriptions = null; DatasetVersion dv = null; - - if (dvObject instanceof Dataset d) { - dv = d.getLatestVersionForCopy(); - descriptions = dv.getDescriptions(); - } else if (dvObject instanceof DataFile df) { - String description = df.getDescription(); - if (description != null) { - descriptions = new ArrayList(); - descriptions.add(description); + if(deaccessioned) { + descriptions = new ArrayList(); + descriptions.add(AbstractDOIProvider.UNAVAILABLE); + } else { + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + descriptions = dv.getDescriptions(); + } else if (dvObject instanceof DataFile df) { + String description = df.getDescription(); + if (description != null) { + descriptions = new ArrayList(); + 
descriptions.add(description); + } } } Map attributes = new HashMap(); From 1faf0cd84c5ef52a8e88afb7989e238058870916 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 10:26:30 -0400 Subject: [PATCH 107/486] missed assignment for title --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 96ee84fe13b..b2008e14a89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -157,7 +157,7 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag List altTitles = new ArrayList<>(); if (!deaccessioned) { - doiMetadata.getTitle(); + title = doiMetadata.getTitle(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { From 23dd581c98b921908b2cdcca13d32e8731c76e7e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 10:54:06 -0400 Subject: [PATCH 108/486] fix creator for deaccessioned --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index b2008e14a89..8f962204302 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -249,8 +249,10 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // familyName, nameIdentifier // write all creators xmlw.writeStartElement("creators"); // - - if (!deaccessioned && authorList != null && 
!authorList.isEmpty()) { + if(deaccessioned) { + authorList = null; + } + if (authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); String affiliation = null; From 3bbd2e9dfc4cd3d1e5cebd5a9cf7dbbfe52c1fa4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 11:51:50 -0400 Subject: [PATCH 109/486] correct fix for creators when deaccessioned --- .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8f962204302..a3eca9ef9a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -250,8 +250,10 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // write all creators xmlw.writeStartElement("creators"); // if(deaccessioned) { + //skip the loop below authorList = null; } + boolean nothingWritten = true; if (authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); @@ -274,15 +276,17 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, if (StringUtils.isNotBlank(creatorName)) { JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + nothingWritten = false; writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); } - else { - // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); - } + } } + if (nothingWritten) { + // Authors 
unavailable + XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + } xmlw.writeEndElement(); // } From 4def6da32c207223fb2cf9d5aada50f921ddd474 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:00:15 -0400 Subject: [PATCH 110/486] remove bad value and lang --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a3eca9ef9a0..2d09c67fea9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -285,7 +285,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, } if (nothingWritten) { // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + XmlWriterUtil.writeFullElement(xmlw, "creator", AbstractPidProvider.UNAVAILABLE); } xmlw.writeEndElement(); // } From eac477ec3c2fef4f48759b110f6157081b301eff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:46:07 -0400 Subject: [PATCH 111/486] add creatorName sub element for deaccession/no names case --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 2d09c67fea9..a660a80448a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -285,7 +285,9 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, } if (nothingWritten) { // 
Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", AbstractPidProvider.UNAVAILABLE); + xmlw.writeStartElement("creator"); + XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); + xmlw.writeEndElement("creator"); } xmlw.writeEndElement(); // } From 154ac8a91554be29492d62454f1b0e52501b5af2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:49:59 -0400 Subject: [PATCH 112/486] typo --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a660a80448a..732a633116e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -287,7 +287,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // Authors unavailable xmlw.writeStartElement("creator"); XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); - xmlw.writeEndElement("creator"); + xmlw.writeEndElement(); } xmlw.writeEndElement(); // } From 9144f6c96ae0685a7ac719d2203f0aed3f71e85e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 13:16:32 -0400 Subject: [PATCH 113/486] fix resourceType - always 1 entry --- .../pidproviders/doi/XmlMetadataTemplate.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 732a633116e..8f6211c0730 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -736,23 +736,24 @@ 
private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X for (DatasetField dsf : dv.getDatasetFields()) { switch (dsf.getDatasetFieldType().getName()) { case DatasetFieldConstant.kindOfData: - kindOfDataValues.addAll(dsf.getValues()); + List vals = dsf.getValues(); + for(String val: vals) { + if(StringUtils.isNotBlank(val)) { + kindOfDataValues.add(val); + } + } break; } } - if (kindOfDataValues.isEmpty()) { + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); + + } else { // Write an attribute only element if there are no kindOfData values. xmlw.writeStartElement("resourceType"); xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); xmlw.writeEndElement(); - } else { - for (String resourceType : kindOfDataValues) { - if (StringUtils.isNotBlank(resourceType)) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); - } - } } - } } From a5870fbaf3a89e9c6100b8e1f4371caa291c1e23 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 13:26:41 -0400 Subject: [PATCH 114/486] Also handle file case for resourceType --- .../pidproviders/doi/XmlMetadataTemplate.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8f6211c0730..7d817d57a2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -745,16 +745,17 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X break; } } - if (!kindOfDataValues.isEmpty()) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", 
kindOfDataValues)); - - } else { - // Write an attribute only element if there are no kindOfData values. - xmlw.writeStartElement("resourceType"); - xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); - xmlw.writeEndElement(); - } } + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); + + } else { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } + } /** From 24db2af2bfe9564eedc1ee9aedae8b9048bd551e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 31 May 2024 14:47:06 -0400 Subject: [PATCH 115/486] missed changes --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 7d817d57a2a..f5bd009e8d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -23,6 +23,7 @@ import javax.xml.stream.XMLStreamWriter; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -39,6 +40,7 @@ import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetFieldValue; +import edu.harvard.iq.dataverse.DatasetRelPublication; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; @@ -703,7 +705,7 @@ private void 
writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea endDate = Util.getDateFormat().format(end); } catch (ParseException e) { logger.warning("Could not parse date: " + endDate); - }; + }; } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); @@ -853,7 +855,6 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th Map attributes = new HashMap(); if (dvObject instanceof Dataset dataset) { - List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); if (!relatedPublications.isEmpty()) { for (DatasetRelPublication relatedPub : relatedPublications) { @@ -967,7 +968,6 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th } } - static HashMap relatedIdentifierTypeMap = new HashMap(); private static String getCanonicalPublicationType(String pubIdType) { From f0fd61ad555369ef1af9a1529797cf8d73d6efde Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 31 May 2024 15:05:02 -0400 Subject: [PATCH 116/486] simplify - util checks for null and empty --- .../dataverse/export/ddi/DdiExportUtil.java | 46 +++++-------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index c0e3057696a..f5efc448090 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -371,7 +371,7 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - XmlWriterUtil.writeAttribute(xmlw,"source","archive"); + xmlw.writeAttribute("source","archive"); xmlw.writeStartElement("version"); 
XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); @@ -842,9 +842,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); - if(!authorAffiliation.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } @@ -865,9 +863,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); - if(!contributorType.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); - } + XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId } @@ -907,12 +903,8 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO // TODO: Since datasetContactEmail is a required field but datasetContactName is not consider not checking if datasetContactName is empty so we can write out datasetContactEmail. 
if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); - if(!datasetContactAffiliation.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); - } - if(!datasetContactEmail.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } @@ -957,9 +949,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT xmlw.writeStartElement("producer"); XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); - /*if (!producerLogo.isEmpty()) { - XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); - }*/ + //XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } @@ -1139,9 +1129,7 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); - if(!descriptionDate.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); - } + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); if(DvObjectContainer.isMetadataLanguageSet(lang)) { xmlw.writeAttribute("xml:lang", lang); } @@ -1176,9 +1164,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); - if(!grantAgency.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } @@ -1210,9 +1196,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!otherId.isEmpty()){ 
xmlw.writeStartElement("IDNo"); - if(!otherIdAgency.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } @@ -1244,9 +1228,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); - if(!softwareVersion.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); - } + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } @@ -1359,12 +1341,8 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); - if(!notesType.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"type",notesType); - } - if(!notesSubject.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); - } + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } From ead153f502ef77258d20f6faf4a0fc8282a74687 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Jun 2024 12:18:45 -0400 Subject: [PATCH 117/486] typo in DOI parsing logic --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index f5bd009e8d7..e6c1a1ae6b4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -882,7 +882,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { 
switch (pubIdType) { case "DOI": - if (!relatedIdentifier.startsWith("doi:") || !relatedIdentifier.startsWith("http")) { + if (!(relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http"))) { relatedIdentifier = "doi:" + relatedIdentifier; } logger.info("Intermediate Related identifier: " + relatedIdentifier); From ea75216025ca358f1c9d7d3c11d324b6dbe4f0f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Jun 2024 12:58:46 -0400 Subject: [PATCH 118/486] only files in latestversionforcopy --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e6c1a1ae6b4..ae7c21b3308 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -43,6 +43,7 @@ import edu.harvard.iq.dataverse.DatasetRelPublication; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; @@ -934,10 +935,12 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th } } } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + List fmds = dataset.getLatestVersionForCopy().getFileMetadatas(); + if (!(fmds==null) && fmds.isEmpty()) { attributes.clear(); attributes.put("relationType", "HasPart"); - for (DataFile dataFile : dataset.getFiles()) { + for (FileMetadata fmd : fmds) { + DataFile dataFile = fmd.getDataFile(); GlobalId pid = dataFile.getGlobalId(); if (pid != null) { String pubIdType = getCanonicalPublicationType(pid.getProtocol()); 
From b6bd530db70dfbc78017445e4e2af79233b6e899 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 11 Jun 2024 16:40:58 -0400 Subject: [PATCH 119/486] fix date parsing, clear bad values --- .../dataverse/pidproviders/doi/XmlMetadataTemplate.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index ae7c21b3308..9ba1e4e3116 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -692,22 +692,27 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + //Minimal clean-up - useful? Parse/format would remove unused chars, and an exception would clear the date so we don't send nonsense if(StringUtils.isNotBlank(startDate)) { try { - Date start = Util.getDateTimeFormat().parse(startDate); + Date start = Util.getDateFormat().parse(startDate); startDate = Util.getDateFormat().format(start); } catch (ParseException e) { logger.warning("Could not parse date: " + startDate); + startDate = null; } } if(StringUtils.isNotBlank(endDate)) { try { - Date end = Util.getDateTimeFormat().parse(endDate); + Date end = Util.getDateFormat().parse(endDate); endDate = Util.getDateFormat().format(end); } catch (ParseException e) { logger.warning("Could not parse date: " + endDate); + endDate = null; }; } + } + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); From e1383d77d3304418ebf3decfd257ca0610994ce1 Mon Sep 17 00:00:00 
2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:10:04 -0400 Subject: [PATCH 120/486] relationType entry in citation block --- conf/solr/9.3.0/schema.xml | 2 + ...dataset-create-new-all-default-fields.json | 2935 ++++++++--------- scripts/api/data/metadatablocks/citation.tsv | 105 +- .../iq/dataverse/DatasetFieldConstant.java | 1 + .../dublincore/DublinCoreExportUtil.java | 35 +- .../export/openaire/OpenAireExportUtil.java | 9 +- .../java/propertyFiles/citation.properties | 2 + .../export/OpenAireExportUtilTest.java | 2 +- 8 files changed, 1502 insertions(+), 1589 deletions(-) diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/9.3.0/schema.xml index 5dde750573d..32f10d0a621 100644 --- a/conf/solr/9.3.0/schema.xml +++ b/conf/solr/9.3.0/schema.xml @@ -349,6 +349,7 @@ + @@ -589,6 +590,7 @@ + diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index cc856c6372f..e522ab32b1d 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -1,1527 +1,1410 @@ { - "datasetVersion": { - "license": { - "name": "CC0 1.0", - "uri": "http://creativecommons.org/publicdomain/zero/1.0" - }, - "metadataBlocks": { - "citation": { - "displayName": "Citation Metadata", - "fields": [ - { - "typeName": "title", - "multiple": false, - "typeClass": "primitive", - "value": "Replication Data for: Title" - }, - { - "typeName": "subtitle", - "multiple": false, - "typeClass": "primitive", - "value": "Subtitle" - }, - { - "typeName": "alternativeTitle", - "multiple": true, - "typeClass": "primitive", - "value": ["Alternative Title"] - }, - { - "typeName": "alternativeURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://AlternativeURL.org" - }, - { - "typeName": "otherId", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "otherIdAgency": { - "typeName": "otherIdAgency", - "multiple": false, - 
"typeClass": "primitive", - "value": "OtherIDAgency1" - }, - "otherIdValue": { - "typeName": "otherIdValue", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDIdentifier1" - } - }, - { - "otherIdAgency": { - "typeName": "otherIdAgency", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDAgency2" - }, - "otherIdValue": { - "typeName": "otherIdValue", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDIdentifier2" - } - } - ] - }, - { - "typeName": "author", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastAuthor1, FirstAuthor1" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorAffiliation1" - }, - "authorIdentifierScheme": { - "typeName": "authorIdentifierScheme", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ORCID" - }, - "authorIdentifier": { - "typeName": "authorIdentifier", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorIdentifier1" - } - }, - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastAuthor2, FirstAuthor2" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorAffiliation2" - }, - "authorIdentifierScheme": { - "typeName": "authorIdentifierScheme", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ISNI" - }, - "authorIdentifier": { - "typeName": "authorIdentifier", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorIdentifier2" - } - } - ] - }, - { - "typeName": "datasetContact", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContact1, 
FirstContact1" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ContactAffiliation1" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "ContactEmail1@mailinator.com" - } - }, - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContact2, FirstContact2" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ContactAffiliation2" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "ContactEmail2@mailinator.com" - } - } - ] - }, - { - "typeName": "dsDescription", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "DescriptionText1" - }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1000-01-01" - } - }, - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "DescriptionText2" - }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1000-02-02" - } - } - ] - }, - { - "typeName": "subject", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Agricultural Sciences", - "Business and Management", - "Engineering", - "Law" - ] - }, - { - "typeName": "keyword", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordTerm1" - }, - "keywordTermURI": { - "typeName": "keywordTermURI", - 
"multiple": false, - "typeClass": "primitive", - "value": "http://keywordTermURI1.org" - }, - "keywordVocabulary": { - "typeName": "keywordVocabulary", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordVocabulary1" - }, - "keywordVocabularyURI": { - "typeName": "keywordVocabularyURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://KeywordVocabularyURL1.org" - } - }, - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordTerm2" - }, - "keywordTermURI": { - "typeName": "keywordTermURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://keywordTermURI2.org" - }, - "keywordVocabulary": { - "typeName": "keywordVocabulary", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordVocabulary2" - }, - "keywordVocabularyURI": { - "typeName": "keywordVocabularyURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://KeywordVocabularyURL2.org" - } - } - ] - }, - { - "typeName": "topicClassification", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "topicClassValue": { - "typeName": "topicClassValue", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Term1" - }, - "topicClassVocab": { - "typeName": "topicClassVocab", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Vocab1" - }, - "topicClassVocabURI": { - "typeName": "topicClassVocabURI", - "multiple": false, - "typeClass": "primitive", - "value": "https://TopicClassificationURL1.com" - } - }, - { - "topicClassValue": { - "typeName": "topicClassValue", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Term2" - }, - "topicClassVocab": { - "typeName": "topicClassVocab", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Vocab2" - }, - "topicClassVocabURI": { - "typeName": "topicClassVocabURI", - "multiple": false, - "typeClass": 
"primitive", - "value": "https://TopicClassificationURL2.com" - } - } - ] - }, - { - "typeName": "publication", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation1" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ark" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber1" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL1.org" - } - }, - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation2" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "arXiv" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber2" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL2.org" - } - } - ] - }, - { - "typeName": "notesText", - "multiple": false, - "typeClass": "primitive", - "value": "Notes1" - }, - { - "typeName": "language", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Abkhaz", - "Afar" - ] - }, - { - "typeName": "producer", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "LastProducer1, FirstProducer1" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - 
"typeClass": "primitive", - "value": "ProducerAffiliation1" - }, - "producerAbbreviation": { - "typeName": "producerAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAbbreviation1" - }, - "producerURL": { - "typeName": "producerURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerURL1.org" - }, - "producerLogoURL": { - "typeName": "producerLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerLogoURL1.org" - } - }, - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "LastProducer2, FirstProducer2" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAffiliation2" - }, - "producerAbbreviation": { - "typeName": "producerAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAbbreviation2" - }, - "producerURL": { - "typeName": "producerURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerURL2.org" - }, - "producerLogoURL": { - "typeName": "producerLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerLogoURL2.org" - } - } - ] - }, - { - "typeName": "productionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1003-01-01" - }, - { - "typeName": "productionPlace", - "multiple": true, - "typeClass": "primitive", - "value": ["ProductionPlace"] - }, - { - "typeName": "contributor", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "contributorType": { - "typeName": "contributorType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Data Collector" - }, - "contributorName": { - "typeName": "contributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContributor1, FirstContributor1" - } - }, - { - "contributorType": { - "typeName": "contributorType", - "multiple": false, 
- "typeClass": "controlledVocabulary", - "value": "Data Curator" - }, - "contributorName": { - "typeName": "contributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContributor2, FirstContributor2" - } - } - ] - }, - { - "typeName": "grantNumber", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "grantNumberAgency": { - "typeName": "grantNumberAgency", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantAgency1" - }, - "grantNumberValue": { - "typeName": "grantNumberValue", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantNumber1" - } - }, - { - "grantNumberAgency": { - "typeName": "grantNumberAgency", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantAgency2" - }, - "grantNumberValue": { - "typeName": "grantNumberValue", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantNumber2" - } - } - ] - }, - { - "typeName": "distributor", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "distributorName": { - "typeName": "distributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastDistributor1, FirstDistributor1" - }, - "distributorAffiliation": { - "typeName": "distributorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAffiliation1" - }, - "distributorAbbreviation": { - "typeName": "distributorAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAbbreviation1" - }, - "distributorURL": { - "typeName": "distributorURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorURL1.org" - }, - "distributorLogoURL": { - "typeName": "distributorLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorLogoURL1.org" - } - }, - { - "distributorName": { - "typeName": "distributorName", - "multiple": false, - "typeClass": "primitive", - "value": 
"LastDistributor2, FirstDistributor2" - }, - "distributorAffiliation": { - "typeName": "distributorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAffiliation2" - }, - "distributorAbbreviation": { - "typeName": "distributorAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAbbreviation2" - }, - "distributorURL": { - "typeName": "distributorURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorURL2.org" - }, - "distributorLogoURL": { - "typeName": "distributorLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorLogoURL2.org" - } - } - ] - }, - { - "typeName": "distributionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1004-01-01" - }, - { - "typeName": "depositor", - "multiple": false, - "typeClass": "primitive", - "value": "LastDepositor, FirstDepositor" - }, - { - "typeName": "dateOfDeposit", - "multiple": false, - "typeClass": "primitive", - "value": "1002-01-01" - }, - { - "typeName": "timePeriodCovered", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "timePeriodCoveredStart": { - "typeName": "timePeriodCoveredStart", - "multiple": false, - "typeClass": "primitive", - "value": "1005-01-01" - }, - "timePeriodCoveredEnd": { - "typeName": "timePeriodCoveredEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1005-01-02" - } - }, - { - "timePeriodCoveredStart": { - "typeName": "timePeriodCoveredStart", - "multiple": false, - "typeClass": "primitive", - "value": "1005-02-01" - }, - "timePeriodCoveredEnd": { - "typeName": "timePeriodCoveredEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1005-02-02" - } - } - ] - }, - { - "typeName": "dateOfCollection", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dateOfCollectionStart": { - "typeName": "dateOfCollectionStart", - "multiple": false, - "typeClass": "primitive", - "value": "1006-01-01" - }, 
- "dateOfCollectionEnd": { - "typeName": "dateOfCollectionEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1006-01-01" - } - }, - { - "dateOfCollectionStart": { - "typeName": "dateOfCollectionStart", - "multiple": false, - "typeClass": "primitive", - "value": "1006-02-01" - }, - "dateOfCollectionEnd": { - "typeName": "dateOfCollectionEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1006-02-02" - } - } - ] - }, - { - "typeName": "kindOfData", - "multiple": true, - "typeClass": "primitive", - "value": [ - "KindOfData1", - "KindOfData2" - ] - }, - { - "typeName": "series", - "multiple": true, - "typeClass": "compound", - "value": [{ - "seriesName": { - "typeName": "seriesName", - "multiple": false, - "typeClass": "primitive", - "value": "SeriesName" - }, - "seriesInformation": { - "typeName": "seriesInformation", - "multiple": false, - "typeClass": "primitive", - "value": "SeriesInformation" - } - }] - }, - { - "typeName": "software", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "softwareName": { - "typeName": "softwareName", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareName1" - }, - "softwareVersion": { - "typeName": "softwareVersion", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareVersion1" - } - }, - { - "softwareName": { - "typeName": "softwareName", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareName2" - }, - "softwareVersion": { - "typeName": "softwareVersion", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareVersion2" - } - } - ] - }, - { - "typeName": "relatedMaterial", - "multiple": true, - "typeClass": "primitive", - "value": [ - "RelatedMaterial1", - "RelatedMaterial2" - ] - }, - { - "typeName": "relatedDatasets", - "multiple": true, - "typeClass": "primitive", - "value": [ - "RelatedDatasets1", - "RelatedDatasets2" - ] - }, - { - "typeName": "otherReferences", - "multiple": true, - "typeClass": "primitive", - 
"value": [ - "OtherReferences1", - "OtherReferences2" - ] - }, - { - "typeName": "dataSources", - "multiple": true, - "typeClass": "primitive", - "value": [ - "DataSources1", - "DataSources2" - ] - }, - { - "typeName": "originOfSources", - "multiple": false, - "typeClass": "primitive", - "value": "OriginOfSources" - }, - { - "typeName": "characteristicOfSources", - "multiple": false, - "typeClass": "primitive", - "value": "CharacteristicOfSourcesNoted" - }, - { - "typeName": "accessToSources", - "multiple": false, - "typeClass": "primitive", - "value": "DocumentationAndAccessToSources" - } - ] - }, - "geospatial": { - "displayName": "Geospatial Metadata", - "fields": [ - { - "typeName": "geographicCoverage", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "country": { - "typeName": "country", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Afghanistan" - }, - "state": { - "typeName": "state", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageStateProvince1" - }, - "city": { - "typeName": "city", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageCity1" - }, - "otherGeographicCoverage": { - "typeName": "otherGeographicCoverage", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageOther1" - } - }, - { - "country": { - "typeName": "country", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Albania" - }, - "state": { - "typeName": "state", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageStateProvince2" - }, - "city": { - "typeName": "city", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageCity2" - }, - "otherGeographicCoverage": { - "typeName": "otherGeographicCoverage", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageOther2" - } - } - ] - }, - { - "typeName": "geographicUnit", - "multiple": true, - "typeClass": "primitive", - 
"value": [ - "GeographicUnit1", - "GeographicUnit2" - ] - }, - { - "typeName": "geographicBoundingBox", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "westLongitude": { - "typeName": "westLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-72" - }, - "eastLongitude": { - "typeName": "eastLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-70" - }, - "northLatitude": { - "typeName": "northLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "43" - }, - "southLatitude": { - "typeName": "southLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "42" - } - }, - { - "westLongitude": { - "typeName": "westLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-18" - }, - "eastLongitude": { - "typeName": "eastLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-13" - }, - "northLatitude": { - "typeName": "northLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "29" - }, - "southLatitude": { - "typeName": "southLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "28" - } - } - ] - } - ] - }, - "socialscience": { - "displayName": "Social Science and Humanities Metadata", - "fields": [ - { - "typeName": "unitOfAnalysis", - "multiple": true, - "typeClass": "primitive", - "value": [ - "UnitOfAnalysis1", - "UnitOfAnalysis2" - ] - }, - { - "typeName": "universe", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Universe1", - "Universe2" - ] - }, - { - "typeName": "timeMethod", - "multiple": false, - "typeClass": "primitive", - "value": "TimeMethod" - }, - { - "typeName": "dataCollector", - "multiple": false, - "typeClass": "primitive", - "value": "LastDataCollector1, FirstDataCollector1" - }, - { - "typeName": "collectorTraining", - "multiple": false, - "typeClass": "primitive", - "value": "CollectorTraining" - }, - { - "typeName": "frequencyOfDataCollection", - "multiple": false, - 
"typeClass": "primitive", - "value": "Frequency" - }, - { - "typeName": "samplingProcedure", - "multiple": false, - "typeClass": "primitive", - "value": "SamplingProcedure" - }, - { - "typeName": "targetSampleSize", - "multiple": false, - "typeClass": "compound", - "value": { - "targetSampleActualSize": { - "typeName": "targetSampleActualSize", - "multiple": false, - "typeClass": "primitive", - "value": "100" - }, - "targetSampleSizeFormula": { - "typeName": "targetSampleSizeFormula", - "multiple": false, - "typeClass": "primitive", - "value": "TargetSampleSizeFormula" - } - } - }, - { - "typeName": "deviationsFromSampleDesign", - "multiple": false, - "typeClass": "primitive", - "value": "MajorDeviationsForSampleDesign" - }, - { - "typeName": "collectionMode", - "multiple": true, - "typeClass": "primitive", - "value": ["CollectionMode"] - }, - { - "typeName": "researchInstrument", - "multiple": false, - "typeClass": "primitive", - "value": "TypeOfResearchInstrument" - }, - { - "typeName": "dataCollectionSituation", - "multiple": false, - "typeClass": "primitive", - "value": "CharacteristicsOfDataCollectionSituation" - }, - { - "typeName": "actionsToMinimizeLoss", - "multiple": false, - "typeClass": "primitive", - "value": "ActionsToMinimizeLosses" - }, - { - "typeName": "controlOperations", - "multiple": false, - "typeClass": "primitive", - "value": "ControlOperations" - }, - { - "typeName": "weighting", - "multiple": false, - "typeClass": "primitive", - "value": "Weighting" - }, - { - "typeName": "cleaningOperations", - "multiple": false, - "typeClass": "primitive", - "value": "CleaningOperations" - }, - { - "typeName": "datasetLevelErrorNotes", - "multiple": false, - "typeClass": "primitive", - "value": "StudyLevelErrorNotes" - }, - { - "typeName": "responseRate", - "multiple": false, - "typeClass": "primitive", - "value": "ResponseRate" - }, - { - "typeName": "samplingErrorEstimates", - "multiple": false, - "typeClass": "primitive", - "value": 
"EstimatesOfSamplingError" - }, - { - "typeName": "otherDataAppraisal", - "multiple": false, - "typeClass": "primitive", - "value": "OtherFormsOfDataAppraisal" - }, - { - "typeName": "socialScienceNotes", - "multiple": false, - "typeClass": "compound", - "value": { - "socialScienceNotesType": { - "typeName": "socialScienceNotesType", - "multiple": false, - "typeClass": "primitive", - "value": "NotesType" - }, - "socialScienceNotesSubject": { - "typeName": "socialScienceNotesSubject", - "multiple": false, - "typeClass": "primitive", - "value": "NotesSubject" - }, - "socialScienceNotesText": { - "typeName": "socialScienceNotesText", - "multiple": false, - "typeClass": "primitive", - "value": "NotesText" - } - } - } - ] - }, - "astrophysics": { - "displayName": "Astronomy and Astrophysics Metadata", - "fields": [ - { - "typeName": "astroType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Image", - "Mosaic", - "EventList", - "Cube" - ] - }, - { - "typeName": "astroFacility", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Facility1", - "Facility2" - ] - }, - { - "typeName": "astroInstrument", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Instrument1", - "Instrument2" - ] - }, - { - "typeName": "astroObject", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Object1", - "Object2" - ] - }, - { - "typeName": "resolution.Spatial", - "multiple": false, - "typeClass": "primitive", - "value": "SpatialResolution" - }, - { - "typeName": "resolution.Spectral", - "multiple": false, - "typeClass": "primitive", - "value": "SpectralResolution" - }, - { - "typeName": "resolution.Temporal", - "multiple": false, - "typeClass": "primitive", - "value": "TimeResolution" - }, - { - "typeName": "coverage.Spectral.Bandpass", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Bandpass1", - "Bandpass2" - ] - }, - { - "typeName": "coverage.Spectral.CentralWavelength", - "multiple": true, - "typeClass": 
"primitive", - "value": [ - "3001", - "3002" - ] - }, - { - "typeName": "coverage.Spectral.Wavelength", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Spectral.MinimumWavelength": { - "typeName": "coverage.Spectral.MinimumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4001" - }, - "coverage.Spectral.MaximumWavelength": { - "typeName": "coverage.Spectral.MaximumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4002" - } - }, - { - "coverage.Spectral.MinimumWavelength": { - "typeName": "coverage.Spectral.MinimumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4003" - }, - "coverage.Spectral.MaximumWavelength": { - "typeName": "coverage.Spectral.MaximumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4004" - } - } - ] - }, - { - "typeName": "coverage.Temporal", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Temporal.StartTime": { - "typeName": "coverage.Temporal.StartTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-01-01" - }, - "coverage.Temporal.StopTime": { - "typeName": "coverage.Temporal.StopTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-01-02" - } - }, - { - "coverage.Temporal.StartTime": { - "typeName": "coverage.Temporal.StartTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-02-01" - }, - "coverage.Temporal.StopTime": { - "typeName": "coverage.Temporal.StopTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-02-02" - } - } - ] - }, - { - "typeName": "coverage.Spatial", - "multiple": true, - "typeClass": "primitive", - "value": [ - "SkyCoverage1", - "SkyCoverage2" - ] - }, - { - "typeName": "coverage.Depth", - "multiple": false, - "typeClass": "primitive", - "value": "200" - }, - { - "typeName": "coverage.ObjectDensity", - "multiple": false, - "typeClass": "primitive", - "value": "300" - }, - { - "typeName": 
"coverage.ObjectCount", - "multiple": false, - "typeClass": "primitive", - "value": "400" - }, - { - "typeName": "coverage.SkyFraction", - "multiple": false, - "typeClass": "primitive", - "value": "500" - }, - { - "typeName": "coverage.Polarization", - "multiple": false, - "typeClass": "primitive", - "value": "Polarization" - }, - { - "typeName": "redshiftType", - "multiple": false, - "typeClass": "primitive", - "value": "RedshiftType" - }, - { - "typeName": "resolution.Redshift", - "multiple": false, - "typeClass": "primitive", - "value": "600" - }, - { - "typeName": "coverage.RedshiftValue", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Redshift.MinimumValue": { - "typeName": "coverage.Redshift.MinimumValue", - "multiple": false, - "typeClass": "primitive", - "value": "701" - }, - "coverage.Redshift.MaximumValue": { - "typeName": "coverage.Redshift.MaximumValue", - "multiple": false, - "typeClass": "primitive", - "value": "702" - } - }, - { - "coverage.Redshift.MinimumValue": { - "typeName": "coverage.Redshift.MinimumValue", - "multiple": false, - "typeClass": "primitive", - "value": "703" - }, - "coverage.Redshift.MaximumValue": { - "typeName": "coverage.Redshift.MaximumValue", - "multiple": false, - "typeClass": "primitive", - "value": "704" - } - } - ] - } - ] - }, - "biomedical": { - "displayName": "Life Sciences Metadata", - "fields": [ - { - "typeName": "studyDesignType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Case Control", - "Cross Sectional", - "Cohort Study", - "Not Specified" - ] - }, - { - "typeName": "studyFactorType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Age", - "Biomarkers", - "Cell Surface Markers", - "Developmental Stage" - ] - }, - { - "typeName": "studyAssayOrganism", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Arabidopsis thaliana", - "Bos taurus", - "Caenorhabditis elegans", - "Danio rerio (zebrafish)" - ] - 
}, - { - "typeName": "studyAssayOtherOrganism", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherOrganism1", - "OtherOrganism2" - ] - }, - { - "typeName": "studyAssayMeasurementType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "genome sequencing", - "cell sorting", - "clinical chemistry analysis", - "DNA methylation profiling" - ] - }, - { - "typeName": "studyAssayOtherMeasurmentType", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherMeasurementType1", - "OtherMeasurementType2" - ] - }, - { - "typeName": "studyAssayTechnologyType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "culture based drug susceptibility testing, single concentration", - "culture based drug susceptibility testing, two concentrations", - "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", - "flow cytometry" - ] - }, - { - "typeName": "studyAssayPlatform", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "210-MS GC Ion Trap (Varian)", - "220-MS GC Ion Trap (Varian)", - "225-MS GC Ion Trap (Varian)", - "300-MS quadrupole GC/MS (Varian)" - ] - }, - { - "typeName": "studyAssayCellType", - "multiple": true, - "typeClass": "primitive", - "value": [ - "CellType1", - "CellType2" - ] - } - ] - }, - "journal": { - "displayName": "Journal Metadata", - "fields": [ - { - "typeName": "journalVolumeIssue", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "journalVolume": { - "typeName": "journalVolume", - "multiple": false, - "typeClass": "primitive", - "value": "JournalVolume1" - }, - "journalIssue": { - "typeName": "journalIssue", - "multiple": false, - "typeClass": "primitive", - "value": "JournalIssue1" - }, - "journalPubDate": { - "typeName": "journalPubDate", - "multiple": false, - "typeClass": "primitive", - "value": "1008-01-01" - } - }, - { - "journalVolume": { - "typeName": 
"journalVolume", - "multiple": false, - "typeClass": "primitive", - "value": "JournalVolume2" - }, - "journalIssue": { - "typeName": "journalIssue", - "multiple": false, - "typeClass": "primitive", - "value": "JournalIssue2" - }, - "journalPubDate": { - "typeName": "journalPubDate", - "multiple": false, - "typeClass": "primitive", - "value": "1008-02-01" - } - } - ] - }, - { - "typeName": "journalArticleType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "abstract" - } - ] - } - } - } -} + "datasetVersion" : { + "license" : { + "name" : "CC0 1.0", + "uri" : "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks" : { + "citation" : { + "displayName" : "Citation Metadata", + "fields" : [{ + "typeName" : "title", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Replication Data for: Title" + }, { + "typeName" : "subtitle", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Subtitle" + }, { + "typeName" : "alternativeTitle", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Alternative Title" + ] + }, { + "typeName" : "alternativeURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://AlternativeURL.org" + }, { + "typeName" : "otherId", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "otherIdAgency" : { + "typeName" : "otherIdAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDAgency1" + }, + "otherIdValue" : { + "typeName" : "otherIdValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDIdentifier1" + } + }, { + "otherIdAgency" : { + "typeName" : "otherIdAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDAgency2" + }, + "otherIdValue" : { + "typeName" : "otherIdValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDIdentifier2" + } + } + ] + }, { + "typeName" : "author", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + 
"authorName" : { + "typeName" : "authorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastAuthor1, FirstAuthor1" + }, + "authorAffiliation" : { + "typeName" : "authorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorAffiliation1" + }, + "authorIdentifierScheme" : { + "typeName" : "authorIdentifierScheme", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ORCID" + }, + "authorIdentifier" : { + "typeName" : "authorIdentifier", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorIdentifier1" + } + }, { + "authorName" : { + "typeName" : "authorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastAuthor2, FirstAuthor2" + }, + "authorAffiliation" : { + "typeName" : "authorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorAffiliation2" + }, + "authorIdentifierScheme" : { + "typeName" : "authorIdentifierScheme", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ISNI" + }, + "authorIdentifier" : { + "typeName" : "authorIdentifier", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorIdentifier2" + } + } + ] + }, { + "typeName" : "datasetContact", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "datasetContactName" : { + "typeName" : "datasetContactName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContact1, FirstContact1" + }, + "datasetContactAffiliation" : { + "typeName" : "datasetContactAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactAffiliation1" + }, + "datasetContactEmail" : { + "typeName" : "datasetContactEmail", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactEmail1@mailinator.com" + } + }, { + "datasetContactName" : { + "typeName" : "datasetContactName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContact2, FirstContact2" + }, + 
"datasetContactAffiliation" : { + "typeName" : "datasetContactAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactAffiliation2" + }, + "datasetContactEmail" : { + "typeName" : "datasetContactEmail", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactEmail2@mailinator.com" + } + } + ] + }, { + "typeName" : "dsDescription", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "dsDescriptionValue" : { + "typeName" : "dsDescriptionValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DescriptionText1" + }, + "dsDescriptionDate" : { + "typeName" : "dsDescriptionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1000-01-01" + } + }, { + "dsDescriptionValue" : { + "typeName" : "dsDescriptionValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DescriptionText2" + }, + "dsDescriptionDate" : { + "typeName" : "dsDescriptionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1000-02-02" + } + } + ] + }, { + "typeName" : "subject", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Agricultural Sciences", + "Business and Management", + "Engineering", + "Law" + ] + }, { + "typeName" : "keyword", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "keywordValue" : { + "typeName" : "keywordValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordTerm1" + }, + "keywordTermURI" : { + "typeName" : "keywordTermURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://keywordTermURI1.org" + }, + "keywordVocabulary" : { + "typeName" : "keywordVocabulary", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordVocabulary1" + }, + "keywordVocabularyURI" : { + "typeName" : "keywordVocabularyURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://KeywordVocabularyURL1.org" + } + }, { + "keywordValue" : { + "typeName" : "keywordValue", 
+ "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordTerm2" + }, + "keywordTermURI" : { + "typeName" : "keywordTermURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://keywordTermURI2.org" + }, + "keywordVocabulary" : { + "typeName" : "keywordVocabulary", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordVocabulary2" + }, + "keywordVocabularyURI" : { + "typeName" : "keywordVocabularyURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://KeywordVocabularyURL2.org" + } + } + ] + }, { + "typeName" : "topicClassification", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "topicClassValue" : { + "typeName" : "topicClassValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Term1" + }, + "topicClassVocab" : { + "typeName" : "topicClassVocab", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Vocab1" + }, + "topicClassVocabURI" : { + "typeName" : "topicClassVocabURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "https://TopicClassificationURL1.com" + } + }, { + "topicClassValue" : { + "typeName" : "topicClassValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Term2" + }, + "topicClassVocab" : { + "typeName" : "topicClassVocab", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Vocab2" + }, + "topicClassVocabURI" : { + "typeName" : "topicClassVocabURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "https://TopicClassificationURL2.com" + } + } + ] + }, { + "typeName" : "publication", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "publicationRelationType" : { + "typeName" : "publicationRelationType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "IsSupplementTo" + }, + "publicationCitation" : { + "typeName" : "publicationCitation", + "multiple" 
: false, + "typeClass" : "primitive", + "value" : "RelatedPublicationCitation1" + }, + "publicationIDType" : { + "typeName" : "publicationIDType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ark" + }, + "publicationIDNumber" : { + "typeName" : "publicationIDNumber", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationIDNumber1" + }, + "publicationURL" : { + "typeName" : "publicationURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://RelatedPublicationURL1.org" + } + }, { + "publicationCitation" : { + "typeName" : "publicationCitation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationCitation2" + }, + "publicationIDType" : { + "typeName" : "publicationIDType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "arXiv" + }, + "publicationIDNumber" : { + "typeName" : "publicationIDNumber", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationIDNumber2" + }, + "publicationURL" : { + "typeName" : "publicationURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://RelatedPublicationURL2.org" + } + } + ] + }, { + "typeName" : "notesText", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Notes1" + }, { + "typeName" : "language", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Abkhaz", + "Afar" + ] + }, { + "typeName" : "producer", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "producerName" : { + "typeName" : "producerName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastProducer1, FirstProducer1" + }, + "producerAffiliation" : { + "typeName" : "producerAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAffiliation1" + }, + "producerAbbreviation" : { + "typeName" : "producerAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : 
"ProducerAbbreviation1" + }, + "producerURL" : { + "typeName" : "producerURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerURL1.org" + }, + "producerLogoURL" : { + "typeName" : "producerLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerLogoURL1.org" + } + }, { + "producerName" : { + "typeName" : "producerName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastProducer2, FirstProducer2" + }, + "producerAffiliation" : { + "typeName" : "producerAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAffiliation2" + }, + "producerAbbreviation" : { + "typeName" : "producerAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAbbreviation2" + }, + "producerURL" : { + "typeName" : "producerURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerURL2.org" + }, + "producerLogoURL" : { + "typeName" : "producerLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerLogoURL2.org" + } + } + ] + }, { + "typeName" : "productionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1003-01-01" + }, { + "typeName" : "productionPlace", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "ProductionPlace" + ] + }, { + "typeName" : "contributor", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "contributorType" : { + "typeName" : "contributorType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Data Collector" + }, + "contributorName" : { + "typeName" : "contributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContributor1, FirstContributor1" + } + }, { + "contributorType" : { + "typeName" : "contributorType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Data Curator" + }, + "contributorName" : { + "typeName" : "contributorName", + 
"multiple" : false, + "typeClass" : "primitive", + "value" : "LastContributor2, FirstContributor2" + } + } + ] + }, { + "typeName" : "grantNumber", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "grantNumberAgency" : { + "typeName" : "grantNumberAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantAgency1" + }, + "grantNumberValue" : { + "typeName" : "grantNumberValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantNumber1" + } + }, { + "grantNumberAgency" : { + "typeName" : "grantNumberAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantAgency2" + }, + "grantNumberValue" : { + "typeName" : "grantNumberValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantNumber2" + } + } + ] + }, { + "typeName" : "distributor", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "distributorName" : { + "typeName" : "distributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDistributor1, FirstDistributor1" + }, + "distributorAffiliation" : { + "typeName" : "distributorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAffiliation1" + }, + "distributorAbbreviation" : { + "typeName" : "distributorAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAbbreviation1" + }, + "distributorURL" : { + "typeName" : "distributorURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorURL1.org" + }, + "distributorLogoURL" : { + "typeName" : "distributorLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorLogoURL1.org" + } + }, { + "distributorName" : { + "typeName" : "distributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDistributor2, FirstDistributor2" + }, + "distributorAffiliation" : { + "typeName" : 
"distributorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAffiliation2" + }, + "distributorAbbreviation" : { + "typeName" : "distributorAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAbbreviation2" + }, + "distributorURL" : { + "typeName" : "distributorURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorURL2.org" + }, + "distributorLogoURL" : { + "typeName" : "distributorLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorLogoURL2.org" + } + } + ] + }, { + "typeName" : "distributionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1004-01-01" + }, { + "typeName" : "depositor", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDepositor, FirstDepositor" + }, { + "typeName" : "dateOfDeposit", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1002-01-01" + }, { + "typeName" : "timePeriodCovered", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "timePeriodCoveredStart" : { + "typeName" : "timePeriodCoveredStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-01-01" + }, + "timePeriodCoveredEnd" : { + "typeName" : "timePeriodCoveredEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-01-02" + } + }, { + "timePeriodCoveredStart" : { + "typeName" : "timePeriodCoveredStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-02-01" + }, + "timePeriodCoveredEnd" : { + "typeName" : "timePeriodCoveredEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-02-02" + } + } + ] + }, { + "typeName" : "dateOfCollection", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "dateOfCollectionStart" : { + "typeName" : "dateOfCollectionStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-01-01" + }, + "dateOfCollectionEnd" : { + "typeName" 
: "dateOfCollectionEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-01-01" + } + }, { + "dateOfCollectionStart" : { + "typeName" : "dateOfCollectionStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-02-01" + }, + "dateOfCollectionEnd" : { + "typeName" : "dateOfCollectionEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-02-02" + } + } + ] + }, { + "typeName" : "kindOfData", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "KindOfData1", + "KindOfData2" + ] + }, { + "typeName" : "series", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "seriesName" : { + "typeName" : "seriesName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SeriesName" + }, + "seriesInformation" : { + "typeName" : "seriesInformation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SeriesInformation" + } + } + ] + }, { + "typeName" : "software", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "softwareName" : { + "typeName" : "softwareName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareName1" + }, + "softwareVersion" : { + "typeName" : "softwareVersion", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareVersion1" + } + }, { + "softwareName" : { + "typeName" : "softwareName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareName2" + }, + "softwareVersion" : { + "typeName" : "softwareVersion", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareVersion2" + } + } + ] + }, { + "typeName" : "relatedMaterial", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "RelatedMaterial1", + "RelatedMaterial2" + ] + }, { + "typeName" : "relatedDatasets", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "RelatedDatasets1", + "RelatedDatasets2" + ] + }, { + "typeName" : "otherReferences", + "multiple" : true, + "typeClass" : 
"primitive", + "value" : [ + "OtherReferences1", + "OtherReferences2" + ] + }, { + "typeName" : "dataSources", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "DataSources1", + "DataSources2" + ] + }, { + "typeName" : "originOfSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OriginOfSources" + }, { + "typeName" : "characteristicOfSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CharacteristicOfSourcesNoted" + }, { + "typeName" : "accessToSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DocumentationAndAccessToSources" + } + ] + }, + "geospatial" : { + "displayName" : "Geospatial Metadata", + "fields" : [{ + "typeName" : "geographicCoverage", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "country" : { + "typeName" : "country", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Afghanistan" + }, + "state" : { + "typeName" : "state", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageStateProvince1" + }, + "city" : { + "typeName" : "city", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageCity1" + }, + "otherGeographicCoverage" : { + "typeName" : "otherGeographicCoverage", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageOther1" + } + }, { + "country" : { + "typeName" : "country", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Albania" + }, + "state" : { + "typeName" : "state", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageStateProvince2" + }, + "city" : { + "typeName" : "city", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageCity2" + }, + "otherGeographicCoverage" : { + "typeName" : "otherGeographicCoverage", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageOther2" + } + } + ] + }, { + "typeName" : 
"geographicUnit", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "GeographicUnit1", + "GeographicUnit2" + ] + }, { + "typeName" : "geographicBoundingBox", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "westLongitude" : { + "typeName" : "westLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-72" + }, + "eastLongitude" : { + "typeName" : "eastLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-70" + }, + "northLatitude" : { + "typeName" : "northLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "43" + }, + "southLatitude" : { + "typeName" : "southLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "42" + } + }, { + "westLongitude" : { + "typeName" : "westLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-18" + }, + "eastLongitude" : { + "typeName" : "eastLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-13" + }, + "northLatitude" : { + "typeName" : "northLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "29" + }, + "southLatitude" : { + "typeName" : "southLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "28" + } + } + ] + } + ] + }, + "socialscience" : { + "displayName" : "Social Science and Humanities Metadata", + "fields" : [{ + "typeName" : "unitOfAnalysis", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "UnitOfAnalysis1", + "UnitOfAnalysis2" + ] + }, { + "typeName" : "universe", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Universe1", + "Universe2" + ] + }, { + "typeName" : "timeMethod", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TimeMethod" + }, { + "typeName" : "dataCollector", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDataCollector1, FirstDataCollector1" + }, { + "typeName" : "collectorTraining", + "multiple" : false, + "typeClass" : 
"primitive", + "value" : "CollectorTraining" + }, { + "typeName" : "frequencyOfDataCollection", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Frequency" + }, { + "typeName" : "samplingProcedure", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SamplingProcedure" + }, { + "typeName" : "targetSampleSize", + "multiple" : false, + "typeClass" : "compound", + "value" : { + "targetSampleActualSize" : { + "typeName" : "targetSampleActualSize", + "multiple" : false, + "typeClass" : "primitive", + "value" : "100" + }, + "targetSampleSizeFormula" : { + "typeName" : "targetSampleSizeFormula", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TargetSampleSizeFormula" + } + } + }, { + "typeName" : "deviationsFromSampleDesign", + "multiple" : false, + "typeClass" : "primitive", + "value" : "MajorDeviationsForSampleDesign" + }, { + "typeName" : "collectionMode", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "CollectionMode" + ] + }, { + "typeName" : "researchInstrument", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TypeOfResearchInstrument" + }, { + "typeName" : "dataCollectionSituation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CharacteristicsOfDataCollectionSituation" + }, { + "typeName" : "actionsToMinimizeLoss", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ActionsToMinimizeLosses" + }, { + "typeName" : "controlOperations", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ControlOperations" + }, { + "typeName" : "weighting", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Weighting" + }, { + "typeName" : "cleaningOperations", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CleaningOperations" + }, { + "typeName" : "datasetLevelErrorNotes", + "multiple" : false, + "typeClass" : "primitive", + "value" : "StudyLevelErrorNotes" + }, { + "typeName" : "responseRate", + "multiple" : false, + "typeClass" : 
"primitive", + "value" : "ResponseRate" + }, { + "typeName" : "samplingErrorEstimates", + "multiple" : false, + "typeClass" : "primitive", + "value" : "EstimatesOfSamplingError" + }, { + "typeName" : "otherDataAppraisal", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherFormsOfDataAppraisal" + }, { + "typeName" : "socialScienceNotes", + "multiple" : false, + "typeClass" : "compound", + "value" : { + "socialScienceNotesType" : { + "typeName" : "socialScienceNotesType", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesType" + }, + "socialScienceNotesSubject" : { + "typeName" : "socialScienceNotesSubject", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesSubject" + }, + "socialScienceNotesText" : { + "typeName" : "socialScienceNotesText", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesText" + } + } + } + ] + }, + "astrophysics" : { + "displayName" : "Astronomy and Astrophysics Metadata", + "fields" : [{ + "typeName" : "astroType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Image", + "Mosaic", + "EventList", + "Cube" + ] + }, { + "typeName" : "astroFacility", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Facility1", + "Facility2" + ] + }, { + "typeName" : "astroInstrument", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Instrument1", + "Instrument2" + ] + }, { + "typeName" : "astroObject", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Object1", + "Object2" + ] + }, { + "typeName" : "resolution.Spatial", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SpatialResolution" + }, { + "typeName" : "resolution.Spectral", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SpectralResolution" + }, { + "typeName" : "resolution.Temporal", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TimeResolution" + }, { + "typeName" : "coverage.Spectral.Bandpass", + 
"multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Bandpass1", + "Bandpass2" + ] + }, { + "typeName" : "coverage.Spectral.CentralWavelength", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "3001", + "3002" + ] + }, { + "typeName" : "coverage.Spectral.Wavelength", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Spectral.MinimumWavelength" : { + "typeName" : "coverage.Spectral.MinimumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4001" + }, + "coverage.Spectral.MaximumWavelength" : { + "typeName" : "coverage.Spectral.MaximumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4002" + } + }, { + "coverage.Spectral.MinimumWavelength" : { + "typeName" : "coverage.Spectral.MinimumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4003" + }, + "coverage.Spectral.MaximumWavelength" : { + "typeName" : "coverage.Spectral.MaximumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4004" + } + } + ] + }, { + "typeName" : "coverage.Temporal", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Temporal.StartTime" : { + "typeName" : "coverage.Temporal.StartTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-01-01" + }, + "coverage.Temporal.StopTime" : { + "typeName" : "coverage.Temporal.StopTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-01-02" + } + }, { + "coverage.Temporal.StartTime" : { + "typeName" : "coverage.Temporal.StartTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-02-01" + }, + "coverage.Temporal.StopTime" : { + "typeName" : "coverage.Temporal.StopTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-02-02" + } + } + ] + }, { + "typeName" : "coverage.Spatial", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "SkyCoverage1", + "SkyCoverage2" + ] + }, { + "typeName" 
: "coverage.Depth", + "multiple" : false, + "typeClass" : "primitive", + "value" : "200" + }, { + "typeName" : "coverage.ObjectDensity", + "multiple" : false, + "typeClass" : "primitive", + "value" : "300" + }, { + "typeName" : "coverage.ObjectCount", + "multiple" : false, + "typeClass" : "primitive", + "value" : "400" + }, { + "typeName" : "coverage.SkyFraction", + "multiple" : false, + "typeClass" : "primitive", + "value" : "500" + }, { + "typeName" : "coverage.Polarization", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Polarization" + }, { + "typeName" : "redshiftType", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RedshiftType" + }, { + "typeName" : "resolution.Redshift", + "multiple" : false, + "typeClass" : "primitive", + "value" : "600" + }, { + "typeName" : "coverage.RedshiftValue", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Redshift.MinimumValue" : { + "typeName" : "coverage.Redshift.MinimumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "701" + }, + "coverage.Redshift.MaximumValue" : { + "typeName" : "coverage.Redshift.MaximumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "702" + } + }, { + "coverage.Redshift.MinimumValue" : { + "typeName" : "coverage.Redshift.MinimumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "703" + }, + "coverage.Redshift.MaximumValue" : { + "typeName" : "coverage.Redshift.MaximumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "704" + } + } + ] + } + ] + }, + "biomedical" : { + "displayName" : "Life Sciences Metadata", + "fields" : [{ + "typeName" : "studyDesignType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Case Control", + "Cross Sectional", + "Cohort Study", + "Not Specified" + ] + }, { + "typeName" : "studyFactorType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Age", + "Biomarkers", + "Cell 
Surface Markers", + "Developmental Stage" + ] + }, { + "typeName" : "studyAssayOrganism", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Arabidopsis thaliana", + "Bos taurus", + "Caenorhabditis elegans", + "Danio rerio (zebrafish)" + ] + }, { + "typeName" : "studyAssayOtherOrganism", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherOrganism1", + "OtherOrganism2" + ] + }, { + "typeName" : "studyAssayMeasurementType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "genome sequencing", + "cell sorting", + "clinical chemistry analysis", + "DNA methylation profiling" + ] + }, { + "typeName" : "studyAssayOtherMeasurmentType", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherMeasurementType1", + "OtherMeasurementType2" + ] + }, { + "typeName" : "studyAssayTechnologyType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "culture based drug susceptibility testing, single concentration", + "culture based drug susceptibility testing, two concentrations", + "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", + "flow cytometry" + ] + }, { + "typeName" : "studyAssayPlatform", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "210-MS GC Ion Trap (Varian)", + "220-MS GC Ion Trap (Varian)", + "225-MS GC Ion Trap (Varian)", + "300-MS quadrupole GC/MS (Varian)" + ] + }, { + "typeName" : "studyAssayCellType", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "CellType1", + "CellType2" + ] + } + ] + }, + "journal" : { + "displayName" : "Journal Metadata", + "fields" : [{ + "typeName" : "journalVolumeIssue", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "journalVolume" : { + "typeName" : "journalVolume", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalVolume1" + }, + "journalIssue" : { + "typeName" 
: "journalIssue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalIssue1" + }, + "journalPubDate" : { + "typeName" : "journalPubDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1008-01-01" + } + }, { + "journalVolume" : { + "typeName" : "journalVolume", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalVolume2" + }, + "journalIssue" : { + "typeName" : "journalIssue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalIssue2" + }, + "journalPubDate" : { + "typeName" : "journalPubDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1008-02-01" + } + } + ] + }, { + "typeName" : "journalArticleType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "abstract" + } + ] + } + } + } +} \ No newline at end of file diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 18354f2b1f7..db4f1c9f30c 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -31,55 +31,56 @@ topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. 
survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. 
a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + publicationRelationType Relation Type The nature of the relationship between this Dataset and the related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://datacite.org/schema/kernel-4/simpleTypes#relationType + publicationCitation Citation The full bibliographic citation for the related publication textbox 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 32 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 33 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 34 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 35 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 36 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 37 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 38 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 40 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 41 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 42
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 43 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 44 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 45 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 46 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 48 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 51 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 52 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 54 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 55 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 56
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 57 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 60 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 62 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 63 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 65 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. 
survey data, clinical data, or machine-readable text) text 66 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 67 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 68 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 70 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 71 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 72 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 75 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. 
a book, article, serial, or machine-readable data file) textbox 76 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 79 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -325,3 +326,9 @@ language Zhuang, Chuang zha 183 zha za Zhuang Chuang language Zulu zul 184 zul zu language Not applicable 185 + publicationRelationType IsCitedBy RT1 1 + publicationRelationType Cites RT2 2 + publicationRelationType IsSupplementTo RT3 3 + publicationRelationType IsSupplementedBy RT4 4 + publicationRelationType IsReferencedBy RT5 5 + publicationRelationType References RT6 6 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 1313f3415ab..53ab6c7bef7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -40,6 +40,7 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String note = "note"; + public final static String publicationRelationType = "publicationRelationType"; public final static String publicationCitation = "publicationCitation"; public final static String publicationIDType = "publicationIDType"; public final static String publicationIDNumber = 
"publicationIDNumber"; diff --git a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java index 6b7cb844f3e..e74a2f26af6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java @@ -28,6 +28,8 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; + /** * * @author skraffmi @@ -301,26 +303,35 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO String IDType = ""; String IDNo = ""; String url = ""; + String relationType = null; for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); - if (DatasetFieldConstant.publicationCitation.equals(next.getTypeName())) { - citation = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDType.equals(next.getTypeName())) { - IDType = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDNumber.equals(next.getTypeName())) { - IDNo = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { - url = next.getSinglePrimitive(); + switch (next.getTypeName()) { + case DatasetFieldConstant.publicationCitation: + citation = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDType: + IDType = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDNumber: + IDNo = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationURL: + url = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationRelationType: + relationType = next.getSinglePrimitive(); + break; } } + if(StringUtils.isBlank(relationType)) { + relationType = "isReferencedBy"; + } pubString = appendCommaSeparatedValue(citation, IDType); 
pubString = appendCommaSeparatedValue(pubString, IDNo); pubString = appendCommaSeparatedValue(pubString, url); if (!pubString.isEmpty()){ - xmlw.writeStartElement(dcFlavor+":"+"isReferencedBy"); + xmlw.writeStartElement(dcFlavor+":" + relationType); xmlw.writeCharacters(pubString); xmlw.writeEndElement(); //relPubl } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index b4b5e597365..dd01750942d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -932,6 +932,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe String relatedIdentifierType = null; String relatedIdentifier = null; // is used when relatedIdentifierType variable is not URL String relatedURL = null; // is used when relatedIdentifierType variable is URL + String relationType = null; // is used when relatedIdentifierType variable is URL for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); @@ -944,6 +945,9 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { relatedURL = next.getSinglePrimitive(); } + if (DatasetFieldConstant.publicationRelationType.equals(next.getTypeName())) { + relationType = next.getSinglePrimitive(); + } } if (StringUtils.isNotBlank(relatedIdentifierType)) { @@ -956,7 +960,10 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe } relatedIdentifier_map.put("relatedIdentifierType", relatedIdentifierType); - relatedIdentifier_map.put("relationType", "IsCitedBy"); + if(relationType== null) { + relationType = "IsCitedBy"; + } + relatedIdentifier_map.put("relationType", relationType); if 
(StringUtils.containsIgnoreCase(relatedIdentifierType, "url")) { writeFullElement(xmlw, null, "relatedIdentifier", relatedIdentifier_map, relatedURL, language); diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index cb864eb78e9..a52a599cff3 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -31,6 +31,7 @@ datasetfieldtype.topicClassValue.title=Term datasetfieldtype.topicClassVocab.title=Controlled Vocabulary Name datasetfieldtype.topicClassVocabURI.title=Controlled Vocabulary URL datasetfieldtype.publication.title=Related Publication +datasetfieldtype.publicationRelationType.title=Relation Type datasetfieldtype.publicationCitation.title=Citation datasetfieldtype.publicationIDType.title=Identifier Type datasetfieldtype.publicationIDNumber.title=Identifier @@ -110,6 +111,7 @@ datasetfieldtype.topicClassValue.description=A topic or subject term datasetfieldtype.topicClassVocab.description=The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) datasetfieldtype.topicClassVocabURI.description=The URL where one can access information about the term's controlled vocabulary datasetfieldtype.publication.description=The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab +datasetfieldtype.publicationRelationType.description=The nature of the relationship between this Dataset and the related publication datasetfieldtype.publicationCitation.description=The full bibliographic citation for the related publication datasetfieldtype.publicationIDType.description=The type of identifier that uniquely identifies a related publication datasetfieldtype.publicationIDNumber.description=The identifier for a related publication diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 2da15147255..8350c5d9875 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -617,7 +617,7 @@ public void testWriteRelatedIdentifierElement() throws XMLStreamException, IOExc //then assertEquals("" - + "" + + "" + "RelatedPublicationIDNumber1" + "" + "RelatedPublicationIDNumber2" From 93faadeb7aaa8093369d327cc0394c49ee090850 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:24:58 -0400 Subject: [PATCH 121/486] missing element for openaireutil test --- .../harvard/iq/dataverse/export/dataset-all-defaults.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index 431f069cb03..6b3c7689bbf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -324,6 +324,12 @@ "typeClass": "compound", "value": [ { + "publicationRelationType": { + "typeName": "publicationRelationType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "IsSupplementTo" + }, "publicationCitation": { "typeName": 
"publicationCitation", "multiple": false, From c9084e3058045fb54fd960a203588f371ce59b2c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:57:02 -0400 Subject: [PATCH 122/486] contributor type null fix --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 9ba1e4e3116..92bcf8b481f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -546,7 +546,10 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X contributor = subField.getValue(); break; case DatasetFieldConstant.contributorType: - contributorType = subField.getValue().replace(" ", ""); + contributorType = subField.getValue(); + if(contributorType!=null) { + contributorType = contributorType.replace(" ", ""); + } break; } } From cdd6d6fb4357fe2e63dbf597faf57d59c8625670 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 16:05:40 -0400 Subject: [PATCH 123/486] add relationType to base code and DataCite XML --- .../iq/dataverse/DatasetRelPublication.java | 29 ++++++++++----- .../harvard/iq/dataverse/DatasetVersion.java | 37 +++++++++++++------ .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++-- 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java index 7680ebc16db..a0696ab38d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java @@ -6,7 +6,6 @@ package edu.harvard.iq.dataverse; - /** * * @author skraffmiller @@ -25,10 +24,12 @@ public class 
DatasetRelPublication { private String description; private boolean replicationData; private int displayOrder; + private String relationType; public int getDisplayOrder() { return displayOrder; } + public void setDisplayOrder(int displayOrder) { this.displayOrder = displayOrder; } @@ -64,8 +65,7 @@ public String getUrl() { public void setUrl(String url) { this.url = url; } - - + public String getTitle() { return title; } @@ -82,12 +82,21 @@ public void setDescription(String description) { this.description = description; } - public boolean isEmpty() { - return ((text==null || text.trim().equals("")) - && (!replicationData) - && (idType==null || idType.trim().equals("")) - && (idNumber==null || idNumber.trim().equals("")) - && (url==null || url.trim().equals(""))); - } + public void setRelationType(String type) { + relationType = type; + + } + + public String getRelationType() { + return relationType; + } + + public boolean isEmpty() { + return ((text == null || text.trim().equals("")) + && (!replicationData) + && (idType == null || idType.trim().equals("")) + && (idNumber == null || idNumber.trim().equals("")) + && (url == null || url.trim().equals(""))); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 6648419216d..eb6fdd4e923 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1356,24 +1356,37 @@ public List getRelatedPublications() { for (DatasetFieldCompoundValue publication : dsf.getDatasetFieldCompoundValues()) { DatasetRelPublication relatedPublication = new DatasetRelPublication(); for (DatasetField subField : publication.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { - String citation = subField.getDisplayValue(); - relatedPublication.setText(citation); - } - - - if 
(subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType - // for this url metadata field is likely set up so that the display value is automatically - // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. - // So we want to use the raw value of the field instead, with - // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()). + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: + relatedPublication.setText(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationURL: + // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType + // for this url metadata field is likely set up so that the display value is automatically + // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org + // JSON-LD output. So we want to use the raw value of the field instead, with minimal HTML + // sanitation, just in case (this would be done on all URLs in getDisplayValue()). 
String url = subField.getValue(); if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { relatedPublication.setUrl(""); } else { relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); } + break; + case DatasetFieldConstant.publicationIDType: + // QDR idType has a trailing : now (Aug 2021) + // Get sanitized value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdType(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationIDNumber: + // Get sanitized value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdNumber(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationRelationType: + relatedPublication.setRelationType(subField.getDisplayValue()); + break; } } relatedPublications.add(relatedPublication); diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 92bcf8b481f..03d4de99691 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -4,6 +4,8 @@ import java.io.IOException; import java.io.OutputStream; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.text.ParseException; import java.util.ArrayList; @@ -872,6 +874,10 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th String pubIdType = relatedPub.getIdType(); String identifier = relatedPub.getIdNumber(); String url = relatedPub.getUrl(); + String relationType = relatedPub.getRelationType(); + if(StringUtils.isBlank(relationType)) { + relationType = "IsSupplementTo"; + } /* * Note - with identifier and url fields, it's not clear that there's a 
single * way those two fields are used for all identifier types. The code here is @@ -921,13 +927,13 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th // For non-URL types, if a URL is given, split the string to get a schemeUri try { - URL relatedUrl = new URL(relatedIdentifier); + URL relatedUrl = new URI(relatedIdentifier).toURL(); String protocol = relatedUrl.getProtocol(); String authority = relatedUrl.getAuthority(); String site = String.format("%s://%s", protocol, authority); relatedIdentifier = relatedIdentifier.substring(site.length()); attributes.put("schemeURI", site); - } catch (MalformedURLException e) { + } catch (URISyntaxException | MalformedURLException e) { // Just an identifier } } @@ -937,7 +943,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { attributes.put("relatedIdentifierType", pubIdType); } - attributes.put("relationType", "IsSupplementTo"); + attributes.put("relationType", relationType); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); } From 360d3fac049239cfc4f41b6be83b8ecc0b16b475 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 16:05:57 -0400 Subject: [PATCH 124/486] add relationType to above fold display --- .../iq/dataverse/DatasetVersionUI.java | 32 +++++++++++++------ src/main/webapp/dataset.xhtml | 6 ++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java index 55b98c178bb..975de391d8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java @@ -62,14 +62,14 @@ public void setMetadataBlocksForEdit(TreeMap> public DatasetVersionUI 
initDatasetVersionUI(DatasetVersion datasetVersion, boolean createBlanks) { /*takes in the values of a dataset version and apportions them into lists for - viewing and editng in the dataset page. + viewing and editing in the dataset page. */ setDatasetVersion(datasetVersion); //this.setDatasetAuthors(new ArrayList()); this.setDatasetRelPublications(new ArrayList<>()); - // loop through vaues to get fields for view mode + // loop through values to get fields for view mode for (DatasetField dsf : datasetVersion.getDatasetFields()) { //Special Handling for various fields displayed above tabs in dataset page view. if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.title)) { @@ -114,17 +114,23 @@ public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boo datasetRelPublication.setTitle(dsf.getDatasetFieldType().getLocaleTitle()); datasetRelPublication.setDescription(dsf.getDatasetFieldType().getLocaleDescription()); for (DatasetField subField : relPubVal.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { + String value = subField.getValue(); + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: datasetRelPublication.setText(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDNumber)) { + break; + case DatasetFieldConstant.publicationIDNumber: datasetRelPublication.setIdNumber(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDType)) { + break; + case DatasetFieldConstant.publicationIDType: datasetRelPublication.setIdType(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { + break; + case DatasetFieldConstant.publicationURL: datasetRelPublication.setUrl(subField.getValue()); + break; + case 
DatasetFieldConstant.publicationRelationType: + datasetRelPublication.setRelationType(subField.getValue()); + break; } } this.getDatasetRelPublications().add(datasetRelPublication); @@ -263,6 +269,14 @@ public String getRelPublicationUrl() { } } + public String getRelPublicationRelationType() { + if (!this.datasetRelPublications.isEmpty()) { + return this.getDatasetRelPublications().get(0).getRelationType(); + } else { + return ""; + } + } + public String getUNF() { //todo get UNF to calculate and display here. return ""; diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 13faf9d7f20..4fd91f24c36 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -631,6 +631,7 @@ or !empty DatasetPage.datasetVersionUI.keywordDisplay or !empty DatasetPage.datasetVersionUI.subject.value or !empty DatasetPage.datasetVersionUI.relPublicationCitation + or !empty DatasetPage.datasetVersionUI.relPublicationUrl or !empty DatasetPage.datasetVersionUI.notes.value) and !empty DatasetPage.datasetSummaryFields}"> @@ -650,8 +651,9 @@ data-toggle="tooltip" data-placement="auto right" data-original-title="#{DatasetPage.datasetVersionUI.datasetRelPublications.get(0).description}">