diff --git a/.env b/.env
new file mode 100644
index 00000000000..3467f8df73b
--- /dev/null
+++ b/.env
@@ -0,0 +1,4 @@
+APP_IMAGE=gdcc/dataverse:unstable
+POSTGRES_VERSION=13
+DATAVERSE_DB_USER=dataverse
+SOLR_VERSION=8.11.1
diff --git a/.github/workflows/container_app_pr.yml b/.github/workflows/container_app_pr.yml
new file mode 100644
index 00000000000..9e514690a13
--- /dev/null
+++ b/.github/workflows/container_app_pr.yml
@@ -0,0 +1,96 @@
+---
+name: Preview Application Container Image
+
+on:
+ # We only run the push commands if we are asked to by an issue comment with the correct command.
+ # This workflow is always taken from the default branch and runs in repo context with access to secrets.
+ repository_dispatch:
+ types: [ push-image-command ]
+
+env:
+ IMAGE_TAG: unstable
+ BASE_IMAGE_TAG: unstable
+ PLATFORMS: "linux/amd64,linux/arm64"
+
+jobs:
+ deploy:
+ name: "Package & Push"
+ runs-on: ubuntu-latest
+ # Only run in upstream repo - avoid unnecessary runs in forks
+ if: ${{ github.repository_owner == 'IQSS' }}
+ steps:
+ # Checkout the pull request code as when merged
+ - uses: actions/checkout@v3
+ with:
+ ref: 'refs/pull/${{ github.event.client_payload.pull_request.number }}/merge'
+ - uses: actions/setup-java@v3
+ with:
+ java-version: "11"
+ distribution: 'adopt'
+ - uses: actions/cache@v3
+ with:
+ path: ~/.m2
+ key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: ${{ runner.os }}-m2
+
+ # Note: Accessing, pushing tags etc. to GHCR will only succeed in the upstream repo, because that is where the secrets live.
+ - name: Login to Github Container Registry
+ uses: docker/login-action@v2
+ with:
+ registry: ghcr.io
+ username: ${{ secrets.GHCR_USERNAME }}
+ password: ${{ secrets.GHCR_TOKEN }}
+
+ - name: Set up QEMU for multi-arch builds
+ uses: docker/setup-qemu-action@v2
+
+ # Get the image tag from either the command or default to branch name (Not used for now)
+ #- name: Get the target tag name
+ # id: vars
+ # run: |
+ # tag=${{ github.event.client_payload.slash_command.args.named.tag }}
+ # if [[ -z "$tag" ]]; then tag=$(echo "${{ github.event.client_payload.pull_request.head.ref }}" | tr '\\/_:&+,;#*' '-'); fi
+ # echo "IMAGE_TAG=$tag" >> $GITHUB_ENV
+
+ # Set image tag to branch name of the PR
+ - name: Set image tag to branch name
+ run: |
+ echo "IMAGE_TAG=$(echo "${{ github.event.client_payload.pull_request.head.ref }}" | tr '\\/_:&+,;#*' '-')" >> $GITHUB_ENV
+
+ # Necessary to split into two steps, as otherwise the submodules are not available (deploy skips install)
+ - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests)
+ run: >
+ mvn -B -f modules/dataverse-parent
+ -P ct -pl edu.harvard.iq:dataverse -am
+ install
+ - name: Deploy multi-arch application and configbaker container image
+ run: >
+ mvn
+ -Dapp.image.tag=${{ env.IMAGE_TAG }} -Dbase.image.tag=${{ env.BASE_IMAGE_TAG }}
+ -Ddocker.registry=ghcr.io -Ddocker.platforms=${{ env.PLATFORMS }}
+ -Pct deploy
+
+ - uses: marocchino/sticky-pull-request-comment@v2
+ with:
+ header: registry-push
+ hide_and_recreate: true
+ hide_classify: "OUTDATED"
+ number: ${{ github.event.client_payload.pull_request.number }}
+ message: |
+ :package: Pushed preview images as
+ ```
+ ghcr.io/gdcc/dataverse:${{ env.IMAGE_TAG }}
+ ```
+ ```
+ ghcr.io/gdcc/configbaker:${{ env.IMAGE_TAG }}
+ ```
+ :ship: [See on GHCR](https://github.com/orgs/gdcc/packages/container). Use them by referencing the full name as printed above; mind the registry name.
+
+ # Leave a note when things have gone sideways
+ - uses: peter-evans/create-or-update-comment@v3
+ if: ${{ failure() }}
+ with:
+ issue-number: ${{ github.event.client_payload.pull_request.number }}
+ body: >
+ :package: Could not push preview images :disappointed:.
+ See [log](https://github.com/IQSS/dataverse/actions/runs/${{ github.run_id }}) for details.
diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml
new file mode 100644
index 00000000000..c60691b1c85
--- /dev/null
+++ b/.github/workflows/container_app_push.yml
@@ -0,0 +1,167 @@
+---
+name: Application Container Image
+
+on:
+ # We are deliberately *not* running on push events here to avoid double runs.
+ # Instead, push events trigger this workflow via workflow_call from the base image and Maven unit test workflows.
+ workflow_call:
+ pull_request:
+ branches:
+ - develop
+ - master
+ paths:
+ - 'src/main/docker/**'
+ - 'modules/container-configbaker/**'
+ - '.github/workflows/container_app_push.yml'
+
+env:
+ IMAGE_TAG: unstable
+ BASE_IMAGE_TAG: unstable
+ REGISTRY: "" # Empty means default to Docker Hub
+ PLATFORMS: "linux/amd64,linux/arm64"
+ MASTER_BRANCH_TAG: alpha
+
+jobs:
+ build:
+ name: "Build & Test"
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ packages: write
+ pull-requests: write
+ # Only run in upstream repo - avoid unnecessary runs in forks
+ if: ${{ github.repository_owner == 'IQSS' }}
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v3
+
+ - name: Set up JDK 11
+ uses: actions/setup-java@v3
+ with:
+ java-version: "11"
+ distribution: temurin
+ cache: maven
+
+ - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests)
+ run: >
+ mvn -B -f modules/dataverse-parent
+ -P ct -pl edu.harvard.iq:dataverse -am
+ install
+
+ # TODO: add smoke / integration testing here (add "-Pct -DskipIntegrationTests=false")
+
+ hub-description:
+ needs: build
+ name: Push image descriptions to Docker Hub
+ # Run this when triggered via push or schedule as reused workflow from base / maven unit tests.
+ # Excluding PRs here avoids any trouble with secrets access; it also avoids runs in forks.
+ if: ${{ github.event_name != 'pull_request' && github.ref_name == 'develop' && github.repository_owner == 'IQSS' }}
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: peter-evans/dockerhub-description@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+ repository: gdcc/dataverse
+ short-description: "Dataverse Application Container Image providing the executable"
+ readme-filepath: ./src/main/docker/README.md
+ - uses: peter-evans/dockerhub-description@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+ repository: gdcc/configbaker
+ short-description: "Dataverse Config Baker Container Image providing setup tooling and more"
+ readme-filepath: ./modules/container-configbaker/README.md
+
+ # Note: Accessing, pushing tags etc. to DockerHub or GHCR will only succeed in the upstream repo, because that is where the secrets live.
+ # We check for them here and subsequent jobs can rely on this to decide if they shall run.
+ check-secrets:
+ needs: build
+ name: Check for Secrets Availability
+ runs-on: ubuntu-latest
+ outputs:
+ available: ${{ steps.secret-check.outputs.available }}
+ steps:
+ - id: secret-check
+ # perform secret check & put boolean result as an output
+ shell: bash
+ run: |
+ if [ "${{ secrets.DOCKERHUB_TOKEN }}" != '' ]; then
+ echo "available=true" >> $GITHUB_OUTPUT;
+ else
+ echo "available=false" >> $GITHUB_OUTPUT;
+ fi
+
+ deploy:
+ needs: check-secrets
+ name: "Package & Publish"
+ runs-on: ubuntu-latest
+ # Only run this job if we have access to secrets. This is true for events like push/schedule, which run in the
+ # context of the main repo, but for PRs it is only true if they come from the main repo! Forks have no secrets access.
+ if: needs.check-secrets.outputs.available == 'true'
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-java@v3
+ with:
+ java-version: "11"
+ distribution: temurin
+
+ # Depending on context, we push to different targets. Login accordingly.
+ - if: ${{ github.event_name != 'pull_request' }}
+ name: Log in to Docker Hub registry
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+ - if: ${{ github.event_name == 'pull_request' }}
+ name: Login to Github Container Registry
+ uses: docker/login-action@v2
+ with:
+ registry: ghcr.io
+ username: ${{ secrets.GHCR_USERNAME }}
+ password: ${{ secrets.GHCR_TOKEN }}
+
+ - name: Set up QEMU for multi-arch builds
+ uses: docker/setup-qemu-action@v2
+
+ - name: Re-set image tag based on branch (if master)
+ if: ${{ github.ref_name == 'master' }}
+ run: |
+ echo "IMAGE_TAG=${{ env.MASTER_BRANCH_TAG }}" >> $GITHUB_ENV
+ echo "BASE_IMAGE_TAG=${{ env.MASTER_BRANCH_TAG }}" >> $GITHUB_ENV
+ - name: Re-set image tag and container registry when on PR
+ if: ${{ github.event_name == 'pull_request' }}
+ run: |
+ echo "IMAGE_TAG=$(echo "$GITHUB_HEAD_REF" | tr '\\/_:&+,;#*' '-')" >> $GITHUB_ENV
+ echo "REGISTRY='-Ddocker.registry=ghcr.io'" >> $GITHUB_ENV
+
+ # Necessary to split into two steps, as otherwise the submodules are not available (deploy skips install)
+ - name: Build app and configbaker container image with local architecture and submodules (profile will skip tests)
+ run: >
+ mvn -B -f modules/dataverse-parent
+ -P ct -pl edu.harvard.iq:dataverse -am
+ install
+ - name: Deploy multi-arch application and configbaker container image
+ run: >
+ mvn
+ -Dapp.image.tag=${{ env.IMAGE_TAG }} -Dbase.image.tag=${{ env.BASE_IMAGE_TAG }}
+ ${{ env.REGISTRY }} -Ddocker.platforms=${{ env.PLATFORMS }}
+ -P ct deploy
+
+ - uses: marocchino/sticky-pull-request-comment@v2
+ if: ${{ github.event_name == 'pull_request' }}
+ with:
+ header: registry-push
+ hide_and_recreate: true
+ hide_classify: "OUTDATED"
+ message: |
+ :package: Pushed preview images as
+ ```
+ ghcr.io/gdcc/dataverse:${{ env.IMAGE_TAG }}
+ ```
+ ```
+ ghcr.io/gdcc/configbaker:${{ env.IMAGE_TAG }}
+ ```
+ :ship: [See on GHCR](https://github.com/orgs/gdcc/packages/container). Use them by referencing the full name as printed above; mind the registry name.
diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml
index 8f440151d0c..5c62fb0c811 100644
--- a/.github/workflows/container_base_push.yml
+++ b/.github/workflows/container_base_push.yml
@@ -1,5 +1,5 @@
---
-name: Container Base Module
+name: Base Container Image
on:
push:
@@ -18,9 +18,12 @@ on:
- 'modules/container-base/**'
- 'modules/dataverse-parent/pom.xml'
- '.github/workflows/container_base_push.yml'
+ schedule:
+ - cron: '23 3 * * 0' # Run for 'develop' every Sunday at 03:23 UTC
env:
IMAGE_TAG: unstable
+ PLATFORMS: linux/amd64,linux/arm64
jobs:
build:
@@ -79,7 +82,18 @@ jobs:
uses: docker/setup-qemu-action@v2
- name: Re-set image tag based on branch
if: ${{ github.ref_name == 'master' }}
- run: echo "IMAGE_TAG=stable"
+ run: echo "IMAGE_TAG=alpha" >> $GITHUB_ENV
- if: ${{ github.event_name != 'pull_request' }}
name: Deploy multi-arch base container image to Docker Hub
- run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }}
+ run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.platforms=${{ env.PLATFORMS }}
+ push-app-img:
+ name: "Rebase & Publish App Image"
+ permissions:
+ contents: read
+ packages: write
+ pull-requests: write
+ needs: build
+ # We do not release a new base image for pull requests, so do not trigger.
+ if: ${{ github.event_name != 'pull_request' }}
+ uses: ./.github/workflows/container_app_push.yml
+ secrets: inherit
diff --git a/.github/workflows/deploy_beta_testing.yml b/.github/workflows/deploy_beta_testing.yml
new file mode 100644
index 00000000000..3e67bfe426e
--- /dev/null
+++ b/.github/workflows/deploy_beta_testing.yml
@@ -0,0 +1,80 @@
+name: 'Deploy to Beta Testing'
+
+on:
+ push:
+ branches:
+ - develop
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ environment: beta-testing
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - uses: actions/setup-java@v3
+ with:
+ distribution: 'zulu'
+ java-version: '11'
+
+ - name: Build application war
+ run: mvn package
+
+ - name: Get war file name
+ working-directory: target
+ run: echo "war_file=$(ls *.war | head -1)">> $GITHUB_ENV
+
+ - name: Upload war artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: built-app
+ path: ./target/${{ env.war_file }}
+
+ deploy-to-payara:
+ needs: build
+ runs-on: ubuntu-latest
+ environment: beta-testing
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Download war artifact
+ uses: actions/download-artifact@v3
+ with:
+ name: built-app
+ path: ./
+
+ - name: Get war file name
+ run: echo "war_file=$(ls *.war | head -1)">> $GITHUB_ENV
+
+ - name: Copy war file to remote instance
+ uses: appleboy/scp-action@master
+ with:
+ host: ${{ secrets.PAYARA_INSTANCE_HOST }}
+ username: ${{ secrets.PAYARA_INSTANCE_USERNAME }}
+ key: ${{ secrets.PAYARA_INSTANCE_SSH_PRIVATE_KEY }}
+ source: './${{ env.war_file }}'
+ target: '/home/${{ secrets.PAYARA_INSTANCE_USERNAME }}'
+ overwrite: true
+
+ - name: Execute payara war deployment remotely
+ uses: appleboy/ssh-action@v1.0.0
+ env:
+ INPUT_WAR_FILE: ${{ env.war_file }}
+ with:
+ host: ${{ secrets.PAYARA_INSTANCE_HOST }}
+ username: ${{ secrets.PAYARA_INSTANCE_USERNAME }}
+ key: ${{ secrets.PAYARA_INSTANCE_SSH_PRIVATE_KEY }}
+ envs: INPUT_WAR_FILE
+ script: |
+ APPLICATION_NAME=dataverse-backend
+ ASADMIN='/usr/local/payara5/bin/asadmin --user admin'
+ $ASADMIN undeploy $APPLICATION_NAME
+ $ASADMIN stop-domain
+ rm -rf /usr/local/payara5/glassfish/domains/domain1/generated
+ rm -rf /usr/local/payara5/glassfish/domains/domain1/osgi-cache
+ $ASADMIN start-domain
+ $ASADMIN deploy --name $APPLICATION_NAME $INPUT_WAR_FILE
+ $ASADMIN stop-domain
+ $ASADMIN start-domain
diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml
index e2048f73431..45beabf3193 100644
--- a/.github/workflows/maven_unit_test.yml
+++ b/.github/workflows/maven_unit_test.yml
@@ -6,11 +6,15 @@ on:
- "**.java"
- "pom.xml"
- "modules/**/pom.xml"
+ - "!modules/container-base/**"
+ - "!modules/dataverse-spi/**"
pull_request:
paths:
- "**.java"
- "pom.xml"
- "modules/**/pom.xml"
+ - "!modules/container-base/**"
+ - "!modules/dataverse-spi/**"
jobs:
unittest:
@@ -33,22 +37,43 @@ jobs:
continue-on-error: ${{ matrix.experimental }}
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Set up JDK ${{ matrix.jdk }}
- uses: actions/setup-java@v2
+ uses: actions/setup-java@v3
with:
java-version: ${{ matrix.jdk }}
- distribution: 'adopt'
- - name: Cache Maven packages
- uses: actions/cache@v2
- with:
- path: ~/.m2
- key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
- restore-keys: ${{ runner.os }}-m2
+ distribution: temurin
+ cache: maven
+
+ # The reason why we use "install" here is that we want the submodules to be available in the next step.
+ # Also, we can cache them this way for jobs triggered by this one.
- name: Build with Maven
- run: mvn -DcompilerArgument=-Xlint:unchecked -Dtarget.java.version=${{ matrix.jdk }} -P all-unit-tests clean test
+ run: >
+ mvn -B -f modules/dataverse-parent
+ -Dtarget.java.version=${{ matrix.jdk }}
+ -DcompilerArgument=-Xlint:unchecked -P all-unit-tests
+ -pl edu.harvard.iq:dataverse -am
+ install
+
- name: Maven Code Coverage
env:
CI_NAME: github
COVERALLS_SECRET: ${{ secrets.GITHUB_TOKEN }}
- run: mvn -V -B jacoco:report coveralls:report -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }}
\ No newline at end of file
+ # The coverage commit is sometimes flaky. Don't bail out just because this optional step failed.
+ continue-on-error: true
+ run: >
+ mvn -B
+ -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }}
+ jacoco:report coveralls:report
+
+ # We don't want to cache the WAR file, so delete it
+ - run: rm -rf ~/.m2/repository/edu/harvard/iq/dataverse
+ push-app-img:
+ name: Publish App Image
+ permissions:
+ contents: read
+ packages: write
+ pull-requests: write
+ needs: unittest
+ uses: ./.github/workflows/container_app_push.yml
+ secrets: inherit
diff --git a/.github/workflows/pr_comment_commands.yml b/.github/workflows/pr_comment_commands.yml
new file mode 100644
index 00000000000..5ff75def623
--- /dev/null
+++ b/.github/workflows/pr_comment_commands.yml
@@ -0,0 +1,20 @@
+name: PR Comment Commands
+on:
+ issue_comment:
+ types: [created]
+jobs:
+ dispatch:
+ # Only run in upstream repo - avoid unnecessary runs in forks
+ if: ${{ github.repository_owner == 'IQSS' }}
+ runs-on: ubuntu-latest
+ steps:
+ - name: Dispatch
+ uses: peter-evans/slash-command-dispatch@v3
+ with:
+ # This token belongs to @dataversebot and has sufficient scope.
+ token: ${{ secrets.GHCR_TOKEN }}
+ commands: |
+ push-image
+ repository: IQSS/dataverse
+ # Commenter must have at least write permission to repo to trigger dispatch
+ permission: write
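+ # Example: a user with write access comments "/push-image" on a pull request;
+ # slash-command-dispatch then sends a "push-image-command" repository_dispatch event to IQSS/dataverse,
+ # which triggers .github/workflows/container_app_pr.yml (see above) from the default branch.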
diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
index 2d910f54127..94ba041e135 100644
--- a/.github/workflows/shellcheck.yml
+++ b/.github/workflows/shellcheck.yml
@@ -1,19 +1,27 @@
name: "Shellcheck"
on:
push:
+ branches:
+ - develop
paths:
- - conf/solr/**
- - modules/container-base/**
+ - conf/solr/**/*.sh
+ - modules/container-base/**/*.sh
+ - modules/container-configbaker/**/*.sh
pull_request:
+ branches:
+ - develop
paths:
- - conf/solr/**
- - modules/container-base/**
+ - conf/solr/**/*.sh
+ - modules/container-base/**/*.sh
+ - modules/container-configbaker/**/*.sh
jobs:
shellcheck:
name: Shellcheck
runs-on: ubuntu-latest
+ permissions:
+ pull-requests: write
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: shellcheck
uses: reviewdog/action-shellcheck@v1
with:
@@ -21,4 +29,19 @@ jobs:
reporter: github-pr-review # Change reporter.
fail_on_error: true
# Container base image uses dumb-init shebang, so nail to using bash
- shellcheck_flags: "--shell=bash --external-sources"
\ No newline at end of file
+ shellcheck_flags: "--shell=bash --external-sources"
+ # Exclude old scripts
+ exclude: |
+ */.git/*
+ conf/docker-aio/*
+ doc/*
+ downloads/*
+ scripts/database/*
+ scripts/globalid/*
+ scripts/icons/*
+ scripts/installer/*
+ scripts/issues/*
+ scripts/r/*
+ scripts/tests/*
+ scripts/vagrant/*
+ tests/*
diff --git a/.github/workflows/spi_release.yml b/.github/workflows/spi_release.yml
new file mode 100644
index 00000000000..1fbf05ce693
--- /dev/null
+++ b/.github/workflows/spi_release.yml
@@ -0,0 +1,94 @@
+name: Dataverse SPI
+
+on:
+ push:
+ branches:
+ - "develop"
+ paths:
+ - "modules/dataverse-spi/**"
+ pull_request:
+ branches:
+ - "develop"
+ paths:
+ - "modules/dataverse-spi/**"
+
+jobs:
+ # Note: Pushing packages to Maven Central requires access to secrets, which pull requests from remote forks
+ # don't have. Skip in these cases.
+ check-secrets:
+ name: Check for Secrets Availability
+ runs-on: ubuntu-latest
+ outputs:
+ available: ${{ steps.secret-check.outputs.available }}
+ steps:
+ - id: secret-check
+ # perform secret check & put boolean result as an output
+ shell: bash
+ run: |
+ if [ "${{ secrets.DATAVERSEBOT_SONATYPE_USERNAME }}" != '' ]; then
+ echo "available=true" >> $GITHUB_OUTPUT;
+ else
+ echo "available=false" >> $GITHUB_OUTPUT;
+ fi
+
+ snapshot:
+ name: Release Snapshot
+ needs: check-secrets
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request' && needs.check-secrets.outputs.available == 'true'
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-java@v3
+ with:
+ java-version: '11'
+ distribution: 'adopt'
+ server-id: ossrh
+ server-username: MAVEN_USERNAME
+ server-password: MAVEN_PASSWORD
+ - uses: actions/cache@v2
+ with:
+ path: ~/.m2
+ key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: ${{ runner.os }}-m2
+
+ - name: Deploy Snapshot
+ run: mvn -f modules/dataverse-spi -Dproject.version.suffix="-PR${{ github.event.number }}-SNAPSHOT" deploy
+ env:
+ MAVEN_USERNAME: ${{ secrets.DATAVERSEBOT_SONATYPE_USERNAME }}
+ MAVEN_PASSWORD: ${{ secrets.DATAVERSEBOT_SONATYPE_TOKEN }}
+
+ release:
+ name: Release
+ needs: check-secrets
+ runs-on: ubuntu-latest
+ if: github.event_name == 'push' && needs.check-secrets.outputs.available == 'true'
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-java@v3
+ with:
+ java-version: '11'
+ distribution: 'adopt'
+ - uses: actions/cache@v2
+ with:
+ path: ~/.m2
+ key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: ${{ runner.os }}-m2
+
+ # Running setup-java again overwrites the settings.xml - IT'S MANDATORY TO DO THIS SECOND SETUP!!!
+ - name: Set up Maven Central Repository
+ uses: actions/setup-java@v3
+ with:
+ java-version: '11'
+ distribution: 'adopt'
+ server-id: ossrh
+ server-username: MAVEN_USERNAME
+ server-password: MAVEN_PASSWORD
+ gpg-private-key: ${{ secrets.DATAVERSEBOT_GPG_KEY }}
+ gpg-passphrase: MAVEN_GPG_PASSPHRASE
+
+ - name: Sign + Publish Release
+ run: mvn -f modules/dataverse-spi -P release deploy
+ env:
+ MAVEN_USERNAME: ${{ secrets.DATAVERSEBOT_SONATYPE_USERNAME }}
+ MAVEN_PASSWORD: ${{ secrets.DATAVERSEBOT_SONATYPE_TOKEN }}
+ MAVEN_GPG_PASSPHRASE: ${{ secrets.DATAVERSEBOT_GPG_PASSWORD }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 83671abf43e..d38538fc364 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,6 @@ src/main/webapp/resources/images/dataverseproject.png.thumb140
# apache-maven is downloaded by docker-aio
apache-maven*
+
+# Docker development volumes
+/docker-dev-volumes
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 00000000000..cadaedc1448
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,21 @@
+version: 2
+
+# HTML is always built, these are additional formats only
+formats:
+ - pdf
+
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.10"
+ apt_packages:
+ - graphviz
+
+python:
+ install:
+ - requirements: doc/sphinx-guides/requirements.txt
+
+
+sphinx:
+ configuration: doc/sphinx-guides/source/conf.py
+ fail_on_warning: true
diff --git a/conf/docker-aio/readme.md b/conf/docker-aio/readme.md
index ef4d3626cf0..f3031a5bb6e 100644
--- a/conf/docker-aio/readme.md
+++ b/conf/docker-aio/readme.md
@@ -1,5 +1,9 @@
# Docker All-In-One
+> :information_source: **NOTE: Sunsetting of this module is imminent.** There is no schedule yet, but expect it to go away.
+> Please let the [Dataverse Containerization Working Group](https://ct.gdcc.io) know if you are a user and
+> what should be preserved.
+
First pass docker all-in-one image, intended for running integration tests against.
Also usable for normal development and system evaluation; not intended for production.
diff --git a/conf/keycloak/oidc-keycloak-auth-provider.json b/conf/keycloak/oidc-keycloak-auth-provider.json
index bc70640212d..7d09fe5f36e 100644
--- a/conf/keycloak/oidc-keycloak-auth-provider.json
+++ b/conf/keycloak/oidc-keycloak-auth-provider.json
@@ -3,6 +3,6 @@
"factoryAlias": "oidc",
"title": "OIDC-Keycloak",
"subtitle": "OIDC-Keycloak",
- "factoryData": "type: oidc | issuer: http://localhost:8090/auth/realms/oidc-realm | clientId: oidc-client | clientSecret: ss6gE8mODCDfqesQaSG3gwUwZqZt547E",
+ "factoryData": "type: oidc | issuer: http://keycloak.mydomain.com:8090/realms/oidc-realm | clientId: oidc-client | clientSecret: ss6gE8mODCDfqesQaSG3gwUwZqZt547E",
"enabled": true
}
diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml
index f11938621fc..ceff082f418 100644
--- a/conf/solr/8.11.1/schema.xml
+++ b/conf/solr/8.11.1/schema.xml
@@ -233,6 +233,9 @@
+
+
+
+
+
+ ${docker.platforms}
${project.build.directory}/buildx-state
diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile
index 07968e92359..bbd02a14328 100644
--- a/modules/container-base/src/main/docker/Dockerfile
+++ b/modules/container-base/src/main/docker/Dockerfile
@@ -190,6 +190,9 @@ RUN <
+
+
+
+ modules/container-configbaker/scripts
+ scripts
+
+
+
+ conf/solr/8.11.1
+ solr
+
+
+
+ scripts/api
+ setup
+
+ setup-all.sh
+ setup-builtin-roles.sh
+ setup-datasetfields.sh
+ setup-identity-providers.sh
+
+ data/licenses/*.json
+ data/authentication-providers/builtin.json
+ data/metadatablocks/*.tsv
+
+ data/dv-root.json
+
+ data/role-admin.json
+ data/role-curator.json
+ data/role-dsContributor.json
+ data/role-dvContributor.json
+ data/role-editor.json
+ data/role-filedownloader.json
+ data/role-fullContributor.json
+ data/role-member.json
+
+ data/user-admin.json
+
+
+ data/metadatablocks/custom*
+
+
+
+
\ No newline at end of file
diff --git a/modules/container-configbaker/scripts/bootstrap.sh b/modules/container-configbaker/scripts/bootstrap.sh
new file mode 100644
index 00000000000..1aa9e232953
--- /dev/null
+++ b/modules/container-configbaker/scripts/bootstrap.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# [INFO]: Execute bootstrapping configuration of a freshly baked instance
+
+set -euo pipefail
+
+function usage() {
+ echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-t timeout] []"
+ echo ""
+ echo "Execute initial configuration (bootstrapping) of an empty Dataverse instance."
+ echo -n "Known personas: "
+ find "${BOOTSTRAP_DIR}" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | paste -sd ' '
+ echo ""
+ echo "Parameters:"
+ echo "instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'"
+ echo " timeout - Provide how long to wait for the instance to become available (using wait4x). Default: '2m'"
+ echo " persona - Configure persona to execute. Calls ${BOOTSTRAP_DIR}//init.sh. Default: 'base'"
+ echo ""
+ echo "Note: This script will wait for the Dataverse instance to be available before executing the bootstrapping."
+ echo " It also checks if already bootstrapped before (availability of metadata blocks) and skip if true."
+ echo ""
+ exit 1
+}
+
+# Set some defaults as documented
+DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"}
+TIMEOUT=${TIMEOUT:-"2m"}
+
+while getopts "u:t:h" OPTION
+do
+ case "$OPTION" in
+ u) DATAVERSE_URL="$OPTARG" ;;
+ t) TIMEOUT="$OPTARG" ;;
+ h) usage;;
+ \?) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+# Assign persona if present or go default
+PERSONA=${1:-"base"}
+
+# Export the URL to be reused in the actual setup scripts
+export DATAVERSE_URL
+
+# Wait for the instance to become available
+echo "Waiting for ${DATAVERSE_URL} to become ready in max ${TIMEOUT}."
+wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.version
+
+# Avoid bootstrapping again by checking if a metadata block has been loaded
+BLOCK_COUNT=$(curl -sSf "${DATAVERSE_URL}/api/metadatablocks" | jq ".data | length")
+if [[ $BLOCK_COUNT -gt 0 ]]; then
+ echo "Your instance has been bootstrapped already, skipping."
+ exit 0
+fi
+
+# Now execute the bootstrapping script
+echo "Now executing bootstrapping script at ${BOOTSTRAP_DIR}/${PERSONA}/init.sh."
+exec "${BOOTSTRAP_DIR}/${PERSONA}/init.sh"
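+
+# Example invocation (a sketch; URL, timeout and persona are illustrative values):
+#   ./bootstrap.sh -u http://dataverse:8080 -t 3m dev
+# This waits up to 3 minutes for the instance at http://dataverse:8080, then runs ${BOOTSTRAP_DIR}/dev/init.sh.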
diff --git a/modules/container-configbaker/scripts/bootstrap/base/init.sh b/modules/container-configbaker/scripts/bootstrap/base/init.sh
new file mode 100644
index 00000000000..81c2b59f347
--- /dev/null
+++ b/modules/container-configbaker/scripts/bootstrap/base/init.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Set some defaults as documented
+DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"}
+export DATAVERSE_URL
+
+./setup-all.sh
diff --git a/modules/container-configbaker/scripts/bootstrap/dev/init.sh b/modules/container-configbaker/scripts/bootstrap/dev/init.sh
new file mode 100644
index 00000000000..1042478963d
--- /dev/null
+++ b/modules/container-configbaker/scripts/bootstrap/dev/init.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Set some defaults as documented
+DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"}
+export DATAVERSE_URL
+
+echo "Running base setup-all.sh (INSECURE MODE)..."
+"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out
+
+echo "Setting system mail address..."
+curl -X PUT -d "dataverse@localhost" "${DATAVERSE_URL}/api/admin/settings/:SystemEmail"
+
+echo "Setting DOI provider to \"FAKE\"..."
+curl "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" -X PUT -d FAKE
+
+API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \")
+export API_TOKEN
+
+echo "Publishing root dataverse..."
+curl -H "X-Dataverse-key:$API_TOKEN" -X POST "${DATAVERSE_URL}/api/dataverses/:root/actions/:publish"
+
+echo "Allowing users to create dataverses and datasets in root..."
+curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "{\"assignee\": \":authenticated-users\",\"role\": \"fullContributor\"}" "${DATAVERSE_URL}/api/dataverses/:root/assignments"
+
+echo "Checking Dataverse version..."
+curl "${DATAVERSE_URL}/api/info/version"
+
+echo ""
+echo "Done, your instance has been configured for development. Have a nice day!"
diff --git a/modules/container-configbaker/scripts/fix-fs-perms.sh b/modules/container-configbaker/scripts/fix-fs-perms.sh
new file mode 100644
index 00000000000..9ce8f475d70
--- /dev/null
+++ b/modules/container-configbaker/scripts/fix-fs-perms.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# [INFO]: Fix folder permissions using 'chown' to be writeable by containers not running as root.
+
+set -euo pipefail
+
+if [[ "$(id -un)" != "root" ]]; then
+ echo "This script must be run as user root (not $(id -un)), otherwise no fix is possible."
+fi
+
+DEF_DV_PATH="/dv"
+DEF_SOLR_PATH="/var/solr"
+DEF_DV_UID="1000"
+DEF_SOLR_UID="8983"
+
+function usage() {
+ echo "Usage: $(basename "$0") (dv|solr|[1-9][0-9]{3,4}) [PATH [PATH [...]]]"
+ echo ""
+ echo "You may omit a path when using 'dv' or 'solr' as first argument:"
+ echo " - 'dv' will default to user $DEF_DV_UID and $DEF_DV_PATH"
+ echo " - 'solr' will default to user $DEF_SOLR_UID and $DEF_SOLR_PATH"
+ exit 1
+}
+
+# Get a target name or id
+TARGET=${1:-help}
+# Get the rest of the arguments as paths to apply the fix to
+PATHS=( "${@:2}" )
+
+ID=0
+case "$TARGET" in
+ dv)
+ ID="$DEF_DV_UID"
+ # If there is no path, add the default for our app image
+ if [[ ${#PATHS[@]} -eq 0 ]]; then
+ PATHS=( "$DEF_DV_PATH" )
+ fi
+ ;;
+ solr)
+ ID="$DEF_SOLR_UID"
+ # In case there is no path, add the default path for Solr images
+ if [[ ${#PATHS[@]} -eq 0 ]]; then
+ PATHS=( "$DEF_SOLR_PATH" )
+ fi
+ ;;
+ # If there is a digit in the argument, check if this is a valid UID (>= 1000, ...)
+ *[[:digit:]]* )
+ echo "$TARGET" | grep -q "^[1-9][0-9]\{3,4\}$" || usage
+ ID="$TARGET"
+ ;;
+ *)
+ usage
+ ;;
+esac
+
+# Check that we actually have at least 1 path
+if [[ ${#PATHS[@]} -eq 0 ]]; then
+ usage
+fi
+
+# Do what we came for
+chown -R "$ID:$ID" "${PATHS[@]}"
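+
+# Example invocations (a sketch; the custom path and UID are illustrative):
+#   fix-fs-perms.sh dv               # chown /dv to UID/GID 1000
+#   fix-fs-perms.sh solr             # chown /var/solr to UID/GID 8983
+#   fix-fs-perms.sh 1001 /mnt/data   # chown an explicit path to a custom UID/GID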
diff --git a/modules/container-configbaker/scripts/help.sh b/modules/container-configbaker/scripts/help.sh
new file mode 100644
index 00000000000..744ec8c8b4c
--- /dev/null
+++ b/modules/container-configbaker/scripts/help.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# [INFO]: This script.
+
+# This is the Dataverse logo in ASCII
+# shellcheck disable=SC2016
+echo -e ' ╓mαo\n ╫ jh\n `%╥æ╨\n ╫µ\n ╓@M%╗,\n ▓` ╫U\n ▓² ╫╛\n ▓M#M╜\n ╙M╜╜%φ╫┘\n┌╫" "╫┐\n▓ ▓\n▓ ▓\n`╫µ ¿╫"\n "╜%%MM╜`'
+echo ""
+echo "Hello!"
+echo ""
+echo "My name is Config Baker. I'm a container image with lots of tooling to 'bake' a containerized Dataverse instance!"
+echo "I can cook up an instance (initial config), put icing on your Solr search index configuration, and more!"
+echo ""
+echo "Here's a list of things I can do for you:"
+
+# Get the longest name length
+LENGTH=1
+for SCRIPT in "${SCRIPT_DIR}"/*.sh; do
+ L="$(basename "$SCRIPT" | wc -m)"
+ if [ "$L" -gt "$LENGTH" ]; then
+ LENGTH="$L"
+ fi
+done
+
+# Print script names and info, but formatted
+for SCRIPT in "${SCRIPT_DIR}"/*.sh; do
+ printf "%${LENGTH}s - " "$(basename "$SCRIPT")"
+ grep "# \[INFO\]: " "$SCRIPT" | sed -e "s|# \[INFO\]: ||"
+done
+
+echo ""
+echo "Simply execute this container with the script name (and potentially arguments) as 'command'."
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index d85d8aed5a1..05f7874d31c 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -14,6 +14,7 @@
../../pom.xml
../../scripts/zipdownload
../container-base
+ ../dataverse-spi
- 5.13
+ 5.14
11
UTF-8
@@ -186,10 +187,19 @@
3.0.0-M5
3.0.0-M5
3.3.0
+ 3.0.0-M7
+ 3.0.1
+ 4.0.0-M4
+ 3.2.1
+ 3.4.1
+ 1.3.0
+
3.1.2
+ 1.6.13
+ 1.7.0
- 0.40.2
+ 0.43.0
@@ -262,6 +272,46 @@
docker-maven-plugin
${fabric8-dmp.version}
+
+ org.apache.maven.plugins
+ maven-site-plugin
+ ${maven-site-plugin.version}
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ ${maven-source-plugin.version}
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ ${maven-javadoc-plugin.version}
+
+
+ org.apache.maven.plugins
+ maven-gpg-plugin
+ ${maven-gpg-plugin.version}
+
+
+ org.codehaus.mojo
+ flatten-maven-plugin
+ ${maven-flatten-plugin.version}
+
+
+ org.kordamp.maven
+ pomchecker-maven-plugin
+ ${pomchecker-maven-plugin.version}
+
+
+ org.sonatype.plugins
+ nexus-staging-maven-plugin
+ ${nexus-staging-plugin.version}
+
+
+ org.apache.maven.plugins
+ maven-release-plugin
+ ${maven-release-plugin.version}
+
@@ -345,8 +395,9 @@
- 5.2022.4
+ 5.2022.5
diff --git a/modules/dataverse-spi/.gitignore b/modules/dataverse-spi/.gitignore
new file mode 100644
index 00000000000..d75620abf70
--- /dev/null
+++ b/modules/dataverse-spi/.gitignore
@@ -0,0 +1 @@
+.flattened-pom.xml
diff --git a/modules/dataverse-spi/pom.xml b/modules/dataverse-spi/pom.xml
new file mode 100644
index 00000000000..6235d309e89
--- /dev/null
+++ b/modules/dataverse-spi/pom.xml
@@ -0,0 +1,238 @@
+
+
+ 4.0.0
+
+
+ edu.harvard.iq
+ dataverse-parent
+ ${revision}
+ ../dataverse-parent
+
+
+ io.gdcc
+ dataverse-spi
+ 1.0.0${project.version.suffix}
+ jar
+
+ Dataverse SPI Plugin API
+ https://dataverse.org
+
+ A package to create out-of-tree Java code for Dataverse Software. Plugin projects can use this package
+ as an API dependency just like Jakarta EE APIs if they want to create external plugins. These will be loaded
+ at runtime of a Dataverse installation using SPI. See also https://guides.dataverse.org/en/latest/developers
+ for more information.
+
+
+
+
+ Apache-2.0
+ https://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+ Dataverse Core Team
+ support@dataverse.org
+
+
+
+
+ https://github.com/IQSS/dataverse/issues
+ GitHub Issues
+
+
+
+ scm:git:git@github.com:IQSS/dataverse.git
+ scm:git:git@github.com:IQSS/dataverse.git
+ git@github.com:IQSS/dataverse.git
+ HEAD
+
+
+
+ https://github.com/IQSS/dataverse/actions
+ github
+
+
+ dataversebot@gdcc.io
+
+
+
+
+
+
+ ossrh
+ https://s01.oss.sonatype.org/content/repositories/snapshots
+
+
+ ossrh
+ https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/
+
+
+
+
+
+
+ none
+ false
+
+
+
+
+ jakarta.json
+ jakarta.json-api
+ provided
+
+
+
+ jakarta.ws.rs
+ jakarta.ws.rs-api
+ provided
+
+
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${target.java.version}
+
+
+
+
+
+ org.sonatype.plugins
+ nexus-staging-maven-plugin
+ true
+
+ ossrh
+ https://s01.oss.sonatype.org
+ true
+
+
+
+ org.apache.maven.plugins
+ maven-release-plugin
+
+ false
+ release
+ true
+ deploy
+
+
+
+ org.codehaus.mojo
+ flatten-maven-plugin
+
+ true
+ oss
+
+ remove
+ remove
+
+
+
+
+
+ flatten
+ process-resources
+
+ flatten
+
+
+
+
+ flatten.clean
+ clean
+
+ clean
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-deploy-plugin
+
+ ${skipDeploy}
+
+
+
+
+
+
+
+ release
+
+
+
+ org.apache.maven.plugins
+ maven-gpg-plugin
+
+
+ sign-artifacts
+ verify
+
+ sign
+
+
+
+
+
+ org.kordamp.maven
+ pomchecker-maven-plugin
+
+
+ process-resources
+
+ check-maven-central
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+ false
+ ${javadoc.lint}
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar
+
+
+
+
+
+
+
+
+ ct
+
+ true
+
+
+
+
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
new file mode 100644
index 00000000000..228992c8288
--- /dev/null
+++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
@@ -0,0 +1,96 @@
+package io.gdcc.spi.export;
+
+import java.io.InputStream;
+import java.util.Optional;
+
+import javax.json.JsonArray;
+import javax.json.JsonObject;
+
+/**
+ * Provides all the metadata Dataverse has about a given dataset that can then
+ * be used by an @see Exporter to create a new metadata export format.
+ *
+ */
+public interface ExportDataProvider {
+
+ /**
+ * @return - dataset metadata in the standard Dataverse JSON format used in the
+ * API and available as the JSON metadata export via the user interface.
+ * @apiNote - there is no JSON schema defining this output, but the format is
+ * well documented in the Dataverse online guides. This, and the
+ * OAI_ORE export are the only two that provide 'complete'
+ * dataset-level metadata along with basic file metadata for each file
+ * in the dataset.
+ */
+ JsonObject getDatasetJson();
+
+ /**
+ *
+ * @return - dataset metadata in the JSON-LD based OAI_ORE format used in
+ * Dataverse's archival bag export mechanism and as available in the
+ * user interface and by API.
+ * @apiNote - This, and the JSON format, are the only two that provide complete
+ * dataset-level metadata along with basic file metadata for each file
+ * in the dataset.
+ */
+ JsonObject getDatasetORE();
+
+ /**
+ * Dataverse is capable of extracting DDI-centric metadata from tabular
+ * datafiles. This detailed metadata, which is only available for successfully
+ * "ingested" tabular files, is not included in the output of any other methods
+ * in this interface.
+ *
+ * @return - a JSONArray with one entry per ingested tabular dataset file.
+ * @apiNote - there is no JSON schema available for this output and the format
+ * is not well documented. Implementers may wish to explore the @see
+ * edu.harvard.iq.dataverse.export.DDIExporter and the @see
+ * edu.harvard.iq.dataverse.util.json.JSONPrinter classes where this
+ * output is used/generated (respectively).
+ */
+ JsonArray getDatasetFileDetails();
+
+ /**
+ *
+ * @return - the subset of metadata conforming to the schema.org standard as
+ * available in the user interface and as included as header metadata in
+ * dataset pages (for use by search engines)
+ * @apiNote - as this metadata export is not complete, it should only be used as
+ * a starting point for an Exporter if it simplifies your exporter
+ * relative to using the JSON or OAI_ORE exports.
+ */
+ JsonObject getDatasetSchemaDotOrg();
+
+ /**
+ *
+ * @return - the subset of metadata conforming to the DataCite standard as
+ * available in the Dataverse user interface and as sent to DataCite when DataCite DOIs are used.
+ * @apiNote - as this metadata export is not complete, it should only be used as
+ * a starting point for an Exporter if it simplifies your exporter
+ * relative to using the JSON or OAI_ORE exports.
+ */
+ String getDataCiteXml();
+
+ /**
+ * If an Exporter has specified a prerequisite format name via the
+ * getPrerequisiteFormatName() method, it can call this method to retrieve
+ * metadata in that format.
+ *
+ * @return - metadata in the specified prerequisite format (if available from
+ * another internal or added Exporter) as an Optional
+ * @apiNote - This functionality is intended as a way to easily generate alternate
+ * formats of the ~same metadata, e.g. to support download as XML,
+ * HTML, PDF for a specific metadata standard (e.g. DDI). It can be
+ * particularly useful, relative to starting from the output of one of
+ * the getDataset* methods above, if there are existing libraries that
+ * can convert between these formats. Note that, since Exporters can be
+ * replaced, relying on this method could cause your Exporter to
+ * malfunction, e.g. if you depend on format "ddi" and a third party
+ * Exporter is configured to replace the internal ddi Exporter in
+ * Dataverse.
+ */
+ default Optional<InputStream> getPrerequisiteInputStream() {
+ return Optional.empty();
+ }
+
+}
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportException.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportException.java
new file mode 100644
index 00000000000..c816a605860
--- /dev/null
+++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportException.java
@@ -0,0 +1,13 @@
+package io.gdcc.spi.export;
+
+import java.io.IOException;
+
+public class ExportException extends IOException {
+ public ExportException(String message) {
+ super(message);
+ }
+
+ public ExportException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/Exporter.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/Exporter.java
new file mode 100644
index 00000000000..1338a3c9734
--- /dev/null
+++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/Exporter.java
@@ -0,0 +1,110 @@
+package io.gdcc.spi.export;
+
+import java.io.OutputStream;
+import java.util.Locale;
+import java.util.Optional;
+
+
+/**
+ * Dataverse allows new metadata export formats to be dynamically added to a running instance. This is done by
+ * deploying new classes that implement this Exporter interface.
+ */
+
+public interface Exporter {
+
+
+ /**
+ * When this method is called, the Exporter should write the metadata to the given OutputStream.
+ *
+ * @apiNote When implementing exportDataset, when done writing content, please make sure
+ * to flush() the outputStream, but NOT close() it! This way an exporter can be
+ * used to insert the produced metadata into the body of an HTTP response, etc.
+ * (for example, to insert it into the body of an OAI response, where more XML
+ * needs to be written, for the outer OAI-PMH record). -- L.A. 4.5
+ *
+ * @param dataProvider - the @see ExportDataProvider interface includes several methods that can be used to retrieve the dataset metadata in different formats. An Exporter should use one or more of these to obtain the values needed to generate metadata in the format it supports.
+ * @param outputStream - the OutputStream to write the metadata to
+ * @throws ExportException - if there is an error writing the metadata
+ */
+ void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException;
+
+ /**
+ * This method should return the name of the metadata format this Exporter
+ * provides.
+ *
+ * @apiNote Format names are unique identifiers for the formats supported in
+ * Dataverse. Reusing the same format name as another Exporter will
+ * result in only one implementation being available. Exporters packaged
+ * as an external Jar file have precedence over the default
+ * implementations in Dataverse. Hence re-using one of the existing
+ * format names will result in the Exporter replacing the internal one
+ * with the same name. The precedence between two external Exporters
+ * using the same format name is not defined.
+ * Current format names used internally by Dataverse are:
+ * Datacite
+ * dcterms
+ * ddi
+ * oai_dc
+ * html
+ * dataverse_json
+ * oai_ddi
+ * OAI_ORE
+ * oai_datacite
+ * schema.org
+ *
+ * @return - the unique name of the metadata format this Exporter provides
+ */
+ String getFormatName();
+
+ /**
+ * This method should return the display name of the metadata format this
+ * Exporter provides. Display names are used in the UI, specifically in the menu
+ * of available Metadata Exports on the dataset page/metadata tab to identify the
+ * format.
+ */
+ String getDisplayName(Locale locale);
+
+ /**
+ * Exporters can specify that they require, as input, the output of another
+ * exporter. This is done by providing the name of that format in response to a
+ * call to this method.
+ *
+ * @implNote The one current example where this is done is with the html (display
+ * name "DDI html codebook") exporter, which starts from the XML-based
+ * ddi format produced by the ddi exporter.
+ * @apiNote - The Exporter can expect that the metadata produced by its
+ * prerequisite exporter (as defined with this method) will be
+ * available via the ExportDataProvider.getPrerequisiteInputStream()
+ * method. The default implementation of this method returns an empty
+ * value which means the getPrerequisiteInputStream() method of the
+ * ExportDataProvider sent in the exportDataset method will return an
+ * empty Optional.
+ *
+ */
+ default Optional<String> getPrerequisiteFormatName() {
+ return Optional.empty();
+ }
+
+
+ /**
+ * Harvestable Exporters will be available as options in Dataverse's Harvesting mechanism.
+ * @return true to make this exporter available as a harvesting option.
+ */
+ Boolean isHarvestable();
+
+ /**
+ * If an Exporter is available to users, its format will be generated for every
+ * published dataset and made available via the dataset page/metadata
+ * tab/Metadata Exports menu item and via the API.
+ * @return true to make this exporter available to users.
+ */
+ Boolean isAvailableToUsers();
+
+ /**
+ * To support effective downloads of metadata in this Exporter's format, the Exporter should specify an appropriate mime type.
+ * @apiNote - It is recommended to use the @see javax.ws.rs.core.MediaType constants to specify the mime type.
+ * @return The mime type, e.g. "application/json", "text/plain", etc.
+ */
+ String getMediaType();
+
+}
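+
+// A minimal sketch of an implementing class (hypothetical names, shown only for illustration).
+// A plugin jar would ship such a class together with a META-INF/services/io.gdcc.spi.export.Exporter
+// entry so that a Dataverse installation can discover it at runtime via the Java ServiceLoader (SPI):
+//
+//   public class MyJsonExporter implements Exporter {
+//       public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
+//           try {
+//               outputStream.write(dataProvider.getDatasetJson().toString().getBytes());
+//               outputStream.flush(); // flush, but do not close (see the apiNote above)
+//           } catch (java.io.IOException e) {
+//               throw new ExportException("Could not write my_json metadata", e);
+//           }
+//       }
+//       public String getFormatName() { return "my_json"; }
+//       public String getDisplayName(Locale locale) { return "My JSON"; }
+//       public Boolean isHarvestable() { return false; }
+//       public Boolean isAvailableToUsers() { return true; }
+//       public String getMediaType() { return "application/json"; }
+//   }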
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/XMLExporter.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/XMLExporter.java
new file mode 100644
index 00000000000..9afe7ba1cfd
--- /dev/null
+++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/XMLExporter.java
@@ -0,0 +1,37 @@
+package io.gdcc.spi.export;
+
+import javax.ws.rs.core.MediaType;
+
+/**
+ * XML Exporter is an extension of the base Exporter interface that adds the
+ * additional methods needed for generating XML metadata export formats.
+ */
+public interface XMLExporter extends Exporter {
+
+ /**
+ * @implNote for the ddi exporter, this method returns "ddi:codebook:2_5"
+ * @return - the name space of the XML schema
+ */
+ String getXMLNameSpace();
+
+ /**
+ * @apiNote According to the XML specification, the value must be a URI
+ * @implNote for the ddi exporter, this method returns
+ * "https://ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd"
+ * @return - the location of the XML schema as a String (must be a valid URI)
+ */
+ String getXMLSchemaLocation();
+
+ /**
+ * @implNote for the ddi exporter, this method returns "2.5"
+ * @return - the version of the XML schema
+ */
+ String getXMLSchemaVersion();
+
+ /**
+ * @return - should always be MediaType.APPLICATION_XML
+ */
+ public default String getMediaType() {
+ return MediaType.APPLICATION_XML;
+ };
+}
diff --git a/pom.xml b/pom.xml
index 8b6f98c5896..96f598af0f5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -15,10 +15,16 @@
doc/sphinx-guides/source/developers/dependencies.rst
-->
dataverse
- war
+ ${packaging.type}
dataverse
false
+ false
+
+
+
+ war
+
1.2.18.4
8.5.10
1.20.1
@@ -63,7 +69,7 @@
runtime
-
+
org.passay
passay
1.6.0
@@ -178,6 +184,11 @@
provided
+
+ fish.payara.api
+ payara-api
+ provided
+
com.sun.mail
jakarta.mail
@@ -381,7 +392,7 @@
com.nimbusds
oauth2-oidc-sdk
- 9.41.1
+ 10.7.1
@@ -499,7 +510,11 @@
cdm-core
${netcdf.version}
-
+
+ io.gdcc
+ dataverse-spi
+ 1.0.0
+
org.junit.jupiter
@@ -754,22 +769,128 @@
- tc
+ ct
+
true
- 9.6
+ true
+
+ docker-build
+ 13
+
+ gdcc/dataverse:${app.image.tag}
+ unstable
+ gdcc/base:${base.image.tag}
+ unstable
+ gdcc/configbaker:${conf.image.tag}
+ ${app.image.tag}
+
+
+
+
+ ${app.image}
+ ${postgresql.server.version}
+ ${solr.version}
+ dataverse
+
+
+
+ org.apache.maven.plugins
+ maven-war-plugin
+
+
+ prepare-package
+
+ exploded
+
+
+
+
+
+
+
+
+
+ io.fabric8
+ docker-maven-plugin
+ true
+
+
+
+
+ dev_dataverse
+ ${app.image}
+
+
+
+ ${docker.platforms}
+
+
+ Dockerfile
+
+ ${base.image}
+
+ @
+
+ assembly.xml
+
+
+
+
+
+
+
+ compose
+ ${project.basedir}
+ docker-compose-dev.yml
+
+
+
+
+ dev_bootstrap
+ ${conf.image}
+
+
+
+ ${docker.platforms}
+
+
+ ${project.basedir}/modules/container-configbaker/Dockerfile
+
+ ${SOLR_VERSION}
+
+ @
+
+ ${project.basedir}/modules/container-configbaker/assembly.xml
+
+
+
+
+
+ true
+
+
+
+ true
+
+
org.apache.maven.plugins
maven-failsafe-plugin
${maven-failsafe-plugin.version}
- testcontainers
+ end2end
${postgresql.server.version}
+ ${skipIntegrationTests}
diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json
index d7ae8cefbf7..4af128955c9 100644
--- a/scripts/api/data/dataset-create-new-all-default-fields.json
+++ b/scripts/api/data/dataset-create-new-all-default-fields.json
@@ -466,9 +466,9 @@
},
{
"typeName": "productionPlace",
- "multiple": false,
+ "multiple": true,
"typeClass": "primitive",
- "value": "ProductionPlace"
+ "value": ["ProductionPlace"]
},
{
"typeName": "contributor",
@@ -710,9 +710,9 @@
},
{
"typeName": "series",
- "multiple": false,
+ "multiple": true,
"typeClass": "compound",
- "value": {
+ "value": [{
"seriesName": {
"typeName": "seriesName",
"multiple": false,
@@ -725,7 +725,7 @@
"typeClass": "primitive",
"value": "SeriesInformation"
}
- }
+ }]
},
{
"typeName": "software",
@@ -899,25 +899,25 @@
"typeName": "westLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "10"
+ "value": "-72"
},
"eastLongitude": {
"typeName": "eastLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "20"
+ "value": "-70"
},
"northLongitude": {
"typeName": "northLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "30"
+ "value": "43"
},
"southLongitude": {
"typeName": "southLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "40"
+ "value": "42"
}
},
{
@@ -925,25 +925,25 @@
"typeName": "westLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "50"
+ "value": "-18"
},
"eastLongitude": {
"typeName": "eastLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "60"
+ "value": "-13"
},
"northLongitude": {
"typeName": "northLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "70"
+ "value": "29"
},
"southLongitude": {
"typeName": "southLongitude",
"multiple": false,
"typeClass": "primitive",
- "value": "80"
+ "value": "28"
}
}
]
@@ -1404,7 +1404,7 @@
"multiple": true,
"typeClass": "controlledVocabulary",
"value": [
- "cell counting",
+ "genome sequencing",
"cell sorting",
"clinical chemistry analysis",
"DNA methylation profiling"
diff --git a/scripts/api/data/dataset-create-new.json b/scripts/api/data/dataset-create-new.json
index 0017da15974..5831e0b17e6 100644
--- a/scripts/api/data/dataset-create-new.json
+++ b/scripts/api/data/dataset-create-new.json
@@ -4,6 +4,10 @@
"persistentUrl": "http://dx.doi.org/10.5072/FK2/9",
"protocol": "chadham-house-rule",
"datasetVersion": {
+ "license": {
+ "name": "CC0 1.0",
+ "uri": "http://creativecommons.org/publicdomain/zero/1.0"
+ },
"metadataBlocks": {
"citation": {
"displayName": "Citation Metadata",
@@ -121,4 +125,4 @@
}
}
}
-}
\ No newline at end of file
+}
diff --git a/scripts/api/data/dataset-finch1_fr.json b/scripts/api/data/dataset-finch1_fr.json
index ce9616fdef5..848e5e3587e 100644
--- a/scripts/api/data/dataset-finch1_fr.json
+++ b/scripts/api/data/dataset-finch1_fr.json
@@ -1,6 +1,10 @@
{
"metadataLanguage": "fr",
"datasetVersion": {
+ "license": {
+ "name": "CC0 1.0",
+ "uri": "http://creativecommons.org/publicdomain/zero/1.0"
+ },
"metadataBlocks": {
"citation": {
"fields": [
diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv
index be32bb7134e..18bc31c2dd6 100644
--- a/scripts/api/data/metadatablocks/citation.tsv
+++ b/scripts/api/data/metadatablocks/citation.tsv
@@ -66,7 +66,7 @@
dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation
dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation
kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData
- series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE FALSE FALSE FALSE FALSE citation
+ series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE TRUE FALSE FALSE FALSE citation
seriesName Name The name of the dataset series text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation
seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation
software Software Information about the software used to generate the Dataset none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy
diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh
index c4bd6c2c9c5..e247caa72b5 100755
--- a/scripts/api/setup-all.sh
+++ b/scripts/api/setup-all.sh
@@ -3,7 +3,14 @@
SECURESETUP=1
DV_SU_PASSWORD="admin"
-for opt in $*
+DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"}
+# Make sure scripts we call from this one also get this env var!
+export DATAVERSE_URL
+
+# scripts/api when called from the root of the source tree
+SCRIPT_PATH="$(dirname "$0")"
+
+for opt in "$@"
do
case $opt in
"--insecure")
@@ -24,13 +31,9 @@ do
esac
done
+# shellcheck disable=SC2016
command -v jq >/dev/null 2>&1 || { echo >&2 '`jq` ("sed for JSON") is required, but not installed. Download the binary for your platform from http://stedolan.github.io/jq/ and make sure it is in your $PATH (/usr/bin/jq is fine) and executable with `sudo chmod +x /usr/bin/jq`. On Mac, you can install it with `brew install jq` if you use homebrew: http://brew.sh . Aborting.'; exit 1; }
-echo "deleting all data from Solr"
-curl http://localhost:8983/solr/collection1/update/json?commit=true -H "Content-type: application/json" -X POST -d "{\"delete\": { \"query\":\"*:*\"}}"
-
-SERVER=http://localhost:8080/api
-
# Everything + the kitchen sink, in a single script
# - Setup the metadata blocks and controlled vocabulary
# - Setup the builtin roles
@@ -41,49 +44,49 @@ SERVER=http://localhost:8080/api
echo "Setup the metadata blocks"
-./setup-datasetfields.sh
+"$SCRIPT_PATH"/setup-datasetfields.sh
echo "Setup the builtin roles"
-./setup-builtin-roles.sh
+"$SCRIPT_PATH"/setup-builtin-roles.sh
echo "Setup the authentication providers"
-./setup-identity-providers.sh
+"$SCRIPT_PATH"/setup-identity-providers.sh
echo "Setting up the settings"
echo "- Allow internal signup"
-curl -X PUT -d yes "$SERVER/admin/settings/:AllowSignUp"
-curl -X PUT -d /dataverseuser.xhtml?editMode=CREATE "$SERVER/admin/settings/:SignUpUrl"
-
-curl -X PUT -d doi "$SERVER/admin/settings/:Protocol"
-curl -X PUT -d 10.5072 "$SERVER/admin/settings/:Authority"
-curl -X PUT -d "FK2/" "$SERVER/admin/settings/:Shoulder"
-curl -X PUT -d DataCite "$SERVER/admin/settings/:DoiProvider"
-curl -X PUT -d burrito $SERVER/admin/settings/BuiltinUsers.KEY
-curl -X PUT -d localhost-only $SERVER/admin/settings/:BlockedApiPolicy
-curl -X PUT -d 'native/http' $SERVER/admin/settings/:UploadMethods
+curl -X PUT -d yes "${DATAVERSE_URL}/api/admin/settings/:AllowSignUp"
+curl -X PUT -d "/dataverseuser.xhtml?editMode=CREATE" "${DATAVERSE_URL}/api/admin/settings/:SignUpUrl"
+
+curl -X PUT -d doi "${DATAVERSE_URL}/api/admin/settings/:Protocol"
+curl -X PUT -d 10.5072 "${DATAVERSE_URL}/api/admin/settings/:Authority"
+curl -X PUT -d "FK2/" "${DATAVERSE_URL}/api/admin/settings/:Shoulder"
+curl -X PUT -d DataCite "${DATAVERSE_URL}/api/admin/settings/:DoiProvider"
+curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY"
+curl -X PUT -d localhost-only "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy"
+curl -X PUT -d 'native/http' "${DATAVERSE_URL}/api/admin/settings/:UploadMethods"
echo
echo "Setting up the admin user (and as superuser)"
-adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @data/user-admin.json "$SERVER/builtin-users?password=$DV_SU_PASSWORD&key=burrito")
-echo $adminResp
-curl -X POST "$SERVER/admin/superuser/dataverseAdmin"
+adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/user-admin.json "${DATAVERSE_URL}/api/builtin-users?password=$DV_SU_PASSWORD&key=burrito")
+echo "$adminResp"
+curl -X POST "${DATAVERSE_URL}/api/admin/superuser/dataverseAdmin"
echo
echo "Setting up the root dataverse"
-adminKey=$(echo $adminResp | jq .data.apiToken | tr -d \")
-curl -s -H "Content-type:application/json" -X POST -d @data/dv-root.json "$SERVER/dataverses/?key=$adminKey"
+adminKey=$(echo "$adminResp" | jq .data.apiToken | tr -d \")
+curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/dv-root.json "${DATAVERSE_URL}/api/dataverses/?key=$adminKey"
echo
echo "Set the metadata block for Root"
-curl -s -X POST -H "Content-type:application/json" -d "[\"citation\"]" $SERVER/dataverses/:root/metadatablocks/?key=$adminKey
+curl -s -X POST -H "Content-type:application/json" -d "[\"citation\"]" "${DATAVERSE_URL}/api/dataverses/:root/metadatablocks/?key=$adminKey"
echo
echo "Set the default facets for Root"
-curl -s -X POST -H "Content-type:application/json" -d "[\"authorName\",\"subject\",\"keywordValue\",\"dateOfDeposit\"]" $SERVER/dataverses/:root/facets/?key=$adminKey
+curl -s -X POST -H "Content-type:application/json" -d "[\"authorName\",\"subject\",\"keywordValue\",\"dateOfDeposit\"]" "${DATAVERSE_URL}/api/dataverses/:root/facets/?key=$adminKey"
echo
echo "Set up licenses"
# Note: CC0 has been added and set as the default license through
# Flyway script V5.9.0.1__7440-configurable-license-list.sql
-curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$adminKey" $SERVER/licenses --upload-file data/licenses/licenseCC-BY-4.0.json
+curl -X POST -H 'Content-Type: application/json' -H "X-Dataverse-key:$adminKey" "${DATAVERSE_URL}/api/licenses" --upload-file "$SCRIPT_PATH"/data/licenses/licenseCC-BY-4.0.json
# OPTIONAL USERS AND DATAVERSES
#./setup-optional.sh
@@ -92,8 +95,8 @@ if [ $SECURESETUP = 1 ]
then
# Revoke the "burrito" super-key;
# Block sensitive API endpoints;
- curl -X DELETE $SERVER/admin/settings/BuiltinUsers.KEY
- curl -X PUT -d 'admin,builtin-users' $SERVER/admin/settings/:BlockedApiEndpoints
+ curl -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY"
+ curl -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints"
echo "Access to the /api/admin and /api/test is now disabled, except for connections from localhost."
else
echo "IMPORTANT!!!"
diff --git a/scripts/api/setup-builtin-roles.sh b/scripts/api/setup-builtin-roles.sh
index 0f3c1c150cd..f1f268debbc 100755
--- a/scripts/api/setup-builtin-roles.sh
+++ b/scripts/api/setup-builtin-roles.sh
@@ -1,34 +1,37 @@
-SERVER=http://localhost:8080/api
+#!/bin/bash
+
+DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"}
+SCRIPT_PATH="$(dirname "$0")"
# Setup the builtin roles
echo "Setting up admin role"
-curl -H "Content-type:application/json" -d @data/role-admin.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-admin.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up file downloader role"
-curl -H "Content-type:application/json" -d @data/role-filedownloader.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-filedownloader.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up full contributor role"
-curl -H "Content-type:application/json" -d @data/role-fullContributor.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-fullContributor.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up dv contributor role"
-curl -H "Content-type:application/json" -d @data/role-dvContributor.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-dvContributor.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up ds contributor role"
-curl -H "Content-type:application/json" -d @data/role-dsContributor.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-dsContributor.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up editor role"
-curl -H "Content-type:application/json" -d @data/role-editor.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-editor.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up curator role"
-curl -H "Content-type:application/json" -d @data/role-curator.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-curator.json "${DATAVERSE_URL}/api/admin/roles/"
echo
echo "Setting up member role"
-curl -H "Content-type:application/json" -d @data/role-member.json http://localhost:8080/api/admin/roles/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/role-member.json "${DATAVERSE_URL}/api/admin/roles/"
echo
diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh
index 0d2d60b9538..51da677ceb8 100755
--- a/scripts/api/setup-datasetfields.sh
+++ b/scripts/api/setup-datasetfields.sh
@@ -1,9 +1,13 @@
-#!/bin/sh
-curl http://localhost:8080/api/admin/datasetfield/loadNAControlledVocabularyValue
+#!/bin/bash
+
+DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"}
+SCRIPT_PATH="$(dirname "$0")"
+
+curl "${DATAVERSE_URL}/api/admin/datasetfield/loadNAControlledVocabularyValue"
# TODO: The "@" is confusing. Consider switching to --upload-file citation.tsv
-curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/citation.tsv -H "Content-type: text/tab-separated-values"
-curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/geospatial.tsv -H "Content-type: text/tab-separated-values"
-curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/social_science.tsv -H "Content-type: text/tab-separated-values"
-curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values"
-curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values"
-curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values"
+curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/citation.tsv -H "Content-type: text/tab-separated-values"
+curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/geospatial.tsv -H "Content-type: text/tab-separated-values"
+curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/social_science.tsv -H "Content-type: text/tab-separated-values"
+curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values"
+curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values"
+curl "${DATAVERSE_URL}/api/admin/datasetfield/load" -X POST --data-binary @"$SCRIPT_PATH"/data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values"
diff --git a/scripts/api/setup-identity-providers.sh b/scripts/api/setup-identity-providers.sh
index 89ac59de32f..e877f71c6b0 100755
--- a/scripts/api/setup-identity-providers.sh
+++ b/scripts/api/setup-identity-providers.sh
@@ -1,8 +1,11 @@
-SERVER=http://localhost:8080/api
+#!/bin/bash
+
+DATAVERSE_URL=${DATAVERSE_URL:-"http://localhost:8080"}
+SCRIPT_PATH="$(dirname "$0")"
# Setup the authentication providers
echo "Setting up internal user provider"
-curl -H "Content-type:application/json" -d @data/authentication-providers/builtin.json http://localhost:8080/api/admin/authenticationProviders/
+curl -H "Content-type:application/json" -d @"$SCRIPT_PATH"/data/authentication-providers/builtin.json "${DATAVERSE_URL}/api/admin/authenticationProviders/"
#echo "Setting up Echo providers"
#curl -H "Content-type:application/json" -d @data/authentication-providers/echo.json http://localhost:8080/api/admin/authenticationProviders/
diff --git a/scripts/dev/docker-final-setup.sh b/scripts/dev/docker-final-setup.sh
new file mode 100755
index 00000000000..d2453619ec2
--- /dev/null
+++ b/scripts/dev/docker-final-setup.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+set -euo pipefail
+
+echo "Running setup-all.sh (INSECURE MODE)..."
+cd scripts/api || exit
+./setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out
+cd ../..
+
+echo "Setting system mail address..."
+curl -X PUT -d "dataverse@localhost" "http://localhost:8080/api/admin/settings/:SystemEmail"
+
+echo "Setting DOI provider to \"FAKE\"..."
+curl "http://localhost:8080/api/admin/settings/:DoiProvider" -X PUT -d FAKE
+
+API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \")
+export API_TOKEN
+
+echo "Publishing root dataverse..."
+curl -H "X-Dataverse-key:$API_TOKEN" -X POST "http://localhost:8080/api/dataverses/:root/actions/:publish"
+
+echo "Allowing users to create dataverses and datasets in root..."
+curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H "Content-type:application/json" -d "{\"assignee\": \":authenticated-users\",\"role\": \"fullContributor\"}" "http://localhost:8080/api/dataverses/:root/assignments"
+
+echo "Checking Dataverse version..."
+curl "http://localhost:8080/api/info/version"
\ No newline at end of file
diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh
index 853db77f471..49ebce059d2 100755
--- a/scripts/installer/as-setup.sh
+++ b/scripts/installer/as-setup.sh
@@ -106,13 +106,13 @@ function preliminary_setup()
# (we can no longer offer EZID with their shared test account)
# jvm-options use colons as separators, escape as literal
DOI_BASEURL_ESC=`echo $DOI_BASEURL | sed -e 's/:/\\\:/'`
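+  # e.g. a DOI_BASEURL of "https://mds.test.datacite.org" is escaped to "https\://mds.test.datacite.org" (illustrative value)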
- ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddoi.username=${DOI_USERNAME}"
- ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddoi.password=${ALIAS=doi_password_alias}'
- ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddoi.baseurlstring=$DOI_BASEURL_ESC"
+ ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.username=${DOI_USERNAME}"
+ ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddataverse.pid.datacite.password=${ALIAS=doi_password_alias}'
+ ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.mds-api-url=$DOI_BASEURL_ESC"
# jvm-options use colons as separators, escape as literal
DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'`
- ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddoi.dataciterestapiurlstring=$DOI_DATACITERESTAPIURL_ESC"
+ ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC"
./asadmin $ASADMIN_OPTS create-jvm-options "-Ddataverse.timerServer=true"
diff --git a/scripts/installer/install.py b/scripts/installer/install.py
index ea1a69db6a7..5acb4d760a4 100644
--- a/scripts/installer/install.py
+++ b/scripts/installer/install.py
@@ -578,8 +578,8 @@
print("However, you have to contact DataCite (support\@datacite.org) and request a test account, before you ")
print("can publish datasets. Once you receive the account name and password, add them to your domain.xml,")
print("as the following two JVM options:")
-print("\t-Ddoi.username=...")
-print("\t-Ddoi.password=...")
+print("\t-Ddataverse.pid.datacite.username=...")
+print("\t-Ddataverse.pid.datacite.password=...")
print("and restart payara")
print("If this is a production Dataverse and you are planning to register datasets as ")
print("\"real\", non-test DOIs or Handles, consult the \"Persistent Identifiers and Publishing Datasets\"")
diff --git a/scripts/search/tests/data/dataset-finch1-nolicense.json b/scripts/search/tests/data/dataset-finch1-nolicense.json
new file mode 100644
index 00000000000..ec0856a2aa3
--- /dev/null
+++ b/scripts/search/tests/data/dataset-finch1-nolicense.json
@@ -0,0 +1,77 @@
+{
+ "datasetVersion": {
+ "metadataBlocks": {
+ "citation": {
+ "fields": [
+ {
+ "value": "Darwin's Finches",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "title"
+ },
+ {
+ "value": [
+ {
+ "authorName": {
+ "value": "Finch, Fiona",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "authorName"
+ },
+ "authorAffiliation": {
+ "value": "Birds Inc.",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "authorAffiliation"
+ }
+ }
+ ],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "author"
+ },
+ {
+ "value": [
+ { "datasetContactEmail" : {
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "datasetContactEmail",
+ "value" : "finch@mailinator.com"
+ },
+ "datasetContactName" : {
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "datasetContactName",
+ "value": "Finch, Fiona"
+ }
+ }],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "datasetContact"
+ },
+ {
+ "value": [ {
+ "dsDescriptionValue":{
+ "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.",
+ "multiple":false,
+ "typeClass": "primitive",
+ "typeName": "dsDescriptionValue"
+ }}],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "dsDescription"
+ },
+ {
+ "value": [
+ "Medicine, Health and Life Sciences"
+ ],
+ "typeClass": "controlledVocabulary",
+ "multiple": true,
+ "typeName": "subject"
+ }
+ ],
+ "displayName": "Citation Metadata"
+ }
+ }
+ }
+}
diff --git a/scripts/search/tests/data/dataset-finch1.json b/scripts/search/tests/data/dataset-finch1.json
index ec0856a2aa3..433ea758711 100644
--- a/scripts/search/tests/data/dataset-finch1.json
+++ b/scripts/search/tests/data/dataset-finch1.json
@@ -1,5 +1,9 @@
{
"datasetVersion": {
+ "license": {
+ "name": "CC0 1.0",
+ "uri": "http://creativecommons.org/publicdomain/zero/1.0"
+ },
"metadataBlocks": {
"citation": {
"fields": [
diff --git a/scripts/search/tests/data/dataset-finch2.json b/scripts/search/tests/data/dataset-finch2.json
index d20f835b629..446df54676a 100644
--- a/scripts/search/tests/data/dataset-finch2.json
+++ b/scripts/search/tests/data/dataset-finch2.json
@@ -1,5 +1,9 @@
{
"datasetVersion": {
+ "license": {
+ "name": "CC0 1.0",
+ "uri": "http://creativecommons.org/publicdomain/zero/1.0"
+ },
"metadataBlocks": {
"citation": {
"fields": [
diff --git a/src/main/docker/Dockerfile b/src/main/docker/Dockerfile
new file mode 100644
index 00000000000..88020a118b5
--- /dev/null
+++ b/src/main/docker/Dockerfile
@@ -0,0 +1,54 @@
+# Copyright 2023 Forschungszentrum Jülich GmbH
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+################################################################################################################
+#
+# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD:
+# mvn -Pct clean package
+#
+################################################################################################################
+#
+# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images.
+# Most parts originate from older versions of https://github.com/gdcc/dataverse-kubernetes.
+#
+# We are not using upstream Payara images because:
+# - By design, their image is less optimised for production usage and for Dataverse
+# - We provide multi-arch images
+# - We provide some tweaks for development and monitoring
+#
+
+# Make the Java base image and version configurable (useful for trying newer Java versions and flavors)
+ARG BASE_IMAGE="gdcc/base:unstable"
+FROM $BASE_IMAGE
+
+# Make Payara use the "ct" profile for MicroProfile Config. Will switch various defaults for the application
+# setup in META-INF/microprofile-config.properties.
+# See also https://download.eclipse.org/microprofile/microprofile-config-3.0/microprofile-config-spec-3.0.html#configprofile
+ENV MP_CONFIG_PROFILE=ct
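+# (Illustrative example, not a real key: a property declared as "%ct.dataverse.example=value" in
+#  microprofile-config.properties takes effect only while this "ct" profile is active.)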
+
+# Copy app and deps from assembly in proper layers
+COPY --chown=payara:payara maven/deps ${DEPLOY_DIR}/dataverse/WEB-INF/lib/
+COPY --chown=payara:payara maven/app ${DEPLOY_DIR}/dataverse/
+COPY --chown=payara:payara maven/supplements ${DEPLOY_DIR}/dataverse/supplements/
+COPY --chown=payara:payara maven/scripts ${SCRIPT_DIR}/
+RUN chmod +x "${SCRIPT_DIR}"/*
+
+# Create symlinks for jHove
+RUN ln -s "${DEPLOY_DIR}/dataverse/supplements/jhove.conf" "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/jhove.conf" && \
+ ln -s "${DEPLOY_DIR}/dataverse/supplements/jhoveConfig.xsd" "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/jhoveConfig.xsd" && \
+ sed -i "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/config/jhove.conf" -e "s:/usr/local/payara./glassfish/domains/domain1:${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}:g"
+
+LABEL org.opencontainers.image.created="@git.build.time@" \
+ org.opencontainers.image.authors="Research Data Management at FZJ " \
+ org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \
+ org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \
+ org.opencontainers.image.source="https://github.com/IQSS/dataverse" \
+ org.opencontainers.image.version="@project.version@" \
+ org.opencontainers.image.revision="@git.commit.id.abbrev@" \
+ org.opencontainers.image.vendor="Global Dataverse Community Consortium" \
+ org.opencontainers.image.licenses="Apache-2.0" \
+ org.opencontainers.image.title="Dataverse Application Image" \
+ org.opencontainers.image.description="This container image provides the research data repository software Dataverse in a box."
\ No newline at end of file
diff --git a/src/main/docker/README.md b/src/main/docker/README.md
new file mode 100644
index 00000000000..06e2769ed6e
--- /dev/null
+++ b/src/main/docker/README.md
@@ -0,0 +1,62 @@
+# Dataverse Application Container Image
+
+The "application image" offers you a deployment-ready Dataverse application running on the underlying
+application server, which is provided by the [base image](https://hub.docker.com/r/gdcc/base).
+Its sole purpose is to bundle the application and any additional material necessary to successfully jumpstart
+the application.
+
+Note: Until all :ref:`jvm-options` are *MicroProfile Config* enabled, it also adds the necessary scripting glue to
+configure the application's domain while the application server boots. See :ref:`app-tunables`.
+
+## Quick Reference
+
+**Maintained by:**
+
+This image is created, maintained and supported by the Dataverse community on a best-effort basis.
+
+**Where to find documentation:**
+
+The [Dataverse Container Guide - Application Image](https://guides.dataverse.org/en/latest/container/app-image.html)
+provides in-depth information about content, building, tuning and so on for this image. You should also consult
+the [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) page
+for more details on tunable settings, locations, etc.
+
+**Where to get help and ask questions:**
+
+IQSS does not offer support for deploying or running this image. Please reach out to the community for help with using it.
+You can join the Community Chat on Matrix at https://chat.dataverse.org or the Dataverse Community mailing list at
+https://groups.google.com/g/dataverse-community to ask for help and guidance.
+
+## Supported Image Tags
+
+This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse).
+Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/src/main/docker)
+happens there (again, by the community). Community-supported image tags are based on the two most important branches:
+
+- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged.
+ ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/src/main/docker/Dockerfile))
+- The `alpha` tag corresponds to the `master` branch, where releases are cut from.
+ ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/src/main/docker/Dockerfile))
+
+Within the main repository, you may find the application image files at `/src/main/docker`.
+This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image.
+You may use, extend, or alter this image to your liking and/or host in some different registry if you want to.
+
+**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures
+Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2).
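+
+For example (assuming a working Docker client and that the community application image is published as
+`gdcc/dataverse` on Docker Hub), you can pull a tag and read back its embedded version label:
+
+```shell
+docker pull gdcc/dataverse:unstable
+docker image inspect gdcc/dataverse:unstable \
+  --format '{{ index .Config.Labels "org.opencontainers.image.version" }}'
+```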
+
+## License
+
+Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0),
+like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md).
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+
+As with all Docker images, this image likely also contains other software which may be under other licenses (such as
+[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base
+distribution, along with any direct or indirect (Java) dependencies included).
+
+As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies
+with any relevant licenses for all software contained within.
diff --git a/src/main/docker/assembly.xml b/src/main/docker/assembly.xml
new file mode 100644
index 00000000000..9f9b39617a3
--- /dev/null
+++ b/src/main/docker/assembly.xml
@@ -0,0 +1,28 @@
+<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.1.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.1.0 http://maven.apache.org/xsd/assembly-2.1.0.xsd">
+    <fileSets>
+        <fileSet>
+            <directory>target/${project.artifactId}-${project.version}</directory>
+            <outputDirectory>app</outputDirectory>
+            <excludes>
+                <exclude>WEB-INF/lib/**/*</exclude>
+            </excludes>
+        </fileSet>
+        <fileSet>
+            <directory>target/${project.artifactId}-${project.version}/WEB-INF/lib</directory>
+            <outputDirectory>deps</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>conf/jhove</directory>
+            <outputDirectory>supplements</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>src/main/docker/scripts</directory>
+            <outputDirectory>scripts</outputDirectory>
+        </fileSet>
+    </fileSets>
+</assembly>
\ No newline at end of file
diff --git a/src/main/docker/scripts/init_2_configure.sh b/src/main/docker/scripts/init_2_configure.sh
new file mode 100755
index 00000000000..a98f08088c1
--- /dev/null
+++ b/src/main/docker/scripts/init_2_configure.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+################################################################################
+# Configure Payara
+#
+# BEWARE: As this is done for Kubernetes, we will ALWAYS start with a fresh container!
+# When moving to Payara 5+ the option commands are idempotent.
+# The resources are to be created by the application on deployment,
+# once Dataverse has proper refactoring, etc.
+################################################################################
+
+# Fail on any error
+set -euo pipefail
+
+# Include some sane defaults (which are currently not settable via MicroProfile Config).
+# This is an ugly hack and shall be removed once #7000 is resolved.
+export dataverse_auth_password__reset__timeout__in__minutes="${dataverse_auth_password__reset__timeout__in__minutes:-60}"
+export dataverse_timerServer="${dataverse_timerServer:-true}"
+export dataverse_files_storage__driver__id="${dataverse_files_storage__driver__id:-local}"
+if [ "${dataverse_files_storage__driver__id}" = "local" ]; then
+ export dataverse_files_local_type="${dataverse_files_local_type:-file}"
+ export dataverse_files_local_label="${dataverse_files_local_label:-Local}"
+ export dataverse_files_local_directory="${dataverse_files_local_directory:-${STORAGE_DIR}/store}"
+fi
+
+# 0. Define postboot commands file to be read by Payara and clear it
+DV_POSTBOOT=${PAYARA_DIR}/dataverse_postboot
+echo "# Dataverse postboot configuration for Payara" > "${DV_POSTBOOT}"
+
+# 2. Domain-spaced resources (JDBC, JMS, ...)
+# TODO: This is ugly and dirty. It should be replaced with resources from
+# EE 8 code annotations or at least glassfish-resources.xml
+# NOTE: The postboot commands file is not multi-line capable, hence the spaghetti below.
+
+# JavaMail
+echo "INFO: Defining JavaMail."
+echo "create-javamail-resource --mailhost=${DATAVERSE_MAIL_HOST:-smtp} --mailuser=${DATAVERSE_MAIL_USER:-dataversenotify} --fromaddress=${DATAVERSE_MAIL_FROM:-dataverse@localhost} mail/notifyMailSession" >> "${DV_POSTBOOT}"
+
+# 3. Domain based configuration options
+# Set Dataverse environment variables
+echo "INFO: Defining system properties for Dataverse configuration options."
+#env | grep -Ee "^(dataverse|doi)_" | sort -fd
+env -0 | grep -z -Ee "^(dataverse|doi)_" | while IFS='=' read -r -d '' k v; do
+ # transform __ to -
+ # shellcheck disable=SC2001
+ KEY=$(echo "${k}" | sed -e "s#__#-#g")
+ # transform remaining single _ to .
+ KEY=$(echo "${KEY}" | tr '_' '.')
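+  # e.g. dataverse_files_storage__driver__id becomes dataverse.files.storage-driver-id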
+
+ # escape colons in values
+ # shellcheck disable=SC2001
+ v=$(echo "${v}" | sed -e 's/:/\\\:/g')
+
+ echo "DEBUG: Handling ${KEY}=${v}."
+ echo "create-system-properties ${KEY}=${v}" >> "${DV_POSTBOOT}"
+done
+
+# 4. Add the commands to the existing postboot file, but insert BEFORE deployment
+TMPFILE=$(mktemp)
+cat "${DV_POSTBOOT}" "${POSTBOOT_COMMANDS}" > "${TMPFILE}" && mv "${TMPFILE}" "${POSTBOOT_COMMANDS}"
+echo "DEBUG: postboot contains the following commands:"
+echo "--------------------------------------------------"
+cat "${POSTBOOT_COMMANDS}"
+echo "--------------------------------------------------"
+
diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java
index f6cbd01ece0..2a3f2d50364 100644
--- a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java
@@ -3,11 +3,13 @@
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.InputStream;
-
import javax.ejb.EJB;
+import javax.inject.Inject;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
+
+import org.apache.commons.lang3.RandomStringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@@ -17,27 +19,21 @@ public abstract class AbstractGlobalIdServiceBean implements GlobalIdServiceBean
private static final Logger logger = Logger.getLogger(AbstractGlobalIdServiceBean.class.getCanonicalName());
- @EJB
+ @Inject
DataverseServiceBean dataverseService;
@EJB
+ protected
SettingsServiceBean settingsService;
- @EJB
- EjbDataverseEngine commandEngine;
- @EJB
- DatasetServiceBean datasetService;
- @EJB
- DataFileServiceBean datafileService;
- @EJB
+ @Inject
+ protected
+ DvObjectServiceBean dvObjectService;
+ @Inject
SystemConfig systemConfig;
+
+ protected Boolean configured = null;
public static String UNAVAILABLE = ":unav";
- @Override
- public String getIdentifierForLookup(String protocol, String authority, String identifier) {
- logger.log(Level.FINE,"getIdentifierForLookup");
- return protocol + ":" + authority + "/" + identifier;
- }
-
@Override
    public Map<String, String> getMetadataForCreateIndicator(DvObject dvObjectIn) {
logger.log(Level.FINE,"getMetadataForCreateIndicator(DvObject)");
@@ -101,14 +97,10 @@ protected String getTargetUrl(DvObject dvObjectIn) {
@Override
public String getIdentifier(DvObject dvObject) {
- return dvObject.getGlobalId().asString();
+ GlobalId gid = dvObject.getGlobalId();
+ return gid != null ? gid.asString() : null;
}
- protected String getTargetUrl(Dataset datasetIn) {
- logger.log(Level.FINE,"getTargetUrl");
- return systemConfig.getDataverseSiteUrl() + Dataset.TARGET_URL + datasetIn.getGlobalIdString();
- }
-
protected String generateYear (DvObject dvObjectIn){
return dvObjectIn.getYearPublishedCreated();
}
@@ -120,16 +112,41 @@ public Map getMetadataForTargetURL(DvObject dvObject) {
return metadata;
}
+ @Override
+ public boolean alreadyRegistered(DvObject dvo) throws Exception {
+ if(dvo==null) {
+ logger.severe("Null DvObject sent to alreadyRegistered().");
+ return false;
+ }
+ GlobalId globalId = dvo.getGlobalId();
+ if(globalId == null) {
+ return false;
+ }
+ return alreadyRegistered(globalId, false);
+ }
+
+ public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception;
+
+ /*
+ * ToDo: the DvObject being sent in provides partial support for the case where
+ * it has a different authority/protocol than what is configured (i.e. a legacy
+ * Pid that can actually be updated by the Pid account being used.) Removing
+ * this now would potentially break/make it harder to handle that case prior to
+ * support for configuring multiple Pid providers. Once that exists, it would be
+ * cleaner to always find the PidProvider associated with the
+ * protocol/authority/shoulder of the current dataset and then not pass the
+ * DvObject as a param. (This would also remove calls to get the settings since
+ * that would be done at construction.)
+ */
@Override
public DvObject generateIdentifier(DvObject dvObject) {
String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol();
String authority = dvObject.getAuthority() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority();
- GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, commandEngine.getContext());
if (dvObject.isInstanceofDataset()) {
- dvObject.setIdentifier(datasetService.generateDatasetIdentifier((Dataset) dvObject, idServiceBean));
+ dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject));
} else {
- dvObject.setIdentifier(datafileService.generateDataFileIdentifier((DataFile) dvObject, idServiceBean));
+ dvObject.setIdentifier(generateDataFileIdentifier((DataFile) dvObject));
}
if (dvObject.getProtocol() == null) {
dvObject.setProtocol(protocol);
@@ -140,6 +157,227 @@ public DvObject generateIdentifier(DvObject dvObject) {
return dvObject;
}
+ //ToDo just send the DvObject.DType
+ public String generateDatasetIdentifier(Dataset dataset) {
+ //ToDo - track these in the bean
+ String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString");
+ String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, "");
+
+ switch (identifierType) {
+ case "randomString":
+ return generateIdentifierAsRandomString(dataset, shoulder);
+ case "storedProcGenerated":
+ return generateIdentifierFromStoredProcedureIndependent(dataset, shoulder);
+ default:
+ /* Should we throw an exception instead?? -- L.A. 4.6.2 */
+ return generateIdentifierAsRandomString(dataset, shoulder);
+ }
+ }
+
+
+ /**
+     * Check that an identifier entered by the user is unique (not currently used
+     * for any other study in this Dataverse Network); if needed, also check for a
+     * duplicate with the PID provider.
+     * @param globalId the persistent identifier to check
+ * @return {@code true} if the identifier is unique, {@code false} otherwise.
+ */
+ public boolean isGlobalIdUnique(GlobalId globalId) {
+ if ( ! dvObjectService.isGlobalIdLocallyUnique(globalId) ) {
+ return false; // duplication found in local database
+ }
+
+ // not in local DB, look in the persistent identifier service
+ try {
+ return ! alreadyRegistered(globalId, false);
+ } catch (Exception e){
+ //we can live with failure - means identifier not found remotely
+ }
+
+ return true;
+ }
+
+ /**
+ * Parse a Persistent Id and set the protocol, authority, and identifier
+ *
+ * Example 1: doi:10.5072/FK2/BYM3IW
+ * protocol: doi
+ * authority: 10.5072
+ * identifier: FK2/BYM3IW
+ *
+ * Example 2: hdl:1902.1/111012
+ * protocol: hdl
+ * authority: 1902.1
+ * identifier: 111012
+ *
+     * @param fullIdentifierString the string to parse, e.g. "doi:10.5072/FK2/BYM3IW"
+     * @return a {@code GlobalId} with protocol, authority, and identifier set, or
+     *         {@code null} if parsing failed.
+ */
+ @Override
+ public GlobalId parsePersistentId(String fullIdentifierString) {
+ if(!isConfigured()) {
+ return null;
+ }
+ int index1 = fullIdentifierString.indexOf(':');
+ if (index1 > 0) { // ':' found with one or more characters before it
+ String protocol = fullIdentifierString.substring(0, index1);
+ GlobalId globalId = parsePersistentId(protocol, fullIdentifierString.substring(index1+1));
+ return globalId;
+ }
+ logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", fullIdentifierString);
+ return null;
+ }
+
+ protected GlobalId parsePersistentId(String protocol, String identifierString) {
+ if(!isConfigured()) {
+ return null;
+ }
+ String authority;
+ String identifier;
+ if (identifierString == null) {
+ return null;
+ }
+ int index = identifierString.indexOf('/');
+ if (index > 0 && (index + 1) < identifierString.length()) {
+ // '/' found with one or more characters
+ // before and after it
+ // Strip any whitespace, ; and ' from authority (should finding them cause a
+ // failure instead?)
+ authority = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(0, index));
+ if (GlobalIdServiceBean.testforNullTerminator(authority)) {
+ return null;
+ }
+ identifier = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(index + 1));
+ if (GlobalIdServiceBean.testforNullTerminator(identifier)) {
+ return null;
+ }
+ } else {
+ logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string",
+ identifierString);
+ return null;
+ }
+ return parsePersistentId(protocol, authority, identifier);
+ }
+
+ public GlobalId parsePersistentId(String protocol, String authority, String identifier) {
+ if(!isConfigured()) {
+ return null;
+ }
+ logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getProviderInformation().get(0));
+ if(!GlobalIdServiceBean.isValidGlobalId(protocol, authority, identifier)) {
+ return null;
+ }
+ return new GlobalId(protocol, authority, identifier, getSeparator(), getUrlPrefix(),
+ getProviderInformation().get(0));
+ }
+
+
+ public String getSeparator() {
+ //The standard default
+ return "/";
+ }
+
+ @Override
+ public String generateDataFileIdentifier(DataFile datafile) {
+ String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString");
+ String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, SystemConfig.DataFilePIDFormat.DEPENDENT.toString());
+
+ String prepend = "";
+ if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){
+ //If format is dependent then pre-pend the dataset identifier
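+            // (e.g. an owning dataset identifier "FK2/ABC123" yields file identifiers like "FK2/ABC123/XYZ789"; values are illustrative)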
+ prepend = datafile.getOwner().getIdentifier() + "/";
+ datafile.setProtocol(datafile.getOwner().getProtocol());
+ datafile.setAuthority(datafile.getOwner().getAuthority());
+ } else {
+ //If there's a shoulder prepend independent identifiers with it
+ prepend = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, "");
+ datafile.setProtocol(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol));
+ datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority));
+ }
+
+ switch (doiIdentifierType) {
+ case "randomString":
+ return generateIdentifierAsRandomString(datafile, prepend);
+ case "storedProcGenerated":
+ if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){
+ return generateIdentifierFromStoredProcedureIndependent(datafile, prepend);
+ } else {
+ return generateIdentifierFromStoredProcedureDependent(datafile, prepend);
+ }
+ default:
+ /* Should we throw an exception instead?? -- L.A. 4.6.2 */
+ return generateIdentifierAsRandomString(datafile, prepend);
+ }
+ }
+
+
+ /*
+ * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service.
+ * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object)
+ * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier
+ */
+ private String generateIdentifierAsRandomString(DvObject dvo, String prepend) {
+ String identifier = null;
+ do {
+ identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase();
+ } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0))));
+
+ return identifier;
+ }
+
+ /*
+ * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service.
+ * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object)
+ * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier
+ */
+
+ private String generateIdentifierFromStoredProcedureIndependent(DvObject dvo, String prepend) {
+ String identifier;
+ do {
+ String identifierFromStoredProcedure = dvObjectService.generateNewIdentifierByStoredProcedure();
+ // some diagnostics here maybe - is it possible to determine that it's failing
+ // because the stored procedure hasn't been created in the database?
+ if (identifierFromStoredProcedure == null) {
+ return null;
+ }
+ identifier = prepend + identifierFromStoredProcedure;
+ } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0))));
+
+ return identifier;
+ }
+
+ /*This method is only used for DataFiles with DEPENDENT Pids. It is not for Datasets
+ *
+ */
+ private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, String prepend) {
+ String identifier;
+ Long retVal;
+ retVal = Long.valueOf(0L);
+ //ToDo - replace loops with one lookup for largest entry? (the do loop runs ~n**2/2 calls). The check for existingIdentifiers means this is mostly a local loop now, versus involving db or PidProvider calls, but still...)
+
+ // This will catch identifiers already assigned in the current transaction (e.g.
+ // in FinalizeDatasetPublicationCommand) that haven't been committed to the db
+ // without having to make a call to the PIDProvider
+        Set<String> existingIdentifiers = new HashSet<>();
+        List<DataFile> files = datafile.getOwner().getFiles();
+ for(DataFile f:files) {
+ existingIdentifiers.add(f.getIdentifier());
+ }
+
+ do {
+ retVal++;
+ identifier = prepend + retVal.toString();
+
+ } while (existingIdentifiers.contains(identifier) || !isGlobalIdUnique(new GlobalId(datafile.getProtocol(), datafile.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0))));
+
+ return identifier;
+ }
+
+
class GlobalIdMetadataTemplate {
@@ -159,7 +397,6 @@ public GlobalIdMetadataTemplate(){
private String xmlMetadata;
private String identifier;
- private String datasetIdentifier;
    private List<String> datafileIdentifiers;
    private List<String> creators;
private String title;
@@ -245,7 +482,7 @@ public String generateXML(DvObject dvObject) {
// Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID.
publisherYearFinal = this.publisherYear;
}
- xmlMetadata = template.replace("${identifier}", this.identifier.trim())
+ xmlMetadata = template.replace("${identifier}", getIdentifier().trim())
.replace("${title}", this.title)
.replace("${publisher}", this.publisher)
.replace("${publisherYear}", publisherYearFinal)
@@ -371,10 +608,6 @@ public void setIdentifier(String identifier) {
this.identifier = identifier;
}
- public void setDatasetIdentifier(String datasetIdentifier) {
- this.datasetIdentifier = datasetIdentifier;
- }
-
    public List<String> getCreators() {
return creators;
}
@@ -428,10 +661,6 @@ public String getMetadataFromDvObject(String identifier, Map met
DataFile df = (DataFile) dvObject;
String fileDescription = df.getDescription();
metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription);
- String datasetPid = df.getOwner().getGlobalId().asString();
- metadataTemplate.setDatasetIdentifier(datasetPid);
- } else {
- metadataTemplate.setDatasetIdentifier("");
}
metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts());
@@ -448,5 +677,19 @@ public String getMetadataFromDvObject(String identifier, Map met
logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata);
return xmlMetadata;
}
+
+ @Override
+ public boolean canManagePID() {
+ //The default expectation is that PID providers are configured to manage some set (i.e. based on protocol/authority/shoulder) of PIDs
+ return true;
+ }
+ @Override
+ public boolean isConfigured() {
+ if(configured==null) {
+ return false;
+ } else {
+ return configured.booleanValue();
+ }
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java b/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java
index 2b342b09610..f6b4e3dc99a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java
+++ b/src/main/java/edu/harvard/iq/dataverse/CitationServlet.java
@@ -5,6 +5,7 @@
*/
package edu.harvard.iq.dataverse;
+import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.util.StringUtil;
import java.io.IOException;
import java.io.PrintWriter;
@@ -21,7 +22,7 @@
public class CitationServlet extends HttpServlet {
@EJB
- DatasetServiceBean datasetService;
+ DvObjectServiceBean dvObjectService;
/**
     * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
@@ -37,10 +38,14 @@ protected void processRequest(HttpServletRequest request, HttpServletResponse re
String persistentId = request.getParameter("persistentId");
if (persistentId != null) {
- Dataset ds = datasetService.findByGlobalId(persistentId);
- if (ds != null) {
- response.sendRedirect("dataset.xhtml?persistentId=" + persistentId);
- return;
+ DvObject dob = dvObjectService.findByGlobalId(PidUtil.parseAsGlobalID(persistentId));
+ if (dob != null) {
+ if (dob instanceof Dataset) {
+ response.sendRedirect("dataset.xhtml?persistentId=" + persistentId);
+ } else if (dob instanceof DataFile) {
+ response.sendRedirect("file.xhtml?persistentId=" + persistentId);
+ }
+ return;
}
}
response.sendError(HttpServletResponse.SC_NOT_FOUND);
diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
index 218e4c85474..b748897dafe 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
@@ -23,6 +23,8 @@
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.TypedQuery;
+
+import edu.harvard.iq.dataverse.settings.JvmSettings;
import org.apache.commons.text.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@@ -53,7 +55,11 @@ public class DOIDataCiteRegisterService {
private DataCiteRESTfullClient getClient() throws IOException {
if (client == null) {
- client = new DataCiteRESTfullClient(System.getProperty("doi.baseurlstring"), System.getProperty("doi.username"), System.getProperty("doi.password"));
+ client = new DataCiteRESTfullClient(
+ JvmSettings.DATACITE_MDS_API_URL.lookup(),
+ JvmSettings.DATACITE_USERNAME.lookup(),
+ JvmSettings.DATACITE_PASSWORD.lookup()
+ );
}
return client;
}
@@ -546,7 +552,7 @@ private String generateRelatedIdentifiers(DvObject dvObject) {
datafileIdentifiers = new ArrayList<>();
for (DataFile dataFile : dataset.getFiles()) {
- if (!dataFile.getGlobalId().asString().isEmpty()) {
+ if (dataFile.getGlobalId() != null) {
if (sb.toString().isEmpty()) {
sb.append("");
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java
index e7dd49a6926..fa0a745d80f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java
@@ -10,9 +10,11 @@
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import javax.ejb.EJB;
import javax.ejb.Stateless;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
@@ -22,7 +24,7 @@
* @author luopc
*/
@Stateless
-public class DOIDataCiteServiceBean extends AbstractGlobalIdServiceBean {
+public class DOIDataCiteServiceBean extends DOIServiceBean {
private static final Logger logger = Logger.getLogger(DOIDataCiteServiceBean.class.getCanonicalName());
@@ -34,41 +36,30 @@ public class DOIDataCiteServiceBean extends AbstractGlobalIdServiceBean {
@EJB
DOIDataCiteRegisterService doiDataCiteRegisterService;
- public DOIDataCiteServiceBean() {
- }
-
@Override
public boolean registerWhenPublished() {
return false;
}
- @Override
- public boolean alreadyExists(DvObject dvObject) {
- if(dvObject==null) {
- logger.severe("Null DvObject sent to alreadyExists().");
- return false;
- }
- return alreadyExists(dvObject.getGlobalId());
- }
+
@Override
- public boolean alreadyExists(GlobalId pid) {
- logger.log(Level.FINE,"alreadyExists");
+ public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) {
+ logger.log(Level.FINE,"alreadyRegistered");
if(pid==null || pid.asString().isEmpty()) {
logger.fine("No identifier sent.");
return false;
}
- boolean alreadyExists;
+ boolean alreadyRegistered;
String identifier = pid.asString();
try{
- alreadyExists = doiDataCiteRegisterService.testDOIExists(identifier);
+ alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier);
} catch (Exception e){
- logger.log(Level.WARNING, "alreadyExists failed");
+ logger.log(Level.WARNING, "alreadyRegistered failed");
return false;
}
- return alreadyExists;
+ return alreadyRegistered;
}
-
@Override
public String createIdentifier(DvObject dvObject) throws Exception {
@@ -90,10 +81,10 @@ public String createIdentifier(DvObject dvObject) throws Exception {
}
@Override
-    public HashMap<String, String> getIdentifierMetadata(DvObject dvObject) {
+    public Map<String, String> getIdentifierMetadata(DvObject dvObject) {
logger.log(Level.FINE,"getIdentifierMetadata");
String identifier = getIdentifier(dvObject);
-        HashMap<String, String> metadata = new HashMap<>();
+        Map<String, String> metadata = new HashMap<>();
try {
metadata = doiDataCiteRegisterService.getMetadata(identifier);
} catch (Exception e) {
@@ -103,29 +94,6 @@ public HashMap getIdentifierMetadata(DvObject dvObject) {
}
- /**
- * Looks up the metadata for a Global Identifier
- * @param protocol the identifier system, e.g. "doi"
- * @param authority the namespace that the authority manages in the identifier system
- * @param identifier the local identifier part
- * @return a Map of metadata. It is empty when the lookup failed, e.g. when
- * the identifier does not exist.
- */
- @Override
- public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) {
- logger.log(Level.FINE,"lookupMetadataFromIdentifier");
- String identifierOut = getIdentifierForLookup(protocol, authority, identifier);
- HashMap metadata = new HashMap<>();
- try {
- metadata = doiDataCiteRegisterService.getMetadata(identifierOut);
- } catch (Exception e) {
- logger.log(Level.WARNING, "None existing so we can use this identifier");
- logger.log(Level.WARNING, "identifier: {0}", identifierOut);
- }
- return metadata;
- }
-
-
/**
* Modifies the DOI metadata for a Dataset
* @param dvObject the dvObject whose metadata needs to be modified
@@ -219,9 +187,9 @@ public void deleteIdentifier(DvObject dvObject) throws IOException, HttpExceptio
private void deleteDraftIdentifier(DvObject dvObject) throws IOException {
//ToDo - incorporate into DataCiteRESTfulClient
- String baseUrl = systemConfig.getDataCiteRestApiUrlString();
- String username = System.getProperty("doi.username");
- String password = System.getProperty("doi.password");
+ String baseUrl = JvmSettings.DATACITE_REST_API_URL.lookup();
+ String username = JvmSettings.DATACITE_USERNAME.lookup();
+ String password = JvmSettings.DATACITE_PASSWORD.lookup();
GlobalId doi = dvObject.getGlobalId();
/**
* Deletes the DOI from DataCite if it can. Returns 204 if PID was deleted
@@ -269,13 +237,13 @@ public boolean publicizeIdentifier(DvObject dvObject) {
@Override
    public List<String> getProviderInformation(){
- ArrayList providerInfo = new ArrayList<>();
- String providerName = "DataCite";
- String providerLink = "http://status.datacite.org";
- providerInfo.add(providerName);
- providerInfo.add(providerLink);
- return providerInfo;
+ return List.of("DataCite", "https://status.datacite.org");
}
+
+ @Override
+ protected String getProviderKeyName() {
+ return "DataCite";
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java
index d21caf32411..d9b0fde15da 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java
@@ -1,11 +1,12 @@
package edu.harvard.iq.dataverse;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.ucsb.nceas.ezid.EZIDException;
import edu.ucsb.nceas.ezid.EZIDService;
-import edu.ucsb.nceas.ezid.EZIDServiceRequest;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import javax.ejb.Stateless;
/**
@@ -13,32 +14,44 @@
* @author skraffmiller
*/
@Stateless
-public class DOIEZIdServiceBean extends AbstractGlobalIdServiceBean {
-
+public class DOIEZIdServiceBean extends DOIServiceBean {
+
+ private static final Logger logger = Logger.getLogger(DOIEZIdServiceBean.class.getCanonicalName());
+
EZIDService ezidService;
- EZIDServiceRequest ezidServiceRequest;
- String baseURLString = "https://ezid.cdlib.org";
- private static final Logger logger = Logger.getLogger("edu.harvard.iq.dvn.core.index.DOIEZIdServiceBean");
-
- // get username and password from system properties
- private String USERNAME = "";
- private String PASSWORD = "";
-
+
+ // This has a sane default in microprofile-config.properties
+ private final String baseUrl = JvmSettings.EZID_API_URL.lookup();
+
public DOIEZIdServiceBean() {
- logger.log(Level.FINE,"Constructor");
- baseURLString = System.getProperty("doi.baseurlstring");
- ezidService = new EZIDService(baseURLString);
- USERNAME = System.getProperty("doi.username");
- PASSWORD = System.getProperty("doi.password");
- logger.log(Level.FINE, "Using baseURLString {0}", baseURLString);
+ // Creating the service doesn't do any harm, just initializing some object data here.
+ // Makes sure we don't run into NPEs from the other methods, but will obviously fail if the
+ // login below does not work.
+ this.ezidService = new EZIDService(this.baseUrl);
+
try {
- ezidService.login(USERNAME, PASSWORD);
+ // These have (obviously) no default, but still are optional to make the provider optional
+ String username = JvmSettings.EZID_USERNAME.lookupOptional().orElse(null);
+ String password = JvmSettings.EZID_PASSWORD.lookupOptional().orElse(null);
+
+ if (username != null ^ password != null) {
+ logger.log(Level.WARNING, "You must give both username and password. Will not try to login.");
+ }
+
+ if (username != null && password != null) {
+ this.ezidService.login(username, password);
+ this.configured = true;
+ }
} catch (EZIDException e) {
- logger.log(Level.WARNING, "login failed ");
+ // We only do the warnings here, but the object still needs to be created.
+ // The EJB stateless thing expects this to go through, and it is requested on any
+ // global id parsing.
+ logger.log(Level.WARNING, "Login failed to {0}", this.baseUrl);
logger.log(Level.WARNING, "Exception String: {0}", e.toString());
- logger.log(Level.WARNING, "localized message: {0}", e.getLocalizedMessage());
- logger.log(Level.WARNING, "cause: ", e.getCause());
- logger.log(Level.WARNING, "message {0}", e.getMessage());
+ logger.log(Level.WARNING, "Localized message: {0}", e.getLocalizedMessage());
+ logger.log(Level.WARNING, "Cause:", e.getCause());
+ logger.log(Level.WARNING, "Message {0}", e.getMessage());
+ // TODO: is this antipattern really necessary?
} catch (Exception e) {
logger.log(Level.SEVERE, "Other Error on ezidService.login(USERNAME, PASSWORD) - not EZIDException ", e.getMessage());
}
@@ -50,19 +63,10 @@ public boolean registerWhenPublished() {
}
@Override
- public boolean alreadyExists(DvObject dvObject) throws Exception {
- if(dvObject==null) {
- logger.severe("Null DvObject sent to alreadyExists().");
- return false;
- }
- return alreadyExists(dvObject.getGlobalId());
- }
-
- @Override
- public boolean alreadyExists(GlobalId pid) throws Exception {
- logger.log(Level.FINE,"alreadyExists");
+ public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception {
+ logger.log(Level.FINE,"alreadyRegistered");
try {
- HashMap result = ezidService.getMetadata(pid.asString());
+            HashMap<String, String> result = ezidService.getMetadata(pid.asString());
return result != null && !result.isEmpty();
// TODO just check for HTTP status code 200/404, sadly the status code is swept under the carpet
} catch (EZIDException e ){
@@ -74,7 +78,7 @@ public boolean alreadyExists(GlobalId pid) throws Exception {
if (e.getLocalizedMessage().contains("no such identifier")){
return false;
}
- logger.log(Level.WARNING, "alreadyExists failed");
+ logger.log(Level.WARNING, "alreadyRegistered failed");
logger.log(Level.WARNING, "getIdentifier(dvObject) {0}", pid.asString());
logger.log(Level.WARNING, "String {0}", e.toString());
logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage());
@@ -102,32 +106,6 @@ public Map getIdentifierMetadata(DvObject dvObject) {
return metadata;
}
- /**
- * Looks up the metadata for a Global Identifier
- *
- * @param protocol the identifier system, e.g. "doi"
- * @param authority the namespace that the authority manages in the
- * identifier system
- * identifier part
- * @param identifier the local identifier part
- * @return a Map of metadata. It is empty when the lookup failed, e.g. when
- * the identifier does not exist.
- */
- @Override
- public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) {
- logger.log(Level.FINE,"lookupMetadataFromIdentifier");
- String identifierOut = getIdentifierForLookup(protocol, authority, identifier);
- HashMap metadata = new HashMap<>();
- try {
- metadata = ezidService.getMetadata(identifierOut);
- } catch (EZIDException e) {
- logger.log(Level.FINE, "None existing so we can use this identifier");
- logger.log(Level.FINE, "identifier: {0}", identifierOut);
- return metadata;
- }
- return metadata;
- }
-
/**
* Modifies the EZID metadata for a Dataset
*
@@ -249,12 +227,7 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) {
@Override
    public List<String> getProviderInformation(){
- ArrayList providerInfo = new ArrayList<>();
- String providerName = "EZID";
- String providerLink = baseURLString;
- providerInfo.add(providerName);
- providerInfo.add(providerLink);
- return providerInfo;
+ return List.of("EZID", this.baseUrl);
}
@Override
@@ -301,5 +274,10 @@ private HashMap asHashMap(Map map) {
return (map instanceof HashMap) ? (HashMap)map : new HashMap<>(map);
}
+ @Override
+ protected String getProviderKeyName() {
+ return "EZID";
+ }
+
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java
new file mode 100644
index 00000000000..0182c745cd0
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java
@@ -0,0 +1,78 @@
+package edu.harvard.iq.dataverse;
+
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key;
+
+public abstract class DOIServiceBean extends AbstractGlobalIdServiceBean {
+
+ public static final String DOI_PROTOCOL = "doi";
+ public static final String DOI_RESOLVER_URL = "https://doi.org/";
+ public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/";
+ public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/";
+ public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/";
+
+ public DOIServiceBean() {
+ super();
+ }
+
+ @Override
+ public GlobalId parsePersistentId(String pidString) {
+ if (pidString.startsWith(DOI_RESOLVER_URL)) {
+ pidString = pidString.replace(DOI_RESOLVER_URL,
+ (DOI_PROTOCOL + ":"));
+ } else if (pidString.startsWith(HTTP_DOI_RESOLVER_URL)) {
+ pidString = pidString.replace(HTTP_DOI_RESOLVER_URL,
+ (DOI_PROTOCOL + ":"));
+ } else if (pidString.startsWith(DXDOI_RESOLVER_URL)) {
+ pidString = pidString.replace(DXDOI_RESOLVER_URL,
+ (DOI_PROTOCOL + ":"));
+ }
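+        // e.g. "https://doi.org/10.5072/FK2/BYM3IW" has been rewritten to "doi:10.5072/FK2/BYM3IW" at this point (illustrative PID)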
+ return super.parsePersistentId(pidString);
+ }
+
+ @Override
+ public GlobalId parsePersistentId(String protocol, String identifierString) {
+
+ if (!DOI_PROTOCOL.equals(protocol)) {
+ return null;
+ }
+ GlobalId globalId = super.parsePersistentId(protocol, identifierString);
+ if (globalId!=null && !GlobalIdServiceBean.checkDOIAuthority(globalId.getAuthority())) {
+ return null;
+ }
+ return globalId;
+ }
+
+ @Override
+ public GlobalId parsePersistentId(String protocol, String authority, String identifier) {
+
+ if (!DOI_PROTOCOL.equals(protocol)) {
+ return null;
+ }
+ return super.parsePersistentId(protocol, authority, identifier);
+ }
+
+ public String getUrlPrefix() {
+ return DOI_RESOLVER_URL;
+ }
+
+ @Override
+ public boolean isConfigured() {
+ if (configured == null) {
+ if (getProviderKeyName() == null) {
+ configured = false;
+ } else {
+ String doiProvider = settingsService.getValueForKey(Key.DoiProvider, "");
+ if (getProviderKeyName().equals(doiProvider)) {
+ configured = true;
+ } else if (!doiProvider.isEmpty()) {
+ configured = false;
+ }
+ }
+ }
+ return super.isConfigured();
+ }
+
+ protected String getProviderKeyName() {
+ return null;
+ }
+}
\ No newline at end of file
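Note (illustrative sketch, not part of the patch): the parsePersistentId(String) override above only rewrites the known resolver prefixes to the canonical "doi:" form; actual parsing and authority checks remain in the superclass. A hedged sketch of the intended equivalence, assuming a concrete DOIServiceBean subclass instance named doiService (the identifier below is made up):

    // All three inputs are reduced to the same canonical "doi:" form before parsing.
    GlobalId a = doiService.parsePersistentId("https://doi.org/10.5072/FK2/EXAMPLE");
    GlobalId b = doiService.parsePersistentId("http://dx.doi.org/10.5072/FK2/EXAMPLE");
    GlobalId c = doiService.parsePersistentId("doi:10.5072/FK2/EXAMPLE");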
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java
index abe3cc3e6d7..30e03046822 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java
@@ -57,7 +57,7 @@ public class DataCitation {
private String publisher;
private boolean direct;
private List<String> funders;
- private String seriesTitle;
+ private List<String> seriesTitles;
private String description;
private List<String> datesOfCollection;
private List<String> keywords;
@@ -135,7 +135,7 @@ private void getCommonValuesFrom(DatasetVersion dsv) {
datesOfCollection = dsv.getDatesOfCollection();
title = dsv.getTitle();
- seriesTitle = dsv.getSeriesTitle();
+ seriesTitles = dsv.getSeriesTitles();
keywords = dsv.getKeywords();
languages = dsv.getLanguages();
spatialCoverages = dsv.getSpatialCoverages();
@@ -207,7 +207,7 @@ public String toString(boolean html, boolean anonymized) {
if (persistentId != null) {
// always show url format
- citationList.add(formatURL(persistentId.toURL().toString(), persistentId.toURL().toString(), html));
+ citationList.add(formatURL(persistentId.asURL(), persistentId.asURL(), html));
}
citationList.add(formatString(publisher, html));
citationList.add(version);
@@ -298,7 +298,7 @@ public void writeAsBibtexCitation(OutputStream os) throws IOException {
out.write(persistentId.getIdentifier());
out.write("},\r\n");
out.write("url = {");
- out.write(persistentId.toURL().toString());
+ out.write(persistentId.asURL());
out.write("}\r\n");
out.write("}\r\n");
out.flush();
@@ -330,8 +330,10 @@ public void writeAsRISCitation(OutputStream os) throws IOException {
out.write("TY - DATA" + "\r\n");
out.write("T1 - " + getTitle() + "\r\n");
}
- if (seriesTitle != null) {
- out.write("T3 - " + seriesTitle + "\r\n");
+ if (seriesTitles != null) {
+ for (String seriesTitle : seriesTitles) {
+ out.write("T3 - " + seriesTitle + "\r\n");
+ }
}
/* Removing abstract/description per Request from G. King in #3759
if(description!=null) {
@@ -387,7 +389,7 @@ public void writeAsRISCitation(OutputStream os) throws IOException {
out.write("SE - " + date + "\r\n");
- out.write("UR - " + persistentId.toURL().toString() + "\r\n");
+ out.write("UR - " + persistentId.asURL() + "\r\n");
out.write("PB - " + publisher + "\r\n");
// a DataFile citation also includes filename and UNF, if applicable:
@@ -505,12 +507,22 @@ private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException {
xmlw.writeCharacters(title);
xmlw.writeEndElement(); // title
}
-
- if (seriesTitle != null) {
- xmlw.writeStartElement("tertiary-title");
- xmlw.writeCharacters(seriesTitle);
+
+ /*
+ Checking only !seriesTitles.isEmpty() (without the null check) fails
+ testToEndNoteString_withoutTitleAndAuthor with a NullPointerException,
+ so both conditions are needed. -SEK 3/31/23
+ */
+ if (seriesTitles != null && !seriesTitles.isEmpty()) {
+ xmlw.writeStartElement("tertiary-titles");
+ for (String seriesTitle : seriesTitles){
+ xmlw.writeStartElement("tertiary-title");
+ xmlw.writeCharacters(seriesTitle);
+ xmlw.writeEndElement(); // tertiary-title
+ }
xmlw.writeEndElement(); // tertiary-titles
}
+
xmlw.writeEndElement(); // titles
xmlw.writeStartElement("section");
@@ -584,7 +596,7 @@ private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException {
xmlw.writeStartElement("urls");
xmlw.writeStartElement("related-urls");
xmlw.writeStartElement("url");
- xmlw.writeCharacters(getPersistentId().toURL().toString());
+ xmlw.writeCharacters(getPersistentId().asURL());
xmlw.writeEndElement(); // url
xmlw.writeEndElement(); // related-urls
xmlw.writeEndElement(); // urls
@@ -781,18 +793,13 @@ private GlobalId getPIDFrom(DatasetVersion dsv, DvObject dv) {
|| HarvestingClient.HARVEST_STYLE_ICPSR.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())
|| HarvestingClient.HARVEST_STYLE_DATAVERSE
.equals(dsv.getDataset().getHarvestedFrom().getHarvestStyle())) {
- // creating a global id like this:
- // persistentId = new GlobalId(dv.getGlobalId());
- // you end up doing new GlobalId((New GlobalId(dv)).toString())
- // - doing an extra formatting-and-parsing-again
- // This achieves the same thing:
if(!isDirect()) {
if (!StringUtils.isEmpty(dsv.getDataset().getIdentifier())) {
- return new GlobalId(dsv.getDataset());
+ return dsv.getDataset().getGlobalId();
}
} else {
if (!StringUtils.isEmpty(dv.getIdentifier())) {
- return new GlobalId(dv);
+ return dv.getGlobalId();
}
}
}
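Note (illustrative, not part of the patch): with the seriesTitle field widened to a list, the RIS writer above emits one T3 tag per series title. For a dataset with two (made-up) series titles, the relevant RIS fragment would look like:

    T3 - First Series Title
    T3 - Second Series Title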
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java
index 5171e8d49f2..4e323496188 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java
@@ -5,12 +5,11 @@
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.annotations.Expose;
-import com.google.gson.annotations.SerializedName;
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
+import edu.harvard.iq.dataverse.authorization.RoleAssignee;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
-import edu.harvard.iq.dataverse.dataset.DatasetThumbnail;
import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker;
import edu.harvard.iq.dataverse.ingest.IngestReport;
import edu.harvard.iq.dataverse.ingest.IngestRequest;
@@ -19,17 +18,17 @@
import edu.harvard.iq.dataverse.util.ShapefileHandler;
import edu.harvard.iq.dataverse.util.StringUtil;
import java.io.IOException;
+import java.util.Date;
import java.util.List;
import java.util.ArrayList;
import java.util.Objects;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.Files;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
+import java.util.Set;
import java.util.logging.Logger;
+import java.util.stream.Collectors;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.persistence.*;
@@ -47,9 +46,9 @@
query = "SELECT o FROM DataFile o WHERE o.creator.id=:creatorId"),
@NamedQuery(name = "DataFile.findByReleaseUserId",
query = "SELECT o FROM DataFile o WHERE o.releaseUser.id=:releaseUserId"),
- @NamedQuery(name="DataFile.findDataFileByIdProtocolAuth",
+ @NamedQuery(name="DataFile.findDataFileByIdProtocolAuth",
query="SELECT s FROM DataFile s WHERE s.identifier=:identifier AND s.protocol=:protocol AND s.authority=:authority"),
- @NamedQuery(name="DataFile.findDataFileThatReplacedId",
+ @NamedQuery(name="DataFile.findDataFileThatReplacedId",
query="SELECT s.id FROM DataFile s WHERE s.previousDataFileId=:identifier")
})
@Entity
@@ -73,7 +72,10 @@ public class DataFile extends DvObject implements Comparable {
@Column( nullable = false )
@Pattern(regexp = "^.*/.*$", message = "{contenttype.slash}")
private String contentType;
-
+
+ public void setFileAccessRequests(List<FileAccessRequest> fileAccessRequests) {
+ this.fileAccessRequests = fileAccessRequests;
+ }
// @Expose
// @SerializedName("storageIdentifier")
@@ -416,7 +418,7 @@ public String getIngestReportMessage() {
return ingestReports.get(0).getReport();
}
}
- return "Ingest failed. No further information is available.";
+ return BundleUtil.getStringFromBundle("file.ingestFailed");
}
public boolean isTabularData() {
@@ -747,22 +749,71 @@ public String getUnf() {
}
return null;
}
-
- @ManyToMany
- @JoinTable(name = "fileaccessrequests",
- joinColumns = @JoinColumn(name = "datafile_id"),
- inverseJoinColumns = @JoinColumn(name = "authenticated_user_id"))
- private List<AuthenticatedUser> fileAccessRequesters;
+ @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, orphanRemoval = true)
+ private List<FileAccessRequest> fileAccessRequests;
- public List<AuthenticatedUser> getFileAccessRequesters() {
- return fileAccessRequesters;
+ public List<FileAccessRequest> getFileAccessRequests() {
+ return fileAccessRequests;
}
- public void setFileAccessRequesters(List<AuthenticatedUser> fileAccessRequesters) {
- this.fileAccessRequesters = fileAccessRequesters;
+ public void addFileAccessRequester(AuthenticatedUser authenticatedUser) {
+ if (this.fileAccessRequests == null) {
+ this.fileAccessRequests = new ArrayList<>();
+ }
+
+ Set<AuthenticatedUser> existingUsers = this.fileAccessRequests.stream()
+ .map(FileAccessRequest::getAuthenticatedUser)
+ .collect(Collectors.toSet());
+
+ if (existingUsers.contains(authenticatedUser)) {
+ return;
+ }
+
+ FileAccessRequest request = new FileAccessRequest();
+ request.setCreationTime(new Date());
+ request.setDataFile(this);
+ request.setAuthenticatedUser(authenticatedUser);
+
+ FileAccessRequest.FileAccessRequestKey key = new FileAccessRequest.FileAccessRequestKey();
+ key.setAuthenticatedUser(authenticatedUser.getId());
+ key.setDataFile(this.getId());
+
+ request.setId(key);
+
+ this.fileAccessRequests.add(request);
}
-
+
+ public boolean removeFileAccessRequester(RoleAssignee roleAssignee) {
+ if (this.fileAccessRequests == null) {
+ return false;
+ }
+
+ FileAccessRequest request = this.fileAccessRequests.stream()
+ .filter(fileAccessRequest -> fileAccessRequest.getAuthenticatedUser().equals(roleAssignee))
+ .findFirst()
+ .orElse(null);
+
+ if (request != null) {
+ this.fileAccessRequests.remove(request);
+ return true;
+ }
+
+ return false;
+ }
+
+ public boolean containsFileAccessRequestFromUser(RoleAssignee roleAssignee) {
+ if (this.fileAccessRequests == null) {
+ return false;
+ }
+
+ Set<AuthenticatedUser> existingUsers = this.fileAccessRequests.stream()
+ .map(FileAccessRequest::getAuthenticatedUser)
+ .collect(Collectors.toSet());
+
+ return existingUsers.contains(roleAssignee);
+ }
+
public boolean isHarvested() {
Dataset ownerDataset = this.getOwner();
@@ -956,7 +1007,7 @@ public JsonObject asGsonObject(boolean prettyPrint){
// https://github.com/IQSS/dataverse/issues/761, https://github.com/IQSS/dataverse/issues/2110, https://github.com/IQSS/dataverse/issues/3191
//
datasetMap.put("title", thisFileMetadata.getDatasetVersion().getTitle());
- datasetMap.put("persistentId", getOwner().getGlobalIdString());
+ datasetMap.put("persistentId", getOwner().getGlobalId().asString());
datasetMap.put("url", getOwner().getPersistentURL());
datasetMap.put("version", thisFileMetadata.getDatasetVersion().getSemanticVersion());
datasetMap.put("id", getOwner().getId());
@@ -1034,6 +1085,10 @@ public String getCreateDateFormattedYYYYMMDD() {
return null;
}
+ @Override
+ public String getTargetUrl() {
+ return DataFile.TARGET_URL;
+ }
} // end of class
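Note (illustrative sketch, not part of the patch): a minimal, hypothetical usage sketch of the new access-request helpers on DataFile; the variable names dataFile and requestor are made up:

    // Records a request only if this user has not already requested access.
    dataFile.addFileAccessRequester(requestor);
    if (dataFile.containsFileAccessRequestFromUser(requestor)) {
        // Returns true if a matching pending request was found and removed.
        dataFile.removeFileAccessRequester(requestor);
    }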
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
index 7da06f36be4..c30bfce368a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
@@ -1,7 +1,5 @@
package edu.harvard.iq.dataverse;
-import edu.harvard.iq.dataverse.authorization.AccessRequest;
-import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
@@ -11,19 +9,15 @@
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder;
import edu.harvard.iq.dataverse.util.FileUtil;
-import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -36,9 +30,7 @@
import javax.persistence.NoResultException;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
-import javax.persistence.StoredProcedureQuery;
import javax.persistence.TypedQuery;
-import org.apache.commons.lang3.RandomStringUtils;
/**
*
@@ -73,7 +65,7 @@ public class DataFileServiceBean implements java.io.Serializable {
// Assorted useful mime types:
// 3rd-party and/or proprietary tabular data formats that we know
- // how to ingest:
+ // how to ingest:
private static final String MIME_TYPE_STATA = "application/x-stata";
private static final String MIME_TYPE_STATA13 = "application/x-stata-13";
@@ -155,7 +147,7 @@ public DataFile find(Object pk) {
}*/
public DataFile findByGlobalId(String globalId) {
- return (DataFile) dvObjectService.findByGlobalId(globalId, DataFile.DATAFILE_DTYPE_STRING);
+ return (DataFile) dvObjectService.findByGlobalId(globalId, DvObject.DType.DataFile);
}
public List<DataFile> findByCreatorId(Long creatorId) {
@@ -199,6 +191,18 @@ public List findByDatasetId(Long studyId) {
.setParameter("studyId", studyId).getResultList();
}
+ /**
+ *
+ * @param collectionId numeric id of the parent collection ("dataverse")
+ * @return list of files in the datasets that are *direct* children of the collection specified
+ * (i.e., no datafiles in sub-collections of this collection will be included)
+ */
+ public List<DataFile> findByDirectCollectionOwner(Long collectionId) {
+ String queryString = "select f from DataFile f, Dataset d where f.owner.id = d.id and d.owner.id = :collectionId order by f.id";
+ return em.createQuery(queryString, DataFile.class)
+ .setParameter("collectionId", collectionId).getResultList();
+ }
+
public List<DataFile> findAllRelatedByRootDatafileId(Long datafileId) {
/*
Get all files with the same root datafile id
@@ -357,7 +361,7 @@ public DataFile findCheapAndEasy(Long id) {
Object[] result;
try {
- result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t3.ID, t2.AUTHORITY, t2.IDENTIFIER, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.AUTHORITY, T0.PROTOCOL, T0.IDENTIFIER FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE ((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult();
+ result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t3.ID, t2.AUTHORITY, t2.IDENTIFIER, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.AUTHORITY, T0.PROTOCOL, T0.IDENTIFIER, t2.PROTOCOL FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE ((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult();
} catch (Exception ex) {
return null;
}
@@ -501,7 +505,9 @@ public DataFile findCheapAndEasy(Long id) {
if (identifier != null) {
dataFile.setIdentifier(identifier);
}
-
+
+ owner.setProtocol((String) result[25]);
+
dataFile.setOwner(owner);
// If content type indicates it's tabular data, spend 2 extra queries
@@ -559,365 +565,6 @@ public DataFile findCheapAndEasy(Long id) {
return dataFile;
}
- /*
- * This is an experimental method for populating the versions of
- * the datafile with the filemetadatas, optimized for making as few db
- * queries as possible.
- * It should only be used to retrieve filemetadata for the DatasetPage!
- * It is not guaranteed to adequately perform anywhere else.
- */
-
- public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion version, AuthenticatedUser au) {
- List<DataFile> dataFiles = new ArrayList<>();
- List<DataTable> dataTables = new ArrayList<>();
- //List retList = new ArrayList<>();
-
- // TODO:
- // replace these maps with simple lists and run binary search on them. -- 4.2.1
-
- Map userMap = new HashMap<>();
- Map filesMap = new HashMap<>();
- Map datatableMap = new HashMap<>();
- Map categoryMap = new HashMap<>();
- Map> fileTagMap = new HashMap<>();
- List accessRequestFileIds = new ArrayList();
-
- List fileTagLabels = DataFileTag.listTags();
-
-
- int i = 0;
- //Cache responses
- Map embargoMap = new HashMap();
-
- List