Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

readme and spring cleaning #11

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions add_mivideo_ids/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Add MiVideo IDs

Updates the items in a [Simple Archive Format](https://wiki.duraspace.org/display/DSDOC5x/Importing+and+Exporting+Items+via+Simple+Archive+Format) directory so that they properly display embedded videos from MiVideo, e.g., in this [Thomas A. Roach interview](https://deepblue.lib.umich.edu/handle/2027.42/134316).

The script will prompt the user for a Deposit ID, then look for the appropriate directory in the Bentley's and MLibrary's shared DeepBlue space. It will then prompt the user for the path to a CSV generated from a deposit to MiVideo (dragging and dropping the file onto the Command Prompt, or using the "Copy as path" option and pasting the path into the Command Prompt, should work).

The script expects the CSV to be formatted like so:

| IDENTIFIER.OTHER | MiVideo_IDs |
|------------------|-------------|
| 2014164_0001_0001 | 1_byeethjd |
| 2014164_0001_0002 | 1_4rl3zhbc |
| 2014164_0001_0003 | 1_a0niphtz; 1_mq3g1qrs |

Note that MiVideo ID entries must be separated by a semicolon space (; ).

The script then:
* checks to make sure items cannot be downloaded in DeepBlue;
* checks to make sure rights are correct in DeepBlue metadata;
* adds videostreams with MiVideo and Player IDs to items in DeepBlue; and
* checks to make sure metadata is visible in DeepBlue.

**TO-DOs**
* *We'll eventually move away from the Deposit ID convention and will need to update this script as appropriate.*
202 changes: 202 additions & 0 deletions archivematica_aip_to_dspace_api/archivematica_aip_to_dspace_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import os
import subprocess
from lxml import etree
import shutil
import requests
from lxml.builder import E
from slacker import Slacker

from credentials import *

# `\media\sf_DeepBlue` is auto-mounted
deep_blue_saf_staging = os.path.join(
os.path.sep,
"media", "sf_DeepBlue", "deepblue_saf_staging")
deep_blue_saf_temp = os.path.join(
os.path.sep,
"media", "sf_DeepBlue", "deepblue_saf_temp")
deep_blue_saf_transfer = os.path.join(
os.path.sep,
"media", "sf_DeepBlue", "deepblue_saf_transfer")

aip_dirs = []

for root, _, files in os.walk(deep_blue_saf_staging):
for name in files:

aip = os.path.join(root, name)

print "unarchiving " + aip

# command = ["unar",
# "-force-overwrite",
# "-output-directory", deep_blue_saf_temp,
# aip]
# subprocess.call(command)

aip_dir = os.path.join(deep_blue_saf_temp, os.path.splitext(name)[0])
aip_dirs.append(aip_dir)

# For each unpacked AIP: read its METS file, extract depositor and
# descriptive/rights metadata, assemble an XML deposit request for the
# Deep Blue (DSpace) REST API, and notify the processor on Slack.
# NOTE(review): leading indentation appears to have been stripped from this
# copy of the file -- everything below through the Slack call is presumably
# the body of this loop; confirm against the original source.
for aip_dir in aip_dirs:

print "\nprocessing " + aip_dir

print " * getting metadata"

# The METS file is the entry in <aip_dir>/data/ whose name starts with
# "METS" (despite the variable name, mets_dir is a file path).
mets_dir = os.path.join(aip_dir, "data", [name for name in os.listdir(os.path.join(aip_dir, "data")) if name.startswith("METS")][0])

tree = etree.parse(mets_dir)
# Namespace prefixes used by the XPath queries below.
namespaces = {
"premis": "info:lc/xmlns/premis-v2",
"dc": "http://purl.org/dc/elements/1.1/"
}

# Agent name of the PREMIS agent whose agentType is "Archivematica user".
user = [agent.xpath("./premis:agentName", namespaces=namespaces)[0].text for agent in tree.xpath("//premis:agent", namespaces=namespaces) if agent.xpath("./premis:agentType", namespaces=namespaces)[0].text == "Archivematica user"][0]
# The agent name is parsed as three ", "-separated segments, each holding a
# double-quoted value (username, first name, last name) -- assumed from the
# parsing here; TODO confirm the exact agentName format.
# NOTE(review): the trailing .split('"')[0] on each line is a no-op once
# .split('"')[1] has already isolated the quoted value.
username = user.split(", ")[0].split('"')[1].split('"')[0]
first_name = user.split(", ")[1].split('"')[1].split('"')[0]
last_name = user.split(", ")[2].split('"')[1].split('"')[0]

print "\nUsername | First Name | Last Name"
print "--- | --- | ---"
print " | ".join([username, first_name, last_name])

# Accession number: the text after "#" in the eventOutcomeDetailNote of the
# PREMIS "registration" event.
# NOTE(review): accession_no is never referenced again in this script.
accession_no = [event.xpath("./premis:eventOutcomeInformation/premis:eventOutcomeDetail/premis:eventOutcomeDetailNote", namespaces=namespaces)[0].text.split("#")[1] for event in tree.xpath("//premis:event", namespaces=namespaces) if event.xpath("./premis:eventType", namespaces=namespaces)[0].text == "registration"][0]

# Dublin Core fields for the deposit; abstract and series are left empty
# here (no corresponding METS fields are read for them).
dcterms_title = tree.xpath(".//dc:title", namespaces=namespaces)[0].text
dcterms_description_abstract = ""
dcterms_contributor_author = tree.xpath(".//dc:creator", namespaces=namespaces)[0].text
dcterms_date_issued = tree.xpath(".//dc:date", namespaces=namespaces)[0].text
dcterms_rights_copyright = tree.xpath(".//dc:rights", namespaces=namespaces)[0].text
dcterms_relation_ispartofseries = ""

print "\ndc.title | dc.description.abstract | dc.contributor.author | dc.date.issued | dc.rights.copyright | dc.relation.ispartofseries"
print "--- | --- | --- | --- | --- | ---"
print " | ".join([dcterms_title, dcterms_description_abstract, dcterms_contributor_author, dcterms_date_issued, dcterms_rights_copyright, dcterms_relation_ispartofseries])

print "\n * getting rights statements"

# First PREMIS rights-granted statement in the METS file.
act = tree.xpath(".//premis:act", namespaces=namespaces)[0].text
restriction = tree.xpath(".//premis:restriction", namespaces=namespaces)[0].text
start_date = tree.xpath(".//premis:startDate", namespaces=namespaces)[0].text
end_date = tree.xpath(".//premis:endDate", namespaces=namespaces)[0].text
rights_granted_note = tree.xpath(".//premis:rightsGrantedNote", namespaces=namespaces)[0].text

print "\nAct | Restriction | Start | End | Grant/restriction Note"
print "--- | --- | --- | --- | ---"
print " | ".join([act, restriction, start_date, end_date, rights_granted_note])

print "\n * splitting package into objects and metadata zips"

# The repackaging into objects.7z / metadata.7z is currently disabled; the
# deposit request below nevertheless references both archives by name.
# objects_dir = os.path.join(aip_dir, "data", "objects", [name for name in os.listdir(os.path.join(aip_dir, "data", "objects")) if name.startswith("digital_object_component")][0])
# objects_zip = os.path.join(aip_dir + "-temp", "objects.7z")
# command = [
# "7z", "a", # add
# "-bd", # disable percentage indicator
# "-t7z", # type of archive
# "-y", # assume yes on all queries
# "-m0=bzip2", # compression method
# "-mtc=on", "-mtm=on", "-mta=on", # keep timestamps (create, mod, access)
# "-mmt=on", # multithreaded
# objects_zip, # destination
# objects_dir # source
# ]
# subprocess.call(command)

# shutil.rmtree(objects_dir)

# metadata_zip = os.path.join(aip_dir + "-temp", "metadata.7z")
# command = [
# "7z", "a", # add
# "-bd", # disable percentage indicator
# "-t7z", # type of archive
# "-y", # assume yes on all queries
# "-m0=bzip2", # compression method
# "-mtc=on", "-mtm=on", "-mta=on", # keep timestamps (create, mod, access)
# "-mmt=on", # multithreaded
# "-x!" + objects_zip, # exclude objects.7z
# metadata_zip, # destination
# aip_dir # source
# ]
# subprocess.call(command)

# shutil.rmtree(aip_dir)

# os.rename(aip_dir + "-temp", aip_dir)

print " * depositing to deep blue"

dspace_url = "https://dev.deepblue.lib.umich.edu"

# Log in to the DSpace REST API; the token comes back as the response body.
url = dspace_url + "/RESTapi/login"
body = {"email": dspace_username, "password": dspace_password}
response = requests.post(url, json=body)

dspace_token = response.text
# NOTE(review): dspace_token is never used below -- the request package is
# written to disk but never POSTed to the REST API; confirm whether the
# submission step lives elsewhere or is unfinished.

# Build the deposit request document with lxml's E-factory: collection id,
# Dublin Core metadata, and the two bitstreams in the ORIGINAL bundle.
request = E.request(
E.collectionId("1047"),
E.metadata(
E.field(
E.name("dc.title"),
E.value(dcterms_title)
),
E.field(
E.name("dc.description.abstract"),
E.value(dcterms_description_abstract)
),
E.field(
E.name("dc.contributor.author"),
E.value(dcterms_contributor_author)
),
E.field(
E.name("dc.date.issued"),
E.value(dcterms_date_issued)
),
E.field(
E.name("dc.rights.copyright"),
E.value(dcterms_rights_copyright)
),
E.field(
E.name("dc.relation.ispartofseries"),
E.value(dcterms_relation_ispartofseries)
),
),
# NOTE(review): mimeType "application/zip" is declared for .7z archives --
# confirm this is intentional.
E.bundles(
E.bundle(
E.name("ORIGINAL"),
E.bitstreams(
E.bitstream(
E.name("objects.7z"),
E.mimeType("application/zip"),
E.description("Archival material.")
),
E.bitstream(
E.name("metadata.7z"),
E.mimeType("application/zip"),
E.description("Administrative information. Access restricted to Bentley staff")
)
)
)
)
)

# Serialize the request next to the AIP as package.xml.
package = etree.tostring(request, pretty_print=True, xml_declaration=True, encoding="utf-8")
with open(os.path.join(aip_dir, "package.xml"), mode="w") as f:
f.write(package)

print " * notifying processor"

# Post a Slack message @-mentioning the Archivematica user, with the item's
# descriptive metadata attached.
slack = Slacker(slack_token)
slack.chat.post_message("#digital-processing",
"@" + username + ": check out this item in Deep Blue dev!",
username="Deep Blue Bot",
icon_url="http://www.infodocket.com/wp-content/uploads/2016/09/2016-09-20_10-00-21.jpeg",
attachments=[
{
"author_name": dcterms_contributor_author,
"title": dcterms_title,
"title_link": "",
"text": dcterms_description_abstract
}
]
)
18 changes: 4 additions & 14 deletions archivematica_aip_to_dspace_saf/archivematica_aip_to_dspace_saf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,17 @@
import datetime
import pyttsx

mounting = ["sudo", "mount",
"-t", "cifs",
"-o", "username=eckardm,domain=umroot,rw,uid=eckardm,gid=eckardm",
"//bhl-digitalarchive.m.storage.umich.edu/bhl-digitalarchive",
"/media/Digital_Archive"]
if os.path.isdir(os.path.join(os.path.sep,
"media", "Digital_Archive")) is False:
os.system(" ".join(mounting))

# sf_DeepBlue is auto-mounted
deep_blue_saf_staging = os.path.join(
os.path.sep,
"media", "Digital_Archive",
"MLibrary Drop", "DeepBlue", "deepblue_saf_staging")
"media", "sf_DeepBlue", "deepblue_saf_staging")
deep_blue_saf_temp = os.path.join(
os.path.sep,
"media", "Digital_Archive",
"media", "sf_DeepBlue",
"MLibrary Drop", "DeepBlue", "deepblue_saf_temp")
deep_blue_saf_transfer = os.path.join(
os.path.sep,
"media", "Digital_Archive",
"MLibrary Drop", "DeepBlue", "deepblue_saf_transfer")
"media", "sf_DeepBlue", "deepblue_saf_transfer")

for root, _, files in os.walk(deep_blue_saf_staging):
for name in files:
Expand Down
1 change: 1 addition & 0 deletions dspace_api/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
credentials.py
58 changes: 58 additions & 0 deletions dspace_api/dspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import requests
import json
from pprint import pprint

from credentials import username, password

dspace_url = "https://dev.deepblue.lib.umich.edu"

collection = "TEMP-BOGUS/304433"

handle_1 = "TEMP-BOGUS/304434"
handle_2 = "TEMP-BOGUS/304435"
handle_3 = "TEMP-BOGUS/304436"

from django.db import models

# Log in to get DSpace REST API token
url = dspace_url + "/RESTapi/login"
body = {"email": username, "password": password}
response = requests.post(url, json=body)
token = response.text

# Fetch bitstream information for item
url = dspace_url + "/RESTapi/handle/" + handle_3
headers = {
"Accept": "application/json",
"rest-dspace-token": token
}
params = {"expand": "bitstreams"}
response = requests.get(url, headers=headers, params=params)

for bitstream in response.json()["bitstreams"]:

# Add bitstream description to objects when depositing to DSpace
if bitstream["name"] == "objects.7z":
url = dspace_url + bitstream["link"]
body = bitstream
body["description"] = "Archival materials."
response = requests.put(url, headers=headers, json=body)
print "objects.7z", response.status_code

# Update bitstream policies
if bitstream["name"] == "metadata.7z":
url = dspace_url + bitstream["link"]
body = bitstream
body["policies"] = [{"action":"READ", "groupId":"1335", "rpType":"TYPE_CUSTOM"}]
# Add bitstream description to metadata when depositing to DSpace
body["description"] = "Administrative information."
response = requests.put(url, headers=headers, json=body)
print "metadata.7z", response.status_code

# # Delete anonymous policy
# url = dspace_url + bitstream["link"] + "/policy"
# response = requests.get(url, headers=headers)
# id = [policy["id"] for policy in response.json() if policy["groupId"] == 0][0]
# url = dspace_url + bitstream["link"] + "/policy/" + str(id)
# response = requests.delete(url, headers=headers, json=body)
# print "anonymous policy", response.status_code
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def make_dublin_core(directory, row, item):

dc_relation_ispartof = " - ".join(row[1].value.split(" - ")[:-1])
if dc_relation_ispartof:
etree.SubElement(dublin_core, "dcvalue", element="relation", qualifier="ispartof").text = dc_relation_ispartof
etree.SubElement(dublin_core, "dcvalue", element="relation", qualifier="ispartofseries").text = dc_relation_ispartof

dc_description_abstract = row[2].value
if dc_description_abstract:
Expand Down