Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create SCCS workflow #77

Merged
merged 3 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dsc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main(
workflow = workflow_class(
collection_handle=collection_handle,
batch_id=batch_id,
email_recipients=tuple(email_recipients.split(",")),
email_recipients=email_recipients.split(","),
s3_bucket=s3_bucket,
output_queue=output_queue,
)
Expand Down
5 changes: 3 additions & 2 deletions dsc/workflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dsc.workflows.base import Workflow
from dsc.workflows.base.simple_csv import SimpleCSV
from dsc.workflows.demo import DemoWorkflow
from dsc.workflows.demo import Demo
from dsc.workflows.sccs import SCCS

__all__ = ["DemoWorkflow", "SimpleCSV", "Workflow"]
__all__ = ["SCCS", "Demo", "SimpleCSV", "Workflow"]
21 changes: 14 additions & 7 deletions dsc/workflows/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
self,
collection_handle: str,
batch_id: str,
email_recipients: tuple[str, ...],
email_recipients: list[str],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the linting is passing then I guess this is all good!

s3_bucket: str | None = None,
output_queue: str | None = None,
) -> None:
Expand Down Expand Up @@ -259,12 +259,19 @@ def create_dspace_metadata(self, item_metadata: dict[str, Any]) -> dict[str, Any
A metadata mapping is a dict with the format seen below:

{
"dc.contributor": {
"source_field_name": "contributor",
"language": None,
"delimiter": "|",
"dc.contributor": {
"source_field_name": "contributor",
"language": "<language>",
"delimiter": "<delimiting character>",
"required": true | false
}
}

When setting up the metadata mapping JSON file, "language" and "delimiter"
can be omitted from the file if not applicable. Required fields ("item_identifier"
and "title") must be set as required (true); if "required" is not listed as
a config, the field defaults to not required (false).

MUST NOT be overridden by workflow subclasses.

Args:
Expand All @@ -281,8 +288,8 @@ def create_dspace_metadata(self, item_metadata: dict[str, Any]) -> dict[str, Any
f"{field_mapping["source_field_name"]}'"
)
if field_value:
delimiter = field_mapping["delimiter"]
language = field_mapping["language"]
delimiter = field_mapping.get("delimiter")
language = field_mapping.get("language")
if delimiter:
metadata_entries.extend(
[
Expand Down
4 changes: 2 additions & 2 deletions dsc/workflows/demo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from dsc.workflows.base.simple_csv import SimpleCSV


class DemoWorkflow(SimpleCSV):
class Demo(SimpleCSV):

workflow_name: str = "demo"
submission_system: str = "DSpace@MIT"
metadata_mapping_path: str = "tests/fixtures/demo_metadata_mapping.json"
metadata_mapping_path: str = "dsc/workflows/metadata_mapping/demo.json"
Original file line number Diff line number Diff line change
@@ -1,62 +1,47 @@
{
"item_identifier": {
"source_field_name": "item_identifier",
"language": null,
"delimiter": ""
"required": true
},
"dc.title": {
"source_field_name": "dc.title",
"language": "en_US",
"required": true
},
"dc.publisher": {
"source_field_name": "dc.publisher",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.eprint.version": {
"source_field_name": "dc.eprint.version",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.type": {
"source_field_name": "dc.type",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.source": {
"source_field_name": "dc.source",
"language": "en_US",
"delimiter": ""
"language": "en_US"
},
"dc.contributor.author": {
"source_field_name": "dc.contributor.author",
"language": "en_US",
"delimiter": "|"
},
"dc.relation.isversionof": {
"source_field_name": "dc.relation.isversionof",
"language": "",
"delimiter": ""
},
"dc.title": {
"source_field_name": "dc.title",
"language": "en_US",
"delimiter": ""
"source_field_name": "dc.relation.isversionof"
},
"dc.relation.journal": {
"source_field_name": "dc.relation.journal",
"language": "",
"delimiter": ""
"source_field_name": "dc.relation.journal"
},
"dc.identifier.issn": {
"source_field_name": "dc.identifier.issn",
"language": "",
"delimiter": ""
"source_field_name": "dc.identifier.issn"
},
"dc.date.issued": {
"source_field_name": "dc.date.issued",
"language": "",
"delimiter": ""
"source_field_name": "dc.date.issued"
},
"dc.rights.uri": {
"source_field_name": "dc.rights.uri",
"language": "",
"delimiter": ""
"source_field_name": "dc.rights.uri"
}
}
59 changes: 59 additions & 0 deletions dsc/workflows/metadata_mapping/sccs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"item_identifier": {
"source_field_name": "item_identifier",
"required": true
},
"dc.title": {
"source_field_name": "dc.title",
"language": "en_US",
"required": true
},
"dc.publisher": {
"source_field_name": "dc.publisher",
"language": "en_US"
},
"dc.identifier.mitlicense": {
"source_field_name": "dc.identifier.mitlicense",
"language": "en_US"
},
"dc.eprint.version": {
"source_field_name": "dc.eprint.version",
"language": "en_US"
},
"dc.type": {
"source_field_name": "dc.type",
"language": "en_US"
},
"dc.source": {
"source_field_name": "dc.source",
"language": "en_US"
},
"dc.contributor.author": {
"source_field_name": "dc.contributor.author",
"language": "en_US",
"delimiter": "|"
},
"dc.relation.isversionof": {
"source_field_name": "dc.relation.isversionof"
},
"dc.relation.journal": {
"source_field_name": "dc.relation.journal"
},
"dc.identifier.issn": {
"source_field_name": "dc.identifier.issn"
},
"dc.date.issued": {
"source_field_name": "dc.date.issued"
},
"dc.rights": {
"source_field_name": "dc.rights",
"language": "en_US"
},
"dc.rights.uri": {
"source_field_name": "dc.rights.uri"
},
"dc.description.sponsorship": {
"source_field_name": "dc.description.sponsorship",
"language": "en_US"
}
}
13 changes: 13 additions & 0 deletions dsc/workflows/sccs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from dsc.workflows import SimpleCSV


class SCCS(SimpleCSV):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When looking at a class this minimal, a couple of thoughts come to mind:

  1. Nice work on class design and inheritance! Clearly much is shared between workflows.
  2. What functionality / opinionation, if anything, could this workflow-specific class still provide?

For #2, the first thing that comes to mind is validation or hooks of some kind. I don't think it's necessary now, but I'm wondering if there might be room in the future for some kind of registration of hooks: for example, hooks that validate the items created, hooks for reconciliation logic, and maybe others. In theory, these could be called -- if defined -- during the normal flow of the base Workflows.

Sketch:

class MyWorkflow(SimpleCSV):
    
    workflow_name = "Mine!"
    metadata_mapping_path = "path/to/mapping.json"
    
    # defined as class attributes, lists pointing to methods
    reconciliation_hooks = [] # I don't have any reconciliation hooks...
    item_creation_hooks = [
        "confirm_title_elements" # reference to method defined on class
    ]
    
    @classmethod
    def confirm_title_elements(cls):
        # I will check things yielded by Workflow.item_submissions_iter()...
        pass

With a little bit of wiring, this might provide a way for a Workflow class to have some additional validation (or even functional) logic applied to the base flows.

This pattern could also probably be achieved through decorators, which could be even cleaner.

Sketch:

class MyWorkflow(SimpleCSV):
    
    workflow_name = "Mine!"
    metadata_mapping_path = "path/to/mapping.json"
    
    @item_creation_hook
    def confirm_title_elements(cls):
        # I will check things yielded by Workflow.item_submissions_iter()...
        pass
   
    # maybe others like @reconciliation_hook

"""Workflow for SCCS-requested deposits.

The deposits managed by this workflow are requested by the Scholarly
Communication and Collection Strategy (SCCS) department
and are for submission to DSpace@MIT.
"""

workflow_name: str = "sccs"
metadata_mapping_path: str = "dsc/workflows/metadata_mapping/sccs.json"
6 changes: 2 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class TestWorkflow(Workflow):

workflow_name: str = "test"
submission_system: str = "Test@MIT"
email_recipients: tuple[str] = ("test@test.test",)
metadata_mapping_path: str = "tests/fixtures/test_metadata_mapping.json"

def item_metadata_iter(self):
Expand Down Expand Up @@ -56,10 +55,7 @@ class TestSimpleCSV(SimpleCSV):

workflow_name = "simple_csv"
submission_system: str = "Test@MIT"
email_recipients: tuple[str] = ("test@test.test",)
metadata_mapping_path: str = "tests/fixtures/test_metadata_mapping.json"
s3_bucket: str = "dsc"
output_queue: str = "mock-output_queue"


@pytest.fixture(autouse=True)
Expand All @@ -79,6 +75,7 @@ def base_workflow_instance(item_metadata, metadata_mapping, mocked_s3):
collection_handle="123.4/5678",
batch_id="batch-aaa",
email_recipients=["test@test.test"],
output_queue="mock-output_queue",
)


Expand All @@ -88,6 +85,7 @@ def simple_csv_workflow_instance(metadata_mapping):
collection_handle="123.4/5678",
batch_id="batch-aaa",
email_recipients=["test@test.test"],
output_queue="mock-output_queue",
)


Expand Down
4 changes: 0 additions & 4 deletions tests/fixtures/test_metadata_mapping.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
{
"item_identifier": {
"source_field_name": "item_identifier",
"language": null,
"delimiter": "",
"required": true
},
"dc.title": {
"source_field_name": "title",
"language": "en_US",
"delimiter": "",
"required": true
},
"dc.contributor": {
"source_field_name": "contributor",
"language": null,
"delimiter": "|"
}
}
Loading