Skip to content

Commit

Permalink
Move bitstream retrieval process to Item class
Browse files Browse the repository at this point in the history
* Add separate class methods for getting bitstreams, ids, and metadata
* Add class method to consolidate steps required for creating a DSpace item
* Update test suite
  • Loading branch information
jonavellecuerdo committed Mar 28, 2024
1 parent cce9099 commit 8f61a2a
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 30 deletions.
7 changes: 2 additions & 5 deletions dsaps/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,11 @@ def additems(
"Option '--metadata-csv' must be used or " "run 'reconcile' before 'additems'"
)

bitstream_file_paths = helpers.get_bitstreams_from_csv(metadata_csv)
dspace_collection = dspace.Collection(uuid=collection_uuid)

with open(metadata_csv, "r") as csvfile:
metadata = csv.DictReader(csvfile)
dspace_collection = dspace_collection.create_metadata_for_items_from_csv(
metadata, mapping
)
dspace_collection = dspace_collection.add_items(metadata, mapping)

for item in dspace_collection.items:
logger.info(f"Posting item: {item}")
Expand All @@ -170,7 +167,7 @@ def additems(
item.uuid = item_uuid
item.handle = item_handle
logger.info(f"Item posted: {item_uuid}")
for file_path in bitstream_file_paths.get(item.item_identifier):
for file_path in item.bitstreams:
file_name = file_path.split("/")[-1]
bitstream = dspace.Bitstream(name=file_name, file_path=file_path)
logger.info(f"Posting bitstream: {bitstream}")
Expand Down
52 changes: 36 additions & 16 deletions dsaps/dspace.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import operator
from __future__ import annotations

import ast
import attr
import operator
import requests
import smart_open
import structlog

import smart_open

from attrs import field, define


logger = structlog.get_logger()
op = operator.attrgetter("name")

Expand Down Expand Up @@ -200,19 +204,39 @@ class Item(Object):
source_system_identifier = field(default=None)

@classmethod
def metadata_from_csv_row(cls, record, mapping):
def create(cls, record, mapping) -> Item:
return cls(
metadata=cls.get_metadata(record, mapping),
bitstreams=cls.get_bitstreams(record),
**cls.get_ids(record, mapping),
)

@classmethod
def get_bitstreams(cls, record) -> list:
    """Parse the 'bitstreams' CSV field into a list of file paths.

    The CSV column stores the list as its string repr (e.g.
    "['s3://bucket/file.pdf']"), so ast.literal_eval is used to safely
    deserialize it without executing arbitrary code.

    Returns an empty list when the field is absent or empty, so callers
    can iterate over the result without a None check (the CLI loops over
    item.bitstreams directly).
    """
    if bitstreams := record.get("bitstreams"):
        return ast.literal_eval(bitstreams)
    return []

@classmethod
def get_ids(cls, record, mapping) -> dict:
    """Extract identifier values from a CSV row per the field mapping.

    Only identifier keys that appear in the mapping are included in the
    returned dict; each value is read from the row column named by that
    mapping entry's 'csv_field_name' (None if the column is missing).
    """
    identifiers = {}
    for id_field in ("item_identifier", "source_system_identifier"):
        if field_mapping := mapping.get(id_field):
            identifiers[id_field] = record.get(field_mapping["csv_field_name"])
    return identifiers

@classmethod
def get_metadata(cls, record, mapping) -> list:
"""Create metadata for an item based on a CSV row and a JSON mapping field map."""
metadata = []
for field_name, field_mapping in mapping.items():
if field_name in ["item_identifier", "source_system_identifier"]:
continue
field_value = record[field_mapping["csv_field_name"]]

if field_value:
if field_name == "item_identifier":
item_identifier = field_value
continue # file_identifier is not included in DSpace metadata
if field_name == "source_system_identifier":
# source_system_identifier = field
continue # source_system_identifier is not included in DSpace
delimiter = field_mapping["delimiter"]
language = field_mapping["language"]
if delimiter:
Expand All @@ -234,22 +258,18 @@ def metadata_from_csv_row(cls, record, mapping):
language=language,
)
)
return cls(
metadata=metadata,
item_identifier=item_identifier,
# source_system_identifier=source_system_identifier,
)
return metadata


@define
class Collection(Object):
items = field(factory=list)

@classmethod
def create_metadata_for_items_from_csv(cls, csv_reader, field_map):
def add_items(cls, csv_reader, field_map) -> Collection:
"""Create metadata for the collection's items based on a CSV and a JSON mapping
field map."""
items = [Item.metadata_from_csv_row(row, field_map) for row in csv_reader]
items = [Item.create(row, field_map) for row in csv_reader]
return cls(items=items)


Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def source_metadata_csv():
yield reader


@pytest.fixture
def source_metadata_csv_with_bitstreams():
    # Source metadata CSV fixture whose rows include a 'bitstreams' column
    # (stringified lists of file paths) — used by the Item.create and
    # Item.get_bitstreams tests. Yields (rather than returns) the reader so
    # the file stays open while the test iterates it.
    with open("tests/fixtures/updated-source_metadata.csv") as file:
        reader = csv.DictReader(file)
        yield reader


@pytest.fixture()
def dspace_client():
dspace_client = dspace.DSpaceClient("mock://example.com/")
Expand Down
44 changes: 35 additions & 9 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,45 @@ def test_build_uuid_list(dspace_client):
assert "1234" in child_list


def test_collection_create_metadata_for_items_from_csv(
source_metadata_csv, source_config
):
collection = Collection.create_metadata_for_items_from_csv(
source_metadata_csv, source_config["mapping"]
)
def test_collection_add_items(source_metadata_csv, source_config):
    # Collection.add_items creates one Item per CSV row; the source
    # metadata fixture yields five rows, so five items are expected.
    collection = Collection.add_items(source_metadata_csv, source_config["mapping"])
    assert len(collection.items) == 5


def test_item_metadata_from_csv_row(source_metadata_csv, source_config):
def test_item_create(source_metadata_csv_with_bitstreams, source_config):
    # Item.create consolidates the three extraction steps (metadata,
    # bitstreams, identifiers) for a single CSV row. DSpace-assigned
    # attributes (uuid, handle, etc.) stay None until the item is posted.
    record = next(source_metadata_csv_with_bitstreams)
    assert attr.asdict(Item.create(record, source_config["mapping"])) == {
        "uuid": None,
        "name": None,
        "handle": None,
        "link": None,
        "type": None,
        "metadata": [
            {"key": "dc.title", "value": "Title 1", "language": "en_US"},
            {"key": "dc.contributor.author", "value": "May Smith", "language": None},
        ],
        "bitstreams": ["s3://mocked-bucket/one-to-one/aaaa_001_01.pdf"],
        "item_identifier": "001",
        "source_system_identifier": None,
    }


def test_item_get_ids(source_metadata_csv, source_config):
    # get_ids reads the identifier value from the row column configured in
    # the mapping's 'csv_field_name'; this config has no
    # source_system_identifier mapping, so that key is omitted entirely.
    record = next(source_metadata_csv)
    assert Item.get_ids(record, source_config["mapping"]) == {"item_identifier": "001"}


def test_item_get_bitstreams(source_metadata_csv_with_bitstreams):
    # get_bitstreams deserializes the stringified list stored in the
    # 'bitstreams' CSV column into a real list of file paths.
    # (The unused source_config fixture was dropped: get_bitstreams does
    # not take a mapping.)
    record = next(source_metadata_csv_with_bitstreams)
    assert Item.get_bitstreams(record) == [
        "s3://mocked-bucket/one-to-one/aaaa_001_01.pdf"
    ]


def test_item_get_metadata(source_metadata_csv, source_config):
record = next(source_metadata_csv)
item = Item.metadata_from_csv_row(record, source_config["mapping"])
assert attr.asdict(item)["metadata"] == [
metadata = Item.get_metadata(record, source_config["mapping"])
assert [attr.asdict(m) for m in metadata] == [
{"key": "dc.title", "value": "Title 1", "language": "en_US"},
{"key": "dc.contributor.author", "value": "May Smith", "language": None},
]

0 comments on commit 8f61a2a

Please sign in to comment.