Skip to content

Commit

Permalink
reconcile
Browse files Browse the repository at this point in the history
  • Loading branch information
jonavellecuerdo committed Jan 23, 2025
1 parent 4862b8a commit bf7bdbe
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
24 changes: 24 additions & 0 deletions dsc/workflows/opencourseware.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import zipfile
from collections import defaultdict
from collections.abc import Iterator
from typing import Any

Expand All @@ -19,6 +20,29 @@ class OpenCourseWare(Workflow):
workflow_name: str = "opencourseware"
metadata_mapping_path: str = "dsc/workflows/metadata_mapping/opencourseware.json"

def _build_bitstream_dict(self) -> dict:
    """Build a dictionary mapping item identifiers to their bitstream keys.

    This method looks for zip files within the designated 'batch' folder
    of the S3 bucket (i.e., self.batch_path). In the case of OpenCourseWare
    deposits, this method expects:
        * a single (1) zipped file per item identifier
        * filename of zipped file must correspond to item identifier
          (i.e., '<item_identifier>'.zip)

    Returns:
        A dict keyed by item identifier (the zip file's base name without
        the trailing '.zip'), where each value is the list of S3 keys found
        for that identifier. The object returned is a ``defaultdict(list)``.
    """
    s3_client = S3Client()
    bitstream_dict: dict[str, list[str]] = defaultdict(list)

    # Iterate the listing directly; it is only consumed once, so there is
    # no need to materialize it into an intermediate list first.
    for bitstream in s3_client.files_iter(
        bucket=self.s3_bucket, prefix=self.batch_path, file_type=".zip"
    ):
        # The last path segment of the S3 key is the file name.
        file_name = bitstream.split("/")[-1]
        # Strip only the trailing extension. A plain str.replace would also
        # remove '.zip' occurring in the middle of the name and thereby
        # corrupt the item identifier (e.g. 'a.zip.b.zip' -> 'a.b').
        item_identifier = file_name.removesuffix(".zip")
        bitstream_dict[item_identifier].append(bitstream)
    return bitstream_dict

def item_metadata_iter(self) -> Iterator[dict[str, Any]]:
"""Yield source metadata from metadata JSON file in the zip file.
Expand Down
24 changes: 24 additions & 0 deletions tests/test_workflow_opencourseware.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,30 @@
]


def test_workflow_ocw_build_bitstream_dict(
    mocked_s3, opencourseware_workflow_instance, s3_client
):
    """Check that only the uploaded zip keys end up in the bitstream dict.

    Two zip objects and one text object are uploaded under the same batch
    prefix; the expected mapping contains an entry for each zip, keyed by
    the file name without its extension, and no entry for the text object.
    """
    uploaded_keys = (
        "opencourseware/batch-aaa/123.zip",
        "opencourseware/batch-aaa/124.zip",
        "opencourseware/batch-aaa/ignore_me.txt",
    )
    # Every object is empty; only the key names matter to this test.
    for object_key in uploaded_keys:
        s3_client.put_file(file_content="", bucket="dsc", key=object_key)

    expected_bitstreams = {
        "123": ["opencourseware/batch-aaa/123.zip"],
        "124": ["opencourseware/batch-aaa/124.zip"],
    }
    assert (
        opencourseware_workflow_instance._build_bitstream_dict()
        == expected_bitstreams
    )


@patch("zipfile.ZipFile")
@patch("smart_open.open")
def test_workflow_ocw_extract_metadata_from_zip_file_success(
Expand Down

0 comments on commit bf7bdbe

Please sign in to comment.