Skip to content

Commit

Permalink
Gdt 82 transformer class refactor (#106)
Browse files Browse the repository at this point in the history
* Rename Transformer > XmlTransformer

* Rename Transformer > XmlTransformer to prepare for new base class
* Rename xml arg to source_record for XmlTransformer methods

* Create Transformer base class

Why these changes are being introduced:
* A format-agnostic Transformer base class is needed for deriving both XmlTransformer and JsonTransformer format classes

How this addresses that need:
* Create a Transformer base class
* Add JSON type for validation
* Refactor XmlTransformer to derive from Transformer class
* Rename arg xml > source_record and update docstrings

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/GDT-82

* Rename input_records > source_records across repo

* Add parse_source_records method

Why these changes are being introduced:
* parse_xml_records is a function in the helpers module that will be replaced by a method in the Transformer base class

How this addresses that need:
* Add parse_source_records method to Transformer base class as an abstractmethod
* Add parse_source_records to XmlTransformer class and corresponding unit test

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/GDT-82

* Remove parse_xml_records function

* Remove parse_xml_records function from helpers module and corresponding unit test
* Replace all calls of that function with the appropriate class method

* Update dependencies

* Shift get_transformer to Transformer method

* Shift get_transformer from config module to Transformer class staticmethod along with corresponding unit tests
* Update CLI to call new method

* Updates based on discussion in PR #106

* Remove instance from transformer_instance variable name
* Update write_timdex_records_to_json type hinting
* Update get_transformer type hinting
* Update docstrings

* Add load and write_timdex_records_to_json methods

* Add load and write_timdex_records_to_json methods to Transformer class
* Remove write_timdex_records_to_json function from helpers module
* Update CLI command with new methods
* Update variable names and docstrings for consistency
  • Loading branch information
ehanson8 authored Nov 27, 2023
1 parent 8f2d068 commit ee4da86
Show file tree
Hide file tree
Showing 24 changed files with 764 additions and 536 deletions.
513 changes: 276 additions & 237 deletions Pipfile.lock

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import transmogrifier.models as timdex
from transmogrifier.config import SOURCES, load_external_config
from transmogrifier.helpers import parse_xml_records
from transmogrifier.sources.datacite import Datacite
from transmogrifier.sources.transformer import XmlTransformer


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -48,15 +48,17 @@ def runner():

@pytest.fixture()
def datacite_records():
return parse_xml_records("tests/fixtures/datacite/datacite_records.xml")
return XmlTransformer.parse_source_file(
"tests/fixtures/datacite/datacite_records.xml"
)


@pytest.fixture()
def datacite_record_all_fields():
input_records = parse_xml_records(
source_records = XmlTransformer.parse_source_file(
"tests/fixtures/datacite/datacite_record_all_fields.xml"
)
return Datacite("cool-repo", input_records)
return Datacite("cool-repo", source_records)


@pytest.fixture()
Expand All @@ -71,7 +73,7 @@ def marc_content_type_crosswalk():

@pytest.fixture()
def oai_pmh_records():
return parse_xml_records("tests/fixtures/oai_pmh_records.xml")
return XmlTransformer.parse_source_file("tests/fixtures/oai_pmh_records.xml")


@pytest.fixture()
Expand Down
21 changes: 0 additions & 21 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
from transmogrifier.config import (
configure_logger,
configure_sentry,
get_transformer,
load_external_config,
)
from transmogrifier.sources.datacite import Datacite


def test_configure_logger_not_verbose():
Expand Down Expand Up @@ -44,25 +42,6 @@ def test_configure_sentry_env_variable_is_dsn(monkeypatch):
assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test"


def test_get_transformer_returns_correct_class_name():
assert get_transformer("jpal") == Datacite


def test_get_transformer_source_missing_class_name_raises_error():
with pytest.raises(KeyError):
get_transformer("cool-repo")


def test_get_transformer_source_wrong_class_name_raises_error(bad_config):
with pytest.raises(AttributeError):
get_transformer("bad-class-name")


def test_get_transformer_source_wrong_module_path_raises_error(bad_config):
with pytest.raises(ImportError):
get_transformer("bad-module-path")


def test_load_external_config_invalid_file_type_raises_error():
with pytest.raises(ValueError):
load_external_config("config/loc-countries.xml", "zxr")
Expand Down
17 changes: 8 additions & 9 deletions tests/test_datacite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from transmogrifier.helpers import parse_xml_records
from transmogrifier.models import (
AlternateTitle,
Contributor,
Expand Down Expand Up @@ -170,10 +169,10 @@ def test_datacite_transform_with_all_fields_transforms_correctly(


def test_datacite_transform_missing_required_datacite_fields_logs_warning(caplog):
input_records = parse_xml_records(
source_records = Datacite.parse_source_file(
"tests/fixtures/datacite/datacite_record_missing_datacite_required_fields.xml"
)
output_records = Datacite("cool-repo", input_records)
output_records = Datacite("cool-repo", source_records)
next(output_records)

assert (
Expand All @@ -195,10 +194,10 @@ def test_datacite_transform_missing_required_datacite_fields_logs_warning(caplog


def test_datacite_transform_with_optional_fields_blank_transforms_correctly():
input_records = parse_xml_records(
source_records = Datacite.parse_source_file(
"tests/fixtures/datacite/datacite_record_optional_fields_blank.xml"
)
output_records = Datacite("cool-repo", input_records)
output_records = Datacite("cool-repo", source_records)
assert next(output_records) == TimdexRecord(
citation=("Title not provided. https://example.com/doi:10.7910/DVN/19PPE7"),
source="A Cool Repository",
Expand All @@ -218,10 +217,10 @@ def test_datacite_transform_with_optional_fields_blank_transforms_correctly():


def test_datacite_transform_with_optional_fields_missing_transforms_correctly():
input_records = parse_xml_records(
source_records = Datacite.parse_source_file(
"tests/fixtures/datacite/datacite_record_optional_fields_missing.xml"
)
output_records = Datacite("cool-repo", input_records)
output_records = Datacite("cool-repo", source_records)
assert next(output_records) == TimdexRecord(
citation=("Title not provided. https://example.com/doi:10.7910/DVN/19PPE7"),
source="A Cool Repository",
Expand All @@ -241,10 +240,10 @@ def test_datacite_transform_with_optional_fields_missing_transforms_correctly():


def test_datacite_with_attribute_and_subfield_variations_transforms_correctly():
input_records = parse_xml_records(
source_records = Datacite.parse_source_file(
"tests/fixtures/datacite/datacite_record_attribute_and_subfield_variations.xml"
)
output_records = Datacite("cool-repo", input_records)
output_records = Datacite("cool-repo", source_records)
assert next(output_records) == TimdexRecord(
citation=(
"Creator, No affiliation no identifier, Creator, Blank affiliation blank "
Expand Down
17 changes: 8 additions & 9 deletions tests/test_dspace_dim.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import transmogrifier.models as timdex
from transmogrifier.helpers import parse_xml_records
from transmogrifier.sources.dspace_dim import DspaceDim


def test_dspace_dim_transform_with_all_fields_transforms_correctly():
input_records = parse_xml_records(
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_all_fields.xml"
)
output_records = DspaceDim("cool-repo", input_records)
output_records = DspaceDim("cool-repo", source_records)
assert next(output_records) == timdex.TimdexRecord(
citation="Journal of Geophysical Research: Solid Earth 121 (2016): 5859–5879",
source="A Cool Repository",
Expand Down Expand Up @@ -133,10 +132,10 @@ def test_dspace_dim_transform_with_all_fields_transforms_correctly():


def test_dspace_dim_transform_with_attribute_variations_transforms_correctly():
input_records = parse_xml_records(
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_attribute_variations.xml"
)
output_records = DspaceDim("cool-repo", input_records)
output_records = DspaceDim("cool-repo", source_records)
assert next(output_records) == timdex.TimdexRecord(
citation="Title with Blank Qualifier. https://example.com/1912/2641",
source="A Cool Repository",
Expand Down Expand Up @@ -188,10 +187,10 @@ def test_dspace_dim_transform_with_attribute_variations_transforms_correctly():


def test_dspace_dim_transform_with_optional_fields_blank_transforms_correctly():
input_records = parse_xml_records(
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_optional_fields_blank.xml"
)
output_records = DspaceDim("cool-repo", input_records)
output_records = DspaceDim("cool-repo", source_records)
assert next(output_records) == timdex.TimdexRecord(
source="A Cool Repository",
source_link="https://example.com/1912/2641",
Expand All @@ -204,10 +203,10 @@ def test_dspace_dim_transform_with_optional_fields_blank_transforms_correctly():


def test_dspace_dim_transform_with_optional_fields_missing_transforms_correctly():
input_records = parse_xml_records(
source_records = DspaceDim.parse_source_file(
"tests/fixtures/dspace/dspace_dim_record_optional_fields_missing.xml"
)
output_records = DspaceDim("cool-repo", input_records)
output_records = DspaceDim("cool-repo", source_records)
assert next(output_records) == timdex.TimdexRecord(
source="A Cool Repository",
source_link="https://example.com/1912/2641",
Expand Down
9 changes: 4 additions & 5 deletions tests/test_dspace_mets.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import transmogrifier.models as timdex
from transmogrifier.helpers import parse_xml_records
from transmogrifier.sources.dspace_mets import DspaceMets


def test_dspace_mets_transform_with_missing_optional_fields_transforms_correctly():
dspace_xml_records = parse_xml_records(
dspace_xml_records = DspaceMets.parse_source_file(
"tests/fixtures/dspace/dspace_mets_record_optional_fields_missing.xml"
)
output_records = DspaceMets("dspace", dspace_xml_records)
Expand All @@ -20,7 +19,7 @@ def test_dspace_mets_transform_with_missing_optional_fields_transforms_correctly


def test_dspace_mets_transform_with_blank_optional_fields_transforms_correctly():
dspace_xml_records = parse_xml_records(
dspace_xml_records = DspaceMets.parse_source_file(
"tests/fixtures/dspace/dspace_mets_record_optional_fields_blank.xml"
)
output_records = DspaceMets("dspace", dspace_xml_records)
Expand All @@ -36,7 +35,7 @@ def test_dspace_mets_transform_with_blank_optional_fields_transforms_correctly()


def test_dspace_mets_with_attribute_and_subfield_variations_transforms_correctly():
dspace_xml_records = parse_xml_records(
dspace_xml_records = DspaceMets.parse_source_file(
"tests/fixtures/dspace/dspace_mets_record_attribute_and_subfield_variations.xml"
)
output_records = DspaceMets("dspace", dspace_xml_records)
Expand Down Expand Up @@ -85,7 +84,7 @@ def test_dspace_mets_with_attribute_and_subfield_variations_transforms_correctly


def test_dspace_mets_transform_with_all_fields_transforms_correctly():
dspace_xml_records = parse_xml_records(
dspace_xml_records = DspaceMets.parse_source_file(
"tests/fixtures/dspace/dspace_mets_record_all_fields.xml"
)
output_records = DspaceMets("dspace", dspace_xml_records)
Expand Down
19 changes: 10 additions & 9 deletions tests/test_ead.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import logging

import transmogrifier.models as timdex
from transmogrifier.helpers import parse_xml_records
from transmogrifier.sources.ead import Ead


def test_ead_record_all_fields_transform_correctly():
ead_xml_records = parse_xml_records("tests/fixtures/ead/ead_record_all_fields.xml")
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_all_fields.xml"
)
output_records = Ead("aspace", ead_xml_records)
assert next(output_records) == timdex.TimdexRecord(
source="MIT ArchivesSpace",
Expand Down Expand Up @@ -216,7 +217,7 @@ def test_ead_record_all_fields_transform_correctly():


def test_ead_record_with_missing_archdesc_logs_error(caplog):
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_missing_archdesc.xml"
)
output_records = Ead("aspace", ead_xml_records)
Expand All @@ -230,7 +231,7 @@ def test_ead_record_with_missing_archdesc_logs_error(caplog):


def test_ead_record_with_missing_archdesc_did_logs_error(caplog):
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_missing_archdesc_did.xml"
)
output_records = Ead("aspace", ead_xml_records)
Expand All @@ -244,7 +245,7 @@ def test_ead_record_with_missing_archdesc_did_logs_error(caplog):


def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly():
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml"
)
output_records = Ead("aspace", ead_xml_records)
Expand Down Expand Up @@ -470,7 +471,7 @@ def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly(


def test_ead_record_with_blank_optional_fields_transforms_correctly():
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_blank_optional_fields.xml"
)
output_records = Ead("aspace", ead_xml_records)
Expand All @@ -488,7 +489,7 @@ def test_ead_record_with_blank_optional_fields_transforms_correctly():


def test_ead_record_invalid_date_and_date_range_are_omitted(caplog):
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml"
)
output_record = next(Ead("aspace", ead_xml_records))
Expand All @@ -507,7 +508,7 @@ def test_ead_record_invalid_date_and_date_range_are_omitted(caplog):


def test_ead_record_correct_identifiers_from_multiple_unitid(caplog):
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml"
)
output_record = next(Ead("aspace", ead_xml_records))
Expand All @@ -516,7 +517,7 @@ def test_ead_record_correct_identifiers_from_multiple_unitid(caplog):


def test_ead_record_with_missing_optional_fields_transforms_correctly():
ead_xml_records = parse_xml_records(
ead_xml_records = Ead.parse_source_file(
"tests/fixtures/ead/ead_record_missing_optional_fields.xml"
)
output_records = Ead("aspace", ead_xml_records)
Expand Down
6 changes: 0 additions & 6 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from transmogrifier.helpers import (
generate_citation,
parse_date_from_string,
parse_xml_records,
validate_date,
validate_date_range,
)
Expand Down Expand Up @@ -189,11 +188,6 @@ def test_generate_citation_with_all_fields():
)


def test_parse_xml_records_returns_record_iterator():
records = parse_xml_records("tests/fixtures/datacite/datacite_records.xml")
assert len(list(records)) == 38


def test_parse_date_from_string_success():
for date in [
"1930",
Expand Down
Loading

0 comments on commit ee4da86

Please sign in to comment.