diff --git a/docs/crosswalk.html b/docs/crosswalk.html index 3d77499..e938263 100644 --- a/docs/crosswalk.html +++ b/docs/crosswalk.html @@ -28,7 +28,8 @@

EQUELLA -> InvenioRDM Crosswalk

%% CONTRIBUTORS["Contributors (0-n)"] %% RIGHTS["Rights (Licenses) (0-n)"] %% SUBJECTS["Subjects (0-n)"] - %% PUBLISHER["Publisher (0-1)"] + OIPUB["originInfo/publisher"] --> PUBLISHER["Publisher (0-1)"] + DBR["relatedItem/title = Design Book Review"] --> |different depending on date| PUBLISHER %% REL["Related Identifiers/Works (0-n)"] %% EXTENT --> SIZES["Sizes (0-n)"] %% LOCATIONS["Locations. We only have place names, no IDs. (0-n)"] diff --git a/migrate/record.py b/migrate/record.py index 5b4741f..3814ed0 100644 --- a/migrate/record.py +++ b/migrate/record.py @@ -280,15 +280,28 @@ def dates(self) -> list[dict[str, Any]]: @property def publisher(self) -> str: + # https://inveniordm.docs.cern.ch/reference/metadata/#publisher-0-1 + publisher = "" # 1) DBR articles have a variable publisher depending on date: # Winter 1983 - Spring 1990: Design Book Review # Winter 1991 - Winter/Spring 1995: MIT Press # Winter 1996/1997: Design Book Review # 1997 - on: California College of the Arts + # https://vault.cca.edu/items/bd3b483b-52b9-423c-a96e-d37863511d75/1/%3CXML%3E + # mods/relatedItem[@type="host"]/titleInfo/title == DBR + # ri = self.xml.get("mods", {}).get("relatedItem", {}) + # 2) CCA/C archives has publisher info mods/originInfo/publisher - # 3) Press Clips will have different publishing organizations + # https://vault.cca.edu/items/c4583fe6-2e85-4613-a1bc-774824b3e826/1/%3CXML%3E + oipub = ( + self.xml.get("mods", {}).get("originInfo", {}).get("publisher", "") + ).strip() + if oipub: + publisher = oipub + + # 3) Press Clips items are not CCA but have only publication, not publisher, info # 4) Student work has no publisher - return "" + return publisher @property def type(self) -> dict[str, str]: @@ -339,8 +352,9 @@ def get(self) -> dict[str, Any]: "formats": self.formats, "locations": [], "publication_date": self.publication_date, - "publisher": "", + "publisher": self.publisher, # relation types: cites, compiles, continues, describes, documents, haspart, hasversion, iscitedby, iscompiledby, iscontinuedby, isderivedfrom, isdescribedby, isdocumentedby, isidenticalto, isnewversionof, isobsoletedby, isoriginalformof, ispartof, ispreviousversionof, isreferencedby, isrequiredby, isreviewedby, issourceof, issupplementto, issupplementedby + # ? use old VAULT item URL here? Might be useful for redirects somehow "related_identifiers": [], # options defined in resource_types.yaml fixture # https://inveniordm.docs.cern.ch/reference/metadata/#resource-type-1 diff --git a/migrate/tests.py b/migrate/tests.py index 3a545af..ab2a78d 100644 --- a/migrate/tests.py +++ b/migrate/tests.py @@ -515,3 +515,22 @@ def test_dates(input, expect): def test_type(input, expect): r = Record(input) assert m(r)["resource_type"] == expect + + +# Publisher +@pytest.mark.parametrize( + "input, expect", + [ + ( # publisher in originInfo + x("foo"), + "foo", + ), + ( # no publisher + x(""), + "", + ), + ], +) +def test_publisher(input, expect): + r = Record(input) + assert m(r)["publisher"] == expect