From ee4da864cb763e30e7c4a6332db853875fdfe2c5 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Mon, 27 Nov 2023 09:59:45 -0500 Subject: [PATCH] Gdt 82 transformer class refactor (#106) * Rename Transformer > XmlTransformer * Rename Transformer > XmlTransformer to prepare for new base class * Rename xml arg to source_record for XmlTransformer methods * Create Transformer base class Why these changes are being introduced: * A format-agnostic Transformer base class is needed for deriving both XmlTransformer and JsonTransformer format classes How this addresses that need: * Create a Transformer base class * Add JSON type for validation * Refactor XmlTransformer to derive from Transformer class * Rename arg xml > source_record and update docstrings Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/GDT-82 * Rename input_records > source_records across repo * Add parse_source_records method Why these changes are being introduced: * parse_xml_records is a function in the helpers module that will be replaced by a method in the Transformer base class How this addresses that need: * Add parse_source_records method to Transformer base class as an abstractmethod * Add parse_source_records to XmlTransformer class and corresponding unit test Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/GDT-82 * Remove parse_xml_records function * Remove parse_xml_records function from helpers module and corresponding unit test * Replace all calls of that function with the appropriate class method * Update dependencies * Shift get_transformer to Transformer method * Shift get_transformer from config module to Transformer class staticmethod along with corresponding unit tests * Update CLI to call new method * Updates based on discussion in PR #106 * Remove instance from transformer_instance variable name * Update write_timdex_records_to_json type hinting * Update get_transformer type hinting * 
Update docstrings * Add load and write_timdex_records_to_json methods * Add load and write_timdex_records_to_json methods to Transformer class * Remove write_timdex_records_to_json function from helpers module * Update CLI command with new methods * Update variable names and docstrings for consistency --- Pipfile.lock | 513 ++++++++++++++------------ tests/conftest.py | 12 +- tests/test_config.py | 21 -- tests/test_datacite.py | 17 +- tests/test_dspace_dim.py | 17 +- tests/test_dspace_mets.py | 9 +- tests/test_ead.py | 19 +- tests/test_helpers.py | 6 - tests/test_marc.py | 37 +- tests/test_oai_dc.py | 21 +- tests/test_springshare.py | 43 ++- tests/test_transformer.py | 91 +++-- tests/test_whoas.py | 9 +- tests/test_zenodo.py | 15 +- transmogrifier/cli.py | 32 +- transmogrifier/config.py | 12 - transmogrifier/helpers.py | 63 +--- transmogrifier/sources/datacite.py | 4 +- transmogrifier/sources/dspace_dim.py | 4 +- transmogrifier/sources/dspace_mets.py | 4 +- transmogrifier/sources/ead.py | 4 +- transmogrifier/sources/marc.py | 4 +- transmogrifier/sources/oaidc.py | 4 +- transmogrifier/sources/transformer.py | 339 ++++++++++++++--- 24 files changed, 764 insertions(+), 536 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 64cdede..5ffef10 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -22,6 +22,7 @@ "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==23.1.0" }, "beautifulsoup4": { @@ -30,30 +31,31 @@ "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a" ], "index": "pypi", + "markers": "python_full_version >= '3.6.0'", "version": "==4.12.2" }, "boto3": { "hashes": [ - "sha256:2ccbea42fe4cbd22a8ba1e90a37ac65f05c1932e63432e429fb7158d8255bbc0", - "sha256:4713a4e69120db5f358f4d378459fb4ea04be98664a0908088f6e04ab49d2583" + "sha256:ca9b04fc2c75990c2be84c43b9d6edecce828960fc27e07ab29036587a1ca635", + 
"sha256:d1135647309b89376a014d21407aabfa322998206175f2297def812bf4d824a9" ], - "version": "==1.28.34" + "version": "==1.29.4" }, "botocore": { "hashes": [ - "sha256:23ba9e3a8b4c0e5966bbe2db62edb27f61e16b846f153f22aefda7b3c05c7942", - "sha256:456ef8eb458db35b8643eb10e652ed50750d13e5af431593471b2c705c34b5db" + "sha256:3ee73c0d93bdb944d0c46772f08f09cdcf25ef58bd86962e6f4a24e531198bfa", + "sha256:6bfa75e28c9ad0321cefefa51b00ff233b16b2416f8b95229796263edba45a39" ], "markers": "python_version >= '3.7'", - "version": "==1.31.34" + "version": "==1.32.4" }, "certifi": { "hashes": [ - "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", - "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" + "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1", + "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474" ], "markers": "python_version >= '3.6'", - "version": "==2023.7.22" + "version": "==2023.11.17" }, "click": { "hashes": [ @@ -61,6 +63,7 @@ "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==8.1.7" }, "jmespath": { @@ -167,6 +170,7 @@ "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4" ], "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==4.9.3" }, "python-dateutil": { @@ -175,23 +179,24 @@ "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "s3transfer": { "hashes": [ - "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084", - "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861" + "sha256:10d6923c6359175f264811ef4bf6161a3156ce8e350e705396a7557d6293c33a", + 
"sha256:fd3889a66f5fe17299fe75b82eae6cf722554edca744ca5d5fe308b104883d2e" ], "markers": "python_version >= '3.7'", - "version": "==0.6.2" + "version": "==0.7.0" }, "sentry-sdk": { "hashes": [ - "sha256:3e17215d8006612e2df02b0e73115eb8376c37e3f586d8436fa41644e605074d", - "sha256:a99ee105384788c3f228726a88baf515fe7b5f1d2d0f215a03d194369f158df7" + "sha256:25d574f94fdf72199e331c2401fdac60d01b5be8f32822174c51c3ff0fc2f8cb", + "sha256:f32dd16547f2f45e1c71a96fd4a48925e629541f7ddfe3d5d25ef7d5e94eb3c8" ], "index": "pypi", - "version": "==1.29.2" + "version": "==1.36.0" }, "six": { "hashes": [ @@ -206,19 +211,19 @@ "s3" ], "hashes": [ - "sha256:b4c9ae193ad6d3e7add50944b86afa0d150bd821ab8ec21edb26d9a06b66f6a8", - "sha256:d5238825fe9a9340645fac3d75b287c08fbb99fb2b422477de781c9f5f09e019" + "sha256:8d3ef7e6997e8e42dd55c74166ed21e6ac70664caa32dd940b26d54a8f6b4142", + "sha256:be3c92c246fbe80ebce8fbacb180494a481a77fcdcb7c1aadb2ea5b9c2bee8b9" ], - "index": "pypi", - "version": "==6.3.0" + "markers": "python_version >= '3.6' and python_version < '4.0'", + "version": "==6.4.0" }, "soupsieve": { "hashes": [ - "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8", - "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea" + "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", + "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7" ], - "markers": "python_version >= '3.7'", - "version": "==2.4.1" + "markers": "python_version >= '3.8'", + "version": "==2.5" }, "types-python-dateutil": { "hashes": [ @@ -230,11 +235,11 @@ }, "urllib3": { "hashes": [ - "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f", - "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14" + "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84", + "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e" ], "markers": "python_version >= '3.6'", - "version": "==1.26.16" 
+ "version": "==2.0.7" } }, "develop": { @@ -248,17 +253,10 @@ }, "asttokens": { "hashes": [ - "sha256:4622110b2a6f30b77e1473affaa97e711bc2f07d3f10848420ff1898edbe94f3", - "sha256:6b0ac9e93fb0335014d382b8fa9b3afa7df546984258005da0b9e7095b3deb1c" - ], - "version": "==2.2.1" - }, - "backcall": { - "hashes": [ - "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", - "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255" + "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", + "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0" ], - "version": "==0.2.0" + "version": "==2.4.1" }, "bandit": { "hashes": [ @@ -266,124 +264,137 @@ "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==1.7.5" }, "black": { "hashes": [ - "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3", - "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb", - "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087", - "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320", - "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6", - "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3", - "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc", - "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f", - "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587", - "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91", - "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a", - "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad", - "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926", - "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9", - 
"sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be", - "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd", - "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96", - "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491", - "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2", - "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a", - "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f", - "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995" + "sha256:250d7e60f323fcfc8ea6c800d5eba12f7967400eb6c2d21ae85ad31c204fb1f4", + "sha256:2a9acad1451632021ee0d146c8765782a0c3846e0e0ea46659d7c4f89d9b212b", + "sha256:412f56bab20ac85927f3a959230331de5614aecda1ede14b373083f62ec24e6f", + "sha256:421f3e44aa67138ab1b9bfbc22ee3780b22fa5b291e4db8ab7eee95200726b07", + "sha256:45aa1d4675964946e53ab81aeec7a37613c1cb71647b5394779e6efb79d6d187", + "sha256:4c44b7211a3a0570cc097e81135faa5f261264f4dfaa22bd5ee2875a4e773bd6", + "sha256:4c68855825ff432d197229846f971bc4d6666ce90492e5b02013bcaca4d9ab05", + "sha256:5133f5507007ba08d8b7b263c7aa0f931af5ba88a29beacc4b2dc23fcefe9c06", + "sha256:54caaa703227c6e0c87b76326d0862184729a69b73d3b7305b6288e1d830067e", + "sha256:58e5f4d08a205b11800332920e285bd25e1a75c54953e05502052738fe16b3b5", + "sha256:698c1e0d5c43354ec5d6f4d914d0d553a9ada56c85415700b81dc90125aac244", + "sha256:6c1cac07e64433f646a9a838cdc00c9768b3c362805afc3fce341af0e6a9ae9f", + "sha256:760415ccc20f9e8747084169110ef75d545f3b0932ee21368f63ac0fee86b221", + "sha256:7f622b6822f02bfaf2a5cd31fdb7cd86fcf33dab6ced5185c35f5db98260b055", + "sha256:cf57719e581cfd48c4efe28543fea3d139c6b6f1238b3f0102a9c73992cbb479", + "sha256:d136ef5b418c81660ad847efe0e55c58c8208b77a57a28a503a5f345ccf01394", + "sha256:dbea0bb8575c6b6303cc65017b46351dc5953eea5c0a59d7b7e3a2d2f433a911", + "sha256:fc7f6a44d52747e65a02558e1d807c82df1d66ffa80a601862040a43ec2e3142" 
], "index": "pypi", - "version": "==23.7.0" + "markers": "python_version >= '3.8'", + "version": "==23.11.0" }, "certifi": { "hashes": [ - "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", - "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" + "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1", + "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474" ], "markers": "python_version >= '3.6'", - "version": "==2023.7.22" + "version": "==2023.11.17" }, "charset-normalizer": { "hashes": [ - "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96", - "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c", - "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710", - "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706", - "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020", - "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252", - "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad", - "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329", - "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a", - "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f", - "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6", - "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4", - "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a", - "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46", - "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2", - "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23", - "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", - "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd", - 
"sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982", - "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10", - "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2", - "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea", - "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09", - "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5", - "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149", - "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489", - "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9", - "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80", - "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592", - "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3", - "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6", - "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed", - "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c", - "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200", - "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a", - "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e", - "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d", - "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6", - "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623", - "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669", - "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3", - "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa", - "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9", - "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2", 
- "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f", - "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1", - "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4", - "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a", - "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8", - "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3", - "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029", - "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f", - "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959", - "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22", - "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7", - "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952", - "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346", - "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e", - "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d", - "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299", - "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd", - "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a", - "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3", - "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037", - "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94", - "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c", - "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858", - "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a", - "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449", - 
"sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c", - "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918", - "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1", - "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c", - "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac", - "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa" + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", 
+ "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + 
"sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", 
+ "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" ], "markers": "python_full_version >= '3.7.0'", - "version": "==3.2.0" + "version": "==3.3.2" }, "click": { "hashes": [ @@ -391,6 +402,7 @@ "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==8.1.7" }, "coverage": { @@ -447,6 +459,7 @@ "sha256:fc2af30ed0d5ae0b1abdb4ebdce598eafd5b35397d4d75deb341a614d333d987" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==6.5.0" }, "coveralls": { @@ -455,6 +468,7 @@ 
"sha256:f42015f31d386b351d4226389b387ae173207058832fbf5c8ec4b40e27b16026" ], "index": "pypi", + "markers": "python_version >= '3.5'", "version": "==3.3.1" }, "decorator": { @@ -471,36 +485,46 @@ ], "version": "==0.6.2" }, - "executing": { + "exceptiongroup": { "hashes": [ - "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc", - "sha256:19da64c18d2d851112f09c287f8d3dbbdf725ab0e569077efb6cdcbd3497c107" + "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", + "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" ], + "markers": "python_version < '3.11'", "version": "==1.2.0" }, + "executing": { + "hashes": [ + "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147", + "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc" + ], + "markers": "python_version >= '3.5'", + "version": "==2.0.1" + }, "flake8": { "hashes": [ "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23", "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5" ], "index": "pypi", + "markers": "python_full_version >= '3.8.1'", "version": "==6.1.0" }, "gitdb": { "hashes": [ - "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a", - "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7" + "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4", + "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b" ], "markers": "python_version >= '3.7'", - "version": "==4.0.10" + "version": "==4.0.11" }, "gitpython": { "hashes": [ - "sha256:8d9b8cb1e80b9735e8717c9362079d3ce4c6e5ddeebedd0361b228c3a67a62f6", - "sha256:e3d59b1c2c6ebb9dfa7a184daf3b6dd4914237e7488a1730a6d8f6f5d0b4187f" + "sha256:22b126e9ffb671fdd0c129796343a02bf67bf2994b35449ffc9321aa755e18a4", + "sha256:cf14627d5a8049ffbf49915732e5eddbe8134c3bdb9d476e6182b676fc573f8a" ], "markers": "python_version >= '3.7'", - "version": "==3.1.32" + "version": 
"==3.1.40" }, "idna": { "hashes": [ @@ -520,11 +544,12 @@ }, "ipython": { "hashes": [ - "sha256:1d197b907b6ba441b692c48cf2a3a2de280dc0ac91a3405b39349a50272ca0a1", - "sha256:248aca623f5c99a6635bc3857677b7320b9b8039f99f070ee0d20a5ca5a8e6bf" + "sha256:126bb57e1895594bb0d91ea3090bbd39384f6fe87c3d57fd558d0670f50339bb", + "sha256:1e4d1d666a023e3c93585ba0d8e962867f7a111af322efff6b9c58062b3e5444" ], "index": "pypi", - "version": "==8.14.0" + "markers": "python_version >= '3.9'", + "version": "==8.17.2" }, "isort": { "hashes": [ @@ -532,15 +557,16 @@ "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6" ], "index": "pypi", + "markers": "python_full_version >= '3.8.0'", "version": "==5.12.0" }, "jedi": { "hashes": [ - "sha256:bcf9894f1753969cbac8022a8c2eaee06bfa3724e4192470aaffe7eb6272b0c4", - "sha256:cb8ce23fbccff0025e9386b5cf85e892f94c9b822378f8da49970471335ac64e" + "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd", + "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0" ], "markers": "python_version >= '3.6'", - "version": "==0.19.0" + "version": "==0.19.1" }, "markdown-it-py": { "hashes": [ @@ -576,36 +602,37 @@ }, "mypy": { "hashes": [ - "sha256:159aa9acb16086b79bbb0016145034a1a05360626046a929f84579ce1666b315", - "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0", - "sha256:26f71b535dfc158a71264e6dc805a9f8d2e60b67215ca0bfa26e2e1aa4d4d373", - "sha256:26fb32e4d4afa205b24bf645eddfbb36a1e17e995c5c99d6d00edb24b693406a", - "sha256:2fc3a600f749b1008cc75e02b6fb3d4db8dbcca2d733030fe7a3b3502902f161", - "sha256:32cb59609b0534f0bd67faebb6e022fe534bdb0e2ecab4290d683d248be1b275", - "sha256:330857f9507c24de5c5724235e66858f8364a0693894342485e543f5b07c8693", - "sha256:361da43c4f5a96173220eb53340ace68cda81845cd88218f8862dfb0adc8cddb", - "sha256:4a465ea2ca12804d5b34bb056be3a29dc47aea5973b892d0417c6a10a40b2d65", - "sha256:51cb1323064b1099e177098cb939eab2da42fea5d818d40113957ec954fc85f4", - 
"sha256:57b10c56016adce71fba6bc6e9fd45d8083f74361f629390c556738565af8eeb", - "sha256:596fae69f2bfcb7305808c75c00f81fe2829b6236eadda536f00610ac5ec2243", - "sha256:5d627124700b92b6bbaa99f27cbe615c8ea7b3402960f6372ea7d65faf376c14", - "sha256:6ac9c21bfe7bc9f7f1b6fae441746e6a106e48fc9de530dea29e8cd37a2c0cc4", - "sha256:82cb6193de9bbb3844bab4c7cf80e6227d5225cc7625b068a06d005d861ad5f1", - "sha256:8f772942d372c8cbac575be99f9cc9d9fb3bd95c8bc2de6c01411e2c84ebca8a", - "sha256:9fece120dbb041771a63eb95e4896791386fe287fefb2837258925b8326d6160", - "sha256:a156e6390944c265eb56afa67c74c0636f10283429171018446b732f1a05af25", - "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12", - "sha256:abed92d9c8f08643c7d831300b739562b0a6c9fcb028d211134fc9ab20ccad5d", - "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92", - "sha256:c543214ffdd422623e9fedd0869166c2f16affe4ba37463975043ef7d2ea8770", - "sha256:d28ddc3e3dfeab553e743e532fb95b4e6afad51d4706dd22f28e1e5e664828d2", - "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70", - "sha256:f6b0e77db9ff4fda74de7df13f30016a0a663928d669c9f2c057048ba44f09bb", - "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5", - "sha256:ff0cedc84184115202475bbb46dd99f8dcb87fe24d5d0ddfc0fe6b8575c88d2f" + "sha256:0e81ffd120ee24959b449b647c4b2fbfcf8acf3465e082b8d58fd6c4c2b27e46", + "sha256:185cff9b9a7fec1f9f7d8352dff8a4c713b2e3eea9c6c4b5ff7f0edf46b91e41", + "sha256:1e280b5697202efa698372d2f39e9a6713a0395a756b1c6bd48995f8d72690dc", + "sha256:1fe46e96ae319df21359c8db77e1aecac8e5949da4773c0274c0ef3d8d1268a9", + "sha256:2b53655a295c1ed1af9e96b462a736bf083adba7b314ae775563e3fb4e6795f5", + "sha256:551d4a0cdcbd1d2cccdcc7cb516bb4ae888794929f5b040bb51aae1846062901", + "sha256:55d28d7963bef00c330cb6461db80b0b72afe2f3c4e2963c99517cf06454e665", + "sha256:5da84d7bf257fd8f66b4f759a904fd2c5a765f70d8b52dde62b521972a0a2357", + "sha256:6cb8d5f6d0fcd9e708bb190b224089e45902cacef6f6915481806b0c77f7786d", 
+ "sha256:7a7b1e399c47b18feb6f8ad4a3eef3813e28c1e871ea7d4ea5d444b2ac03c418", + "sha256:870bd1ffc8a5862e593185a4c169804f2744112b4a7c55b93eb50f48e7a77010", + "sha256:87c076c174e2c7ef8ab416c4e252d94c08cd4980a10967754f91571070bf5fbe", + "sha256:96650d9a4c651bc2a4991cf46f100973f656d69edc7faf91844e87fe627f7e96", + "sha256:a3637c03f4025f6405737570d6cbfa4f1400eb3c649317634d273687a09ffc2f", + "sha256:a79cdc12a02eb526d808a32a934c6fe6df07b05f3573d210e41808020aed8b5d", + "sha256:b633f188fc5ae1b6edca39dae566974d7ef4e9aaaae00bc36efe1f855e5173ac", + "sha256:bf7a2f0a6907f231d5e41adba1a82d7d88cf1f61a70335889412dec99feeb0f8", + "sha256:c1b06b4b109e342f7dccc9efda965fc3970a604db70f8560ddfdee7ef19afb05", + "sha256:cddee95dea7990e2215576fae95f6b78a8c12f4c089d7e4367564704e99118d3", + "sha256:d01921dbd691c4061a3e2ecdbfbfad029410c5c2b1ee88946bf45c62c6c91210", + "sha256:d0fa29919d2e720c8dbaf07d5578f93d7b313c3e9954c8ec05b6d83da592e5d9", + "sha256:d6ed9a3997b90c6f891138e3f83fb8f475c74db4ccaa942a1c7bf99e83a989a1", + "sha256:d93e76c2256aa50d9c82a88e2f569232e9862c9982095f6d54e13509f01222fc", + "sha256:df67fbeb666ee8828f675fee724cc2cbd2e4828cc3df56703e02fe6a421b7401", + "sha256:f29386804c3577c83d76520abf18cfcd7d68264c7e431c5907d250ab502658ee", + "sha256:f65f385a6f43211effe8c682e8ec3f55d79391f70a201575def73d08db68ead1", + "sha256:fc9fe455ad58a20ec68599139ed1113b21f977b536a91b42bef3ffed5cce7391" ], "index": "pypi", - "version": "==1.5.1" + "markers": "python_version >= '3.8'", + "version": "==1.7.0" }, "mypy-extensions": { "hashes": [ @@ -617,11 +644,11 @@ }, "packaging": { "hashes": [ - "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", - "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" + "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", + "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" ], "markers": "python_version >= '3.7'", - "version": "==23.1" + "version": "==23.2" }, "parso": { "hashes": [ @@ 
-641,11 +668,11 @@ }, "pbr": { "hashes": [ - "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b", - "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3" + "sha256:4a7317d5e3b17a3dccb6a8cfe67dab65b20551404c52c8ed41279fa4f0cb4cda", + "sha256:d1377122a5a00e2f940ee482999518efe16d745d423a670c27773dfbc3c9a7d9" ], "markers": "python_version >= '2.6'", - "version": "==5.11.1" + "version": "==6.0.0" }, "pexpect": { "hashes": [ @@ -655,36 +682,29 @@ "markers": "sys_platform != 'win32'", "version": "==4.8.0" }, - "pickleshare": { - "hashes": [ - "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", - "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" - ], - "version": "==0.7.5" - }, "platformdirs": { "hashes": [ - "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d", - "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d" + "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b", + "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731" ], "markers": "python_version >= '3.7'", - "version": "==3.10.0" + "version": "==4.0.0" }, "pluggy": { "hashes": [ - "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849", - "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3" + "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12", + "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7" ], - "markers": "python_version >= '3.7'", - "version": "==1.2.0" + "markers": "python_version >= '3.8'", + "version": "==1.3.0" }, "prompt-toolkit": { "hashes": [ - "sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac", - "sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88" + "sha256:941367d97fc815548822aa26c2a269fdc4eb21e9ec05fc5d447cf09bad5d75f0", + "sha256:f36fe301fafb7470e86aaf90f036eef600a3210be4decf461a5b1ca8403d3cb2" ], 
"markers": "python_full_version >= '3.7.0'", - "version": "==3.0.39" + "version": "==3.0.41" }, "ptyprocess": { "hashes": [ @@ -702,11 +722,11 @@ }, "pycodestyle": { "hashes": [ - "sha256:259bcc17857d8a8b3b4a2327324b79e5f020a13c16074670f9c8c8f872ea76d0", - "sha256:5d1013ba8dc7895b548be5afb05740ca82454fd899971563d2ef625d090326f8" + "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f", + "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67" ], "markers": "python_version >= '3.8'", - "version": "==2.11.0" + "version": "==2.11.1" }, "pyflakes": { "hashes": [ @@ -718,23 +738,26 @@ }, "pygments": { "hashes": [ - "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692", - "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29" + "sha256:1b37f1b1e1bff2af52ecaf28cc601e2ef7077000b227a0675da25aef85784bc4", + "sha256:e45a0e74bf9c530f564ca81b8952343be986a29f6afe7f5ad95c5f06b7bdf5e8" ], "markers": "python_version >= '3.7'", - "version": "==2.16.1" + "version": "==2.17.1" }, "pytest": { "hashes": [ - "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32", - "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a" + "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac", + "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5" ], "index": "pypi", - "version": "==7.4.0" + "markers": "python_version >= '3.7'", + "version": "==7.4.3" }, "pyyaml": { "hashes": [ + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", @@ -742,7 +765,10 @@ 
"sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", @@ -750,9 +776,12 @@ "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", @@ -767,7 +796,9 @@ "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + 
"sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", + "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", @@ -788,11 +819,11 @@ }, "rich": { "hashes": [ - "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808", - "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39" + "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa", + "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235" ], "markers": "python_full_version >= '3.7.0'", - "version": "==13.5.2" + "version": "==13.7.0" }, "six": { "hashes": [ @@ -804,18 +835,18 @@ }, "smmap": { "hashes": [ - "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94", - "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936" + "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62", + "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da" ], - "markers": "python_version >= '3.6'", - "version": "==5.0.0" + "markers": "python_version >= '3.7'", + "version": "==5.0.1" }, "stack-data": { "hashes": [ - "sha256:32d2dd0376772d01b6cb9fc996f3c8b57a357089dec328ed4b6553d037eaf815", - "sha256:cbb2a53eb64e5785878201a97ed7c7b94883f48b87bfb0bbe8b623c74679e4a8" + "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", + "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695" ], - "version": "==0.6.2" + "version": "==0.6.3" }, "stevedore": { "hashes": [ @@ -825,36 +856,44 @@ "markers": "python_version >= '3.8'", "version": "==5.1.0" }, + "tomli": { + "hashes": [ + 
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, "traitlets": { "hashes": [ - "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8", - "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9" + "sha256:9b232b9430c8f57288c1024b34a8f0251ddcc47268927367a0dd3eeaca40deb5", + "sha256:baf991e61542da48fe8aef8b779a9ea0aa38d8a54166ee250d5af5ecf4486619" ], - "markers": "python_version >= '3.7'", - "version": "==5.9.0" + "markers": "python_version >= '3.8'", + "version": "==5.13.0" }, "typing-extensions": { "hashes": [ - "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", - "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2" + "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0", + "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef" ], - "markers": "python_version >= '3.7'", - "version": "==4.7.1" + "markers": "python_version >= '3.8'", + "version": "==4.8.0" }, "urllib3": { "hashes": [ - "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f", - "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14" + "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84", + "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e" ], "markers": "python_version >= '3.6'", - "version": "==1.26.16" + "version": "==2.0.7" }, "wcwidth": { "hashes": [ - "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e", - "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0" + "sha256:25eb3ecbec328cdb945f56f2a7cfe784bdf7a73a8197398c7a7c65e7fe93e9ae", + "sha256:c4b153acf29f1f0d7fb1b00d097cce82b73de7a2016321c8d7ca71bd76dd848b" ], - "version": "==0.2.6" + "version": "==0.2.11" } } } diff --git 
a/tests/conftest.py b/tests/conftest.py index 4507579..cd12279 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,8 +5,8 @@ import transmogrifier.models as timdex from transmogrifier.config import SOURCES, load_external_config -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.datacite import Datacite +from transmogrifier.sources.transformer import XmlTransformer @pytest.fixture(autouse=True) @@ -48,15 +48,17 @@ def runner(): @pytest.fixture() def datacite_records(): - return parse_xml_records("tests/fixtures/datacite/datacite_records.xml") + return XmlTransformer.parse_source_file( + "tests/fixtures/datacite/datacite_records.xml" + ) @pytest.fixture() def datacite_record_all_fields(): - input_records = parse_xml_records( + source_records = XmlTransformer.parse_source_file( "tests/fixtures/datacite/datacite_record_all_fields.xml" ) - return Datacite("cool-repo", input_records) + return Datacite("cool-repo", source_records) @pytest.fixture() @@ -71,7 +73,7 @@ def marc_content_type_crosswalk(): @pytest.fixture() def oai_pmh_records(): - return parse_xml_records("tests/fixtures/oai_pmh_records.xml") + return XmlTransformer.parse_source_file("tests/fixtures/oai_pmh_records.xml") @pytest.fixture() diff --git a/tests/test_config.py b/tests/test_config.py index 695e5fe..15256fe 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,10 +6,8 @@ from transmogrifier.config import ( configure_logger, configure_sentry, - get_transformer, load_external_config, ) -from transmogrifier.sources.datacite import Datacite def test_configure_logger_not_verbose(): @@ -44,25 +42,6 @@ def test_configure_sentry_env_variable_is_dsn(monkeypatch): assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test" -def test_get_transformer_returns_correct_class_name(): - assert get_transformer("jpal") == Datacite - - -def test_get_transformer_source_missing_class_name_raises_error(): - with pytest.raises(KeyError): - 
get_transformer("cool-repo") - - -def test_get_transformer_source_wrong_class_name_raises_error(bad_config): - with pytest.raises(AttributeError): - get_transformer("bad-class-name") - - -def test_get_transformer_source_wrong_module_path_raises_error(bad_config): - with pytest.raises(ImportError): - get_transformer("bad-module-path") - - def test_load_external_config_invalid_file_type_raises_error(): with pytest.raises(ValueError): load_external_config("config/loc-countries.xml", "zxr") diff --git a/tests/test_datacite.py b/tests/test_datacite.py index db20c2b..661ce5e 100644 --- a/tests/test_datacite.py +++ b/tests/test_datacite.py @@ -1,4 +1,3 @@ -from transmogrifier.helpers import parse_xml_records from transmogrifier.models import ( AlternateTitle, Contributor, @@ -170,10 +169,10 @@ def test_datacite_transform_with_all_fields_transforms_correctly( def test_datacite_transform_missing_required_datacite_fields_logs_warning(caplog): - input_records = parse_xml_records( + source_records = Datacite.parse_source_file( "tests/fixtures/datacite/datacite_record_missing_datacite_required_fields.xml" ) - output_records = Datacite("cool-repo", input_records) + output_records = Datacite("cool-repo", source_records) next(output_records) assert ( @@ -195,10 +194,10 @@ def test_datacite_transform_missing_required_datacite_fields_logs_warning(caplog def test_datacite_transform_with_optional_fields_blank_transforms_correctly(): - input_records = parse_xml_records( + source_records = Datacite.parse_source_file( "tests/fixtures/datacite/datacite_record_optional_fields_blank.xml" ) - output_records = Datacite("cool-repo", input_records) + output_records = Datacite("cool-repo", source_records) assert next(output_records) == TimdexRecord( citation=("Title not provided. 
https://example.com/doi:10.7910/DVN/19PPE7"), source="A Cool Repository", @@ -218,10 +217,10 @@ def test_datacite_transform_with_optional_fields_blank_transforms_correctly(): def test_datacite_transform_with_optional_fields_missing_transforms_correctly(): - input_records = parse_xml_records( + source_records = Datacite.parse_source_file( "tests/fixtures/datacite/datacite_record_optional_fields_missing.xml" ) - output_records = Datacite("cool-repo", input_records) + output_records = Datacite("cool-repo", source_records) assert next(output_records) == TimdexRecord( citation=("Title not provided. https://example.com/doi:10.7910/DVN/19PPE7"), source="A Cool Repository", @@ -241,10 +240,10 @@ def test_datacite_transform_with_optional_fields_missing_transforms_correctly(): def test_datacite_with_attribute_and_subfield_variations_transforms_correctly(): - input_records = parse_xml_records( + source_records = Datacite.parse_source_file( "tests/fixtures/datacite/datacite_record_attribute_and_subfield_variations.xml" ) - output_records = Datacite("cool-repo", input_records) + output_records = Datacite("cool-repo", source_records) assert next(output_records) == TimdexRecord( citation=( "Creator, No affiliation no identifier, Creator, Blank affiliation blank " diff --git a/tests/test_dspace_dim.py b/tests/test_dspace_dim.py index fcee6ae..b86c9df 100644 --- a/tests/test_dspace_dim.py +++ b/tests/test_dspace_dim.py @@ -1,13 +1,12 @@ import transmogrifier.models as timdex -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.dspace_dim import DspaceDim def test_dspace_dim_transform_with_all_fields_transforms_correctly(): - input_records = parse_xml_records( + source_records = DspaceDim.parse_source_file( "tests/fixtures/dspace/dspace_dim_record_all_fields.xml" ) - output_records = DspaceDim("cool-repo", input_records) + output_records = DspaceDim("cool-repo", source_records) assert next(output_records) == timdex.TimdexRecord( citation="Journal of 
Geophysical Research: Solid Earth 121 (2016): 5859–5879", source="A Cool Repository", @@ -133,10 +132,10 @@ def test_dspace_dim_transform_with_all_fields_transforms_correctly(): def test_dspace_dim_transform_with_attribute_variations_transforms_correctly(): - input_records = parse_xml_records( + source_records = DspaceDim.parse_source_file( "tests/fixtures/dspace/dspace_dim_record_attribute_variations.xml" ) - output_records = DspaceDim("cool-repo", input_records) + output_records = DspaceDim("cool-repo", source_records) assert next(output_records) == timdex.TimdexRecord( citation="Title with Blank Qualifier. https://example.com/1912/2641", source="A Cool Repository", @@ -188,10 +187,10 @@ def test_dspace_dim_transform_with_attribute_variations_transforms_correctly(): def test_dspace_dim_transform_with_optional_fields_blank_transforms_correctly(): - input_records = parse_xml_records( + source_records = DspaceDim.parse_source_file( "tests/fixtures/dspace/dspace_dim_record_optional_fields_blank.xml" ) - output_records = DspaceDim("cool-repo", input_records) + output_records = DspaceDim("cool-repo", source_records) assert next(output_records) == timdex.TimdexRecord( source="A Cool Repository", source_link="https://example.com/1912/2641", @@ -204,10 +203,10 @@ def test_dspace_dim_transform_with_optional_fields_blank_transforms_correctly(): def test_dspace_dim_transform_with_optional_fields_missing_transforms_correctly(): - input_records = parse_xml_records( + source_records = DspaceDim.parse_source_file( "tests/fixtures/dspace/dspace_dim_record_optional_fields_missing.xml" ) - output_records = DspaceDim("cool-repo", input_records) + output_records = DspaceDim("cool-repo", source_records) assert next(output_records) == timdex.TimdexRecord( source="A Cool Repository", source_link="https://example.com/1912/2641", diff --git a/tests/test_dspace_mets.py b/tests/test_dspace_mets.py index 57c4bf0..fb7b8bc 100644 --- a/tests/test_dspace_mets.py +++ b/tests/test_dspace_mets.py 
@@ -1,10 +1,9 @@ import transmogrifier.models as timdex -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.dspace_mets import DspaceMets def test_dspace_mets_transform_with_missing_optional_fields_transforms_correctly(): - dspace_xml_records = parse_xml_records( + dspace_xml_records = DspaceMets.parse_source_file( "tests/fixtures/dspace/dspace_mets_record_optional_fields_missing.xml" ) output_records = DspaceMets("dspace", dspace_xml_records) @@ -20,7 +19,7 @@ def test_dspace_mets_transform_with_missing_optional_fields_transforms_correctly def test_dspace_mets_transform_with_blank_optional_fields_transforms_correctly(): - dspace_xml_records = parse_xml_records( + dspace_xml_records = DspaceMets.parse_source_file( "tests/fixtures/dspace/dspace_mets_record_optional_fields_blank.xml" ) output_records = DspaceMets("dspace", dspace_xml_records) @@ -36,7 +35,7 @@ def test_dspace_mets_transform_with_blank_optional_fields_transforms_correctly() def test_dspace_mets_with_attribute_and_subfield_variations_transforms_correctly(): - dspace_xml_records = parse_xml_records( + dspace_xml_records = DspaceMets.parse_source_file( "tests/fixtures/dspace/dspace_mets_record_attribute_and_subfield_variations.xml" ) output_records = DspaceMets("dspace", dspace_xml_records) @@ -85,7 +84,7 @@ def test_dspace_mets_with_attribute_and_subfield_variations_transforms_correctly def test_dspace_mets_transform_with_all_fields_transforms_correctly(): - dspace_xml_records = parse_xml_records( + dspace_xml_records = DspaceMets.parse_source_file( "tests/fixtures/dspace/dspace_mets_record_all_fields.xml" ) output_records = DspaceMets("dspace", dspace_xml_records) diff --git a/tests/test_ead.py b/tests/test_ead.py index 4855a58..c7de237 100644 --- a/tests/test_ead.py +++ b/tests/test_ead.py @@ -1,12 +1,13 @@ import logging import transmogrifier.models as timdex -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.ead import Ead def 
test_ead_record_all_fields_transform_correctly(): - ead_xml_records = parse_xml_records("tests/fixtures/ead/ead_record_all_fields.xml") + ead_xml_records = Ead.parse_source_file( + "tests/fixtures/ead/ead_record_all_fields.xml" + ) output_records = Ead("aspace", ead_xml_records) assert next(output_records) == timdex.TimdexRecord( source="MIT ArchivesSpace", @@ -216,7 +217,7 @@ def test_ead_record_all_fields_transform_correctly(): def test_ead_record_with_missing_archdesc_logs_error(caplog): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( "tests/fixtures/ead/ead_record_missing_archdesc.xml" ) output_records = Ead("aspace", ead_xml_records) @@ -230,7 +231,7 @@ def test_ead_record_with_missing_archdesc_logs_error(caplog): def test_ead_record_with_missing_archdesc_did_logs_error(caplog): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( "tests/fixtures/ead/ead_record_missing_archdesc_did.xml" ) output_records = Ead("aspace", ead_xml_records) @@ -244,7 +245,7 @@ def test_ead_record_with_missing_archdesc_did_logs_error(caplog): def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly(): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( "tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml" ) output_records = Ead("aspace", ead_xml_records) @@ -470,7 +471,7 @@ def test_ead_record_with_attribute_and_subfield_variations_transforms_correctly( def test_ead_record_with_blank_optional_fields_transforms_correctly(): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( "tests/fixtures/ead/ead_record_blank_optional_fields.xml" ) output_records = Ead("aspace", ead_xml_records) @@ -488,7 +489,7 @@ def test_ead_record_with_blank_optional_fields_transforms_correctly(): def test_ead_record_invalid_date_and_date_range_are_omitted(caplog): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( 
"tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml" ) output_record = next(Ead("aspace", ead_xml_records)) @@ -507,7 +508,7 @@ def test_ead_record_invalid_date_and_date_range_are_omitted(caplog): def test_ead_record_correct_identifiers_from_multiple_unitid(caplog): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( "tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml" ) output_record = next(Ead("aspace", ead_xml_records)) @@ -516,7 +517,7 @@ def test_ead_record_correct_identifiers_from_multiple_unitid(caplog): def test_ead_record_with_missing_optional_fields_transforms_correctly(): - ead_xml_records = parse_xml_records( + ead_xml_records = Ead.parse_source_file( "tests/fixtures/ead/ead_record_missing_optional_fields.xml" ) output_records = Ead("aspace", ead_xml_records) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index f64bc45..e1008f0 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -4,7 +4,6 @@ from transmogrifier.helpers import ( generate_citation, parse_date_from_string, - parse_xml_records, validate_date, validate_date_range, ) @@ -189,11 +188,6 @@ def test_generate_citation_with_all_fields(): ) -def test_parse_xml_records_returns_record_iterator(): - records = parse_xml_records("tests/fixtures/datacite/datacite_records.xml") - assert len(list(records)) == 38 - - def test_parse_date_from_string_success(): for date in [ "1930", diff --git a/tests/test_marc.py b/tests/test_marc.py index d5f2b61..2256dbc 100644 --- a/tests/test_marc.py +++ b/tests/test_marc.py @@ -3,12 +3,11 @@ from bs4 import BeautifulSoup import transmogrifier.models as timdex -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.marc import Marc def test_marc_record_all_fields_transform_correctly(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_all_fields.xml" ) output_records = Marc("alma", 
marc_xml_records) @@ -489,7 +488,7 @@ def test_marc_record_all_fields_transform_correctly(): def test_marc_record_attribute_and_subfield_variations_transforms_correctly(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_attribute_and_subfield_variations.xml", ) output_records = Marc("alma", marc_xml_records) @@ -706,7 +705,7 @@ def test_marc_record_attribute_and_subfield_variations_transforms_correctly(): def test_marc_record_with_blank_optional_fields_transforms_correctly(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_blank_optional_fields.xml" ) output_records = Marc("alma", marc_xml_records) @@ -727,7 +726,7 @@ def test_marc_record_with_blank_optional_fields_transforms_correctly(): def test_marc_record_with_missing_optional_fields_transforms_correctly(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_missing_optional_fields.xml" ) output_records = Marc("alma", marc_xml_records) @@ -746,7 +745,7 @@ def test_marc_record_with_missing_optional_fields_transforms_correctly(): def test_marc_record_missing_leader_logs_error(caplog): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_missing_leader.xml" ) output_records = Marc("alma", marc_xml_records) @@ -760,7 +759,7 @@ def test_marc_record_missing_leader_logs_error(caplog): def test_marc_record_missing_008_logs_error(caplog): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_missing_008.xml" ) output_records = Marc("alma", marc_xml_records) @@ -774,7 +773,7 @@ def test_marc_record_missing_008_logs_error(caplog): def test_create_subfield_value_list_from_datafield_with_values(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( 
"tests/fixtures/marc/marc_record_all_fields.xml" ) datafield = next(marc_xml_records).find_all("datafield", tag="130")[0] @@ -786,7 +785,7 @@ def test_create_subfield_value_list_from_datafield_with_values(): def test_create_subfield_value_list_from_datafield_with_blank_values(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_blank_optional_fields.xml" ) datafield = next(marc_xml_records).find_all("datafield", tag="130")[0] @@ -794,7 +793,7 @@ def test_create_subfield_value_list_from_datafield_with_blank_values(): def test_create_subfield_value_string_from_datafield_with_values(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_all_fields.xml" ) datafield = next(marc_xml_records).find_all("datafield", tag="130")[0] @@ -805,7 +804,7 @@ def test_create_subfield_value_string_from_datafield_with_values(): def test_create_subfield_value_string_from_datafield_with_blank_values(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_blank_optional_fields.xml" ) datafield = next(marc_xml_records).find_all("datafield", tag="130")[0] @@ -894,7 +893,7 @@ def test_loc_crosswalk_code_to_name_returns_name(caplog, loc_country_crosswalk): def test_get_main_titles_record_with_title(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_all_fields.xml" ) assert Marc.get_main_titles(next(marc_xml_records)) == [ @@ -903,31 +902,35 @@ def test_get_main_titles_record_with_title(): def test_get_main_titles_record_with_blank_title(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_blank_optional_fields.xml" ) assert Marc.get_main_titles(next(marc_xml_records)) == [] def test_get_main_titles_record_without_title(): - marc_xml_records = parse_xml_records( 
+ marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_missing_optional_fields.xml" ) assert Marc.get_main_titles(next(marc_xml_records)) == [] def test_get_source_record_id(): - marc_xml_records = parse_xml_records( + marc_xml_records = Marc.parse_source_file( "tests/fixtures/marc/marc_record_all_fields.xml" ) assert Marc.get_source_record_id(next(marc_xml_records)) == "990027185640106761" def test_record_is_deleted_returns_true_if_deleted(): - deleted_record = parse_xml_records("tests/fixtures/marc/marc_record_deleted.xml") + deleted_record = Marc.parse_source_file( + "tests/fixtures/marc/marc_record_deleted.xml" + ) assert Marc.record_is_deleted(next(deleted_record)) is True def test_record_is_deleted_returns_false_if_not_deleted(): - marc_record = parse_xml_records("tests/fixtures/marc/marc_record_all_fields.xml") + marc_record = Marc.parse_source_file( + "tests/fixtures/marc/marc_record_all_fields.xml" + ) assert Marc.record_is_deleted(next(marc_record)) is False diff --git a/tests/test_oai_dc.py b/tests/test_oai_dc.py index a1b98a8..d3103f0 100644 --- a/tests/test_oai_dc.py +++ b/tests/test_oai_dc.py @@ -1,5 +1,4 @@ import transmogrifier.models as timdex -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.oaidc import OaiDc FIXTURES_PREFIX = "tests/fixtures/oai_dc" @@ -20,8 +19,10 @@ def test_oaidctransform_with_all_fields_transforms_correctly(): - input_records = parse_xml_records(f"{FIXTURES_PREFIX}/oaidc_record_all_fields.xml") - output_records = OaiDc("libguides", input_records) + source_records = OaiDc.parse_source_file( + f"{FIXTURES_PREFIX}/oaidc_record_all_fields.xml" + ) + output_records = OaiDc("libguides", source_records) assert next(output_records) == timdex.TimdexRecord( source="LibGuides", source_link="https://libguides.mit.edu/guides/175846", @@ -52,27 +53,27 @@ def test_oaidctransform_with_all_fields_transforms_correctly(): def 
test_oaidc_transform_with_optional_fields_blank_transforms_correctly(): - input_records = parse_xml_records( + source_records = OaiDc.parse_source_file( f"{FIXTURES_PREFIX}/oaidc_record_optional_fields_blank.xml" ) - output_records = OaiDc("libguides", input_records) + output_records = OaiDc("libguides", source_records) assert next(output_records) == BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX def test_oaidc_transform_with_optional_fields_missing_transforms_correctly(): - input_records = parse_xml_records( + source_records = OaiDc.parse_source_file( f"{FIXTURES_PREFIX}/oaidc_record_optional_fields_missing.xml" ) - output_records = OaiDc("libguides", input_records) + output_records = OaiDc("libguides", source_records) assert next(output_records) == BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX def test_oaidc_generic_date(): - input_records = parse_xml_records( + source_records = OaiDc.parse_source_file( f"{FIXTURES_PREFIX}/oaidc_record_valid_generic_date.xml" ) - transformer_instance = OaiDc("libguides", input_records) - xml = next(transformer_instance.input_records) + transformer_instance = OaiDc("libguides", source_records) + xml = next(transformer_instance.source_records) assert transformer_instance.get_dates("test_source_record_id", xml) == [ timdex.Date(kind="Unknown", note=None, range=None, value="2008-06-19T17:55:27") ] diff --git a/tests/test_springshare.py b/tests/test_springshare.py index ba1c63c..67a81e4 100644 --- a/tests/test_springshare.py +++ b/tests/test_springshare.py @@ -1,5 +1,4 @@ import transmogrifier.models as timdex -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.springshare import SpringshareOaiDc SPRINGSHARE_FIXTURES_PREFIX = "tests/fixtures/oai_dc/springshare" @@ -53,11 +52,11 @@ def test_springshare_get_dates_valid(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{SPRINGSHARE_FIXTURES_PREFIX}/springshare_valid_dates.xml" ) - transformer_instance = 
SpringshareOaiDc("libguides", input_records) - for xml in transformer_instance.input_records: + transformer_instance = SpringshareOaiDc("libguides", source_records) + for xml in transformer_instance.source_records: date_field_value = transformer_instance.get_dates("test_get_dates", xml) assert date_field_value == [ timdex.Date( @@ -67,32 +66,32 @@ def test_springshare_get_dates_valid(): def test_springshare_get_dates_invalid_logged_and_skipped(caplog): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{SPRINGSHARE_FIXTURES_PREFIX}/springshare_invalid_dates.xml" ) - transformer_instance = SpringshareOaiDc("libguides", input_records) - for xml in transformer_instance.input_records: + transformer_instance = SpringshareOaiDc("libguides", source_records) + for xml in transformer_instance.source_records: date_field_value = transformer_instance.get_dates("test_get_dates", xml) assert date_field_value is None assert "has a date that cannot be parsed" in caplog.text def test_springshare_get_links_missing_identifier_logged_and_skipped(caplog): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{SPRINGSHARE_FIXTURES_PREFIX}/springshare_record_missing_required_fields.xml" ) - transformer_instance = SpringshareOaiDc("libguides", input_records) - for xml in transformer_instance.input_records: + transformer_instance = SpringshareOaiDc("libguides", source_records) + for xml in transformer_instance.source_records: links_field_value = transformer_instance.get_links("test_get_links", xml) assert links_field_value is None assert "has links that cannot be generated" in caplog.text def test_libguide_transform_with_all_fields_transforms_correctly(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{LIBGUIDES_FIXTURES_PREFIX}/libguides_record_all_fields.xml" ) - output_records = SpringshareOaiDc("libguides", input_records) + output_records = 
SpringshareOaiDc("libguides", source_records) assert next(output_records) == timdex.TimdexRecord( source="LibGuides", source_link="https://libguides.mit.edu/materials", @@ -133,26 +132,26 @@ def test_libguide_transform_with_all_fields_transforms_correctly(): def test_libguides_transform_with_optional_fields_blank_transforms_correctly(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{LIBGUIDES_FIXTURES_PREFIX}/libguides_record_optional_fields_blank.xml" ) - output_records = SpringshareOaiDc("libguides", input_records) + output_records = SpringshareOaiDc("libguides", source_records) assert next(output_records) == LIBGUIDES_BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX def test_libguides_transform_with_optional_fields_missing_transforms_correctly(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{LIBGUIDES_FIXTURES_PREFIX}/libguides_record_optional_fields_missing.xml" ) - output_records = SpringshareOaiDc("libguides", input_records) + output_records = SpringshareOaiDc("libguides", source_records) assert next(output_records) == LIBGUIDES_BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX def test_research_databases_transform_with_all_fields_transforms_correctly(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( f"{RESEARCHDATABASES_FIXTURES_PREFIX}/research_databases_record_all_fields.xml" ) - output_records = SpringshareOaiDc("researchdatabases", input_records) + output_records = SpringshareOaiDc("researchdatabases", source_records) assert next(output_records) == timdex.TimdexRecord( source="Research Databases", source_link="https://libguides.mit.edu/llba", @@ -189,11 +188,11 @@ def test_research_databases_transform_with_all_fields_transforms_correctly(): def test_research_databases_transform_with_optional_fields_blank_transforms_correctly(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( 
RESEARCHDATABASES_FIXTURES_PREFIX + "/research_databases_record_optional_fields_blank.xml" ) - output_records = SpringshareOaiDc("researchdatabases", input_records) + output_records = SpringshareOaiDc("researchdatabases", source_records) assert ( next(output_records) == RESEARCHDATABASES_BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX @@ -201,11 +200,11 @@ def test_research_databases_transform_with_optional_fields_blank_transforms_corr def test_research_databases_transform_with_optional_fields_missing_transforms_correctly(): - input_records = parse_xml_records( + source_records = SpringshareOaiDc.parse_source_file( RESEARCHDATABASES_FIXTURES_PREFIX + "/research_databases_record_optional_fields_missing.xml" ) - output_records = SpringshareOaiDc("researchdatabases", input_records) + output_records = SpringshareOaiDc("researchdatabases", source_records) assert ( next(output_records) == RESEARCHDATABASES_BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX diff --git a/tests/test_transformer.py b/tests/test_transformer.py index 57a4985..87e7444 100644 --- a/tests/test_transformer.py +++ b/tests/test_transformer.py @@ -1,9 +1,10 @@ from unittest.mock import patch -from transmogrifier.helpers import parse_xml_records +import pytest + from transmogrifier.models import TimdexRecord from transmogrifier.sources.datacite import Datacite -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import Transformer, XmlTransformer def test_transformer_initializes_with_expected_attributes(oai_pmh_records): @@ -11,25 +12,52 @@ def test_transformer_initializes_with_expected_attributes(oai_pmh_records): assert transformer.source == "cool-repo" assert transformer.source_base_url == "https://example.com/" assert transformer.source_name == "A Cool Repository" - assert transformer.input_records == oai_pmh_records + assert transformer.source_records == oai_pmh_records + + +def test_transformer_get_transformer_returns_correct_class_name(): + assert 
Transformer.get_transformer("jpal") == Datacite + + +def test_transformer_get_transformer_source_missing_class_name_raises_error(): + with pytest.raises(KeyError): + Transformer.get_transformer("cool-repo") + +def test_transformer_get_transformer_source_wrong_class_name_raises_error(bad_config): + with pytest.raises(AttributeError): + Transformer.get_transformer("bad-class-name") -def test_transformer_iterates_through_all_records(oai_pmh_records): - output_records = Transformer("cool-repo", oai_pmh_records) + +def test_transformer_get_transformer_source_wrong_module_path_raises_error(bad_config): + with pytest.raises(ImportError): + Transformer.get_transformer("bad-module-path") + + +def test_xmltransformer_initializes_with_expected_attributes(oai_pmh_records): + transformer = XmlTransformer("cool-repo", oai_pmh_records) + assert transformer.source == "cool-repo" + assert transformer.source_base_url == "https://example.com/" + assert transformer.source_name == "A Cool Repository" + assert transformer.source_records == oai_pmh_records + + +def test_xmltransformer_iterates_through_all_records(oai_pmh_records): + output_records = XmlTransformer("cool-repo", oai_pmh_records) assert len(list(output_records)) == 2 assert output_records.processed_record_count == 3 assert output_records.transformed_record_count == 2 assert len(output_records.deleted_records) == 1 -def test_transformer_iterates_successfully_if_get_optional_fields_returns_none( +def test_xmltransformer_iterates_successfully_if_get_optional_fields_returns_none( oai_pmh_records, ): with patch( - "transmogrifier.sources.transformer.Transformer.get_optional_fields" + "transmogrifier.sources.transformer.XmlTransformer.get_optional_fields" ) as m: m.return_value = None - output_records = Transformer("cool-repo", oai_pmh_records) + output_records = XmlTransformer("cool-repo", oai_pmh_records) assert len(list(output_records)) == 0 assert output_records.processed_record_count == 3 assert 
output_records.skipped_record_count == 2 @@ -37,13 +65,22 @@ def test_transformer_iterates_successfully_if_get_optional_fields_returns_none( assert len(output_records.deleted_records) == 1 -def test_transformer_record_is_deleted_returns_true_if_deleted(caplog): - input_records = parse_xml_records("tests/fixtures/record_deleted.xml") - assert Transformer.record_is_deleted(next(input_records)) is True +def test_xmltransformer_parse_source_file_returns_record_iterator(): + records = XmlTransformer.parse_source_file( + "tests/fixtures/datacite/datacite_records.xml" + ) + assert len(list(records)) == 38 + +def test_xmltransformer_record_is_deleted_returns_true_if_deleted(caplog): + source_records = XmlTransformer.parse_source_file( + "tests/fixtures/record_deleted.xml" + ) + assert XmlTransformer.record_is_deleted(next(source_records)) is True -def test_transformer_get_required_fields_returns_expected_values(oai_pmh_records): - transformer = Transformer("cool-repo", oai_pmh_records) + +def test_xmltransformer_get_required_fields_returns_expected_values(oai_pmh_records): + transformer = XmlTransformer("cool-repo", oai_pmh_records) assert transformer.get_required_fields(next(oai_pmh_records)) == { "source": "A Cool Repository", "source_link": "https://example.com/12345", @@ -52,8 +89,8 @@ def test_transformer_get_required_fields_returns_expected_values(oai_pmh_records } -def test_transformer_transform_returns_timdex_record(oai_pmh_records): - transformer = Transformer("cool-repo", oai_pmh_records) +def test_xmltransformer_transform_returns_timdex_record(oai_pmh_records): + transformer = XmlTransformer("cool-repo", oai_pmh_records) assert next(transformer) == TimdexRecord( source="A Cool Repository", source_link="https://example.com/12345", @@ -64,9 +101,11 @@ def test_transformer_transform_returns_timdex_record(oai_pmh_records): ) -def test_get_valid_title_with_title_field_blank_logs_warning(caplog): - input_records = 
parse_xml_records("tests/fixtures/record_title_field_blank.xml") - output_records = Datacite("cool-repo", input_records) +def test_xmltransformer_get_valid_title_with_title_field_blank_logs_warning(caplog): + source_records = XmlTransformer.parse_source_file( + "tests/fixtures/record_title_field_blank.xml" + ) + output_records = Datacite("cool-repo", source_records) assert next(output_records).title == "Title not provided" assert ( "Record doi:10.7910/DVN/19PPE7 was missing a title, source record should be " @@ -74,9 +113,11 @@ def test_get_valid_title_with_title_field_blank_logs_warning(caplog): ) -def test_get_valid_title_with_title_field_missing_logs_warning(caplog): - input_records = parse_xml_records("tests/fixtures/record_title_field_missing.xml") - output_records = Datacite("cool-repo", input_records) +def test_xmltransformer_get_valid_title_with_title_field_missing_logs_warning(caplog): + source_records = XmlTransformer.parse_source_file( + "tests/fixtures/record_title_field_missing.xml" + ) + output_records = Datacite("cool-repo", source_records) assert next(output_records).title == "Title not provided" assert ( "Record doi:10.7910/DVN/19PPE7 was missing a title, source record should be " @@ -84,9 +125,11 @@ def test_get_valid_title_with_title_field_missing_logs_warning(caplog): ) -def test_get_valid_title_with_title_field_multiple_logs_warning(caplog): - input_records = parse_xml_records("tests/fixtures/record_title_field_multiple.xml") - output_records = Datacite("cool-repo", input_records) +def test_xmltransformer_get_valid_title_with_title_field_multiple_logs_warning(caplog): + source_records = XmlTransformer.parse_source_file( + "tests/fixtures/record_title_field_multiple.xml" + ) + output_records = Datacite("cool-repo", source_records) assert ( next(output_records).title == "The Impact of Maternal Literacy and Participation Programs" diff --git a/tests/test_whoas.py b/tests/test_whoas.py index 2e036d1..29d0664 100644 --- a/tests/test_whoas.py +++ 
b/tests/test_whoas.py @@ -1,4 +1,3 @@ -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.whoas import Whoas @@ -32,11 +31,11 @@ def test_valid_content_types_with_all_valid(): def test_whoas_skips_records_with_only_invalid_or_not_present_content_types(): - input_records = list( - parse_xml_records( + source_records = list( + Whoas.parse_source_file( "tests/fixtures/dspace/whoas_records_with_valid_and_invalid_content_types.xml" ) ) - assert len(input_records) == 4 - output_records = Whoas("whoas", iter(input_records)) + assert len(source_records) == 4 + output_records = Whoas("whoas", iter(source_records)) assert len(list(output_records)) == 2 diff --git a/tests/test_zenodo.py b/tests/test_zenodo.py index 03dd3d1..5589e51 100644 --- a/tests/test_zenodo.py +++ b/tests/test_zenodo.py @@ -1,10 +1,11 @@ -from transmogrifier.helpers import parse_xml_records from transmogrifier.sources.zenodo import Zenodo def test_zenodo_create_source_record_id_generates_correct_id(): - input_records = parse_xml_records("tests/fixtures/datacite/zenodo_record.xml") - output_records = Zenodo("zenodo", input_records) + source_records = Zenodo.parse_source_file( + "tests/fixtures/datacite/zenodo_record.xml" + ) + output_records = Zenodo("zenodo", source_records) zenodo_record = next(output_records) assert zenodo_record.source_link == "https://zenodo.org/record/4291646" assert zenodo_record.timdex_record_id == "zenodo:4291646" @@ -39,12 +40,12 @@ def test_valid_content_types_with_all_valid(): def test_zenodo_skips_records_with_invalid_content_types(): - input_records = list( - parse_xml_records( + source_records = list( + Zenodo.parse_source_file( "tests/fixtures/datacite/" "zenodo_records_with_valid_and_invalid_content_types.xml" ) ) - assert len(input_records) == 2 - output_records = Zenodo("zenodo", iter(input_records)) + assert len(source_records) == 2 + output_records = Zenodo("zenodo", iter(source_records)) assert len(list(output_records)) == 1 diff --git 
a/transmogrifier/cli.py b/transmogrifier/cli.py index f9ea804..ec168f9 100644 --- a/transmogrifier/cli.py +++ b/transmogrifier/cli.py @@ -4,17 +4,9 @@ import click -from transmogrifier.config import ( - SOURCES, - configure_logger, - configure_sentry, - get_transformer, -) -from transmogrifier.helpers import ( - parse_xml_records, - write_deleted_records_to_file, - write_timdex_records_to_json, -) +from transmogrifier.config import SOURCES, configure_logger, configure_sentry +from transmogrifier.helpers import write_deleted_records_to_file +from transmogrifier.sources.transformer import Transformer logger = logging.getLogger(__name__) @@ -49,13 +41,11 @@ def main(source, input_file, output_file, verbose): logger.info(configure_sentry()) logger.info("Running transform for source %s", source) - input_records = parse_xml_records(input_file) - transformer_class = get_transformer(source) - transformer_instance = transformer_class(source, input_records) - write_timdex_records_to_json(transformer_instance, output_file) - if transformer_instance.processed_record_count == 0: + transformer = Transformer.load(source, input_file) + transformer.write_timdex_records_to_json(output_file) + if transformer.processed_record_count == 0: raise ValueError("No records processed from input file, needs investigation") - if deleted_records := transformer_instance.deleted_records: + if deleted_records := transformer.deleted_records: deleted_output_file = output_file.replace("index", "delete").replace( "json", "txt" ) @@ -67,10 +57,10 @@ def main(source, input_file, output_file, verbose): "skipped records: %d, " "deleted records: %d" ), - transformer_instance.processed_record_count, - transformer_instance.transformed_record_count, - transformer_instance.skipped_record_count, - len(transformer_instance.deleted_records), + transformer.processed_record_count, + transformer.transformed_record_count, + transformer.skipped_record_count, + len(transformer.deleted_records), ) elapsed_time = 
perf_counter() - START_TIME logger.info( diff --git a/transmogrifier/config.py b/transmogrifier/config.py index 94fc58c..ea96c16 100644 --- a/transmogrifier/config.py +++ b/transmogrifier/config.py @@ -2,7 +2,6 @@ import json import logging import os -from importlib import import_module from typing import Literal, Union import sentry_sdk @@ -151,17 +150,6 @@ def configure_sentry() -> str: return "No Sentry DSN found, exceptions will not be sent to Sentry" -def get_transformer(source: str) -> type: - """ - Return configured transformer class for a source. - - Source must be configured with a valid transform class path. - """ - module_name, class_name = SOURCES[source]["transform-class"].rsplit(".", 1) - source_module = import_module(module_name) - return getattr(source_module, class_name) - - def load_external_config( file_path: str, file_type: Literal["json", "xml"] ) -> Union[dict, BeautifulSoup]: diff --git a/transmogrifier/helpers.py b/transmogrifier/helpers.py index 7a4fdc3..c749730 100644 --- a/transmogrifier/helpers.py +++ b/transmogrifier/helpers.py @@ -1,24 +1,10 @@ -import json import logging -import os from datetime import datetime -from typing import TYPE_CHECKING, Iterator, Optional +from typing import Optional -from attrs import asdict -from bs4 import BeautifulSoup, Tag - -# Note: the lxml module in defusedxml is deprecated, so we have to use the -# regular lxml library. Transmogrifier only parses data from known sources so this -# should not be a security issue. 
-from lxml import etree # nosec B410 from smart_open import open from transmogrifier.config import DATE_FORMATS -from transmogrifier.models import TimdexRecord - -# import Transformer only when type checking to avoid circular dependency -if TYPE_CHECKING: # pragma: no cover - from transmogrifier.sources.transformer import Transformer logger = logging.getLogger(__name__) @@ -73,22 +59,6 @@ def generate_citation(extracted_data: dict) -> str: return citation -def parse_xml_records( - input_file_path: str, -) -> Iterator[Tag]: - with open(input_file_path, "rb") as file: - for _, element in etree.iterparse( - file, - tag="{*}record", - encoding="utf-8", - recover=True, - ): - record_string = etree.tostring(element, encoding="utf-8") - record = BeautifulSoup(record_string, "xml") - yield record - element.clear() - - def parse_date_from_string( date_string: str, ) -> Optional[datetime]: @@ -176,37 +146,6 @@ def write_deleted_records_to_file(deleted_records: list[str], output_file_path: file.write(f"{record_id}\n") -def write_timdex_records_to_json( - transformer_instance: "Transformer", output_file_path: str -) -> int: - count = 0 - try: - record: TimdexRecord = next(transformer_instance) - except StopIteration: - return count - with open(output_file_path, "w") as file: - file.write("[\n") - while record: - file.write( - json.dumps( - asdict(record, filter=lambda attr, value: value is not None), - indent=2, - ) - ) - count += 1 - if count % int(os.getenv("STATUS_UPDATE_INTERVAL", 1000)) == 0: - logger.info( - "Status update: %s records written to output file so far!", count - ) - try: - record: TimdexRecord = next(transformer_instance) # type: ignore[no-redef] # noqa: E501 - except StopIteration: - break - file.write(",\n") - file.write("\n]") - return count - - class DeletedRecord(Exception): """Exception raised for records with a deleted status. 
diff --git a/transmogrifier/sources/datacite.py b/transmogrifier/sources/datacite.py index d7e2f94..1aee2aa 100644 --- a/transmogrifier/sources/datacite.py +++ b/transmogrifier/sources/datacite.py @@ -5,12 +5,12 @@ import transmogrifier.models as timdex from transmogrifier.helpers import validate_date, validate_date_range -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import XmlTransformer logger = logging.getLogger(__name__) -class Datacite(Transformer): +class Datacite(XmlTransformer): """Datacite transformer.""" def get_optional_fields(self, xml: Tag) -> Optional[dict]: diff --git a/transmogrifier/sources/dspace_dim.py b/transmogrifier/sources/dspace_dim.py index 3c3582c..cee4229 100644 --- a/transmogrifier/sources/dspace_dim.py +++ b/transmogrifier/sources/dspace_dim.py @@ -5,12 +5,12 @@ import transmogrifier.models as timdex from transmogrifier.helpers import validate_date, validate_date_range -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import XmlTransformer logger = logging.getLogger(__name__) -class DspaceDim(Transformer): +class DspaceDim(XmlTransformer): """DSpace DIM transformer.""" def get_optional_fields(self, xml: Tag) -> Optional[dict]: diff --git a/transmogrifier/sources/dspace_mets.py b/transmogrifier/sources/dspace_mets.py index ce4cb64..77c9cb8 100644 --- a/transmogrifier/sources/dspace_mets.py +++ b/transmogrifier/sources/dspace_mets.py @@ -5,12 +5,12 @@ import transmogrifier.models as timdex from transmogrifier.helpers import validate_date -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import XmlTransformer logger = logging.getLogger(__name__) -class DspaceMets(Transformer): +class DspaceMets(XmlTransformer): """DSpace METS transformer.""" def get_optional_fields(self, xml: Tag) -> dict: diff --git a/transmogrifier/sources/ead.py b/transmogrifier/sources/ead.py index d82b1dc..82cdbad 
100644 --- a/transmogrifier/sources/ead.py +++ b/transmogrifier/sources/ead.py @@ -6,7 +6,7 @@ import transmogrifier.models as timdex from transmogrifier.config import load_external_config from transmogrifier.helpers import validate_date, validate_date_range -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import XmlTransformer logger = logging.getLogger(__name__) @@ -16,7 +16,7 @@ ) -class Ead(Transformer): +class Ead(XmlTransformer): """EAD transformer.""" def get_optional_fields(self, xml: Tag) -> Optional[dict]: diff --git a/transmogrifier/sources/marc.py b/transmogrifier/sources/marc.py index 95240bf..0077cc4 100644 --- a/transmogrifier/sources/marc.py +++ b/transmogrifier/sources/marc.py @@ -6,7 +6,7 @@ import transmogrifier.models as timdex from transmogrifier.config import load_external_config from transmogrifier.helpers import validate_date -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import XmlTransformer logger = logging.getLogger(__name__) @@ -30,7 +30,7 @@ ) -class Marc(Transformer): +class Marc(XmlTransformer): """Marc transformer.""" def get_optional_fields(self, xml: Tag) -> Optional[dict]: diff --git a/transmogrifier/sources/oaidc.py b/transmogrifier/sources/oaidc.py index 6453e58..5645e17 100644 --- a/transmogrifier/sources/oaidc.py +++ b/transmogrifier/sources/oaidc.py @@ -5,12 +5,12 @@ import transmogrifier.models as timdex from transmogrifier.helpers import validate_date -from transmogrifier.sources.transformer import Transformer +from transmogrifier.sources.transformer import XmlTransformer logger = logging.getLogger(__name__) -class OaiDc(Transformer): +class OaiDc(XmlTransformer): """ Generic OAI DC transformer. 
diff --git a/transmogrifier/sources/transformer.py b/transmogrifier/sources/transformer.py index 4072a16..1fc0c5d 100644 --- a/transmogrifier/sources/transformer.py +++ b/transmogrifier/sources/transformer.py @@ -1,9 +1,20 @@ """Transformer module.""" +from __future__ import annotations + +import json import logging +import os from abc import ABCMeta, abstractmethod -from typing import Iterator, Optional, final +from importlib import import_module +from typing import Iterator, Optional, TypeAlias, final + +from attrs import asdict +from bs4 import BeautifulSoup, Tag -from bs4 import Tag +# Note: the lxml module in defusedxml is deprecated, so we have to use the +# regular lxml library. Transmogrifier only parses data from known sources so this +# should not be a security issue. +from lxml import etree # nosec B410 from transmogrifier.config import SOURCES from transmogrifier.helpers import DeletedRecord, generate_citation @@ -11,40 +22,45 @@ logger = logging.getLogger(__name__) +JSON: TypeAlias = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None + class Transformer(object): """Base transformer class.""" __metaclass__ = ABCMeta - def __init__(self, source: str, input_records: Iterator[Tag]) -> None: + @final + def __init__(self, source: str, source_records: Iterator[JSON | Tag]) -> None: """ Initialize Transformer instance. Args: - source: Source repository short label. Must match a source key from - config.SOURCES + source: Source repository label. Must match a source key from config.SOURCES. + source_records: A set of source records to be processed. 
""" - self.source = source - self.source_base_url = SOURCES[source]["base-url"] + self.source: str = source + self.source_base_url: str = SOURCES[source]["base-url"] self.source_name = SOURCES[source]["name"] - self.input_records = input_records - self.processed_record_count = 0 - self.transformed_record_count = 0 - self.skipped_record_count = 0 + self.source_records: Iterator[JSON | Tag] = source_records + self.processed_record_count: int = 0 + self.transformed_record_count: int = 0 + self.skipped_record_count: int = 0 self.deleted_records: list[str] = [] + @final def __iter__(self) -> Iterator[TimdexRecord]: """Iterate over transformed records.""" return self + @final def __next__(self) -> TimdexRecord: """Return next transformed record.""" while True: - xml = next(self.input_records) + source_record = next(self.source_records) self.processed_record_count += 1 try: - record = self.transform(xml) + record = self.transform(source_record) except DeletedRecord as error: self.deleted_records.append(error.timdex_record_id) continue @@ -55,73 +71,308 @@ def __next__(self) -> TimdexRecord: self.skipped_record_count += 1 continue + @final + def write_timdex_records_to_json(self, output_file: str) -> int: + """ + Write TIMDEX records to JSON file. + + Args: + output_file: The JSON file used for writing TIMDEX records. 
+ + """ + count = 0 + try: + record: TimdexRecord = next(self) + except StopIteration: + return count + with open(output_file, "w") as file: + file.write("[\n") + while record: + file.write( + json.dumps( + asdict(record, filter=lambda attr, value: value is not None), + indent=2, + ) + ) + count += 1 + if count % int(os.getenv("STATUS_UPDATE_INTERVAL", 1000)) == 0: + logger.info( + "Status update: %s records written to output file so far!", + count, + ) + try: + record: TimdexRecord = next(self) # type: ignore[no-redef] # noqa: E501 + except StopIteration: + break + file.write(",\n") + file.write("\n]") + return count + + @final + @classmethod + def load(cls, source: str, source_file: str) -> Transformer: + """ + Instantiate specified transformer class and populate with source records. + + Args: + source: Source repository label. Must match a source key from config.SOURCES. + source_file: A file containing source records to be transformed. + """ + transformer_class = cls.get_transformer(source) + source_records = transformer_class.parse_source_file(source_file) + transformer = transformer_class(source, source_records) + return transformer + + @final + @classmethod + def get_transformer(cls, source: str) -> type[Transformer]: + """ + Return configured transformer class for a source. + + Source must be configured with a valid transform class path. + + Args: + source: Source repository label. Must match a source key from config.SOURCES. + + """ + module_name, class_name = SOURCES[source]["transform-class"].rsplit(".", 1) + source_module = import_module(module_name) + return getattr(source_module, class_name) + + @classmethod @abstractmethod - def get_optional_fields(self, xml: Tag) -> Optional[dict]: + def parse_source_file(cls, source_file: str) -> Iterator[JSON | Tag]: + """ + Parse source file and return source records via an iterator. + + Must be overridden by format subclasses. + + Args: + source_file: A file containing source records to be transformed. 
+ """ + pass + + @abstractmethod + def get_optional_fields(self, source_record: JSON | Tag) -> Optional[dict]: + """ + Retrieve optional TIMDEX fields from a source record. + + Must be overridden by source subclasses. + + Args: + source_record: A single source record. + """ + return {} + + @classmethod + @abstractmethod + def get_main_titles(cls, source_record: JSON | Tag) -> list[Tag | str]: + """ + Retrieve main title(s) from a source record. + + Must be overridden by source subclasses. + + Args: + source_record: A single source record. + """ + return [] + + @classmethod + @abstractmethod + def get_source_record_id(cls, source_record: JSON | Tag) -> str: + """ + Get or generate a source record ID from a source record. + + Must be overridden by source subclasses. + + Args: + source_record: A single source record. + """ + return "" + + @classmethod + @abstractmethod + def record_is_deleted(cls, source_record: JSON | Tag) -> bool: + """ + Determine whether record has a status of deleted. + + Must be overridden by source subclasses. + + Args: + source_record: A single source record. + """ + return False + + @abstractmethod + def get_required_fields(self, source_record: JSON | Tag) -> dict: + """ + Get required TIMDEX fields from a source record. + + Must be overridden by format subclasses. + + Args: + source_record: A single source record. + """ + return {} + + @abstractmethod + def transform(self, source_record: JSON | Tag) -> Optional[TimdexRecord]: + """ + Transform a source record into a TIMDEX record. + + Must be overridden by format subclasses. + + Args: + source_record: A single source record. + """ + return None + + @classmethod + @abstractmethod + def get_valid_title(cls, source_record_id: str, source_record: JSON | Tag) -> str: + """ + Retrieves main title(s) from a source record and returns a valid title string. + + Must be overridden by source subclasses. + + Args: + source_record_id: Record identifier for the source record. 
+ source_record: A single source record. + """ + return "" + + @classmethod + @abstractmethod + def get_source_link( + cls, source_base_url: str, source_record_id: str, source_record: JSON | Tag + ) -> str: + """ + Class method to set the source link for the item. + + Must be overridden by source subclasses. + + Args: + source_base_url: Source base URL. + source_record_id: Record identifier for the source record. + source_record: A single source record. + """ + return "" + + @classmethod + @abstractmethod + def get_timdex_record_id( + cls, source: str, source_record_id: str, source_record: Tag + ) -> str: + """ + Class method to set the TIMDEX record id. + + Must be overridden by source subclasses. + + Args: + source: Source name. + source_record_id: Record identifier for the source record. + source_record: A single source record. + """ + return "" + + +class XmlTransformer(Transformer): + """Base transformer class for XML-formatted source records.""" + + @final + @classmethod + def parse_source_file(cls, source_file: str) -> Iterator[Tag]: + """ + Parse XML file and return source records as bs4 Tags via an iterator. + + May not be overridden. + + Args: + source_file: A file containing source records to be transformed. + """ + with open(source_file, "rb") as file: + for _, element in etree.iterparse( + file, + tag="{*}record", + encoding="utf-8", + recover=True, + ): + record_string = etree.tostring(element, encoding="utf-8") + record = BeautifulSoup(record_string, "xml") + yield record + element.clear() + + @abstractmethod + def get_optional_fields(self, source_record: Tag) -> Optional[dict]: """ Retrieve optional TIMDEX fields from an XML record. Must be overridden by source subclasses. 
Args: - xml: A BeautifulSoup Tag representing a single XML record + source_record: A BeautifulSoup Tag representing a single XML record """ return {} @classmethod @abstractmethod - def get_main_titles(cls, xml: Tag) -> list[Tag]: + def get_main_titles(cls, source_record: Tag) -> list[Tag]: """ Retrieve main title(s) from an XML record. Must be overridden by source subclasses. Args: - xml: A BeautifulSoup Tag representing a single XML record + source_record: A BeautifulSoup Tag representing a single XML record """ return [] @classmethod - def get_source_record_id(cls, xml: Tag) -> str: + def get_source_record_id(cls, source_record: Tag) -> str: """ Get or generate a source record ID from an XML record. May be overridden by source subclasses if needed. Args: - xml: A BeautifulSoup Tag representing a single XML record + source_record: A BeautifulSoup Tag representing a single XML record """ - return str(xml.header.find("identifier").string) + return str(source_record.header.find("identifier").string) @classmethod - def record_is_deleted(cls, xml: Tag) -> bool: + def record_is_deleted(cls, source_record: Tag) -> bool: """ Determine whether record has a status of deleted. May be overridden by source subclasses if needed. Args: - xml: A BeautifulSoup Tag representing a single XML record + source_record: A BeautifulSoup Tag representing a single XML record """ - if xml.find("header", status="deleted"): + if source_record.find("header", status="deleted"): return True return False @final - def get_required_fields(self, xml: Tag) -> dict: + def get_required_fields(self, source_record: Tag) -> dict: """ Get required TIMDEX fields from an XML record. May not be overridden. Args: - xml: A BeautifulSoup Tag representing a single OAI-PMH XML record. + source_record: A BeautifulSoup Tag representing a single OAI-PMH XML record. 
""" - source_record_id = self.get_source_record_id(xml) + source_record_id = self.get_source_record_id(source_record) # run methods to generate required fields - source_link = self.get_source_link(self.source_base_url, source_record_id, xml) - timdex_record_id = self.get_timdex_record_id(self.source, source_record_id, xml) - title = self.get_valid_title(source_record_id, xml) + source_link = self.get_source_link( + self.source_base_url, source_record_id, source_record + ) + timdex_record_id = self.get_timdex_record_id( + self.source, source_record_id, source_record + ) + title = self.get_valid_title(source_record_id, source_record) return { "source": self.source_name, @@ -131,25 +382,25 @@ def get_required_fields(self, xml: Tag) -> dict: } @final - def transform(self, xml: Tag) -> Optional[TimdexRecord]: + def transform(self, source_record: Tag) -> Optional[TimdexRecord]: """ - Transform an OAI-PMH XML record into a TIMDEX record. + Transform an XML record into a TIMDEX record. May not be overridden. Args: - xml: A BeautifulSoup Tag representing a single OAI-PMH XML record. + source_record: A BeautifulSoup Tag representing a single XML record. 
""" - if self.record_is_deleted(xml): - source_record_id = self.get_source_record_id(xml) + if self.record_is_deleted(source_record): + source_record_id = self.get_source_record_id(source_record) timdex_record_id = f"{self.source}:{source_record_id.replace('/', '-')}" raise DeletedRecord(timdex_record_id) - optional_fields = self.get_optional_fields(xml) + optional_fields = self.get_optional_fields(source_record) if optional_fields is None: return None else: fields = { - **self.get_required_fields(xml), + **self.get_required_fields(source_record), **optional_fields, } @@ -163,7 +414,7 @@ def transform(self, xml: Tag) -> Optional[TimdexRecord]: @final @classmethod - def get_valid_title(cls, source_record_id: str, xml: Tag) -> str: + def get_valid_title(cls, source_record_id: str, source_record: Tag) -> str: """ Retrieves main title(s) from an XML record and returns a valid title string. @@ -175,9 +426,9 @@ def get_valid_title(cls, source_record_id: str, xml: Tag) -> str: Args: source_record_id: Record identifier for the source record. - xml: A BeautifulSoup Tag representing a single XML record. + source_record: A BeautifulSoup Tag representing a single XML record. """ - all_titles = cls.get_main_titles(xml) + all_titles = cls.get_main_titles(source_record) if len(all_titles) > 1: logger.warning( "Record %s has multiple titles. Using the first title from the " @@ -199,7 +450,7 @@ def get_valid_title(cls, source_record_id: str, xml: Tag) -> str: @classmethod def get_source_link( - cls, source_base_url: str, source_record_id: str, xml: Tag + cls, source_base_url: str, source_record_id: str, source_record: Tag ) -> str: """ Class method to set the source link for the item. @@ -211,14 +462,16 @@ def get_source_link( Args: source_base_url: Source base URL. source_record_id: Record identifier for the source record. - xml: A BeautifulSoup Tag representing a single XML record. + source_record: A BeautifulSoup Tag representing a single XML record. 
- not used by default implementation, but could be useful for subclass overrides """ return source_base_url + source_record_id @classmethod - def get_timdex_record_id(cls, source: str, source_record_id: str, xml: Tag) -> str: + def get_timdex_record_id( + cls, source: str, source_record_id: str, source_record: Tag + ) -> str: """ Class method to set the TIMDEX record id. @@ -229,7 +482,7 @@ def get_timdex_record_id(cls, source: str, source_record_id: str, xml: Tag) -> s Args: source: Source name. source_record_id: Record identifier for the source record. - xml: A BeautifulSoup Tag representing a single XML record. + source_record: A BeautifulSoup Tag representing a single XML record. - not used by default implementation, but could be useful for subclass overrides """