diff --git a/Pipfile.lock b/Pipfile.lock index 45fb7b8..e51e3a6 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -26,19 +26,19 @@ }, "boto3": { "hashes": [ - "sha256:0d382baac02ba4ead82230f34ba377fbf5f6481321dca911e6664b752d79b682", - "sha256:eb5d84c2127ffddf8e7f4dd6f9084f86cb18dca8416fb5d6bea278298cf8d84c" + "sha256:a33585ef0d811ee0dffd92a96108344997a3059262c57349be0761d7885f6ae7", + "sha256:cbfabd99c113bbb1708c2892e864b6dd739593b97a76fbb2e090a7d965b63b82" ], "index": "pypi", - "version": "==1.34.46" + "version": "==1.34.72" }, "botocore": { "hashes": [ - "sha256:21a6c391c6b4869aed66bc888b8e6d54581b343514cfe97dbe71ede12026c3cc", - "sha256:f54330ba1e8ce31489a4e09b4ba8afbf84be01bbc48dbb31d44897fb7657f7ad" + "sha256:342edb6f91d5839e790411822fc39f9c712c87cdaa7f3b1999f50b1ca16c4a14", + "sha256:a6b92735a73c19a7e540d77320420da3af3f32c91fa661c738c0b8c9f912d782" ], "markers": "python_version >= '3.8'", - "version": "==1.34.46" + "version": "==1.34.72" }, "certifi": { "hashes": [ @@ -212,41 +212,41 @@ }, "cryptography": { "hashes": [ - "sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b", - "sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce", - "sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88", - "sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7", - "sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20", - "sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9", - "sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff", - "sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1", - "sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764", - "sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b", - "sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298", - "sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1", - "sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824", - "sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257", - "sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a", - "sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129", - "sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb", - "sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929", - "sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854", - "sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52", - "sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923", - "sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885", - "sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0", - "sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd", - "sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2", - "sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18", - "sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b", - "sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992", - "sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74", - "sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660", - "sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925", - "sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449" + 
"sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee", + "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576", + "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d", + "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30", + "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413", + "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb", + "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da", + "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4", + "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd", + "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc", + "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8", + "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1", + "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc", + "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e", + "sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8", + "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940", + "sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400", + "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7", + "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16", + "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278", + "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74", + "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec", + "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1", + "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2", + "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c", + "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922", + "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a", + "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6", + "sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1", + "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e", + "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac", + "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7" ], "markers": "python_version >= '3.7'", - "version": "==42.0.4" + "version": "==42.0.5" }, "idna": { "hashes": [ @@ -427,18 +427,18 @@ "s3" ], "hashes": [ - "sha256:71bb832a18b64f10fc4cec117b9b0e2305e5831d9a17eb74f6b9819ed7613843", - "sha256:7e27395e5c63ff9554ae14b5baa41bfe6d6b1be0e59eb02977c6ce28411246de" + "sha256:4054360b882b6e7bab25d52d057e196b978b8d15f1921333f534c4d8f6510bbb", + "sha256:8d19125d40c919cb40df62f4576904c2647c4e9a0e1ebc42491dd7787d09e107" ], "index": "pypi", - "version": "==5.0.2" + "version": "==5.0.4" }, "py-partiql-parser": { "hashes": [ - "sha256:53053e70987dea2983e1990ad85f87a7d8cec13dd4a4b065a740bcfd661f5a6b", - "sha256:aeac8f46529d8651bbae88a1a6c14dc3aa38ebc4bc6bd1eb975044c0564246c6" + "sha256:9c79b59bbe0cb50daa8090020f2e7f3e5a0e33f7846b48924f19a8f7704f4877", + "sha256:bdec65fe17d6093c05e9bc1742a99a041ef810b50a71cc0d9e74a88218d938cf" ], - "version": "==0.5.1" + "version": "==0.5.2" }, "pycparser": { "hashes": [ @@ -449,11 +449,11 @@ }, "python-dateutil": { "hashes": [ - "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", - 
"sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", - "version": "==2.8.2" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.0.post0" }, "pyyaml": { "hashes": [ @@ -529,18 +529,18 @@ }, "s3transfer": { "hashes": [ - "sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e", - "sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b" + "sha256:5683916b4c724f799e600f41dd9e10a9ff19871bf87623cc8f491cb4f5fa0a19", + "sha256:ceb252b11bcf87080fb7850a224fb6e05c8a776bab8f2b64b7f25b969464839d" ], "markers": "python_version >= '3.8'", - "version": "==0.10.0" + "version": "==0.10.1" }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "smart-open": { @@ -548,11 +548,11 @@ "s3" ], "hashes": [ - "sha256:8d3ef7e6997e8e42dd55c74166ed21e6ac70664caa32dd940b26d54a8f6b4142", - "sha256:be3c92c246fbe80ebce8fbacb180494a481a77fcdcb7c1aadb2ea5b9c2bee8b9" + "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47", + "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524" ], "index": "pypi", - "version": "==6.4.0" + "version": "==7.0.4" }, "structlog": { "hashes": [ @@ -578,6 +578,82 @@ "markers": "python_version >= '3.8'", "version": "==3.0.1" }, + "wrapt": { + "hashes": [ + "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc", + "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", + "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", + "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e", + "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca", + "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0", + "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb", + "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487", + "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40", + "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", + "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", + "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202", + "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41", + "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", + "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", + "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664", + "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", + "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", + "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00", + "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", + "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", + 
"sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267", + "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", + "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966", + "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", + "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228", + "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72", + "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", + "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292", + "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0", + "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0", + "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", + "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c", + "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5", + "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f", + "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", + "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", + "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2", + "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593", + "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39", + "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", + "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf", + "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf", + "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", + "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c", + "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c", + "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f", + "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440", + "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465", + "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136", + "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b", + "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8", + "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", + "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8", + "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6", + "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e", + "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f", + "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c", + "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e", + "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", + "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2", + "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", + "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35", + "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", + "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3", + "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537", + "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", + 
"sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d", + "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a", + "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4" + ], + "markers": "python_version >= '3.6'", + "version": "==1.16.0" + }, "xmltodict": { "hashes": [ "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56", @@ -590,31 +666,31 @@ "develop": { "black": { "hashes": [ - "sha256:057c3dc602eaa6fdc451069bd027a1b2635028b575a6c3acfd63193ced20d9c8", - "sha256:08654d0797e65f2423f850fc8e16a0ce50925f9337fb4a4a176a7aa4026e63f8", - "sha256:163baf4ef40e6897a2a9b83890e59141cc8c2a98f2dda5080dc15c00ee1e62cd", - "sha256:1e08fb9a15c914b81dd734ddd7fb10513016e5ce7e6704bdd5e1251ceee51ac9", - "sha256:4dd76e9468d5536abd40ffbc7a247f83b2324f0c050556d9c371c2b9a9a95e31", - "sha256:4f9de21bafcba9683853f6c96c2d515e364aee631b178eaa5145fc1c61a3cc92", - "sha256:61a0391772490ddfb8a693c067df1ef5227257e72b0e4108482b8d41b5aee13f", - "sha256:6981eae48b3b33399c8757036c7f5d48a535b962a7c2310d19361edeef64ce29", - "sha256:7e53a8c630f71db01b28cd9602a1ada68c937cbf2c333e6ed041390d6968faf4", - "sha256:810d445ae6069ce64030c78ff6127cd9cd178a9ac3361435708b907d8a04c693", - "sha256:93601c2deb321b4bad8f95df408e3fb3943d85012dddb6121336b8e24a0d1218", - "sha256:992e451b04667116680cb88f63449267c13e1ad134f30087dec8527242e9862a", - "sha256:9db528bccb9e8e20c08e716b3b09c6bdd64da0dd129b11e160bf082d4642ac23", - "sha256:a0057f800de6acc4407fe75bb147b0c2b5cbb7c3ed110d3e5999cd01184d53b0", - "sha256:ba15742a13de85e9b8f3239c8f807723991fbfae24bad92d34a2b12e81904982", - "sha256:bce4f25c27c3435e4dace4815bcb2008b87e167e3bf4ee47ccdc5ce906eb4894", - "sha256:ca610d29415ee1a30a3f30fab7a8f4144e9d34c89a235d81292a1edb2b55f540", - "sha256:d533d5e3259720fdbc1b37444491b024003e012c5173f7d06825a77508085430", - "sha256:d84f29eb3ee44859052073b7636533ec995bd0f64e2fb43aeceefc70090e752b", - "sha256:e37c99f89929af50ffaf912454b3e3b47fd64109659026b678c091a4cd450fb2", - "sha256:e8a6ae970537e67830776488bca52000eaa37fa63b9988e8c487458d9cd5ace6", - "sha256:faf2ee02e6612577ba0181f4347bcbcf591eb122f7841ae5ba233d12c39dcb4d" + "sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f", + "sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93", + "sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11", + "sha256:4be5bb28e090456adfc1255e03967fb67ca846a03be7aadf6249096100ee32d0", + "sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9", + "sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5", + "sha256:65b76c275e4c1c5ce6e9870911384bff5ca31ab63d19c76811cb1fb162678213", + "sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d", + "sha256:6905238a754ceb7788a73f02b45637d820b2f5478b20fec82ea865e4f5d4d9f7", + "sha256:79dcf34b33e38ed1b17434693763301d7ccbd1c5860674a8f871bd15139e7837", + "sha256:7bb041dca0d784697af4646d3b62ba4a6b028276ae878e53f6b4f74ddd6db99f", + "sha256:7d5e026f8da0322b5662fa7a8e752b3fa2dac1c1cbc213c3d7ff9bdd0ab12395", + "sha256:9f50ea1132e2189d8dff0115ab75b65590a3e97de1e143795adb4ce317934995", + "sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f", + "sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597", + "sha256:b5991d523eee14756f3c8d5df5231550ae8993e2286b8014e2fdea7156ed0959", + "sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5", + "sha256:c45f8dff244b3c431b36e3224b6be4a127c6aca780853574c00faf99258041eb", + 
"sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4", + "sha256:d7de8d330763c66663661a1ffd432274a2f92f07feeddd89ffd085b5744f85e7", + "sha256:e19cb1c6365fd6dc38a6eae2dcb691d7d83935c10215aef8e6c38edee3f77abd", + "sha256:e2af80566f43c85f5797365077fb64a393861a3730bd110971ab7a0c94e873e7" ], "index": "pypi", - "version": "==24.2.0" + "version": "==24.3.0" }, "certifi": { "hashes": [ @@ -832,11 +908,11 @@ }, "packaging": { "hashes": [ - "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", - "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" + "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", + "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" ], "markers": "python_version >= '3.7'", - "version": "==23.2" + "version": "==24.0" }, "pathspec": { "hashes": [ @@ -864,11 +940,11 @@ }, "pytest": { "hashes": [ - "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae", - "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca" + "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7", + "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044" ], "index": "pypi", - "version": "==8.0.1" + "version": "==8.1.1" }, "requests": { "hashes": [ @@ -880,19 +956,11 @@ }, "requests-mock": { "hashes": [ - "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4", - "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15" + "sha256:4e34f2a2752f0b78397fb414526605d95fcdeab021ac1f26d18960e7eb41f6a8", + "sha256:4f6fdf956de568e0bac99eee4ad96b391c602e614cc0ad33e7f5c72edd699e70" ], "index": "pypi", - "version": "==1.11.0" - }, - "six": { - "hashes": [ - "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", - "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", - "version": "==1.16.0" + "version": "==1.12.0" }, "tomli": { "hashes": [ @@ -904,11 +972,11 @@ }, "typing-extensions": { "hashes": [ - "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783", - "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd" + "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475", + "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb" ], "markers": "python_version < '3.11'", - "version": "==4.9.0" + "version": "==4.10.0" }, "urllib3": { "hashes": [ diff --git a/README.md b/README.md index a83bf0e..73600aa 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,13 @@ Note: Previously, the repository comprised of self-contained scripts that could ### Reconciling files with metadata CSV ```bash -pipenv run dsaps --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD reconcile -m -o /output -d -t +pipenv run dsaps --config-file $CONFIG_FILE --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD reconcile -m -o /output -d ``` ### Creating a new collection within a DSpace community ```bash -pipenv run dsaps --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD newcollection -c -n +pipenv run dsaps --config-file $CONFIG_FILE --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD newcollection -c -n ``` ### Adding items to a DSpace collection @@ -30,13 +30,20 @@ pipenv run dsaps --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD newcolle The command below shows `newcollection` and `additems` being run in conjunction with each other. 
Note that the invocation must call `newcollection` first. In practice, this is the command that is usually run: ```bash -pipenv run dsaps --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD newcollection -c -n additems -m -f config/.json -d -t +pipenv run dsaps --config-file $CONFIG_FILE --url $DSPACE_URL -e $DSPACE_EMAIL -p $DSPACE_PASSWORD newcollection -c -n additems -m -d ``` ## Environment ### Required +```shell +# The file path to the source configuration JSON with settings for bitstream retrieval and field mappings. +CONFIG_FILE= +``` + +### Optional + ```shell # The url for the DSpace REST API DSPACE_URL= @@ -58,14 +65,16 @@ All CLI commands can be run with `pipenv run `. Usage: -c [OPTIONS] COMMAND1 [ARGS]... [COMMAND2 [ARGS]...]... Options: + --config-file TEXT File path to source configuration JSON with settings + for bitstream retrieval and field mappings. [required] --url TEXT The url for the DSpace REST API. Defaults to env var - DSPACE_URL if not set. [required] + DSPACE_URL if not set. -e, --email TEXT The email associated with the DSpace user account used for authentication. Defaults to env var DSPACE_EMAIL if - not set. [required] + not set. -p, --password TEXT The password associated with the DSpace user account used for authentication. Defaults to env var - DSPACE_PASSWORD if not set. [required] + DSPACE_PASSWORD if not set. --help Show this message and exit. Commands: @@ -87,10 +96,10 @@ Usage: -c reconcile [OPTIONS] file with a corresponding file in the content directory. * no_files.csv: File identifiers for entries in metadata CSV file - without a corresponding file in the content directory. + without a corresponding file in the content directory. * no_metadata.csv: File identifiers for files in the content directory - without a corresponding entry in the metadata CSV file. + without a corresponding entry in the metadata CSV file. * updated-.csv: Entries from the metadata CSV file with a corresponding file in the content directory. @@ -101,8 +110,6 @@ Options: -o, --output-directory TEXT The filepath where output files are written. -d, --content-directory TEXT The name of the S3 bucket containing files for DSpace uploads. [required] - -t, --file-type TEXT The file type for DSpace uploads (i.e., the - file extension, excluding the dot). --help Show this message and exit. ``` @@ -127,20 +134,18 @@ Usage: -c additems [OPTIONS] Add items to a DSpace collection. - The method relies on a CSV file with metadata for uploads, a JSON document - that maps metadata to a DSpace schema, and a directory containing the files - to be uploaded. + The updated metadata CSV file from running 'reconcile' is used for this + process. The method will first add an item to the specified DSpace + collection. The bitstreams (i.e., files) associated with the item are read + from the metadata CSV file, and uploaded to the newly created item on + DSpace. Options: - -m, --metadata-csv FILE The filepath to a CSV file containing metadata - for Dspace uploads. [required] - -f, --field-map FILE The filepath to a JSON document that maps - columns in the metadata CSV file to a DSpace - schema. [required] + -m, --metadata-csv FILE File path to a CSV file describing the + metadata and bitstreams for DSpace uploads. + [required] -d, --content-directory TEXT The name of the S3 bucket containing files for DSpace uploads. [required] - -t, --file-type TEXT The file type for DSpace uploads (i.e., the - file extension, excluding the dot). -r, --ingest-report Create ingest report for updating other systems. 
-c, --collection-handle TEXT The handle identifying a DSpace collection diff --git a/config/aspace.json b/config/aspace.json index 0244a2c..cb7d463 100644 --- a/config/aspace.json +++ b/config/aspace.json @@ -3,7 +3,7 @@ "bitstream_folders": [ "objects" ], - "id_regex": ".*-(.*?-.*)\\..*$" + "id_regex": ".*-(\\d*?-\\d*).*$" }, "mapping": { "item_identifier": { diff --git a/dsaps/cli.py b/dsaps/cli.py index 17ef937..f51ded7 100644 --- a/dsaps/cli.py +++ b/dsaps/cli.py @@ -1,15 +1,17 @@ import csv import datetime -import json import logging import os -import time + +from datetime import timedelta +from time import perf_counter import click import structlog -from dsaps import helpers -from dsaps.models import Client, Collection +from dsaps import dspace, helpers +from dsaps.s3 import S3Client + logger = structlog.get_logger() @@ -24,19 +26,22 @@ def validate_path(ctx, param, value): @click.group(chain=True) @click.option( - "--config-file", required=True, help="File path to source configuration JSON." + "--config-file", + envvar="CONFIG_FILE", + required=True, + help="File path to source configuration JSON with settings for bitstream retrieval and field mappings.", ) @click.option( "--url", envvar="DSPACE_URL", - required=True, + required=False, help="The url for the DSpace REST API. Defaults to env var DSPACE_URL if not set.", ) @click.option( "-e", "--email", envvar="DSPACE_EMAIL", - required=True, + required=False, help=( "The email associated with the DSpace user account used for authentication. " "Defaults to env var DSPACE_EMAIL if not set." @@ -46,7 +51,7 @@ def validate_path(ctx, param, value): "-p", "--password", envvar="DSPACE_PASSWORD", - required=True, + required=False, hide_input=True, help=( "The password associated with the DSpace user account used for authentication. 
" @@ -76,14 +81,16 @@ def main(ctx, config_file, url, email, password): handlers=[logging.FileHandler(f"logs/log-{log_suffix}", "w")], level=logging.INFO, ) - logger.info("Application start") - client = Client(url) - client.authenticate(email, password) - start_time = time.time() - ctx.obj["config"] = helpers.load_source_config(config_file) - ctx.obj["client"] = client - ctx.obj["start_time"] = start_time - ctx.obj["log_suffix"] = log_suffix + logger.info("Running process") + source_config = helpers.load_source_config(config_file) + if url: + dspace_client = dspace.DSpaceClient(url) + dspace_client.authenticate(email, password) + ctx.obj["dspace_client"] = dspace_client + ctx.obj["source_config"] = source_config + logger.info("Initializing S3 client") + ctx.obj["s3_client"] = S3Client.get_client() + ctx.obj["start_time"] = perf_counter() @main.command() @@ -92,14 +99,7 @@ def main(ctx, config_file, url, email, password): "--metadata-csv", required=True, type=click.Path(exists=True, file_okay=True, dir_okay=False), - help="The filepath to a CSV file containing metadata for Dspace uploads.", -) -@click.option( - "-f", - "--field-map", - required=True, - type=click.Path(exists=True, file_okay=True, dir_okay=False), - help="The filepath to a JSON document that maps columns in the metadata CSV file to a DSpace schema.", + help="File path to a CSV file describing the metadata and bitstreams for DSpace uploads.", ) @click.option( "-d", @@ -107,12 +107,6 @@ def main(ctx, config_file, url, email, password): required=True, help="The name of the S3 bucket containing files for DSpace uploads.", ) -@click.option( - "-t", - "--file-type", - help="The file type for DSpace uploads (i.e., the file extension, excluding the dot).", - default="*", -) @click.option( "-r", "--ingest-report", @@ -129,40 +123,55 @@ def main(ctx, config_file, url, email, password): def additems( ctx, metadata_csv, - field_map, content_directory, - file_type, ingest_report, collection_handle, ): """Add items to a DSpace collection. - The method relies on a CSV file with metadata for uploads, a JSON document that maps - metadata to a DSpace schema, and a directory containing the files to be uploaded. + The updated metadata CSV file from running 'reconcile' is used for this process. + The method will first add an item to the specified DSpace collection. The bitstreams + (i.e., files) associated with the item are read from the metadata CSV file, and + uploaded to the newly created item on DSpace. """ - client = ctx.obj["client"] - start_time = ctx.obj["start_time"] + mapping = ctx.obj["source_config"]["mapping"] + dspace_client = ctx.obj["dspace_client"] + if "collection_uuid" not in ctx.obj and collection_handle is None: raise click.UsageError( - "collection_handle option must be used or " - "additems must be run after newcollection " - "command." + "Option '--collection-handle' must be used or " + "run 'additems' after 'newcollection' command." 
) elif "collection_uuid" in ctx.obj: collection_uuid = ctx.obj["collection_uuid"] else: - collection_uuid = client.get_uuid_from_handle(collection_handle) - with open(metadata_csv, "r") as csvfile, open(field_map, "r") as jsonfile: + collection_uuid = dspace_client.get_uuid_from_handle(collection_handle) + + if metadata_csv is None: + raise click.UsageError("Option '--metadata-csv' must be used.") + + dspace_collection = dspace.Collection(uuid=collection_uuid) + + with open(metadata_csv, "r") as csvfile: metadata = csv.DictReader(csvfile) - mapping = json.load(jsonfile) - collection = Collection.create_metadata_for_items_from_csv(metadata, mapping) - for item in collection.items: - item.bitstreams_in_directory(content_directory, client.s3_client, file_type) - collection.uuid = collection_uuid - for item in collection.post_items(client): - logger.info(item.file_identifier) - elapsed_time = datetime.timedelta(seconds=time.time() - start_time) - logger.info(f"Total runtime : {elapsed_time}") + dspace_collection = dspace_collection.add_items(metadata, mapping) + + for item in dspace_collection.items: + logger.info(f"Posting item: {item}") + item_uuid, item_handle = dspace_client.post_item_to_collection( + collection_uuid, item + ) + item.uuid = item_uuid + item.handle = item_handle + logger.info(f"Item posted: {item_uuid}") + for bitstream in item.bitstreams: + logger.info(f"Posting bitstream: {bitstream}") + dspace_client.post_bitstream(item.uuid, bitstream) + + logger.info( + "Total elapsed: %s", + str(timedelta(seconds=perf_counter() - ctx.obj["start_time"])), + ) @main.command() @@ -181,8 +190,10 @@ def additems( @click.pass_context def newcollection(ctx, community_handle, collection_name): """Create a new DSpace collection within a community.""" - client = ctx.obj["client"] - collection_uuid = client.post_coll_to_comm(community_handle, collection_name) + dspace_client = ctx.obj["dspace_client"] + collection_uuid = dspace_client.post_collection_to_community( + community_handle, collection_name + ) ctx.obj["collection_uuid"] = collection_uuid @@ -225,22 +236,21 @@ def reconcile(ctx, metadata_csv, output_directory, content_directory): * updated-.csv: Entries from the metadata CSV file with a corresponding file in the content directory. 
""" - source_settings = ctx.obj["config"]["settings"] - client = ctx.obj["client"] - files_dict = helpers.get_files_from_s3( + source_settings = ctx.obj["source_config"]["settings"] + bitstreams = helpers.get_files_from_s3( s3_path=content_directory, - s3_client=client.s3_client, + s3_client=ctx.obj["s3_client"], bitstream_folders=source_settings.get("bitstream_folders"), id_regex=source_settings["id_regex"], ) metadata_ids = helpers.create_metadata_id_list(metadata_csv) - metadata_matches = helpers.match_metadata_to_files(files_dict.keys(), metadata_ids) - file_matches = helpers.match_files_to_metadata(files_dict.keys(), metadata_ids) + metadata_matches = helpers.match_metadata_to_files(bitstreams.keys(), metadata_ids) + file_matches = helpers.match_files_to_metadata(bitstreams.keys(), metadata_ids) no_files = set(metadata_ids) - set(metadata_matches) - no_metadata = set(files_dict.keys()) - set(file_matches) + no_metadata = set(bitstreams.keys()) - set(file_matches) helpers.create_csv_from_list(no_metadata, f"{output_directory}no_metadata") helpers.create_csv_from_list(no_files, f"{output_directory}no_files") helpers.create_csv_from_list(metadata_matches, f"{output_directory}metadata_matches") helpers.update_metadata_csv( - metadata_csv, output_directory, metadata_matches, files_dict + metadata_csv, output_directory, metadata_matches, bitstreams ) diff --git a/dsaps/models.py b/dsaps/dspace.py similarity index 57% rename from dsaps/models.py rename to dsaps/dspace.py index 7d3962c..fc004f8 100644 --- a/dsaps/models.py +++ b/dsaps/dspace.py @@ -1,27 +1,27 @@ -import operator -from functools import partial +from __future__ import annotations +import ast import attr -import boto3 +import operator import requests -import smart_open import structlog -Field = partial(attr.ib, default=None) -Group = partial(attr.ib, default=[]) +import smart_open + +from attrs import field, define + logger = structlog.get_logger() op = operator.attrgetter("name") -class Client: +class DSpaceClient: def __init__(self, url): header = {"content-type": "application/json", "accept": "application/json"} self.url = url.rstrip("/") self.cookies = None self.header = header - self.s3_client = boto3.client("s3") - logger.info("Initializing client") + logger.info("Initializing DSpace client") def authenticate(self, email, password): """Authenticate user to DSpace API.""" @@ -73,10 +73,10 @@ def filtered_item_search(self, key, string, query_type, selected_collections="") def get_uuid_from_handle(self, handle): """Get UUID for an object based on its handle.""" hdl_endpoint = f"{self.url}/handle/{handle}" - rec_obj = requests.get( + record = requests.get( hdl_endpoint, headers=self.header, cookies=self.cookies, timeout=30 ).json() - return rec_obj["uuid"] + return record["uuid"] def get_record(self, uuid, record_type): """Get an individual record of a specified type.""" @@ -85,21 +85,22 @@ def get_record(self, uuid, record_type): url, headers=self.header, cookies=self.cookies, timeout=30 ).json() if record_type == "items": - rec_obj = self._populate_class_instance(Item, record) + dspace_object = self._populate_class_instance(Item, record) elif record_type == "communities": - rec_obj = self._populate_class_instance(Community, record) + dspace_object = self._populate_class_instance(Community, record) elif record_type == "collections": - rec_obj = self._populate_class_instance(Collection, record) + dspace_object = self._populate_class_instance(Collection, record) else: logger.info("Invalid record type.") exit() - return rec_obj 
+        return dspace_object
 
     def post_bitstream(self, item_uuid, bitstream):
         """Post a bitstream to a specified item and return the bitstream ID."""
-        endpoint = f"{self.url}/items/{item_uuid}" f"/bitstreams?name={bitstream.name}"
+        endpoint = f"{self.url}/items/{item_uuid}/bitstreams?name={bitstream.name}"
         header_upload = {"accept": "application/json"}
+        logger.info(endpoint)
         with smart_open.open(bitstream.file_path, "rb") as data:
             post_response = requests.post(
                 endpoint,
@@ -114,7 +115,7 @@ def post_bitstream(self, item_uuid, bitstream):
             bitstream_uuid = response["uuid"]
             return bitstream_uuid
 
-    def post_coll_to_comm(self, comm_handle, coll_name):
+    def post_collection_to_community(self, comm_handle, coll_name):
         """Post a collection to a specified community."""
         hdl_endpoint = f"{self.url}/handle/{comm_handle}"
         community = requests.get(
@@ -151,118 +152,130 @@ def post_item_to_collection(self, collection_uuid, item):
         item_handle = post_response["handle"]
         return item_uuid, item_handle
 
-    def _populate_class_instance(self, class_type, rec_obj):
+    def _populate_class_instance(self, class_type, record):
         """Populate class instance with data from record."""
         fields = [op(field) for field in attr.fields(class_type)]
-        kwargs = {k: v for k, v in rec_obj.items() if k in fields}
-        kwargs["objtype"] = rec_obj["type"]
+        kwargs = {k: v for k, v in record.items() if k in fields}
+        kwargs["type"] = record["type"]
         if class_type == Community:
-            collections = self._build_uuid_list(rec_obj, kwargs, "collections")
-            rec_obj["collections"] = collections
+            collections = self._build_uuid_list(record, "collections")
+            kwargs["collections"] = collections
         elif class_type == Collection:
-            items = self._build_uuid_list(rec_obj, "items")
-            rec_obj["items"] = items
-            rec_obj = class_type(**kwargs)
-            return rec_obj
+            items = self._build_uuid_list(record, "items")
+            kwargs["items"] = items
+        return class_type(**kwargs)
 
-    def _build_uuid_list(self, rec_obj, children):
+    def _build_uuid_list(self, record, children):
         """Build list of the uuids of the object's children."""
         child_list = []
-        for child in rec_obj[children]:
+        for child in record[children]:
             child_list.append(child["uuid"])
         return child_list
 
 
-@attr.s
-class BaseRecord:
-    uuid = Field()
-    name = Field()
-    handle = Field()
-    link = Field()
-    objtype = Field()
-
-
-@attr.s
-class Collection(BaseRecord):
-    items = Group()
-
-    def post_items(self, client):
-        """Post items to collection."""
-        for item in self.items:
-            logger.info(f"Posting item: {item}")
-            item_uuid, item_handle = client.post_item_to_collection(self.uuid, item)
-            item.uuid = item_uuid
-            item.handle = item_handle
-            logger.info(f"Item posted: {item_uuid}")
-            for bitstream in item.bitstreams:
-                bitstream_uuid = client.post_bitstream(item_uuid, bitstream)
-                bitstream.uuid = bitstream_uuid
-                logger.info(f"Bitstream posted: {bitstream_uuid}")
-            yield item
+@define
+class Bitstream:
+    name = field(default=None)
+    file_path = field(default=None)
 
-    @classmethod
-    def create_metadata_for_items_from_csv(cls, csv_reader, field_map):
-        """Create metadata for the collection's items based on a CSV and a JSON mapping
-        field map."""
-        items = [Item.metadata_from_csv_row(row, field_map) for row in csv_reader]
-        return cls(items=items)
+
+@define
+class MetadataEntry:
+    key = field(default=None)
+    value = field(default=None)
+    language = field(default=None)
 
-@attr.s
-class Community(BaseRecord):
-    collections = Field()
+
+@define
+class Object:
+    uuid = field(default=None)
+    name = field(default=None)
+    handle = field(default=None)
+    link = field(default=None)
+    type = field(default=None)
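To make the new mapping-driven flow concrete, here is a minimal sketch of what `Item.create` (defined just below) yields for a single CSV row. The record and mapping values are lifted from `tests/fixtures/updated-source_metadata.csv` and `tests/fixtures/source_config.json` in this diff; the results in the comments are inferred from the class logic rather than taken from a test run.

```python
from dsaps.dspace import Item

# One row from tests/fixtures/updated-source_metadata.csv, as csv.DictReader yields it.
record = {
    "item_identifier": "001",
    "title": "Title 1",
    "author": "May Smith",
    "bitstreams": "['s3://mocked-bucket/one-to-one/aaaa_001_01.pdf']",
}

# The "mapping" block from tests/fixtures/source_config.json.
mapping = {
    "item_identifier": {"csv_field_name": "item_identifier", "language": None, "delimiter": ""},
    "dc.title": {"csv_field_name": "title", "language": "en_US", "delimiter": ""},
    "dc.contributor.author": {"csv_field_name": "author", "language": None, "delimiter": "|"},
}

item = Item.create(record, mapping)
# item.item_identifier -> "001"
# item.metadata   -> [MetadataEntry(key="dc.title", value="Title 1", language="en_US"),
#                     MetadataEntry(key="dc.contributor.author", value="May Smith", language=None)]
# item.bitstreams -> [Bitstream(name="aaaa_001_01.pdf",
#                               file_path="s3://mocked-bucket/one-to-one/aaaa_001_01.pdf")]
```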
-@attr.s
-class Item(BaseRecord):
-    metadata = Group()
-    bitstreams = Group()
-    file_identifier = Field()
-    source_system_identifier = Field()
 
-    def bitstreams_in_directory(self, directory, s3_client, file_type=None):
-        """Create a list of bitstreams from the specified directory and sort the list."""
-        pass
+@define
+class Item(Object):
+    metadata = field(factory=list)
+    bitstreams = field(factory=list)
+    item_identifier = field(default=None)
+    source_system_identifier = field(default=None)
 
     @classmethod
-    def metadata_from_csv_row(cls, row, field_map):
-        """Create metadata for an item based on a CSV row and a JSON mapping field map."""
-        metadata = []
-        for f in field_map:
-            field = row[field_map[f]["csv_field_name"]]
-            if field != "":
-                if f == "file_identifier":
-                    file_identifier = field
-                    continue  # file_identifier is not included in DSpace metadata
-                if f == "source_system_identifier":
-                    # source_system_identifier = field
-                    continue  # source_system_identifier is not included in DSpace
-                delimiter = field_map[f]["delimiter"]
-                language = field_map[f]["language"]
-                if delimiter:
-                    metadata.extend(
-                        [
-                            MetadataEntry(key=f, value=v, language=language)
-                            for v in field.split(delimiter)
-                        ]
-                    )
-                else:
-                    metadata.append(MetadataEntry(key=f, value=field, language=language))
+    def create(cls, record, mapping) -> Item:
         return cls(
-            metadata=metadata,
-            file_identifier=file_identifier,
-            # source_system_identifier=source_system_identifier,
+            metadata=cls.get_metadata(record, mapping),
+            bitstreams=cls.get_bitstreams(record),
+            **cls.get_ids(record, mapping),
         )
 
+    @classmethod
+    def get_bitstreams(cls, record) -> list:
+        bitstreams = []
+        if _bitstreams := record.get("bitstreams"):
+            for file_path in ast.literal_eval(_bitstreams):
+                file_name = file_path.split("/")[-1]
+                bitstreams.append(Bitstream(name=file_name, file_path=file_path))
+        return bitstreams
+
+    @classmethod
+    def get_ids(cls, record, mapping) -> dict:
+        ids = {}
+        if item_id_mapping := mapping.get("item_identifier"):
+            ids["item_identifier"] = record.get(item_id_mapping["csv_field_name"])
+        if source_system_id_mapping := mapping.get("source_system_identifier"):
+            ids["source_system_identifier"] = record.get(
+                source_system_id_mapping["csv_field_name"]
+            )
+        return ids
+
+    @classmethod
+    def get_metadata(cls, record, mapping) -> list:
+        """Create metadata for an item based on a CSV row and a JSON mapping field map."""
+        metadata = []
+        for field_name, field_mapping in mapping.items():
+            if field_name not in ["item_identifier", "source_system_identifier"]:
+                field_value = record[field_mapping["csv_field_name"]]
+
+                if field_value:
+                    delimiter = field_mapping["delimiter"]
+                    language = field_mapping["language"]
+                    if delimiter:
+                        metadata.extend(
+                            [
+                                MetadataEntry(
+                                    key=field_name,
+                                    value=value,
+                                    language=language,
+                                )
+                                for value in field_value.split(delimiter)
+                            ]
+                        )
+                    else:
+                        metadata.append(
+                            MetadataEntry(
+                                key=field_name,
+                                value=field_value,
+                                language=language,
+                            )
+                        )
+        return metadata
+
+
+@define
+class Collection(Object):
+    items = field(factory=list)
+
+    @classmethod
+    def add_items(cls, csv_reader, field_map) -> Collection:
+        """Create metadata for the collection's items based on a CSV and a JSON mapping
+        field map."""
+        items = [Item.create(row, field_map) for row in csv_reader]
+        return 
cls(items=items) + + +@define +class Community(Object): + collections = field(default=None) diff --git a/dsaps/helpers.py b/dsaps/helpers.py index cb5f21b..c98e0db 100644 --- a/dsaps/helpers.py +++ b/dsaps/helpers.py @@ -1,12 +1,12 @@ +import ast import csv import os import re import yaml -from typing import Literal - import smart_open + S3_BUCKET_REGEX = re.compile(r"^([^\/]*)") S3_PREFIX_REGEX = re.compile(r"(?<=\/)(.*)") @@ -49,14 +49,15 @@ def get_files_from_s3( """ files = {} s3_path = s3_path.removeprefix("s3://") - operation_parameters = {"Bucket": parse_value_from_text(s3_path, S3_BUCKET_REGEX)} + bucket = parse_value_from_text(s3_path, S3_BUCKET_REGEX) + operation_parameters = {"Bucket": bucket} if prefix := parse_value_from_text(s3_path, S3_PREFIX_REGEX): operation_parameters.update({"Prefix": prefix}) paginator = s3_client.get_paginator("list_objects_v2") for page in paginator.paginate(**operation_parameters): for file in page["Contents"]: - file_path = file["Key"] + file_path = f"s3://{bucket}/{file['Key']}" file_name = file_path.split("/")[-1] if bitstream_folders: # if the object is not stored in any of the folders specified @@ -64,8 +65,8 @@ def get_files_from_s3( if not [folder for folder in bitstream_folders if folder in file_path]: continue item_identifier = parse_value_from_text(file_name, id_regex) - files.setdefault(item_identifier, []).append(file["Key"]) - return files + files.setdefault(item_identifier, []).append(file_path) + return dict(sorted(files.items())) def parse_value_from_text( @@ -96,7 +97,7 @@ def create_metadata_id_list(metadata_csv): with open(metadata_csv) as csvfile: reader = csv.DictReader(csvfile) metadata_ids = [ - row["file_identifier"] for row in reader if row["file_identifier"] != "" + row["item_identifier"] for row in reader if row["item_identifier"] != "" ] return metadata_ids @@ -133,6 +134,6 @@ def update_metadata_csv(metadata_csv, output_directory, metadata_matches, files_ writer = csv.DictWriter(updated_csv, fieldnames=fieldnames) writer.writeheader() for row in reader: - if row["file_identifier"] in metadata_matches: - row["bitstreams"] = files_dict[row["file_identifier"]] + if row["item_identifier"] in metadata_matches: + row["bitstreams"] = files_dict[row["item_identifier"]] writer.writerow(row) diff --git a/dsaps/s3.py b/dsaps/s3.py new file mode 100644 index 0000000..5b1bc63 --- /dev/null +++ b/dsaps/s3.py @@ -0,0 +1,7 @@ +import boto3 + + +class S3Client: + @classmethod + def get_client(cls): + return boto3.client("s3") diff --git a/tests/conftest.py b/tests/conftest.py index 06331b2..5209596 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,13 +1,16 @@ import csv import json +import yaml import boto3 import pytest import requests_mock +import smart_open + from click.testing import CliRunner from moto import mock_aws -from dsaps import models +from dsaps import dspace # Env fixtures @@ -19,6 +22,40 @@ def _test_environment(monkeypatch): monkeypatch.setenv("AWS_SESSION_TOKEN", "testing") +@pytest.fixture +def source_config(): + with smart_open.open("tests/fixtures/source_config.json", "r") as file: + return yaml.safe_load(file) + + +@pytest.fixture +def source_metadata_csv(): + with open("tests/fixtures/source_metadata.csv") as file: + reader = csv.DictReader(file) + yield reader + + +@pytest.fixture +def source_metadata_csv_with_bitstreams(): + with open("tests/fixtures/updated-source_metadata.csv") as file: + reader = csv.DictReader(file) + yield reader + + +@pytest.fixture() +def dspace_client(): + dspace_client = 
dspace.DSpaceClient("mock://example.com/") + dspace_client.header = {} + dspace_client.cookies = {} + dspace_client.user_full_name = "" + return dspace_client + + +@pytest.fixture() +def s3_client(): + return boto3.client("s3", region_name="us-east-1") + + @pytest.fixture def mocked_s3_bucket(): bucket_name = "mocked-bucket" @@ -57,60 +94,19 @@ def mocked_s3_bucket(): yield -@pytest.fixture() -def mocked_s3(): - with mock_aws(): - s3_instance = boto3.client("s3", region_name="us-east-1") - s3_instance.create_bucket(Bucket="test-bucket") - s3_instance.put_object( - Body="", - Bucket="test-bucket", - Key="test_01.pdf", - ) - s3_instance.put_object( - Body="", - Bucket="test-bucket", - Key="test_02.pdf", - ) - s3_instance.put_object( - Body="", - Bucket="test-bucket", - Key="best_01.pdf", - ) - s3_instance.put_object( - Body="", - Bucket="test-bucket", - Key="test_01.jpg", - ) - yield s3_instance - - -@pytest.fixture() -def s3_client(): - return boto3.client("s3", region_name="us-east-1") - - -@pytest.fixture() -def client(): - client = models.Client("mock://example.com/") - client.header = {} - client.cookies = {} - client.user_full_name = "" - return client - - -@pytest.fixture() -def aspace_delimited_csv(): - with open("tests/fixtures/aspace_metadata_delimited.csv") as f: - reader = csv.DictReader(f) - yield reader - - -@pytest.fixture() -def aspace_mapping(): - with open("config/aspace_mapping.json") as f: - mapping = json.load(f) - yield mapping +@pytest.fixture +def mocked_s3_bucket_bitstreams(): + return { + "001": ["s3://mocked-bucket/one-to-one/aaaa_001_01.pdf"], + "002": ["s3://mocked-bucket/one-to-one/aaaa_002_01.pdf"], + "003": [ + "s3://mocked-bucket/many-to-one/bbbb_003_01.jpg", + "s3://mocked-bucket/many-to-one/bbbb_003_01.pdf", + "s3://mocked-bucket/many-to-one/bbbb_003_02.pdf", + ], + "004": ["s3://mocked-bucket/many-to-one/bbbb_004_01.pdf"], + "005": ["s3://mocked-bucket/nested/prefix/objects/include_005_01.pdf"], + } @pytest.fixture() @@ -127,34 +123,93 @@ def runner(): @pytest.fixture(autouse=True) def web_mock(): - with requests_mock.Mocker() as m: + with requests_mock.Mocker() as mocked_request: + # DSpace authentication cookies = {"JSESSIONID": "11111111"} - m.post("mock://example.com/login", cookies=cookies) + mocked_request.post("mock://example.com/login", cookies=cookies) user_json = {"fullname": "User Name"} - m.get("mock://example.com/status", json=user_json) - rec_json = {"metadata": {"title": "Sample title"}, "type": "item"} - m.get("mock://example.com/items/123?expand=all", json=rec_json) - results_json1 = {"items": [{"link": "1234"}]} - results_json2 = {"items": []} - m.get( - "mock://example.com/filtered-items?", - [{"json": results_json1}, {"json": results_json2}], + mocked_request.get("mock://example.com/status", json=user_json) + + # get - retrieve item + item_get_url = "mock://example.com/items/123?expand=all" + item_get_response = {"metadata": {"title": "Sample title"}, "type": "item"} + mocked_request.get(item_get_url, json=item_get_response) + + # get - retrieve uuid from handle + uuid_get_url = "mock://example.com/handle/111.1111" + uuid_get_response = {"uuid": "a1b2"} + mocked_request.get(uuid_get_url, json=uuid_get_response) + + # get - retrieve uuid from handle (for test_cli.test_additems ) + uuid_get_url_2 = "mock://example.com/handle/333.3333" + uuid_get_response_2 = {"uuid": "k1l2"} + mocked_request.get(uuid_get_url_2, json=uuid_get_response_2) + + # get - retrieve filtered set of items + filtered_items_get_url = 
"mock://example.com/filtered-items?" + filtered_items_get_response = [ + {"json": {"items": [{"link": "1234"}]}}, + {"json": {"items": []}}, + ] + mocked_request.get(filtered_items_get_url, filtered_items_get_response) + + # post - add collection to community + collection_post_url = "mock://example.com/communities/a1b2/collections" + collection_post_response = {"uuid": "c3d4"} + mocked_request.post(collection_post_url, json=collection_post_response) + + # post - add item to collection + item_post_url = "mock://example.com/collections/c3d4/items" + item_post_response = {"uuid": "e5f6", "handle": "222.2222"} + mocked_request.post(item_post_url, json=item_post_response) + + # post - add item to collection (for test_cli.test_additems) + item_post_url_2 = "mock://example.com/collections/k1l2/items" + item_post_response_2 = {"uuid": "e5f6", "handle": "222.2222"} + mocked_request.post(item_post_url_2, json=item_post_response_2) + + # post - add bitstream to item + bitstream_post_url = ( + "mock://example.com/items/e5f6/bitstreams?name=aaaa_001_01.pdf" + ) + bitstream_post_response = {"uuid": "g7h8"} + mocked_request.post(bitstream_post_url, json=bitstream_post_response) + + bitstream_post_url_2 = ( + "mock://example.com/items/e5f6/bitstreams?name=aaaa_002_01.pdf" + ) + bitstream_post_response_2 = {"uuid": "i9j0"} + mocked_request.post(bitstream_post_url_2, json=bitstream_post_response_2) + + bitstream_post_url_3 = ( + "mock://example.com/items/e5f6/bitstreams?name=bbbb_003_01.jpg" + ) + bitstream_post_response_3 = {"uuid": "item_003_01_a"} + mocked_request.post(bitstream_post_url_3, json=bitstream_post_response_3) + + bitstream_post_url_4 = ( + "mock://example.com/items/e5f6/bitstreams?name=bbbb_003_01.pdf" ) - rec_json = {"uuid": "a1b2"} - m.get("mock://example.com/handle/111.1111", json=rec_json) - coll_json = {"uuid": "c3d4"} - m.post("mock://example.com/communities/a1b2/collections", json=coll_json) - item_json = {"uuid": "e5f6", "handle": "222.2222"} - m.post("mock://example.com/collections/c3d4/items", json=item_json) - b_json_1 = {"uuid": "g7h8"} - url_1 = "mock://example.com/items/e5f6/bitstreams?name=test_01.pdf" - m.post(url_1, json=b_json_1) - b_json_2 = {"uuid": "i9j0"} - url_2 = "mock://example.com/items/e5f6/bitstreams?name=test_02.pdf" - m.post(url_2, json=b_json_2) - m.get("mock://remoteserver.com/files/test_01.pdf", content=b"Sample") - coll_json = {"uuid": "k1l2"} - m.get("mock://example.com/handle/333.3333", json=coll_json) - item_json_2 = {"uuid": "e5f6", "handle": "222.2222"} - m.post("mock://example.com/collections/k1l2/items", json=item_json_2) - yield m + bitstream_post_response_4 = {"uuid": "item_003_01_b"} + mocked_request.post(bitstream_post_url_4, json=bitstream_post_response_4) + + bitstream_post_url_5 = ( + "mock://example.com/items/e5f6/bitstreams?name=bbbb_003_02.pdf" + ) + bitstream_post_response_5 = {"uuid": "item_003_02_a"} + mocked_request.post(bitstream_post_url_5, json=bitstream_post_response_5) + + bitstream_post_url_6 = ( + "mock://example.com/items/e5f6/bitstreams?name=bbbb_004_01.pdf" + ) + bitstream_post_response_6 = {"uuid": "item_004_01_a"} + mocked_request.post(bitstream_post_url_6, json=bitstream_post_response_6) + + bitstream_post_url_7 = ( + "mock://example.com/items/e5f6/bitstreams?name=include_005_01.pdf" + ) + bitstream_post_response_7 = {"uuid": "item_005_01_a"} + mocked_request.post(bitstream_post_url_7, json=bitstream_post_response_7) + # mocked_request.get("mock://remoteserver.com/files/test_01.pdf", content=b"Sample") + + yield 
mocked_request
diff --git a/tests/fixtures/source_config.json b/tests/fixtures/source_config.json
index 8b1bc59..977de0a 100644
--- a/tests/fixtures/source_config.json
+++ b/tests/fixtures/source_config.json
@@ -1,13 +1,11 @@
 {
   "settings": {
-    "bitstream_folders": [
-      "objects"
-    ],
-    "id_regex": ".*-(.*?-.*)\\..*$"
+    "bitstream_folders": [],
+    "id_regex": "_(.*)_"
   },
   "mapping": {
-    "file_identifier": {
-      "csv_field_name": "file_identifier",
+    "item_identifier": {
+      "csv_field_name": "item_identifier",
       "language": null,
       "delimiter": ""
     },
@@ -16,30 +14,10 @@
       "language": "en_US",
       "delimiter": ""
     },
-    "source_system_identifier": {
-      "csv_field_name": "uri",
-      "language": null,
-      "delimiter": ""
-    },
     "dc.contributor.author": {
       "csv_field_name": "author",
       "language": null,
       "delimiter": "|"
-    },
-    "dc.description": {
-      "csv_field_name": "description",
-      "language": "en_US",
-      "delimiter": ""
-    },
-    "dc.rights": {
-      "csv_field_name": "rights_statement",
-      "language": "en_US",
-      "delimiter": ""
-    },
-    "dc.rights.uri": {
-      "csv_field_name": "rights_uri",
-      "language": null,
-      "delimiter": ""
     }
   }
 }
\ No newline at end of file
diff --git a/tests/fixtures/source_metadata.csv b/tests/fixtures/source_metadata.csv
new file mode 100644
index 0000000..c6cec15
--- /dev/null
+++ b/tests/fixtures/source_metadata.csv
@@ -0,0 +1,6 @@
+item_identifier,title,author
+001,Title 1,May Smith
+002,Title 2,May Smith
+003,Title 3,June Smith
+004,Title 4,June Smith
+005,Title 5,July Smith
\ No newline at end of file
diff --git a/tests/fixtures/updated-source_metadata.csv b/tests/fixtures/updated-source_metadata.csv
new file mode 100644
index 0000000..73d5a36
--- /dev/null
+++ b/tests/fixtures/updated-source_metadata.csv
@@ -0,0 +1,6 @@
+item_identifier,title,author,bitstreams
+001,Title 1,May Smith,['s3://mocked-bucket/one-to-one/aaaa_001_01.pdf']
+002,Title 2,May Smith,['s3://mocked-bucket/one-to-one/aaaa_002_01.pdf']
+003,Title 3,June Smith,"['s3://mocked-bucket/many-to-one/bbbb_003_01.jpg', 's3://mocked-bucket/many-to-one/bbbb_003_01.pdf', 's3://mocked-bucket/many-to-one/bbbb_003_02.pdf']"
+004,Title 4,June Smith,['s3://mocked-bucket/many-to-one/bbbb_004_01.pdf']
+005,Title 5,July Smith,['s3://mocked-bucket/nested/prefix/objects/include_005_01.pdf']
diff --git a/tests/test_cli.py b/tests/test_cli.py
index c0ea336..feba613 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -4,7 +4,7 @@


 @mock_aws
-def test_additems(runner, mocked_s3):
+def test_additems(runner, mocked_s3_bucket, caplog):
     """Test adding items to a collection."""
     result = runner.invoke(
         main,
@@ -19,18 +19,15 @@ def test_additems(runner, mocked_s3):
             "1234",
             "additems",
             "--metadata-csv",
-            "tests/fixtures/aspace_metadata_delimited.csv",
-            "--field-map",
-            "config/aspace_mapping.json",
+            "tests/fixtures/updated-source_metadata.csv",
             "--content-directory",
-            "s3://test-bucket",
-            "--file-type",
-            "pdf",
+            "s3://mocked-bucket",
             "--collection-handle",
             "333.3333",
         ],
     )
     assert result.exit_code == 0
+
     result = runner.invoke(
         main,
         [
@@ -49,13 +46,9 @@ def test_additems(runner, mocked_s3):
             "Test Collection",
             "additems",
             "--metadata-csv",
-            "tests/fixtures/aspace_metadata_delimited.csv",
-            "--field-map",
-            "config/aspace_mapping.json",
+            "tests/fixtures/updated-source_metadata.csv",
             "--content-directory",
-            "s3://test-bucket",
-            "--file-type",
-            "pdf",
+            "s3://mocked-bucket",
         ],
     )
     assert result.exit_code == 0
@@ -85,7 +78,7 @@ def test_newcollection(runner):


 @mock_aws
-def test_reconcile(runner, mocked_s3, output_dir):
+def test_reconcile(runner, mocked_s3_bucket, output_dir):
     """Test reconcile command."""
     result = runner.invoke(
         main,
@@ -100,11 +93,11 @@ def test_reconcile(runner, mocked_s3, output_dir):
             "1234",
             "reconcile",
             "--metadata-csv",
-            "tests/fixtures/aspace_metadata_delimited.csv",
+            "tests/fixtures/source_metadata.csv",
             "--output-directory",
             output_dir,
             "--content-directory",
-            "s3://test-bucket",
+            "s3://mocked-bucket",
         ],
     )
     assert result.exit_code == 0
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index fb38680..6d8ceb2 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -11,7 +11,7 @@
     parse_value_from_text,
     update_metadata_csv,
 )
-from dsaps.models import Item
+from dsaps import dspace

 REGEX_ID_BETWEEN_UNDERSCORES = "_(.*)_"
 REGEX_ID_BEFORE_UNDERSCORES = "(.*?)_"
@@ -20,8 +20,8 @@
 def test_load_source_config():
     assert load_source_config("tests/fixtures/source_config.json")["settings"] == {
-        "bitstream_folders": ["objects"],
-        "id_regex": ".*-(.*?-.*)\\..*$",
+        "bitstream_folders": [],
+        "id_regex": "_(.*)_",
     }
@@ -61,10 +61,10 @@ def test_get_files_from_s3_one_file_per_file_id(mocked_s3_bucket, s3_client):
     )
     assert files == {
         "001": [
-            "one-to-one/aaaa_001_01.pdf",
+            "s3://mocked-bucket/one-to-one/aaaa_001_01.pdf",
         ],
         "002": [
-            "one-to-one/aaaa_002_01.pdf",
+            "s3://mocked-bucket/one-to-one/aaaa_002_01.pdf",
         ],
     }
@@ -77,11 +77,11 @@ def test_get_files_from_s3_many_files_per_file_id(mocked_s3_bucket, s3_client):
     )
     assert files == {
         "003": [
-            "many-to-one/bbbb_003_01.jpg",
-            "many-to-one/bbbb_003_01.pdf",
-            "many-to-one/bbbb_003_02.pdf",
+            "s3://mocked-bucket/many-to-one/bbbb_003_01.jpg",
+            "s3://mocked-bucket/many-to-one/bbbb_003_01.pdf",
+            "s3://mocked-bucket/many-to-one/bbbb_003_02.pdf",
         ],
-        "004": ["many-to-one/bbbb_004_01.pdf"],
+        "004": ["s3://mocked-bucket/many-to-one/bbbb_004_01.pdf"],
     }
@@ -92,7 +92,9 @@ def test_get_files_from_s3_with_bitstream_folders(mocked_s3_bucket, s3_client):
         bitstream_folders=["objects"],
         id_regex=REGEX_ID_BETWEEN_UNDERSCORES,
     )
-    assert files == {"005": ["nested/prefix/objects/include_005_01.pdf"]}
+    assert files == {
+        "005": ["s3://mocked-bucket/nested/prefix/objects/include_005_01.pdf"]
+    }


 def test_get_files_from_s3_without_bitstream_folders(mocked_s3_bucket, s3_client):
@@ -103,25 +105,25 @@ def test_get_files_from_s3_without_bitstream_folders(mocked_s3_client
     )
     assert files == {
         "001": [
-            "one-to-one/aaaa_001_01.pdf",
+            "s3://mocked-bucket/one-to-one/aaaa_001_01.pdf",
         ],
         "002": [
-            "one-to-one/aaaa_002_01.pdf",
+            "s3://mocked-bucket/one-to-one/aaaa_002_01.pdf",
        ],
         "003": [
-            "many-to-one/bbbb_003_01.jpg",
-            "many-to-one/bbbb_003_01.pdf",
-            "many-to-one/bbbb_003_02.pdf",
+            "s3://mocked-bucket/many-to-one/bbbb_003_01.jpg",
+            "s3://mocked-bucket/many-to-one/bbbb_003_01.pdf",
+            "s3://mocked-bucket/many-to-one/bbbb_003_02.pdf",
         ],
-        "004": ["many-to-one/bbbb_004_01.pdf"],
-        "005": ["nested/prefix/objects/include_005_01.pdf"],
+        "004": ["s3://mocked-bucket/many-to-one/bbbb_004_01.pdf"],
+        "005": ["s3://mocked-bucket/nested/prefix/objects/include_005_01.pdf"],
     }


 def test_create_ingest_report(runner, output_dir):
     """Test create_ingest_report function."""
     file_name = "ingest_report.csv"
-    items = [Item(source_system_identifier="/repo/0/ao/123", handle="111.1111")]
+    items = [dspace.Item(source_system_identifier="/repo/0/ao/123", handle="111.1111")]
     create_ingest_report(items, f"{output_dir}{file_name}")
     with open(f"{output_dir}{file_name}") as csvfile:
         reader = csv.DictReader(csvfile)
@@ -132,10 +134,9 @@ def test_create_ingest_report(runner, output_dir):

 def test_create_metadata_id_list():
     """Test create_metadata_id_list function."""
-    metadata_path = "tests/fixtures/aspace_metadata_delimited.csv"
+    metadata_path = "tests/fixtures/source_metadata.csv"
     metadata_ids = create_metadata_id_list(metadata_path)
-    assert "test" in metadata_ids
-    assert "tast" in metadata_ids
+    assert metadata_ids == ["001", "002", "003", "004", "005"]


 def test_match_files_to_metadata():
@@ -156,19 +157,24 @@ def test_match_metadata_to_files():
     assert "test" in file_matches


-def test_update_metadata_csv(output_dir):
+def test_update_metadata_csv(
+    output_dir, mocked_s3_bucket_bitstreams, source_metadata_csv_with_bitstreams
+):
     """Test update_metadata_csv function."""
-    metadata_matches = ["test"]
+    updated_records = []
+    expected_records = []
     update_metadata_csv(
-        "tests/fixtures/aspace_metadata_delimited.csv",
-        output_dir,
-        metadata_matches,
-        {"test": ["/test/test_01.pdf"]},
+        metadata_csv="tests/fixtures/source_metadata.csv",
+        output_directory=output_dir,
+        metadata_matches=["001", "002", "003", "004", "005"],
+        files_dict=mocked_s3_bucket_bitstreams,
     )
-    with open(f"{output_dir}updated-aspace_metadata_delimited.csv") as csvfile:
+    with open(f"{output_dir}/updated-source_metadata.csv") as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
-            assert row["uri"] == "/repo/0/ao/123"
-            assert row["title"] == "Test Item"
-            assert row["file_identifier"] == "test"
-            assert row["bitstreams"] == "['/test/test_01.pdf']"
+            updated_records.append(row)
+
+    for row in source_metadata_csv_with_bitstreams:
+        expected_records.append(row)
+
+    assert updated_records == expected_records
diff --git a/tests/test_models.py b/tests/test_models.py
index 6854570..2058d70 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,134 +1,130 @@
 import attr
 from moto import mock_aws

-from dsaps import models
+from dsaps.dspace import Bitstream, Collection, Item, MetadataEntry


-def test_authenticate(client):
-    """Test authenticate method."""
+def test_dspace_client_authenticate(dspace_client):
     email = "test@test.mock"
     password = "1234"
-    client.authenticate(email, password)
-    assert client.user_full_name == "User Name"
-    assert client.cookies == {"JSESSIONID": "11111111"}
-
-
-def test_filtered_item_search(client):
-    """Test filtered_item_search method."""
-    key = "dc.title"
-    string = "test"
-    query_type = "contains"
-    item_links = client.filtered_item_search(
-        key, string, query_type, selected_collections=""
+    dspace_client.authenticate(email, password)
+    assert dspace_client.user_full_name == "User Name"
+    assert dspace_client.cookies == {"JSESSIONID": "11111111"}
+
+
+def test_filtered_item_search(dspace_client):
+    item_links = dspace_client.filtered_item_search(
+        key="dc.title", string="test", query_type="contains", selected_collections=""
     )
     assert "1234" in item_links


-def test_get_uuid_from_handle(client):
-    """Test get_uuid_from_handle method."""
-    id = client.get_uuid_from_handle("111.1111")
+def test_get_uuid_from_handle(dspace_client):
+    id = dspace_client.get_uuid_from_handle("111.1111")
     assert id == "a1b2"


-def test_get_record(client):
-    """Test get_record method."""
-    rec_obj = client.get_record("123", "items")
-    assert attr.asdict(rec_obj)["metadata"] == {"title": "Sample title"}
+def test_get_record(dspace_client):
+    dspace_item = dspace_client.get_record("123", "items")
+    assert attr.asdict(dspace_item)["metadata"] == {"title": "Sample title"}


-def test_post_bitstream(client, mocked_s3):
-    """Test post_bitstream method."""
+def test_post_bitstream(dspace_client, mocked_s3_bucket):
     item_uuid = "e5f6"
-    bitstream = models.Bitstream(
-        name="test_01.pdf", file_path="s3://test-bucket/test_01.pdf"
+    bitstream = Bitstream(
+        name="aaaa_001_01.pdf", file_path="s3://mocked-bucket/one-to-one/aaaa_001_01.pdf"
     )
-    bit_uuid = client.post_bitstream(item_uuid, bitstream)
+    bit_uuid = dspace_client.post_bitstream(item_uuid, bitstream)
     assert bit_uuid == "g7h8"


-def test_post_coll_to_comm(client):
-    """Test post_coll_to_comm method."""
+def test_post_collection_to_community(dspace_client):
     comm_handle = "111.1111"
     coll_name = "Test Collection"
-    coll_uuid = client.post_coll_to_comm(comm_handle, coll_name)
+    coll_uuid = dspace_client.post_collection_to_community(comm_handle, coll_name)
     assert coll_uuid == "c3d4"


 @mock_aws
-def test_post_item_to_collection(client, mocked_s3):
-    """Test post_item_to_collection method."""
-    item = models.Item()
+def test_post_item_to_collection(dspace_client, mocked_s3_bucket):
+    item = Item()
     item.bitstreams = [
-        models.Bitstream(name="test_01.pdf", file_path="s3://test-bucket/test_01.pdf")
+        Bitstream(name="aaaa_001_01.pdf", file_path="s3://mocked-bucket/aaaa_001_01.pdf")
     ]
     item.metadata = [
-        models.MetadataEntry(key="file_identifier", value="test"),
-        models.MetadataEntry(
-            key="dc.title", value="Monitoring Works: Getting Teachers", language="en_US"
-        ),
-        models.MetadataEntry(key="dc.relation.isversionof", value="repo/0/ao/123"),
+        MetadataEntry(key="dc.title", value="Title 1", language="en_US"),
+        MetadataEntry(key="dc.contributor.author", value="May Smith", language=None),
     ]
-    coll_uuid = "c3d4"
-    item_uuid, item_handle = client.post_item_to_collection(coll_uuid, item)
+    collection_uuid = "c3d4"
+    item_uuid, item_handle = dspace_client.post_item_to_collection(collection_uuid, item)
     assert item_uuid == "e5f6"
     assert item_handle == "222.2222"


-def test__populate_class_instance(client):
-    """Test _populate_class_instance method."""
-    class_type = models.Collection
-    rec_obj = {"name": "Test title", "type": "collection", "items": []}
-    rec_obj = client._populate_class_instance(class_type, rec_obj)
-    assert type(rec_obj) is class_type
-    assert rec_obj.name == "Test title"
+def test_populate_class_instance(dspace_client):
+    class_type = Collection
+    dspace_collection = {"name": "Test title", "type": "collection", "items": []}
+    dspace_collection = dspace_client._populate_class_instance(
+        class_type, dspace_collection
+    )
+    assert type(dspace_collection) is class_type
+    assert dspace_collection.name == "Test title"


-def test__build_uuid_list(client):
-    """Test _build_uuid_list method."""
-    rec_obj = {"items": [{"uuid": "1234"}]}
+def test_build_uuid_list(dspace_client):
+    dspace_items = {"items": [{"uuid": "1234"}]}
     children = "items"
-    child_list = client._build_uuid_list(rec_obj, children)
+    child_list = dspace_client._build_uuid_list(dspace_items, children)
     assert "1234" in child_list


-def test_collection_create_metadata_for_items_from_csv(
-    aspace_delimited_csv, aspace_mapping
-):
-    collection = models.Collection.create_metadata_for_items_from_csv(
-        aspace_delimited_csv, aspace_mapping
-    )
-    assert 2 == len(collection.items)
+def test_collection_add_items(source_metadata_csv, source_config):
+    collection = Collection.add_items(source_metadata_csv, source_config["mapping"])
+    assert len(collection.items) == 5
+
+
+def test_item_create(source_metadata_csv_with_bitstreams, source_config):
+    record = next(source_metadata_csv_with_bitstreams)
+    assert attr.asdict(Item.create(record, source_config["mapping"])) == {
+        "uuid": None,
+        "name": None,
+        "handle": None,
+        "link": None,
+        "type": None,
+        "metadata": [
+            {"key": "dc.title", "value": "Title 1", "language": "en_US"},
+            {"key": "dc.contributor.author", "value": "May Smith", "language": None},
+        ],
+        "bitstreams": [
+            {
+                "name": "aaaa_001_01.pdf",
+                "file_path": "s3://mocked-bucket/one-to-one/aaaa_001_01.pdf",
+            }
+        ],
+        "item_identifier": "001",
+        "source_system_identifier": None,
+    }
+
+
+def test_item_get_ids(source_metadata_csv, source_config):
+    record = next(source_metadata_csv)
+    assert Item.get_ids(record, source_config["mapping"]) == {"item_identifier": "001"}
+
+
+def test_item_get_bitstreams(source_metadata_csv_with_bitstreams, source_config):
+    record = next(source_metadata_csv_with_bitstreams)
+    assert Item.get_bitstreams(record) == [
+        Bitstream(
+            name="aaaa_001_01.pdf",
+            file_path="s3://mocked-bucket/one-to-one/aaaa_001_01.pdf",
+        )
+    ]


-@mock_aws
-def test_collection_post_items(
-    mocked_s3,
-    client,
-    aspace_delimited_csv,
-    aspace_mapping,
-):
-    collection = models.Collection.create_metadata_for_items_from_csv(
-        aspace_delimited_csv, aspace_mapping
-    )
-    collection.uuid = "c3d4"
-    items = collection.post_items(client)
-    for item in items:
-        assert item.handle == "222.2222"
-        assert item.uuid == "e5f6"
-
-
-def test_item_metadata_from_csv_row(aspace_delimited_csv, aspace_mapping):
-    row = next(aspace_delimited_csv)
-    item = models.Item.metadata_from_csv_row(row, aspace_mapping)
-    assert attr.asdict(item)["metadata"] == [
-        {"key": "dc.title", "value": "Tast Item", "language": "en_US"},
-        {"key": "dc.contributor.author", "value": "Smith, John", "language": None},
-        {"key": "dc.contributor.author", "value": "Smith, Jane", "language": None},
-        {
-            "key": "dc.description",
-            "value": "More info at /repo/0/ao/456",
-            "language": "en_US",
-        },
-        {"key": "dc.rights", "value": "Totally Free", "language": "en_US"},
-        {"key": "dc.rights.uri", "value": "http://free.gov", "language": None},
+def test_item_get_metadata(source_metadata_csv, source_config):
+    record = next(source_metadata_csv)
+    metadata = Item.get_metadata(record, source_config["mapping"])
+    assert [attr.asdict(m) for m in metadata] == [
+        {"key": "dc.title", "value": "Title 1", "language": "en_US"},
+        {"key": "dc.contributor.author", "value": "May Smith", "language": None},
     ]