From 72adbffd168d5f59cce83306917d2aeffd0b2602 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Wed, 10 Apr 2024 19:49:05 -0700 Subject: [PATCH 1/6] Set knowledge_level & agent_type in all ingests --- poetry.lock | 41 ++++--------------- pyproject.toml | 2 +- src/monarch_ingest/ingests/alliance/gene.py | 5 ++- .../ingests/alliance/gene_to_expression.py | 10 +++-- .../ingests/alliance/gene_to_expression.yaml | 2 + .../ingests/alliance/gene_to_phenotype.py | 7 +++- .../ingests/alliance/gene_to_phenotype.yaml | 2 + .../ingests/bgee/gene_to_expression.yaml | 2 + .../ingests/bgee/gene_to_expression_utils.py | 6 ++- src/monarch_ingest/ingests/biogrid/biogrid.py | 6 ++- .../ingests/biogrid/biogrid.yaml | 2 + .../ingests/biogrid/biogrid_util.py | 2 +- .../ingests/ctd/chemical_to_disease.py | 7 +++- .../ingests/ctd/chemical_to_disease.yaml | 2 + .../ingests/dictybase/gene_to_phenotype.py | 7 +++- .../ingests/dictybase/gene_to_phenotype.yaml | 2 + .../ingests/flybase/publication_to_gene.py | 5 ++- .../ingests/flybase/publication_to_gene.yaml | 2 + src/monarch_ingest/ingests/go/annotation.py | 5 ++- src/monarch_ingest/ingests/go/annotation.yaml | 2 + .../hpoa/disease_mode_of_inheritance.py | 8 ++-- .../hpoa/disease_mode_of_inheritance.yaml | 2 + .../ingests/hpoa/disease_to_phenotype.py | 8 +++- .../ingests/hpoa/disease_to_phenotype.yaml | 2 + .../ingests/hpoa/gene_to_disease.py | 6 ++- .../ingests/hpoa/gene_to_disease.yaml | 2 + .../ingests/hpoa/gene_to_phenotype.py | 7 +++- .../ingests/hpoa/gene_to_phenotype.yaml | 2 + .../ingests/mgi/publication_to_gene.py | 7 +++- .../ingests/mgi/publication_to_gene.yaml | 2 + .../ingests/panther/genome_orthologs.py | 6 ++- .../ingests/panther/genome_orthologs.yaml | 2 + .../ingests/pombase/gene_to_phenotype.py | 7 +++- .../ingests/pombase/gene_to_phenotype.yaml | 2 + .../ingests/reactome/chemical_to_pathway.py | 6 ++- .../ingests/reactome/chemical_to_pathway.yaml | 2 + .../ingests/reactome/gene_to_pathway.py | 4 +- 
.../ingests/reactome/gene_to_pathway.yaml | 2 + .../ingests/rgd/publication_to_gene.py | 7 +++- .../ingests/rgd/publication_to_gene.yaml | 2 + .../ingests/sgd/publication_to_gene.py | 7 +++- .../ingests/sgd/publication_to_gene.yaml | 2 + .../ingests/string/protein_links.py | 6 ++- .../ingests/string/protein_links.yaml | 2 + .../ingests/xenbase/gene_to_phenotype.py | 6 ++- .../ingests/xenbase/gene_to_phenotype.yaml | 2 + .../ingests/xenbase/non_entrez_orthologs.py | 14 +++++-- .../ingests/xenbase/non_entrez_orthologs.yaml | 2 + .../ingests/xenbase/orthologs.py | 6 ++- .../ingests/xenbase/orthologs.yaml | 2 + .../ingests/xenbase/publication_to_gene.py | 7 +++- .../ingests/xenbase/publication_to_gene.yaml | 2 + .../ingests/zfin/gene_to_phenotype.py | 7 +++- .../ingests/zfin/gene_to_phenotype.yaml | 2 + .../ingests/zfin/publication_to_gene.py | 7 +++- .../ingests/zfin/publication_to_gene.yaml | 2 + 56 files changed, 192 insertions(+), 89 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1d973eb9..70afe73d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -129,8 +129,8 @@ docs = [] [package.source] type = "git" url = "https://github.com/biolink/biolink-model" -reference = "v4.2.0-rc.2" -resolved_reference = "16e0e617de6d3217b37f0514a7f74e0f1ef2edf0" +reference = "master" +resolved_reference = "d44c32ffef2c9141fd19d62a4a25b9ee945a5b8e" [[package]] name = "black" @@ -1108,17 +1108,6 @@ files = [ {file = "ijson-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a3a6a2fbbe7550ffe52d151cf76065e6b89cfb3e9d0463e49a7e322a25d0426"}, {file = "ijson-3.2.3-cp311-cp311-win32.whl", hash = "sha256:6a4db2f7fb9acfb855c9ae1aae602e4648dd1f88804a0d5cfb78c3639bcf156c"}, {file = "ijson-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccd6be56335cbb845f3d3021b1766299c056c70c4c9165fb2fbe2d62258bae3f"}, - {file = "ijson-3.2.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:055b71bbc37af5c3c5861afe789e15211d2d3d06ac51ee5a647adf4def19c0ea"}, - {file = 
"ijson-3.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c075a547de32f265a5dd139ab2035900fef6653951628862e5cdce0d101af557"}, - {file = "ijson-3.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:457f8a5fc559478ac6b06b6d37ebacb4811f8c5156e997f0d87d708b0d8ab2ae"}, - {file = "ijson-3.2.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9788f0c915351f41f0e69ec2618b81ebfcf9f13d9d67c6d404c7f5afda3e4afb"}, - {file = "ijson-3.2.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa234ab7a6a33ed51494d9d2197fb96296f9217ecae57f5551a55589091e7853"}, - {file = "ijson-3.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd0dc5da4f9dc6d12ab6e8e0c57d8b41d3c8f9ceed31a99dae7b2baf9ea769a"}, - {file = "ijson-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c6beb80df19713e39e68dc5c337b5c76d36ccf69c30b79034634e5e4c14d6904"}, - {file = "ijson-3.2.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a2973ce57afb142d96f35a14e9cfec08308ef178a2c76b8b5e1e98f3960438bf"}, - {file = "ijson-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:105c314fd624e81ed20f925271ec506523b8dd236589ab6c0208b8707d652a0e"}, - {file = "ijson-3.2.3-cp312-cp312-win32.whl", hash = "sha256:ac44781de5e901ce8339352bb5594fcb3b94ced315a34dbe840b4cff3450e23b"}, - {file = "ijson-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:0567e8c833825b119e74e10a7c29761dc65fcd155f5d4cb10f9d3b8916ef9912"}, {file = "ijson-3.2.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:eeb286639649fb6bed37997a5e30eefcacddac79476d24128348ec890b2a0ccb"}, {file = "ijson-3.2.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:396338a655fb9af4ac59dd09c189885b51fa0eefc84d35408662031023c110d1"}, {file = "ijson-3.2.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e0243d166d11a2a47c17c7e885debf3b19ed136be2af1f5d1c34212850236ac"}, @@ -2685,7 +2674,6 @@ description = "A pure Python 
implementation of the trie data structure." optional = false python-versions = "*" files = [ - {file = "PyTrie-0.4.0-py3-none-any.whl", hash = "sha256:f687c224ee8c66cda8e8628a903011b692635ffbb08d4b39c5f92b18eb78c950"}, {file = "PyTrie-0.4.0.tar.gz", hash = "sha256:8f4488f402d3465993fb6b6efa09866849ed8cda7903b50647b7d0342b805379"}, ] @@ -2738,7 +2726,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -2746,16 +2733,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = 
"PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2772,7 +2751,6 @@ files = [ {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2780,7 +2758,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = 
"sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -3077,24 +3054,24 @@ python-versions = ">=3.6" files = [ {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d92f81886165cb14d7b067ef37e142256f1c6a90a65cd156b063a43da1708cfd"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412"}, - {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:aa2267c6a303eb483de8d02db2871afb5c5fc15618d894300b88958f729ad74f"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:840f0c7f194986a63d2c2465ca63af8ccbbc90ab1c6001b1978f05119b5e7334"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:024cfe1fc7c7f4e1aff4a81e718109e13409767e4f871443cbff3dba3578203d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win32.whl", hash = "sha256:c69212f63169ec1cfc9bb44723bf2917cbbd8f6191a00ef3410f5a7fe300722d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:cabddb8d8ead485e255fe80429f833172b4cadf99274db39abc080e068cbcc31"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bef08cd86169d9eafb3ccb0a39edb11d8e25f3dae2b28f5c52fd997521133069"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b16420e621d26fdfa949a8b4b47ade8810c56002f5389970db4ddda51dbff248"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux2014_aarch64.whl", hash = 
"sha256:b5edda50e5e9e15e54a6a8a0070302b00c518a9d32accc2346ad6c984aacd279"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:25c515e350e5b739842fc3228d662413ef28f295791af5e5110b543cf0b57d9b"}, - {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:1707814f0d9791df063f8c19bb51b0d1278b8e9a2353abbb676c2f685dee6afe"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:46d378daaac94f454b3a0e3d8d78cafd78a026b1d71443f4966c696b48a6d899"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:09b055c05697b38ecacb7ac50bdab2240bfca1a0c4872b0fd309bb07dc9aa3a9"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win32.whl", hash = "sha256:53a300ed9cea38cf5a2a9b069058137c2ca1ce658a874b79baceb8f892f915a7"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:c2a72e9109ea74e511e29032f3b670835f8a59bbdc9ce692c5b4ed91ccf1eedb"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ebc06178e8821efc9692ea7544aa5644217358490145629914d8020042c24aa1"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:edaef1c1200c4b4cb914583150dcaa3bc30e592e907c01117c08b13a07255ec2"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:7048c338b6c86627afb27faecf418768acb6331fc24cfa56c93e8c9780f815fa"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d176b57452ab5b7028ac47e7b3cf644bcfdc8cacfecf7e71759f7f51a59e5c92"}, - {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:1dc67314e7e1086c9fdf2680b7b6c2be1c0d8e3a8279f2e993ca2a7545fecf62"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3213ece08ea033eb159ac52ae052a4899b56ecc124bb80020d9bbceeb50258e9"}, {file = 
"ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aab7fd643f71d7946f2ee58cc88c9b7bfc97debd71dcc93e03e2d174628e7e2d"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win32.whl", hash = "sha256:5c365d91c88390c8d0a8545df0b5857172824b1c604e867161e6b3d59a827eaa"}, @@ -3102,7 +3079,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a5aa27bad2bb83670b71683aae140a1f52b0857a2deff56ad3f6c13a017a26ed"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c58ecd827313af6864893e7af0a3bb85fd529f862b6adbefe14643947cfe2942"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875"}, - {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:77159f5d5b5c14f7c34073862a6b7d34944075d9f93e681638f6d753606c6ce6"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3fcc54cb0c8b811ff66082de1680b4b14cf8a81dce0d4fbf665c2265a81e07a1"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7f67a1ee819dc4562d444bbafb135832b0b909f81cc90f7aa00260968c9ca1b3"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4ecbf9c3e19f9562c7fdd462e8d18dd902a47ca046a2e64dba80699f0b6c09b7"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:87ea5ff66d8064301a154b3933ae406b0863402a799b16e4a1d24d9fbbcbe0d3"}, @@ -3110,7 +3087,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win_amd64.whl", hash = "sha256:3f215c5daf6a9d7bbed4a0a4f760f3113b10e82ff4c5c44bec20a68c8014f675"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b617618914cb00bf5c34d4357c37aa15183fa229b24767259657746c9077615"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_12_0_arm64.whl", hash = 
"sha256:a6a9ffd280b71ad062eae53ac1659ad86a17f59a0fdc7699fd9be40525153337"}, - {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:305889baa4043a09e5b76f8e2a51d4ffba44259f6b4c72dec8ca56207d9c6fe1"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:665f58bfd29b167039f714c6998178d27ccd83984084c286110ef26b230f259f"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:700e4ebb569e59e16a976857c8798aee258dceac7c7d6b50cab63e080058df91"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2b4c44b60eadec492926a7270abb100ef9f72798e18743939bdbf037aab8c28"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e79e5db08739731b0ce4850bed599235d601701d5694c36570a99a0c5ca41a9d"}, @@ -3118,7 +3095,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win_amd64.whl", hash = "sha256:56f4252222c067b4ce51ae12cbac231bce32aee1d33fbfc9d17e5b8d6966c312"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03d1162b6d1df1caa3a4bd27aa51ce17c9afc2046c31b0ad60a0a96ec22f8001"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba64af9fa9cebe325a62fa398760f5c7206b215201b0ec825005f1b18b9bccf"}, - {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:a1a45e0bb052edf6a1d3a93baef85319733a888363938e1fc9924cb00c8df24c"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9eb5dee2772b0f704ca2e45b1713e4e5198c18f515b52743576d196348f374d3"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:da09ad1c359a728e112d60116f626cc9f29730ff3e0e7db72b9a2dbc2e4beed5"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:184565012b60405d93838167f425713180b949e9d8dd0bbc7b49f074407c5a8b"}, {file = 
"ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a75879bacf2c987c003368cf14bed0ffe99e8e85acfa6c0bfffc21a090f16880"}, @@ -4008,4 +3985,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "d5ffccd66fb9187d8ab37ea0d3bb1f6e8a6818061ffadf1d014d343970381677" +content-hash = "c754cc30291543a0780f66564df458a651a2a56b8615a26375e5efe9b8dc4fcd" diff --git a/pyproject.toml b/pyproject.toml index ef47b779..8a8ca346 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ packages = [ python = ">=3.10,<3.12" # biolink-model = "^4.2.0" # When 4.2.0 (or any release after 4.1.6) is released, we can remove the git dependency -biolink-model = { git = "https://github.com/biolink/biolink-model", tag = "v4.2.0-rc.2" } +biolink-model = { git = "https://github.com/biolink/biolink-model", branch = "master" } bmt = "^1.0.15" cat-merge = "0.2.1" closurizer = "0.5.1" diff --git a/src/monarch_ingest/ingests/alliance/gene.py b/src/monarch_ingest/ingests/alliance/gene.py index e3180294..62b9d107 100644 --- a/src/monarch_ingest/ingests/alliance/gene.py +++ b/src/monarch_ingest/ingests/alliance/gene.py @@ -48,7 +48,7 @@ id=gene_id, symbol=row["symbol"], name=row["symbol"], - full_name=row["name"], + full_name=row["name"].replace("\r",""), # Replacement to remove stray carriage returns in XenBase files # No place in the schema for gene type (SO term) right now # type=row["soTermId"], in_taxon=[in_taxon], @@ -62,6 +62,7 @@ for xref in row["basicGeneticEntity"]["crossReferences"] ] if "synonyms" in row["basicGeneticEntity"].keys(): - gene.synonym = row["basicGeneticEntity"]["synonyms"] + # more handling for errant carriage returns + gene.synonym = [synonym.replace("\r","") for synonym in row["basicGeneticEntity"]["synonyms"] ] koza_app.write(gene) diff --git a/src/monarch_ingest/ingests/alliance/gene_to_expression.py 
b/src/monarch_ingest/ingests/alliance/gene_to_expression.py index 3457fc42..cb6d0dd0 100644 --- a/src/monarch_ingest/ingests/alliance/gene_to_expression.py +++ b/src/monarch_ingest/ingests/alliance/gene_to_expression.py @@ -4,7 +4,7 @@ from koza.cli_runner import get_koza_app from source_translation import source_map -from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation, KnowledgeLevelEnum, AgentTypeEnum from monarch_ingest.ingests.alliance.utils import get_data @@ -54,7 +54,9 @@ qualifiers=([get_data(row, "assay")] if get_data(row, "assay") else None), publications=publication_ids, aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"], - primary_knowledge_source=source + primary_knowledge_source=source, + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) ) @@ -71,7 +73,9 @@ qualifiers=([get_data(row, "assay")] if get_data(row, "assay") else None), publications=publication_ids, aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"], - primary_knowledge_source=source + primary_knowledge_source=source, + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) ) else: diff --git a/src/monarch_ingest/ingests/alliance/gene_to_expression.yaml b/src/monarch_ingest/ingests/alliance/gene_to_expression.yaml index 54f2a0df..57735bc7 100644 --- a/src/monarch_ingest/ingests/alliance/gene_to_expression.yaml +++ b/src/monarch_ingest/ingests/alliance/gene_to_expression.yaml @@ -50,5 +50,7 @@ edge_properties: - 'publications' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/alliance/gene_to_phenotype.py b/src/monarch_ingest/ingests/alliance/gene_to_phenotype.py index feb6df44..5f69dba7 100644 --- 
a/src/monarch_ingest/ingests/alliance/gene_to_phenotype.py +++ b/src/monarch_ingest/ingests/alliance/gene_to_phenotype.py @@ -5,7 +5,7 @@ from koza.cli_runner import get_koza_app from source_translation import source_map -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, AgentTypeEnum from loguru import logger @@ -41,7 +41,10 @@ object=phenotypic_feature_id, publications=[row["evidence"]["publicationId"]], aggregator_knowledge_source=["infores:monarchinitiative", "infores:alliancegenome"], - primary_knowledge_source=source + primary_knowledge_source=source, + knowledge_level = KnowledgeLevelEnum.knowledge_assertion, + agent_type = AgentTypeEnum.manual_agent + ) if "conditionRelations" in row.keys() and row["conditionRelations"] is not None: diff --git a/src/monarch_ingest/ingests/alliance/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/alliance/gene_to_phenotype.yaml index 5701864f..61a69598 100644 --- a/src/monarch_ingest/ingests/alliance/gene_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/alliance/gene_to_phenotype.yaml @@ -35,5 +35,7 @@ edge_properties: - 'publications' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/bgee/gene_to_expression.yaml b/src/monarch_ingest/ingests/bgee/gene_to_expression.yaml index 3a21ad50..e099e944 100644 --- a/src/monarch_ingest/ingests/bgee/gene_to_expression.yaml +++ b/src/monarch_ingest/ingests/bgee/gene_to_expression.yaml @@ -75,5 +75,7 @@ edge_properties: - 'object' - 'primary_knowledge_source' - 'aggregator_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py b/src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py index c21626f4..a864d78c 100644 --- 
a/src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py +++ b/src/monarch_ingest/ingests/bgee/gene_to_expression_utils.py @@ -2,7 +2,7 @@ import pandas as pd from typing import Dict, List, Union from koza.app import KozaApp -from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToExpressionSiteAssociation, KnowledgeLevelEnum, AgentTypeEnum def filter_group_by_rank(rows: List, col: str, largest_n: int = 0, smallest_n: int = 0) -> List[Dict]: @@ -42,7 +42,9 @@ def write_group(rows: List, koza_app: KozaApp): predicate='biolink:expressed_in', object=row['Anatomical entity ID'], primary_knowledge_source="infores:bgee", - aggregator_knowledge_source=["infores:monarchinitiative"]) + aggregator_knowledge_source=["infores:monarchinitiative"], + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.not_provided) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/biogrid/biogrid.py b/src/monarch_ingest/ingests/biogrid/biogrid.py index 23bc49dd..7137e9fa 100644 --- a/src/monarch_ingest/ingests/biogrid/biogrid.py +++ b/src/monarch_ingest/ingests/biogrid/biogrid.py @@ -1,6 +1,6 @@ import uuid from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction +from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction, KnowledgeLevelEnum, AgentTypeEnum from biogrid_util import get_gene_id, get_evidence, get_publication_ids koza_app = get_koza_app("biogrid") @@ -25,7 +25,9 @@ has_evidence=evidence, publications=publications, primary_knowledge_source="infores:biogrid", - aggregator_knowledge_source=["infores:monarchinitiative"] + aggregator_knowledge_source=["infores:monarchinitiative"], + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.not_provided ) koza_app.write(association) diff --git 
a/src/monarch_ingest/ingests/biogrid/biogrid.yaml b/src/monarch_ingest/ingests/biogrid/biogrid.yaml index 22fff56f..87517ce5 100644 --- a/src/monarch_ingest/ingests/biogrid/biogrid.yaml +++ b/src/monarch_ingest/ingests/biogrid/biogrid.yaml @@ -40,5 +40,7 @@ edge_properties: - 'publications' - 'primary_knowledge_source' - 'aggregator_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/biogrid/biogrid_util.py b/src/monarch_ingest/ingests/biogrid/biogrid_util.py index a938e822..1c670cd2 100644 --- a/src/monarch_ingest/ingests/biogrid/biogrid_util.py +++ b/src/monarch_ingest/ingests/biogrid/biogrid_util.py @@ -55,7 +55,7 @@ def get_evidence(methods: str) -> Optional[List[str]]: method = method.rstrip(")").split('(')[-1] if method not in EVIDENCE_CODE_MAPPINGS.keys(): err_msg = f"Unknown interaction detection method '{method}'. " +\ - "Assigning default code ECO:0000006 == 'experimental evidence'." + "Assigning default code ECO:0000006 == 'experimental evidence', the ECO root." 
logger.warning(err_msg) EVIDENCE_CODE_MAPPINGS[method] = "ECO:0000006" diff --git a/src/monarch_ingest/ingests/ctd/chemical_to_disease.py b/src/monarch_ingest/ingests/ctd/chemical_to_disease.py index 45d63b2c..efe5fa3c 100644 --- a/src/monarch_ingest/ingests/ctd/chemical_to_disease.py +++ b/src/monarch_ingest/ingests/ctd/chemical_to_disease.py @@ -2,7 +2,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToDiseaseOrPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToDiseaseOrPhenotypicFeatureAssociation, \ + KnowledgeLevelEnum, AgentTypeEnum from monarch_ingest.constants import BIOLINK_TREATS_OR_APPLIED_OR_STUDIED_TO_TREAT koza_app = get_koza_app("ctd_chemical_to_disease") @@ -25,7 +26,9 @@ object=disease_id, publications=["PMID:" + p for p in row['PubMedIDs'].split("|")], aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:ctd" + primary_knowledge_source="infores:ctd", + knowledge_level=KnowledgeLevelEnum.not_provided, + agent_type=AgentTypeEnum.not_provided ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/ctd/chemical_to_disease.yaml b/src/monarch_ingest/ingests/ctd/chemical_to_disease.yaml index 7dc798f4..2dd350c0 100644 --- a/src/monarch_ingest/ingests/ctd/chemical_to_disease.yaml +++ b/src/monarch_ingest/ingests/ctd/chemical_to_disease.yaml @@ -35,3 +35,5 @@ edge_properties: - 'publications' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' diff --git a/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py b/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py index 89f93e82..b4036947 100644 --- a/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py +++ b/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.py @@ -4,7 +4,8 @@ from koza.cli_runner import get_koza_app from monarch_ingest.ingests.dictybase.utils import parse_gene_id, 
parse_phenotypes -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \ + AgentTypeEnum koza_app = get_koza_app("dictybase_gene_to_phenotype") @@ -29,7 +30,9 @@ predicate='biolink:has_phenotype', object=phenotype_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:dictybase" + primary_knowledge_source="infores:dictybase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.yaml index e9482365..75b6ee5a 100644 --- a/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/dictybase/gene_to_phenotype.yaml @@ -31,5 +31,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/flybase/publication_to_gene.py b/src/monarch_ingest/ingests/flybase/publication_to_gene.py index 14a65aa2..932f81ee 100644 --- a/src/monarch_ingest/ingests/flybase/publication_to_gene.py +++ b/src/monarch_ingest/ingests/flybase/publication_to_gene.py @@ -2,7 +2,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation +from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \ + KnowledgeLevelEnum koza_app = get_koza_app("flybase_publication_to_gene") @@ -24,6 +25,8 @@ object=publication_id, aggregator_knowledge_source=["infores:monarchinitiative"], primary_knowledge_source="infores:flybase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + 
agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/flybase/publication_to_gene.yaml b/src/monarch_ingest/ingests/flybase/publication_to_gene.yaml index 61ae01c1..c2bb199e 100644 --- a/src/monarch_ingest/ingests/flybase/publication_to_gene.yaml +++ b/src/monarch_ingest/ingests/flybase/publication_to_gene.yaml @@ -27,5 +27,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/go/annotation.py b/src/monarch_ingest/ingests/go/annotation.py index a9721fb8..3c85ca91 100644 --- a/src/monarch_ingest/ingests/go/annotation.py +++ b/src/monarch_ingest/ingests/go/annotation.py @@ -6,6 +6,7 @@ """ import uuid +from biolink_model.datamodel.pydanticmodel_v2 import KnowledgeLevelEnum, AgentTypeEnum from koza.cli_runner import get_koza_app from monarch_ingest.ingests.go.annotation_utils import ( @@ -119,7 +120,9 @@ has_evidence=[eco_term], # subject_context_qualifier=ncbitaxa, # Biolink Pydantic model support missing for this slot aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source=assigned_by + primary_knowledge_source=assigned_by, + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) # Write the captured Association out diff --git a/src/monarch_ingest/ingests/go/annotation.yaml b/src/monarch_ingest/ingests/go/annotation.yaml index f17743b1..b8be290b 100644 --- a/src/monarch_ingest/ingests/go/annotation.yaml +++ b/src/monarch_ingest/ingests/go/annotation.yaml @@ -85,5 +85,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py b/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py index b8239c70..b40b85a3 100644 --- 
a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py +++ b/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py @@ -25,8 +25,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation - +from biolink_model.datamodel.pydanticmodel_v2 import DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation, \ + KnowledgeLevelEnum, AgentTypeEnum from loguru import logger @@ -71,7 +71,9 @@ publications=publications, has_evidence=[evidence_curie], aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:hpo-annotations" + primary_knowledge_source="infores:hpo-annotations", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml b/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml index 9b4770b8..545fc58d 100644 --- a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml +++ b/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml @@ -56,3 +56,5 @@ edge_properties: - 'has_evidence' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' diff --git a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py b/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py index 4802c26e..974aefd3 100644 --- a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py +++ b/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py @@ -29,7 +29,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import DiseaseToPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import DiseaseToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \ + AgentTypeEnum from monarch_ingest.ingests.hpoa.hpoa_utils import phenotype_frequency_to_hpo_term, FrequencyHpoTerm, 
Frequency from loguru import logger @@ -96,6 +97,9 @@ has_count=frequency.has_count, has_total=frequency.has_total, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:hpo-annotations" + primary_knowledge_source="infores:hpo-annotations", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent + ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml b/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml index a338c460..8c3c87aa 100644 --- a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml @@ -63,3 +63,5 @@ edge_properties: - 'has_evidence' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' \ No newline at end of file diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_disease.py b/src/monarch_ingest/ingests/hpoa/gene_to_disease.py index e8efca12..69a52826 100644 --- a/src/monarch_ingest/ingests/hpoa/gene_to_disease.py +++ b/src/monarch_ingest/ingests/hpoa/gene_to_disease.py @@ -1,7 +1,7 @@ import uuid from biolink_model.datamodel.pydanticmodel_v2 import GeneToDiseaseAssociation, CausalGeneToDiseaseAssociation, \ - CorrelatedGeneToDiseaseAssociation + CorrelatedGeneToDiseaseAssociation, KnowledgeLevelEnum, AgentTypeEnum from koza.cli_runner import get_koza_app from monarch_ingest.constants import INFORES_MONARCHINITIATIVE, BIOLINK_CAUSES @@ -29,7 +29,9 @@ predicate=predicate, object=disease_id, primary_knowledge_source=primary_knowledge_source, - aggregator_knowledge_source=aggregator_knowledge_source + aggregator_knowledge_source=aggregator_knowledge_source, + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml b/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml index 
54ade91c..22998dd8 100644 --- a/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml +++ b/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml @@ -22,5 +22,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py b/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py index ff812407..ffa820c2 100644 --- a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py +++ b/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py @@ -8,7 +8,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \ + AgentTypeEnum koza_app = get_koza_app("hpoa_gene_to_phenotype") @@ -22,7 +23,9 @@ predicate="biolink:has_phenotype", object=phenotype_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:hpo-annotations" + primary_knowledge_source="infores:hpo-annotations", + knowledge_level=KnowledgeLevelEnum.logical_entailment, + agent_type=AgentTypeEnum.automated_agent, ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml index 52eab3d4..f8e8a0bf 100644 --- a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml @@ -26,5 +26,7 @@ edge_properties: - 'qualifiers' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/mgi/publication_to_gene.py b/src/monarch_ingest/ingests/mgi/publication_to_gene.py index 70896bea..fce8228c 100644 --- a/src/monarch_ingest/ingests/mgi/publication_to_gene.py +++ 
b/src/monarch_ingest/ingests/mgi/publication_to_gene.py @@ -2,7 +2,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation +from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \ + KnowledgeLevelEnum koza_app = get_koza_app("mgi_publication_to_gene") @@ -24,7 +25,9 @@ predicate="biolink:mentions", object=gene_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:mgi" + primary_knowledge_source="infores:mgi", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/mgi/publication_to_gene.yaml b/src/monarch_ingest/ingests/mgi/publication_to_gene.yaml index 66628df3..cabc5c67 100644 --- a/src/monarch_ingest/ingests/mgi/publication_to_gene.yaml +++ b/src/monarch_ingest/ingests/mgi/publication_to_gene.yaml @@ -28,6 +28,8 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/panther/genome_orthologs.py b/src/monarch_ingest/ingests/panther/genome_orthologs.py index 252f161f..c3990906 100644 --- a/src/monarch_ingest/ingests/panther/genome_orthologs.py +++ b/src/monarch_ingest/ingests/panther/genome_orthologs.py @@ -6,7 +6,7 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToGeneHomologyAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToGeneHomologyAssociation, KnowledgeLevelEnum, AgentTypeEnum from monarch_ingest.ingests.panther.orthology_utils import parse_gene, ncbitaxon_catalog @@ -52,7 +52,9 @@ predicate=predicate, has_evidence=[f"PANTHER.FAMILY:{panther_ortholog_id}"], aggregator_knowledge_source=["infores:monarchinitiative"], - 
primary_knowledge_source="infores:panther" + primary_knowledge_source="infores:panther", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.not_provided ) # Write the captured Association out diff --git a/src/monarch_ingest/ingests/panther/genome_orthologs.yaml b/src/monarch_ingest/ingests/panther/genome_orthologs.yaml index f38b5069..ce647a0a 100644 --- a/src/monarch_ingest/ingests/panther/genome_orthologs.yaml +++ b/src/monarch_ingest/ingests/panther/genome_orthologs.yaml @@ -34,5 +34,7 @@ edge_properties: - 'has_evidence' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/pombase/gene_to_phenotype.py b/src/monarch_ingest/ingests/pombase/gene_to_phenotype.py index 31de6b4e..6cb87185 100644 --- a/src/monarch_ingest/ingests/pombase/gene_to_phenotype.py +++ b/src/monarch_ingest/ingests/pombase/gene_to_phenotype.py @@ -2,7 +2,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \ + AgentTypeEnum koza_app = get_koza_app("pombase_gene_to_phenotype") @@ -19,7 +20,9 @@ object=phenotype_id, publications=[row["Reference"]], aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:pombase" + primary_knowledge_source="infores:pombase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) if row["Condition"]: diff --git a/src/monarch_ingest/ingests/pombase/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/pombase/gene_to_phenotype.yaml index 9767fd4b..7ce749c0 100644 --- a/src/monarch_ingest/ingests/pombase/gene_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/pombase/gene_to_phenotype.yaml @@ -45,5 +45,7 @@ edge_properties: - 'publications' - 
'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/reactome/chemical_to_pathway.py b/src/monarch_ingest/ingests/reactome/chemical_to_pathway.py index 296ca05b..eb1714af 100644 --- a/src/monarch_ingest/ingests/reactome/chemical_to_pathway.py +++ b/src/monarch_ingest/ingests/reactome/chemical_to_pathway.py @@ -1,6 +1,6 @@ import uuid from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToPathwayAssociation +from biolink_model.datamodel.pydanticmodel_v2 import ChemicalToPathwayAssociation, AgentTypeEnum, KnowledgeLevelEnum koza_app = get_koza_app("reactome_chemical_to_pathway") @@ -29,7 +29,9 @@ object=pathway_id, has_evidence=[evidence_code_term], aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:reactome" + primary_knowledge_source="infores:reactome", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.not_provided ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/reactome/chemical_to_pathway.yaml b/src/monarch_ingest/ingests/reactome/chemical_to_pathway.yaml index c883c5f0..62f0b19c 100644 --- a/src/monarch_ingest/ingests/reactome/chemical_to_pathway.yaml +++ b/src/monarch_ingest/ingests/reactome/chemical_to_pathway.yaml @@ -30,6 +30,8 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/reactome/gene_to_pathway.py b/src/monarch_ingest/ingests/reactome/gene_to_pathway.py index c53a1fa2..00e9e80a 100644 --- a/src/monarch_ingest/ingests/reactome/gene_to_pathway.py +++ b/src/monarch_ingest/ingests/reactome/gene_to_pathway.py @@ -1,7 +1,7 @@ import uuid from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPathwayAssociation +from 
biolink_model.datamodel.pydanticmodel_v2 import GeneToPathwayAssociation, AgentTypeEnum, KnowledgeLevelEnum koza_app = get_koza_app("reactome_gene_to_pathway") @@ -31,6 +31,8 @@ has_evidence=[evidence_code_term], aggregator_knowledge_source=["infores:monarchinitiative"], primary_knowledge_source="infores:reactome", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.not_provided ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/reactome/gene_to_pathway.yaml b/src/monarch_ingest/ingests/reactome/gene_to_pathway.yaml index 70f230f2..ff4b1762 100644 --- a/src/monarch_ingest/ingests/reactome/gene_to_pathway.yaml +++ b/src/monarch_ingest/ingests/reactome/gene_to_pathway.yaml @@ -30,5 +30,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/rgd/publication_to_gene.py b/src/monarch_ingest/ingests/rgd/publication_to_gene.py index f6773acf..cea56e2d 100644 --- a/src/monarch_ingest/ingests/rgd/publication_to_gene.py +++ b/src/monarch_ingest/ingests/rgd/publication_to_gene.py @@ -2,7 +2,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation +from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \ + KnowledgeLevelEnum koza_app = get_koza_app("rgd_publication_to_gene") @@ -24,7 +25,9 @@ predicate="biolink:mentions", object=publication_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:rgd" + primary_knowledge_source="infores:rgd", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/rgd/publication_to_gene.yaml b/src/monarch_ingest/ingests/rgd/publication_to_gene.yaml 
index 17606085..6f781f76 100644 --- a/src/monarch_ingest/ingests/rgd/publication_to_gene.yaml +++ b/src/monarch_ingest/ingests/rgd/publication_to_gene.yaml @@ -73,5 +73,7 @@ edge_properties: - 'category' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/sgd/publication_to_gene.py b/src/monarch_ingest/ingests/sgd/publication_to_gene.py index 2da24503..1ad07211 100644 --- a/src/monarch_ingest/ingests/sgd/publication_to_gene.py +++ b/src/monarch_ingest/ingests/sgd/publication_to_gene.py @@ -2,7 +2,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation +from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \ + KnowledgeLevelEnum koza_app = get_koza_app("sgd_publication_to_gene") @@ -18,7 +19,9 @@ predicate="biolink:mentions", object=publication_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:sgd" + primary_knowledge_source="infores:sgd", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/sgd/publication_to_gene.yaml b/src/monarch_ingest/ingests/sgd/publication_to_gene.yaml index eb8d6a7b..e2ef4dbe 100644 --- a/src/monarch_ingest/ingests/sgd/publication_to_gene.yaml +++ b/src/monarch_ingest/ingests/sgd/publication_to_gene.yaml @@ -24,5 +24,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/string/protein_links.py b/src/monarch_ingest/ingests/string/protein_links.py index 547c9380..e65671e2 100644 --- a/src/monarch_ingest/ingests/string/protein_links.py +++ 
b/src/monarch_ingest/ingests/string/protein_links.py @@ -3,7 +3,7 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction +from biolink_model.datamodel.pydanticmodel_v2 import PairwiseGeneToGeneInteraction, KnowledgeLevelEnum, AgentTypeEnum from loguru import logger @@ -56,7 +56,9 @@ def sorted_id_pair(row) -> str: has_evidence=has_evidence if has_evidence else None, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:string" + primary_knowledge_source="infores:string", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.not_provided ) seen_rows.add(sorted_id_pair(row)) entities.append(association) diff --git a/src/monarch_ingest/ingests/string/protein_links.yaml b/src/monarch_ingest/ingests/string/protein_links.yaml index 08e62d96..5152e7b9 100644 --- a/src/monarch_ingest/ingests/string/protein_links.yaml +++ b/src/monarch_ingest/ingests/string/protein_links.yaml @@ -59,5 +59,7 @@ edge_properties: - 'aggregator_knowledge_source' - 'primary_knowledge_source' - 'has_evidence' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.py b/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.py index 348824e7..6bbf74ca 100644 --- a/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.py +++ b/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.py @@ -5,7 +5,7 @@ from biolink_model.datamodel.pydanticmodel_v2 import ( Gene, GeneToPhenotypicFeatureAssociation, - PhenotypicFeature + PhenotypicFeature, KnowledgeLevelEnum, AgentTypeEnum ) koza_app = get_koza_app("xenbase_gene_to_phenotype") @@ -29,7 +29,9 @@ object=phenotype.id, publications=[row["SOURCE"]], aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:xenbase" + primary_knowledge_source="infores:xenbase", + 
knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) if row["SOURCE"]: diff --git a/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.yaml index 842c2c60..40084f30 100644 --- a/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/xenbase/gene_to_phenotype.yaml @@ -37,5 +37,7 @@ edge_properties: - 'publications' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.py b/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.py index 525b5b74..61066207 100644 --- a/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.py +++ b/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.py @@ -6,7 +6,7 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToGeneHomologyAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToGeneHomologyAssociation, AgentTypeEnum, KnowledgeLevelEnum from loguru import logger @@ -31,7 +31,9 @@ predicate=predicate, object=f"OMIM:{omim_id}", aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:xenbase" + primary_knowledge_source="infores:xenbase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) # Write the captured Association out @@ -44,7 +46,9 @@ predicate=predicate, object=f"MGI:{mgi_id}", aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:xenbase" + primary_knowledge_source="infores:xenbase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) # Write the captured Association out @@ -57,7 +61,9 @@ predicate=predicate, object=f"ZFIN:{zfin_id}", aggregator_knowledge_source=["infores:monarchinitiative"], - 
primary_knowledge_source="infores:xenbase" + primary_knowledge_source="infores:xenbase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) # Write the captured Association out diff --git a/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.yaml b/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.yaml index f9d08099..58c3dd3d 100644 --- a/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.yaml +++ b/src/monarch_ingest/ingests/xenbase/non_entrez_orthologs.yaml @@ -35,5 +35,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/xenbase/orthologs.py b/src/monarch_ingest/ingests/xenbase/orthologs.py index 875974a4..1999447b 100644 --- a/src/monarch_ingest/ingests/xenbase/orthologs.py +++ b/src/monarch_ingest/ingests/xenbase/orthologs.py @@ -6,7 +6,7 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToGeneHomologyAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToGeneHomologyAssociation, AgentTypeEnum, KnowledgeLevelEnum from loguru import logger @@ -30,7 +30,9 @@ predicate=predicate, object=f"NCBIGene:{ortholog_id}", aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:xenbase" + primary_knowledge_source="infores:xenbase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) # Write the captured Association out diff --git a/src/monarch_ingest/ingests/xenbase/orthologs.yaml b/src/monarch_ingest/ingests/xenbase/orthologs.yaml index fc17f76b..ae34277f 100644 --- a/src/monarch_ingest/ingests/xenbase/orthologs.yaml +++ b/src/monarch_ingest/ingests/xenbase/orthologs.yaml @@ -33,5 +33,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' 
transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/xenbase/publication_to_gene.py b/src/monarch_ingest/ingests/xenbase/publication_to_gene.py index 7ae4d482..8ce3baae 100644 --- a/src/monarch_ingest/ingests/xenbase/publication_to_gene.py +++ b/src/monarch_ingest/ingests/xenbase/publication_to_gene.py @@ -3,7 +3,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation +from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \ + KnowledgeLevelEnum from loguru import logger @@ -37,7 +38,9 @@ predicate="biolink:mentions", object=publication_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:xenbase" + primary_knowledge_source="infores:xenbase", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) entities.append(association) diff --git a/src/monarch_ingest/ingests/xenbase/publication_to_gene.yaml b/src/monarch_ingest/ingests/xenbase/publication_to_gene.yaml index 104f5b50..5eb68da2 100644 --- a/src/monarch_ingest/ingests/xenbase/publication_to_gene.yaml +++ b/src/monarch_ingest/ingests/xenbase/publication_to_gene.yaml @@ -26,5 +26,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/zfin/gene_to_phenotype.py b/src/monarch_ingest/ingests/zfin/gene_to_phenotype.py index 025867f7..bde87fae 100644 --- a/src/monarch_ingest/ingests/zfin/gene_to_phenotype.py +++ b/src/monarch_ingest/ingests/zfin/gene_to_phenotype.py @@ -3,7 +3,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation +from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation, KnowledgeLevelEnum, \ + 
AgentTypeEnum from loguru import logger @@ -40,7 +41,9 @@ object=zp_term, publications=["ZFIN:" + row["Publication ID"]], aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:zfin" + primary_knowledge_source="infores:zfin", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/zfin/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/zfin/gene_to_phenotype.yaml index fb46c577..32141d93 100644 --- a/src/monarch_ingest/ingests/zfin/gene_to_phenotype.yaml +++ b/src/monarch_ingest/ingests/zfin/gene_to_phenotype.yaml @@ -52,6 +52,8 @@ edge_properties: - 'publications' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/zfin/publication_to_gene.py b/src/monarch_ingest/ingests/zfin/publication_to_gene.py index 0a6e2cd7..65246f24 100644 --- a/src/monarch_ingest/ingests/zfin/publication_to_gene.py +++ b/src/monarch_ingest/ingests/zfin/publication_to_gene.py @@ -3,7 +3,8 @@ from koza.cli_runner import get_koza_app -from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation +from biolink_model.datamodel.pydanticmodel_v2 import InformationContentEntityToNamedThingAssociation, AgentTypeEnum, \ + KnowledgeLevelEnum from loguru import logger @@ -21,7 +22,9 @@ predicate="biolink:mentions", object=gene_id, aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:zfin" + primary_knowledge_source="infores:zfin", + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association) diff --git a/src/monarch_ingest/ingests/zfin/publication_to_gene.yaml b/src/monarch_ingest/ingests/zfin/publication_to_gene.yaml index feee6cc8..f504facd 100644 --- 
a/src/monarch_ingest/ingests/zfin/publication_to_gene.yaml +++ b/src/monarch_ingest/ingests/zfin/publication_to_gene.yaml @@ -26,5 +26,7 @@ edge_properties: - 'object' - 'aggregator_knowledge_source' - 'primary_knowledge_source' + - 'knowledge_level' + - 'agent_type' transform_mode: 'flat' From 38709b3dad45c4ee77476ce39b56f163d3818090 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Thu, 11 Apr 2024 19:31:16 -0700 Subject: [PATCH 2/6] Added expected check to expect ingests and counts to go with them --- src/monarch_ingest/main.py | 25 ++++++++++++-- src/monarch_ingest/qc_expect.yaml | 56 +++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 src/monarch_ingest/qc_expect.yaml diff --git a/src/monarch_ingest/main.py b/src/monarch_ingest/main.py index 17dcf5b7..10fc159b 100644 --- a/src/monarch_ingest/main.py +++ b/src/monarch_ingest/main.py @@ -1,5 +1,7 @@ +import sys from typing import List, Optional +import yaml from kghub_downloader.download_utils import download_from_yaml from monarch_ingest.cli_utils import ( apply_closure, @@ -111,8 +113,27 @@ def merge( ), ): """Merge nodes and edges into kg""" - merge_files(input_dir=input_dir, output_dir=output_dir, verbose=verbose) - + # merge_files(input_dir=input_dir, output_dir=output_dir, verbose=verbose) + + # load qc_report.yaml from output_dir + qc_report = yaml.safe_load(open(f"{output_dir}/qc_report.yaml")) + edge_counts = {item["name"]: item["total_number"] for item in qc_report["edges"]} + # load expected count yaml + expected_counts = yaml.safe_load(open(f"src/monarch_ingest/qc_expect.yaml")) + error = False + for type in ['nodes', 'edges']: + counts = {item["name"]: item["total_number"] for item in qc_report[type]} + for key in expected_counts[type]["provided_by"]: + expected = expected_counts[type]["provided_by"][key]["min"] + if key not in counts: + error = True + print(f"ERROR: {type} {key} not found in qc_report.yaml") + else: + if not counts[key] > expected: 
+ error = True + print(f"WARNING: expected {key} to have {expected} {type}, only found {counts[key]}") + if error: + sys.exit(1) @typer_app.command() def closure(): diff --git a/src/monarch_ingest/qc_expect.yaml b/src/monarch_ingest/qc_expect.yaml new file mode 100644 index 00000000..c28daf97 --- /dev/null +++ b/src/monarch_ingest/qc_expect.yaml @@ -0,0 +1,56 @@ +nodes: + provided_by: + alliance_gene_nodes: + min: 290000 + dictybase_gene_nodes: + min: 14000 + hgnc_gene_nodes: + min: 43000 + ncbi_gene_nodes: + min: 196000 + phenio_nodes: + min: 288000 + pombase_gene_nodes: + min: 5000 + reactome_pathway_nodes: + min: 21000 +edges: + provided_by: + alliance_gene_to_expression_edges: + min: 1870000 + alliance_gene_to_phenotype_edges: + min: 300000 + bgee_gene_to_expression_edges: + min: 430000 + biogrid_edges: + min: 1400000 + ctd_chemical_to_disease_edges: + min: 5000 + dictybase_gene_to_phenotype_edges: + min: 1100 + go_annotation_edges: + min: 2500000 + hpoa_disease_mode_of_inheritance_edges: + min: 8400 + hpoa_disease_to_phenotype_edges: + min: 240000 + hpoa_gene_to_disease_edges: + min: 15000 + hpoa_gene_to_phenotype_edges: + min: 300000 + panther_genome_orthologs_edges: + min: 550000 + phenio_edges: + min: 670000 + pombase_gene_to_phenotype_edges: + min: 160000 + reactome_chemical_to_pathway_edges: + min: 65000 + reactome_gene_to_pathway_edges: + min: 200000 + string_protein_links_edges: + min: 1490000 + xenbase_gene_to_phenotype_edges: + min: 2000 + zfin_gene_to_phenotype_edges: + min: 148000 From 5040a7f13b8b585103fb158cd3c7a51e092a7ad1 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Fri, 12 Apr 2024 12:57:17 -0700 Subject: [PATCH 3/6] Turn the merge back on, adjust warning vs error threshold for ingest count expectations --- src/monarch_ingest/main.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/monarch_ingest/main.py b/src/monarch_ingest/main.py index 10fc159b..481eeaad 100644 --- a/src/monarch_ingest/main.py +++ 
b/src/monarch_ingest/main.py @@ -113,7 +113,7 @@ def merge( ), ): """Merge nodes and edges into kg""" - # merge_files(input_dir=input_dir, output_dir=output_dir, verbose=verbose) + merge_files(input_dir=input_dir, output_dir=output_dir, verbose=verbose) # load qc_report.yaml from output_dir qc_report = yaml.safe_load(open(f"{output_dir}/qc_report.yaml")) @@ -125,13 +125,16 @@ def merge( counts = {item["name"]: item["total_number"] for item in qc_report[type]} for key in expected_counts[type]["provided_by"]: expected = expected_counts[type]["provided_by"][key]["min"] + way_less_than_expected = expected * 0.7 # 70% is our threshold for "way" apparently if key not in counts: error = True print(f"ERROR: {type} {key} not found in qc_report.yaml") else: - if not counts[key] > expected: - error = True + if counts[key] < expected and counts[key] > way_less_than_expected: print(f"WARNING: expected {key} to have {expected} {type}, only found {counts[key]}") + elif counts[key] < expected * 0.7: + print(f"ERROR: expected {key} to have {expected} {type}, only found {counts[key]}") + error = True if error: sys.exit(1) From eb66b3f73454c160590df0d346eb9dcc5ed09b2a Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Fri, 12 Apr 2024 13:20:54 -0700 Subject: [PATCH 4/6] Update linkml, temporarily use monarch-app branch to get upcoming schema --- poetry.lock | 27 ++++++++++++++++----------- pyproject.toml | 4 ++-- scripts/load_solr.sh | 4 ++-- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index 70afe73d..d21c2d45 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1553,13 +1553,13 @@ typer-cli = ">=0.0.13,<0.0.14" [[package]] name = "linkml" -version = "1.6.11" +version = "1.7.8" description = "Linked Open Data Modeling Language" optional = false -python-versions = ">=3.8.1,<4.0.0" +python-versions = "<4.0.0,>=3.8.1" files = [ - {file = "linkml-1.6.11-py3-none-any.whl", hash = 
"sha256:a7f90f8cc5bd3d171812b23eebde04de3b4318ac4f48a5eae476ce890f09907c"}, - {file = "linkml-1.6.11.tar.gz", hash = "sha256:963e88c1548c5462bc885e4cd96cabbda066866f8737bcc3bb92f4b0247419ae"}, + {file = "linkml-1.7.8-py3-none-any.whl", hash = "sha256:4b8ebe33b422517b08ca01802dc2899ac133c502a71d811c58fa6f4263130709"}, + {file = "linkml-1.7.8.tar.gz", hash = "sha256:af48ee1ad6751c8d20de2832dbeefe225da9be5f39a7f8ea7821a691cf7c0148"}, ] [package.dependencies] @@ -1572,11 +1572,11 @@ jinja2 = ">=3.1.0" jsonasobj2 = ">=1.0.3,<2.0.0" jsonschema = {version = ">=4.0.0", extras = ["format"]} linkml-dataops = "*" -linkml-runtime = ">=1.6.0" +linkml-runtime = ">=1.7.4" openpyxl = "*" parse = "*" prefixcommons = ">=0.1.7" -prefixmaps = ">=0.1.3" +prefixmaps = ">=0.2.2" pydantic = ">=1.0.0,<3.0.0" pyjsg = ">=0.11.6" pyshex = ">=0.7.20" @@ -1588,6 +1588,11 @@ requests = ">=2.22" sqlalchemy = ">=1.4.31" watchdog = ">=0.9.0" +[package.extras] +black = ["black (>=24.0.0)"] +shacl = ["pyshacl (>=0.25.0,<0.26.0)"] +tests = ["black (>=24.0.0)", "pyshacl (>=0.25.0,<0.26.0)"] + [[package]] name = "linkml-dataops" version = "0.1.0" @@ -1608,13 +1613,13 @@ linkml-runtime = ">=1.1.6" [[package]] name = "linkml-runtime" -version = "1.6.2" +version = "1.7.5" description = "Runtime environment for LinkML, the Linked open data modeling language" optional = false -python-versions = ">=3.7.6,<4.0.0" +python-versions = "<4.0,>=3.8" files = [ - {file = "linkml_runtime-1.6.2-py3-none-any.whl", hash = "sha256:d9a915faf0ee8c8749f42d38394a06d37f016e155e077ff9cb5c5a21f24e19d6"}, - {file = "linkml_runtime-1.6.2.tar.gz", hash = "sha256:bcbeff96d24433276755db85375e3dff0c9af0fd04f8a05f7ccd8a669a9e9877"}, + {file = "linkml_runtime-1.7.5-py3-none-any.whl", hash = "sha256:c58000c7c68fa97b7d76c50421a85a64e25f07eec5bcac464bc00c4cd79007a6"}, + {file = "linkml_runtime-1.7.5.tar.gz", hash = "sha256:b31197a5398359441ae1ed43470c54377a1d08db961366dda670300dddcd71d7"}, ] [package.dependencies] @@ -3985,4 +3990,4 @@ 
testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "c754cc30291543a0780f66564df458a651a2a56b8615a26375e5efe9b8dc4fcd" +content-hash = "2fcecdcba2df34fc83f7843b87da0d70c6f5be03bd9510d3ef7f2870ce6819d4" diff --git a/pyproject.toml b/pyproject.toml index 8a8ca346..7e553236 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,8 @@ closurizer = "0.5.1" kghub-downloader = "^0.3.2" kgx = { git = "https://github.com/biolink/kgx", branch = "master" } # ">=2.1" koza = ">=0.5.2" -linkml = "^1.6.3" -linkml-runtime = "1.6.2" +linkml = "^1.7.8" +linkml-runtime = "^1.7.5" linkml-solr = "0.1.5" # "^0.1.3" multi-indexer = "0.0.5" # Other Dependencies diff --git a/scripts/load_solr.sh b/scripts/load_solr.sh index ee98c344..25ca588b 100755 --- a/scripts/load_solr.sh +++ b/scripts/load_solr.sh @@ -16,8 +16,8 @@ echo "Download the schema from monarch-py" # retrieve the schema from the main branch on monarch-app -curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/main/backend/src/monarch_py/datamodels/model.yaml -curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/main/backend/src/monarch_py/datamodels/similarity.yaml +curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/issue-675-add-kl-at/backend/src/monarch_py/datamodels/model.yaml +curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/issue-675-add-kl-at/backend/src/monarch_py/datamodels/similarity.yaml echo "Starting the server" poetry run lsolr start-server From d554069d635dafeb8d4e9cb70abd6dbf554b7058 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Tue, 16 Apr 2024 08:25:37 -0700 Subject: [PATCH 5/6] Add handling of knowledge_level & agent_type in phenio transform, stick with linkml 1.6 for now to avoid parsing problem related to an older biolink model that bmt looks at --- poetry.lock | 27 +++++++++++----------------
pyproject.toml | 4 ++-- src/monarch_ingest/cli_utils.py | 16 +++++++++++++++- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/poetry.lock b/poetry.lock index d21c2d45..2a0a3446 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1553,13 +1553,13 @@ typer-cli = ">=0.0.13,<0.0.14" [[package]] name = "linkml" -version = "1.7.8" +version = "1.6.3" description = "Linked Open Data Modeling Language" optional = false -python-versions = "<4.0.0,>=3.8.1" +python-versions = ">=3.8.1,<4.0.0" files = [ - {file = "linkml-1.7.8-py3-none-any.whl", hash = "sha256:4b8ebe33b422517b08ca01802dc2899ac133c502a71d811c58fa6f4263130709"}, - {file = "linkml-1.7.8.tar.gz", hash = "sha256:af48ee1ad6751c8d20de2832dbeefe225da9be5f39a7f8ea7821a691cf7c0148"}, + {file = "linkml-1.6.3-py3-none-any.whl", hash = "sha256:2bcfa9e35b3e0d868f396fa48ede1b9e3c6016df956dd5ca92d70ff621ffc77e"}, + {file = "linkml-1.6.3.tar.gz", hash = "sha256:2692243c9fb1c262ea83b725c70d6f537844ae7a9405101c94bccf65def6d952"}, ] [package.dependencies] @@ -1572,11 +1572,11 @@ jinja2 = ">=3.1.0" jsonasobj2 = ">=1.0.3,<2.0.0" jsonschema = {version = ">=4.0.0", extras = ["format"]} linkml-dataops = "*" -linkml-runtime = ">=1.7.4" +linkml-runtime = ">=1.6.0" openpyxl = "*" parse = "*" prefixcommons = ">=0.1.7" -prefixmaps = ">=0.2.2" +prefixmaps = ">=0.1.3" pydantic = ">=1.0.0,<3.0.0" pyjsg = ">=0.11.6" pyshex = ">=0.7.20" @@ -1588,11 +1588,6 @@ requests = ">=2.22" sqlalchemy = ">=1.4.31" watchdog = ">=0.9.0" -[package.extras] -black = ["black (>=24.0.0)"] -shacl = ["pyshacl (>=0.25.0,<0.26.0)"] -tests = ["black (>=24.0.0)", "pyshacl (>=0.25.0,<0.26.0)"] - [[package]] name = "linkml-dataops" version = "0.1.0" @@ -1613,13 +1608,13 @@ linkml-runtime = ">=1.1.6" [[package]] name = "linkml-runtime" -version = "1.7.5" +version = "1.6.3" description = "Runtime environment for LinkML, the Linked open data modeling language" optional = false -python-versions = "<4.0,>=3.8" +python-versions = ">=3.7.6,<4.0.0" files = [ - {file = 
"linkml_runtime-1.7.5-py3-none-any.whl", hash = "sha256:c58000c7c68fa97b7d76c50421a85a64e25f07eec5bcac464bc00c4cd79007a6"}, - {file = "linkml_runtime-1.7.5.tar.gz", hash = "sha256:b31197a5398359441ae1ed43470c54377a1d08db961366dda670300dddcd71d7"}, + {file = "linkml_runtime-1.6.3-py3-none-any.whl", hash = "sha256:08f616302ce493be775104c87bd5a2ec0eb2c67624e611a3f7e28c978d08a4d1"}, + {file = "linkml_runtime-1.6.3.tar.gz", hash = "sha256:88e3d0b776055723d187128c03527145dc1ffdc2d2a69e89f8aabce203d418a3"}, ] [package.dependencies] @@ -3990,4 +3985,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "2fcecdcba2df34fc83f7843b87da0d70c6f5be03bd9510d3ef7f2870ce6819d4" +content-hash = "124411a2f853848695af9adf07d18c32ea666412954c689edf45894400daa226" diff --git a/pyproject.toml b/pyproject.toml index 7e553236..fccf2ab6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,7 @@ closurizer = "0.5.1" kghub-downloader = "^0.3.2" kgx = { git = "https://github.com/biolink/kgx", branch = "master" } # ">=2.1" koza = ">=0.5.2" -linkml = "^1.7.8" -linkml-runtime = "^1.7.5" +linkml = "1.6.3" linkml-solr = "0.1.5" # "^0.1.3" multi-indexer = "0.0.5" # Other Dependencies @@ -38,6 +37,7 @@ sh = "^1.14.3" typer = "^0.7" typer-cli = "^0.0.13" yamllint = "^1.35.1" +linkml-runtime = "1.6.3" [tool.poetry.group.dev] optional = true diff --git a/src/monarch_ingest/cli_utils.py b/src/monarch_ingest/cli_utils.py index 85d2c0e3..eea60384 100644 --- a/src/monarch_ingest/cli_utils.py +++ b/src/monarch_ingest/cli_utils.py @@ -1,6 +1,7 @@ import csv import gc import os +import pkgutil import sys import tarfile import yaml @@ -188,15 +189,28 @@ def transform_phenio( "predicate", "object", "category", - "relation", "primary_knowledge_source", "aggregator_knowledge_source", + "knowledge_level", + "agent_type" ] ), axis=1, inplace=True, ) + # if knowledge level doesn't exist, add it and 
assign to knowledge_assertion + if "knowledge_level" not in edges_df.columns: + edges_df["knowledge_level"] = "knowledge_assertion" + # same for agent_type, setting it to manual_agent + if "agent_type" not in edges_df.columns: + edges_df["agent_type"] = "manual_agent" + + # prepend infores:monarchinitiative to the aggregator_knowledge_source column for edges that don't have it + edges_df["aggregator_knowledge_source"] = edges_df["aggregator_knowledge_source"].apply( + lambda x: f"infores:monarchinitiative|{x}" if not x.startswith("infores:monarchinitiative") else x + ) + edges_df = edges_df[edges_df["predicate"].str.contains(":")] # assign level association category if edge category is empty From efdea051721e0a7fd5ea6ec7fc4a34456fcda0c7 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Tue, 16 Apr 2024 08:40:55 -0700 Subject: [PATCH 6/6] Update ctd kl/at --- src/monarch_ingest/ingests/ctd/chemical_to_disease.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/monarch_ingest/ingests/ctd/chemical_to_disease.py b/src/monarch_ingest/ingests/ctd/chemical_to_disease.py index efe5fa3c..ef4821a4 100644 --- a/src/monarch_ingest/ingests/ctd/chemical_to_disease.py +++ b/src/monarch_ingest/ingests/ctd/chemical_to_disease.py @@ -27,8 +27,8 @@ publications=["PMID:" + p for p in row['PubMedIDs'].split("|")], aggregator_knowledge_source=["infores:monarchinitiative"], primary_knowledge_source="infores:ctd", - knowledge_level=KnowledgeLevelEnum.not_provided, - agent_type=AgentTypeEnum.not_provided + knowledge_level=KnowledgeLevelEnum.knowledge_assertion, + agent_type=AgentTypeEnum.manual_agent ) koza_app.write(association)