diff --git a/python-requirements-apple-silicon.txt b/python-requirements-apple-silicon.txt index 7af503b98..53f694ff7 100644 --- a/python-requirements-apple-silicon.txt +++ b/python-requirements-apple-silicon.txt @@ -15,7 +15,7 @@ charset-normalizer==3.3.2 class-resolver==0.4.2 click==8.1.7 colorama==0.4.6 -curies==0.7.4 +curies==0.7.6 Deprecated==1.2.14 deprecation==2.1.0 distlib==0.3.7 diff --git a/python-requirements.txt b/python-requirements.txt index 4731bf75f..ca771da21 100644 --- a/python-requirements.txt +++ b/python-requirements.txt @@ -22,7 +22,7 @@ class-resolver==0.4.2 click==8.1.7 colorama==0.4.6 commonmark==0.9.1 -curies==0.6.4 +curies==0.7.6 decorator==5.1.1 Deprecated==1.2.13 deprecation==2.1.0 diff --git a/src/ontology/config/prefixes.csv b/src/ontology/config/prefixes.csv index f29f5f415..0efab0571 100644 --- a/src/ontology/config/prefixes.csv +++ b/src/ontology/config/prefixes.csv @@ -3,7 +3,7 @@ rdf,http://www.w3.org/1999/02/22-rdf-syntax-ns# rdfs,http://www.w3.org/2000/01/rdf-schema# xsd,http://www.w3.org/2001/XMLSchema# owl,http://www.w3.org/2002/07/owl# -oio,http://www.geneontology.org/formats/oboInOwl# +oboInOwl,http://www.geneontology.org/formats/oboInOwl# dce,http://purl.org/dc/elements/1.1/ dct,http://purl.org/dc/terms/ foaf,http://xmlns.com/foaf/0.1/ diff --git a/src/ontology/metadata/mondo.sssom.config.yml b/src/ontology/metadata/mondo.sssom.config.yml index 6875e9cfb..6fb52f357 100644 --- a/src/ontology/metadata/mondo.sssom.config.yml +++ b/src/ontology/metadata/mondo.sssom.config.yml @@ -34,7 +34,7 @@ curie_map: semapv: https://w3id.org/semapv/vocab/ rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# sssom: https://w3id.org/sssom/ - oio: http://www.geneontology.org/formats/oboInOwl# +# oio: http://www.geneontology.org/formats/oboInOwl# GTR: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/GTR/" NCI: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/NCI/" NIFSTD: "http://purl.obolibrary.org/obo/mondo/mappings/unknown_prefix/NIFSTD/" diff --git a/src/scripts/match-mondo-sources-all-lexical.py b/src/scripts/match-mondo-sources-all-lexical.py index 1da046468..40eb28a10 100644 --- a/src/scripts/match-mondo-sources-all-lexical.py +++ b/src/scripts/match-mondo-sources-all-lexical.py @@ -1,17 +1,20 @@ -# Basic matching pipeline that takes in +"""Match mondo sources, all lexical +Basic matching pipeline that takes in -# Input: -# 1. MERGED_ONTOLOGY = tmp/merged.owl -# 2. SSSOM_CONFIG = metadata/mondo.sssom.config.yml -# 3. OUTPUT_SSSOM = mapping/mondo-sources-all-lexical.sssom.tsv +Input: +1. MERGED_ONTOLOGY = tmp/merged.owl +2. SSSOM_CONFIG = metadata/mondo.sssom.config.yml +3. OUTPUT_SSSOM = mapping/mondo-sources-all-lexical.sssom.tsv -# I would try some basic things first: - -# Use synonymiser -# Use oak.mapping() pipeline +I would try some basic things first: +Use synonymiser +Use oak.mapping() pipeline +""" import logging from pathlib import Path + +from curies import Converter from oaklib.resource import OntologyResource from oaklib.implementations.sqldb.sql_implementation import SqlImplementation from oaklib.utilities.lexical.lexical_indexer import ( @@ -25,11 +28,11 @@ import yaml import pandas as pd -from sssom.constants import SUBJECT_ID, OBJECT_ID, PREDICATE_MODIFIER +from sssom.constants import SUBJECT_ID, OBJECT_ID from sssom.util import filter_prefixes, is_curie, is_iri from sssom.parsers import parse_sssom_table from sssom.writers import write_table -from sssom.io import get_metadata_and_prefix_map, filter_file +from sssom.io import filter_file from bioregistry import curie_from_iri SRC = Path(__file__).resolve().parents[1] @@ -49,6 +52,7 @@ ) +# todo: duplicated code fragment w/ lexmatch-sssom-compare: solution, move to a lexmatch_utils.py and import to both @click.group() @click.option("-v", "--verbose", count=True) @click.option("-q", "--quiet") @@ -82,12 +86,12 @@ def main(verbose: int, quiet: bool): help="SSSOM TSV file containing rejected mappings that need to be filtered out.", ) @output_option -def run(input: str, config: str, rules: str, rejects: str, output: str): - # Implemented `meta` param in `lexical_index_to_sssom` - - meta = get_metadata_and_prefix_map(config) +def run(_input: str, config: str, rules: str, rejects: str, output: str): + """Run the script""" + # Get metadata config with open(config, "r") as f: yml = yaml.safe_load(f) + converter = Converter.from_extended_prefix_map(yml.pop('extended_prefix_map', {})) # Get mondo.sssom.tsv mapping_msdf = parse_sssom_table(SSSOM_MAP_FILE) @@ -108,9 +112,7 @@ def run(input: str, config: str, rules: str, rejects: str, output: str): # .reset_index(drop=True) # ) - prefix_of_interest = yml["subject_prefixes"] - - resource = OntologyResource(slug=f"sqlite:///{Path(input).absolute()}") + resource = OntologyResource(slug=f"sqlite:///{Path(_input).absolute()}") oi = SqlImplementation(resource=resource) ruleset = load_mapping_rules(rules) # syn_rules = [x.synonymizer for x in ruleset.rules if x.synonymizer] @@ -118,9 +120,9 @@ def run(input: str, config: str, rules: str, rejects: str, output: str): save_lexical_index(lexical_index, OUT_INDEX_DB) if rules: - msdf = lexical_index_to_sssom(oi, lexical_index, ruleset=ruleset, meta=meta) + msdf = lexical_index_to_sssom(oi, lexical_index, ruleset=ruleset, prefix_map=converter) else: - msdf = lexical_index_to_sssom(oi, lexical_index, meta=meta) + msdf = lexical_index_to_sssom(oi, lexical_index) # msdf.prefix_map = sssom_yaml['curie_map'] # msdf.metadata = sssom_yaml['global_metadata'] @@ -131,8 +133,9 @@ def run(input: str, config: str, rules: str, rejects: str, output: str): # msdf.df[OBJECT_ID] = msdf.df[OBJECT_ID].apply( # lambda x: iri_to_curie(x) if x.startswith("