diff --git a/makefile b/makefile index 76d3244..76beb06 100644 --- a/makefile +++ b/makefile @@ -42,13 +42,7 @@ tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.x wget https://www.orphadata.com/data/nomenclature/packs/Orphanet_Nomenclature_Pack_EN.zip -O tmp/input/Orphanet_Nomenclature_Pack_EN.zip unzip tmp/input/Orphanet_Nomenclature_Pack_EN.zip -d tmp/input/Orphanet_Nomenclature_Pack_EN -tmp/input/ordo.owl: | tmp/input/ - wget http://www.orphadata.org/data/ORDO/ordo_orphanet.owl -O $@ - -tmp/output/icd11mms-exact-matches.tsv: tmp/input/ordo.owl - robot query -i $< --query src/icd11mms-exact-matches.sparql $@ - -tmp/output/release/ordo-icd11.sssom.tsv: tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml tmp/output/icd11mms-exact-matches.tsv | tmp/output/release/ +tmp/output/release/ordo-icd11.sssom.tsv: tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml | tmp/output/release/ python3 src/mappings.py \ --input-nomenclature-xml tmp/input/Orphanet_Nomenclature_Pack_EN/ORPHA_ICD11_mapping_en_newversion_2023.xml \ --input-sssom-config config/icd11.sssom-metadata.yml \ diff --git a/src/icd11mms-exact-matches.sparql b/src/icd11mms-exact-matches.sparql deleted file mode 100644 index 59b0894..0000000 --- a/src/icd11mms-exact-matches.sparql +++ /dev/null @@ -1,22 +0,0 @@ -prefix skos: -prefix ECO: -prefix owl: -prefix oboInOwl: -prefix sssom: - -SELECT ?cls ?xref -WHERE { - VALUES ?mapping_pred { oboInOwl:hasDbXref } - - ?cls a owl:Class; - ?mapping_pred ?xref . - - ?xref_anno a owl:Axiom ; - owl:annotatedSource ?cls ; - owl:annotatedProperty ?mapping_pred ; - owl:annotatedTarget ?xref ; - ECO:0000218 ?mapping_precision_string . - - FILTER(STRSTARTS(STR(?mapping_precision_string), "- E (Exact mapping: the two concepts are equivalent).")) - FILTER(STRSTARTS(STR(?xref), "ICD-11:")) -} diff --git a/src/temp_compare_matches_owl_and_nomenclature.py b/src/temp_compare_matches_owl_and_nomenclature.py deleted file mode 100644 index febaa44..0000000 --- a/src/temp_compare_matches_owl_and_nomenclature.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Temporary analysis file to compare exact matches to ICD11MMS between Orphanet OWL release and nomenclature files.""" -import pandas as pd - -from mappings import INPUT_MMS_MATCHES_TSV, xml_as_df - - -# Read and clean dataframes -nom_df = xml_as_df() -nom_df['predicate_id'] = nom_df['DisorderMappingRelation'].apply( - lambda x: 'skos:exactMatch' if x.startswith('E (') - else 'skos:narrowMatch' if x.startswith('NTBT (') - else 'skos:broadMatch' if x.startswith('BTNT (') - else 'skos:relatedMatch') # 0 instances of relatedMatch; just a fallback -nom_df = nom_df[nom_df['predicate_id'] == 'skos:exactMatch'][['OrphaCode', 'Reference']]\ - .sort_values(['OrphaCode', 'Reference']).reset_index(drop=True) - -owl_df = pd.read_csv(INPUT_MMS_MATCHES_TSV, sep="\t").rename(columns={ - '?cls': 'OrphaCode', - '?xref': 'Reference', -}).sort_values(['OrphaCode', 'Reference']).reset_index(drop=True) -owl_df['OrphaCode'] = owl_df['OrphaCode'].apply(lambda x: x.split('_')[1][:-1]) # get code and remove > char -owl_df['Reference'] = owl_df['Reference'].apply(lambda x: x.split(':')[1]) # get unprefixed code - -# Convert to sets of tuple edges -set_tups = lambda df: set(zip(df['OrphaCode'], df['Reference'])) -nom_set = set_tups(nom_df) # n=1,340 -owl_set = set_tups(owl_df) # n=1,459 -i = nom_set.intersection(owl_set) # n=1,323 -n_diff = nom_set.difference(owl_set) # n=17 -o_diff = owl_set.difference(nom_set) # n=136