Skip to content

Commit

Permalink
Fixes Min Score Bug
Browse files Browse the repository at this point in the history
Fixes a bug where the min_score argument was ignored when using any mapper other than TFIDF
  • Loading branch information
paynejason committed May 12, 2023
1 parent 70d95c2 commit f76c562
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 11 deletions.
12 changes: 6 additions & 6 deletions test/simple-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ def main():
pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl"
ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl"
# print(bioregistry.get_owl_download("eFo"))
# if not text2term.cache_exists("EFO"):
# cached_onto = text2term.cache_ontology("EFO")
# # df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes")
# print("Cache exists:", cached_onto.cache_exists())
caches = text2term.cache_ontology_set("text2term/resources/ontologies.csv")
df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", excl_deprecated=True, use_cache=True, term_type="classes")
if not text2term.cache_exists("EFO"):
cached_onto = text2term.cache_ontology("EFO")
# df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes")
print("Cache exists:", cached_onto.cache_exists())
# caches = text2term.cache_ontology_set("text2term/resources/ontologies.csv")
df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", min_score=.8, mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, term_type="classes")
# df = text2term.map_terms(["contains", "asthma"], "EFO", term_type="classes")
print(df.to_string())

Expand Down
2 changes: 1 addition & 1 deletion text2term/config.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = "2.3.1"
VERSION = "2.3.2"
17 changes: 13 additions & 4 deletions text2term/t2t.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,18 +219,27 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ
def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score):
# Run the mapper implementation selected by `mapper` over the source terms and
# return its mappings restricted to rows scoring at least `min_score`.
#
# NOTE(review): this listing appears to be a rendered diff with the +/- markers
# (and indentation) stripped. In each branch the `return term_mapper.map(...)`
# line looks like the removed pre-change line, and the `mappings_df = ...` line
# directly below it looks like its replacement. Read literally, the early
# returns would make the final filtering step unreachable.
#
# Raises ValueError when `mapper` matches none of the supported Mapper members.
if mapper == Mapper.TFIDF:
term_mapper = TFIDFMapper(ontology_terms)
return term_mapper.map(source_terms, source_term_ids, max_mappings=max_mappings, min_score=min_score)
# TFIDF is the only branch that forwards min_score to the mapper itself.
mappings_df = term_mapper.map(source_terms, source_term_ids, max_mappings=max_mappings, min_score=min_score)
elif mapper == Mapper.ZOOMA:
term_mapper = ZoomaMapper()
return term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
# Here `ontology_terms` is passed as the `ontologies` argument; min_score is not forwarded.
mappings_df = term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
elif mapper == Mapper.BIOPORTAL:
# NOTE(review): the constructor argument is a hard-coded literal that looks like
# an API key — confirm, and consider loading it from configuration instead.
term_mapper = BioPortalAnnotatorMapper("8f0cbe43-2906-431a-9572-8600d3f4266e")
return term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
# As with ZOOMA, min_score is not forwarded to the mapper.
mappings_df = term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
elif mapper in {Mapper.LEVENSHTEIN, Mapper.JARO, Mapper.JARO_WINKLER, Mapper.INDEL, Mapper.FUZZY, Mapper.JACCARD}:
# All of these mappers are handled by SyntacticMapper, which receives `mapper`
# as a positional argument.
term_mapper = SyntacticMapper(ontology_terms)
return term_mapper.map(source_terms, source_term_ids, mapper, max_mappings=max_mappings)
# min_score is not forwarded here either.
mappings_df = term_mapper.map(source_terms, source_term_ids, mapper, max_mappings=max_mappings)
else:
# NOTE(review): `"..." + mapper` raises TypeError unless `mapper` is a str;
# presumably Mapper is an enum, in which case str(mapper) or an f-string is
# needed for the intended ValueError to surface — confirm.
raise ValueError("Unsupported mapper: " + mapper)
# Apply the score threshold uniformly, since only the TFIDF branch passes
# min_score to its mapper.
df = _filter_mappings(mappings_df, min_score)
return df

def _filter_mappings(mappings_df, min_score):
new_df = pd.DataFrame(columns=mappings_df.columns)
for index, row in mappings_df.iterrows():
if row['Mapping Score'] >= min_score:
new_df.loc[len(new_df.index)] = row
return new_df

def _save_mappings(mappings, output_file, min_score, mapper, target_ontology, base_iris, excl_deprecated, max_mappings, term_type):
if os.path.dirname(output_file): # create output directories if needed
Expand Down

0 comments on commit f76c562

Please sign in to comment.