From f76c562fca03498b415be8839e872d38953440c6 Mon Sep 17 00:00:00 2001
From: Jason Payne <paynejason@gmail.com>
Date: Fri, 12 May 2023 11:10:31 -0400
Subject: [PATCH] Fixes Min Score Bug

Fixes a bug where the min_score argument was ignored by every mapper other than TFIDF: the mappings produced by any mapper are now filtered by score before being returned.
---
 test/simple-test.py | 12 ++++++------
 text2term/config.py |  2 +-
 text2term/t2t.py    | 17 +++++++++++++----
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/test/simple-test.py b/test/simple-test.py
index 0145ca7..7143c1f 100644
--- a/test/simple-test.py
+++ b/test/simple-test.py
@@ -6,12 +6,12 @@ def main():
 	pizza = "https://protege.stanford.edu/ontologies/pizza/pizza.owl"
 	ncit = "http://purl.obolibrary.org/obo/ncit/releases/2022-08-19/ncit.owl"
 	# print(bioregistry.get_owl_download("eFo"))
-	# if not text2term.cache_exists("EFO"):
-	# 	cached_onto = text2term.cache_ontology("EFO")
-	# 	# df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes")
-	# 	print("Cache exists:", cached_onto.cache_exists())
-	caches = text2term.cache_ontology_set("text2term/resources/ontologies.csv")
-	df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", excl_deprecated=True, use_cache=True, term_type="classes")
+	if not text2term.cache_exists("EFO"):
+		cached_onto = text2term.cache_ontology("EFO")
+		# df = cached_onto.map_terms(["asthma", "disease location", "obsolete food allergy"], excl_deprecated=True, term_type="classes")
+		print("Cache exists:", cached_onto.cache_exists())
+	# caches = text2term.cache_ontology_set("text2term/resources/ontologies.csv")
+	df = text2term.map_terms(["asthma", "disease location", "obsolete food allergy"], "EFO", min_score=.8, mapper=text2term.Mapper.JARO_WINKLER, excl_deprecated=True, use_cache=True, term_type="classes")
 	# df = text2term.map_terms(["contains", "asthma"], "EFO", term_type="classes")
 	print(df.to_string())
 
diff --git a/text2term/config.py b/text2term/config.py
index 9b332ca..388faa9 100644
--- a/text2term/config.py
+++ b/text2term/config.py
@@ -1 +1 @@
-VERSION = "2.3.1"
\ No newline at end of file
+VERSION = "2.3.2"
\ No newline at end of file
diff --git a/text2term/t2t.py b/text2term/t2t.py
index 711c3c0..12cc402 100644
--- a/text2term/t2t.py
+++ b/text2term/t2t.py
@@ -219,18 +219,31 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ
 def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score):
+    """Map source_terms with the chosen mapper and return only mappings scoring at least min_score."""
     if mapper == Mapper.TFIDF:
         term_mapper = TFIDFMapper(ontology_terms)
-        return term_mapper.map(source_terms, source_term_ids, max_mappings=max_mappings, min_score=min_score)
+        mappings_df = term_mapper.map(source_terms, source_term_ids, max_mappings=max_mappings, min_score=min_score)
     elif mapper == Mapper.ZOOMA:
         term_mapper = ZoomaMapper()
-        return term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
+        mappings_df = term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
     elif mapper == Mapper.BIOPORTAL:
+        # NOTE(review): this literal looks like a hard-coded API key; consider reading it from configuration.
         term_mapper = BioPortalAnnotatorMapper("8f0cbe43-2906-431a-9572-8600d3f4266e")
-        return term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
+        mappings_df = term_mapper.map(source_terms, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
     elif mapper in {Mapper.LEVENSHTEIN, Mapper.JARO, Mapper.JARO_WINKLER, Mapper.INDEL, Mapper.FUZZY, Mapper.JACCARD}:
         term_mapper = SyntacticMapper(ontology_terms)
-        return term_mapper.map(source_terms, source_term_ids, mapper, max_mappings=max_mappings)
+        mappings_df = term_mapper.map(source_terms, source_term_ids, mapper, max_mappings=max_mappings)
     else:
-        raise ValueError("Unsupported mapper: " + mapper)
+        # str() keeps the message buildable when mapper is not a plain string (e.g. an enum member).
+        raise ValueError("Unsupported mapper: " + str(mapper))
+    # Only the TFIDF call above is given min_score, so the threshold is applied here for every mapper.
+    df = _filter_mappings(mappings_df, min_score)
+    return df
+
+def _filter_mappings(mappings_df, min_score):
+    """Return the rows of mappings_df whose 'Mapping Score' is >= min_score, re-indexed from 0."""
+    if mappings_df.empty:  # nothing to filter; also avoids a KeyError on a frame with no columns
+        return mappings_df.copy()
+    # Boolean mask instead of copying row by row: a single pass that keeps the column dtypes.
+    return mappings_df[mappings_df['Mapping Score'] >= min_score].reset_index(drop=True)
 
 def _save_mappings(mappings, output_file, min_score, mapper, target_ontology, base_iris, excl_deprecated, max_mappings, term_type):
     if os.path.dirname(output_file):  # create output directories if needed