Skip to content

Commit

Permalink
Merge branch 'main' into support-skos-mappings-for-simple-obo
Browse files: browse the repository at this point in the history
  • Loading branch information
cmungall authored May 16, 2024
2 parents 1780f83 + 377f5ac commit eaaa173
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 47 deletions.
2 changes: 1 addition & 1 deletion docs/howtos/use-oak-expression-language.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ To find all descendants of a term, use the ``.desc`` operator followed by the re

.. code-block::
runoak -i sqlite:obo:fbbt search '.desc//p=i,p nucleus'
runoak -i sqlite:obo:fbbt search .desc//p=i,p nucleus
This will find all descendants of "nucleus" using the :term:`is-a` (``i``) and
:term:`part-of` (``p``) relationships.
Expand Down
10 changes: 9 additions & 1 deletion src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1875,6 +1875,12 @@ def viz(
if stylemap is None:
stylemap = default_stylemap_path()
actual_predicates = _process_predicates_arg(predicates)
curies_hightlight = None
if "@" in terms:
ix = terms.index("@")
terms_highlight = terms[0:ix]
terms = terms[ix + 1 :]
curies_hightlight = list(query_terms_iterator(terms_highlight, impl))
curies = list(query_terms_iterator(terms, impl))
if add_mrcas:
if isinstance(impl, SemanticSimilarityInterface):
Expand Down Expand Up @@ -1910,11 +1916,13 @@ def viz(
impl.add_metadata(graph)
if not graph.nodes:
raise ValueError(f"No nodes in graph for {curies}")
if curies_hightlight is None:
curies_hightlight = curies
# TODO: abstract this out
if not output_type or output_type in ["png", "svg", "dot"]:
graph_to_image(
graph,
seeds=curies,
seeds=curies_hightlight,
stylemap=stylemap,
configure=configure,
imgfile=output,
Expand Down
8 changes: 7 additions & 1 deletion src/oaklib/conf/obograph-style.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@
"penwidth": 3,
"label": ""
},
"BFO:0000051": {
"arrowhead": "box",
"color": "teal",
"penwidth": 2,
"label": ""
},
"RO:0004009": {
"arrowhead": "box",
"color": "green",
Expand Down Expand Up @@ -170,7 +176,7 @@
"fillcolor": "cyan"
},
"CHEBI": {
"fillcolor": "cyan"
"fillcolor": "mediumturquoise"
},
"NCBITaxon": {
"fillcolor": "burlywood1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
)
from oaklib.datamodels.vocabulary import OWL_THING
from oaklib.implementations.sqldb.sql_implementation import SqlImplementation
from oaklib.interfaces.association_provider_interface import AssociationProviderInterface
from oaklib.interfaces.basic_ontology_interface import BasicOntologyInterface
from oaklib.interfaces.obograph_interface import OboGraphInterface
from oaklib.interfaces.search_interface import SearchInterface
Expand All @@ -28,7 +29,9 @@


@dataclass
class SemSimianImplementation(SearchInterface, SemanticSimilarityInterface, OboGraphInterface):
class SemSimianImplementation(
SearchInterface, SemanticSimilarityInterface, OboGraphInterface, AssociationProviderInterface
):
"""Rust implementation of semantic similarity measures."""

delegated_methods: ClassVar[List[str]] = [
Expand All @@ -47,6 +50,8 @@ class SemSimianImplementation(SearchInterface, SemanticSimilarityInterface, OboG
OboGraphInterface.descendants,
SemanticSimilarityInterface.get_information_content,
SemanticSimilarityInterface.information_content_scores,
AssociationProviderInterface.associations,
AssociationProviderInterface.add_associations,
]

semsimian_object_cache: Dict[Tuple[PRED_CURIE], Optional["Semsimian"]] = field(default_factory=dict) # type: ignore # noqa
Expand Down
31 changes: 16 additions & 15 deletions src/oaklib/interfaces/association_provider_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def associations(
raise NotImplementedError
ix = self._association_index
if ix is None:
logging.warning("No association index")
logging.warning(f"No association index for {type(self)}")
return
yield from ix.lookup(subjects, predicates, objects)

Expand Down Expand Up @@ -464,21 +464,22 @@ def association_counts(
)
assoc_map = defaultdict(list)
cached = {}
if isinstance(self, OboGraphInterface):
for association in association_it:
if group_by == "object":
grp = association.object
if grp not in cached:
grps = list(self.ancestors([grp], predicates=object_closure_predicates))
cached[grp] = grps
else:
grps = cached[grp]
elif group_by == "subject":
grps = [association.subject]
if not isinstance(self, OboGraphInterface):
raise ValueError("This method requires an OboGraphInterface")
for association in association_it:
if group_by == "object":
grp = association.object
if grp not in cached:
grps = list(self.ancestors([grp], predicates=object_closure_predicates))
cached[grp] = grps
else:
raise ValueError(f"Unknown group_by: {group_by}")
for grp in grps:
assoc_map[grp].append(association)
grps = cached[grp]
elif group_by == "subject":
grps = [association.subject]
else:
raise ValueError(f"Unknown group_by: {group_by}")
for grp in grps:
assoc_map[grp].append(association)
for k, v in assoc_map.items():
yield k, len(v)

Expand Down
35 changes: 20 additions & 15 deletions src/oaklib/interfaces/semsim_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,8 @@ def information_content_scores(
:return:
"""
curies = list(curies)
if self.cached_information_content_map is not None:
for curie in curies:
if curie in self.cached_information_content_map:
yield curie, self.cached_information_content_map[curie]
return
if use_associations:
if self.cached_information_content_map is None and use_associations:
logging.info("Calculating and caching IC map from associations")
from oaklib.interfaces.association_provider_interface import (
AssociationProviderInterface,
)
Expand All @@ -235,22 +231,31 @@ def information_content_scores(
raise ValueError(
f"unable to retrieve associations from this interface, type {type(self)}"
)
self.cached_information_content_map = {}
all_entities = set()
for a in self.associations():
all_entities.add(a.subject)
num_entities = len(all_entities)
logging.info(f"num_entities={num_entities}")
for term, count in self.association_subject_counts(
predicates=predicates, object_closure_predicates=object_closure_predicates
):
if count > num_entities:
raise AssertionError(f"Count {count} > num_entities {num_entities}")
self.cached_information_content_map[term] = -math.log(
count / num_entities
) / math.log(2)
if curies:
for curie in curies:
if curie not in self.cached_information_content_map:
self.cached_information_content_map[curie] = 0.0
if self.cached_information_content_map is not None:
logging.info("Using cached IC map")
for curie in curies:
entities = list(
self.associations_subjects(
objects=[curie],
predicates=predicates,
object_closure_predicates=object_closure_predicates,
)
)
if entities:
yield curie, -math.log(len(entities) / num_entities)
if curie in self.cached_information_content_map:
yield curie, self.cached_information_content_map[curie]
return
logging.info("Calculating and caching IC map from ontology")
all_entities = list(self.entities())
num_entities = len(all_entities)
if not isinstance(self, OboGraphInterface):
Expand Down
15 changes: 2 additions & 13 deletions tests/test_implementations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1996,21 +1996,10 @@ def test_information_content_scores(
use_associations=use_associations,
):
m[curie] = score
print(f"{curie} IC= {score}")
test.assertGreater(len(m), 0)
if use_associations:
test.assertCountEqual(
[
OWL_THING,
VACUOLE,
IMBO,
NUCLEAR_ENVELOPE,
NUCLEUS,
CELLULAR_COMPONENT,
PHOTORECEPTOR_OUTER_SEGMENT,
],
m.keys(),
)
test.assertEqual(m[CELLULAR_COMPONENT], 0.0, "all genes are under cell component")
# test.assertEqual(m[CELLULAR_COMPONENT], 0.0, "all genes are under cell component")
test.assertEqual(m[PHOTORECEPTOR_OUTER_SEGMENT], 0.0, "not in graph")
test.assertGreater(m[VACUOLE], 1.0)
test.assertLess(m[NUCLEAR_ENVELOPE], 1.0)
Expand Down

0 comments on commit eaaa173

Please sign in to comment.