Skip to content

Commit

Permalink
Add directionality param to semsim search api (#762)
Browse files Browse the repository at this point in the history
This passes the new directionality param back to semsimian-server (and
then to semsimian). No change to the UI yet.
  • Loading branch information
kevinschaper authored Jul 17, 2024
1 parent d89360b commit 164aaf1
Show file tree
Hide file tree
Showing 35 changed files with 96,251 additions and 85,290 deletions.
9 changes: 9 additions & 0 deletions backend/src/monarch_py/api/additional_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ class SemsimSearchGroup(Enum):
MONDO = "Human Diseases"


class SemsimDirectionality(str, Enum):
    """Directionality options accepted by the semsim search API.

    Members are string-valued (the class mixes in ``str``), so each member
    compares equal to its plain string value.
    """

    # NOTE(review): the exact matching semantics of each direction are defined
    # by the downstream semsimian service, not here -- confirm against its docs.
    BIDIRECTIONAL = "bidirectional"
    SUBJECT_TO_OBJECT = "subject_to_object"
    OBJECT_TO_SUBJECT = "object_to_subject"


class SemsimCompareRequest(BaseModel):
subjects: List[str] = Field(..., title="List of subjects for comparison")
objects: List[str] = Field(..., title="List of objects for comparison")
Expand All @@ -60,6 +66,9 @@ class SemsimSearchRequest(BaseModel):
# Fields of the semsim search request body (pydantic model attributes).
termset: List[str] = Field(..., title="Termset to search")
group: SemsimSearchGroup = Field(..., title="Group of entities to search within (e.g. Human Genes)")
metric: SemsimMetric = Field(SemsimMetric.ANCESTOR_INFORMATION_CONTENT, title="Similarity metric to use")
# Declared with pydantic's Field, like every other attribute on this model.
# A FastAPI Query(...) wrapped in parentheses with a trailing comma would make
# the default a one-element tuple rather than SemsimDirectionality.BIDIRECTIONAL,
# so requests that omit "directionality" would not get a valid enum default.
directionality: SemsimDirectionality = Field(SemsimDirectionality.BIDIRECTIONAL, title="Directionality of the search")
limit: Optional[int] = Field(10, title="Limit the number of results", ge=1, le=50)


Expand Down
8 changes: 7 additions & 1 deletion backend/src/monarch_py/api/semsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
SemsimSearchRequest,
SemsimSearchGroup,
SemsimMultiCompareRequest,
SemsimDirectionality,
)
from monarch_py.api.config import semsimian, solr
from monarch_py.api.utils.similarity_utils import parse_similarity_prefix
Expand Down Expand Up @@ -124,6 +125,9 @@ def _search(
termset: str = Path(..., title="Termset to search"),
group: SemsimSearchGroup = Path(..., title="Group of entities to search within (e.g. Human Genes)"),
metric: SemsimMetric = Query(SemsimMetric.ANCESTOR_INFORMATION_CONTENT, title="Similarity metric to use"),
directionality: SemsimDirectionality = Query(
SemsimDirectionality.BIDIRECTIONAL, title="Directionality of the search"
),
limit: int = Query(default=10, ge=1, le=50),
):
"""Search for terms in a termset
Expand All @@ -138,7 +142,9 @@ def _search(
List[str]: List of matching terms
"""
terms = [term.strip() for term in termset.split(",")]
results = semsimian().search(termset=terms, prefix=parse_similarity_prefix(group), metric=metric, limit=limit)
results = semsimian().search(
termset=terms, prefix=parse_similarity_prefix(group), metric=metric, directionality=directionality, limit=limit
)
return results


Expand Down
5 changes: 3 additions & 2 deletions backend/src/monarch_py/service/semsim_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pydantic import BaseModel

from monarch_py.api.additional_models import SemsimMetric, SemsimMultiCompareRequest
from monarch_py.api.additional_models import SemsimMetric, SemsimMultiCompareRequest, SemsimDirectionality
from monarch_py.datamodels.model import TermSetPairwiseSimilarity, SemsimSearchResult, Entity


Expand Down Expand Up @@ -75,10 +75,11 @@ def search(
termset: List[str],
prefix: str,
metric: SemsimMetric = SemsimMetric.ANCESTOR_INFORMATION_CONTENT,
directionality: SemsimDirectionality = SemsimDirectionality.BIDIRECTIONAL,
limit: int = 10,
) -> List[SemsimSearchResult]:
host = f"http://{self.semsim_server_host}:{self.semsim_server_port}"
path = f"search/{','.join(termset)}/{prefix}/{metric}?limit={limit}"
path = f"search/{','.join(termset)}/{prefix}:/{metric}?limit={limit}&directionality={directionality.value} "
url = f"{host}/{path}"

print(f"Fetching {url}...")
Expand Down
9 changes: 8 additions & 1 deletion backend/src/monarch_py/service/solr_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,14 @@ def query(self, q: SolrQuery) -> SolrQueryResult:
response.raise_for_status()
solr_query_result = SolrQueryResult.model_validate(data, from_attributes=True)
for doc in solr_query_result.response.docs:
self._strip_json(doc, "_version_", "iri")
self._strip_json(
doc,
"_version_",
"iri",
"frequency_computed_sortable_float",
"has_quotient_sortable_float",
"has_percentage_sortable_float",
)

return solr_query_result

Expand Down
23 changes: 19 additions & 4 deletions backend/tests/api/test_semsim_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
from fastapi import status
from unittest.mock import MagicMock, patch

from monarch_py.api.additional_models import SemsimMetric, SemsimSearchGroup, SemsimMultiCompareRequest
from monarch_py.api.additional_models import (
SemsimMetric,
SemsimSearchGroup,
SemsimMultiCompareRequest,
SemsimDirectionality,
)
from monarch_py.api.semsim import router
from monarch_py.datamodels.category_enums import AssociationPredicate, EntityCategory

Expand Down Expand Up @@ -79,9 +84,11 @@ def test_get_search(mock_search, termset: str, metric: SemsimMetric):
limit = 5

response = client.get(f"/search/{termset}/{group.value}?metric={metric}&limit={limit}")

directionality = SemsimDirectionality.BIDIRECTIONAL
assert response.status_code == status.HTTP_200_OK
mock_search.assert_called_once_with(termset=["HP:123", "HP:456"], prefix=group.name, metric=metric, limit=limit)
mock_search.assert_called_once_with(
termset=["HP:123", "HP:456"], prefix=group.name, metric=metric, directionality=directionality, limit=limit
)


@patch("monarch_py.service.semsim_service.SemsimianService.search")
Expand All @@ -91,10 +98,18 @@ def test_post_search(mock_search):
termset = ["HP:123", "HP:456"]
group = SemsimSearchGroup.HGNC
metric = SemsimMetric.JACCARD_SIMILARITY
directionality = SemsimDirectionality.BIDIRECTIONAL
limit = 5

response = client.post(
f"/search/", json={"termset": termset, "group": group.value, "metric": metric, "limit": limit}
f"/search/",
json={
"termset": termset,
"group": group.value,
"metric": metric,
"directionality": directionality,
"limit": limit,
},
)

assert response.status_code == status.HTTP_200_OK
Expand Down
4 changes: 3 additions & 1 deletion backend/tests/fixtures/association_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ def association_counts():
"items": [
{
"label": "Phenotype to Disease",
"count": 3959,
"count": 4012,
"category": "biolink:DiseaseToPhenotypicFeatureAssociation",
},
{"label": "Causal Gene", "count": 126, "category": "biolink:CausalGeneToDiseaseAssociation"},
{"label": "Correlated Gene", "count": 146, "category": "biolink:CorrelatedGeneToDiseaseAssociation"},
{"label": "Variant to Disease", "count": 1, "category": "biolink:VariantToDiseaseAssociation"},
{"label": "Disease Model", "count": 237, "category": "biolink:GenotypeToDiseaseAssociation"},
]
}
Loading

0 comments on commit 164aaf1

Please sign in to comment.