Skip to content

Commit

Permalink
Implement SemsimServer search (#499)
Browse files Browse the repository at this point in the history
Closes #233

- [x] Implement in backend
- [ ] Implement in frontend
- [x] Add fixture data
- [ ] Add tests?

---------

Co-authored-by: Kevin Schaper <kevinschaper@gmail.com>
  • Loading branch information
glass-ships and kevinschaper authored Dec 13, 2023
1 parent d5a6124 commit 3efaf1c
Show file tree
Hide file tree
Showing 20 changed files with 2,412 additions and 1,324 deletions.
20 changes: 18 additions & 2 deletions backend/src/monarch_py/api/additional_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List
from enum import Enum
from typing import List, Optional

from fastapi import Query, Request
from pydantic import BaseModel, Field
Expand All @@ -13,6 +14,21 @@ class Config:
arbitrary_types_allowed = True


class CompareRequest(BaseModel):
class SemsimSearchCategory(Enum):
    """Categories of entities that a semantic similarity search can target.

    Each member name is an identifier prefix (e.g. ``HGNC``) and each value
    is the human-readable label for that prefix (e.g. ``"Human Genes"``).
    """

    HGNC = "Human Genes"
    MGI = "Mouse Genes"
    RGD = "Rat Genes"
    ZFIN = "Zebrafish Genes"
    WB = "C. Elegans Genes"
    MONDO = "Human Diseases"


class SemsimCompareRequest(BaseModel):
    """Request body for a pairwise comparison of two termsets."""

    subjects: List[str] = Field(
        ...,
        title="List of subjects for comparison",
    )
    objects: List[str] = Field(
        ...,
        title="List of objects for comparison",
    )


class SemsimSearchRequest(BaseModel):
    """Request body for a semantic similarity search.

    Attributes are documented inline; ``limit`` is bounded to 1..50 and
    defaults to 10 when omitted.
    """

    # Term IDs to find matches for
    termset: List[str] = Field(..., title="Termset to search")
    # Which category of entities to search within
    category: SemsimSearchCategory = Field(..., title="Category to search for")
    # Declared as plain `int` (not Optional[int]): an explicit `null` would
    # otherwise pass validation and end up formatted into the semsimian
    # server URL as the literal text "limit=None".
    limit: int = Field(10, title="Limit the number of results", ge=1, le=50)
61 changes: 39 additions & 22 deletions backend/src/monarch_py/api/config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
import requests as rq

from functools import lru_cache
from typing import List

from pydantic import BaseSettings

from monarch_py.implementations.solr.solr_implementation import SolrImplementation
from monarch_py.datamodels.model import TermSetPairwiseSimilarity
from monarch_py.datamodels.model import TermSetPairwiseSimilarity, SemsimSearchResult


class Settings(BaseSettings):
Expand All @@ -15,8 +15,8 @@ class Settings(BaseSettings):
solr_url = os.getenv("SOLR_URL") if os.getenv("SOLR_URL") else f"http://{solr_host}:{solr_port}/solr"
phenio_db_path = os.getenv("PHENIO_DB_PATH") if os.getenv("PHENIO_DB_PATH") else "/data/phenio.db"

oak_server_host = os.getenv("OAK_SERVER_HOST", "127.0.0.1")
oak_server_port = os.getenv("OAK_SERVER_PORT", 18811)
semsim_server_host = os.getenv("SEMSIM_SERVER_HOST", "127.0.0.1")
semsim_server_port = os.getenv("SEMSIM_SERVER_PORT", 18811)


settings = Settings()
Expand All @@ -40,26 +40,20 @@ def convert_nans(input_dict, to_value=None):
return input_dict


class OakHTTPRequester:
def compare(self, subjects, objects):
host = f"http://{settings.oak_server_host}:{settings.oak_server_port}"
path = f"/compare/{','.join(subjects)}/{','.join(objects)}"
url = f"{host}/{path}"
class SemsimianHTTPRequester:
"""A class that makes HTTP requests to the semsimian_server."""

print(f"Fetching {url}...")
response = rq.get(url=url)
data = response.json()

# FIXME: currently, the response returned from semsimian_server doesn't
# 100% match the TermSetPairwiseSimilarity model, so we perform some
# transformations below. once it does, we can remove all the code below
# and just return TermSetPairwiseSimilarity(**data)
def convert_tsps_data(self, data):
"""Convert to a format that can be coerced into a TermSetPairwiseSimilarity model
FIXME: currently, the response returned from semsimian_server doesn't
100% match the TermSetPairwiseSimilarity model, so we perform some
transformations below. once it does, we can remove all the code below
and just return TermSetPairwiseSimilarity(**data)
"""
# remove these similarity maps and fold them into the _best_matches dicts
object_best_matches_similarity_map = convert_nans(data.pop("object_best_matches_similarity_map"))
subject_best_matches_similarity_map = convert_nans(data.pop("subject_best_matches_similarity_map"))

# convert to a format that can be coerced into a TermSetPairwiseSimilarity
converted_data = {
**data,
**{
Expand All @@ -76,10 +70,33 @@ def compare(self, subjects, objects):
},
},
}
return converted_data

def compare(self, subjects: List[str], objects: List[str]):
    """Fetch the pairwise similarity of two termsets from the semsimian server.

    Args:
        subjects: Term IDs making up the subject termset.
        objects: Term IDs making up the object termset.

    Returns:
        TermSetPairwiseSimilarity: built from the server response after it
        has been reshaped by ``convert_tsps_data``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    host = f"http://{settings.semsim_server_host}:{settings.semsim_server_port}"
    path = f"compare/{','.join(subjects)}/{','.join(objects)}"
    url = f"{host}/{path}"

    print(f"Fetching {url}...")
    response = rq.get(url=url)
    # Surface server-side failures as an HTTP error here instead of letting
    # them show up later as a confusing JSON-decoding or KeyError.
    response.raise_for_status()
    data = response.json()
    results = self.convert_tsps_data(data)
    return TermSetPairwiseSimilarity(**results)

def search(self, termset: List[str], prefix: str, limit: int):
    """Search the semsimian server for entities similar to a termset.

    Args:
        termset: Term IDs to find matches for.
        prefix: Identifier prefix selecting which entities to search.
        limit: Maximum number of results, sent as the ``limit`` query parameter.

    Returns:
        List[SemsimSearchResult]: one result per row of the server response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    host = f"http://{settings.semsim_server_host}:{settings.semsim_server_port}"
    path = f"search/{','.join(termset)}/{prefix}?limit={limit}"
    url = f"{host}/{path}"

    print(f"Fetching {url}...")
    response = rq.get(url=url)
    # Surface server-side failures as an HTTP error here instead of letting
    # them show up later as a confusing JSON-decoding or indexing error.
    response.raise_for_status()
    data = response.json()
    # Each row is indexed positionally: [0] score, [1] raw similarity payload
    # (reshaped by convert_tsps_data), [2] subject id.
    results = [
        SemsimSearchResult(score=row[0], similarity=self.convert_tsps_data(row[1]), subject_id=row[2])
        for row in data
    ]
    return results


@lru_cache(maxsize=1)
def semsimian():
    """Return the shared SemsimianHTTPRequester.

    The zero-argument call is memoized by ``lru_cache``, so the requester is
    constructed on the first call and the same instance is returned afterwards.
    """
    return SemsimianHTTPRequester()
10 changes: 5 additions & 5 deletions backend/src/monarch_py/api/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ async def _get_entity(
) -> Node:
"""Retrieves the entity with the specified id
Args:
<b>Args:</b> <br>
id (str): ID for the entity to retrieve, ex: MONDO:0019391
Raises:
<b>Raises:</b> <br>
HTTPException: 404 if the entity is not found
Returns:
<b>Returns:</b> <br>
Node: Entity details for the specified id
"""
response = solr().get_entity(id, extra=True)
Expand Down Expand Up @@ -52,13 +52,13 @@ def _association_table(
"""
Retrieves association table data for a given entity and association type
Args:
<b>Args:</b> <br>
id (str): ID of the entity to retrieve association table data, ex: MONDO:0019391
category (str): Category of association to retrieve association table data for, ex: biolink:DiseaseToPhenotypicFeatureAssociation
Path (str, optional): Path string to limit results to a subset. Defaults to None.
pagination (PaginationParams, optional): Pagination parameters. Defaults to Depends().
Returns:
<b>Returns:</b> <br>
AssociationResults: Association table data for the specified entity and association type
"""
response = solr().get_association_table(
Expand Down
4 changes: 2 additions & 2 deletions backend/src/monarch_py/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from monarch_py.api import association, entity, histopheno, search, semsim
from monarch_py.api.config import oak
from monarch_py.api.config import semsimian
from monarch_py.api.middleware.logging_middleware import LoggingMiddleware
from monarch_py.service.curie_service import CurieService

Expand All @@ -17,7 +17,7 @@

@app.on_event("startup")
async def initialize_app():
    """Startup hook: eagerly create the singletons the API relies on."""
    # Create the semsimian requester up front rather than on the first request
    semsimian()
    # Let the curie service singleton initialize itself
    CurieService()

Expand Down
67 changes: 57 additions & 10 deletions backend/src/monarch_py/api/semsim.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from fastapi import APIRouter, Path
from fastapi import APIRouter, Path, Query

from monarch_py.api.additional_models import CompareRequest
from monarch_py.api.config import oak
from monarch_py.api.additional_models import SemsimCompareRequest, SemsimSearchRequest, SemsimSearchCategory
from monarch_py.api.config import semsimian
from monarch_py.api.utils.similarity_utils import parse_similarity_prefix

router = APIRouter(tags=["semsim"], responses={404: {"description": "Not Found"}})

Expand All @@ -13,11 +14,11 @@ def _compare(
):
"""Get pairwise similarity between two sets of terms
Args:
subjects (str, optional): List of subjects for comparison. Defaults to "".
objects (str, optional): List of objects for comparison. Defaults to "".
<b>Args:</b> <br>
subjects (str, optional): List of subjects for comparison. Defaults to "". <br>
objects (str, optional): List of objects for comparison. Defaults to "". <br>
Returns:
<b>Returns:</b> <br>
TermSetPairwiseSimilarity: Pairwise similarity between subjects and objects
"""
print(
Expand All @@ -27,15 +28,15 @@ def _compare(
objects: {objects.split(',')}
"""
)
results = oak().compare(
results = semsimian().compare(
subjects=subjects.split(","),
objects=objects.split(","),
)
return results


@router.post("/compare")
def _post_compare(request: CompareRequest):
def _post_compare(request: SemsimCompareRequest):
"""
Pairwise similarity between two sets of terms <br>
<br>
Expand All @@ -47,4 +48,50 @@ def _post_compare(request: CompareRequest):
}
</pre>
"""
return oak().compare(request.subjects, request.objects)
return semsimian().compare(request.subjects, request.objects)


# The second placeholder must carry the same name as the `category` Path
# parameter below; with a differently named placeholder the parameter is never
# populated from the URL and request validation fails.
@router.get("/search/{termset}/{category}")
def _search(
    termset: str = Path(..., title="Termset to search"),
    category: SemsimSearchCategory = Path(..., title="Category of entities to search for"),
    limit: int = Query(default=10, ge=1, le=50),
):
    """Search for terms in a termset

    <b>Args:</b> <br>
        termset (str): Comma separated list of term IDs to find matches for. <br>
        category (SemsimSearchCategory): Category of entities to search for. <br>
        limit (int, optional): Limit the number of results. Defaults to 10. <br>

    <b>Returns:</b> <br>
        List[SemsimSearchResult]: List of matching entities with their scores
    """

    print(
        f"""
    Running semsim search:
        termset: {termset}
        category: {category}
    """
    )

    # Pass the category's string value, as the POST endpoint does, so the
    # prefix is resolved the same way for both routes.
    results = semsimian().search(
        termset=termset.split(","),
        prefix=parse_similarity_prefix(category.value),
        limit=limit,
    )
    return results


@router.post("/search")
def _post_search(request: SemsimSearchRequest):
    """
    Search for terms in a termset <br>
    <br>
    Example: <br>
    <pre>
    {
      "termset": ["HP:0002104", "HP:0012378", "HP:0012378", "HP:0012378"],
      "category": "Human Diseases",
      "limit": 5
    }
    </pre>
    """
    # Resolve the category label from the request body to its identifier prefix
    search_prefix = parse_similarity_prefix(request.category.value)
    return semsimian().search(
        termset=request.termset,
        prefix=search_prefix,
        limit=request.limit,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from oaklib.constants import OAKLIB_MODULE
from oaklib.implementations.sqldb.sql_implementation import SqlImplementation

from fastapi import HTTPException

from monarch_py.api.additional_models import SemsimSearchCategory

IS_A = omd.slots.subClassOf.curie
HP_DB_URL = "https://s3.amazonaws.com/bbop-sqlite/hp.db.gz"

Expand All @@ -18,3 +22,13 @@ def compare_termsets(
oi = SqlImplementation(OntologyResource(slug=hp_db))
results = oi.termset_pairwise_similarity(subjects, objects, predicates)
return results


def parse_similarity_prefix(prefix: str):
    """Resolve a search category to its identifier prefix.

    Args:
        prefix: Either a ``SemsimSearchCategory`` member name (e.g. ``"HGNC"``),
            a member value (e.g. ``"Human Genes"``), or a member itself.

    Returns:
        str: The matching ``SemsimSearchCategory`` member name.

    Raises:
        HTTPException: 404 if ``prefix`` matches no member name or value.
    """
    # An enum member (as FastAPI hands over for enum-typed parameters) already
    # identifies the category; just report its name.
    if isinstance(prefix, SemsimSearchCategory):
        return prefix.name
    # Already a prefix such as "HGNC"
    if prefix in SemsimSearchCategory.__members__:
        return prefix
    # Otherwise treat it as a label such as "Human Genes". Looking up an
    # unknown value raises ValueError rather than returning something falsy,
    # so it has to be caught for the 404 to be reachable.
    try:
        return SemsimSearchCategory(prefix).name
    except ValueError as err:
        raise HTTPException(status_code=404, detail="Prefix not found") from err
6 changes: 3 additions & 3 deletions backend/src/monarch_py/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import typer
from monarch_py import solr_cli, sql_cli
from monarch_py.api.config import oak
from monarch_py.api.config import semsimian
from monarch_py.utils.solr_cli_utils import check_for_docker
from monarch_py.utils.utils import set_log_level, format_output
from typing_extensions import Annotated
Expand Down Expand Up @@ -272,10 +272,10 @@ def compare(
),
output: str = typer.Option(None, "--output", "-o", help="The path to the output file"),
):
"""Compare two entities using semantic similarity via OAK"""
"""Compare two sets of phenotypes using semantic similarity via SemSimian"""
subjects = subjects.split(",")
objects = objects.split(",")
response = oak().compare(subjects, objects)
response = semsimian().compare(subjects, objects)
format_output(fmt, response, output)


Expand Down
Loading

0 comments on commit 3efaf1c

Please sign in to comment.