Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement SemsimServer search #499

Merged
merged 2 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions backend/src/monarch_py/api/additional_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List
from enum import Enum
from typing import List, Optional

from fastapi import Query, Request
from pydantic import BaseModel, Field
Expand All @@ -13,6 +14,21 @@ class Config:
arbitrary_types_allowed = True


class CompareRequest(BaseModel):
class SemsimSearchCategory(str, Enum):
    """Categories of entities that a semsim search can be restricted to.

    Each member *name* is an ID prefix (e.g. ``HGNC``) and each member *value*
    is the human-readable label that API clients send (e.g. ``"Human Genes"``).

    ``str`` is mixed in, as FastAPI recommends for enums used as path
    parameters, so the generated OpenAPI schema knows the values are strings
    and members compare equal to their label.
    """

    HGNC = "Human Genes"
    MGI = "Mouse Genes"
    RGD = "Rat Genes"
    ZFIN = "Zebrafish Genes"
    WB = "C. Elegans Genes"
    MONDO = "Human Diseases"


# Request body for the POST /compare endpoint: the two lists of term IDs
# whose pairwise similarity is requested. Both fields are required.
class SemsimCompareRequest(BaseModel):
    # Term IDs on the subject side of the comparison.
    subjects: List[str] = Field(..., title="List of subjects for comparison")
    # Term IDs on the object side of the comparison.
    objects: List[str] = Field(..., title="List of objects for comparison")


class SemsimSearchRequest(BaseModel):
    """Request body for the POST /search endpoint."""

    # Term IDs to find matches for (required).
    termset: List[str] = Field(..., title="Termset to search")
    # Which category of entities to search within (required).
    category: SemsimSearchCategory = Field(..., title="Category to search for")
    # Plain ``int`` rather than ``Optional[int]``: an explicit ``null`` would
    # bypass the ge/le bounds and be forwarded downstream as ``limit=None``.
    # Omitting the field still yields the default of 10.
    limit: int = Field(10, title="Limit the number of results", ge=1, le=50)
61 changes: 39 additions & 22 deletions backend/src/monarch_py/api/config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
import requests as rq

from functools import lru_cache
from typing import List

from pydantic import BaseSettings

from monarch_py.implementations.solr.solr_implementation import SolrImplementation
from monarch_py.datamodels.model import TermSetPairwiseSimilarity
from monarch_py.datamodels.model import TermSetPairwiseSimilarity, SemsimSearchResult


class Settings(BaseSettings):
Expand All @@ -15,8 +15,8 @@ class Settings(BaseSettings):
solr_url = os.getenv("SOLR_URL") if os.getenv("SOLR_URL") else f"http://{solr_host}:{solr_port}/solr"
phenio_db_path = os.getenv("PHENIO_DB_PATH") if os.getenv("PHENIO_DB_PATH") else "/data/phenio.db"

oak_server_host = os.getenv("OAK_SERVER_HOST", "127.0.0.1")
oak_server_port = os.getenv("OAK_SERVER_PORT", 18811)
semsim_server_host = os.getenv("SEMSIM_SERVER_HOST", "127.0.0.1")
semsim_server_port = os.getenv("SEMSIM_SERVER_PORT", 18811)


settings = Settings()
Expand All @@ -40,26 +40,20 @@ def convert_nans(input_dict, to_value=None):
return input_dict


class OakHTTPRequester:
def compare(self, subjects, objects):
host = f"http://{settings.oak_server_host}:{settings.oak_server_port}"
path = f"/compare/{','.join(subjects)}/{','.join(objects)}"
url = f"{host}/{path}"
class SemsimianHTTPRequester:
"""A class that makes HTTP requests to the semsimian_server."""

print(f"Fetching {url}...")
response = rq.get(url=url)
data = response.json()

# FIXME: currently, the response returned from semsimian_server doesn't
# 100% match the TermSetPairwiseSimilarity model, so we perform some
# transformations below. once it does, we can remove all the code below
# and just return TermSetPairwiseSimilarity(**data)
def convert_tsps_data(self, data):
"""Convert to a format that can be coerced into a TermSetPairwiseSimilarity model

FIXME: currently, the response returned from semsimian_server doesn't
100% match the TermSetPairwiseSimilarity model, so we perform some
transformations below. once it does, we can remove all the code below
and just return TermSetPairwiseSimilarity(**data)
"""
# remove these similarity maps and fold them into the _best_matches dicts
object_best_matches_similarity_map = convert_nans(data.pop("object_best_matches_similarity_map"))
subject_best_matches_similarity_map = convert_nans(data.pop("subject_best_matches_similarity_map"))

# convert to a format that can be coerced into a TermSetPairwiseSimilarity
converted_data = {
**data,
**{
Expand All @@ -76,10 +70,33 @@ def compare(self, subjects, objects):
},
},
}
return converted_data

def compare(self, subjects: List[str], objects: List[str]):
    """Request a pairwise comparison of two termsets from the semsimian server.

    Args:
        subjects: Term IDs for the subject side; joined with commas into the URL.
        objects: Term IDs for the object side; joined with commas into the URL.

    Returns:
        TermSetPairwiseSimilarity built from the converted server response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    host = f"http://{settings.semsim_server_host}:{settings.semsim_server_port}"
    path = f"compare/{','.join(subjects)}/{','.join(objects)}"
    url = f"{host}/{path}"

    print(f"Fetching {url}...")
    response = rq.get(url=url)
    # Surface HTTP failures here instead of letting an error payload blow up
    # later inside convert_tsps_data with a misleading KeyError.
    response.raise_for_status()
    data = response.json()
    # The raw payload must go through convert_tsps_data before it can be
    # coerced into the model.
    results = self.convert_tsps_data(data)
    return TermSetPairwiseSimilarity(**results)

def search(self, termset: List[str], prefix: str, limit: int):
    """Search the semsimian server for entities similar to a termset.

    Args:
        termset: Term IDs to find matches for; joined with commas into the URL.
        prefix: Prefix path segment selecting which entities to search.
        limit: Maximum number of results, sent as the ``limit`` query parameter.

    Returns:
        A list of SemsimSearchResult, one per row returned by the server.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    host = f"http://{settings.semsim_server_host}:{settings.semsim_server_port}"
    path = f"search/{','.join(termset)}/{prefix}?limit={limit}"
    url = f"{host}/{path}"

    print(f"Fetching {url}...")
    response = rq.get(url=url)
    # Surface HTTP failures here instead of trying to index into an error
    # payload as if it were a list of result rows.
    response.raise_for_status()
    data = response.json()
    # Each row is positional: [0] score, [1] raw similarity payload (needs the
    # same conversion as compare results), [2] subject id.
    return [
        SemsimSearchResult(
            score=row[0],
            similarity=self.convert_tsps_data(row[1]),
            subject_id=row[2],
        )
        for row in data
    ]


@lru_cache(maxsize=1)
def oak():
return OakHTTPRequester()
def semsimian():
return SemsimianHTTPRequester()
10 changes: 5 additions & 5 deletions backend/src/monarch_py/api/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ async def _get_entity(
) -> Node:
"""Retrieves the entity with the specified id

Args:
<b>Args:</b> <br>
id (str): ID for the entity to retrieve, ex: MONDO:0019391

Raises:
<b>Raises:</b> <br>
HTTPException: 404 if the entity is not found

Returns:
<b>Returns:</b> <br>
Node: Entity details for the specified id
"""
response = solr().get_entity(id, extra=True)
Expand Down Expand Up @@ -52,13 +52,13 @@ def _association_table(
"""
Retrieves association table data for a given entity and association type

Args:
<b>Args:</b> <br>
id (str): ID of the entity to retrieve association table data, ex: MONDO:0019391
category (str): Category of association to retrieve association table data for, ex: biolink:DiseaseToPhenotypicFeatureAssociation
Path (str, optional): Path string to limit results to a subset. Defaults to None.
pagination (PaginationParams, optional): Pagination parameters. Defaults to Depends().

Returns:
<b>Returns:</b> <br>
AssociationResults: Association table data for the specified entity and association type
"""
response = solr().get_association_table(
Expand Down
4 changes: 2 additions & 2 deletions backend/src/monarch_py/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from monarch_py.api import association, entity, histopheno, search, semsim
from monarch_py.api.config import oak
from monarch_py.api.config import semsimian
from monarch_py.api.middleware.logging_middleware import LoggingMiddleware
from monarch_py.service.curie_service import CurieService

Expand All @@ -17,7 +17,7 @@

@app.on_event("startup")
async def initialize_app():
oak()
semsimian()
# Let the curie service singleton initialize itself
CurieService()

Expand Down
67 changes: 57 additions & 10 deletions backend/src/monarch_py/api/semsim.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from fastapi import APIRouter, Path
from fastapi import APIRouter, Path, Query

from monarch_py.api.additional_models import CompareRequest
from monarch_py.api.config import oak
from monarch_py.api.additional_models import SemsimCompareRequest, SemsimSearchRequest, SemsimSearchCategory
from monarch_py.api.config import semsimian
from monarch_py.api.utils.similarity_utils import parse_similarity_prefix

router = APIRouter(tags=["semsim"], responses={404: {"description": "Not Found"}})

Expand All @@ -13,11 +14,11 @@ def _compare(
):
"""Get pairwise similarity between two sets of terms

Args:
subjects (str, optional): List of subjects for comparison. Defaults to "".
objects (str, optional): List of objects for comparison. Defaults to "".
<b>Args:</b> <br>
subjects (str, optional): List of subjects for comparison. Defaults to "". <br>
objects (str, optional): List of objects for comparison. Defaults to "". <br>

Returns:
<b>Returns:</b> <br>
TermSetPairwiseSimilarity: Pairwise similarity between subjects and objects
"""
print(
Expand All @@ -27,15 +28,15 @@ def _compare(
objects: {objects.split(',')}
"""
)
results = oak().compare(
results = semsimian().compare(
subjects=subjects.split(","),
objects=objects.split(","),
)
return results


@router.post("/compare")
def _post_compare(request: CompareRequest):
def _post_compare(request: SemsimCompareRequest):
"""
Pairwise similarity between two sets of terms <br>
<br>
Expand All @@ -47,4 +48,50 @@ def _post_compare(request: CompareRequest):
}
</pre>
"""
return oak().compare(request.subjects, request.objects)
return semsimian().compare(request.subjects, request.objects)


# The placeholder must carry the same name as the handler's Path parameter;
# with "{prefix}" in the template, `category` could never be filled from the URL.
@router.get("/search/{termset}/{category}")
def _search(
    termset: str = Path(..., title="Termset to search"),
    category: SemsimSearchCategory = Path(..., title="Category of entities to search for"),
    limit: int = Query(default=10, ge=1, le=50),
):
    """Search for terms in a termset

    <b>Args:</b> <br>
    termset (str): Comma separated list of term IDs to find matches for. <br>
    category (str): Category of entities to search for. <br>
    limit (int, optional): Limit the number of results. Defaults to 10.

    <b>Returns:</b> <br>
    List[SemsimSearchResult]: Matching entities with their score and similarity details
    """

    print(
        f"""
Running semsim search:
termset: {termset}
category: {category}
"""
    )

    # Pass the category label (its value), matching what the POST handler
    # hands to parse_similarity_prefix.
    results = semsimian().search(
        termset=termset.split(","),
        prefix=parse_similarity_prefix(category.value),
        limit=limit,
    )
    return results


@router.post("/search")
def _post_search(request: SemsimSearchRequest):
    """
    Search for terms in a termset <br>
    <br>
    Example: <br>
    <pre>
    {
    "termset": ["HP:0002104", "HP:0012378", "HP:0012378", "HP:0012378"],
    "category": "Human Diseases",
    "limit": 5
    }
    </pre>
    """
    # Grab the shared requester first, then translate the category label from
    # the request body into the prefix the requester expects.
    requester = semsimian()
    category_prefix = parse_similarity_prefix(request.category.value)
    return requester.search(
        termset=request.termset,
        prefix=category_prefix,
        limit=request.limit,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from oaklib.constants import OAKLIB_MODULE
from oaklib.implementations.sqldb.sql_implementation import SqlImplementation

from fastapi import HTTPException

from monarch_py.api.additional_models import SemsimSearchCategory

IS_A = omd.slots.subClassOf.curie
HP_DB_URL = "https://s3.amazonaws.com/bbop-sqlite/hp.db.gz"

Expand All @@ -18,3 +22,13 @@ def compare_termsets(
oi = SqlImplementation(OntologyResource(slug=hp_db))
results = oi.termset_pairwise_similarity(subjects, objects, predicates)
return results


def parse_similarity_prefix(prefix: str) -> str:
    """Resolve a category identifier to its SemsimSearchCategory member name.

    Args:
        prefix: Either a member name (e.g. "HGNC"), which is returned as-is,
            or a member value (e.g. "Human Genes"), which is mapped to its
            member name.

    Returns:
        The matching SemsimSearchCategory member name.

    Raises:
        HTTPException: 404 if ``prefix`` is neither a member name nor a value.
    """
    # Already a member name: nothing to translate.
    if prefix in SemsimSearchCategory.__members__:
        return prefix
    # Enum lookup by value raises ValueError on a miss (it never returns a
    # falsy result), so the 404 has to be raised from the except branch.
    try:
        return SemsimSearchCategory(prefix).name
    except ValueError as err:
        raise HTTPException(status_code=404, detail="Prefix not found") from err
6 changes: 3 additions & 3 deletions backend/src/monarch_py/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import typer
from monarch_py import solr_cli, sql_cli
from monarch_py.api.config import oak
from monarch_py.api.config import semsimian
from monarch_py.utils.solr_cli_utils import check_for_docker
from monarch_py.utils.utils import set_log_level, format_output
from typing_extensions import Annotated
Expand Down Expand Up @@ -272,10 +272,10 @@ def compare(
),
output: str = typer.Option(None, "--output", "-o", help="The path to the output file"),
):
"""Compare two entities using semantic similarity via OAK"""
"""Compare two sets of phenotypes using semantic similarity via SemSimian"""
subjects = subjects.split(",")
objects = objects.split(",")
response = oak().compare(subjects, objects)
response = semsimian().compare(subjects, objects)
format_output(fmt, response, output)


Expand Down
Loading