Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: expose cool-seq-tool feature overlap endpoint #523

Merged
merged 1 commit into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ pydantic = "==1.*"
gene-normalizer = "~=0.1.36"
boto3 = "*"
"ga4gh.vrsatile.pydantic" = "~=0.0.13"
cool-seq-tool = "~=0.1.14.dev0"
cool-seq-tool = "~=0.1.14.dev3"
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ install_requires =
gene-normalizer ~= 0.1.36
boto3
ga4gh.vrsatile.pydantic ~= 0.0.13
cool-seq-tool ~= 0.1.14.dev0
cool-seq-tool ~= 0.1.14.dev3

tests_require =
pytest
Expand Down
74 changes: 74 additions & 0 deletions variation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
import pkg_resources
import python_jsonschema_objects
from bioutils.exceptions import BioutilsError
from cool_seq_tool.data_sources.feature_overlap import (
FeatureOverlap,
FeatureOverlapError,
)
from cool_seq_tool.schemas import Assembly, ResidueMode
from fastapi import FastAPI, Query
from ga4gh.vrs import models
Expand Down Expand Up @@ -35,6 +39,7 @@
)
from variation.schemas.service_schema import (
ClinVarAssembly,
FeatureOverlapService,
ToCdnaService,
ToGenomicService,
)
Expand All @@ -59,9 +64,11 @@ class Tag(Enum):
VRS_PYTHON = "VRS-Python"
TO_COPY_NUMBER_VARIATION = "To Copy Number Variation"
ALIGNMENT_MAPPER = "Alignment Mapper"
FEATURE_OVERLAP = "Feature Overlap"


# Module-level handler instance; its `seqrepo_access` attribute is handed to
# the feature-overlap helper created on the next line.
query_handler = QueryHandler()
# Shared FeatureOverlap instance queried by the `/variation/feature_overlap`
# endpoint defined further down in this module.
feature_overlap = FeatureOverlap(query_handler.seqrepo_access)


app = FastAPI(
Expand Down Expand Up @@ -841,3 +848,70 @@ async def p_to_g(
warnings=[w] if w else [],
service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()),
)


@app.get(
    "/variation/feature_overlap",
    summary="Given GRCh38 genomic data, find the overlapping MANE features (gene and cds)",
    response_description="A response to a validly-formed query.",
    description="The genomic data is specified as a sequence location by `chromosome`, `start`, `end`. All CDS regions with which the input sequence location has nonzero base pair overlap will be returned.",
    response_model=FeatureOverlapService,
    tags=[Tag.FEATURE_OVERLAP],
)
def get_feature_overlap(
    start: int = Query(..., description="GRCh38 start position"),
    end: int = Query(..., description="GRCh38 end position"),
    chromosome: Optional[str] = Query(
        None,
        description="Chromosome. 1..22, X, or Y. If not provided, must provide `identifier`. If both `chromosome` and `identifier` are provided, `chromosome` will be used.",
    ),
    identifier: Optional[str] = Query(
        None,
        description="Genomic identifier on GRCh38 assembly. If not provided, must provide `chromosome`. If both `chromosome` and `identifier` are provided, `chromosome` will be used.",
    ),
    residue_mode: ResidueMode = Query(
        ResidueMode.RESIDUE, description="Residue mode for `start` and `end`"
    ),
) -> FeatureOverlapService:
    """Given GRCh38 genomic data, find the overlapping MANE features (gene and cds).

    The genomic data is specified as a sequence location by `chromosome`, `start`,
    `end`. All CDS regions with which the input sequence location has nonzero base
    pair overlap will be returned.

    Failures never propagate to the caller: they are reported through the
    `warnings` field of the response and `feature_overlap` is left as ``None``.

    :param start: GRCh38 start position
    :param end: GRCh38 end position
    :param chromosome: Chromosome. 1..22, X, or Y. If not provided, must provide
        `identifier`. If both `chromosome` and `identifier` are provided,
        `chromosome` will be used.
    :param identifier: Genomic identifier on GRCh38 assembly. If not provided, must
        provide `chromosome`. If both `chromosome` and `identifier` are provided,
        `chromosome` will be used.
    :param residue_mode: Residue mode for `start` and `end`
    :return: Service response whose `feature_overlap` field holds the MANE feature
        (gene/cds) overlap data as a dict keyed by the genes which overlap the
        input sequence location. Each gene contains a list of the overlapping CDS
        regions with the beginning and end of the input sequence location's
        overlap with each. `feature_overlap` is ``None`` and `warnings` carries
        the error message when the lookup fails.
    """
    # Defaults describe the failure outcome; only a successful lookup
    # replaces `overlap_data`.
    overlap_data = None
    errors = []
    try:
        # Keep the try body to the single call that can raise.
        overlap_data = feature_overlap.get_grch38_mane_gene_cds_overlap(
            start=start,
            end=end,
            chromosome=chromosome,
            identifier=identifier,
            residue_mode=residue_mode,
        )
    except FeatureOverlapError as e:
        # Expected, user-facing failure: surface the message as-is.
        errors = [str(e)]
    except Exception as e:
        # The response points the user at the logs, so record the full
        # traceback (logger.exception) rather than only the message.
        logger.exception("Unhandled exception: %s", e)
        errors = ["Unhandled exception. See logs for more information."]
    return FeatureOverlapService(
        feature_overlap=overlap_data,
        warnings=errors,
        service_meta_=ServiceMeta(
            version=__version__, response_datetime=datetime.now()
        ),
    )
69 changes: 65 additions & 4 deletions variation/schemas/service_schema.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""Module containing schemas for services"""
from enum import Enum
from typing import Any, Dict, Type
from typing import Any, Dict, List, Optional, Type

from cool_seq_tool.schemas import ToCdnaService as ToCdna
from cool_seq_tool.schemas import ToGenomicService as ToGenomic
from cool_seq_tool.schemas import (
CdsOverlap,
)
from cool_seq_tool.schemas import (
ToCdnaService as ToCdna,
)
from cool_seq_tool.schemas import (
ToGenomicService as ToGenomic,
)

from variation.schemas.normalize_response_schema import ServiceMeta
from variation.schemas.normalize_response_schema import ServiceMeta, ServiceResponse


class ClinVarAssembly(str, Enum):
Expand Down Expand Up @@ -82,3 +89,57 @@ def schema_extra(schema: Dict[str, Any], model: Type["ToCdnaService"]) -> None:
"url": "https://github.com/cancervariants/variation-normalization",
},
}


class FeatureOverlapService(ServiceResponse):
    """Response model for the Feature Overlap service."""

    # Overlap data keyed by a string (a gene symbol in the example below), each
    # mapping to a list of CdsOverlap entries; None when no data is available.
    feature_overlap: Optional[Dict[str, List[CdsOverlap]]] = None

    class Config:
        """Model configuration."""

        @staticmethod
        def schema_extra(
            schema: Dict[str, Any], model: Type["FeatureOverlapService"]
        ) -> None:
            """Drop generated titles and attach an example to the OpenAPI schema.

            :param schema: Schema dictionary, modified in place
            :param model: Model class the schema was generated for (unused)
            """

            def _example_location() -> Dict[str, Any]:
                # Build a fresh dict per call so the `cds` and `overlap`
                # entries of the example never share nested objects.
                return {
                    "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq",
                    "type": "SequenceLocation",
                    "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
                    "interval": {
                        "type": "SequenceInterval",
                        "start": {"value": 140726493, "type": "Number"},
                        "end": {"value": 140726516, "type": "Number"},
                    },
                }

            # `pop` with a default is a no-op when the key is missing.
            schema.pop("title", None)
            for field_schema in schema.get("properties", {}).values():
                field_schema.pop("title", None)

            braf_entry = {
                "cds": _example_location(),
                "overlap": _example_location(),
            }
            # NOTE(review): the example key below is `service_meta`, while the
            # endpoint in variation/main.py builds this model with
            # `service_meta_=`; confirm which name ServiceResponse declares.
            meta_example = {
                "version": "0.5.4",
                "response_datetime": "2022-09-29T15:08:18.696882",
                "name": "variation-normalizer",
                "url": "https://github.com/cancervariants/variation-normalization",
            }
            schema["example"] = {
                "feature_overlap": {"BRAF": [braf_entry]},
                "warnings": [],
                "service_meta": meta_example,
            }