Skip to content

Commit

Permalink
feat: return MANE gene(s) in normalize endpoint for genomic queries (#…
Browse files Browse the repository at this point in the history
…576)

initial work for #551
  • Loading branch information
korikuzma committed Jan 3, 2025
1 parent d8a6130 commit f75d30d
Show file tree
Hide file tree
Showing 11 changed files with 187 additions and 97 deletions.
42 changes: 33 additions & 9 deletions src/variation/hgvs_dup_del_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core import ga4gh_identify
from ga4gh.core import entity_models, ga4gh_identify
from ga4gh.vrs import models, normalize

from variation.schemas.normalize_response_schema import HGVSDupDelModeOption
Expand Down Expand Up @@ -49,6 +49,7 @@ def default_mode(
baseline_copies: int | None = None,
copy_change: models.CopyChange | None = None,
alt: str | None = None,
extensions: list[entity_models.Extension] | None = None,
) -> dict | None:
"""Use default characteristics to return a variation.
If baseline_copies not provided and endpoints are ambiguous - copy_number_change
Expand All @@ -65,31 +66,40 @@ def default_mode(
:param baseline_copies: Baseline copies for Copy Number Count variation
:param copy_change: copy change for Copy Number Change Variation
:param alt: Alteration
:param extensions: List of extensions for variation
:raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``.
:return: VRS Variation object represented as a dict
"""
_check_supported_alt_type(alt_type)

variation = None
if not baseline_copies and alt_type in AMBIGUOUS_REGIONS:
variation = self.copy_number_change_mode(alt_type, location, copy_change)
variation = self.copy_number_change_mode(
alt_type, location, copy_change, extensions=extensions
)
elif baseline_copies:
variation = self.copy_number_count_mode(alt_type, location, baseline_copies)
variation = self.copy_number_count_mode(
alt_type, location, baseline_copies, extensions=extensions
)
else:
variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt)
variation = self.allele_mode(
location, alt_type, vrs_seq_loc_ac, alt, extensions=extensions
)
return variation

def copy_number_count_mode(
self,
alt_type: AltType,
location: dict,
baseline_copies: int,
extensions: list[entity_models.Extension] | None = None,
) -> dict:
"""Return a VRS Copy Number Variation.
:param alt_type: The type of alteration. Must be one of ``DELS_DUPS``.
:param location: VRS SequenceLocation
:param baseline_copies: Baseline copies number
:param extensions: List of extensions for variation
:raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``.
:return: VRS Copy Number object represented as a dict
"""
Expand All @@ -98,7 +108,9 @@ def copy_number_count_mode(
copies = baseline_copies - 1 if alt_type in DELS else baseline_copies + 1
seq_loc = models.SequenceLocation(**location)
seq_loc.id = ga4gh_identify(seq_loc)
cn = models.CopyNumberCount(copies=copies, location=seq_loc)
cn = models.CopyNumberCount(
copies=copies, location=seq_loc, extensions=extensions
)
cn.id = ga4gh_identify(cn)
return cn.model_dump(exclude_none=True)

Expand All @@ -107,12 +119,14 @@ def copy_number_change_mode(
alt_type: AltType,
location: dict,
copy_change: models.CopyChange | None = None,
extensions: list[entity_models.Extension] | None = None,
) -> dict:
"""Return copy number change variation
:param alt_type: The type of alteration. Must be one of ``DELS_DUPS``.
:param location: VRS SequenceLocation
:param copy_change: The copy change
:param extensions: List of extensions for variation
:raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``.
:return: Copy Number Change variation as a dict
"""
Expand All @@ -127,7 +141,9 @@ def copy_number_change_mode(

seq_loc = models.SequenceLocation(**location)
seq_loc.id = ga4gh_identify(seq_loc)
cx = models.CopyNumberChange(location=seq_loc, copyChange=copy_change)
cx = models.CopyNumberChange(
location=seq_loc, copyChange=copy_change, extensions=extensions
)
cx.id = ga4gh_identify(cx)
return cx.model_dump(exclude_none=True)

Expand All @@ -137,6 +153,7 @@ def allele_mode(
alt_type: AltType,
vrs_seq_loc_ac: str,
alt: str,
extensions: list[entity_models.Extension] | None = None,
) -> dict | None:
"""Return a VRS Allele with a normalized LiteralSequenceExpression or
ReferenceLengthExpression.
Expand All @@ -145,6 +162,7 @@ def allele_mode(
:param alt_type: Alteration type
:param vrs_seq_loc_ac: Accession used in VRS Sequence Location
:param alt: Alteration
:param extensions: List of extensions for variation
:return: VRS Allele object represented as a dict
"""
if alt_type in AMBIGUOUS_REGIONS:
Expand All @@ -168,6 +186,7 @@ def allele_mode(
allele = models.Allele(
location=models.SequenceLocation(**location),
state=models.LiteralSequenceExpression(sequence=state),
extensions=extensions,
)

try:
Expand All @@ -189,6 +208,7 @@ def interpret_variation(
baseline_copies: int | None = None,
copy_change: models.CopyChange | None = None,
alt: str | None = None,
extensions: list[entity_models.Extension] | None = None,
) -> dict:
"""Interpret variation using HGVSDupDelMode
Expand All @@ -201,6 +221,7 @@ def interpret_variation(
:param baseline_copies: Baseline copies number
:param copy_change: The copy change
:param alt: The alteration
:param extensions: List of extensions for variation
:return: VRS Variation object
"""
variation = None
Expand All @@ -212,21 +233,24 @@ def interpret_variation(
baseline_copies=baseline_copies,
copy_change=copy_change,
alt=alt,
extensions=extensions,
)
elif hgvs_dup_del_mode == HGVSDupDelModeOption.ALLELE:
variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt)
variation = self.allele_mode(
location, alt_type, vrs_seq_loc_ac, alt, extensions=extensions
)
elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT:
if baseline_copies:
variation = self.copy_number_count_mode(
alt_type, location, baseline_copies
alt_type, location, baseline_copies, extensions=extensions
)
else:
errors.append(
"`baseline_copies` must be provided for Copy Number Count Variation"
)
elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_CHANGE:
variation = self.copy_number_change_mode(
alt_type, location, copy_change=copy_change
alt_type, location, copy_change=copy_change, extensions=extensions
)

if not variation:
Expand Down
27 changes: 21 additions & 6 deletions src/variation/translators/genomic_del_dup_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import NamedTuple

from cool_seq_tool.schemas import ResidueMode
from cool_seq_tool.schemas import ManeGeneData, ResidueMode
from ga4gh.vrs import models
from pydantic import StrictInt, StrictStr, ValidationError

Expand Down Expand Up @@ -30,6 +30,7 @@ class DelDupData(NamedTuple):
ac: StrictStr
pos0: StrictInt
pos1: StrictInt | None
mane_genes: list[ManeGeneData] | None


class GenomicDelDupTranslator(Translator):
Expand All @@ -51,32 +52,34 @@ async def get_grch38_data(
:param ac: Genomic RefSeq accession
:return: Data on GRCh38 assembly if successful liftover. Else, `None`
"""
pos0, pos1, new_ac = None, None, None
pos0, pos1, new_ac, mane_genes = None, None, None, None

if classification.pos1:
# `g_to_grch38` returns inter-residue coordinates, but it expects residue
# coordinates as input, so we increment start by 1
grch38_pos = await self.mane_transcript.g_to_grch38(
ac, classification.pos0 + 1, classification.pos1
ac, classification.pos0 + 1, classification.pos1, get_mane_genes=True
)
if grch38_pos:
pos0, pos1 = grch38_pos.pos
new_ac = grch38_pos.ac
mane_genes = grch38_pos.mane_genes
else:
# `g_to_grch38` returns inter-residue coordinates, but it expects residue
# coordinates as input, so we increment start by 1
grch38_pos = await self.mane_transcript.g_to_grch38(
ac, classification.pos0 + 1, classification.pos0
ac, classification.pos0 + 1, classification.pos0, get_mane_genes=True
)
if grch38_pos:
pos0, _ = grch38_pos.pos
new_ac = grch38_pos.ac
mane_genes = grch38_pos.mane_genes

if not new_ac:
errors.append(f"Unable to find a GRCh38 accession for: {ac}")

try:
data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1)
data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1, mane_genes=mane_genes)
except ValidationError:
data = None
return data
Expand Down Expand Up @@ -114,6 +117,7 @@ async def translate(
vrs_variation = None
vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA
residue_mode = ResidueMode.RESIDUE
mane_genes = None

if do_liftover or endpoint_name == Endpoint.NORMALIZE:
errors = []
Expand All @@ -131,6 +135,7 @@ async def translate(
warnings += errors
return None

mane_genes = grch38_data.mane_genes
pos0 = grch38_data.pos0 - 1
if grch38_data.pos1 is None:
pos1 = grch38_data.pos0
Expand Down Expand Up @@ -158,7 +163,15 @@ async def translate(
pos0 = classification.pos0
pos1 = classification.pos1
ac = validation_result.accession
grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1)
# `g_to_grch38` returns inter-residue coordinates, but it expects residue
# coordinates as input, so we increment start by 1
_grch38_data = await self.mane_transcript.g_to_grch38(
ac, pos0 + 1, pos0, get_mane_genes=True
)
mane_genes = _grch38_data.mane_genes
grch38_data = DelDupData(
ac=ac, pos0=pos0, pos1=pos1, mane_genes=mane_genes
)

assembly = ClinVarAssembly.GRCH38
else:
Expand All @@ -184,6 +197,7 @@ async def translate(
ac = grch38_data.ac
pos0 = grch38_data.pos0 - 1
pos1 = grch38_data.pos0 if grch38_data.pos1 is None else grch38_data.pos1
mane_genes = grch38_data.mane_genes
residue_mode = ResidueMode.INTER_RESIDUE
self.is_valid(classification.gene_token, ac, pos0, pos1, errors)

Expand Down Expand Up @@ -246,6 +260,7 @@ async def translate(
baseline_copies=baseline_copies,
copy_change=copy_change,
alt=alt,
extensions=self._mane_gene_extensions(mane_genes),
)
elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_COUNT:
vrs_variation = self.hgvs_dup_del_mode.copy_number_count_mode(
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_delins.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -102,6 +104,7 @@ async def translate(
alt=classification.inserted_sequence,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_insertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -103,6 +105,7 @@ async def translate(
alt=classification.inserted_sequence,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_reference_agree.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -100,6 +102,7 @@ async def translate(
warnings,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_substitution.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -123,6 +125,7 @@ async def translate(
alt=classification.alt,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
25 changes: 24 additions & 1 deletion src/variation/translators/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.mappers import ManeTranscript
from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
from cool_seq_tool.schemas import AnnotationLayer, ManeGeneData, ResidueMode
from cool_seq_tool.sources import UtaDatabase
from ga4gh.core import entity_models
from ga4gh.vrs import models

from variation.hgvs_dup_del_mode import HGVSDupDelMode
Expand Down Expand Up @@ -253,3 +254,25 @@ async def get_p_or_cdna_translation_result(
)

return None

@staticmethod
def _mane_gene_extensions(
mane_genes: list[ManeGeneData],
) -> list[entity_models.Extension] | None:
"""Transform mane genes to list of extensions
This is only used in Genomic translators
:param mane_genes: Optional list of mane gene data
:return: List of extensions containing mane gene data if found. Otherwise,
``None``
"""
mane_genes_exts = None
if mane_genes:
mane_genes_exts = [
entity_models.Extension(
name="mane_genes",
value=mane_genes,
)
]
return mane_genes_exts
Loading

0 comments on commit f75d30d

Please sign in to comment.