Skip to content

Commit

Permalink
Merge pull request #4524 from broadinstitute/lookup-mulit-build
Browse files Browse the repository at this point in the history
Variant Lookup multi genome build
  • Loading branch information
hanars authored Jan 6, 2025
2 parents d1e3686 + 88d0442 commit b697e9a
Show file tree
Hide file tree
Showing 45 changed files with 94 additions and 19 deletions.
Binary file modified hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.128-eead8100a1c1
Created at 2024/08/16 15:39:04
Created at 2025/01/02 17:23:59
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.128-eead8100a1c1
Created at 2025/01/02 17:57:36
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
12 changes: 9 additions & 3 deletions hail_search/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,7 @@ def gene_counts(self):
def _filter_variant_ids(self, ht, variant_ids):
return ht

def lookup_variants(self, variant_ids):
def lookup_variants(self, variant_ids, additional_annotations=None):
self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids)
ht = self._read_table('annotations.ht', drop_globals=['versions'])
ht = self._filter_variant_ids(ht, variant_ids)
Expand All @@ -1232,6 +1232,8 @@ def lookup_variants(self, variant_ids):
k: v for k, v in self.annotation_fields(include_genotype_overrides=False).items()
if k not in {FAMILY_GUID_FIELD, GENOTYPES_FIELD}
}
if additional_annotations:
annotation_fields.update(additional_annotations)
formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=False)

return formatted.aggregate(hl.agg.take(formatted.row, len(variant_ids)))
Expand All @@ -1246,9 +1248,13 @@ def _get_variant_project_data(self, variant_id, **kwargs):
return project_data[0] if project_data else {}

def lookup_variant(self, variant_id, **kwargs):
variants = self.lookup_variants([variant_id])
variants = self.lookup_variants([variant_id], additional_annotations=self._lookup_variant_annotations())
if not variants:
raise HTTPNotFound()
variant = dict(variants[0])
variant.update(self._get_variant_project_data(variant_id, **kwargs))
variant.update(self._get_variant_project_data(variant_id, variant=variant, **kwargs))
return variant

@staticmethod
def _lookup_variant_annotations():
return {}
7 changes: 6 additions & 1 deletion hail_search/queries/snv_indel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import hail as hl

from hail_search.constants import GENOME_VERSION_GRCh38, SCREEN_KEY, PREFILTER_FREQ_CUTOFF, ALPHAMISSENSE_SORT, \
UTR_ANNOTATOR_KEY, EXTENDED_SPLICE_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY
UTR_ANNOTATOR_KEY, EXTENDED_SPLICE_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY, GENOME_VERSION_GRCh37
from hail_search.queries.base import BaseHailTableQuery, PredictionPath
from hail_search.queries.snv_indel_37 import SnvIndelHailTableQuery37

Expand All @@ -12,6 +12,7 @@
class SnvIndelHailTableQuery(SnvIndelHailTableQuery37):

GENOME_VERSION = GENOME_VERSION_GRCh38
LIFT_GENOME_VERSION = GENOME_VERSION_GRCh37
PREDICTION_FIELDS_CONFIG = {
**SnvIndelHailTableQuery37.PREDICTION_FIELDS_CONFIG,
'fathmm': PredictionPath('dbnsfp', 'fathmm_MKL_coding_score'),
Expand Down Expand Up @@ -96,3 +97,7 @@ def _get_annotation_override_filters(self, ht, annotation_overrides):
)

return annotation_filters

@staticmethod
def _lookup_variant_annotations():
return {'liftover_locus': lambda r: r.rg37_locus}
27 changes: 26 additions & 1 deletion hail_search/queries/snv_indel_37.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from aiohttp.web import HTTPNotFound
from collections import OrderedDict
import hail as hl

from hail_search.constants import CLINVAR_KEY, HGMD_KEY, HGMD_PATH_RANGES, \
GNOMAD_GENOMES_FIELD, PREFILTER_FREQ_CUTOFF, PATH_FREQ_OVERRIDE_CUTOFF, PATHOGENICTY_HGMD_SORT_KEY, \
SPLICE_AI_FIELD, GENOME_VERSION_GRCh37
SPLICE_AI_FIELD, GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38
from hail_search.queries.base import PredictionPath, QualityFilterFormat
from hail_search.queries.mito import MitoHailTableQuery

Expand All @@ -12,6 +13,7 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery):

DATA_TYPE = 'SNV_INDEL'
GENOME_VERSION = GENOME_VERSION_GRCh37
LIFT_GENOME_VERSION = GENOME_VERSION_GRCh38

GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']}
QUALITY_FILTER_FORMAT = {
Expand Down Expand Up @@ -133,3 +135,26 @@ def _get_annotation_override_filters(self, ht, annotation_overrides):
@staticmethod
def _stat_has_non_ref(s):
return (s.het_samples > 0) | (s.hom_samples > 0)

@staticmethod
def _lookup_variant_annotations():
return {'liftover_locus': lambda r: r.rg38_locus}

@classmethod
def _get_lifted_table_path(cls, path):
return f'{cls._get_table_dir(path)}/{cls.LIFT_GENOME_VERSION}/{cls.DATA_TYPE}/{path}'

def _get_variant_project_data(self, variant_id, variant=None, **kwargs):
    """Return project data for a variant, merged with data from the lifted genome build.

    The variant's own build is queried first. If the looked-up variant carries a
    ``liftover_locus``, the same query is repeated against the tables for
    ``LIFT_GENOME_VERSION``, restricted to that locus, and any family genotypes
    found there are merged into the result.

    :param variant_id: identifier forwarded unchanged to the parent implementation
    :param variant: the looked-up variant dict; its ``liftover_locus`` entry, if any,
        is removed from the dict as a side effect. May be omitted.
    :param kwargs: forwarded unchanged to the parent implementation
    :return: the project data, annotated with ``liftedFamilyGuids`` when the lifted
        lookup found data
    """
    project_data = super()._get_variant_project_data(variant_id, **kwargs)
    # `variant` defaults to None and may lack the key; both mean there is no locus to lift to,
    # rather than an AttributeError/KeyError
    liftover_locus = variant.pop('liftover_locus', None) if variant is not None else None
    if not liftover_locus:
        return project_data
    # Closed single-position interval so the lifted tables are only read at the lifted locus
    interval = hl.eval(hl.interval(liftover_locus, liftover_locus, includes_start=True, includes_end=True))
    # NOTE(review): both assignments below persist on the instance after this call -
    # presumably the query object is not reused afterwards; confirm
    self._load_table_kwargs['_intervals'] = [interval]
    self._get_table_path = self._get_lifted_table_path
    try:
        lift_project_data = super()._get_variant_project_data(variant_id, **kwargs)
    except HTTPNotFound:
        # Nothing in the lifted build: fall back to the data from the variant's own build
        return project_data
    project_data['familyGenotypes'].update(lift_project_data['familyGenotypes'])
    return project_data.annotate(liftedFamilyGuids=sorted(lift_project_data['familyGenotypes'].keys()))
5 changes: 5 additions & 0 deletions hail_search/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@

VARIANT_LOOKUP_VARIANT = {
**VARIANT1,
'liftedFamilyGuids': ['F000014_14'],
'familyGenotypes': {
VARIANT1['familyGuids'][0]: sorted([
{k: v for k, v in g.items() if k != 'individualGuid'} for g in VARIANT1['genotypes'].values()
Expand All @@ -502,6 +503,10 @@
'sampleId': 'NA20885', 'sampleType': 'WES', 'familyGuid': 'F000011_11',
'numAlt': 2, 'dp': 6, 'gq': 16, 'ab': 1.0, 'filters': [],
}],
'F000014_14': [{
'sampleId': 'NA21234', 'sampleType': 'WGS', 'familyGuid': 'F000014_14',
'numAlt': 1, 'dp': 27, 'gq': 87, 'ab': 0.531000018119812, 'filters': None,
}],
}
}
for k in {'familyGuids', 'genotypes'}:
Expand Down
4 changes: 3 additions & 1 deletion seqr/views/apis/variant_search_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,9 +602,11 @@ def _update_lookup_variant(variant, response):
for genotype in variant['familyGenotypes'].pop(family_guid)
})

for i, genotypes in enumerate(variant.pop('familyGenotypes').values()):
for i, (unmapped_family_guid, genotypes) in enumerate(variant.pop('familyGenotypes').items()):
family_guid = f'F{i}_{variant["variantId"]}'
variant['lookupFamilyGuids'].append(family_guid)
if unmapped_family_guid in variant.get('liftedFamilyGuids', []):
variant['liftedFamilyGuids'][variant['liftedFamilyGuids'].index(unmapped_family_guid)] = family_guid
for j, genotype in enumerate(genotypes):
individual_guid = f'I{j}_{family_guid}'
individual = individual_summary_map[(genotype.pop('familyGuid'), genotype.pop('sampleId'))]
Expand Down
32 changes: 23 additions & 9 deletions seqr/views/apis/variant_search_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,12 +804,14 @@ def test_variant_lookup(self, mock_variant_lookup):
expected_variant = {
**VARIANT_LOOKUP_VARIANT,
'familyGuids': [],
'lookupFamilyGuids': ['F0_1-10439-AC-A', 'F1_1-10439-AC-A'],
'lookupFamilyGuids': ['F0_1-10439-AC-A', 'F1_1-10439-AC-A', 'F2_1-10439-AC-A'],
'liftedFamilyGuids': ['F2_1-10439-AC-A'],
'genotypes': {
'I0_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 60, 'gq': 20, 'numAlt': 0, 'filters': [], 'sampleType': 'WES'},
'I1_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 24, 'gq': 0, 'numAlt': 0, 'filters': [], 'sampleType': 'WES'},
'I2_F0_1-10439-AC-A': {'ab': 0.5, 'dp': 10, 'gq': 99, 'numAlt': 1, 'filters': [], 'sampleType': 'WES'},
'I0_F1_1-10439-AC-A': {'ab': 1.0, 'dp': 6, 'gq': 16, 'numAlt': 2, 'filters': [], 'sampleType': 'WES'},
'I0_F2_1-10439-AC-A': {'ab': 0.531000018119812, 'dp': 27, 'gq': 87, 'numAlt': 1, 'filters': None, 'sampleType': 'WGS'},
},
}
del expected_variant['familyGenotypes']
Expand All @@ -830,6 +832,11 @@ def test_variant_lookup(self, mock_variant_lookup):
'features': [{'category': 'HP:0001626', 'label': '1 terms'}, {'category': 'Other', 'label': '1 terms'}],
'vlmContactEmail': 'seqr-test@gmail.com,test@broadinstitute.org',
},
'I0_F2_1-10439-AC-A': {
'affected': 'A', 'familyGuid': 'F2_1-10439-AC-A', 'features': [],
'individualGuid': 'I0_F2_1-10439-AC-A', 'sex': 'F',
'vlmContactEmail': 'vlm@broadinstitute.org',
},
'I1_F0_1-10439-AC-A': {
'affected': 'N', 'familyGuid': 'F0_1-10439-AC-A', 'features': [],
'individualGuid': 'I1_F0_1-10439-AC-A', 'sex': 'M',
Expand Down Expand Up @@ -860,7 +867,7 @@ def test_variant_lookup(self, mock_variant_lookup):

response_variant['variantId'] = '1-248367227-TC-T'
response_variant['genomeVersion'] = '37'
self.login_collaborator()
self.login_manager()
response = self.client.get(url.replace("38", "37"))
self.assertEqual(response.status_code, 200)

Expand All @@ -869,9 +876,11 @@ def test_variant_lookup(self, mock_variant_lookup):
('I000005_hg00732', 'I1_F0_1-10439-AC-A', {'sampleId': 'HG00732', 'familyGuid': 'F000002_2'}),
('I000004_hg00731', 'I2_F0_1-10439-AC-A', {'sampleId': 'HG00731', 'familyGuid': 'F000002_2'}),
('I000015_na20885', 'I0_F1_1-10439-AC-A', {'sampleId': 'NA20885', 'familyGuid': 'F000011_11'}),
('I000018_na21234', 'I0_F2_1-10439-AC-A', {'sampleId': 'NA21234', 'familyGuid': 'F000014_14'}),
]
expected_variant.update({
'lookupFamilyGuids': ['F000002_2', 'F000011_11'],
'lookupFamilyGuids': ['F000002_2', 'F000011_11', 'F000014_14'],
'liftedFamilyGuids': ['F000014_14'],
'genotypes': {
individual_guid: {**expected_variant['genotypes'][anon_individual_guid], **genotype}
for individual_guid, anon_individual_guid, genotype in individual_guid_map
Expand All @@ -881,32 +890,37 @@ def test_variant_lookup(self, mock_variant_lookup):
})
expected_body.update({
**{k: {**EXPECTED_SEARCH_RESPONSE[k]} for k in {
'savedVariantsByGuid', 'variantTagsByGuid', 'variantNotesByGuid',
'mmeSubmissionsByGuid', 'variantTagsByGuid', 'variantNotesByGuid',
}},
**EXPECTED_TRANSCRIPTS_RESPONSE,
'omimIntervals': {},
'savedVariantsByGuid': {'SV0000002_1248367227_r0390_100': EXPECTED_SAVED_VARIANT},
'variantFunctionalDataByGuid': {},
'locusListsByGuid': EXPECTED_SEARCH_CONTEXT_RESPONSE['locusListsByGuid'],
'projectsByGuid': {
p: {k: mock.ANY for k in PROJECT_TAG_TYPE_FIELDS}
for p in [PROJECT_GUID, 'R0003_test']
for p in [PROJECT_GUID, 'R0003_test', 'R0004_non_analyst_project']
},
'familiesByGuid': {
f: {k: mock.ANY for k in [*FAMILY_FIELDS, 'individualGuids']}
for f in ['F000002_2', 'F000011_11']
for f in ['F000002_2', 'F000011_11', 'F000014_14']
},
'individualsByGuid': {
i[0]: {k: mock.ANY for k in [*INDIVIDUAL_FIELDS, 'igvSampleGuids']}
for i in individual_guid_map
for i in individual_guid_map + [('I000019_na21987',)]
},
})
expected_body['genesById']['ENSG00000227232'] = expected_pa_gene
del expected_body['savedVariantsByGuid']['SV0000001_2103343353_r0390_100']
expected_body['mmeSubmissionsByGuid']['MS000018_P0004517'] = expected_body['mmeSubmissionsByGuid'].pop('MS000001_na19675')
expected_body['savedVariantsByGuid']['SV0000006_1248367227_r0004_non'] = mock.ANY
expected_body['variantTagsByGuid']['VT1726970_2103343353_r0004_tes'] = EXPECTED_TAG
expected_body['variantTagsByGuid']['VT1726961_2103343353_r0005_tes'] = EXPECTED_TAG
for k in ['VT1708633_2103343353_r0390_100', 'VT1726961_2103343353_r0390_100']:
del expected_body['variantTagsByGuid'][k]

self.assertDictEqual(response.json(), expected_body)
mock_variant_lookup.assert_called_with(
self.collaborator_user, ('1', 10439, 'AC', 'A'), genome_version='37',
self.manager_user, ('1', 10439, 'AC', 'A'), genome_version='37',
)

@mock.patch('seqr.views.apis.variant_search_api.sv_variant_lookup')
Expand Down
21 changes: 18 additions & 3 deletions ui/pages/SummaryData/components/VariantLookup.jsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import React from 'react'
import { connect } from 'react-redux'
import PropTypes from 'prop-types'
import { Grid, Header } from 'semantic-ui-react'
import { Grid, Header, Label } from 'semantic-ui-react'

import { RECEIVE_DATA } from 'redux/utils/reducerUtils'
import { QueryParamsEditor } from 'shared/components/QueryParamEditor'
Expand All @@ -14,7 +14,7 @@ import FamilyReads from 'shared/components/panel/family/FamilyReads'
import FamilyVariantTags from 'shared/components/panel/variants/FamilyVariantTags'
import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/variants/Variants'
import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals'
import { GENOME_VERSION_FIELD } from 'shared/utils/constants'
import { GENOME_VERSION_FIELD, GENOME_VERSION_37, GENOME_VERSION_38 } from 'shared/utils/constants'
import { sendVlmContactEmail } from '../reducers'
import { getVlmDefaultContactEmails, getVlmFamiliesByContactEmail } from '../selectors'

Expand Down Expand Up @@ -43,10 +43,19 @@ const mapContactDispatchToProps = {

const ContactButton = connect(null, mapContactDispatchToProps)(SendEmailButton)

const LookupFamilyLayout = ({ topContent, bottomContent, children, ...buttonProps }) => (
// Returns the build a variant would be lifted to: GENOME_VERSION_38 when given
// GENOME_VERSION_37, and GENOME_VERSION_37 for any other value
const liftoverGenomeVersion = (genomeVersion) => {
  if (genomeVersion === GENOME_VERSION_37) {
    return GENOME_VERSION_38
  }
  return GENOME_VERSION_37
}

const LookupFamilyLayout = ({ topContent, bottomContent, hasLiftover, genomeVersion, children, ...buttonProps }) => (
<StyledVariantRow>
{topContent}
<Grid.Column width={4}>
<Label
content={`GRCh${hasLiftover ? liftoverGenomeVersion(genomeVersion) : genomeVersion}`}
basic
color={hasLiftover ? 'orange' : 'green'}
/>
<ContactButton {...buttonProps} />
</Grid.Column>
<Grid.Column width={12}>
Expand All @@ -60,6 +69,8 @@ LookupFamilyLayout.propTypes = {
topContent: PropTypes.node,
bottomContent: PropTypes.node,
children: PropTypes.node,
hasLiftover: PropTypes.bool,
genomeVersion: PropTypes.string,
}

const InternalFamily = ({ familyGuid, variant, reads, showReads }) => (
Expand All @@ -70,6 +81,8 @@ const InternalFamily = ({ familyGuid, variant, reads, showReads }) => (
</Grid.Column>
)}
bottomContent={<Grid.Column width={16}>{reads}</Grid.Column>}
hasLiftover={variant.liftedFamilyGuids?.includes(familyGuid)}
genomeVersion={variant.genomeVersion}
>
<FamilyVariantIndividuals familyGuid={familyGuid} variant={variant} />
{showReads}
Expand All @@ -96,6 +109,8 @@ const BaseLookupVariant = ({ variant, familiesByContactEmail, vlmDefaultContactE
key={contactEmail}
defaultEmail={vlmDefaultContactEmails[contactEmail]}
modalId={contactEmail}
hasLiftover={(variant.liftedFamilyGuids || []).some(familyGuid => families.includes(familyGuid))}
genomeVersion={variant.genomeVersion}
>
<Grid stackable divided="vertically">
{families.map(familyGuid => (
Expand Down

0 comments on commit b697e9a

Please sign in to comment.