Skip to content

Commit

Permalink
Merge pull request #4110 from broadinstitute/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
hanars authored May 24, 2024
2 parents 133e7ec + f0130ef commit 9c8fc2e
Show file tree
Hide file tree
Showing 59 changed files with 1,277 additions and 895 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## dev

## 5/24/24
* Adds external_data to Family model (REQUIRES DB MIGRATION)
* Adds post_discovery_mondo_id to Family model (REQUIRES DB MIGRATION)
* Adds guid and created fields to PhenotypePrioritization model (REQUIRES DB MIGRATION)
* Enable "Reports" tab by default for local installations

## 5/8/24
* Adds dynamic analysis groups (REQUIRES DB MIGRATION)

Expand Down
24 changes: 14 additions & 10 deletions hail_search/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,7 @@ def _filter_entries_table(self, ht, sample_data, inheritance_filter=None, qualit

ht, sorted_family_sample_data = self._add_entry_sample_families(ht, sample_data)

quality_filter = quality_filter or {}
if quality_filter.get('vcf_filter'):
ht = self._filter_vcf_filters(ht)

passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht=ht, **kwargs)
passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht, **kwargs)
if passes_quality_filter is not None:
ht = ht.annotate(family_entries=ht.family_entries.map(
lambda entries: hl.or_missing(passes_quality_filter(entries), entries)
Expand Down Expand Up @@ -538,7 +534,9 @@ def _valid_genotype_family_entries(cls, entries, gentoype_entry_indices, genotyp
is_valid &= unaffected_filter
return hl.or_missing(is_valid, entries)

def _get_family_passes_quality_filter(self, quality_filter, **kwargs):
def _get_family_passes_quality_filter(self, quality_filter, ht, **kwargs):
quality_filter = quality_filter or {}

affected_only = quality_filter.get('affected_only')
passes_quality_filters = []
for filter_k, value in quality_filter.items():
Expand All @@ -547,10 +545,16 @@ def _get_family_passes_quality_filter(self, quality_filter, **kwargs):
if field and value:
passes_quality_filters.append(self._get_genotype_passes_quality_field(field, value, affected_only))

if not passes_quality_filters:
has_vcf_filter = quality_filter.get('vcf_filter')
if not (passes_quality_filters or has_vcf_filter):
return None

return lambda entries: entries.all(lambda gt: hl.all([f(gt) for f in passes_quality_filters]))
def passes_quality(entries):
passes_filters = entries.all(lambda gt: hl.all([f(gt) for f in passes_quality_filters])) if passes_quality_filters else True
passes_vcf_filters = self._passes_vcf_filters(ht) if has_vcf_filter else True
return passes_filters & passes_vcf_filters

return passes_quality

@classmethod
def _get_genotype_passes_quality_field(cls, field, value, affected_only):
Expand All @@ -569,8 +573,8 @@ def passes_quality_field(gt):
return passes_quality_field

@staticmethod
def _filter_vcf_filters(ht):
return ht.filter(hl.is_missing(ht.filters) | (ht.filters.length() < 1))
def _passes_vcf_filters(ht):
return hl.is_missing(ht.filters) | (ht.filters.length() < 1)

def _parse_variant_keys(self, variant_keys=None, **kwargs):
return [hl.struct(**{self.KEY_FIELD[0]: key}) for key in (variant_keys or [])]
Expand Down
4 changes: 2 additions & 2 deletions hail_search/queries/mito.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ def _parse_intervals(self, intervals, exclude_intervals=False, **kwargs):
self._load_table_kwargs = {'_intervals': parsed_intervals, '_filter_intervals': True}
return parsed_intervals

def _get_family_passes_quality_filter(self, quality_filter, ht=None, pathogenicity=None, **kwargs):
passes_quality = super()._get_family_passes_quality_filter(quality_filter)
def _get_family_passes_quality_filter(self, quality_filter, ht, pathogenicity=None, **kwargs):
passes_quality = super()._get_family_passes_quality_filter(quality_filter, ht)
clinvar_path_ht = False if passes_quality is None else self._get_loaded_clinvar_prefilter_ht(pathogenicity)
if not clinvar_path_ht:
return passes_quality
Expand Down
4 changes: 2 additions & 2 deletions hail_search/queries/sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def _parse_annotations(self, annotations, *args, **kwargs):
parsed_annotations[NEW_SV_FIELD] = (annotations or {}).get(NEW_SV_FIELD)
return parsed_annotations

def _get_family_passes_quality_filter(self, quality_filter, parsed_annotations=None, **kwargs):
passes_quality = super()._get_family_passes_quality_filter(quality_filter)
def _get_family_passes_quality_filter(self, quality_filter, ht, parsed_annotations=None, **kwargs):
passes_quality = super()._get_family_passes_quality_filter(quality_filter, ht)
if not (parsed_annotations or {}).get(NEW_SV_FIELD):
return passes_quality

Expand Down
5 changes: 3 additions & 2 deletions hail_search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,16 @@ async def test_quality_filter(self):
omit_sample_type='SV_WES',
)

quality_filter = {'min_gq': 40, 'min_ab': 50}
quality_filter.update({'min_gq': 40, 'min_ab': 50})
await self._assert_expected_search(
[VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
)

annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters
await self._assert_expected_search(
[VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
[VARIANT1, VARIANT2, FAMILY_3_VARIANT, MITO_VARIANT1, MITO_VARIANT3], quality_filter=quality_filter, omit_sample_type='SV_WES',
annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
sample_data={**EXPECTED_SAMPLE_DATA, **FAMILY_2_MITO_SAMPLE_DATA},
)

await self._assert_expected_search(
Expand Down
12 changes: 4 additions & 8 deletions matchmaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ class MatchmakerSubmission(ModelWithGUID):
def __unicode__(self):
return '{}_submission_{}'.format(str(self.individual), self.id)

def _compute_guid(self):
return 'MS%07d_%s' % (self.id, str(self.individual))
GUID_PREFIX = 'MS'

class Meta:
json_fields = [
Expand All @@ -46,8 +45,7 @@ class MatchmakerIncomingQuery(ModelWithGUID):
def __unicode__(self):
return '{}_{}_query'.format(self.patient_id or self.id, self.institution)

def _compute_guid(self):
return 'MIQ%07d_%s_%s' % (self.id, self.patient_id, self.institution.replace(' ', '_'))
GUID_PREFIX = 'MIQ'

class Meta:
json_fields = ['guid', 'created_date']
Expand All @@ -71,8 +69,7 @@ class MatchmakerResult(ModelWithGUID):
def __unicode__(self):
return '{}_{}_result'.format(self.id, str(self.submission))

def _compute_guid(self):
return 'MR%07d_%s' % (self.id, str(self.submission))
GUID_PREFIX = 'MR'

class Meta:
json_fields = [
Expand All @@ -88,8 +85,7 @@ class MatchmakerContactNotes(ModelWithGUID):
def __unicode__(self):
return '{}_{}_contact'.format(self.id, self.institution)

def _compute_guid(self):
return 'MCN%07d_%s' % (self.id, self.institution.replace(' ', '_'))
GUID_PREFIX = 'MCN'

class Meta:
json_fields = []
Expand Down
2 changes: 2 additions & 0 deletions panelapp/panelapp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def _extract_ensembl_id_from_json(raw_gene_json):
panel_genes_url = '{}/panels/{}/genes'.format(panel_app_api_url, panel_app_id)
pa_locus_list = _create_or_update_locus_list_from_panel(user, panel_genes_url, panel, label)
all_genes_for_panel = genes_by_panel_id.get(panel_app_id, [])
if not all_genes_for_panel:
continue # Genes in 'super panels' are associated with sub panels
panel_genes_by_id = {_extract_ensembl_id_from_json(gene): gene for gene in all_genes_for_panel
if _extract_ensembl_id_from_json(gene)}
raw_ensbl_38_gene_ids_csv = ','.join(panel_genes_by_id.keys())
Expand Down
8 changes: 5 additions & 3 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ idna==3.7
# requests
mock==5.0.1
# via -r requirements-dev.in
packaging==23.0
# via build
packaging==24.0
# via
# -c requirements.txt
# build
pip-tools==6.12.2
# via -r requirements-dev.in
pyproject-hooks==1.0.0
Expand All @@ -51,7 +53,7 @@ pytz==2022.7.1
# django
rcssmin==1.1.1
# via django-compressor
requests==2.31.0
requests==2.32.0
# via
# -c requirements.txt
# responses
Expand Down
5 changes: 1 addition & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ pytz==2022.7.1
# django-notifications-hq
redis==4.5.4
# via -r requirements.in
requests==2.31.0
requests==2.32.0
# via
# -r requirements.in
# django-anymail
Expand Down Expand Up @@ -179,6 +179,3 @@ urllib3==1.26.18
# requests
whitenoise==6.3.0
# via -r requirements.in

# The following packages are considered to be unsafe in a requirements file:
# setuptools
21 changes: 17 additions & 4 deletions seqr/fixtures/1kg_project.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@
"analysis_status": "Q",
"coded_phenotype": "myopathy",
"pubmed_ids": ["34415322", "33665635"],
"external_data": ["M"],
"case_review_notes": "<div>initial notes with uni&ccedil;&oslash;de</div>\n<div>test</div>",
"case_review_summary": "<div>internal case review summary with uni&ccedil;&oslash;de</div>"
}
Expand All @@ -149,7 +150,8 @@
"pedigree_image": "ped_2.png",
"analysis_status": "Q",
"coded_phenotype": "microcephaly, seizures",
"mondo_id": "MONDO:0044970",
"mondo_id": "MONDO:0044976",
"post_discovery_mondo_id": "MONDO:0044970",
"case_review_notes": "<div>internal notes 2</div>\n<div>&nbsp;</div>",
"case_review_summary": "<div>internal case review summary 2</div>\n<div>&nbsp;</div>"
}
Expand Down Expand Up @@ -333,7 +335,8 @@
"analysis_status": "Q",
"success_story": "Published with Gleeson and Reza (PMID 31668703)",
"success_story_types": ["C", "D"],
"mondo_id": "0008788",
"post_discovery_mondo_id": "0008788",
"post_discovery_omim_numbers": [616126],
"case_review_notes": "<div><span style=\"text-decoration: underline;\"><span style=\"font-family: 'book antiqua', palatino, serif;\">case review notes for family 12</span></span></div>\n<ul>\n<li><span style=\"font-family: 'book antiqua', palatino, serif;\">note1</span></li>\n<li><span style=\"font-family: 'book antiqua', palatino, serif;\">note 2</span></li>\n<li><span style=\"font-family: 'book antiqua', palatino, serif;\">note 3</span></li>\n</ul>",
"case_review_summary": "<div><span style=\"font-family: 'courier new', courier, monospace;\"><strong>summary for family 12</strong></span></div>"
}
Expand Down Expand Up @@ -1041,7 +1044,6 @@
"created_date": "2017-02-05T06:42:55.397Z",
"created_by": null,
"last_modified_date": "2017-03-13T09:07:50.052Z",

"sample_id": "NA20872",
"sample_type": "WES",
"is_active": false,
Expand Down Expand Up @@ -1078,7 +1080,6 @@
"created_date": "2017-02-05T06:42:55.397Z",
"created_by": null,
"last_modified_date": "2017-03-13T09:07:50.111Z",

"sample_id": "NA20875",
"sample_type": "WES",
"is_active": false,
Expand Down Expand Up @@ -1553,6 +1554,9 @@
"model": "seqr.phenotypeprioritization",
"pk": 1,
"fields": {
"guid": "PP000001_NA19675_1ENSG00000268",
"created_date": "2024-05-02T06:42:55.397Z",
"created_by": null,
"individual": 1,
"gene_id": "ENSG00000268903",
"tool": "exomiser",
Expand All @@ -1570,6 +1574,9 @@
"model": "seqr.phenotypeprioritization",
"pk": 2,
"fields": {
"guid": "PP000002_NA19675_ENSG000002689",
"created_date": "2024-05-02T06:42:55.397Z",
"created_by": null,
"individual": 1,
"gene_id": "ENSG00000268903",
"tool": "exomiser",
Expand All @@ -1587,6 +1594,9 @@
"model": "seqr.phenotypeprioritization",
"pk": 3,
"fields": {
"guid": "PP000003_NA19678_ENSG000002689",
"created_date": "2024-05-02T06:42:55.397Z",
"created_by": null,
"individual": 2,
"gene_id": "ENSG00000268903",
"tool": "lirical",
Expand All @@ -1603,6 +1613,9 @@
"model": "seqr.phenotypeprioritization",
"pk": 4,
"fields": {
"guid": "PP000004_NA19675_ENSG000002689",
"created_date": "2024-05-02T06:42:55.397Z",
"created_by": null,
"individual": 1,
"gene_id": "ENSG00000268904",
"tool": "lirical",
Expand Down
15 changes: 8 additions & 7 deletions seqr/management/commands/load_rna_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,23 @@ def handle(self, *args, **options):
config = RNA_DATA_TYPE_CONFIGS[data_type]
model_cls = config['model_class']

sample_data_by_guid = defaultdict(list)
sample_data_by_key = defaultdict(list)

def _save_sample_data(sample_guid, row):
sample_data_by_guid[sample_guid].append(row)
def _save_sample_data(sample_key, row):
sample_data_by_key[sample_key].append(row)

possible_sample_guids, _, _ = load_rna_seq(
possible_sample_guids_to_keys, _, _ = load_rna_seq(
data_type, options['input_file'], _save_sample_data,
mapping_file=mapping_file, ignore_extra_samples=options['ignore_extra_samples'])

sample_models_by_guid = {
s.guid: s for s in Sample.objects.filter(guid__in=sample_data_by_guid)
s.guid: s for s in Sample.objects.filter(guid__in=possible_sample_guids_to_keys)
}
errors = []
sample_guids = []
for sample_guid in possible_sample_guids:
data_rows, error = post_process_rna_data(sample_guid, sample_data_by_guid[sample_guid], **config.get('post_process_kwargs', {}))
for sample_guid in possible_sample_guids_to_keys:
sample_key = possible_sample_guids_to_keys[sample_guid]
data_rows, error = post_process_rna_data(sample_guid, sample_data_by_key[sample_key], **config.get('post_process_kwargs', {}))
if error:
errors.append(error)
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
MOCK_HAIL_HOST = 'http://test-hail-host'

GUID_ID = 54321
NEW_SAMPLE_GUID_P3 = f'S{GUID_ID}_NA20888'
NEW_SAMPLE_GUID_P4 = f'S{GUID_ID}_NA21234'
REPLACED_SAMPLE_GUID = f'S{GUID_ID}_NA20885'
NEW_SAMPLE_GUID_P3 = f'S00000{GUID_ID}_na20888'
NEW_SAMPLE_GUID_P4 = f'S00000{GUID_ID}_na21234'
REPLACED_SAMPLE_GUID = f'S00000{GUID_ID}_na20885'
EXISTING_SAMPLE_GUID = 'S000154_na20889'
EXISTING_WGS_SAMPLE_GUID = 'S000144_na20888'
EXISTING_SV_SAMPLE_GUID = 'S000147_na21234'
Expand Down Expand Up @@ -49,7 +49,7 @@


@mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HAIL_HOST)
@mock.patch('seqr.views.utils.dataset_utils.random.randint', lambda *args: GUID_ID)
@mock.patch('seqr.models.random.randint', lambda *args: GUID_ID)
@mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', 'http://testairtable')
@mock.patch('seqr.utils.search.add_data_utils.BASE_URL', SEQR_URL)
@mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL', 'anvil-data-loading')
Expand Down Expand Up @@ -381,7 +381,7 @@ def test_gcnv_command(self):
'sample_type': 'WES',
'family_samples': {'F000004_4': ['NA20872'], 'F000012_12': ['NA20889']},
}
self._test_success('GRCh37/GCNV', metadata, dataset_type='SV', sample_guids={f'S{GUID_ID}_NA20872', f'S{GUID_ID}_NA20889'}, reload_calls=[{
self._test_success('GRCh37/GCNV', metadata, dataset_type='SV', sample_guids={f'S00000{GUID_ID}_na20872', f'S00000{GUID_ID}_na20889'}, reload_calls=[{
'genome_version': 'GRCh37', 'num_results': 1, 'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'],
'sample_data': {'SV_WES': [{'individual_guid': 'I000017_na20889', 'family_guid': 'F000012_12', 'project_guid': 'R0003_test', 'affected': 'A', 'sample_id': 'NA20889'}]},
}], reload_annotations_logs=['No additional saved variants to update'])
Expand Down
57 changes: 57 additions & 0 deletions seqr/migrations/0064_alter_phenotypeprioritization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Generated by Django 3.2.25 on 2024-05-02 17:45
from django.conf import settings
from django.db import migrations, models
import django.utils.timezone

from seqr.models import _slugify

MAX_GUID_SIZE = 30


class Migration(migrations.Migration):
    """Add guid, created_date, created_by, and last_modified_date to PhenotypePrioritization.

    Existing rows are backfilled with deterministic guids via RunPython before the
    unique constraint on guid is applied, so the AlterField at the end cannot fail
    on duplicate placeholder values.
    """

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('seqr', '0063_dynamicanalysisgroup'),
    ]

    def update_guids(apps, schema_editor):
        # Backfill a guid for every existing row, using the historical model from
        # the app registry (never the live model) and the migration's DB alias.
        PhenotypePrioritization = apps.get_model('seqr', 'PhenotypePrioritization')
        db_alias = schema_editor.connection.alias
        pps = PhenotypePrioritization.objects.using(db_alias).all()
        # Single query for all individual display IDs instead of one per row.
        individual_id_map = dict(pps.values_list('id', 'individual__individual_id'))
        for pp in pps:
            ids_as_str = "%s:%s:%s" % (individual_id_map[pp.id], pp.gene_id, pp.disease_id)
            # The % expression must be parenthesized before slicing: without the
            # parens, [:MAX_GUID_SIZE] binds to the argument tuple (a no-op slice
            # of a 2-tuple), so guids were never truncated and could exceed the
            # 30-char column limit added below.
            pp.guid = ('PP%07d_%s' % (pp.id, _slugify(ids_as_str)))[:MAX_GUID_SIZE]
        PhenotypePrioritization.objects.using(db_alias).bulk_update(pps, ['guid'], batch_size=1000)

    operations = [
        migrations.AddField(
            model_name='phenotypeprioritization',
            name='created_by',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL),
        ),
        migrations.AddField(
            model_name='phenotypeprioritization',
            name='created_date',
            field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
        ),
        migrations.AddField(
            model_name='phenotypeprioritization',
            name='guid',
            # Temporary non-unique default; replaced per-row by update_guids below.
            field=models.CharField(default='', max_length=30),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name='phenotypeprioritization',
            name='last_modified_date',
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        migrations.RunPython(update_guids, reverse_code=migrations.RunPython.noop),
        # Add uniqueness constraint to guid only after defaults are replaced by update_guids
        migrations.AlterField(
            model_name='phenotypeprioritization',
            name='guid',
            field=models.CharField(db_index=True, unique=True, max_length=30),
        ),
    ]
Loading

0 comments on commit 9c8fc2e

Please sign in to comment.