From e2949f2acd59fc3389c68f679edad66c99c3166e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 5 Dec 2024 16:46:38 -0500 Subject: [PATCH 01/30] multi build abstraction --- hail_search/queries/base.py | 22 ++++++++++++++-------- hail_search/queries/snv_indel.py | 4 ++++ hail_search/queries/snv_indel_37.py | 14 ++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index cd0496f3f5..1ba0d56760 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -1222,7 +1222,7 @@ def gene_counts(self): def _filter_variant_ids(self, ht, variant_ids): return ht - def lookup_variants(self, variant_ids): + def lookup_variants(self, variant_ids, additional_annotations=None): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) ht = self._read_table('annotations.ht', drop_globals=['versions']) ht = self._filter_variant_ids(ht, variant_ids) @@ -1232,6 +1232,8 @@ def lookup_variants(self, variant_ids): k: v for k, v in self.annotation_fields(include_genotype_overrides=False).items() if k not in {FAMILY_GUID_FIELD, GENOTYPES_FIELD} } + if additional_annotations: + annotation_fields.update(additional_annotations) formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=False) return formatted.aggregate(hl.agg.take(formatted.row, len(variant_ids))) @@ -1240,15 +1242,19 @@ def _import_variant_projects_ht(self, project_samples, variant_id): projects_ht, _ = self._import_and_filter_multiple_project_hts(project_samples, n_partitions=1) return self._filter_variant_ids(projects_ht, [variant_id]).key_by() + def _get_variant_project_data(self, variant, sample_data, variant_id): + projects_ht = self._import_variant_projects_ht(sample_data, variant_id) + project_data = projects_ht.aggregate(hl.agg.take(projects_ht.row, 1)) + return project_data[0] if project_data else {} + def lookup_variant(self, variant_id, 
sample_data): - variants = self.lookup_variants([variant_id]) + variants = self.lookup_variants([variant_id], additional_annotations=self._lookup_variant_annotations()) if not variants: raise HTTPNotFound() variant = dict(variants[0]) - - projects_ht = self._import_variant_projects_ht(sample_data, variant_id) - project_data = projects_ht.aggregate(hl.agg.take(projects_ht.row, 1)) - if project_data: - variant.update(project_data[0]) - + variant.update(self._get_variant_project_data(variant, sample_data, variant_id)) return variant + + @staticmethod + def _lookup_variant_annotations(): + return {} diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index d55eaf52a6..808eee0667 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -96,3 +96,7 @@ def _get_annotation_override_filters(self, ht, annotation_overrides): ) return annotation_filters + + @staticmethod + def _lookup_variant_annotations(): + return {'liftover_locus': lambda r: r.rg37_locus} diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index bd0453b5a5..5ce2582f46 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -133,3 +133,17 @@ def _get_annotation_override_filters(self, ht, annotation_overrides): @staticmethod def _stat_has_non_ref(s): return (s.het_samples > 0) | (s.hom_samples > 0) + + @staticmethod + def _lookup_variant_annotations(): + return {'liftover_locus': lambda r: r.rg38_locus} + + def _get_variant_project_data(self, variant, sample_data, variant_id): + project_data = super()._get_variant_project_data(variant, sample_data, variant_id) + liftover_locus = variant.pop('liftover_locus') + if liftover_locus: + # TODO change build version + lift_project_data = super()._get_variant_project_data(variant, sample_data, variant_id) + project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) + + return project_data From 
a6ad490c99ef94e9c9bd0768e3753e56ea92c5e7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 5 Dec 2024 17:07:16 -0500 Subject: [PATCH 02/30] clean up --- hail_search/queries/snv_indel_37.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 5ce2582f46..a00c3ae17c 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -138,12 +138,12 @@ def _stat_has_non_ref(s): def _lookup_variant_annotations(): return {'liftover_locus': lambda r: r.rg38_locus} - def _get_variant_project_data(self, variant, sample_data, variant_id): - project_data = super()._get_variant_project_data(variant, sample_data, variant_id) + def _get_variant_project_data(self, variant_id, variant=None, **kwargs): + project_data = super()._get_variant_project_data(variant_id, **kwargs) liftover_locus = variant.pop('liftover_locus') if liftover_locus: # TODO change build version - lift_project_data = super()._get_variant_project_data(variant, sample_data, variant_id) + lift_project_data = super()._get_variant_project_data(variant_id, **kwargs) project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) return project_data From 9b9f3334156fe09a8f882fb4ebcfa9330c3b1db6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 6 Dec 2024 12:19:51 -0500 Subject: [PATCH 03/30] actually query lift version --- hail_search/queries/snv_indel.py | 3 ++- hail_search/queries/snv_indel_37.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index 808eee0667..ed4e598aa1 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -2,7 +2,7 @@ import hail as hl from hail_search.constants import GENOME_VERSION_GRCh38, SCREEN_KEY, PREFILTER_FREQ_CUTOFF, ALPHAMISSENSE_SORT, \ - UTR_ANNOTATOR_KEY, EXTENDED_SPLICE_KEY, MOTIF_FEATURES_KEY, 
REGULATORY_FEATURES_KEY + UTR_ANNOTATOR_KEY, EXTENDED_SPLICE_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY, GENOME_VERSION_GRCh37 from hail_search.queries.base import BaseHailTableQuery, PredictionPath from hail_search.queries.snv_indel_37 import SnvIndelHailTableQuery37 @@ -12,6 +12,7 @@ class SnvIndelHailTableQuery(SnvIndelHailTableQuery37): GENOME_VERSION = GENOME_VERSION_GRCh38 + LIFT_GENOME_VERSION = GENOME_VERSION_GRCh37 PREDICTION_FIELDS_CONFIG = { **SnvIndelHailTableQuery37.PREDICTION_FIELDS_CONFIG, 'fathmm': PredictionPath('dbnsfp', 'fathmm_MKL_coding_score'), diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index a00c3ae17c..6cfed55fd1 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -3,7 +3,7 @@ from hail_search.constants import CLINVAR_KEY, HGMD_KEY, HGMD_PATH_RANGES, \ GNOMAD_GENOMES_FIELD, PREFILTER_FREQ_CUTOFF, PATH_FREQ_OVERRIDE_CUTOFF, PATHOGENICTY_HGMD_SORT_KEY, \ - SPLICE_AI_FIELD, GENOME_VERSION_GRCh37 + SPLICE_AI_FIELD, GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 from hail_search.queries.base import PredictionPath, QualityFilterFormat from hail_search.queries.mito import MitoHailTableQuery @@ -12,6 +12,7 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery): DATA_TYPE = 'SNV_INDEL' GENOME_VERSION = GENOME_VERSION_GRCh37 + LIFT_GENOME_VERSION = GENOME_VERSION_GRCh38 GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']} QUALITY_FILTER_FORMAT = { @@ -138,11 +139,17 @@ def _stat_has_non_ref(s): def _lookup_variant_annotations(): return {'liftover_locus': lambda r: r.rg38_locus} + @classmethod + def _get_lifted_table_path(cls, path): + return f'{cls._get_table_dir(path)}/{cls.LIFT_GENOME_VERSION}/{cls.DATA_TYPE}/{path}' + def _get_variant_project_data(self, variant_id, variant=None, **kwargs): project_data = super()._get_variant_project_data(variant_id, **kwargs) liftover_locus = variant.pop('liftover_locus') if liftover_locus: - # TODO change build version 
+ interval = hl.eval(hl.interval(liftover_locus, liftover_locus, includes_start=True, includes_end=True)) + self._load_table_kwargs['_intervals'] = [interval] + self._get_table_path = self._get_lifted_table_path lift_project_data = super()._get_variant_project_data(variant_id, **kwargs) project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) From 44bda85ae6af33f7bd067112e270ef576412ed57 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 6 Dec 2024 13:02:42 -0500 Subject: [PATCH 04/30] track build lifted guid buid --- hail_search/queries/snv_indel_37.py | 1 + seqr/views/apis/variant_search_api.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 6cfed55fd1..243fce026b 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -152,5 +152,6 @@ def _get_variant_project_data(self, variant_id, variant=None, **kwargs): self._get_table_path = self._get_lifted_table_path lift_project_data = super()._get_variant_project_data(variant_id, **kwargs) project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) + project_data = project_data.annotate(liftedFamilyGuids=sorted(lift_project_data['familyGenotypes'].keys())) return project_data diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index 56b6a02160..99cf4f345d 100644 --- a/seqr/views/apis/variant_search_api.py +++ b/seqr/views/apis/variant_search_api.py @@ -602,9 +602,11 @@ def _update_lookup_variant(variant, response): for genotype in variant['familyGenotypes'].pop(family_guid) }) - for i, genotypes in enumerate(variant.pop('familyGenotypes').values()): + for i, (unmapped_family_guid, genotypes) in enumerate(variant.pop('familyGenotypes').items()): family_guid = f'F{i}_{variant["variantId"]}' variant['lookupFamilyGuids'].append(family_guid) + if unmapped_family_guid in variant.get('liftedFamilyGuids', []): + 
variant['liftedFamilyGuids'][variant['liftedFamilyGuids'].index(unmapped_family_guid)] = family_guid for j, genotype in enumerate(genotypes): individual_guid = f'I{j}_{family_guid}' individual = individual_summary_map[(genotype.pop('familyGuid'), genotype.pop('sampleId'))] From 594148c6227bf2866cbb1e16ad814f047fbc792f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 6 Dec 2024 15:13:57 -0500 Subject: [PATCH 05/30] show lookup genome version in ui --- .../SummaryData/components/VariantLookup.jsx | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index 9e6ad7e00e..0e9ef3e8a0 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -1,7 +1,7 @@ import React from 'react' import { connect } from 'react-redux' import PropTypes from 'prop-types' -import { Grid, Header } from 'semantic-ui-react' +import { Grid, Header, Label } from 'semantic-ui-react' import { RECEIVE_DATA } from 'redux/utils/reducerUtils' import { QueryParamsEditor } from 'shared/components/QueryParamEditor' @@ -14,7 +14,7 @@ import FamilyReads from 'shared/components/panel/family/FamilyReads' import FamilyVariantTags from 'shared/components/panel/variants/FamilyVariantTags' import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/variants/Variants' import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals' -import { GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { GENOME_VERSION_FIELD, GENOME_VERSION_37, GENOME_VERSION_38 } from 'shared/utils/constants' import { sendVlmContactEmail } from '../reducers' import { getVlmDefaultContactEmails, getVlmFamiliesByContactEmail } from '../selectors' @@ -43,10 +43,19 @@ const mapContactDispatchToProps = { const ContactButton = connect(null, mapContactDispatchToProps)(SendEmailButton) -const 
LookupFamilyLayout = ({ topContent, bottomContent, children, ...buttonProps }) => ( +const liftoverGenomeVersion = genomeVersion => ( + genomeVersion === GENOME_VERSION_37 ? GENOME_VERSION_38 : GENOME_VERSION_37 +) + +const LookupFamilyLayout = ({ topContent, bottomContent, hasLiftover, genomeVersion, children, ...buttonProps }) => ( {topContent} + @@ -60,6 +69,8 @@ LookupFamilyLayout.propTypes = { topContent: PropTypes.node, bottomContent: PropTypes.node, children: PropTypes.node, + hasLiftover: PropTypes.bool, + genomeVersion: PropTypes.string, } const InternalFamily = ({ familyGuid, variant, reads, showReads }) => ( @@ -70,6 +81,8 @@ const InternalFamily = ({ familyGuid, variant, reads, showReads }) => ( )} bottomContent={{reads}} + hasLiftover={variant.liftedFamilyGuids?.includes(familyGuid)} + genomeVersion={variant.genomeVersion} > {showReads} @@ -96,6 +109,8 @@ const BaseLookupVariant = ({ variant, familiesByContactEmail, vlmDefaultContactE key={contactEmail} defaultEmail={vlmDefaultContactEmails[contactEmail]} modalId={contactEmail} + hasLiftover={(variant.liftedFamilyGuids || []).some(familyGuid => families.includes(familyGuid))} + genomeVersion={variant.genomeVersion} > {families.map(familyGuid => ( From 3455c1ae87e56498dfbb85c5cc2ecdcc8b1c90f6 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Sun, 8 Dec 2024 05:16:42 +0000 Subject: [PATCH 06/30] fix: requirements-dev.txt to reduce vulnerabilities The following vulnerabilities are fixed by pinning transitive dependencies: - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-8456315 - https://snyk.io/vuln/SNYK-PYTHON-DJANGO-8456316 --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index ef0123029f..029242c825 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -22,7 +22,7 @@ click==8.1.3 # via pip-tools coverage==5.1 # via -r requirements-dev.in -django==4.2.16 +django==4.2.17 # via # -c requirements.txt # django-appconf From 
b73dc2d131fdb8c33bac92f443411357ee9f8030 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 Jan 2025 11:26:26 -0500 Subject: [PATCH 07/30] handle missing lifted variant --- hail_search/queries/snv_indel_37.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 243fce026b..1368dbdf78 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -1,3 +1,4 @@ +from aiohttp.web import HTTPNotFound from collections import OrderedDict import hail as hl @@ -147,11 +148,18 @@ def _get_variant_project_data(self, variant_id, variant=None, **kwargs): project_data = super()._get_variant_project_data(variant_id, **kwargs) liftover_locus = variant.pop('liftover_locus') if liftover_locus: - interval = hl.eval(hl.interval(liftover_locus, liftover_locus, includes_start=True, includes_end=True)) - self._load_table_kwargs['_intervals'] = [interval] - self._get_table_path = self._get_lifted_table_path - lift_project_data = super()._get_variant_project_data(variant_id, **kwargs) - project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) - project_data = project_data.annotate(liftedFamilyGuids=sorted(lift_project_data['familyGenotypes'].keys())) + liftover_data = self._get_liftover_variant_project_data(variant_id, liftover_locus, **kwargs) + if liftover_data: + project_data['familyGenotypes'].update(liftover_data['familyGenotypes']) + project_data = project_data.annotate(liftedFamilyGuids=sorted(liftover_data['familyGenotypes'].keys())) return project_data + + def _get_liftover_variant_project_data(self, variant_id, liftover_locus, **kwargs): + interval = hl.eval(hl.interval(liftover_locus, liftover_locus, includes_start=True, includes_end=True)) + self._load_table_kwargs['_intervals'] = [interval] + self._get_table_path = self._get_lifted_table_path + try: + return super()._get_variant_project_data(variant_id, 
**kwargs) + except HTTPNotFound: + return None From fde9df94738696e753e7660890e9a154596f8050 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 Jan 2025 11:46:29 -0500 Subject: [PATCH 08/30] fix super --- hail_search/queries/snv_indel_37.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 1368dbdf78..71323282b1 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -147,19 +147,14 @@ def _get_lifted_table_path(cls, path): def _get_variant_project_data(self, variant_id, variant=None, **kwargs): project_data = super()._get_variant_project_data(variant_id, **kwargs) liftover_locus = variant.pop('liftover_locus') - if liftover_locus: - liftover_data = self._get_liftover_variant_project_data(variant_id, liftover_locus, **kwargs) - if liftover_data: - project_data['familyGenotypes'].update(liftover_data['familyGenotypes']) - project_data = project_data.annotate(liftedFamilyGuids=sorted(liftover_data['familyGenotypes'].keys())) - - return project_data - - def _get_liftover_variant_project_data(self, variant_id, liftover_locus, **kwargs): + if not liftover_locus: + return project_data interval = hl.eval(hl.interval(liftover_locus, liftover_locus, includes_start=True, includes_end=True)) self._load_table_kwargs['_intervals'] = [interval] self._get_table_path = self._get_lifted_table_path try: - return super()._get_variant_project_data(variant_id, **kwargs) + lift_project_data = super()._get_variant_project_data(variant_id, **kwargs) except HTTPNotFound: - return None + return project_data + project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) + return project_data.annotate(liftedFamilyGuids=sorted(lift_project_data['familyGenotypes'].keys())) From ded2cddc68cd40e36a18fdd3e5c8379965fa5263 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 Jan 2025 13:11:40 -0500 Subject: [PATCH 09/30] 
testmutli build lookup --- .../GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../GRCh37/SNV_INDEL/lookup.ht/README.txt | 2 +- .../lookup.ht/globals/parts/.part-0.crc | Bin 12 -> 12 bytes .../SNV_INDEL/lookup.ht/globals/parts/part-0 | Bin 69 -> 114 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 65 -> 0 bytes .../metadata.json.gz | Bin 185 -> 0 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 75 bytes .../metadata.json.gz | Bin 0 -> 183 bytes .../GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz | Bin 356 -> 356 bytes .../lookup.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../SNV_INDEL/lookup.ht/rows/metadata.json.gz | Bin 606 -> 618 bytes ...t-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc | Bin 12 -> 0 bytes ...t-0-d213a419-0c77-4952-b924-b6af300f393b.crc | Bin 0 -> 12 bytes .../part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 | Bin 52 -> 0 bytes .../part-0-d213a419-0c77-4952-b924-b6af300f393b | Bin 0 -> 70 bytes .../.README.txt.crc | Bin 0 -> 12 bytes .../R0004_non_analyst_project.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../WGS/R0004_non_analyst_project.ht/README.txt | 3 +++ .../WGS/R0004_non_analyst_project.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../globals/metadata.json.gz | Bin 0 -> 333 bytes .../globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../globals/parts/part-0 | Bin 0 -> 141 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 63 bytes .../metadata.json.gz | Bin 0 -> 184 bytes .../metadata.json.gz | Bin 0 -> 372 bytes .../rows/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../rows/metadata.json.gz | Bin 0 -> 619 bytes ...t-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc | Bin 0 -> 12 bytes .../part-0-85535ceb-5403-4697-bec1-5eccf7ff958a | Bin 0 -> 58 
bytes hail_search/test_utils.py | 5 +++++ 39 files changed, 9 insertions(+), 1 deletion(-) delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/index create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-d213a419-0c77-4952-b924-b6af300f393b.crc delete mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-d213a419-0c77-4952-b924-b6af300f393b create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.README.txt.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/._SUCCESS.crc create mode 100644 
hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/_SUCCESS create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/index create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/metadata.json.gz create mode 100644 
hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/.part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc create mode 100644 hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc index b8eaa2d478e189846de5e8165c6d446d224fa9af..17e2f8ae70ba902a4fb797c1a1b41c429b64b6ac 100644 GIT binary patch literal 12 TcmYc;N@ieSU}AVLXWt3{5}5-G literal 12 TcmYc;N@ieSU}8AI)a?ua5&r_y diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc index 8d6507b0100c50e8d6ee30420d567d9a808e7ef5..55d0cdd18d688c3a7c902325c814d92771301803 100644 GIT binary patch literal 12 TcmYc;N@ieSU}E6-v?mq-5=#S4 literal 12 TcmYc;N@ieSU}DI;?y3v`5>^7^ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt index 5daea17753..22050e815e 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
Written with version 0.2.128-eead8100a1c1 - Created at 2024/08/16 15:39:04 \ No newline at end of file + Created at 2025/01/02 17:23:59 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc index 905a30feecaf6f082dfeb576d78198faa8bee189..871216cce145dda17e10d163a766e88221cc3d16 100644 GIT binary patch literal 12 TcmYc;N@ieSU}AX8BJ=|Q5;OxW literal 12 TcmYc;N@ieSU}7-2V166`5%mK- diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 index 05ec205c54a6ecbeb75db969956d48149ca772d3..c33540def91eb8f00ead25af95785a68ee7b3dcc 100644 GIT binary patch literal 114 zcma!LU|^^LVvVi(e-&z4nHat>GcqwSa0VF|7#PMIW~VcUy9P5daJm^7#Tzj&NSegw z<>$pG<|XD-7MH{q6y;~7CYLaWy93p58Jff!nlP|3IG8Np4E{W2$NWIH1)RJf literal 69 zcmXqDU|=u+VvVi(e-#X585wRcGBPl51{oL_7{(iBr!$AU1~W2nx)~V78!<33IGjDv Pq{a(W4+3C43_#@o#7_+y diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc deleted file mode 100644 index 78fad9791a2cb58bbe1666966c0af14f039b3b79..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D(g@^vZz6T1V& diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc deleted file mode 100644 index ca274b3389a33a5773a7d1ce93a6b7b617301ef4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}BiCA|nX^66ymz diff --git 
a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index deleted file mode 100644 index 3d8c9a969ba727feb8ae1c78543e6b46e2aed60b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 65 ycmb1PU|`?}VvVi(e--#Efh-0_M)Ns`R=6-RIx@O5$N~ifVDbWuAXy+rQUCyR*$d+U diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz deleted file mode 100644 index 14e2c0d67c660e738ce10971071199f6c6915139..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 185 zcmV;q07m~GiwFP!0000009B5`3c@fDME_+^3OQ6;s^%tw9uyQ6FXADs+a?$iNw#1p z{dc$CybKF7`(}D*jKv#6+dM z5-t?FntJQ&F3Q8Glihiv^TkvsYtdqq>kw4BbC8LT0R$-LJSpPJf5_;$J*Eh9CcwBS neol%s2YxrZ!FxJ3O+=oN^A1H!tvW(}>4)J9{jxpC&j0`bO+iqn diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..9e82cfb61effec80dffe2ecec0f03363fe0e0794 GIT binary patch literal 12 TcmYc;N@ieSU}C6Zee4VX5r6{V literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..e05cfb69de77e5d8801db170fbdd78f4463d7999 GIT binary patch literal 12 TcmYc;N@ieSU}BK`=k*T&5^w{W literal 0 HcmV?d00001 diff --git 
a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..0734b2c39522db842dd14621aa7ebbc3f5161394 GIT binary patch literal 75 zcmY#qU|^5}VvVi(e-)%I0$B`9jE2VpnV1}%866oI&F36i;ljk|$mlL81C-;3Ddn$Z LU|?hfVuV@%3PKHi literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..879fa5f3300aa66735f6a61d98898c8a04706b93 GIT binary patch literal 183 zcmV;o07(BIiwFP!0000009B5`3c@fDME_+^3ON*8s^%tw9uyQ6FXADs+a?$iNw#1p z{dc#$c^L*~_RaRt7>jod(Rc^4EVE)IpbE;>+PHS7YVrYOk*!Ms8cnMS;35;B0|-#gc~Zoa|Bx|r2TXC=Fz$ul lvm(uf$#WXKr&H5J1I|mM342`3lO-hum+C5#=_HxVlH~&c-!Es=DW8cMp~S%f9B2VPcc601 zu?X;04G8kj5RbKmP}y~d=BUgkS8R)}64+R0zCd7p;H6X&JbznDf$6TY3etc`eWd4C zMxUJH^&n;$jZV)?JM{4E%=~4~TKWw&OO+9or(7ve;4|vXDUQ-t*jx9!d)};y`Qkou zY}EVO8ac+=h3KNf4u-ird(k*gXaRVum~yPNVa7J>OqkLd4H`XR4LPr?|J1zf*0}jt zuOhOwAo`oQtWQ7kZBWhW|Gpj$Cd(a44ELT3Qw=i=0q?Q2>MOTlkx!GX%W=j+;9-<- o5eLth+Dng83sm(h)UYQkY!lixqz(T559+P$FWO-QFuDQ&0MC!AoB#j- delta 342 zcmV-c0jd7v0^|aa7k|W7n3A`lFvj*URQ6Iz$>P;!OOue?PKxxuFRhJI*~4Di=G^a` zd%5X}?4^R@2b`Bi6ZSZtr3*?*F4a{e(@8o>(!~P+-z}2qlqVt~lsMRf11-Q84pd<| zmI1!10YUy5;<2$1D%Rz-(Jv1zA9(KG5?k zr%%rDW)Sn7MyKbM9eQ|nX8yA0E&YauWy*-!Q?3*!@Hutn7)R+V?5%s=J+GJLY8Fk>5bCQN0G292Ju6FIM||J1zd*0}yy zEhDnAAo`oQtWQ7kZBWhe|Gpj$Cd(a44EJ6LQx7u?0q?Q2>?^lnk7rZC0hld diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz index 5e76d5dbbaef5c6bee2b7bbe5fbd6d340eaf98c7..d379d392863f9df5b67d3f6d185cd947c1f308d1 100644 GIT 
binary patch literal 618 zcmV-w0+szAiwFP!000000NqwyZ`v>v{V#snw4}=yWaJGPtB|T`f@u${P-Nm8a2uS+ zF;f}JfA4it5|S`!RO(B4h{(Ce_nu=PDLxWGcR&I%7Yv?&Eq;AnW)L~VAmJk%NN`V* zG}`C*PLl)@$jJ&zQOY3FL=qfN9ey4%VAHyQEYJ{EKFuaHhozGHNFB>|K?{mfq4-8) zTu^y1vNY$4rvj0=P_9!LFS8LCOGyv+C+f<`WcmP+3UftOQ*UU^6bW&d(_L2eDH|JG zP<<@jb+KZ#Dzu?LFNRuH?)3A+?P9uh{g1WMN&phM*mPA^wVKft*O%58){?c}$1%vx zaoo)&w)17ulnfGWdBV;p6EyIM`dXy52ABLV5{83;JM1s}ohw!@y>cB|1-k35Ec6de za2>#ohxrLB^C%(tLDd%Vo14!iU84!mxJ=!DMe#fkF_*atIeV&(dFUCK_*TXmE>?m! zjB#P4ah!vyl~5s4%u2QTY(`OBOk!i@%t$L>+Q7B3tyT3jb=16*04k7zCf_kPuM1Q; zPlC7{p)#>y%o7MZhusfig}iy_sxcCxRI?*rg9IO-D!6zh;E)yslIXj?Z@kYelabRIkrnZdt_RX@0;Fe;F#-?0^9Q3QPEsHaQJwNj4{FU zCLq}5CVsn#=Q_S+`%b43l5WdBTtvy9VXjM6Xd7y5rblR`&_<|IQco(1XIpiB>@^7h E0Pq4NZ~y=R literal 606 zcmV-k0-^mMiwFP!000000Ns|~Z`v>r$Nv{UZEDgHem8HWV--?0O)%|Y6^e{~2DibE z95a=n{O>&{C6I(lqgr3eLqzV*ci*r5k>VXubPXgV6UE^P`1IG;Spuw0JQ6*^mW1~t z)@+mFJB?$AAtiH6G0lOwLK5!w9e!>T;5TIgtH6aYb1@##6lT_JnBC>=!b*r~W#!Vv zq@rf)B|4Q>YK4)BvYmG3z0AhIwJ~&izb~D8iO~2`Xv>I^=YQVRiCDAlqqv&fYdb#YB;O<@&Ti;WOq zYUgn`9tG{;=#cUVTgjLom5k89BkHRZEnILW|6)PE*X#6~%YLVp=S$CAhgN~E>nk(& zLle>wSo1L1$8sPI#6PHAVf^y)OJ44xF;J0X_fISy10keIZ6x_qam@X$hsakTJJez( zd`FHmPYaQPEtOEEG_q2xKAMqbCrLb5IWkfK%o#Wr+fr3SQ%%hW0nma`H2#j&65s<> diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-d213a419-0c77-4952-b924-b6af300f393b.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-d213a419-0c77-4952-b924-b6af300f393b.crc new file mode 100644 index 0000000000000000000000000000000000000000..433649fc5733e5365581e99b364cdf2c1bc8631f GIT binary patch literal 12 TcmYc;N@ieSU}DJ1DfkHh5=8@g literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 deleted file mode 100644 index 2389c50627052e0f0994fdb969360ce6e3c060ab..0000000000000000000000000000000000000000 GIT 
binary patch literal 0 HcmV?d00001 literal 52 xcmb1VU|XM(jWj)IVvVi(e-%^|85tNE4UY#hF*!OjIx;abGJyol=Nww$!o=vv=*|QbVq|3C O1!@2RuqFmZ1_l6ox(Y7< literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..d5ad8f7e2d0c422b4ea5c846b528c45ab59b55de GIT binary patch literal 12 TcmYc;N@ieSU}6y2dG0Cz5^Mvj literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..cbb87d8879b009e207acf7fa2dd30e188b0978c3 GIT binary patch literal 12 TcmYc;N@ieSU}E6;F^v%b5yb-5 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt new file mode 100644 index 0000000000..c7b6eb301a --- /dev/null +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.128-eead8100a1c1 + Created at 2025/01/02 17:57:36 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/_SUCCESS b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/_SUCCESS new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..52404ebea7b5583be6004f74544a0fd27afab689 GIT binary patch literal 12 TcmYc;N@ieSU}9j6{S66|9<@lpVHI0@R(@YYly zo+biS;2>nYkg^&gqb)aW&5A0AM2b4_!bh!Hvk$3MOgFJVa0YEi_NZ?w!30h1ZI-W2A?M^ zrrt_E+WeWYpX*@L6!c;dFCo2KJJUk{>Zw4I*mg*m&T0Iwexg80{dRT;{pHxUu&3V literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/.part-0.crc new file mode 100644 index 0000000000000000000000000000000000000000..581ceab411ec4b01ec95392a47a11df0956f26c8 GIT binary patch literal 12 TcmYc;N@ieSU}6yMii`vR5PbrS literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..2da408adba902a707a40c7ef734fb23acf74e6b0 GIT binary patch literal 141 zcmbFXD#78dEIB$gx= zr7Ud=8=$9Gml_jU?C8bv}GH|&W i0D+-NyrBsLGXt}8gMqc%u6b#^Kzl&|Y&QcV0|Nk~yd@m~ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.index.crc 
b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b5f783b8e930aa21db2d7858d126e992a06d3d6 GIT binary patch literal 12 TcmYc;N@ieSU}Ct!wSGSU6AJ@} literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..8a49d39d8085dd41ffe821e1211bdc6c6781861a GIT binary patch literal 12 TcmYc;N@ieSU}AXEINbsO6Zr#g literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..bf0064cce9c3895da769e8c935bed73361b53c01 GIT binary patch literal 63 wcmb1UU|`?_VvVi(e--!&fh-0_M#JNQOiYf>jE)R4Kt4ZAn!gex3B(9_05X9I*#H0l literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..63e4c6ca648f383ce11aebc2c60d085419c3f4f4 GIT binary patch literal 184 zcmV;p07w5HiwFP!0000009B5`3c@f9hTkP8g&w9GQ+pFZ4+@Hk7xB=|Y%P{n+6*jX z?{4PJO9=d5zWhBj#^Mb_G~R(M%dA)msDg5}Hm=>OntT9RWUEquM$>8nIT!H4bWLvr 
z2^R`oO}+JX7v*7`lihiv^TkvsYtdqq>kw4BGswor00NYAo)q!Rf617+J*Eh9CcwBS meol%s1HYZq;5{9iCL-6!nNh^nsw3o+ei*)=Z@VJT0001)VNsa? literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..e43fa94230e053472621205068ac2368e7081a1b GIT binary patch literal 372 zcmV-)0gL`0iwFP!000000F9DSYr-%Th5t*QHps+Qn3A`xG8x;0Q`w7@64R@OrAbNB zNh$sByRB_Sw}-uiL_FVNyf1UW%BDF~2W56#v9-5_@Rj3Pj)Y7dd$N|7 zThygK#I!3kR(95Ck*C~)V{7mOw&$5tWpXHfCv>HAkYjTxdK!Ax+_NAz;+=li1Hi(-XYjQvyt`Y{Z^ITJ@bY5!VRrv-bq=ez{psq2k S^SA%2wbnnC1;vKg0ssI_q_^Y% literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..3ee859d28626d945787dbad2d30f83702e4c182f GIT binary patch literal 16 XcmYc;N@ieSU}Bh);PHx|$(RuUAlCz~ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..4b5589861446b21bd5ee70902d845fb6705532a3 GIT binary patch literal 619 zcmV-x0+jt9iwFP!000000NqwwZ`v>z{V#snw4~z}E_oxABBW~SpxVPKgpBhAyaoq3 zW-3Ge@BN&RD@+=l`cfXiIp6IZ`v5)?LAO9$G8I0&fIs_jJKq6o5sNIJ;XvFc5=Lwv z<1-CI2q7j546(=uW)g{e%sM;J& z?)WZ@s9E1WWa#Aaw9<=(Ho|_NgjFQnoA*zX*=W=5ea;!D(=d^Xb-{KHs=%s5(}KM0 z#4o2;VwaN};joO_Y99n3Ye#cG_L{A}SA;CWHazr8DISV=Mn09Kl>z7c4<>XSr`yz>E46u{2yV<+(=g4j#G4Yr2dXxR-`spn(hV8{4H5_blP${@2)K;Z zlKU^I=eivW-Cyxrk&6-c9WAY`CGr0A)-G(+o)ZY-pfgjD=w1YB#~oYfhp z8k-MdK>V5t8h%6HyvC!P0SS0<+q^|T~z5^BS 
zohz^FuB*P@sh6buMaV}qFK*f!|DTdQD0ZjXMHAC}N2R*s<{p4o1M^@v>^`z|7_4lk z(`ehB?x1I{fHiFg7z=tqFmU=bxisN!3$VEO0 F0074CCXxUE literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/.part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/.part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc new file mode 100644 index 0000000000000000000000000000000000000000..006238b850c6201686ae4d16e8c515f91596bbd0 GIT binary patch literal 12 TcmYc;N@ieSU}BidbD Date: Thu, 2 Jan 2025 13:41:23 -0500 Subject: [PATCH 10/30] update action to increment seqr and hail-search chart versions (#4534) * update action to increment seqr and hail-search chart versions * more explicit steps * update hail-search chart * remove prerelease-tag * hail search --- .../workflows/prod-hail-search-release.yaml | 21 +++++++++++++++++-- .github/workflows/prod-release.yaml | 21 +++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.github/workflows/prod-hail-search-release.yaml b/.github/workflows/prod-hail-search-release.yaml index 58772f0013..0a2dbb0ee7 100644 --- a/.github/workflows/prod-hail-search-release.yaml +++ b/.github/workflows/prod-hail-search-release.yaml @@ -45,11 +45,28 @@ jobs: persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token fetch-depth: 0 # otherwise, you will failed to push refs to dest repo - - name: Update appVersion in hail-search Chart file + - name: Get latest hail-search version uses: mikefarah/yq@v4.22.1 + id: current with: cmd: > - yq -i '.appVersion = "${{ github.event.workflow_run.head_sha }}"' charts/hail-search/Chart.yaml + yq -r '.version' charts/hail-search/Chart.yaml + + - name: Bump version + id: bump + uses: cbrgm/semver-bump-action@main + with: + current-version: ${{ steps.current.outputs.result }} + bump-level: minor + + - name: Update appVersion 
and version in seqr Chart file + uses: mikefarah/yq@v4.22.1 + with: + cmd: > + yq -i ' + .appVersion = "${{ github.event.workflow_run.head_sha }}" | + .version = "${{ steps.bump.outputs.new_version }}" + ' charts/hail-search/Chart.yaml - name: Commit and Push changes uses: Andro999b/push@v1.3 diff --git a/.github/workflows/prod-release.yaml b/.github/workflows/prod-release.yaml index 391929e892..b4ab01854c 100644 --- a/.github/workflows/prod-release.yaml +++ b/.github/workflows/prod-release.yaml @@ -45,11 +45,28 @@ jobs: persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token fetch-depth: 0 # otherwise, you will failed to push refs to dest repo - - name: Update appVersion in seqr Chart file + - name: Get latest seqr version uses: mikefarah/yq@v4.22.1 + id: current with: cmd: > - yq -i '.appVersion = "${{ github.event.workflow_run.head_sha }}"' charts/seqr/Chart.yaml + yq -r '.version' charts/seqr/Chart.yaml + + - name: Bump version + id: bump + uses: cbrgm/semver-bump-action@main + with: + current-version: ${{ steps.current.outputs.result }} + bump-level: minor + + - name: Update appVersion and version in seqr Chart file + uses: mikefarah/yq@v4.22.1 + with: + cmd: > + yq -i ' + .appVersion = "${{ github.event.workflow_run.head_sha }}" | + .version = "${{ steps.bump.outputs.new_version }}" + ' charts/hail-search/Chart.yaml - name: Commit and Push changes uses: Andro999b/push@v1.3 From 88d04425d2695b34d6e3d0236c67335f0e33edfb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 Jan 2025 15:03:04 -0500 Subject: [PATCH 11/30] update lookup test --- seqr/views/apis/variant_search_api_tests.py | 32 +++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index eef51399c7..c8e03aa305 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -804,12 
+804,14 @@ def test_variant_lookup(self, mock_variant_lookup): expected_variant = { **VARIANT_LOOKUP_VARIANT, 'familyGuids': [], - 'lookupFamilyGuids': ['F0_1-10439-AC-A', 'F1_1-10439-AC-A'], + 'lookupFamilyGuids': ['F0_1-10439-AC-A', 'F1_1-10439-AC-A', 'F2_1-10439-AC-A'], + 'liftedFamilyGuids': ['F2_1-10439-AC-A'], 'genotypes': { 'I0_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 60, 'gq': 20, 'numAlt': 0, 'filters': [], 'sampleType': 'WES'}, 'I1_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 24, 'gq': 0, 'numAlt': 0, 'filters': [], 'sampleType': 'WES'}, 'I2_F0_1-10439-AC-A': {'ab': 0.5, 'dp': 10, 'gq': 99, 'numAlt': 1, 'filters': [], 'sampleType': 'WES'}, 'I0_F1_1-10439-AC-A': {'ab': 1.0, 'dp': 6, 'gq': 16, 'numAlt': 2, 'filters': [], 'sampleType': 'WES'}, + 'I0_F2_1-10439-AC-A': {'ab': 0.531000018119812, 'dp': 27, 'gq': 87, 'numAlt': 1, 'filters': None, 'sampleType': 'WGS'}, }, } del expected_variant['familyGenotypes'] @@ -830,6 +832,11 @@ def test_variant_lookup(self, mock_variant_lookup): 'features': [{'category': 'HP:0001626', 'label': '1 terms'}, {'category': 'Other', 'label': '1 terms'}], 'vlmContactEmail': 'seqr-test@gmail.com,test@broadinstitute.org', }, + 'I0_F2_1-10439-AC-A': { + 'affected': 'A', 'familyGuid': 'F2_1-10439-AC-A', 'features': [], + 'individualGuid': 'I0_F2_1-10439-AC-A', 'sex': 'F', + 'vlmContactEmail': 'vlm@broadinstitute.org', + }, 'I1_F0_1-10439-AC-A': { 'affected': 'N', 'familyGuid': 'F0_1-10439-AC-A', 'features': [], 'individualGuid': 'I1_F0_1-10439-AC-A', 'sex': 'M', @@ -860,7 +867,7 @@ def test_variant_lookup(self, mock_variant_lookup): response_variant['variantId'] = '1-248367227-TC-T' response_variant['genomeVersion'] = '37' - self.login_collaborator() + self.login_manager() response = self.client.get(url.replace("38", "37")) self.assertEqual(response.status_code, 200) @@ -869,9 +876,11 @@ def test_variant_lookup(self, mock_variant_lookup): ('I000005_hg00732', 'I1_F0_1-10439-AC-A', {'sampleId': 'HG00732', 'familyGuid': 'F000002_2'}), 
('I000004_hg00731', 'I2_F0_1-10439-AC-A', {'sampleId': 'HG00731', 'familyGuid': 'F000002_2'}), ('I000015_na20885', 'I0_F1_1-10439-AC-A', {'sampleId': 'NA20885', 'familyGuid': 'F000011_11'}), + ('I000018_na21234', 'I0_F2_1-10439-AC-A', {'sampleId': 'NA21234', 'familyGuid': 'F000014_14'}), ] expected_variant.update({ - 'lookupFamilyGuids': ['F000002_2', 'F000011_11'], + 'lookupFamilyGuids': ['F000002_2', 'F000011_11', 'F000014_14'], + 'liftedFamilyGuids': ['F000014_14'], 'genotypes': { individual_guid: {**expected_variant['genotypes'][anon_individual_guid], **genotype} for individual_guid, anon_individual_guid, genotype in individual_guid_map @@ -881,32 +890,37 @@ def test_variant_lookup(self, mock_variant_lookup): }) expected_body.update({ **{k: {**EXPECTED_SEARCH_RESPONSE[k]} for k in { - 'savedVariantsByGuid', 'variantTagsByGuid', 'variantNotesByGuid', + 'mmeSubmissionsByGuid', 'variantTagsByGuid', 'variantNotesByGuid', }}, **EXPECTED_TRANSCRIPTS_RESPONSE, + 'omimIntervals': {}, + 'savedVariantsByGuid': {'SV0000002_1248367227_r0390_100': EXPECTED_SAVED_VARIANT}, 'variantFunctionalDataByGuid': {}, 'locusListsByGuid': EXPECTED_SEARCH_CONTEXT_RESPONSE['locusListsByGuid'], 'projectsByGuid': { p: {k: mock.ANY for k in PROJECT_TAG_TYPE_FIELDS} - for p in [PROJECT_GUID, 'R0003_test'] + for p in [PROJECT_GUID, 'R0003_test', 'R0004_non_analyst_project'] }, 'familiesByGuid': { f: {k: mock.ANY for k in [*FAMILY_FIELDS, 'individualGuids']} - for f in ['F000002_2', 'F000011_11'] + for f in ['F000002_2', 'F000011_11', 'F000014_14'] }, 'individualsByGuid': { i[0]: {k: mock.ANY for k in [*INDIVIDUAL_FIELDS, 'igvSampleGuids']} - for i in individual_guid_map + for i in individual_guid_map + [('I000019_na21987',)] }, }) expected_body['genesById']['ENSG00000227232'] = expected_pa_gene - del expected_body['savedVariantsByGuid']['SV0000001_2103343353_r0390_100'] + expected_body['mmeSubmissionsByGuid']['MS000018_P0004517'] = expected_body['mmeSubmissionsByGuid'].pop('MS000001_na19675') 
+ expected_body['savedVariantsByGuid']['SV0000006_1248367227_r0004_non'] = mock.ANY + expected_body['variantTagsByGuid']['VT1726970_2103343353_r0004_tes'] = EXPECTED_TAG + expected_body['variantTagsByGuid']['VT1726961_2103343353_r0005_tes'] = EXPECTED_TAG for k in ['VT1708633_2103343353_r0390_100', 'VT1726961_2103343353_r0390_100']: del expected_body['variantTagsByGuid'][k] self.assertDictEqual(response.json(), expected_body) mock_variant_lookup.assert_called_with( - self.collaborator_user, ('1', 10439, 'AC', 'A'), genome_version='37', + self.manager_user, ('1', 10439, 'AC', 'A'), genome_version='37', ) @mock.patch('seqr.views.apis.variant_search_api.sv_variant_lookup') From 911c56711585ea8836ffd11cabfbb790001562b0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 2 Jan 2025 17:02:13 -0500 Subject: [PATCH 12/30] include analysed by in family metadata --- seqr/views/apis/report_api.py | 23 ++++++++++++-- seqr/views/apis/report_api_tests.py | 30 +++++++++++++++++++ ui/pages/Report/components/FamilyMetadata.jsx | 1 + 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 428e6265fd..1a97c3581f 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -1,7 +1,7 @@ from collections import defaultdict from datetime import datetime, timedelta -from django.db.models import Count, Q, Value +from django.db.models import Count, Q, F, Value from django.contrib.postgres.aggregates import ArrayAgg import json import re @@ -17,12 +17,13 @@ EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN, FAMILY_INDIVIDUAL_FIELDS from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files from seqr.views.utils.json_utils import create_json_response +from seqr.views.utils.orm_to_json_utils import get_json_for_queryset from seqr.views.utils.permissions_utils import user_is_analyst, get_project_and_check_permissions, \ get_project_guids_user_can_view, 
get_internal_projects, pm_or_analyst_required, active_user_has_policies_and_passes_test from seqr.views.utils.terra_api_utils import anvil_enabled from seqr.views.utils.variant_utils import DISCOVERY_CATEGORY -from seqr.models import Project, Family, Sample, RnaSample, Individual +from seqr.models import Project, Family, FamilyAnalysedBy, Sample, RnaSample, Individual from settings import GREGOR_DATA_MODEL_URL @@ -890,6 +891,16 @@ def _add_row(row, family_id, row_type): parse_anvil_metadata( projects, user=request.user, add_row=_add_row, omit_airtable=True, include_family_sample_metadata=True, include_no_individual_families=True) + analysed_by = get_json_for_queryset( + FamilyAnalysedBy.objects.filter(family_id__in=families_by_id).order_by('last_modified_date'), + additional_values={'familyId': F('family_id')}, + ) + analysed_by_family_type = defaultdict(lambda: defaultdict(list)) + for fab in analysed_by: + analysed_by_family_type[fab['familyId']][fab['dataType']].append( + f"{fab['createdBy']} ({fab['lastModifiedDate']:%-m/%-d/%Y})" + ) + for family_id, f in families_by_id.items(): individuals_by_id = family_individuals[family_id] proband = next((i for i in individuals_by_id.values() if i['proband_relationship'] == 'Self'), None) @@ -910,6 +921,10 @@ def _add_row(row, family_id, row_type): sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) + analysed_by = [ + f'{ANALYSIS_DATA_TYPE_LOOKUP[data_type]}: {", ".join(analysed)}' + for data_type, analysed in analysed_by_family_type[family_id].items() + ] inheritance_models = f.pop('inheritance_models', []) f.update({ 'individual_count': len(individuals_by_id), @@ -920,6 +935,7 @@ def _add_row(row, family_id, row_type): 'genes': '; '.join(sorted(f.get('genes', []))), 'actual_inheritance': 'unknown' if inheritance_models == {'unknown'} else ';'.join( sorted([i for i 
in inheritance_models if i != 'unknown'])), + 'analysed_by': '; '.join(analysed_by), }) return create_json_response({'rows': list(families_by_id.values())}) @@ -933,6 +949,9 @@ def _get_metadata_projects(project_guid, user): return [get_project_and_check_permissions(project_guid, user)] +ANALYSIS_DATA_TYPE_LOOKUP = dict(FamilyAnalysedBy.DATA_TYPE_CHOICES) + + FAMILY_STRUCTURES = { 1: 'singleton', 2: 'duo', diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index db3dbb36f0..704dfd1dc9 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1221,6 +1221,7 @@ def test_family_metadata(self): 'phenotype_description': None, 'analysisStatus': 'Q', 'analysis_groups': '', + 'analysed_by': '', 'consanguinity': 'Unknown', }) @@ -1234,6 +1235,34 @@ def test_family_metadata(self): 'F000001_1', 'F000002_2', 'F000003_3', 'F000004_4', 'F000005_5', 'F000006_6', 'F000007_7', 'F000008_8', 'F000009_9', 'F000010_10', 'F000011_11', 'F000012_12', 'F000013_13'] self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), expected_families) + test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000001_1') + self.assertDictEqual(test_row, { + 'projectGuid': 'R0001_1kg', + 'internal_project_id': '1kg project nåme with uniçøde', + 'familyGuid': 'F000001_1', + 'family_id': '1', + 'displayName': '1', + 'solve_status': 'Unsolved', + 'actual_inheritance': 'de novo', + 'date_data_generation': '2017-02-05', + 'data_type': 'WES', + 'proband_id': 'NA19675_1', + 'maternal_id': 'NA19679', + 'paternal_id': 'NA19678', + 'other_individual_ids': '', + 'individual_count': 3, + 'family_structure': 'trio', + 'genes': 'RP11', + 'pmid_id': '34415322', + 'phenotype_description': 'myopathy', + 'analysisStatus': 'Q', + 'analysis_groups': 'Test Group 1', + 'analysed_by': 'WES/WGS: Test No Access User (7/22/2022)', + 'consanguinity': 'Present', + 'condition_id': 'OMIM:615120', + 'known_condition_name': 
'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', + 'condition_inheritance': 'Autosomal recessive|X-linked', + }) test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') self.assertDictEqual(test_row, { 'projectGuid': 'R0001_1kg', @@ -1253,6 +1282,7 @@ def test_family_metadata(self): 'phenotype_description': None, 'analysisStatus': 'Q', 'analysis_groups': 'Accepted; Test Group 1', + 'analysed_by': '', 'consanguinity': 'Unknown', 'condition_id': 'OMIM:615123', 'known_condition_name': '', diff --git a/ui/pages/Report/components/FamilyMetadata.jsx b/ui/pages/Report/components/FamilyMetadata.jsx index 93bfde2892..9f20b94625 100644 --- a/ui/pages/Report/components/FamilyMetadata.jsx +++ b/ui/pages/Report/components/FamilyMetadata.jsx @@ -15,6 +15,7 @@ const COLUMNS = [ { name: 'paternal_id' }, { name: 'maternal_id' }, { name: 'other_individual_ids' }, + { name: 'analysed_by', style: { minWidth: '400px' } }, ] const FamilyMetadata = props => ( From ba4f78a36294c19b43055c1a2035b3efadd2fc19 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 3 Jan 2025 11:46:15 -0500 Subject: [PATCH 13/30] update fixtures (#4563) --- .../families/WGS/F000002_2.ht/.README.txt.crc | Bin 12 -> 12 bytes .../WGS/F000002_2.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../families/WGS/F000002_2.ht/README.txt | 4 +- .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../WGS/F000002_2.ht/metadata.json.gz | Bin 352 -> 351 bytes .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../WGS/F000002_2.ht/rows/metadata.json.gz | Bin 628 -> 628 bytes ...0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc | Bin 0 -> 12 bytes ...0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc | Bin 12 -> 0 bytes ...art-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd | Bin 0 -> 208 bytes ...art-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac | Bin 207 -> 0 bytes hail_search/test_search.py | 9 ++-- hail_search/test_utils.py | 45 
++++++++---------- 16 files changed, 27 insertions(+), 31 deletions(-) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx => part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx => part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx => part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx}/index (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx => part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx}/metadata.json.gz (100%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc index 3b6f654d6c82ab84756a952e252b70193d3aaff9..46b5687838a74b08a409ed7002165d192895e847 100644 GIT binary patch literal 12 TcmYc;N@ieSU}EUF$=Ct_60rjz literal 12 TcmYc;N@ieSU}AVIzG@->6E*{I diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc index 
fee8fcb4687e14806f37b6d919092dba23677b46..63190d7e43b8f26b9be44ce6688ad6f7d0cf61f3 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7-$=y(AD5iA2L literal 12 TcmYc;N@ieSU}E^xX(h*;Qx1;xQ-7G!3aKOlTf5%Fw`dS4pA%DR53_EK+|q$_U=?kmTR1__ZnI<=OS z8_b2;r%_>Y=E2bw_)FHpwT_;%5Pd^QH&XM$MNCSN;L^zIQ|E6g^rL=0%$La_o;-Hk z-G(>_k2k*A5EL%{lC^UFN#B%@2hsF}zDmvfXc)~FZ8l%i1pOfm0k6;X(P)JhM`%UP zH*c1T9qWlKzh+5?tPJpGsFz80e85zn{@>TY*kqXX!s92*xGvf=w1P!fN}9^Otm8Ms xwDwqS0C~7ET*9{Jmdb_mLUAw4cPJqz6lVt^psnq1eukzs`~oCMz-fg7001N_s~!LV literal 352 zcmV-m0iXUKiwFP!000000F9DeZ-OushW|^iwxlyooFBb&n3-j}aJuZq5K_uf$trEy zGDArJ`wj>?z1j^a?|IJC_CQ6pnS}TUjLlR|*k~LKCY0=XCNClB_XfSkp#P8n?+??d z7*CTYLWzMb7{~?K*no^RcYA~{DI$(1sCT6XtbEol)lRB}OSX2V5U#TPppnQ@%Vx&# za)Wu6?;=|0lsm9&4ep$^aIIsf%+qfu*+wc+SjwdY2{xie9Xo$hVej?xZm|mY!Q{T< zZZ^b0IK1)IhLGdpFIg)WAM7=E!GO-5*^5-%jfQl-Y_r8D3(@Z*@A3LV9kj~PVhO9r z>E_Kc=ZAVCtFL+3A!`ky8R~hG9q%z!$N%>=Fg9s!T;}i-#zGbC8Ct=zD}_zvR@U*G yVN`pJ(ttc%8O~wbb4{h0by+T)EZ?AnlrWqfc#pQWzxf%OR{slSOi%iS0ssI=ovz~m diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc index 6a1017e8ebf299656da8b2b4dee4cd373e563000..ebb4a5208f14f44ba59c8d668fbcbd2d3b7faede 100644 GIT binary patch literal 16 XcmYc;N@ieSU}88gY1on_)R+MPAe96? 
literal 16 XcmYc;N@ieSU}8A2OZP$7&J9}uDVPRw diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz index eeeb78776570901cc864e22a18bd0d1ed5646f88..affb24f42675838fede8ae69229c9b0d286e1e1b 100644 GIT binary patch literal 628 zcmV-)0*n10iwFP!000000NqwiZ`wc*{V#iJRXYV^zI?-kAf%{D0rk)bp~X9fZJae) zFI6b=f6uJHZKxC?PejmXAP$)c1~0(ozit@Se5)}o zs5~0`Fye}b0+E?e9V<4n-Uy7Oq{qiRb!GTGP#`1cEC51D4n?5aZ~D_o3mYTXncM{- zRXy9deaO-2GJA?wvDWc7(@|4!um8VH=i^;x@U4D| zL1&EhGcucl?}Kd2{(j=Nt$|yH4C3v0z$z&&(tbh)wWN&(7yKV4^t#z; zmFrL#(C6mLSpQH2*KTWQnB`bvJ_*4CRXfCQZoVh!8V!K@iN)UJyXv0%Tt;fm*-L6F zoG;qDW~ee3BhCkUHa0@vk3bbns1Tu|+3HG<__F^>DHS0#J81#enzdkc#;L*PlNb;` zOhJPm=$rQ$D(NRdT((f+*foCtux@Pjxe-(3!%dgAk>G`rT_$y<4~q%Zymzj=sk^EA zdZ%8J@*g3e(Y!dccb-Ve49e-$tHeYX-&3jXxIXCd?AI@5%e1Y5Z4IeyLf-<@?%K?x zEzdUl8*e}vbXnhL$-UsuoQf^D;XyFH{zM{$6|}JQ7J(0`hq0@-lWnAvdzd)wmfab~ z<|P5j3s< literal 628 zcmV-)0*n10iwFP!000000NqwiZ{jcz{VzW4YLhMr5J7H0DMG4N-GcV86+*5(0dG?W zIc8NB<-d2F?*vvVm3k=$l)M?wyf=Q4eI$}T1M$gRa(DrL|Lc0O1LhKote@aO{09<7 zY#-x04MPYaCMyiFhyyc;#6K1lmWLhiNg2Q_Fc3z*O~*8bg;sl}k7+r-0kKGGu`!sC zR2{5c6bmgPiO5{)uA5kSZv;k4(ZfTLy0QWhYEZF%5&)@{fHF|;H~Z;lg{{%ppWcKK z)n3oKzAMn#GJi@|iP7;l*U?aLYyQ8??kC&s;A=@RYlewJZYpvM&?Q#zDl>CNC7~Lh zlFkI{=VTuDK>(^X``hW*aR=inWD&L%A+M#3k@gcZXe2!va3TI-g4geNz0+mCW6jg0 z7p_BLK%d(y6Z1n6!nm!aVP0UV`4ohARBsW#y851`8#DwOq!xRV@9KLV2o>wK;4hh} zaK32ohM}rljQAhu*~E&1AO>ABp;AVg=BsNxlJownq|}5o>|_L7YSxm~38xmDPhvp) zFeMFtpl{x1sI;E~dEP>)W7qio!@9NEr$)?>4>w)bMgfZyKTqmHAC?nnc<)qsTX$Ra zv#x8C270R!N8{zN1Rt2y@Wm*>7I#j?KLR=PvEq9qJ71p38^! 
zkbADpdja!$p67HZOYa4L?y20sIND2Y)*nf%v4Rnn-6HTI^DuU0Tjd~~!o$ROI=${N zsV*AWe>B025y34dAo&~X44uB?cmuClYKb|sw33i4+;cQ{N)5F`iA_xtgCs@`)moWe ON%9JAp*YYd2><}TI47I{ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc new file mode 100644 index 0000000000000000000000000000000000000000..ff3efb74115b772fcecd4ef72b1a35c05f868e4c GIT binary patch literal 12 TcmYc;N@ieSU}D()?%Oc{73c(@ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc deleted file mode 100644 index 4155311e5a4dac9c8a79c47c1560d5917061165e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}88fdg&Yh6HNoR diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd new file mode 100644 index 0000000000000000000000000000000000000000..e03622879d36a95fdee5a706a94fdcb5b53b3eb1 GIT binary patch literal 208 zcmdnZz`*bWh&8tA|5f-=%F58f!N|apoKa+WJdlaW(V5Yafti6pgu#Y|A(=sf=Yc(! 
z$l+tVg_sya7~M4#pV%v~e6Uw({CGi|iP1e-f8pMiZ$h{1rFj^VYp?MxJCT0ZUKOZ7 zgQso#MQujuu)yG;kfOxA Date: Fri, 3 Jan 2025 14:59:01 -0500 Subject: [PATCH 14/30] add loading failed analysis status --- .../0079_alter_family_analysis_status.py | 18 ++++++++++++++++++ seqr/models.py | 2 ++ seqr/views/utils/dataset_utils.py | 3 ++- ui/shared/utils/constants.js | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 seqr/migrations/0079_alter_family_analysis_status.py diff --git a/seqr/migrations/0079_alter_family_analysis_status.py b/seqr/migrations/0079_alter_family_analysis_status.py new file mode 100644 index 0000000000..f8922c0148 --- /dev/null +++ b/seqr/migrations/0079_alter_family_analysis_status.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.16 on 2025-01-03 19:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0078_rename_submit_to_clinvar_variantnote_report'), + ] + + operations = [ + migrations.AlterField( + model_name='family', + name='analysis_status', + field=models.CharField(choices=[('S', 'S'), ('S_kgfp', 'S'), ('S_kgdp', 'S'), ('S_ng', 'S'), ('ES', 'E'), ('Sc_kgfp', 'S'), ('Sc_kgdp', 'S'), ('Sc_ng', 'S'), ('Rcpc', 'R'), ('Rncc', 'R'), ('C', 'C'), ('PB', 'P'), ('P', 'P'), ('I', 'A'), ('Q', 'W'), ('F', 'L'), ('N', 'N')], default='Q', max_length=10), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 1dd9135acc..b52c4a8cca 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -286,6 +286,7 @@ class Family(ModelWithGUID): ANALYSIS_STATUS_PARTIAL_SOLVE = 'P' ANALYSIS_STATUS_PROBABLE_SOLVE = 'PB' ANALYSIS_STATUS_WAITING_FOR_DATA='Q' + ANALYSIS_STATUS_LOADING_FAILED = 'F' SOLVED_ANALYSIS_STATUS_CHOICES = ( ('S', 'Solved'), ('S_kgfp', 'Solved - known gene for phenotype'), @@ -308,6 +309,7 @@ class Family(ModelWithGUID): (ANALYSIS_STATUS_PARTIAL_SOLVE, 'Partial Solve - Analysis in Progress'), (ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS, 'Analysis in Progress'), 
(ANALYSIS_STATUS_WAITING_FOR_DATA, 'Waiting for data'), + (ANALYSIS_STATUS_LOADING_FAILED, 'Loading failed'), ('N', 'No data expected'), ) SOLVED_ANALYSIS_STATUSES = [status for status, _ in SOLVED_ANALYSIS_STATUS_CHOICES] diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index c113e9985f..991856f1ba 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -216,7 +216,8 @@ def match_and_update_search_samples( updated_samples = Sample.objects.filter(guid__in=activated_sample_guids) family_guids_to_update = [ - family_guid for family_guid, analysis_status in included_families.items() if analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA + family_guid for family_guid, analysis_status in included_families.items() + if analysis_status in {Family.ANALYSIS_STATUS_WAITING_FOR_DATA, Family.ANALYSIS_STATUS_LOADING_FAILED} ] Family.bulk_update( user, {'analysis_status': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS}, guid__in=family_guids_to_update) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 2e92d2746c..de87f40d4c 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -164,6 +164,7 @@ const FAMILY_STATUS_CLOSED = 'C' const FAMILY_STATUS_PARTIAL_SOLVE = 'P' const FAMILY_STATUS_ANALYSIS_IN_PROGRESS = 'I' const FAMILY_STATUS_WAITING_FOR_DATA = 'Q' +const FAMILY_STATUS_LOADING_FAILED = 'F' const FAMILY_STATUS_NO_DATA = 'N' const DEPRECATED_FAMILY_ANALYSIS_STATUS_OPTIONS = [ @@ -184,6 +185,7 @@ export const SELECTABLE_FAMILY_ANALYSIS_STATUS_OPTIONS = [ { value: FAMILY_STATUS_PARTIAL_SOLVE, color: '#288582', name: 'Partial Solve - Analysis in Progress' }, { value: FAMILY_STATUS_ANALYSIS_IN_PROGRESS, color: '#4682B4', name: 'Analysis in Progress' }, { value: FAMILY_STATUS_WAITING_FOR_DATA, color: '#FFC107', name: 'Waiting for data' }, + { value: FAMILY_STATUS_LOADING_FAILED, color: '#ba4c12', name: 'Loading failed' }, { value: FAMILY_STATUS_NO_DATA, 
color: '#646464', name: 'No data expected' }, ] export const ALL_FAMILY_ANALYSIS_STATUS_OPTIONS = [ From 583b82a89684f86c87324249f8ccff75c8cf36f6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 3 Jan 2025 15:10:50 -0500 Subject: [PATCH 15/30] update failed analysis status for failed families --- .../commands/check_for_new_samples_from_pipeline.py | 5 +++++ .../tests/check_for_new_samples_from_pipeline_tests.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 243bd092dd..1f66958f69 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -166,6 +166,11 @@ def _load_new_samples(cls, metadata_path, genome_version, dataset_type, run_vers failed_families_by_guid = {f['guid']: f for f in Family.objects.filter( guid__in={family for families in failed_family_samples.values() for family in families} ).values('guid', 'family_id', 'project__name')} + if failed_families_by_guid: + Family.bulk_update( + user=None, update_json={'analysis_status': Family.ANALYSIS_STATUS_LOADING_FAILED}, + guid__in=failed_families_by_guid, analysis_status=Family.ANALYSIS_STATUS_WAITING_FOR_DATA + ) failures_by_project_check = defaultdict(lambda: defaultdict(list)) for check, check_failures in failed_family_samples.items(): for family_guid, failure_data in check_failures.items(): diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 93d701a1f3..d05bfdbc64 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -462,6 +462,10 @@ def test_command(self, mock_email, mock_airtable_utils): {'analysis_status': 'I', 'analysis_status_last_modified_date': None}, 
{'analysis_status': 'I', 'analysis_status_last_modified_date': None}, ]) + self.assertSetEqual( + set(Family.objects.filter(guid__in=['F000001_1', 'F000002_2', 'F000003_3']).values_list('analysis_status', flat=True)), + {'F'}, + ) self.assertEqual(Family.objects.get(guid='F000014_14').analysis_status, 'Rncc') # Test airtable PDO updates From e5adc01ba41688773df5b807f381137c4c8dd42d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 3 Jan 2025 15:31:37 -0500 Subject: [PATCH 16/30] show family sort in search ui --- ui/shared/utils/constants.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 2e92d2746c..3719dcc7b7 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1395,7 +1395,7 @@ const VARIANT_SORT_OPTONS = [ ), }, ] -const VARIANT_SEARCH_SORT_OPTONS = VARIANT_SORT_OPTONS.slice(1, VARIANT_SORT_OPTONS.length - 1) +const VARIANT_SEARCH_SORT_OPTONS = VARIANT_SORT_OPTONS.slice(0, VARIANT_SORT_OPTONS.length - 1) export const VARIANT_SORT_LOOKUP = VARIANT_SORT_OPTONS.reduce( (acc, opt) => ({ From 3e9e2f27832f30c8eddd1c1d6dffa40c7b5c0994 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 Jan 2025 12:39:45 -0500 Subject: [PATCH 17/30] add family sort --- hail_search/queries/base.py | 3 +++ hail_search/test_search.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index a518120408..0d2f7dc49d 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -85,6 +85,9 @@ class BaseHailTableQuery(object): SORTS = { XPOS: lambda r: [r.xpos], + 'family_guid': lambda r: [ + hl.int(r.family_entries.find(hl.is_defined).first().familyGuid.first_match_in('(\d+)').first()) + ], } @classmethod diff --git a/hail_search/test_search.py b/hail_search/test_search.py index dede157102..f8c19ca847 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -1242,6 +1242,12 @@ async 
def test_sort(self): sort_metadata={'ENSG00000177000': 3}, ) + await self._assert_expected_search( + [_sorted(MULTI_PROJECT_VARIANT1, [2]), _sorted(MULTI_PROJECT_VARIANT2, [2]), + _sorted(VARIANT3, [2]), _sorted(VARIANT4, [2]), _sorted(PROJECT_2_VARIANT, [11])], + sort='family_guid', sample_data=MULTI_PROJECT_SAMPLE_DATA, + ) + # size sort only applies to SVs, so has no impact on other variant await self._assert_expected_search( [_sorted(GCNV_VARIANT1, [-171766]), _sorted(GCNV_VARIANT2, [-17768]), _sorted(GCNV_VARIANT4, [-14487]), From c072b2fe72423b6c7befe90098303b5d5010df94 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 6 Jan 2025 13:58:11 -0500 Subject: [PATCH 18/30] remove clinvar tag description --- seqr/fixtures/variant_tag_types.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/fixtures/variant_tag_types.json b/seqr/fixtures/variant_tag_types.json index 8eaae2bdcd..ddc6ed8fbd 100644 --- a/seqr/fixtures/variant_tag_types.json +++ b/seqr/fixtures/variant_tag_types.json @@ -480,7 +480,7 @@ "project": null, "name": "Submit to Clinvar", "category": "Data Sharing", - "description": "By selecting this tag, you are notifying CMG staff that this variant should be submitted to ClinVar. Generally, this is for pathogenic or likely pathogenic variants in known disease genes or for any benign or likely benign variants that are incorrectly annotated in ClinVar. 
Please also add a note that describes supporting evidence for how you interpreted this variant.", + "description": "", "color": "#8A62AE", "order": 25.0 } From 2c534dfac73049ee8cb645d75eb3c408b78df9ee Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 7 Jan 2025 17:11:28 -0500 Subject: [PATCH 19/30] shared notification url formatting --- ...eck_for_new_samples_from_pipeline_tests.py | 14 +++---- seqr/utils/communication_utils.py | 20 +++++++++- seqr/utils/search/add_data_utils.py | 40 +++++++------------ seqr/views/apis/data_manager_api.py | 9 +---- seqr/views/apis/data_manager_api_tests.py | 24 +++++------ seqr/views/apis/dataset_api_tests.py | 4 +- seqr/views/utils/dataset_utils.py | 18 +++------ 7 files changed, 59 insertions(+), 70 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index d05bfdbc64..c2cb446483 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -44,12 +44,12 @@ f'
Let us know if you have any questions.

All the best,
The seqr team' INTERNAL_TEXT_EMAIL = """Dear seqr user, -This is to notify you that 2 new WES samples have been loaded in seqr project Test Reprocessed Project +This is to notify you that data for 2 new WES samples has been loaded in seqr project Test Reprocessed Project All the best, The seqr team""" INTERNAL_HTML_EMAIL = f'Dear seqr user,

' \ - f'This is to notify you that 2 new WES samples have been loaded in seqr project ' \ + f'This is to notify you that data for 2 new WES samples has been loaded in seqr project ' \ f'Test Reprocessed Project' \ f'

All the best,
The seqr team' @@ -195,7 +195,7 @@ def mock_metadata_file(index): @mock.patch('seqr.utils.file_utils.os.path.isfile', lambda *args: True) @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HAIL_HOST) @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', 'http://testairtable') -@mock.patch('seqr.utils.search.add_data_utils.BASE_URL', SEQR_URL) +@mock.patch('seqr.utils.communication_utils.BASE_URL', SEQR_URL) @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL', 'anvil-data-loading') @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') class CheckNewSamplesTest(AnvilAuthenticationTestCase): @@ -548,11 +548,11 @@ def test_command(self, mock_email, mock_airtable_utils): self.mock_send_slack.assert_has_calls([ mock.call( 'seqr-data-loading', - f'2 new WES samples are loaded in {SEQR_URL}project/{PROJECT_GUID}/project_page\n```NA20888, NA20889```', + f'2 new WES samples are loaded in <{SEQR_URL}project/{PROJECT_GUID}/project_page|Test Reprocessed Project>\n```NA20888, NA20889```', ), mock.call( 'anvil-data-loading', - f'1 new WES samples are loaded in {SEQR_URL}project/{EXTERNAL_PROJECT_GUID}/project_page', + f'1 new WES samples are loaded in <{SEQR_URL}project/{EXTERNAL_PROJECT_GUID}/project_page|Non-Analyst Project>', ), mock.call( 'seqr_loading_notifications', @@ -592,10 +592,10 @@ def test_command(self, mock_email, mock_airtable_utils): ), mock.call( 'seqr-data-loading', - f'1 new WES SV samples are loaded in {SEQR_URL}project/R0001_1kg/project_page\n```NA20872```', + f'1 new WES SV samples are loaded in <{SEQR_URL}project/R0001_1kg/project_page|1kg project nåme with uniçøde>\n```NA20872```', ), mock.call( 'seqr-data-loading', - f'1 new WES SV samples are loaded in {SEQR_URL}project/{PROJECT_GUID}/project_page\n```NA20889```', + f'1 new WES SV samples are loaded in <{SEQR_URL}project/{PROJECT_GUID}/project_page|Test Reprocessed 
Project>\n```NA20889```', ), ]) diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index 14a6d24aa4..008f2917cd 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -7,6 +7,7 @@ from notifications.signals import notify BASE_EMAIL_TEMPLATE = 'Dear seqr user,\n\n{}\n\nAll the best,\nThe seqr team' +EMAIL_MESSAGE_TEMPLATE = 'This is to notify you that data for {notification} has been loaded in seqr project {project_link}' logger = logging.getLogger(__name__) @@ -55,9 +56,16 @@ def send_html_email(email_body, process_message=None, **kwargs): email_message.send() -def send_project_notification(project, notification, email, subject): +def send_project_notification(project, notification, subject, notification_prefix='Loaded ', email_template=None, slack_channel=None, slack_detail=None): users = project.subscribers.user_set.all() - notify.send(project, recipient=users, verb=notification) + notify.send(project, recipient=users, verb=f'{notification_prefix}{notification}') + + url = f'{BASE_URL}project/{project.guid}/project_page' + + email = (email_template or EMAIL_MESSAGE_TEMPLATE).format( + notification=notification, + project_link=f'{project.name}', + ) email_kwargs = dict( email_body=BASE_EMAIL_TEMPLATE.format(email), to=list(users.values_list('email', flat=True)), @@ -69,6 +77,14 @@ def send_project_notification(project, notification, email, subject): except Exception as e: logger.error(f'Error sending project email for {project.guid}: {e}', extra={'detail': email_kwargs}) + if slack_channel: + slack_message = f'{notification} are loaded in <{url}|{project.name}>' + if slack_detail: + slack_message += f'\n```{slack_detail}```' + safe_post_to_slack(slack_channel, slack_message) + + return url + def _set_bulk_notification_stream(message): set_email_message_stream(message, 'seqr-notifications') diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index 
2864eb3823..c54389cbd2 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -4,7 +4,7 @@ from reference_data.models import GENOME_VERSION_LOOKUP from seqr.models import Sample, Individual, Project -from seqr.utils.communication_utils import send_project_notification, safe_post_to_slack +from seqr.utils.communication_utils import send_project_notification from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.search.utils import backend_specific_call @@ -13,7 +13,7 @@ from seqr.views.utils.dataset_utils import match_and_update_search_samples, load_mapping_file from seqr.views.utils.export_utils import write_multiple_files from seqr.views.utils.pedigree_info_utils import get_no_affected_families -from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL, ANVIL_UI_URL, \ +from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, ANVIL_UI_URL, \ SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL logger = SeqrLogger(__name__) @@ -58,55 +58,43 @@ def add_new_es_search_samples(request_json, project, user, notify=False, expecte return inactivated_sample_guids, updated_family_guids, updated_samples -def _format_email(sample_summary, project_link, *args): - return f'This is to notify you that {sample_summary} have been loaded in seqr project {project_link}' - - -def _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=_format_email): +def _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=None, slack_channel=None, include_slack_detail=False): previous_loaded_individuals = set(Sample.objects.filter(guid__in=inactivated_sample_guids).values_list('individual_id', flat=True)) new_sample_ids = [sample['sample_id'] for sample in updated_samples if sample['individual_id'] not in previous_loaded_individuals] - url = 
f'{BASE_URL}project/{project.guid}/project_page' msg_dataset_type = '' if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS else f' {dataset_type}' num_new_samples = len(new_sample_ids) sample_summary = f'{num_new_samples} new {sample_type}{msg_dataset_type} samples' - project_link = f'{project.name}' - email = format_email(sample_summary, project_link, num_new_samples) - - send_project_notification( + return send_project_notification( project, - notification=f'Loaded {sample_summary}', - email=email, + notification=sample_summary, + email_template=format_email(num_new_samples) if format_email else None, subject='New data available in seqr', + slack_channel=slack_channel, + slack_detail=', '.join(sorted(new_sample_ids)) if include_slack_detail else None, ) - return sample_summary, new_sample_ids, url - def notify_search_data_loaded(project, is_internal, dataset_type, sample_type, inactivated_sample_guids, updated_samples, num_samples): if is_internal: - format_email = _format_email + format_email = None else: workspace_name = f'{project.workspace_namespace}/{project.workspace_name}' - def format_email(sample_summary, project_link, num_new_samples): + def format_email(num_new_samples): reload_summary = f' and {num_samples - num_new_samples} re-loaded samples' if num_samples > num_new_samples else '' return '\n'.join([ f'We are following up on the request to load data from AnVIL on {project.created_date.date().strftime("%B %d, %Y")}.', - f'We have loaded {sample_summary}{reload_summary} from the AnVIL workspace {workspace_name} to the corresponding seqr project {project_link}.', + f'We have loaded {{notification}}{reload_summary} from the AnVIL workspace {workspace_name} to the corresponding seqr project {{project_link}}.', 'Let us know if you have any questions.', ]) - sample_summary, new_sample_ids, url = _basic_notify_search_data_loaded( + url = _basic_notify_search_data_loaded( project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, 
format_email=format_email, + slack_channel=SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL if is_internal else SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, + include_slack_detail=is_internal, ) - sample_id_list = f'\n```{", ".join(sorted(new_sample_ids))}```' if is_internal else '' - summary_message = f'{sample_summary} are loaded in {url}{sample_id_list}' - safe_post_to_slack( - SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL if is_internal else SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, - summary_message) - if not is_internal: AirtableSession(user=None, base=AirtableSession.ANVIL_BASE, no_auth=True).safe_patch_records( ANVIL_REQUEST_TRACKING_TABLE, max_records=1, diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 71a93b96d3..cd7a2f88c3 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -346,16 +346,9 @@ def load_rna_seq_sample_data(request, sample_guid): def _notify_phenotype_prioritization_loaded(project, tool, num_samples): - url = f'{BASE_URL}project/{project.guid}/project_page' - project_link = f'{project.name}' - email = ( - f'This is to notify you that {tool.title()} data for {num_samples} sample(s) ' - f'has been loaded in seqr project {project_link}' - ) send_project_notification( project, - notification=f'Loaded {num_samples} {tool.title()} sample(s)', - email=email, + notification=f'{num_samples} {tool.title()} sample(s)', subject=f'New {tool.title()} data available in seqr', ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index b07f6c82e9..8cd9527b91 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -936,11 +936,11 @@ def test_update_rna_tpm(self, *args, **kwargs): def test_update_rna_splice_outlier(self, *args, **kwargs): self._test_update_rna_seq('splice_outlier', *args, **kwargs) - @mock.patch('seqr.views.utils.dataset_utils.BASE_URL', 'https://test-seqr.org/') + 
@mock.patch('seqr.utils.communication_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.views.utils.dataset_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') @mock.patch('seqr.views.utils.file_utils.tempfile.gettempdir', lambda: 'tmp/') @mock.patch('seqr.utils.communication_utils.send_html_email') - @mock.patch('seqr.views.utils.dataset_utils.safe_post_to_slack') + @mock.patch('seqr.utils.communication_utils.safe_post_to_slack') @mock.patch('seqr.views.apis.data_manager_api.datetime') @mock.patch('seqr.views.apis.data_manager_api.os.mkdir') @mock.patch('seqr.views.apis.data_manager_api.os.rename') @@ -1099,10 +1099,10 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s mock_send_slack.assert_has_calls([ mock.call( 'seqr-data-loading', - f'0 new RNA {params["message_data_type"]} samples are loaded in \n``````', + f'0 new RNA {params["message_data_type"]} sample(s) are loaded in ', ), mock.call( 'seqr-data-loading', - f'1 new RNA {params["message_data_type"]} samples are loaded in \n```NA20888```', + f'1 new RNA {params["message_data_type"]} sample(s) are loaded in \n```NA20888```', ), ]) self.assertEqual(mock_send_email.call_count, 2) @@ -1262,7 +1262,7 @@ def test_load_rna_seq_sample_data(self): def _join_data(cls, data): return ['\t'.join(line).encode('utf-8') for line in data] - @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://test-seqr.org/') + @mock.patch('seqr.utils.communication_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.models.random') @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1345,10 +1345,10 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema self.assertListEqual(saved_data, EXPECTED_LIRICAL_DATA) mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec 
self._assert_expected_notifications(mock_send_email, [ - {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 1 sample(s)'}, - {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 1 sample(s)', + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'data for 1 Lirical sample(s)'}, + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'data for 1 Lirical sample(s)', 'project_guid': 'R0003_test', 'project_name': 'Test Reprocessed Project'} - ], has_html=True) + ]) # Test uploading new data self.reset_logs() @@ -1376,17 +1376,17 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}]) self.assertListEqual(saved_data, EXPECTED_UPDATED_LIRICAL_DATA) self._assert_expected_notifications(mock_send_email, [ - {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 2 sample(s)'}, - ], has_html=True) + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'data for 2 Lirical sample(s)'}, + ]) @staticmethod - def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict], has_html=False): + def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict]): calls = [] for notif_dict in expected_notifs: project_guid = notif_dict.get('project_guid', PROJECT_GUID) project_name = notif_dict.get('project_name', '1kg project nåme with uniçøde') url = f'https://test-seqr.org/project/{project_guid}/project_page' - project_link = f'{project_name}' if has_html else f'<{url}|{project_name}>' + project_link = f'{project_name}' expected_email_body = ( f'Dear seqr user,\n\nThis is to notify you that {notif_dict["email_body"]} ' f'has been loaded in seqr project {project_link}\n\nAll the best,\nThe seqr team' diff --git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py index 
721018a6f6..a3a88e6ad8 100644 --- a/seqr/views/apis/dataset_api_tests.py +++ b/seqr/views/apis/dataset_api_tests.py @@ -48,7 +48,7 @@ class DatasetAPITest(object): @mock.patch('seqr.models.random.randint') @mock.patch('seqr.utils.communication_utils.logger') @mock.patch('seqr.utils.communication_utils.send_html_email') - @mock.patch('seqr.utils.search.add_data_utils.BASE_URL', 'https://seqr.broadinstitute.org/') + @mock.patch('seqr.utils.communication_utils.BASE_URL', 'https://seqr.broadinstitute.org/') @urllib3_responses.activate def test_add_variants_dataset(self, mock_send_email, mock_logger, mock_random): url = reverse(add_variants_dataset_handler, args=[PROJECT_GUID]) @@ -269,7 +269,7 @@ def _assert_expected_notification(self, mock_send_email, sample_type, count, ema project_guid=PROJECT_GUID, project_name='1kg project nåme with uniçøde', recipient='test_user_manager@test.com'): if not email_content: - email_content = f'This is to notify you that {count} new {sample_type} samples have been loaded in seqr project {project_name}' + email_content = f'This is to notify you that data for {count} new {sample_type} samples has been loaded in seqr project {project_name}' mock_send_email.assert_called_once_with( email_body=f'Dear seqr user,\n\n{email_content}\n\nAll the best,\nThe seqr team', subject='New data available in seqr', to=[recipient], process_message=mock.ANY, diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 991856f1ba..f75e93eb00 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -5,7 +5,7 @@ from tqdm import tqdm from seqr.models import Sample, Individual, Family, Project, RnaSample, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier -from seqr.utils.communication_utils import safe_post_to_slack, send_project_notification +from seqr.utils.communication_utils import send_project_notification from seqr.utils.file_utils import file_iter from seqr.utils.logging_utils import SeqrLogger 
from seqr.utils.middleware import ErrorsWarningsException @@ -14,7 +14,7 @@ from seqr.views.utils.permissions_utils import get_internal_projects from seqr.views.utils.json_utils import _to_snake_case, _to_camel_case from reference_data.models import GeneInfo -from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL +from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL logger = SeqrLogger(__name__) @@ -558,20 +558,12 @@ def _notify_rna_loading(model_cls, sample_projects, internal_projects): data_type = RNA_MODEL_DISPLAY_NAME[model_cls] for project_agg in sample_projects: new_ids = project_agg["new_sample_ids"] - project_link = f'<{BASE_URL}project/{project_agg["guid"]}/project_page|{project_agg["name"]}>' - safe_post_to_slack( - SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, - f'{len(new_ids)} new RNA {data_type} samples are loaded in {project_link}\n```{", ".join(new_ids)}```' - ) - email = ( - f'This is to notify you that data for {len(new_ids)} new RNA {data_type} sample(s) ' - f'has been loaded in seqr project {project_link}' - ) send_project_notification( project=projects_by_name[project_agg["name"]], - notification=f'Loaded {len(new_ids)} new RNA {data_type} sample(s)', - email=email, + notification=f'{len(new_ids)} new RNA {data_type} sample(s)', subject=f'New RNA {data_type} data available in seqr', + slack_channel=SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, + slack_detail=', '.join(new_ids), ) From 407f81348f09270fbfd4e9fa85517086590c332c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 11:03:06 -0500 Subject: [PATCH 20/30] better identity check for saved searches with gene list --- ui/pages/Search/components/SavedSearch.jsx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ui/pages/Search/components/SavedSearch.jsx b/ui/pages/Search/components/SavedSearch.jsx index 5ae575497f..a3b163e422 100644 --- a/ui/pages/Search/components/SavedSearch.jsx +++ b/ui/pages/Search/components/SavedSearch.jsx 
@@ -1,4 +1,5 @@ import React from 'react' +import isEqual from 'lodash/isEqual' import PropTypes from 'prop-types' import { connect } from 'react-redux' import { FormSpy } from 'react-final-form' @@ -31,11 +32,17 @@ const FormButtonContainer = styled.div` const SUBSCRIPTION = { values: true } +const isSameSearch = ({ locus: locus1, ...search1 }, { locus: locus2, ...search2 }) => ( + isEqual(search1, search2) && ( + locus1?.locusListGuid ? locus1.locusListGuid === locus2.locusListGuid : isEqual(locus1, locus2) + ) +) + const CurrentSavedSearchProvider = ({ element, ...props }) => ( {({ values }) => { const currentSavedSearch = values.search && Object.values(props.savedSearchesByGuid).find( - ({ search }) => search === values.search, + ({ search }) => isSameSearch(search, values.search), ) return React.createElement(element, { currentSavedSearch, search: values.search, ...props }) }} From cbf2d0c0dac1684c77204ad1f712b58c145d7368 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 11:21:31 -0500 Subject: [PATCH 21/30] add dropdown for loadable vcf --- .../DataManagement/components/LoadData.jsx | 62 ++++++++++++------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/ui/pages/DataManagement/components/LoadData.jsx b/ui/pages/DataManagement/components/LoadData.jsx index 6dc9bdcdba..e9ad6d7494 100644 --- a/ui/pages/DataManagement/components/LoadData.jsx +++ b/ui/pages/DataManagement/components/LoadData.jsx @@ -48,32 +48,43 @@ const LoadedProjectOptions = props => ( ) +const FILE_PATH_FIELD = { + name: 'filePath', + validate: validators.required, +} + +const CALLSET_PAGE_FIELDS = [ + { + name: 'skipValidation', + label: 'Skip Callset Validation', + component: InlineToggle, + asFormInput: true, + }, + { + ...GENOME_VERSION_FIELD, + component: ButtonRadioGroup, + validate: validators.required, + }, + { + name: 'sampleType', + label: 'Sample Type', + component: ButtonRadioGroup, + options: [SAMPLE_TYPE_EXOME, SAMPLE_TYPE_GENOME].map(value => ({ 
value, text: value })), + validate: validators.required, + }, +] + const CALLSET_PAGE = { fields: [ { - name: 'filePath', - label: 'Callset File Path', - placeholder: 'gs://', - validate: validators.required, - }, - { - name: 'skipValidation', - label: 'Skip Callset Validation', - component: InlineToggle, - asFormInput: true, - }, - { - ...GENOME_VERSION_FIELD, - component: ButtonRadioGroup, - validate: validators.required, - }, - { - name: 'sampleType', - label: 'Sample Type', - component: ButtonRadioGroup, - options: [SAMPLE_TYPE_EXOME, SAMPLE_TYPE_GENOME].map(value => ({ value, text: value })), - validate: validators.required, + label: 'VCF', + component: LoadOptionsSelect, + url: '/api/data_management/loading_vcfs', + optionsResponseKey: 'vcfs', + validationErrorMessage: 'No VCFs found in the loading datasets directory', + ...FILE_PATH_FIELD, }, + ...CALLSET_PAGE_FIELDS, ], submitUrl: '/api/data_management/validate_callset', } @@ -81,7 +92,12 @@ const CALLSET_PAGE = { const MULTI_DATA_TYPE_CALLSET_PAGE = { ...CALLSET_PAGE, fields: [ - ...CALLSET_PAGE.fields, + { + label: 'Callset File Path', + placeholder: 'gs://', + ...FILE_PATH_FIELD, + }, + ...CALLSET_PAGE_FIELDS, { name: 'datasetType', label: 'Dataset Type', From 272052fcc4ce834a4b6467db5c61ccb606e7cec9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 11:22:57 -0500 Subject: [PATCH 22/30] update docs --- deploy/LOCAL_INSTALL_HELM.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/LOCAL_INSTALL_HELM.md b/deploy/LOCAL_INSTALL_HELM.md index 4fe8122b6b..1c1bd417da 100644 --- a/deploy/LOCAL_INSTALL_HELM.md +++ b/deploy/LOCAL_INSTALL_HELM.md @@ -76,7 +76,7 @@ loading_pipeline_queue test.vcf.gz ``` - In the top header of *seqr*, click on the **Data Management** button. - In the subheader, click on **Load Data**. 
-- Type the name of the callset path into the **Callset File Path** text box (without the directory prefix), and select the appropriate Sample Type (WES/WGS) and Genome Version (GRCh37/GRCh38) for your project. The pipeline includes a sequence of validation steps to insure the validity of your VCF, but these may be skipped by enabling the **Skip Callset Validation**option. We strongly recommend leaving validation enabled to ensure the quality of your analysis. +- Select your VCF from the dropdown and select the appropriate Sample Type (WES/WGS) and Genome Version (GRCh37/GRCh38) for your project. The pipeline includes a sequence of validation steps to ensure the validity of your VCF, but these may be skipped by enabling the **Skip Callset Validation** option. We strongly recommend leaving validation enabled to ensure the quality of your analysis. - Click through to the next page and select your project from the **Projects to Load** dropdown, then click **Submit**. - If you wish to check the status of the loading request, you can click through to the **Pipeline Status** tab to view the loading pipeline interface. - Data should be loaded into the search backend automatically, usually within a few hours. 
From dddd9304965c36448dd9aac4f8d4c49b51181413 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 12:01:30 -0500 Subject: [PATCH 23/30] load availabel VCFS from loading dir --- seqr/urls.py | 3 +- seqr/utils/file_utils.py | 15 ++++----- seqr/utils/vcf_utils.py | 26 ++++++++++++++++ seqr/views/apis/anvil_workspace_api.py | 43 ++++++-------------------- seqr/views/apis/data_manager_api.py | 9 +++++- 5 files changed, 54 insertions(+), 42 deletions(-) diff --git a/seqr/urls.py b/seqr/urls.py index 296d496880..126ea39153 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -122,7 +122,7 @@ from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ update_rna_seq, load_rna_seq_sample_data, proxy_to_kibana, load_phenotype_prioritization_data, \ - validate_callset, get_loaded_projects, load_data, proxy_to_luigi + validate_callset, get_loaded_projects, load_data, loading_vcfs, proxy_to_luigi from seqr.views.apis.report_api import \ anvil_export, \ family_metadata, \ @@ -332,6 +332,7 @@ 'data_management/update_rna_seq': update_rna_seq, 'data_management/load_rna_seq_sample/(?P[^/]+)': load_rna_seq_sample_data, 'data_management/load_phenotype_prioritization_data': load_phenotype_prioritization_data, + 'data_management/loading_vcfs': loading_vcfs, 'data_management/validate_callset': validate_callset, 'data_management/loaded_projects/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)': get_loaded_projects, 'data_management/load_data': load_data, diff --git a/seqr/utils/file_utils.py b/seqr/utils/file_utils.py index 76e1258da0..fd88cdfee2 100644 --- a/seqr/utils/file_utils.py +++ b/seqr/utils/file_utils.py @@ -48,10 +48,12 @@ def does_file_exist(file_path, user=None): return os.path.isfile(file_path) -def list_files(wildcard_path, user): +def list_files(wildcard_path, user, check_subfolders=False): + if check_subfolders: + wildcard_path = f'{wildcard_path.rstrip("/")}/**' if is_google_bucket_file_path(wildcard_path): - return 
get_gs_file_list(wildcard_path, user, check_subfolders=False, allow_missing=True) - return [file_path for file_path in glob.glob(wildcard_path) if os.path.isfile(file_path)] + return _get_gs_file_list(wildcard_path, user, check_subfolders=check_subfolders) + return [file_path for file_path in glob.glob(wildcard_path, recursive=check_subfolders) if os.path.isfile(file_path)] def file_iter(file_path, byte_range=None, raw_content=False, user=None, **kwargs): @@ -91,18 +93,17 @@ def mv_file_to_gs(local_path, gs_path, user=None): run_gsutil_with_wait(command, gs_path, user) -def get_gs_file_list(gs_path, user=None, check_subfolders=True, allow_missing=False): +def _get_gs_file_list(gs_path, user=None, check_subfolders=True): gs_path = gs_path.rstrip('/') command = 'ls' if check_subfolders: # If a bucket is empty gsutil throws an error when running ls with ** instead of returning an empty list - subfolders = _run_gsutil_with_stdout(command, gs_path, user) + subfolders = _run_gsutil_with_stdout(command, gs_path.replace('/**', ''), user) if not subfolders: return [] - gs_path = f'{gs_path}/**' - all_lines = _run_gsutil_with_stdout(command, gs_path, user, allow_missing=allow_missing) + all_lines = _run_gsutil_with_stdout(command, gs_path, user, allow_missing=True) return [line for line in all_lines if is_google_bucket_file_path(line)] diff --git a/seqr/utils/vcf_utils.py b/seqr/utils/vcf_utils.py index 7a421db930..5207f7a3de 100644 --- a/seqr/utils/vcf_utils.py +++ b/seqr/utils/vcf_utils.py @@ -1,3 +1,4 @@ +import os import re from collections import defaultdict @@ -107,3 +108,28 @@ def validate_vcf_exists(data_path, user, path_name=None, allowed_exts=None): raise ErrorsWarningsException(['Data file or path {} is not found.'.format(path_name or data_path)]) return file_to_check + + +def get_vcf_list(data_path, user): + file_list = list_files(data_path, user, check_subfolders=True) + data_path_list = [path.replace(data_path, '') for path in file_list if 
path.endswith(VCF_FILE_EXTENSIONS)] + return _merge_sharded_vcf(data_path_list) + + +def _merge_sharded_vcf(vcf_files): + files_by_path = defaultdict(list) + + for vcf_file in vcf_files: + subfolder_path, file = vcf_file.rsplit('/', 1) + files_by_path[subfolder_path].append(file) + + # discover the sharded VCF files in each folder, replace the sharded VCF files with a single path with '*' + for subfolder_path, files in files_by_path.items(): + if len(files) < 2: + continue + prefix = os.path.commonprefix(files) + suffix = re.fullmatch(r'{}\d*(?P\D.*)'.format(prefix), files[0]).groupdict()['suffix'] + if all([re.fullmatch(r'{}\d+{}'.format(prefix, suffix), file) for file in files]): + files_by_path[subfolder_path] = [f'{prefix}*{suffix}'] + + return [f'{path}/{file}' for path, files in files_by_path.items() for file in files] diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 1791cdc295..b357aa17d3 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -1,8 +1,6 @@ """APIs for management of projects related to AnVIL workspaces.""" import json import time -import os -import re from datetime import datetime from functools import wraps from collections import defaultdict @@ -16,7 +14,6 @@ from seqr.models import Project, CAN_EDIT, Sample, Individual, IgvSample from seqr.views.react_app import render_app_html from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE -from seqr.utils.search.constants import VCF_FILE_EXTENSIONS from seqr.utils.search.utils import get_search_samples from seqr.views.utils.airflow_utils import trigger_airflow_data_loading from seqr.views.utils.json_to_orm_utils import create_model_from_json @@ -27,8 +24,8 @@ from seqr.views.utils.pedigree_info_utils import parse_basic_pedigree_table, JsonConstants from seqr.views.utils.individual_utils import add_or_update_individuals_and_families from seqr.utils.communication_utils 
import send_html_email -from seqr.utils.file_utils import get_gs_file_list -from seqr.utils.vcf_utils import validate_vcf_and_get_samples, validate_vcf_exists +from seqr.utils.file_utils import list_files +from seqr.utils.vcf_utils import validate_vcf_and_get_samples, validate_vcf_exists, get_vcf_list from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException from seqr.views.utils.permissions_utils import is_anvil_authenticated, check_workspace_perm, login_and_policies_required @@ -109,24 +106,23 @@ def grant_workspace_access(request, namespace, name): return create_json_response({'success': True}) -def _get_workspace_files(request, namespace, name, workspace_meta): +def _get_workspace_bucket(namespace, name, workspace_meta): bucket_name = workspace_meta['workspace']['bucketName'] - bucket_path = 'gs://{bucket}'.format(bucket=bucket_name.rstrip('/')) - return bucket_path, get_gs_file_list(bucket_path, request.user) + return 'gs://{bucket}'.format(bucket=bucket_name.rstrip('/')) @anvil_workspace_access_required(meta_fields=['workspace.bucketName']) -def get_anvil_vcf_list(*args): - bucket_path, file_list = _get_workspace_files(*args) - data_path_list = [path.replace(bucket_path, '') for path in file_list if path.endswith(VCF_FILE_EXTENSIONS)] - data_path_list = _merge_sharded_vcf(data_path_list) +def get_anvil_vcf_list(request, *args): + bucket_path = _get_workspace_bucket(*args) + data_path_list = get_vcf_list(bucket_path, request.user) return create_json_response({'dataPathList': data_path_list}) @anvil_workspace_access_required(meta_fields=['workspace.bucketName']) -def get_anvil_igv_options(*args): - bucket_path, file_list = _get_workspace_files(*args) +def get_anvil_igv_options(request, *args): + bucket_path = _get_workspace_bucket(*args) + file_list = list_files(bucket_path, request.user, check_subfolders=True) igv_options = [ {'name': path.replace(bucket_path, ''), 'value': path} for path in file_list if 
path.endswith(IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS[IgvSample.SAMPLE_TYPE_ALIGNMENT]) @@ -340,22 +336,3 @@ def _wait_for_service_account_access(user, namespace, name): def _get_seqr_project_url(project): return f'{BASE_URL}project/{project.guid}/project_page' - - -def _merge_sharded_vcf(vcf_files): - files_by_path = defaultdict(list) - - for vcf_file in vcf_files: - subfolder_path, file = vcf_file.rsplit('/', 1) - files_by_path[subfolder_path].append(file) - - # discover the sharded VCF files in each folder, replace the sharded VCF files with a single path with '*' - for subfolder_path, files in files_by_path.items(): - if len(files) < 2: - continue - prefix = os.path.commonprefix(files) - suffix = re.fullmatch(r'{}\d*(?P\D.*)'.format(prefix), files[0]).groupdict()['suffix'] - if all([re.fullmatch(r'{}\d+{}'.format(prefix, suffix), file) for file in files]): - files_by_path[subfolder_path] = [f'{prefix}*{suffix}'] - - return [f'{path}/{file}' for path, files in files_by_path.items() for file in files] diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 71a93b96d3..ed8224f035 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -20,7 +20,7 @@ from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException -from seqr.utils.vcf_utils import validate_vcf_exists +from seqr.utils.vcf_utils import validate_vcf_exists, get_vcf_list from seqr.views.utils.airflow_utils import trigger_airflow_data_loading from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES, AVAILABLE_PDO_STATUS @@ -445,6 +445,13 @@ def load_phenotype_prioritization_data(request): } +@pm_or_data_manager_required +def loading_vcfs(request): + return create_json_response({ + 'vcfs': get_vcf_list(LOADING_DATASETS_DIR, request.user), + }) + + @pm_or_data_manager_required def validate_callset(request): 
request_json = json.loads(request.body) From 1928dcbedbed0fec8246100eb120512a4f2854a5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 12:08:56 -0500 Subject: [PATCH 24/30] loading vcfs local only --- seqr/views/apis/data_manager_api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index ed8224f035..f8d7dc9c17 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -9,6 +9,7 @@ import urllib3 from django.contrib.postgres.aggregates import ArrayAgg +from django.core.exceptions import PermissionDenied from django.db.models import Max, F, Q, Count from django.http.response import HttpResponse from django.views.decorators.csrf import csrf_exempt @@ -30,6 +31,7 @@ from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.json_to_orm_utils import update_model_from_json from seqr.views.utils.permissions_utils import data_manager_required, pm_or_data_manager_required, get_internal_projects +from seqr.views.utils.terra_api_utils import anvil_enabled from seqr.models import Sample, RnaSample, Individual, Project, PhenotypePrioritization @@ -447,6 +449,8 @@ def load_phenotype_prioritization_data(request): @pm_or_data_manager_required def loading_vcfs(request): + if anvil_enabled(): + raise PermissionDenied() return create_json_response({ 'vcfs': get_vcf_list(LOADING_DATASETS_DIR, request.user), }) From dcf3faeef93e721aa47545a2e12c6f09663a3ad9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 12:22:50 -0500 Subject: [PATCH 25/30] add test --- seqr/views/apis/data_manager_api_tests.py | 31 ++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index b07f6c82e9..138afb72bb 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -8,7 +8,7 @@ from 
seqr.utils.communication_utils import _set_bulk_notification_stream from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, validate_callset, \ + update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, validate_callset, loading_vcfs, \ get_loaded_projects, load_data from seqr.views.utils.orm_to_json_utils import _get_json_for_models from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase, AirtableTest @@ -1401,6 +1401,32 @@ def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict], ) mock_send_email.assert_has_calls(calls) + @mock.patch('seqr.utils.file_utils.os.path.isfile', lambda *args: True) + @mock.patch('seqr.utils.file_utils.glob.glob') + def test_loading_vcfs(self, mock_glob): + url = reverse(loading_vcfs) + self.check_pm_login(url) + + mock_glob.return_value = [] + response = self.client.get(url, content_type='application/json') + self._test_expected_vcf_responses(response, mock_glob, url) + + def _test_expected_vcf_responses(self, response, mock_glob, url): + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'vcfs': []}) + mock_glob.assert_called_with('/local_datasets/**', recursive=True) + + mock_glob.return_value = ['/local_datasets/sharded_vcf/part001.vcf', '/local_datasets/sharded_vcf/part002.vcf', '/local_datasets/test.vcf.gz'] + response = self.client.get(url, content_type='application/json') + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'vcfs': ['/sharded_vcf/part00*.vcf', '/test.vcf.gz']}) + mock_glob.assert_called_with('/local_datasets/**', recursive=True) + + # test data manager access + self.login_data_manager_user() + response = self.client.get(url, content_type='application/json') + self.assertEqual(response.status_code, 200) + 
@mock.patch('seqr.utils.file_utils.os.path.isfile') @mock.patch('seqr.utils.file_utils.glob.glob') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1928,3 +1954,6 @@ def _assert_write_pedigree_error(self, response): def _test_no_affected_family(self, url, body): # Sample ID filtering skips the unaffected family pass + + def _test_expected_vcf_responses(self, response, mock_glob, url): + self.assertEqual(response.status_code, 403) From 175bd79913aabe86fa6f54f6fe41d2f1db5244ec Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 12:32:56 -0500 Subject: [PATCH 26/30] fix test --- .../tests/check_for_new_samples_from_pipeline_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index d05bfdbc64..063b64224e 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -324,7 +324,7 @@ def test_command(self, mock_email, mock_airtable_utils): with self.assertRaises(CommandError) as ce: call_command('check_for_new_samples_from_pipeline', '--genome_version=GRCh37', '--dataset_type=MITO') self.assertEqual(str(ce.exception), 'No successful runs found for genome_version=GRCh37, dataset_type=MITO') - self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/GRCh37/MITO/runs/*/_SUCCESS') + self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/GRCh37/MITO/runs/*/_SUCCESS', recursive=False) self.mock_subprocess.assert_not_called() call_command('check_for_new_samples_from_pipeline') @@ -344,7 +344,7 @@ def test_command(self, mock_email, mock_airtable_utils): iter([json.dumps(METADATA_FILES[i])]) for i in range(len(local_files)) ] call_command('check_for_new_samples_from_pipeline') - self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/*/*/runs/*/_SUCCESS') + 
self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/*/*/runs/*/_SUCCESS', recursive=False) self.mock_open.assert_has_calls( [mock.call(path.replace('_SUCCESS', 'metadata.json'), 'r') for path in local_files], any_order=True) self.mock_subprocess.assert_not_called() From cbae51a6aec4d82b684d37d1f1ed3811f566deeb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 12:39:05 -0500 Subject: [PATCH 27/30] correctly set allow missing --- seqr/utils/file_utils.py | 8 ++++---- seqr/utils/vcf_utils.py | 2 +- seqr/views/apis/anvil_workspace_api.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/seqr/utils/file_utils.py b/seqr/utils/file_utils.py index fd88cdfee2..335417e7c2 100644 --- a/seqr/utils/file_utils.py +++ b/seqr/utils/file_utils.py @@ -48,11 +48,11 @@ def does_file_exist(file_path, user=None): return os.path.isfile(file_path) -def list_files(wildcard_path, user, check_subfolders=False): +def list_files(wildcard_path, user, check_subfolders=False, allow_missing=True): if check_subfolders: wildcard_path = f'{wildcard_path.rstrip("/")}/**' if is_google_bucket_file_path(wildcard_path): - return _get_gs_file_list(wildcard_path, user, check_subfolders=check_subfolders) + return _get_gs_file_list(wildcard_path, user, check_subfolders=check_subfolders, allow_missing=allow_missing) return [file_path for file_path in glob.glob(wildcard_path, recursive=check_subfolders) if os.path.isfile(file_path)] @@ -93,7 +93,7 @@ def mv_file_to_gs(local_path, gs_path, user=None): run_gsutil_with_wait(command, gs_path, user) -def _get_gs_file_list(gs_path, user=None, check_subfolders=True): +def _get_gs_file_list(gs_path, user=None, check_subfolders=True, allow_missing=False): gs_path = gs_path.rstrip('/') command = 'ls' @@ -103,7 +103,7 @@ def _get_gs_file_list(gs_path, user=None, check_subfolders=True): if not subfolders: return [] - all_lines = _run_gsutil_with_stdout(command, gs_path, user, allow_missing=True) + all_lines = 
_run_gsutil_with_stdout(command, gs_path, user, allow_missing=allow_missing) return [line for line in all_lines if is_google_bucket_file_path(line)] diff --git a/seqr/utils/vcf_utils.py b/seqr/utils/vcf_utils.py index 5207f7a3de..ad5bf1b856 100644 --- a/seqr/utils/vcf_utils.py +++ b/seqr/utils/vcf_utils.py @@ -111,7 +111,7 @@ def validate_vcf_exists(data_path, user, path_name=None, allowed_exts=None): def get_vcf_list(data_path, user): - file_list = list_files(data_path, user, check_subfolders=True) + file_list = list_files(data_path, user, check_subfolders=True, allow_missing=False) data_path_list = [path.replace(data_path, '') for path in file_list if path.endswith(VCF_FILE_EXTENSIONS)] return _merge_sharded_vcf(data_path_list) diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index b357aa17d3..f1cf52f371 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -122,7 +122,7 @@ def get_anvil_vcf_list(request, *args): @anvil_workspace_access_required(meta_fields=['workspace.bucketName']) def get_anvil_igv_options(request, *args): bucket_path = _get_workspace_bucket(*args) - file_list = list_files(bucket_path, request.user, check_subfolders=True) + file_list = list_files(bucket_path, request.user, check_subfolders=True, allow_missing=False) igv_options = [ {'name': path.replace(bucket_path, ''), 'value': path} for path in file_list if path.endswith(IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS[IgvSample.SAMPLE_TYPE_ALIGNMENT]) From 6db86ae4f08466cfad389ee700efbc3bf5e45661 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 12:51:59 -0500 Subject: [PATCH 28/30] move utility tests into usage test --- seqr/utils/file_utils_tests.py | 31 +------------------- seqr/views/apis/anvil_workspace_api_tests.py | 6 ++++ 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/seqr/utils/file_utils_tests.py b/seqr/utils/file_utils_tests.py index 32a7bbcb91..86ec92523b 100644 --- 
a/seqr/utils/file_utils_tests.py +++ b/seqr/utils/file_utils_tests.py @@ -1,7 +1,7 @@ import mock from unittest import TestCase -from seqr.utils.file_utils import mv_file_to_gs, get_gs_file_list +from seqr.utils.file_utils import mv_file_to_gs class FileUtilsTest(TestCase): @@ -30,32 +30,3 @@ def test_mv_file_to_gs(self, mock_logger, mock_subproc): mock_subproc.Popen.assert_called_with('gsutil mv /temp_path gs://bucket/target_path', stdout=mock_subproc.PIPE, stderr=mock_subproc.STDOUT, shell=True) # nosec mock_logger.info.assert_called_with('==> gsutil mv /temp_path gs://bucket/target_path', None) process.wait.assert_called_with() - - @mock.patch('seqr.utils.file_utils.subprocess') - @mock.patch('seqr.utils.file_utils.logger') - def test_get_gs_file_list(self, mock_logger, mock_subproc): - with self.assertRaises(Exception) as ee: - get_gs_file_list('/temp_path') - self.assertEqual(str(ee.exception), 'A Google Storage path is expected.') - - process = mock_subproc.Popen.return_value - process.communicate.return_value = b'', b'-bash: gsutil: command not found.\nPlease check the path.\n' - with self.assertRaises(Exception) as ee: - get_gs_file_list('gs://bucket/target_path/', user=None) - self.assertEqual(str(ee.exception), 'Run command failed: -bash: gsutil: command not found. Please check the path.') - mock_subproc.Popen.assert_called_with('gsutil ls gs://bucket/target_path', stdout=mock_subproc.PIPE, - stderr=mock_subproc.PIPE, shell=True) # nosec - mock_logger.info.assert_called_with('==> gsutil ls gs://bucket/target_path', None) - process.communicate.assert_called_with() - - mock_subproc.reset_mock() - mock_logger.reset_mock() - process.communicate.return_value = b'\n\nUpdates are available for some Cloud SDK components. 
To install them,\n' \ - b'please run:\n $ gcloud components update\ngs://bucket/target_path/id_file.txt\n' \ - b'gs://bucket/target_path/data.vcf.gz\n', b'' - file_list = get_gs_file_list('gs://bucket/target_path', user=None) - mock_subproc.Popen.assert_called_with('gsutil ls gs://bucket/target_path/**', stdout=mock_subproc.PIPE, - stderr=mock_subproc.PIPE, shell=True) # nosec - mock_logger.info.assert_called_with('==> gsutil ls gs://bucket/target_path/**', None) - process.communicate.assert_called_with() - self.assertEqual(file_list, ['gs://bucket/target_path/id_file.txt', 'gs://bucket/target_path/data.vcf.gz']) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index 928fb22840..07c348e5f5 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -443,6 +443,12 @@ def _test_get_workspace_files(self, url, response_key, expected_files, mock_subp .format(TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1), self.collaborator_user) + # Test gsutil error + mock_subprocess.return_value.communicate.return_value = b'', b'-bash: gsutil: command not found.\nPlease check the path.\n' + response = self.client.get(url, content_type='application/json') + self.assertEqual(response.status_code, 500) + self.assertEqual(response.json()['error'], 'Run command failed: -bash: gsutil: command not found. 
Please check the path.') + # Test empty bucket mock_subprocess.return_value.communicate.return_value = b'', None response = self.client.get(url, content_type='application/json') From aeaba01bc8e92db97650e4164201f89d87a585ef Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 8 Jan 2025 13:00:55 -0500 Subject: [PATCH 29/30] clean up --- seqr/utils/file_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/utils/file_utils.py b/seqr/utils/file_utils.py index 335417e7c2..474974d330 100644 --- a/seqr/utils/file_utils.py +++ b/seqr/utils/file_utils.py @@ -52,7 +52,7 @@ def list_files(wildcard_path, user, check_subfolders=False, allow_missing=True): if check_subfolders: wildcard_path = f'{wildcard_path.rstrip("/")}/**' if is_google_bucket_file_path(wildcard_path): - return _get_gs_file_list(wildcard_path, user, check_subfolders=check_subfolders, allow_missing=allow_missing) + return _get_gs_file_list(wildcard_path, user, check_subfolders, allow_missing) return [file_path for file_path in glob.glob(wildcard_path, recursive=check_subfolders) if os.path.isfile(file_path)] @@ -93,7 +93,7 @@ def mv_file_to_gs(local_path, gs_path, user=None): run_gsutil_with_wait(command, gs_path, user) -def _get_gs_file_list(gs_path, user=None, check_subfolders=True, allow_missing=False): +def _get_gs_file_list(gs_path, user, check_subfolders, allow_missing): gs_path = gs_path.rstrip('/') command = 'ls' From fb664ede8003a54e6636ed59c0eaa59ff6fce413 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 Jan 2025 16:17:01 -0500 Subject: [PATCH 30/30] pr feedback --- seqr/utils/communication_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index 008f2917cd..5df9d32825 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -56,9 +56,9 @@ def send_html_email(email_body, process_message=None, **kwargs): email_message.send() -def 
send_project_notification(project, notification, subject, notification_prefix='Loaded ', email_template=None, slack_channel=None, slack_detail=None): +def send_project_notification(project, notification, subject, email_template=None, slack_channel=None, slack_detail=None): users = project.subscribers.user_set.all() - notify.send(project, recipient=users, verb=f'{notification_prefix}{notification}') + notify.send(project, recipient=users, verb=f'Loaded {notification}') url = f'{BASE_URL}project/{project.guid}/project_page'