From fc6da803af82cc9b56d995768783c7d2699854a6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 Nov 2023 16:26:23 -0500 Subject: [PATCH 1/4] clean up multi family merge --- hail_search/queries/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 5850f89e89..985f94648f 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -297,7 +297,7 @@ def import_filtered_table(self, sample_data, intervals=None, **kwargs): entry_type = families_ht.family_entries.dtype.element_type for project_ht, num_project_families in filtered_project_hts[1:]: families_ht = families_ht.join(project_ht, how='outer') - families_ht = families_ht.annotate_globals( + families_ht = families_ht.select_globals( family_guids=families_ht.family_guids.extend(families_ht.family_guids_1) ) select_fields = { From ff6bdccdeaa93fb487dfdd60ba2d9373c572966c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 9 Nov 2023 16:47:36 -0500 Subject: [PATCH 2/4] do not apply secondary filters if not comp het search --- hail_search/queries/base.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 985f94648f..7bf0b34b33 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -635,12 +635,13 @@ def _filter_by_annotations(self, pathogenicity, annotations, annotations_seconda annotation_override_filters = self._get_annotation_override_filters(annotations, pathogenicity=pathogenicity) annotation_exprs, _ = self._get_allowed_consequences_annotations(annotations, annotation_override_filters) - secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( - annotations_secondary or {}, annotation_override_filters, is_secondary=True) - if secondary_exprs: - annotation_exprs.update({f'{k}_secondary': v for k, v in secondary_exprs.items()}) - if secondary_exprs or allowed_secondary_consequences: - self._has_secondary_annotations = True + if self._has_comp_het_search: + secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( + annotations_secondary or {}, annotation_override_filters, is_secondary=True) + if secondary_exprs: + annotation_exprs.update({f'{k}_secondary': v for k, v in secondary_exprs.items()}) + if secondary_exprs or allowed_secondary_consequences: + self._has_secondary_annotations = True if not annotation_exprs: return From 74f61f89415cdbf79ba2f34211a2a9f2a1f000ce Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 13 Nov 2023 10:41:04 -0500 Subject: [PATCH 3/4] improve sv comp het performance --- hail_search/queries/base.py | 3 ++- hail_search/queries/multi_data_types.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 7bf0b34b33..70a520fbb1 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -734,7 +734,8 @@ def _filter_compound_hets(self): # Format pairs as lists and de-duplicate ch_ht = ch_ht._key_by_assert_sorted(key_pair=hl.sorted([ - hl.tuple([ch_ht[v][k] for k in self.KEY_FIELD]) for v in ['v1', 'v2'] + ch_ht[v][self.KEY_FIELD[0]] if len(self.KEY_FIELD) == 1 else hl.tuple([ch_ht[v][k] for k in self.KEY_FIELD]) + for v in ['v1', 'v2'] ])) ch_ht = ch_ht.distinct().key_by() diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index ed3cdb9027..b2392e6706 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -57,6 +57,7 @@ def _filter_data_type_comp_hets(self, variant_ht, variant_families, sv_query): sv_ch_ht = self._family_filtered_ch_ht(sv_ht, overlapped_families, sv_families, 'v2') ch_ht = variant_ch_ht.join(sv_ch_ht) + # TODO not working for sv/varint pairs, phasing is wrong return self._filter_grouped_compound_hets(ch_ht) @staticmethod From cd4ac98f5d76cdef86d710099700849529a50d82 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 13 Nov 2023 16:17:29 -0500 Subject: [PATCH 4/4] fix multi data type comp het --- hail_search/queries/base.py | 2 +- hail_search/queries/multi_data_types.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 70a520fbb1..136f823745 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -372,7 +372,7 @@ def _add_entry_sample_families(cls, ht, sample_data): num_families = len(family_index_map) family_sample_indices = [None] * num_families sample_id_family_index_map = {} - for sample_id, family_guid in sample_id_family_map.items(): + for sample_id, family_guid in sorted(sample_id_family_map.items()): sample_index = sample_id_index_map[sample_id] family_index = family_index_map[family_guid] if not family_sample_indices[family_index]: diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index b2392e6706..ed3cdb9027 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -57,7 +57,6 @@ def _filter_data_type_comp_hets(self, variant_ht, variant_families, sv_query): sv_ch_ht = self._family_filtered_ch_ht(sv_ht, overlapped_families, sv_families, 'v2') ch_ht = variant_ch_ht.join(sv_ch_ht) - # TODO not working for sv/varint pairs, phasing is wrong return self._filter_grouped_compound_hets(ch_ht) @staticmethod