From 831cb1ced15193b2bccd9aede9b638e87f2d98c7 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 30 May 2024 17:43:30 -0400
Subject: [PATCH 01/47] add functional tag

---
 ...ariantfunctionaldata_functional_data_tag.py | 18 ++++++++++++++++++
 seqr/models.py                                 |  4 ++++
 2 files changed, 22 insertions(+)
 create mode 100644 seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py

diff --git a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py
new file mode 100644
index 0000000000..027652323a
--- /dev/null
+++ b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.23 on 2024-05-30 21:41
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('seqr', '0066_family_post_discovery_mondo_id'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='variantfunctionaldata',
+            name='functional_data_tag',
+            field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": 
"Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"description": "Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]),
+        ),
+    ]
diff --git a/seqr/models.py b/seqr/models.py
index 945cf17894..d04c4a0258 100644
--- a/seqr/models.py
+++ b/seqr/models.py
@@ -935,6 +935,10 @@ class VariantFunctionalData(ModelWithGUID):
                 'description': 'Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.',
                 'color': '#E985DC',
             })),
+            ('Partial Phenotype Contribution', json.dumps({
+                'description': 'Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.',
+                'color': '#1F42D9',
+            })),
         )),
     )
 

From 0c98322dc475af5de0ca70fc23bab714f5a1cb3a Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 30 May 2024 17:52:40 -0400
Subject: [PATCH 02/47] add metadata title

---
 .../0067_alter_variantfunctionaldata_functional_data_tag.py   | 4 ++--
 seqr/models.py                                                | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py
index 027652323a..e8f2e6358a 100644
--- a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py
+++ b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.2.23 on 2024-05-30 21:41
+# Generated by Django 3.2.23 on 2024-05-30 21:51
 
 from django.db import migrations, models
 
@@ -13,6 +13,6 @@ class Migration(migrations.Migration):
         migrations.AlterField(
             model_name='variantfunctionaldata',
             name='functional_data_tag',
-            field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": 
"Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"description": "Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]),
+            field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": 
"Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"metadata_title": "HPO Terms", "description": "Variant is believed to be part of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]),
         ),
     ]
diff --git a/seqr/models.py b/seqr/models.py
index d04c4a0258..1cd1f46b0e 100644
--- a/seqr/models.py
+++ b/seqr/models.py
@@ -936,7 +936,8 @@ class VariantFunctionalData(ModelWithGUID):
                 'color': '#E985DC',
             })),
             ('Partial Phenotype Contribution', json.dumps({
-                'description': 'Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.',
+                'metadata_title': 'HPO Terms',
+                'description': 'Variant is believed to be part of the solve, explaining only some of the phenotypes.',
                 'color': '#1F42D9',
             })),
         )),

From 323e8cd4c0d0020ea90cdc4cc88fbebc7468fdfb Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 11:30:41 -0400
Subject: [PATCH 03/47] select HPO terms for phenotype functional tag

---
 seqr/views/utils/orm_to_json_utils.py         |  3 +-
 ui/redux/selectors.js                         | 10 +++++++
 .../panel/view-fields/TagFieldView.jsx        | 28 ++++++++++++++++---
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py
index daaa38d98a..67a943fc8f 100644
--- a/seqr/views/utils/orm_to_json_utils.py
+++ b/seqr/views/utils/orm_to_json_utils.py
@@ -441,8 +441,7 @@ def _format_functional_tags(tags):
         display_data = VariantFunctionalData.FUNCTIONAL_DATA_TAG_LOOKUP[name]
         tag.update({
             'name': name,
-            'metadataTitle': display_data.get('metadata_title', 'Notes'),
-            'color': display_data['color'],
+            **{k: display_data[k] for k in ['metadataTitle', 'color']},
         })
     return tags
 
diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js
index db076a4937..cf9a0a989d 100644
--- a/ui/redux/selectors.js
+++ b/ui/redux/selectors.js
@@ -425,6 +425,16 @@ export const getUserOptions = createSelector(
   ),
 )
 
+export const getHpoTermOptionsByFamily = createSelector(
+  getIndividualsByFamily,
+  individualsByFamily => Object.entries(individualsByFamily).reduce((acc, [familyGuid, individuals]) => ({
+    ...acc,
+    [familyGuid]: individuals.reduce((fAcc, { features }) => ([...fAcc, ...(features || []).map(
+      ({ id, label }) => ({ value: id, text: label, description: id }),
+    )]), []),
+  }), {}),
+)
+
 export const getRnaSeqSignificantJunctionData = createSelector(
   getGenesById,
   getIndividualsByGuid,
diff --git a/ui/shared/components/panel/view-fields/TagFieldView.jsx b/ui/shared/components/panel/view-fields/TagFieldView.jsx
index 9b40e354a5..ab492b93e1 100644
--- a/ui/shared/components/panel/view-fields/TagFieldView.jsx
+++ b/ui/shared/components/panel/view-fields/TagFieldView.jsx
@@ -1,10 +1,12 @@
 import React from 'react'
+import { connect } from 'react-redux'
 import { NavLink } from 'react-router-dom'
 import PropTypes from 'prop-types'
 import styled from 'styled-components'
 import { Popup, Form } from 'semantic-ui-react'
 import { Field } from 'react-final-form'
 
+import { getHpoTermOptionsByFamily } from 'redux/selectors'
 import { HorizontalSpacer } from '../../Spacers'
 import { ColoredLabel, ColoredOutlineLabel } from '../../StyledComponents'
 import { LargeMultiselect, Multiselect } from '../../form/Inputs'
@@ -32,6 +34,15 @@ MultiselectField.propTypes = {
   input: PropTypes.object,
 }
 
+const mapHpoDropdownStateToProps = (state, ownProps) => ({
+  options: getHpoTermOptionsByFamily(state)[ownProps.metadataId],
+})
+
+const LIST_FORMAT_PROPS = {
+  format: val => (val || '').split(', ').filter(v => v),
+  parse: val => (val || []).join(', '),
+}
+
 const METADATA_FIELD_PROPS = {
   [NOTES_METADATA_TITLE]: { width: 16, maxLength: 50, placeholder: 'Enter up to 50 characters' },
   Reason: { width: 16, maxLength: 50, placeholder: 'Brief reason for excluding. Enter up to 50 characters' },
@@ -43,12 +54,16 @@ const METADATA_FIELD_PROPS = {
     addValueOptions: true,
     options: ['Sanger', 'Segregation', 'SV', 'Splicing'].map(value => ({ value })),
     placeholder: 'Select test types or add your own',
-    format: val => (val || '').split(', ').filter(v => v),
-    parse: val => (val || []).join(', '),
+    ...LIST_FORMAT_PROPS,
+  },
+  'HPO Terms': {
+    width: 16,
+    component: connect(mapHpoDropdownStateToProps)(MultiselectField),
+    ...LIST_FORMAT_PROPS,
   },
 }
 
-const MetadataField = React.memo(({ value, name, error }) => {
+const MetadataField = React.memo(({ value, name, error, metadataId }) => {
   if (!value.metadataTitle) {
     return null
   }
@@ -62,6 +77,7 @@ const MetadataField = React.memo(({ value, name, error }) => {
         component={Form.Input}
         label={value.metadataTitle}
         error={error}
+        metadataId={metadataId}
         {...fieldProps}
       />
     </MetadataFormGroup>
@@ -72,6 +88,7 @@ MetadataField.propTypes = {
   value: PropTypes.object,
   name: PropTypes.string,
   error: PropTypes.bool,
+  metadataId: PropTypes.string,
 }
 
 export const TagFieldDisplay = React.memo(({
@@ -129,6 +146,7 @@ class TagFieldView extends React.PureComponent {
     noEditTagTypes: PropTypes.arrayOf(PropTypes.string),
     linkTagType: PropTypes.string,
     tagLinkUrl: PropTypes.string,
+    modalId: PropTypes.string,
   }
 
   getSimplifiedProps() {
@@ -199,7 +217,7 @@ class TagFieldView extends React.PureComponent {
 
   render() {
     const {
-      simplifiedValue, field, tagOptions, popup, tagAnnotation, validate, displayMetadata, ...props
+      simplifiedValue, field, tagOptions, popup, tagAnnotation, validate, displayMetadata, modalId, ...props
     } = this.props
 
     const additionalFields = tagOptions.some(({ metadataTitle }) => metadataTitle) ? [{
@@ -208,6 +226,7 @@ class TagFieldView extends React.PureComponent {
       isArrayField: true,
       validate: val => ((!val || !val.metadataTitle || val.metadataTitle === NOTES_METADATA_TITLE || val.metadata) ? undefined : 'Required'),
       component: MetadataField,
+      metadataId: modalId,
     }] : []
 
     return (
@@ -216,6 +235,7 @@ class TagFieldView extends React.PureComponent {
         additionalEditFields={additionalFields}
         modalStyle={MODAL_STYLE}
         fieldDisplay={this.fieldDisplay}
+        modalId={modalId}
         {...props}
         {...(simplifiedValue ? this.getSimplifiedProps() : this.getMappedProps())}
       />

From 97499ea95fe862c1715337393838610873ec4700 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 11:52:08 -0400
Subject: [PATCH 04/47] add partial contribution to gregor report

---
 seqr/views/apis/report_api.py                  | 4 ++--
 seqr/views/utils/anvil_metadata_utils.py       | 7 ++++++-
 ui/pages/Report/components/VariantMetadata.jsx | 1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 7316cd48b0..da586f4cdb 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -241,7 +241,7 @@ def _add_row(row, family_id, row_type):
 }
 GENETIC_FINDINGS_TABLE_COLUMNS = {
     'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp',
-    'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution',
+    'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained',
     'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage',
     'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery',
     'gene_disease_validity',
@@ -379,7 +379,7 @@ def _add_row(row, family_id, row_type):
         elif row_type == DISCOVERY_ROW_TYPE and row:
             for variant in row:
                 genetic_findings_rows.append({
-                    **variant, 'phenotype_contribution': 'Full', 'variant_type': 'SNV/INDEL',
+                    **variant, 'variant_type': 'SNV/INDEL',
                 })
 
     parse_anvil_metadata(
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 8a8d0cdc95..b16a929dcb 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -328,7 +328,10 @@ def _get_parsed_saved_discovery_variants_by_family(
     project_saved_variants = SavedVariant.objects.filter(
         varianttag__variant_tag_type__in=tag_types, family__id__in=families,
         **(variant_filter or {}),
-    ).order_by('created_date').distinct().annotate(tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True))
+    ).order_by('created_date').distinct().annotate(
+        tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True),
+        partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')),
+    )
 
     variants = []
     gene_ids = set()
@@ -348,6 +351,8 @@ def _get_parsed_saved_discovery_variants_by_family(
             'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(),
             'seqr_chosen_consequence': main_transcript.get('majorConsequence'),
             'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate',
+            'phenotype_contribution': 'Partial' if variant.partial_hpo_terms else 'Full',
+            'partial_contribution_explained': variant.partial_hpo_terms[0].replace(', ', '|') if variant.partial_hpo_terms else None,
             **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()},
             **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])},
             **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']},
diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx
index b09db6a2ab..7a2e7e03f6 100644
--- a/ui/pages/Report/components/VariantMetadata.jsx
+++ b/ui/pages/Report/components/VariantMetadata.jsx
@@ -19,6 +19,7 @@ const COLUMNS = [
   { name: 'condition_id' },
   { name: 'condition_inheritance' },
   { name: 'phenotype_contribution' },
+  { name: 'partial_contribution_explained' },
   { name: 'additional_family_members_with_variant' },
   { name: 'method_of_discovery' },
   { name: 'Submitted to MME', format: ({ MME }) => (MME ? 'Yes' : 'No') },

From 64a11ff2435aa49ad33def5318f22be3fc2fbacd Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 11:57:23 -0400
Subject: [PATCH 05/47] support uncertain contribution

---
 seqr/views/utils/anvil_metadata_utils.py | 10 ++++++++--
 ui/redux/selectors.js                    |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index b16a929dcb..3b4638b9ea 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -343,6 +343,12 @@ def _get_parsed_saved_discovery_variants_by_family(
         gene_id = main_transcript.get('geneId')
         gene_ids.add(gene_id)
 
+        partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else ''
+        phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full'
+        if partial_hpo_terms == 'Uncertain':
+            phenotype_contribution = 'Uncertain'
+            partial_hpo_terms = ''
+
         variants.append({
             'chrom': chrom,
             'pos': pos,
@@ -351,8 +357,8 @@ def _get_parsed_saved_discovery_variants_by_family(
             'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(),
             'seqr_chosen_consequence': main_transcript.get('majorConsequence'),
             'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate',
-            'phenotype_contribution': 'Partial' if variant.partial_hpo_terms else 'Full',
-            'partial_contribution_explained': variant.partial_hpo_terms[0].replace(', ', '|') if variant.partial_hpo_terms else None,
+            'phenotype_contribution': phenotype_contribution,
+            'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'),
             **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()},
             **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])},
             **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']},
diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js
index cf9a0a989d..fbc57692b0 100644
--- a/ui/redux/selectors.js
+++ b/ui/redux/selectors.js
@@ -431,7 +431,7 @@ export const getHpoTermOptionsByFamily = createSelector(
     ...acc,
     [familyGuid]: individuals.reduce((fAcc, { features }) => ([...fAcc, ...(features || []).map(
       ({ id, label }) => ({ value: id, text: label, description: id }),
-    )]), []),
+    )]), [{ value: 'Uncertain' }]),
   }), {}),
 )
 

From ce901d0cc8681caf8bafc895e5737c585088eb90 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 12:02:19 -0400
Subject: [PATCH 06/47] include phenotype contribution in individual metadata

---
 ui/pages/Report/components/VariantMetadata.jsx | 2 --
 ui/shared/utils/constants.js                   | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx
index 7a2e7e03f6..ee7fe71e10 100644
--- a/ui/pages/Report/components/VariantMetadata.jsx
+++ b/ui/pages/Report/components/VariantMetadata.jsx
@@ -18,8 +18,6 @@ const COLUMNS = [
   { name: 'known_condition_name' },
   { name: 'condition_id' },
   { name: 'condition_inheritance' },
-  { name: 'phenotype_contribution' },
-  { name: 'partial_contribution_explained' },
   { name: 'additional_family_members_with_variant' },
   { name: 'method_of_discovery' },
   { name: 'Submitted to MME', format: ({ MME }) => (MME ? 'Yes' : 'No') },
diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js
index 4d107cc89a..78a34356c5 100644
--- a/ui/shared/utils/constants.js
+++ b/ui/shared/utils/constants.js
@@ -1867,6 +1867,8 @@ export const VARIANT_METADATA_COLUMNS = [
   { name: 'sv_type', fieldName: 'svType', format: ({ svType }) => SVTYPE_LOOKUP[svType] || svType },
   { name: 'variant_inheritance' },
   { name: 'gene_known_for_phenotype' },
+  { name: 'phenotype_contribution' },
+  { name: 'partial_contribution_explained' },
   { name: 'notes' },
 ]
 

From 5fb0015cbb471e64a733d8e6b7cd2df61842763c Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 12:32:57 -0400
Subject: [PATCH 07/47] add tests

---
 seqr/fixtures/report_variants.json        | 26 +++++++++++++++++++++++
 seqr/views/apis/report_api_tests.py       |  9 +++++---
 seqr/views/apis/summary_data_api_tests.py | 15 +++++++++----
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/seqr/fixtures/report_variants.json b/seqr/fixtures/report_variants.json
index bae02ef233..cb096d1e70 100644
--- a/seqr/fixtures/report_variants.json
+++ b/seqr/fixtures/report_variants.json
@@ -139,5 +139,31 @@
         "variant_tag_type": 4,
         "search_hash": null
     }
+},
+{
+    "model": "seqr.variantfunctionaldata",
+    "pk": 29,
+    "fields": {
+        "guid": "VFD0000029_1248367227_r0390_10",
+        "created_date": "2018-05-24T15:34:01.353Z",
+        "created_by": null,
+        "last_modified_date": "2024-05-24T15:34:01.365Z",
+        "saved_variants": [6],
+        "functional_data_tag": "Partial Phenotype Contribution",
+        "metadata": "HP:0000501, HP:0000365"
+    }
+},
+{
+    "model": "seqr.variantfunctionaldata",
+    "pk": 30,
+    "fields": {
+        "guid": "VFD0000030_1248367227_r0390_10",
+        "created_date": "2018-05-24T15:34:01.353Z",
+        "created_by": null,
+        "last_modified_date": "2024-05-24T15:34:01.365Z",
+        "saved_variants": [2],
+        "functional_data_tag": "Partial Phenotype Contribution",
+        "metadata": "Uncertain"
+    }
 }
 ]
\ No newline at end of file
diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py
index 6e3ef0ed8d..c99b6b84c1 100644
--- a/seqr/views/apis/report_api_tests.py
+++ b/seqr/views/apis/report_api_tests.py
@@ -508,6 +508,7 @@
     'method_of_discovery': 'SR-ES',
     'notes': None,
     'phenotype_contribution': 'Full',
+    'partial_contribution_explained': '',
     'phenotype_description': None,
     'pmid_id': None,
     'seqr_chosen_consequence': None,
@@ -612,12 +613,12 @@
     ], [
         'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1',
         '248367227', 'TC', 'T', '', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '',
-        'MONDO:0044970', '', 'Full', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '',
+        'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '',
     ], [
         'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T',
         '', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown',
         'Broad_NA20889_1_249045487', '', 'Candidate', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant',
-        'Full', '', '', 'SR-ES', '', '', '', '', '', '', '',
+        'Partial', 'HP:0000501|HP:0000365', '', 'SR-ES', '', '', '', '', '', '', '',
     ], [
         'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '',
         'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate',
@@ -1219,7 +1220,7 @@ def test_variant_metadata(self):
             'genetic_findings_id': 'HG00731_1_248367227',
             'known_condition_name': 'mitochondrial disease',
             'participant_id': 'HG00731',
-            'phenotype_contribution': 'Full',
+            'phenotype_contribution': 'Uncertain',
             'phenotype_description': 'microcephaly; seizures',
             'pos': 248367227,
             'projectGuid': 'R0001_1kg',
@@ -1290,6 +1291,8 @@ def test_variant_metadata(self):
             'hgvsp': 'c.1586-17C>G',
             'participant_id': 'NA20889',
             'pos': 248367227,
+            'partial_contribution_explained': 'HP:0000501|HP:0000365',
+            'phenotype_contribution': 'Partial',
             'projectGuid': 'R0003_test',
             'internal_project_id': 'Test Reprocessed Project',
             'ref': 'TC',
diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py
index 2c8a9a5354..96a6f5d580 100644
--- a/seqr/views/apis/summary_data_api_tests.py
+++ b/seqr/views/apis/summary_data_api_tests.py
@@ -107,6 +107,10 @@
     'notes-2': None,
     'tags-1': ['Tier 1 - Novel gene and phenotype'],
     'tags-2': ['Tier 1 - Novel gene and phenotype'],
+    'phenotype_contribution-1': 'Partial',
+    'phenotype_contribution-2': 'Full',
+    'partial_contribution_explained-1': 'HP:0000501|HP:0000365',
+    'partial_contribution_explained-2': '',
     'condition_id': 'OMIM:616126',
     'condition_inheritance': 'Autosomal recessive',
     'known_condition_name': 'Immunodeficiency 38',
@@ -153,6 +157,8 @@
     'chrom-1': '1',
     'gene_known_for_phenotype-1': 'Candidate',
     'tags-1': ['Tier 1 - Novel gene and phenotype'],
+    'phenotype_contribution-1': 'Full',
+    'partial_contribution_explained-1': '',
     'pos-1': 248367227,
     'end-1': None,
     'ref-1': 'TC',
@@ -352,7 +358,8 @@ def test_saved_variants_page(self):
         response = self.client.get('{}?gene=ENSG00000135953'.format(all_tag_url))
         self.assertEqual(response.status_code, 200)
         expected_variant_guids.add('SV0000002_1248367227_r0390_100')
-        self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), expected_variant_guids)
+        report_variants = {'SV0027168_191912632_r0384_rare', 'SV0027167_191912633_r0384_rare', 'SV0027166_191912634_r0384_rare'}
+        self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), {*report_variants, *expected_variant_guids})
 
         multi_tag_url = reverse(saved_variants_page, args=['Review;Tier 1 - Novel gene and phenotype'])
         response = self.client.get('{}?gene=ENSG00000135953'.format(multi_tag_url))
@@ -369,7 +376,7 @@ def test_saved_variants_page(self):
         self.assertEqual(response.status_code, 200)
         self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), {
             'SV0000001_2103343353_r0390_100', 'SV0000002_1248367227_r0390_100', 'SV0000007_prefix_19107_DEL_r00',
-            'SV0000006_1248367227_r0003_tes',
+            'SV0000006_1248367227_r0003_tes', *report_variants,
         })
 
         multi_discovery_tag_url = reverse(saved_variants_page, args=['CMG Discovery Tags;Review'])
@@ -707,7 +714,7 @@ def test_sample_metadata_export(self, mock_google_authenticated):
 
 # Tests for AnVIL access disabled
 class LocalSummaryDataAPITest(AuthenticationTestCase, SummaryDataAPITest):
-    fixtures = ['users', '1kg_project', 'reference_data']
+    fixtures = ['users', '1kg_project', 'reference_data', 'report_variants']
     NUM_MANAGER_SUBMISSIONS = 4
     ADDITIONAL_SAMPLES = ['NA21234', 'NA21987']
 
@@ -723,7 +730,7 @@ def assert_has_expected_calls(self, users, skip_group_call_idxs=None):
 
 # Test for permissions from AnVIL only
 class AnvilSummaryDataAPITest(AnvilAuthenticationTestCase, SummaryDataAPITest):
-    fixtures = ['users', 'social_auth', '1kg_project', 'reference_data']
+    fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants']
     NUM_MANAGER_SUBMISSIONS = 4
     ADDITIONAL_SAMPLES = []
 

From d9c53dabb4c23807fad51f40e9e660821f2abda1 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 12:36:32 -0400
Subject: [PATCH 08/47] bump changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 64626f8078..d52b04bd41 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 # _seqr_ Changes
 
 ## dev
+* Add "Partial Phenotype Contribution" functional tag (REQUIRES DB MIGRATION)
 
 ## 5/24/24
 * Adds external_data to Family model (REQUIRES DB MIGRATION)

From e03b6bbe57d5284cf228c927c62c191d8fbe3685 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 12:41:08 -0400
Subject: [PATCH 09/47] update ui tests

---
 .../components/IndividualMetadata.test.js           | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/ui/pages/SummaryData/components/IndividualMetadata.test.js b/ui/pages/SummaryData/components/IndividualMetadata.test.js
index 625bdf05e8..f2e4a34c7c 100644
--- a/ui/pages/SummaryData/components/IndividualMetadata.test.js
+++ b/ui/pages/SummaryData/components/IndividualMetadata.test.js
@@ -62,6 +62,10 @@ const DATA = [
     participant_id: 'NA20889',
     individual_guid: 'I000017_na20889',
     proband_relationship: 'Self',
+    'phenotype_contribution-1': 'Partial',
+    'phenotype_contribution-2': 'Full',
+    'partial_contribution_explained-1': 'HP:0000501|HP:0000365',
+    'partial_contribution_explained-2': '',
   },
 ]
 
@@ -77,17 +81,18 @@ test('IndividualMetadata render and export', () => {
     'filter_flags', 'consanguinity', 'family_history', 'genetic_findings_id-1', 'variant_reference_assembly-1',
     'chrom-1', 'pos-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1',
     'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'sv_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1',
-    'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2',
+    'phenotype_contribution-1', 'partial_contribution_explained-1', 'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2',
     'ref-2', 'alt-2', 'gene_of_interest-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2',
-    'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', 'notes-2'])
+    'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2',
+    'phenotype_contribution-2', 'partial_contribution_explained-2', 'notes-2'])
   expect(exportConfig.processRow(DATA[0])).toEqual([
     'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', null, '', '', '', '',
     'Self', 'Female', 'Ashkenazi Jewish', undefined, undefined, null, 'Affected',
     'HP:0011675 (Arrhythmia)|HP:0001509 ()', '', null, undefined, 'Waiting for data', 'Tier 1', 'Y', 'WES', '2017-02-05', '',
     undefined, 'Yes', 'NA20889_1_248367227', undefined, '1', 248367227, 'TC', 'T', 'OR4G11P', 'ENSG00000240361',
     'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', undefined, undefined,
-    'unknown', 'Candidate', undefined, 'NA20889_1_249045487', undefined, '12', '49045487', undefined,
+    'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'NA20889_1_249045487', undefined, '12', '49045487', undefined,
     undefined, undefined, undefined, undefined,
     undefined, undefined, undefined, 'Heterozygous', 'DEL:chr12:49045487-49045898', 'Deletion',
-    'unknown', 'Candidate', undefined])
+    'unknown', 'Candidate', 'Full', '', undefined])
 })

From fdcdb9b4812b38530279022e03e5347349535c08 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 13:21:06 -0400
Subject: [PATCH 10/47] clean up extra participant fields

---
 seqr/views/apis/report_api.py            | 18 ++++++++++++------
 seqr/views/utils/anvil_metadata_utils.py | 23 +++++++++++++++--------
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index da586f4cdb..86db286bfb 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -408,10 +408,11 @@ def _add_row(row, family_id, row_type):
     for participant in participant_rows:
         phenotype_rows += _parse_participant_phenotype_rows(participant)
 
-        if not participant[PARTICIPANT_ID_FIELD]:
+        airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD)
+        if not airtable_participant_id:
             continue
 
-        airtable_metadata = airtable_metadata_by_participant.get(participant[PARTICIPANT_ID_FIELD]) or {}
+        airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {}
         data_types = grouped_data_type_individuals[participant['participant_id']]
         _parse_participant_airtable_rows(
             participant, airtable_metadata, data_types, experiment_ids_by_participant,
@@ -467,11 +468,11 @@ def _get_individual_data_types(projects):
 def _parse_participant_phenotype_rows(participant):
     base_phenotype_row = {'participant_id': participant['participant_id'], 'presence': 'Present', 'ontology': 'HPO'}
     present_rows = [
-        dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['features'] or []
+        dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant.pop('features') or []
     ]
     base_phenotype_row['presence'] = 'Absent'
     return present_rows + [
-        dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['absent_features'] or []
+        dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant.pop('absent_features') or []
     ]
 
 
@@ -492,8 +493,10 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types,
             {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)}
         )
 
-    if participant['analyte_id'] and not has_analyte:
-        analyte_rows.append(participant)
+    # TODO constant
+    analyte_row = {k: participant.pop(k) for k in ['analyte_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status']}
+    if analyte_row['analyte_id'] and not has_analyte:
+        analyte_rows.append(analyte_row)
 
 
 def _get_gregor_airtable_data(participants, user):
@@ -647,12 +650,15 @@ def _populate_gregor_files(file_data):
 
         files.append((file_name, list(table_config.keys()), data))
 
+        expected_columns = {k for d in data for k, v in d.items() if v}  # TODO
         extra_columns = expected_columns.difference(table_config.keys())
         if extra_columns:
             col_summary = ', '.join(sorted(extra_columns))
             warnings.insert(
                 0, f'The following columns are computed for the "{file_name}" table but are missing from the data model: {col_summary}',
             )
+            errors.append(warnings[0]) # TODO
+            continue
         invalid_data_type_columns = {
             col: config['data_type'] for col, config in table_config.items()
             if config.get('data_type') and config['data_type'] not in DATA_TYPE_VALIDATORS
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 3b4638b9ea..0a31d270a5 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -136,7 +136,8 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include
     family_data_by_id = {}
     for f in family_data:
         family_id = f.pop('id')
-        solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(f['analysisStatus'], Individual.UNSOLVED)
+        analysis_status = f['analysisStatus'] if include_metadata else f.pop('analysisStatus')
+        solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(analysis_status, Individual.UNSOLVED)
         f.update({
             'solve_status': Individual.SOLVE_STATUS_LOOKUP[solve_status],
             **{k: v['format'](f) for k, v in (family_fields or {}).items()},
@@ -230,7 +231,7 @@ def parse_anvil_metadata(
 
             subject_row = _get_subject_row(
                 individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields,
-                format_id,
+                format_id, include_metadata,
             )
             if individual.id in matchmaker_individuals:
                 subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes'
@@ -400,7 +401,7 @@ def _get_transcript_field(field, config, transcript):
     return value
 
 
-def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id):
+def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id, include_metadata):
     paternal_ids = individual_ids_map.get(individual.father_id, ('', ''))
     maternal_ids = individual_ids_map.get(individual.mother_id, ('', ''))
     subject_row = {
@@ -414,19 +415,25 @@ def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, indivi
         'absent_features': individual.absent_features,
         'proband_relationship': Individual.RELATIONSHIP_LOOKUP.get(individual.proband_relationship, ''),
         'paternal_id': format_id(paternal_ids[0]),
-        'paternal_guid': paternal_ids[1],
         'maternal_id': format_id(maternal_ids[0]),
-        'maternal_guid': maternal_ids[1],
     }
+    if include_metadata:
+        subject_row.update({
+            'paternal_guid': paternal_ids[1],
+            'maternal_guid': maternal_ids[1],
+        })
     if airtable_metadata is not None:
         sequencing = airtable_metadata.get('SequencingProduct') or set()
         subject_row.update({
-            'dbgap_submission': 'Yes' if has_dbgap_submission else 'No',
             'dbgap_study_id': airtable_metadata.get('dbgap_study_id', '') if has_dbgap_submission else '',
             'dbgap_subject_id': airtable_metadata.get('dbgap_subject_id', '') if has_dbgap_submission else '',
-            'multiple_datasets': 'Yes' if len(sequencing) > 1 or (
-            len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No',
         })
+        if include_metadata:
+            subject_row.update({
+                'dbgap_submission': 'Yes' if has_dbgap_submission else 'No',
+                'multiple_datasets': 'Yes' if len(sequencing) > 1 or (
+                        len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No',
+            })
     if get_additional_individual_fields:
         subject_row.update(get_additional_individual_fields(individual, airtable_metadata))
     return subject_row

From 438a9ff7342d98a32b7f0db8e501d66a98081ec6 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 13:34:43 -0400
Subject: [PATCH 11/47] clean up extra family fields

---
 seqr/views/utils/anvil_metadata_utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 0a31d270a5..321df328cc 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -202,10 +202,13 @@ def parse_anvil_metadata(
             family_subject_row, saved_variants, *condition_map, set_conditions_for_variants=proband_only_variants,
         )
 
-        affected_individuals = [individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED]
+        affected_individuals = [
+            individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED
+        ] if include_metadata else []
 
+        subject_family_row = {k: family_subject_row.pop(k) for k in ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']}  # TODO constant
         family_row = {
-            'family_id': family_subject_row['family_id'],
+            'family_id': subject_family_row['family_id'],
             'consanguinity': next((
                 'Present' if individual.consanguinity else 'None suspected'
                 for individual in family_individuals if individual.consanguinity is not None
@@ -235,7 +238,7 @@ def parse_anvil_metadata(
             )
             if individual.id in matchmaker_individuals:
                 subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes'
-            subject_row.update(family_subject_row)
+            subject_row.update(subject_family_row)
             if individual.solve_status:
                 subject_row['solve_status'] = Individual.SOLVE_STATUS_LOOKUP[individual.solve_status]
             elif individual.affected != Individual.AFFECTED_STATUS_AFFECTED:

From d6dccefb0e3a07a3e9b60d92ccb06bef8262deb4 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 14:13:25 -0400
Subject: [PATCH 12/47] clean up airtable rows

---
 seqr/views/apis/report_api.py | 50 ++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 86db286bfb..352948a20c 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -207,9 +207,10 @@ def _add_row(row, family_id, row_type):
     'targeted_region_bed_file', 'date_data_generation', 'target_insert_size', 'sequencing_platform',
 ]
 EXPERIMENT_COLUMNS = {'analyte_id', 'experiment_sample_id'}
-EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id', 'sequencing_event_details'}
+EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id'}
 EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS)
 EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_TABLE_AIRTABLE_FIELDS)
+EXPERIMENT_RNA_TABLE = 'experiment_rna_short_read'
 EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS = [
     'library_prep_type', 'single_or_paired_ends', 'within_site_batch_name', 'RIN', 'estimated_library_size',
     'total_reads', 'percent_rRNA', 'percent_mRNA', '5prime3prime_bias',
@@ -219,12 +220,14 @@ def _add_row(row, family_id, row_type):
 EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS)
 EXPERIMENT_RNA_TABLE_COLUMNS.update([c for c in EXPERIMENT_TABLE_AIRTABLE_FIELDS if not c.startswith('target')])
 EXPERIMENT_LOOKUP_TABLE_COLUMNS = {'experiment_id', 'table_name', 'id_in_table', 'participant_id'}
+READ_TABLE = 'aligned_dna_short_read'
 READ_TABLE_AIRTABLE_FIELDS = [
     'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly',
     'mean_coverage', 'alignment_software', 'analysis_details',
 ]
 READ_TABLE_COLUMNS = {'aligned_dna_short_read_id', 'experiment_dna_short_read_id'}
 READ_TABLE_COLUMNS.update(READ_TABLE_AIRTABLE_FIELDS)
+READ_RNA_TABLE = 'aligned_rna_short_read'
 READ_RNA_TABLE_AIRTABLE_ID_FIELDS = ['aligned_rna_short_read_file', 'aligned_rna_short_read_index_file']
 READ_RNA_TABLE_AIRTABLE_FIELDS = [
     'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', 'percent_unaligned',
@@ -233,12 +236,25 @@ def _add_row(row, family_id, row_type):
 READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_ID_FIELDS)
 READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_FIELDS)
 READ_RNA_TABLE_COLUMNS.update(READ_TABLE_AIRTABLE_FIELDS[2:-1])
+READ_SET_TABLE = 'aligned_dna_short_read_set'
 READ_SET_TABLE_COLUMNS = {'aligned_dna_short_read_set_id', 'aligned_dna_short_read_id'}
+CALLED_TABLE = 'called_variants_dna_short_read'
 CALLED_VARIANT_FILE_COLUMN = 'called_variants_dna_file'
 CALLED_TABLE_COLUMNS = {
     'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum',
     'caller_software', 'variant_types', 'analysis_details',
 }
+AIRTABLE_TABLE_COLUMNS = {
+    EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS,
+    READ_TABLE: READ_TABLE_COLUMNS,
+    READ_SET_TABLE: READ_SET_TABLE_COLUMNS,
+    CALLED_TABLE: CALLED_TABLE_COLUMNS,
+    EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS,
+    READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS,
+}
+RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE}
+DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES
+
 GENETIC_FINDINGS_TABLE_COLUMNS = {
     'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp',
     'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained',
@@ -401,8 +417,7 @@ def _add_row(row, family_id, row_type):
 
     phenotype_rows = []
     analyte_rows = []
-    airtable_rows = []
-    airtable_rna_rows = []
+    airtable_rows = {table: [] for table in AIRTABLE_TABLE_COLUMNS.keys()}
     experiment_lookup_rows = []
     experiment_ids_by_participant = {}
     for participant in participant_rows:
@@ -416,7 +431,7 @@ def _add_row(row, family_id, row_type):
         data_types = grouped_data_type_individuals[participant['participant_id']]
         _parse_participant_airtable_rows(
             participant, airtable_metadata, data_types, experiment_ids_by_participant,
-            analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows,
+            analyte_rows, airtable_rows, experiment_lookup_rows,
         )
 
     # Add experiment IDs
@@ -428,14 +443,7 @@ def _add_row(row, family_id, row_type):
         ('family', GREGOR_FAMILY_TABLE_COLUMNS, list(family_map.values())),
         (PHENOTYPE_TABLE, PHENOTYPE_TABLE_COLUMNS, phenotype_rows),
         ('analyte', ANALYTE_TABLE_COLUMNS, analyte_rows),
-        (EXPERIMENT_TABLE, EXPERIMENT_TABLE_COLUMNS, airtable_rows),
-        ('aligned_dna_short_read', READ_TABLE_COLUMNS, airtable_rows),
-        ('aligned_dna_short_read_set', READ_SET_TABLE_COLUMNS, airtable_rows),
-        ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, [
-            row for row in airtable_rows if row.get(CALLED_VARIANT_FILE_COLUMN)
-        ]),
-        ('experiment_rna_short_read', EXPERIMENT_RNA_TABLE_COLUMNS, airtable_rna_rows),
-        ('aligned_rna_short_read', READ_RNA_TABLE_COLUMNS, airtable_rna_rows),
+        *[(table, AIRTABLE_TABLE_COLUMNS[table], rows) for table, rows in airtable_rows.items()],
         (EXPERIMENT_LOOKUP_TABLE, EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows),
         (FINDINGS_TABLE, GENETIC_FINDINGS_TABLE_COLUMNS, genetic_findings_rows),
     ]
@@ -477,7 +485,7 @@ def _parse_participant_phenotype_rows(participant):
 
 
 def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant,
-                                     analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows):
+                                     analyte_rows, airtable_rows, experiment_lookup_rows):
     has_analyte = False
     # airtable data
     for data_type in data_types:
@@ -488,7 +496,16 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types,
         analyte_rows.append({**participant, **row})
         if not is_rna:
             experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id']
-        (airtable_rna_rows if is_rna else airtable_rows).append(row)
+        for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES):
+            if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN):
+                continue
+            try:
+                airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row})
+            except KeyError as e:
+                # TODO
+                import pdb; pdb.set_trace()
+                raise e
+
         experiment_lookup_rows.append(
             {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)}
         )
@@ -798,7 +815,10 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e
 
 
 def _get_row_id(row):
-    id_col = next(col for col in ['genetic_findings_id', 'participant_id', 'experiment_sample_id', 'family_id'] if col in row)
+    id_col = next(col for col in [
+        'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id',
+        'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'family_id',
+    ] if col in row)
     return row[id_col]
 
 

From c27890849d4180642cc97d302532fab61c984374 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 14:21:55 -0400
Subject: [PATCH 13/47] clean up analyte table

---
 seqr/views/apis/report_api.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 352948a20c..8549d5cf4b 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -487,31 +487,26 @@ def _parse_participant_phenotype_rows(participant):
 def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant,
                                      analyte_rows, airtable_rows, experiment_lookup_rows):
     has_analyte = False
+    analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS}
+    participant['participant_id'] = analyte_row['participant_id']
     # airtable data
     for data_type in data_types:
         if data_type not in airtable_metadata:
             continue
         is_rna, row = _get_airtable_row(data_type, airtable_metadata)
         has_analyte = True
-        analyte_rows.append({**participant, **row})
+        analyte_rows.append({**analyte_row, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}})
         if not is_rna:
             experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id']
         for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES):
             if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN):
                 continue
-            try:
-                airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row})
-            except KeyError as e:
-                # TODO
-                import pdb; pdb.set_trace()
-                raise e
+            airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row})
 
         experiment_lookup_rows.append(
             {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)}
         )
 
-    # TODO constant
-    analyte_row = {k: participant.pop(k) for k in ['analyte_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status']}
     if analyte_row['analyte_id'] and not has_analyte:
         analyte_rows.append(analyte_row)
 

From 9947b335ccf559628f220d18af942ad0db8802c9 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 14:33:15 -0400
Subject: [PATCH 14/47] clean up extra findings fields

---
 seqr/views/apis/report_api.py            |  1 -
 seqr/views/apis/summary_data_api.py      |  1 -
 seqr/views/utils/anvil_metadata_utils.py | 23 +++++++++++++++--------
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 8549d5cf4b..b072e7b8b4 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -918,7 +918,6 @@ def _add_row(row, family_id, row_type):
         elif row_type == DISCOVERY_ROW_TYPE:
             family = families_by_id[family_id]
             for variant in row:
-                del variant['gene_ids']
                 variant_rows.append({
                     'MME': variant.pop('variantId') in participant_mme[variant['participant_id']].get('variant_ids', []),
                     'phenotype_contribution': 'Full',
diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py
index 4ebcc4db3e..811052e52a 100644
--- a/seqr/views/apis/summary_data_api.py
+++ b/seqr/views/apis/summary_data_api.py
@@ -285,7 +285,6 @@ def _add_row(row, family_id, row_type):
             family_rows_by_id[family_id] = row
         elif row_type == DISCOVERY_ROW_TYPE:
             for i, discovery_row in enumerate(row):
-                del discovery_row['gene_ids']
                 participant_id = discovery_row.pop('participant_id')
                 parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items()}
                 parsed_row['num_saved_variants'] = len(row)
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 321df328cc..7a6e184c08 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -182,7 +182,7 @@ def parse_anvil_metadata(
             sample_ids.add(sample.sample_id)
 
     saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family(
-        list(family_data_by_id.keys()), variant_filter=variant_filter, variant_json_fields=variant_json_fields,
+        list(family_data_by_id.keys()), include_metadata, variant_filter=variant_filter, variant_json_fields=variant_json_fields,
     )
 
     condition_map = _get_condition_map(family_data_by_id.values())
@@ -325,7 +325,7 @@ def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False):
 
 
 def _get_parsed_saved_discovery_variants_by_family(
-        families: Iterable[Family], variant_filter: dict, variant_json_fields: list[str],
+        families: Iterable[Family], include_metadata: bool, variant_filter: dict, variant_json_fields: list[str],
 ):
     tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY)
 
@@ -353,26 +353,32 @@ def _get_parsed_saved_discovery_variants_by_family(
             phenotype_contribution = 'Uncertain'
             partial_hpo_terms = ''
 
-        variants.append({
+        variant = {
             'chrom': chrom,
             'pos': pos,
             'variant_reference_assembly': GENOME_VERSION_LOOKUP[variant_json['genomeVersion']],
             'gene_id': gene_id,
             'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(),
-            'seqr_chosen_consequence': main_transcript.get('majorConsequence'),
             'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate',
             'phenotype_contribution': phenotype_contribution,
             'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'),
             **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()},
             **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])},
-            **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']},
-        })
+            **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']},
+        }
+        if include_metadata:
+            variant.update({
+                'seqr_chosen_consequence': main_transcript.get('majorConsequence'),
+                'tags': variant.tags,
+            })
+        variants.append(variant)
 
     genes_by_id = get_genes(gene_ids)
 
     saved_variants_by_family = defaultdict(list)
     for row in variants:
-        row[GENE_COLUMN] = genes_by_id.get(row['gene_id'], {}).get('geneSymbol')
+        gene_id = row['gene_id'] if include_metadata else row.pop('gene_id')
+        row[GENE_COLUMN] = genes_by_id.get(gene_id, {}).get('geneSymbol')
         family_id = row.pop('family_id')
         saved_variants_by_family[family_id].append(row)
 
@@ -586,8 +592,9 @@ def _update_conditions(family_subject_row, variants, omim_conditions, mondo_cond
             c for mim_number in mim_numbers for c in omim_conditions[mim_number][None]
             if c['chrom'] == v['chrom'] and c['start'] <= v['pos'] <= c['end']
         ]
+        gene_ids = v.pop('gene_ids')
         for mim_number in mim_numbers:
-            for gene_id in v['gene_ids']:
+            for gene_id in gene_ids:
                 variant_conditions += omim_conditions[mim_number][gene_id]
 
         if set_conditions_for_variants:

From 7c7f19590efe0e5ecdb3b555c5110decea0b7e75 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 15:03:50 -0400
Subject: [PATCH 15/47] clean up metadata

---
 seqr/views/apis/report_api.py            |  3 +++
 seqr/views/apis/report_api_tests.py      | 13 +------------
 seqr/views/utils/anvil_metadata_utils.py |  6 +++---
 3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index b072e7b8b4..9638710c32 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -854,6 +854,9 @@ def _add_row(row, family_id, row_type):
             }
             f.update(known_ids)
             individuals_ids -= set(known_ids.values())
+        individual = proband or next(iter(individuals_by_id.values()), None)
+        if individual:
+            f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']})  # TODO constant?
 
         sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', ''))
         earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {})
diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py
index c99b6b84c1..b7cb51a5fd 100644
--- a/seqr/views/apis/report_api_tests.py
+++ b/seqr/views/apis/report_api_tests.py
@@ -509,10 +509,7 @@
     'notes': None,
     'phenotype_contribution': 'Full',
     'partial_contribution_explained': '',
-    'phenotype_description': None,
-    'pmid_id': None,
     'seqr_chosen_consequence': None,
-    'solve_status': 'Unsolved',
     'svName': None,
     'svType': None,
     'sv_name': None,
@@ -1120,11 +1117,10 @@ def test_family_metadata(self):
         test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000012_12')
         self.assertDictEqual(test_row, {
             'projectGuid': 'R0003_test',
-            'internal_project_id': 'Test Reprocessed Project',
             'familyGuid': 'F000012_12',
             'family_id': '12',
             'displayName': '12',
-            'solve_status': 'Unsolved',
+            'solve_status': 'Partially solved',
             'actual_inheritance': 'unknown',
             'condition_id': 'OMIM:616126',
             'condition_inheritance': 'Autosomal recessive',
@@ -1159,7 +1155,6 @@ def test_family_metadata(self):
         test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3')
         self.assertDictEqual(test_row, {
             'projectGuid': 'R0001_1kg',
-            'internal_project_id': '1kg project nåme with uniçøde',
             'familyGuid': 'F000003_3',
             'family_id': '3',
             'displayName': '3',
@@ -1221,10 +1216,8 @@ def test_variant_metadata(self):
             'known_condition_name': 'mitochondrial disease',
             'participant_id': 'HG00731',
             'phenotype_contribution': 'Uncertain',
-            'phenotype_description': 'microcephaly; seizures',
             'pos': 248367227,
             'projectGuid': 'R0001_1kg',
-            'internal_project_id': '1kg project nåme with uniçøde',
             'ref': 'TC',
             'tags': ['Known gene for phenotype'],
             'variant_inheritance': 'paternal',
@@ -1249,10 +1242,8 @@ def test_variant_metadata(self):
             'known_condition_name': 'mitochondrial disease',
             'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
             'participant_id': 'HG00731',
-            'phenotype_description': 'microcephaly; seizures',
             'pos': 1912634,
             'projectGuid': 'R0001_1kg',
-            'internal_project_id': '1kg project nåme with uniçøde',
             'ref': 'C',
             'tags': ['Known gene for phenotype'],
             'transcript': 'ENST00000371839',
@@ -1294,7 +1285,6 @@ def test_variant_metadata(self):
             'partial_contribution_explained': 'HP:0000501|HP:0000365',
             'phenotype_contribution': 'Partial',
             'projectGuid': 'R0003_test',
-            'internal_project_id': 'Test Reprocessed Project',
             'ref': 'TC',
             'seqr_chosen_consequence': 'intron_variant',
             'tags': ['Tier 1 - Novel gene and phenotype'],
@@ -1322,7 +1312,6 @@ def test_variant_metadata(self):
             'participant_id': 'NA20889',
             'pos': 249045487,
             'projectGuid': 'R0003_test',
-            'internal_project_id': 'Test Reprocessed Project',
             'ref': None,
             'svType': 'DEL',
             'sv_name': 'DEL:chr1:249045487-249045898',
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 7a6e184c08..65ffa09049 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -353,7 +353,7 @@ def _get_parsed_saved_discovery_variants_by_family(
             phenotype_contribution = 'Uncertain'
             partial_hpo_terms = ''
 
-        variant = {
+        parsed_variant = {
             'chrom': chrom,
             'pos': pos,
             'variant_reference_assembly': GENOME_VERSION_LOOKUP[variant_json['genomeVersion']],
@@ -367,11 +367,11 @@ def _get_parsed_saved_discovery_variants_by_family(
             **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']},
         }
         if include_metadata:
-            variant.update({
+            parsed_variant.update({
                 'seqr_chosen_consequence': main_transcript.get('majorConsequence'),
                 'tags': variant.tags,
             })
-        variants.append(variant)
+        variants.append(parsed_variant)
 
     genes_by_id = get_genes(gene_ids)
 

From 419fdd73b0e04d4af870d19979f5a647bd39041c Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 15:36:31 -0400
Subject: [PATCH 16/47] fix anvil export

---
 seqr/views/apis/report_api.py            | 20 +++++++++++-----
 seqr/views/apis/summary_data_api.py      |  7 ++++--
 seqr/views/utils/anvil_metadata_utils.py | 29 ++++++++++++------------
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 9638710c32..a00903431d 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -12,7 +12,7 @@
 from seqr.utils.middleware import ErrorsWarningsException
 
 from seqr.views.utils.airtable_utils import AirtableSession
-from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, \
+from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \
     FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \
     EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN
 from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs
@@ -112,12 +112,13 @@ def anvil_export(request, project_guid):
     project = get_project_and_check_permissions(project_guid, request.user)
 
     parsed_rows = defaultdict(list)
+    family_diseases = {}
 
     def _add_row(row, family_id, row_type):
         if row_type == DISCOVERY_ROW_TYPE:
             missing_gene_rows = [
                 '{chrom}-{pos}-{ref}-{alt}'.format(**discovery_row) for discovery_row in row
-                if not (discovery_row.get('gene_id') or discovery_row.get('svType'))]
+                if not (discovery_row.get(GENE_COLUMN) or discovery_row.get('svType'))]
             if missing_gene_rows:
                 raise ErrorsWarningsException(
                     [f'Discovery variant(s) {", ".join(missing_gene_rows)} in family {family_id} have no associated gene'])
@@ -146,19 +147,23 @@ def _add_row(row, family_id, row_type):
                 row.update({
                     'project_id': row.pop('internal_project_id'),
                     'solve_state': row.pop('solve_status'),
-                    'disease_id': row.get('condition_id', '').replace('|', ';'),
-                    'disease_description': row.get('known_condition_name', '').replace('|', ';'),
                     'hpo_present': '|'.join([feature['id'] for feature in row.get('features') or []]),
                     'hpo_absent': '|'.join([feature['id'] for feature in row.get('absent_features') or []]),
                     'ancestry': row['reported_ethnicity'] or row['reported_race'],
                 })
+            if row_type == FAMILY_ROW_TYPE:
+                family_diseases[row[entity_id_field]] = {
+                    'disease_id': row.get('condition_id', '').replace('|', ';'),
+                    'disease_description': row.get('known_condition_name', '').replace('|', ';'),
+                }
             parsed_rows[row_type].append(row)
 
     max_loaded_date = request.GET.get('loadedBefore') or (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
     parse_anvil_metadata(
         [project], request.user, _add_row, max_loaded_date=max_loaded_date, include_discovery_sample_id=True,
-        get_additional_individual_fields=lambda individual, *args: {
+        get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, *args: {
             'congenital_status': Individual.ONSET_AGE_LOOKUP[individual.onset_age] if individual.onset_age else 'Unknown',
+            **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission),
         },
         get_additional_sample_fields=lambda sample, *args: {
             'entity:sample_id': sample.individual.individual_id,
@@ -173,6 +178,9 @@ def _add_row(row, family_id, row_type):
         }},
     )
 
+    for row in parsed_rows[SUBJECT_ROW_TYPE]:
+        row.update(family_diseases[row['family_id']])
+
     return export_multiple_files([
         ['{}_PI_Subject'.format(project.name), SUBJECT_TABLE_COLUMNS, parsed_rows[SUBJECT_ROW_TYPE]],
         ['{}_PI_Sample'.format(project.name), SAMPLE_TABLE_COLUMNS, parsed_rows[SAMPLE_ROW_TYPE]],
@@ -530,7 +538,7 @@ def _get_gregor_airtable_data(participants, user):
     return airtable_metadata_by_participant
 
 
-def _get_participant_row(individual, airtable_sample):
+def _get_participant_row(individual, airtable_sample, *args):
     participant = {
         'gregor_center': 'BROAD',
         'prior_testing': '|'.join([gene.get('gene', gene['comments']) for gene in individual.rejected_genes or []]),
diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py
index 811052e52a..2c8663b76d 100644
--- a/seqr/views/apis/summary_data_api.py
+++ b/seqr/views/apis/summary_data_api.py
@@ -23,7 +23,7 @@
     add_individual_hpo_details, INDIVIDUAL_DISPLAY_NAME_EXPR, AIP_TAG_TYPE
 from seqr.views.utils.permissions_utils import analyst_required, user_is_analyst, get_project_guids_user_can_view, \
     login_and_policies_required, get_project_and_check_permissions, get_internal_projects
-from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE
+from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE
 from seqr.views.utils.variant_utils import get_variants_response, bulk_create_tagged_variants, DISCOVERY_CATEGORY
 from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL
 
@@ -312,11 +312,14 @@ def _add_row(row, family_id, row_type):
         projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'),
         include_metadata=True,
         omit_airtable=not include_airtable,
-        get_additional_individual_fields=lambda individual, airtable_metadata: {
+        get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids: {
             'Collaborator': (airtable_metadata or {}).get('Collaborator'),
             'individual_guid': individual.guid,
             'disorders': individual.disorders,
             'filter_flags': json.dumps(individual.filter_flags) if individual.filter_flags else '',
+            'paternal_guid': paternal_ids[1],
+            'maternal_guid': maternal_ids[1],
+            **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission),
         },
     )
 
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 65ffa09049..52dee02b61 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -234,7 +234,7 @@ def parse_anvil_metadata(
 
             subject_row = _get_subject_row(
                 individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields,
-                format_id, include_metadata,
+                format_id,
             )
             if individual.id in matchmaker_individuals:
                 subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes'
@@ -410,7 +410,7 @@ def _get_transcript_field(field, config, transcript):
     return value
 
 
-def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id, include_metadata):
+def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id):
     paternal_ids = individual_ids_map.get(individual.father_id, ('', ''))
     maternal_ids = individual_ids_map.get(individual.mother_id, ('', ''))
     subject_row = {
@@ -426,28 +426,27 @@ def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, indivi
         'paternal_id': format_id(paternal_ids[0]),
         'maternal_id': format_id(maternal_ids[0]),
     }
-    if include_metadata:
-        subject_row.update({
-            'paternal_guid': paternal_ids[1],
-            'maternal_guid': maternal_ids[1],
-        })
     if airtable_metadata is not None:
-        sequencing = airtable_metadata.get('SequencingProduct') or set()
         subject_row.update({
             'dbgap_study_id': airtable_metadata.get('dbgap_study_id', '') if has_dbgap_submission else '',
             'dbgap_subject_id': airtable_metadata.get('dbgap_subject_id', '') if has_dbgap_submission else '',
         })
-        if include_metadata:
-            subject_row.update({
-                'dbgap_submission': 'Yes' if has_dbgap_submission else 'No',
-                'multiple_datasets': 'Yes' if len(sequencing) > 1 or (
-                        len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No',
-            })
     if get_additional_individual_fields:
-        subject_row.update(get_additional_individual_fields(individual, airtable_metadata))
+        subject_row.update(get_additional_individual_fields(individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids))
     return subject_row
 
 
+def anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission):
+    if airtable_metadata is None:
+        return {}
+    sequencing = airtable_metadata.get('SequencingProduct') or set()
+    return {
+        'dbgap_submission': 'Yes' if has_dbgap_submission else 'No',
+        'multiple_datasets': 'Yes' if len(sequencing) > 1 or (
+                len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No',
+    }
+
+
 def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_metadata, get_additional_sample_fields=None):
     sample_row = {
         'participant_id': participant_id,

From 7facc258821f2566371801010e7a3a1065b9b1b4 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 15:45:51 -0400
Subject: [PATCH 17/47] remove hardcoded column lists

---
 seqr/views/apis/report_api.py            | 63 ++++++++----------------
 seqr/views/utils/anvil_metadata_utils.py |  4 +-
 2 files changed, 24 insertions(+), 43 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index a00903431d..2463e6c0f0 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -196,17 +196,6 @@ def _add_row(row, family_id, row_type):
 SMID_FIELD = 'SMID'
 PARTICIPANT_ID_FIELD = 'CollaboratorParticipantID'
 COLLABORATOR_SAMPLE_ID_FIELD = 'CollaboratorSampleID'
-PARTICIPANT_TABLE_COLUMNS = {
-    'participant_id', 'internal_project_id', 'gregor_center', 'consent_code', 'recontactable', 'prior_testing',
-    'pmid_id', 'family_id', 'paternal_id', 'maternal_id', 'proband_relationship',
-    'sex', 'reported_race', 'reported_ethnicity', 'ancestry_detail', 'solve_status', 'missing_variant_case',
-    'age_at_last_observation', 'affected_status', 'phenotype_description', 'age_at_enrollment',
-}
-GREGOR_FAMILY_TABLE_COLUMNS = {'family_id', 'consanguinity'}
-PHENOTYPE_TABLE_COLUMNS = {
-    'phenotype_id', 'participant_id', 'term_id', 'presence', 'ontology', 'additional_details', 'onset_age_range',
-    'additional_modifiers',
-}
 ANALYTE_TABLE_COLUMNS = {
     'analyte_id', 'participant_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status',
 }
@@ -227,7 +216,6 @@ def _add_row(row, family_id, row_type):
 EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS)
 EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS)
 EXPERIMENT_RNA_TABLE_COLUMNS.update([c for c in EXPERIMENT_TABLE_AIRTABLE_FIELDS if not c.startswith('target')])
-EXPERIMENT_LOOKUP_TABLE_COLUMNS = {'experiment_id', 'table_name', 'id_in_table', 'participant_id'}
 READ_TABLE = 'aligned_dna_short_read'
 READ_TABLE_AIRTABLE_FIELDS = [
     'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly',
@@ -252,24 +240,6 @@ def _add_row(row, family_id, row_type):
     'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum',
     'caller_software', 'variant_types', 'analysis_details',
 }
-AIRTABLE_TABLE_COLUMNS = {
-    EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS,
-    READ_TABLE: READ_TABLE_COLUMNS,
-    READ_SET_TABLE: READ_SET_TABLE_COLUMNS,
-    CALLED_TABLE: CALLED_TABLE_COLUMNS,
-    EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS,
-    READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS,
-}
-RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE}
-DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES
-
-GENETIC_FINDINGS_TABLE_COLUMNS = {
-    'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp',
-    'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained',
-    'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage',
-    'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery',
-    'gene_disease_validity',
-}
 
 RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [
     'reference_assembly_uri', 'tissue_affected_status', 'Primary_Biosample']
@@ -296,6 +266,17 @@ def _add_row(row, family_id, row_type):
     data_type_columns = set(DATA_TYPE_AIRTABLE_COLUMNS) - NO_DATA_TYPE_FIELDS - set(DATA_TYPE_OMIT[data_type])
     AIRTABLE_QUERY_COLUMNS.update({f'{field}_{data_type}' for field in data_type_columns})
 
+AIRTABLE_TABLE_COLUMNS = {
+    EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS,
+    READ_TABLE: READ_TABLE_COLUMNS,
+    READ_SET_TABLE: READ_SET_TABLE_COLUMNS,
+    CALLED_TABLE: CALLED_TABLE_COLUMNS,
+    EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS,
+    READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS,
+}
+RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE}
+DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES
+
 WARN_MISSING_TABLE_COLUMNS = {
     PARTICIPANT_TABLE: ['recontactable',  'reported_race', 'affected_status', 'phenotype_description', 'age_at_enrollment'],
     FINDINGS_TABLE: ['known_condition_name'],
@@ -447,13 +428,13 @@ def _add_row(row, family_id, row_type):
         variant['experiment_id'] = experiment_ids_by_participant.get(variant['participant_id'])
 
     file_data = [
-        (PARTICIPANT_TABLE, PARTICIPANT_TABLE_COLUMNS, participant_rows),
-        ('family', GREGOR_FAMILY_TABLE_COLUMNS, list(family_map.values())),
-        (PHENOTYPE_TABLE, PHENOTYPE_TABLE_COLUMNS, phenotype_rows),
-        ('analyte', ANALYTE_TABLE_COLUMNS, analyte_rows),
-        *[(table, AIRTABLE_TABLE_COLUMNS[table], rows) for table, rows in airtable_rows.items()],
-        (EXPERIMENT_LOOKUP_TABLE, EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows),
-        (FINDINGS_TABLE, GENETIC_FINDINGS_TABLE_COLUMNS, genetic_findings_rows),
+        (PARTICIPANT_TABLE, participant_rows),
+        ('family', list(family_map.values())),
+        (PHENOTYPE_TABLE, phenotype_rows),
+        ('analyte', analyte_rows),
+        *[(table, rows) for table, rows in airtable_rows.items()],
+        (EXPERIMENT_LOOKUP_TABLE, experiment_lookup_rows),
+        (FINDINGS_TABLE, genetic_findings_rows),
     ]
 
     files, warnings = _populate_gregor_files(file_data)
@@ -662,7 +643,7 @@ def _populate_gregor_files(file_data):
         )
 
     files = []
-    for file_name, expected_columns, data in file_data:
+    for file_name, data in file_data:
         table_config = table_configs.get(file_name)
         if not table_config:
             errors.insert(0, f'No data model found for "{file_name}" table')
@@ -670,15 +651,13 @@ def _populate_gregor_files(file_data):
 
         files.append((file_name, list(table_config.keys()), data))
 
-        expected_columns = {k for d in data for k, v in d.items() if v}  # TODO
+        expected_columns = {k for d in data for k, v in d.items() if v}
         extra_columns = expected_columns.difference(table_config.keys())
         if extra_columns:
             col_summary = ', '.join(sorted(extra_columns))
             warnings.insert(
                 0, f'The following columns are computed for the "{file_name}" table but are missing from the data model: {col_summary}',
             )
-            errors.append(warnings[0]) # TODO
-            continue
         invalid_data_type_columns = {
             col: config['data_type'] for col, config in table_config.items()
             if config.get('data_type') and config['data_type'] not in DATA_TYPE_VALIDATORS
@@ -864,7 +843,7 @@ def _add_row(row, family_id, row_type):
             individuals_ids -= set(known_ids.values())
         individual = proband or next(iter(individuals_by_id.values()), None)
         if individual:
-            f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']})  # TODO constant?
+            f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']})
 
         sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', ''))
         earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {})
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 52dee02b61..5e37f7b742 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -100,6 +100,8 @@
     Sample.SAMPLE_TYPE_WGS: 'SR-GS',
 }
 
+FAMILY_INDIVIDUAL_FIELDS = ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']
+
 
 def _format_hgvs(hgvs, *args):
     return (hgvs or '').split(':')[-1]
@@ -206,7 +208,7 @@ def parse_anvil_metadata(
             individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED
         ] if include_metadata else []
 
-        subject_family_row = {k: family_subject_row.pop(k) for k in ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']}  # TODO constant
+        subject_family_row = {k: family_subject_row.pop(k) for k in FAMILY_INDIVIDUAL_FIELDS}
         family_row = {
             'family_id': subject_family_row['family_id'],
             'consanguinity': next((

From 576c0c2323be4d99d4227f5aa9056d18ae7a8766 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 16:02:02 -0400
Subject: [PATCH 18/47] correctly include rna airtable field

---
 seqr/views/apis/report_api.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 2463e6c0f0..83e4afb5cb 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -226,7 +226,8 @@ def _add_row(row, family_id, row_type):
 READ_RNA_TABLE = 'aligned_rna_short_read'
 READ_RNA_TABLE_AIRTABLE_ID_FIELDS = ['aligned_rna_short_read_file', 'aligned_rna_short_read_index_file']
 READ_RNA_TABLE_AIRTABLE_FIELDS = [
-    'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', 'percent_unaligned',
+    'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped',
+    'percent_unaligned', 'reference_assembly_uri',
 ]
 READ_RNA_TABLE_COLUMNS = {'aligned_rna_short_read_id', 'experiment_rna_short_read_id'}
 READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_ID_FIELDS)
@@ -242,7 +243,7 @@ def _add_row(row, family_id, row_type):
 }
 
 RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [
-    'reference_assembly_uri', 'tissue_affected_status', 'Primary_Biosample']
+    'tissue_affected_status', 'Primary_Biosample']
 DATA_TYPE_OMIT = {
     'wgs': ['targeted_regions_method'] + RNA_ONLY, 'wes': RNA_ONLY, 'rna': [
         'targeted_regions_method', 'target_insert_size', 'mean_coverage', 'aligned_dna_short_read_file',

From 8c2526146f59c96df2a019cc7ab22f086181f148 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 16:54:51 -0400
Subject: [PATCH 19/47] fix invalid tables test case

---
 seqr/views/apis/report_api.py            |  6 ++----
 seqr/views/apis/report_api_tests.py      |  8 ++++----
 seqr/views/utils/anvil_metadata_utils.py | 14 +++++++++-----
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 83e4afb5cb..5fe75e60aa 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -397,7 +397,7 @@ def _add_row(row, family_id, row_type):
         format_id=_format_gregor_id,
         get_additional_individual_fields=_get_participant_row,
         post_process_variant=_post_process_gregor_variant,
-        variant_filter={'alt__isnull': False},
+        include_svs=False,
         airtable_fields=[SMID_FIELD, PARTICIPANT_ID_FIELD, 'Recontactable'],
         include_mondo=True,
         proband_only_variants=True,
@@ -414,9 +414,6 @@ def _add_row(row, family_id, row_type):
         phenotype_rows += _parse_participant_phenotype_rows(participant)
 
         airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD)
-        if not airtable_participant_id:
-            continue
-
         airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {}
         data_types = grouped_data_type_individuals[participant['participant_id']]
         _parse_participant_airtable_rows(
@@ -479,6 +476,7 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types,
     has_analyte = False
     analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS}
     participant['participant_id'] = analyte_row['participant_id']
+
     # airtable data
     for data_type in data_types:
         if data_type not in airtable_metadata:
diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py
index b7cb51a5fd..19164ca14b 100644
--- a/seqr/views/apis/report_api_tests.py
+++ b/seqr/views/apis/report_api_tests.py
@@ -832,10 +832,10 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat
             'The following entries are missing required "proband_relationship" in the "participant" table: Broad_NA19678, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881',
             'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)',
             'The following entries have invalid values for "age_at_enrollment" in the "participant" table. Allowed values have data type date. Invalid values: Broad_NA19675_1 (18)',
-            'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: NA20888 (GRCh38), VCGS_FAM203_621_D2 (GRCh38)',
-            'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: VCGS_FAM203_621_D2',
-            'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (NA20888, VCGS_FAM203_621_D2)',
-            'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: VCGS_FAM203_621_D2 (DOI:10.5281/zenodo.4469317)',
+            'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: Broad_exome_NA20888_1 (GRCh38), Broad_exome_VCGS_FAM203_621_D2_1 (GRCh38)',
+            'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: Broad_exome_VCGS_FAM203_621_D2_1',
+            'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (Broad_exome_NA20888_1, Broad_exome_VCGS_FAM203_621_D2_1)',
+            'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: Broad_exome_VCGS_FAM203_621_D2_1 (DOI:10.5281/zenodo.4469317)',
             'The following entries have invalid values for "date_data_generation" (from Airtable) in the "experiment_rna_short_read" table. Allowed values have data type float. Invalid values: NA19679 (2023-02-11)',
             'The following entries are missing required "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_NA19675_1_21_3343353',
             'The following entries have non-unique values for "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_exome_VCGS_FAM203_621_D2 (Broad_HG00731_19_1912632, Broad_HG00731_19_1912633, Broad_HG00731_19_1912634, Broad_HG00731_1_248367227)',
diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 5e37f7b742..c859f35514 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -160,7 +160,7 @@ def parse_anvil_metadata(
         get_additional_sample_fields: Callable[[Sample, dict], dict] = None,
         get_additional_individual_fields: Callable[[Individual, dict], dict] = None,
         individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None,
-        airtable_fields: Iterable[str] = None, mme_values: dict = None, variant_filter: dict = None,
+        airtable_fields: Iterable[str] = None, mme_values: dict = None, include_svs: bool = True,
         variant_json_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None,
         include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False,
         include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False,
@@ -184,7 +184,7 @@ def parse_anvil_metadata(
             sample_ids.add(sample.sample_id)
 
     saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family(
-        list(family_data_by_id.keys()), include_metadata, variant_filter=variant_filter, variant_json_fields=variant_json_fields,
+        list(family_data_by_id.keys()), include_metadata, include_svs=include_svs, variant_json_fields=variant_json_fields,
     )
 
     condition_map = _get_condition_map(family_data_by_id.values())
@@ -327,13 +327,13 @@ def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False):
 
 
 def _get_parsed_saved_discovery_variants_by_family(
-        families: Iterable[Family], include_metadata: bool, variant_filter: dict, variant_json_fields: list[str],
+        families: Iterable[Family], include_metadata: bool, include_svs: dict, variant_json_fields: list[str],
 ):
     tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY)
 
     project_saved_variants = SavedVariant.objects.filter(
         varianttag__variant_tag_type__in=tag_types, family__id__in=families,
-        **(variant_filter or {}),
+        **({} if include_svs else {'alt__isnull': False}),
     ).order_by('created_date').distinct().annotate(
         tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True),
         partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')),
@@ -355,6 +355,10 @@ def _get_parsed_saved_discovery_variants_by_family(
             phenotype_contribution = 'Uncertain'
             partial_hpo_terms = ''
 
+        variant_fields = ['genotypes']
+        if include_svs:
+            variant_fields += ['svType', 'svName', 'end']
+
         parsed_variant = {
             'chrom': chrom,
             'pos': pos,
@@ -365,7 +369,7 @@ def _get_parsed_saved_discovery_variants_by_family(
             'phenotype_contribution': phenotype_contribution,
             'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'),
             **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()},
-            **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])},
+            **{k: variant_json.get(k) for k in variant_fields + (variant_json_fields or [])},
             **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']},
         }
         if include_metadata:

From 0f8f206c83793ce48f0819b9ec5c7be869d72706 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 17:06:33 -0400
Subject: [PATCH 20/47] fix analyte handling

---
 seqr/views/apis/report_api.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 5fe75e60aa..6d18526b7e 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -412,12 +412,16 @@ def _add_row(row, family_id, row_type):
     experiment_ids_by_participant = {}
     for participant in participant_rows:
         phenotype_rows += _parse_participant_phenotype_rows(participant)
+        analyte = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS}
+        participant['participant_id'] = analyte['participant_id']
 
-        airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD)
-        airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {}
+        if not participant[PARTICIPANT_ID_FIELD]:
+            continue
+
+        airtable_metadata = airtable_metadata_by_participant.get(participant.pop(PARTICIPANT_ID_FIELD)) or {}
         data_types = grouped_data_type_individuals[participant['participant_id']]
         _parse_participant_airtable_rows(
-            participant, airtable_metadata, data_types, experiment_ids_by_participant,
+            analyte, airtable_metadata, data_types, experiment_ids_by_participant,
             analyte_rows, airtable_rows, experiment_lookup_rows,
         )
 
@@ -471,32 +475,29 @@ def _parse_participant_phenotype_rows(participant):
     ]
 
 
-def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant,
+def _parse_participant_airtable_rows(analyte, airtable_metadata, data_types, experiment_ids_by_participant,
                                      analyte_rows, airtable_rows, experiment_lookup_rows):
     has_analyte = False
-    analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS}
-    participant['participant_id'] = analyte_row['participant_id']
-
     # airtable data
     for data_type in data_types:
         if data_type not in airtable_metadata:
             continue
         is_rna, row = _get_airtable_row(data_type, airtable_metadata)
         has_analyte = True
-        analyte_rows.append({**analyte_row, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}})
+        analyte_rows.append({**analyte, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}})
         if not is_rna:
-            experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id']
+            experiment_ids_by_participant[analyte['participant_id']] = row['experiment_dna_short_read_id']
         for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES):
             if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN):
                 continue
             airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row})
 
         experiment_lookup_rows.append(
-            {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)}
+            {'participant_id': analyte['participant_id'], **_get_experiment_lookup_row(is_rna, row)}
         )
 
-    if analyte_row['analyte_id'] and not has_analyte:
-        analyte_rows.append(analyte_row)
+    if analyte['analyte_id'] and not has_analyte:
+        analyte_rows.append(analyte)
 
 
 def _get_gregor_airtable_data(participants, user):
@@ -797,8 +798,8 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e
 
 def _get_row_id(row):
     id_col = next(col for col in [
-        'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id',
-        'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'family_id',
+        'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', 'family_id',
+        'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'aligned_dna_short_read_set_id', 'aligned_rna_short_read_set_id',
     ] if col in row)
     return row[id_col]
 

From c56a52030f2d1c263e50aa305f3d6af807eba692 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 31 May 2024 17:21:49 -0400
Subject: [PATCH 21/47] remove unused import

---
 seqr/views/apis/report_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py
index 6d18526b7e..acb45076fb 100644
--- a/seqr/views/apis/report_api.py
+++ b/seqr/views/apis/report_api.py
@@ -14,7 +14,7 @@
 from seqr.views.utils.airtable_utils import AirtableSession
 from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \
     FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \
-    EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN
+    EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN
 from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs
 from seqr.views.utils.json_utils import create_json_response
 from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \

From bcaba7c1eea1a647249c4a9258776d97de55c91c Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 15:12:26 -0400
Subject: [PATCH 22/47] debug code

---
 hail_search/queries/base.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index efacff45bd..92b6c2367d 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -297,11 +297,14 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_
         project_hts = []
         sample_data = {}
         for project_guid, project_sample_data in project_samples.items():
-            project_ht = self._read_table(
-                f'projects/{project_guid}.ht',
-                use_ssd_dir=True,
-                skip_missing_field='family_entries' if skip_all_missing else None,
-            )
+            try:
+                project_ht = self._read_table(
+                    f'projects/{project_guid}.ht',
+                    use_ssd_dir=True,
+                    skip_missing_field='family_entries' if skip_all_missing else None,
+                )
+            except Exception as e:
+                project_ht = None
             if project_ht is None:
                 continue
             project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples'))
@@ -1078,7 +1081,7 @@ def gene_counts(self):
 
     def lookup_variants(self, variant_ids, include_project_data=False, **kwargs):
         self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids)
-        ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions'])
+        ht = self._read_table('annotations_vep_110.ht', drop_globals=['paths', 'versions'])
         ht = ht.filter(hl.is_defined(ht[XPOS]))
 
         annotation_fields = self.annotation_fields(include_genotype_overrides=False)

From 2c7d85ad6ca9eb574a5e29173b0c2effe7f635ca Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 15:34:31 -0400
Subject: [PATCH 23/47] debug code

---
 hail_search/queries/base.py          | 6 ++++--
 hail_search/queries/ont_snv_indel.py | 1 +
 hail_search/queries/snv_indel.py     | 1 +
 hail_search/queries/snv_indel_37.py  | 2 +-
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 92b6c2367d..37d6ef87af 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -35,6 +35,8 @@ def _to_camel_case(snake_case_str):
 
 class BaseHailTableQuery(object):
 
+    ANNS_HT = 'annotations.ht'
+
     DATA_TYPE = None
     KEY_FIELD = None
     LOADED_GLOBALS = None
@@ -90,7 +92,7 @@ class BaseHailTableQuery(object):
 
     @classmethod
     def load_globals(cls):
-        ht_path = cls._get_table_path('annotations.ht')
+        ht_path = cls._get_table_path(cls.ANNS_HT)
         ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS))
         cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS}
 
@@ -1081,7 +1083,7 @@ def gene_counts(self):
 
     def lookup_variants(self, variant_ids, include_project_data=False, **kwargs):
         self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids)
-        ht = self._read_table('annotations_vep_110.ht', drop_globals=['paths', 'versions'])
+        ht = self._read_table(self.ANNS_HT, drop_globals=['paths', 'versions'])
         ht = ht.filter(hl.is_defined(ht[XPOS]))
 
         annotation_fields = self.annotation_fields(include_genotype_overrides=False)
diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py
index dc99ad8e18..fac3d12d4d 100644
--- a/hail_search/queries/ont_snv_indel.py
+++ b/hail_search/queries/ont_snv_indel.py
@@ -7,6 +7,7 @@
 class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery):
 
     DATA_TYPE = 'ONT_SNV_INDEL'
+    ANNS_HT = 'annotations.ht'
 
     CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS
 
diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py
index a95890e038..5b42570aeb 100644
--- a/hail_search/queries/snv_indel.py
+++ b/hail_search/queries/snv_indel.py
@@ -11,6 +11,7 @@
 class SnvIndelHailTableQuery(MitoHailTableQuery):
 
     DATA_TYPE = 'SNV_INDEL'
+    ANNS_HT = 'annotations_vep_110.ht'
 
     GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']}
     QUALITY_FILTER_FORMAT = {
diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py
index d43b92cbe6..3c0a9f2aa5 100644
--- a/hail_search/queries/snv_indel_37.py
+++ b/hail_search/queries/snv_indel_37.py
@@ -5,7 +5,7 @@
 
 
 class SnvIndelHailTableQuery37(SnvIndelHailTableQuery):
-
+    ANNS_HT = 'annotations.ht'
     GENOME_VERSION = GENOME_VERSION_GRCh37
     PREDICTION_FIELDS_CONFIG = SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS
     LIFTOVER_ANNOTATION_FIELDS = {}

From cf479bc8686ac7aa41f0bc7a721e8dfe7417428c Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 16:02:27 -0400
Subject: [PATCH 24/47] nested struct support

---
 hail_search/queries/base.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 37d6ef87af..49ea85afba 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -184,22 +184,28 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=Non
 
         return cls._enum_field(field, value, enum, **kwargs)
 
-    @staticmethod
-    def _enum_field(field_name, value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, enum_keys=None, include_version=False, **kwargs):
+    @classmethod
+    def _enum_field(cls, field_name, value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, enum_keys=None, include_version=False, **kwargs):
         annotations = {}
         drop = [] + (drop_fields or [])
         value_keys = value.keys()
         for field in (enum_keys or enum.keys()):
             field_enum = enum[field]
+            if field == 'utrannotator':
+                field = 'utrrannotator'
+            is_nested_struct = field in value_keys
             is_array = f'{field}_ids' in value_keys
-            value_field = f"{field}_id{'s' if is_array else ''}"
-            drop.append(value_field)
 
-            enum_array = hl.array(field_enum)
-            if is_array:
-                annotations[f'{field}s'] = value[value_field].map(lambda v: enum_array[v])
+            if is_nested_struct:
+                annotations[field] = cls._enum_field(field, value[field], field_enum, format_value=format_value)
             else:
-                annotations[field] = enum_array[value[value_field]]
+                value_field = f"{field}_id{'s' if is_array else ''}"
+                drop.append(value_field)
+                enum_array = hl.array(field_enum)
+                if is_array:
+                    annotations[f'{field}s'] = value[value_field].map(lambda v: enum_array[v])
+                else:
+                    annotations[field] = enum_array[value[value_field]]
 
         if include_version:
             annotations['version'] = ht_globals['versions'][field_name]

From e1b154c885d2f5be355c30b9b2530d212abada9f Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 16:08:54 -0400
Subject: [PATCH 25/47] support new loftee format

---
 .../components/panel/variants/Annotations.jsx | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 3af864824a..14d8d908ed 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -435,29 +435,21 @@ const svSizeDisplay = (size) => {
   return `${(size / 1000000).toFixed(2) / 1}Mb`
 }
 
-const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => {
-  const {
-    rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport,
-    endChrom,
-  } = variant
-  const mainTranscript = getVariantMainTranscript(variant)
-
-  const isLofNagnag = mainTranscript.isLofNagnag || mainTranscript.lofFlags === 'NAGNAG_SITE'
-  const lofFilters = mainTranscript.lofFilters || (
-    mainTranscript.lof === 'LC' && mainTranscript.lofFilter && mainTranscript.lofFilter.split(/&|,/g)
-  )
-  const lofDetails = (lofFilters || isLofNagnag) ? [
-    ...(lofFilters ? [...new Set(lofFilters)] : []).map((lofFilterKey) => {
-      const lofFilter = LOF_FILTER_MAP[lofFilterKey] || { message: lofFilterKey }
+const getLofDetails = ({ isLofNagnag, lofFilters, lofFilter, lofFlags, lof }) => {
+  const isNagnag = isLofNagnag || lofFlags === 'NAGNAG_SITE'
+  const filters = lofFilters || (lof === 'LC' && lofFilter && lofFilter.split(/&|,/g))
+  return (filters || isNagnag) ? [
+    ...(filters ? [...new Set(filters)] : []).map((lofFilterKey) => {
+      const filter = LOF_FILTER_MAP[lofFilterKey] || { message: lofFilterKey }
       return (
         <div key={lofFilterKey}>
-          <b>{`LOFTEE: ${lofFilter.title}`}</b>
+          <b>{`LOFTEE: ${filter.title}`}</b>
           <br />
-          {lofFilter.message}
+          {filter.message}
         </div>
       )
     }),
-    isLofNagnag ? (
+    isNagnag ? (
       <div key="NAGNAG_SITE">
         <b>LOFTEE: NAGNAG site</b>
         <br />
@@ -465,6 +457,15 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
       </div>
     ) : null,
   ] : null
+}
+
+const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => {
+  const {
+    rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport,
+    endChrom,
+  } = variant
+  const mainTranscript = getVariantMainTranscript(variant)
+  const lofDetails = getLofDetails(mainTranscript.loftee || mainTranscript)
 
   const transcriptPopupProps = mainTranscript.transcriptId && {
     content: <TranscriptLink variant={variant} transcript={mainTranscript} />,

From ed52cddc684f457e2fc482d56e0eb98fe4f4fc74 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 16:33:02 -0400
Subject: [PATCH 26/47] first pass intron/exon

---
 ui/shared/components/panel/variants/Transcripts.jsx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index fc1a0523f7..54fb2bb1f4 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -114,8 +114,9 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai
                     <AnnotationLabel>Biotype</AnnotationLabel>
                     {transcript.biotype}
                     <br />
-                    <AnnotationLabel>cDNA Position</AnnotationLabel>
-                    {transcript.cdnaPosition}
+                    <AnnotationLabel>Intron/Exon</AnnotationLabel>
+                    {transcript.intron && `Intron ${transcript.intron.index} of ${transcript.intron.total}`}
+                    {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index} of ${transcript.exon.total}`}
                     <br />
                   </AnnotationSection>
                   <AnnotationSection>

From b81cbdbe42d04a5ca9e38b5ac91775f4b0af2b42 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 16:56:05 -0400
Subject: [PATCH 27/47] show AlphaMissense

---
 ui/shared/components/panel/variants/Predictions.jsx | 10 +++++++++-
 ui/shared/utils/constants.js                        |  7 +++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx
index d6a305a145..b55b9e91d3 100644
--- a/ui/shared/components/panel/variants/Predictions.jsx
+++ b/ui/shared/components/panel/variants/Predictions.jsx
@@ -5,7 +5,7 @@ import { connect } from 'react-redux'
 import { Icon, Transition, Popup } from 'semantic-ui-react'
 
 import { getGenesById } from 'redux/selectors'
-import { ORDERED_PREDICTOR_FIELDS, coloredIcon, predictorColorRanges, predictionFieldValue, getVariantMainGeneId } from 'shared/utils/constants'
+import { ORDERED_PREDICTOR_FIELDS, coloredIcon, predictorColorRanges, predictionFieldValue, getVariantMainGeneId, getVariantMainTranscript } from 'shared/utils/constants'
 import { snakecaseToTitlecase } from 'shared/utils/stringUtils'
 import { HorizontalSpacer } from '../../Spacers'
 import { ButtonLink } from '../../StyledComponents'
@@ -111,6 +111,14 @@ class Predictions extends React.PureComponent {
           gene.primateAi.percentile75.toPrecision(3), undefined],
       }
     }
+    const mainTranscript = getVariantMainTranscript(variant)
+    if (mainTranscript?.alphamissense.pathogenicity) {
+      genePredictors.alphamissense = {
+        field: 'alphamissense',
+        fieldValue: mainTranscript.alphamissense.pathogenicity,
+        thresholds: [0.34, 0.34, 0.564, 0.564],
+      }
+    }
 
     const predictorFields = getPredictorFields(variant, predictions, genePredictors)
 
diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js
index 4d107cc89a..988938fef7 100644
--- a/ui/shared/utils/constants.js
+++ b/ui/shared/utils/constants.js
@@ -1446,6 +1446,7 @@ export const ORDERED_PREDICTOR_FIELDS = [
     thresholds: [undefined, undefined, 2.18, 4, undefined],
     requiresCitation: true,
   },
+  { field: 'alphamissense', fieldTitle: 'AlphaMissense', displayOnly: true },
   { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } },
   { field: 'mitotip', indicatorMap: MITOTIP_MAP, fieldTitle: 'MitoTIP' },
   { field: 'hmtvar', thresholds: [undefined, undefined, 0.35, 0.35, undefined], fieldTitle: 'HmtVar' },
@@ -1453,9 +1454,9 @@ export const ORDERED_PREDICTOR_FIELDS = [
 
 export const coloredIcon = color => React.createElement(color.startsWith('#') ? ColoredIcon : Icon, { name: 'circle', size: 'small', color })
 export const predictionFieldValue = (
-  predictions, { field, thresholds, reverseThresholds, indicatorMap, infoField, infoTitle },
+  predictions, { field, fieldValue, thresholds, reverseThresholds, indicatorMap, infoField, infoTitle },
 ) => {
-  let value = predictions[field]
+  let value = fieldValue || predictions[field]
   if (value === null || value === undefined) {
     return { value }
   }
@@ -1487,6 +1488,8 @@ export const predictorColorRanges = (thresholds, requiresCitation, reverseThresh
         range = ` >= ${thresholds[i - 1]}`
       } else if (prevUndefined) {
         range = ` < ${thresholds[i]}`
+      } else if (thresholds[i - 1] === thresholds[i]) {
+        return null
       } else {
         range = ` ${thresholds[i - 1]} - ${thresholds[i]}`
       }

From f22066919d4316032a952fa427a27398646d4112 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Mon, 3 Jun 2024 16:58:00 -0400
Subject: [PATCH 28/47] fix intron exon display

---
 ui/shared/components/panel/variants/Transcripts.jsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index 54fb2bb1f4..56cbb420c1 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -115,8 +115,8 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai
                     {transcript.biotype}
                     <br />
                     <AnnotationLabel>Intron/Exon</AnnotationLabel>
-                    {transcript.intron && `Intron ${transcript.intron.index} of ${transcript.intron.total}`}
-                    {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index} of ${transcript.exon.total}`}
+                    {transcript.intron && `Intron ${transcript.intron.index}/${transcript.intron.total}`}
+                    {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index}/${transcript.exon.total}`}
                     <br />
                   </AnnotationSection>
                   <AnnotationSection>

From f70b59c8090c3e267c6c298297b05495d32814b3 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 4 Jun 2024 11:04:39 -0400
Subject: [PATCH 29/47] remove utrannotator hardcode fix

---
 hail_search/queries/base.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 49ea85afba..78523f00de 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -191,8 +191,6 @@ def _enum_field(cls, field_name, value, enum, ht_globals=None, annotate_value=No
         value_keys = value.keys()
         for field in (enum_keys or enum.keys()):
             field_enum = enum[field]
-            if field == 'utrannotator':
-                field = 'utrrannotator'
             is_nested_struct = field in value_keys
             is_array = f'{field}_ids' in value_keys
 

From 2c5b484be1db7ee01382a90190f8a97f1d89ddb1 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 4 Jun 2024 11:46:08 -0400
Subject: [PATCH 30/47] show utrannotator

---
 .../components/panel/variants/Annotations.jsx | 73 ++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 14d8d908ed..771e96dd10 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -3,7 +3,7 @@ import PropTypes from 'prop-types'
 import { connect } from 'react-redux'
 import { NavLink } from 'react-router-dom'
 import styled from 'styled-components'
-import { Popup, Label, Icon } from 'semantic-ui-react'
+import { Popup, Label, Icon, Table } from 'semantic-ui-react'
 
 import {
   getGenesById,
@@ -36,6 +36,7 @@ import {
 import {
   GENOME_VERSION_37, GENOME_VERSION_38, getVariantMainTranscript, SVTYPE_LOOKUP, SVTYPE_DETAILS, SCREEN_LABELS,
 } from '../../../utils/constants'
+import { camelcaseToTitlecase } from '../../../utils/stringUtils'
 
 const OverlappedIntervalLabels = React.memo(({ groupedIntervals, variant, getOverlapArgs, getLabels }) => {
   const chromIntervals = groupedIntervals[variant.chrom]
@@ -459,6 +460,60 @@ const getLofDetails = ({ isLofNagnag, lofFilters, lofFilter, lofFlags, lof }) =>
   ] : null
 }
 
+// Adapted from https://github.com/ImperialCardioGenetics/UTRannotator/blob/master/README.md#the-detailed-annotation-for-each-consequence
+const UTR_ANNOTATOR_DESCRIPTIONS = {
+  AltStop: 'Whether there is an alternative stop codon downstream within 5’ UTR',
+  AltStopDistanceToCDS: 'The distance between the alternative stop codon (if exists) and CDS',
+  CapDistanceToStart: 'The distance (number of nucleotides) to the start of 5’UTR',
+  DistanceToCDS: 'The distance (number of nucleotides) to CDS',
+  DistanceToStop: 'The distance (number of nucleotides) to the nearest stop codon (scanning through both the 5’UTR and its downstream CDS)',
+  Evidence: 'Whether the disrupted uORF has any translation evidence',
+  FrameWithCDS: 'The frame of the uORF with respect to CDS, described by inFrame or outOfFrame',
+  KozakContext: 'The Kozak context sequence',
+  KozakStrength: 'The Kozak strength, described by one of the following values: Weak, Moderate or Strong',
+  StartDistanceToCDS: 'The distance between the disrupting uORF and CDS',
+  alt_type: 'The type of uORF with the alternative allele, described by one of following: uORF, inframe_oORF or OutOfFrame_oORF',
+  alt_type_length: 'The length of uORF with the alt allele',
+  newSTOPDistanceToCDS: 'The distance between the gained uSTOP to the start of the CDS',
+  ref_StartDistanceToCDS: 'The distance between the uAUG of the disrupting uORF to CDS',
+  ref_type: 'The type of uORF with the reference allele, described by one of following: uORF, inframe_oORF or OutOfFrame_oORF',
+  ref_type_length: 'The length of uORF with the reference allele',
+  type: 'The type of of 5’ UTR ORF, described by one of the following: uORF(with a stop codon in 5’UTR), inframe_oORF (inframe and overlapping with CDS),OutOfFrame_oORF (out of frame and overlapping with CDS)',
+}
+
+const UtrAnnotatorDetail = ({ fiveutrConsequence, fiveutrAnnotation, ...counts }) => (
+  <Table compact singleLine basic="very">
+    <Table.Body>
+      <Table.Row>
+        <Table.HeaderCell textAlign="right" content="5' UTR Consequence" />
+        <Table.Cell content={fiveutrConsequence} />
+      </Table.Row>
+      {Object.entries(counts).map(([field, value]) => (
+        <Table.Row key={field}>
+          <Table.HeaderCell textAlign="right" content={camelcaseToTitlecase(field)} />
+          <Table.Cell content={value} />
+        </Table.Row>
+      ))}
+      {Object.entries(fiveutrAnnotation).filter(e => e[1] !== null).map(([field, value]) => (
+        <Table.Row key={field}>
+          <Table.HeaderCell textAlign="right">
+            {camelcaseToTitlecase(field)}
+            {UTR_ANNOTATOR_DESCRIPTIONS[field] && (
+              <Popup trigger={<HelpIcon color="black" />} content={UTR_ANNOTATOR_DESCRIPTIONS[field]} flowing />
+            )}
+          </Table.HeaderCell>
+          <Table.Cell content={value} />
+        </Table.Row>
+      ))}
+    </Table.Body>
+  </Table>
+)
+
+UtrAnnotatorDetail.propTypes = {
+  fiveutrConsequence: PropTypes.string,
+  fiveutrAnnotation: PropTypes.object,
+}
+
 const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => {
   const {
     rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport,
@@ -571,6 +626,22 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
           <Label color="red" horizontal size="tiny">High Constraint Region</Label>
         </span>
       )}
+      {mainTranscript.utrannotator?.fiveutrConsequence && (
+        <div>
+          <b>UTRAnnotator: &nbsp;</b>
+          <Modal
+            modalName={`${variant.variantId}-utrannotator`}
+            title="UTRAnnotator"
+            trigger={
+              <ButtonLink>
+                {mainTranscript.utrannotator.fiveutrConsequence.replace('5_prime_UTR_', '').replace('_variant', '').replace(/_/g, ' ')}
+              </ButtonLink>
+            }
+          >
+            <UtrAnnotatorDetail {...mainTranscript.utrannotator} />
+          </Modal>
+        </div>
+      )}
       {variant.screenRegionType && (
         <div>
           <b>

From 8ecca190f8d00f9bab018d7272cf9361ebef5a3a Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 4 Jun 2024 12:31:04 -0400
Subject: [PATCH 31/47] shared array formatting

---
 hail_search/queries/base.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 78523f00de..29a18214db 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -76,7 +76,6 @@ class BaseHailTableQuery(object):
         'transcripts': {
             'response_key': 'transcripts',
             'empty_array': True,
-            'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}),
             'format_array_values': lambda values, *args: values.group_by(lambda t: t.geneId),
         },
     }
@@ -168,6 +167,10 @@ def _format_enum_response(self, k, enum):
         value = lambda r: self._format_enum(r, k, enum, ht_globals=self._globals, **enum_config)
         return enum_config.get('response_key', _to_camel_case(k)), value
 
+    @staticmethod
+    def _camelcase_value(value):
+        return value.rename({k: _to_camel_case(k) for k in value.keys()})
+
     @classmethod
     def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=None, **kwargs):
         if hasattr(r, f'{field}_id'):
@@ -177,7 +180,7 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=Non
         if hasattr(value, 'map'):
             if empty_array:
                 value = hl.or_else(value, hl.empty_array(value.dtype.element_type))
-            value = value.map(lambda x: cls._enum_field(field, x, enum, **kwargs))
+            value = value.map(lambda x: cls._enum_field(field, x, enum, **kwargs, format_value=cls._camelcase_value))
             if format_array_values:
                 value = format_array_values(value, r)
             return value

From 6bc8099825e20f0968e9472c74c3ea8f6398b40f Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 4 Jun 2024 14:34:45 -0400
Subject: [PATCH 32/47] shared consequence details ui

---
 .../components/panel/variants/Predictions.jsx |   2 +-
 .../components/panel/variants/Transcripts.jsx | 192 +++++++++++-------
 2 files changed, 114 insertions(+), 80 deletions(-)

diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx
index b55b9e91d3..365f508580 100644
--- a/ui/shared/components/panel/variants/Predictions.jsx
+++ b/ui/shared/components/panel/variants/Predictions.jsx
@@ -112,7 +112,7 @@ class Predictions extends React.PureComponent {
       }
     }
     const mainTranscript = getVariantMainTranscript(variant)
-    if (mainTranscript?.alphamissense.pathogenicity) {
+    if (mainTranscript?.alphamissense?.pathogenicity) {
       genePredictors.alphamissense = {
         field: 'alphamissense',
         fieldValue: mainTranscript.alphamissense.pathogenicity,
diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index 56cbb420c1..10275d5212 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -10,6 +10,7 @@ import { VerticalSpacer } from '../../Spacers'
 import DispatchRequestButton from '../../buttons/DispatchRequestButton'
 import ShowGeneModal from '../../buttons/ShowGeneModal'
 import { ProteinSequence, TranscriptLink } from './VariantUtils'
+import { toCamelcase, camelcaseToTitlecase } from '../../../utils/stringUtils'
 
 const AnnotationSection = styled.div`
   display: inline-block;
@@ -24,6 +25,54 @@ const AnnotationLabel = styled.small`
 
 const HeaderLabel = AnnotationLabel.withComponent('span')
 
+const AnnotationDetail = ({ consequence, title, getContent }) => (
+  <span>
+    <AnnotationLabel>{title}</AnnotationLabel>
+    {getContent ? getContent(consequence) : consequence[toCamelcase(title)]}
+    <br />
+  </span>
+)
+
+AnnotationDetail.propTypes = {
+  consequence: PropTypes.object.isRequired,
+  title: PropTypes.string.isRequired,
+  getContent: PropTypes.func,
+}
+
+export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, annotationSections, ...props }) => (
+  <Table basic="very">
+    <Table.Body>
+      {consequences.map(c => (
+        <Table.Row key={c[idField]}>
+          <Table.Cell width={3}>
+            <TranscriptLink variant={variant} transcript={c} />
+            {idDetails && idDetails(c, variant, props)}
+          </Table.Cell>
+          <Table.Cell width={4}>
+            {c.majorConsequence || c.consequenceTerms.join('; ')}
+          </Table.Cell>
+          <Table.Cell width={9}>
+            {annotationSections.map(([field1, field2]) => (
+              <AnnotationSection>
+                <AnnotationDetail consequence={c} {...field1} />
+                {field2 && <AnnotationDetail consequence={c} {...field2} />}
+              </AnnotationSection>
+            ))}
+          </Table.Cell>
+        </Table.Row>
+      ))}
+    </Table.Body>
+  </Table>
+)
+
+ConsequenceDetails.propTypes = {
+  consequences: PropTypes.arrayOf(PropTypes.object).isRequired,
+  idField: PropTypes.string.isRequired,
+  variant: PropTypes.object,
+  idDetails: PropTypes.func,
+  annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)),
+}
+
 const TRANSCRIPT_LABELS = [
   {
     content: 'Canonical',
@@ -42,7 +91,62 @@ const TRANSCRIPT_LABELS = [
   },
 ]
 
-const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMainTranscript, project }) => (
+const transcriptIdDetails = (transcript, variant, { transcriptsById, project, updateMainTranscript }) => (
+  <div>
+    {transcriptsById[transcript.transcriptId]?.refseqId && (
+      <div>
+        <HeaderLabel>RefSeq:</HeaderLabel>
+        <a
+          href={`https://www.ncbi.nlm.nih.gov/nuccore/${transcriptsById[transcript.transcriptId].refseqId}`}
+          target="_blank"
+          rel="noreferrer"
+        >
+          {transcriptsById[transcript.transcriptId].refseqId}
+        </a>
+      </div>
+    )}
+    {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => (
+      shouldShow(transcript, transcriptsById) && (
+        <Label key={labelProps.content} size="small" horizontal {...labelProps} />
+      )
+    ))}
+    {
+      variant.variantGuid && project?.canEdit && (
+        <span>
+          <VerticalSpacer height={5} />
+          {
+            transcript.transcriptId === variant.selectedMainTranscriptId ?
+              <Label content="User Chosen Transcript" color="purple" size="small" /> : (
+                <DispatchRequestButton
+                  onSubmit={updateMainTranscript(transcript.transcriptId)}
+                  confirmDialog="Are you sure want to update the main transcript for this variant?"
+                >
+                  <Label as="a" content="Use as Main Transcript" color="violet" basic size="small" />
+                </DispatchRequestButton>
+              )
+          }
+        </span>
+      )
+    }
+  </div>
+)
+
+const ANNOTATION_SECTIONS = [
+  [{ title: 'Codons' }, { title: 'Amino Acids' }],
+  [
+    { title: 'Biotype' },
+    {
+      title: 'Intron/Exon',
+      getContent: c => ['intron', 'exon'].filter(f => c[f]).map(f => `${camelcaseToTitlecase(f)} ${c[f].index}/${c[f].total}`).join(', '),
+    },
+  ],
+  [
+    { title: 'HGVS.C', getContent: transcript => transcript.hgvsc && <ProteinSequence hgvs={transcript.hgvsc} /> },
+    { title: 'HGVS.P', getContent: transcript => transcript.hgvsp && <ProteinSequence hgvs={transcript.hgvsp} /> },
+  ],
+]
+
+const Transcripts = React.memo(({ variant, genesById, ...props }) => (
   variant.transcripts && Object.entries(variant.transcripts).sort((transcriptsA, transcriptsB) => (
     Math.min(...transcriptsA[1].map(t => t.transcriptRank)) - Math.min(...transcriptsB[1].map(t => t.transcriptRank))
   )).map(([geneId, geneTranscripts]) => (
@@ -54,84 +158,14 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai
         subheader={`Gene Id: ${geneId}`}
       />
       <Segment attached="bottom">
-        <Table basic="very">
-          <Table.Body>
-            {geneTranscripts.map(transcript => (
-              <Table.Row key={transcript.transcriptId}>
-                <Table.Cell width={3}>
-                  <TranscriptLink variant={variant} transcript={transcript} />
-                  {transcriptsById[transcript.transcriptId]?.refseqId && (
-                    <div>
-                      <HeaderLabel>RefSeq:</HeaderLabel>
-                      <a
-                        href={`https://www.ncbi.nlm.nih.gov/nuccore/${transcriptsById[transcript.transcriptId].refseqId}`}
-                        target="_blank"
-                        rel="noreferrer"
-                      >
-                        {transcriptsById[transcript.transcriptId].refseqId}
-                      </a>
-                    </div>
-                  )}
-                  <div>
-                    {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => (
-                      shouldShow(transcript, transcriptsById) && (
-                        <Label key={labelProps.content} size="small" horizontal {...labelProps} />
-                      )
-                    ))}
-                    {
-                      variant.variantGuid && project?.canEdit && (
-                        <span>
-                          <VerticalSpacer height={5} />
-                          {
-                            transcript.transcriptId === variant.selectedMainTranscriptId ?
-                              <Label content="User Chosen Transcript" color="purple" size="small" /> : (
-                                <DispatchRequestButton
-                                  onSubmit={updateMainTranscript(transcript.transcriptId)}
-                                  confirmDialog="Are you sure want to update the main transcript for this variant?"
-                                >
-                                  <Label as="a" content="Use as Main Transcript" color="violet" basic size="small" />
-                                </DispatchRequestButton>
-                              )
-                          }
-                        </span>
-                      )
-                    }
-                  </div>
-                </Table.Cell>
-                <Table.Cell width={4}>
-                  {transcript.majorConsequence}
-                </Table.Cell>
-                <Table.Cell width={9}>
-                  <AnnotationSection>
-                    <AnnotationLabel>Codons</AnnotationLabel>
-                    {transcript.codons}
-                    <br />
-                    <AnnotationLabel>Amino Acids</AnnotationLabel>
-                    {transcript.aminoAcids}
-                    <br />
-                  </AnnotationSection>
-                  <AnnotationSection>
-                    <AnnotationLabel>Biotype</AnnotationLabel>
-                    {transcript.biotype}
-                    <br />
-                    <AnnotationLabel>Intron/Exon</AnnotationLabel>
-                    {transcript.intron && `Intron ${transcript.intron.index}/${transcript.intron.total}`}
-                    {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index}/${transcript.exon.total}`}
-                    <br />
-                  </AnnotationSection>
-                  <AnnotationSection>
-                    <AnnotationLabel>HGVS.C</AnnotationLabel>
-                    {transcript.hgvsc && <ProteinSequence hgvs={transcript.hgvsc} />}
-                    <br />
-                    <AnnotationLabel>HGVS.P</AnnotationLabel>
-                    {transcript.hgvsp && <ProteinSequence hgvs={transcript.hgvsp} />}
-                    <br />
-                  </AnnotationSection>
-                </Table.Cell>
-              </Table.Row>
-            ))}
-          </Table.Body>
-        </Table>
+        <ConsequenceDetails
+          consequences={geneTranscripts}
+          variant={variant}
+          idField="transcriptId"
+          idDetails={transcriptIdDetails}
+          annotationSections={ANNOTATION_SECTIONS}
+          {...props}
+        />
       </Segment>
       <VerticalSpacer height={10} />
     </div>

From 2c09ccb38bea46509fcc2a6b1056be7b5f557aed Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 4 Jun 2024 14:57:12 -0400
Subject: [PATCH 33/47] show regulatory features

---
 .../components/panel/variants/Annotations.jsx | 25 ++++++++++++++++++-
 .../components/panel/variants/Transcripts.jsx |  9 ++++---
 .../panel/variants/VariantUtils.jsx           |  6 ++---
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 771e96dd10..126af1af33 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -22,7 +22,7 @@ import Modal from '../../modal/Modal'
 import { ButtonLink, HelpIcon } from '../../StyledComponents'
 import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable'
 import { getOtherGeneNames } from '../genes/GeneDetail'
-import Transcripts from './Transcripts'
+import Transcripts, { ConsequenceDetails } from './Transcripts'
 import VariantGenes, { GeneLabelContent, omimPhenotypesDetail } from './VariantGene'
 import {
   getLocus,
@@ -185,6 +185,9 @@ VariantPosition.propTypes = {
   svType: PropTypes.string,
 }
 
+const REGULATORY_FEATURE_SECTIONS = [[{ title: 'Biotype' }]]
+const REGULATORY_FEATURE_LINK = { ensemblEntity: 'Regulation', ensemblKey: 'rf' }
+
 const LOF_FILTER_MAP = {
   END_TRUNC: { title: 'End Truncation', message: 'This variant falls in the last 5% of the transcript' },
   INCOMPLETE_CDS: { title: 'Incomplete CDS', message: 'The start or stop codons are not known for this transcript' },
@@ -626,6 +629,26 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
           <Label color="red" horizontal size="tiny">High Constraint Region</Label>
         </span>
       )}
+      {variant.sortedRegulatoryFeatureConsequences && (
+        <div>
+          <b>Regulatory Feature: &nbsp;</b>
+          <Modal
+            modalName={`${variant.variantId}-regulatory`}
+            title="Regulatory Feature Consequences"
+            trigger={
+              <ButtonLink>{variant.sortedRegulatoryFeatureConsequences[0].consequenceTerms[0].replace(/_/g, ' ')}</ButtonLink>
+            }
+          >
+            <ConsequenceDetails
+              idField="regulatoryFeatureId"
+              consequences={variant.sortedRegulatoryFeatureConsequences}
+              variant={variant}
+              annotationSections={REGULATORY_FEATURE_SECTIONS}
+              ensemblLink={REGULATORY_FEATURE_LINK}
+            />
+          </Modal>
+        </div>
+      )}
       {mainTranscript.utrannotator?.fiveutrConsequence && (
         <div>
           <b>UTRAnnotator: &nbsp;</b>
diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index 10275d5212..8b8a32c1af 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -39,13 +39,15 @@ AnnotationDetail.propTypes = {
   getContent: PropTypes.func,
 }
 
-export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, annotationSections, ...props }) => (
+export const ConsequenceDetails = (
+  { consequences, variant, idField, idDetails, annotationSections, ensemblLink = {}, ...props },
+) => (
   <Table basic="very">
     <Table.Body>
       {consequences.map(c => (
         <Table.Row key={c[idField]}>
           <Table.Cell width={3}>
-            <TranscriptLink variant={variant} transcript={c} />
+            <TranscriptLink variant={variant} transcript={c} idField={idField} {...ensemblLink} />
             {idDetails && idDetails(c, variant, props)}
           </Table.Cell>
           <Table.Cell width={4}>
@@ -53,7 +55,7 @@ export const ConsequenceDetails = ({ consequences, variant, idField, idDetails,
           </Table.Cell>
           <Table.Cell width={9}>
             {annotationSections.map(([field1, field2]) => (
-              <AnnotationSection>
+              <AnnotationSection key={field1.title}>
                 <AnnotationDetail consequence={c} {...field1} />
                 {field2 && <AnnotationDetail consequence={c} {...field2} />}
               </AnnotationSection>
@@ -71,6 +73,7 @@ ConsequenceDetails.propTypes = {
   variant: PropTypes.object,
   idDetails: PropTypes.func,
   annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)),
+  ensemblLink: PropTypes.object,
 }
 
 const TRANSCRIPT_LABELS = [
diff --git a/ui/shared/components/panel/variants/VariantUtils.jsx b/ui/shared/components/panel/variants/VariantUtils.jsx
index 663ff8b58c..d50b0984c5 100644
--- a/ui/shared/components/panel/variants/VariantUtils.jsx
+++ b/ui/shared/components/panel/variants/VariantUtils.jsx
@@ -10,10 +10,10 @@ const SequenceContainer = styled.span`
   color: ${props => props.color || 'inherit'};
 `
 
-export const TranscriptLink = styled.a.attrs(({ variant, transcript }) => ({
+export const TranscriptLink = styled.a.attrs(({ variant, transcript, idField = 'transcriptId', ensemblEntity = 'Transcript', ensemblKey = 't' }) => ({
   target: '_blank',
-  href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 'grch37' : 'useast'}.ensembl.org/Homo_sapiens/Transcript/Summary?t=${transcript.transcriptId}`,
-  children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? transcript.hgvsc.split(':')[0] : transcript.transcriptId,
+  href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 'grch37' : 'useast'}.ensembl.org/Homo_sapiens/${ensemblEntity}/Summary?${ensemblKey}=${transcript[idField]}`,
+  children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? transcript.hgvsc.split(':')[0] : transcript[idField],
 }))`
   font-size: 1.3em;
   font-weight: normal;

From ae6550c57d5edd7a0a1aec22e5bc2c1c9cbff4d6 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 4 Jun 2024 17:41:22 -0400
Subject: [PATCH 34/47] show motif features

---
 .../components/panel/variants/Annotations.jsx | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 126af1af33..4f5b65db72 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -185,8 +185,11 @@ VariantPosition.propTypes = {
   svType: PropTypes.string,
 }
 
-const REGULATORY_FEATURE_SECTIONS = [[{ title: 'Biotype' }]]
 const REGULATORY_FEATURE_LINK = { ensemblEntity: 'Regulation', ensemblKey: 'rf' }
+const CONSEQUENCE_FEATURES = [
+  { name: 'Regulatory', annotationSections: [[{ title: 'Biotype' }]] },
+  { name: 'Motif', annotationSections: [] },
+].map(f => ({ ...f, field: `sorted${f.name}FeatureConsequences`, idField: `${f.name.toLowerCase()}FeatureId` }))
 
 const LOF_FILTER_MAP = {
   END_TRUNC: { title: 'End Truncation', message: 'This variant falls in the last 5% of the transcript' },
@@ -629,26 +632,23 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
           <Label color="red" horizontal size="tiny">High Constraint Region</Label>
         </span>
       )}
-      {variant.sortedRegulatoryFeatureConsequences && (
+      {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => (
         <div>
-          <b>Regulatory Feature: &nbsp;</b>
+          <b>{`${name} Feature: `}</b>
           <Modal
-            modalName={`${variant.variantId}-regulatory`}
-            title="Regulatory Feature Consequences"
-            trigger={
-              <ButtonLink>{variant.sortedRegulatoryFeatureConsequences[0].consequenceTerms[0].replace(/_/g, ' ')}</ButtonLink>
-            }
+            modalName={`${variant.variantId}-${name}`}
+            title={`${name} Feature Consequences`}
+            trigger={<ButtonLink>{variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}</ButtonLink>}
           >
             <ConsequenceDetails
-              idField="regulatoryFeatureId"
-              consequences={variant.sortedRegulatoryFeatureConsequences}
+              consequences={variant[field]}
               variant={variant}
-              annotationSections={REGULATORY_FEATURE_SECTIONS}
               ensemblLink={REGULATORY_FEATURE_LINK}
+              {...props}
             />
           </Modal>
         </div>
-      )}
+      ))}
       {mainTranscript.utrannotator?.fiveutrConsequence && (
         <div>
           <b>UTRAnnotator: &nbsp;</b>

From e319f9093d42b6f03358580567178e27bb637621 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 5 Jun 2024 10:55:40 -0400
Subject: [PATCH 35/47] Revert "debug code"

This reverts commit bcaba7c1eea1a647249c4a9258776d97de55c91c.
---
 hail_search/queries/base.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 29a18214db..9d9738c415 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -35,8 +35,6 @@ def _to_camel_case(snake_case_str):
 
 class BaseHailTableQuery(object):
 
-    ANNS_HT = 'annotations.ht'
-
     DATA_TYPE = None
     KEY_FIELD = None
     LOADED_GLOBALS = None
@@ -306,14 +304,11 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_
         project_hts = []
         sample_data = {}
         for project_guid, project_sample_data in project_samples.items():
-            try:
-                project_ht = self._read_table(
-                    f'projects/{project_guid}.ht',
-                    use_ssd_dir=True,
-                    skip_missing_field='family_entries' if skip_all_missing else None,
-                )
-            except Exception as e:
-                project_ht = None
+            project_ht = self._read_table(
+                f'projects/{project_guid}.ht',
+                use_ssd_dir=True,
+                skip_missing_field='family_entries' if skip_all_missing else None,
+            )
             if project_ht is None:
                 continue
             project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples'))
@@ -1090,7 +1085,7 @@ def gene_counts(self):
 
     def lookup_variants(self, variant_ids, include_project_data=False, **kwargs):
         self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids)
-        ht = self._read_table(self.ANNS_HT, drop_globals=['paths', 'versions'])
+        ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions'])
         ht = ht.filter(hl.is_defined(ht[XPOS]))
 
         annotation_fields = self.annotation_fields(include_genotype_overrides=False)

From 1d2fa049eb25f6462dc4d4af3120d9430541d6ca Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 5 Jun 2024 10:57:44 -0400
Subject: [PATCH 36/47] Revert "debug code"

This reverts commit 2c7d85ad6ca9eb574a5e29173b0c2effe7f635ca.
---
 hail_search/queries/base.py          | 2 +-
 hail_search/queries/ont_snv_indel.py | 1 -
 hail_search/queries/snv_indel.py     | 1 -
 hail_search/queries/snv_indel_37.py  | 2 +-
 4 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 9d9738c415..341fc8a6a8 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -89,7 +89,7 @@ class BaseHailTableQuery(object):
 
     @classmethod
     def load_globals(cls):
-        ht_path = cls._get_table_path(cls.ANNS_HT)
+        ht_path = cls._get_table_path('annotations.ht')
         ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS))
         cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS}
 
diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py
index fac3d12d4d..dc99ad8e18 100644
--- a/hail_search/queries/ont_snv_indel.py
+++ b/hail_search/queries/ont_snv_indel.py
@@ -7,7 +7,6 @@
 class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery):
 
     DATA_TYPE = 'ONT_SNV_INDEL'
-    ANNS_HT = 'annotations.ht'
 
     CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS
 
diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py
index 5b42570aeb..a95890e038 100644
--- a/hail_search/queries/snv_indel.py
+++ b/hail_search/queries/snv_indel.py
@@ -11,7 +11,6 @@
 class SnvIndelHailTableQuery(MitoHailTableQuery):
 
     DATA_TYPE = 'SNV_INDEL'
-    ANNS_HT = 'annotations_vep_110.ht'
 
     GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']}
     QUALITY_FILTER_FORMAT = {
diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py
index 3c0a9f2aa5..d43b92cbe6 100644
--- a/hail_search/queries/snv_indel_37.py
+++ b/hail_search/queries/snv_indel_37.py
@@ -5,7 +5,7 @@
 
 
 class SnvIndelHailTableQuery37(SnvIndelHailTableQuery):
-    ANNS_HT = 'annotations.ht'
+
     GENOME_VERSION = GENOME_VERSION_GRCh37
     PREDICTION_FIELDS_CONFIG = SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS
     LIFTOVER_ANNOTATION_FIELDS = {}

From f6d26b2587157013790200e3e4db88c4a36a9a76 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 5 Jun 2024 15:29:44 -0400
Subject: [PATCH 37/47] adjust annotation order

---
 .../components/panel/variants/Annotations.jsx | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 4f5b65db72..ce8ff11866 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -632,23 +632,6 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
           <Label color="red" horizontal size="tiny">High Constraint Region</Label>
         </span>
       )}
-      {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => (
-        <div>
-          <b>{`${name} Feature: `}</b>
-          <Modal
-            modalName={`${variant.variantId}-${name}`}
-            title={`${name} Feature Consequences`}
-            trigger={<ButtonLink>{variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}</ButtonLink>}
-          >
-            <ConsequenceDetails
-              consequences={variant[field]}
-              variant={variant}
-              ensemblLink={REGULATORY_FEATURE_LINK}
-              {...props}
-            />
-          </Modal>
-        </div>
-      ))}
       {mainTranscript.utrannotator?.fiveutrConsequence && (
         <div>
           <b>UTRAnnotator: &nbsp;</b>
@@ -673,6 +656,23 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
           </b>
         </div>
       )}
+      {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => (
+        <div>
+          <b>{`${name} Feature: `}</b>
+          <Modal
+            modalName={`${variant.variantId}-${name}`}
+            title={`${name} Feature Consequences`}
+            trigger={<ButtonLink>{variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}</ButtonLink>}
+          >
+            <ConsequenceDetails
+              consequences={variant[field]}
+              variant={variant}
+              ensemblLink={REGULATORY_FEATURE_LINK}
+              {...props}
+            />
+          </Modal>
+        </div>
+      ))}
       {mainTranscript.hgvsc && (
         <div>
           <b>HGVS.C</b>

From 8d20abcc16f396479b681b869bc8a7e651eabc3a Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 5 Jun 2024 15:35:55 -0400
Subject: [PATCH 38/47] show utrannotator in transcript detail

---
 ui/shared/components/panel/variants/Transcripts.jsx | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index 8b8a32c1af..efdb7be9f6 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -40,7 +40,7 @@ AnnotationDetail.propTypes = {
 }
 
 export const ConsequenceDetails = (
-  { consequences, variant, idField, idDetails, annotationSections, ensemblLink = {}, ...props },
+  { consequences, variant, idField, idDetails, consequenceDetails, annotationSections, ensemblLink = {}, ...props },
 ) => (
   <Table basic="very">
     <Table.Body>
@@ -52,6 +52,7 @@ export const ConsequenceDetails = (
           </Table.Cell>
           <Table.Cell width={4}>
             {c.majorConsequence || c.consequenceTerms.join('; ')}
+            {consequenceDetails && consequenceDetails(c)}
           </Table.Cell>
           <Table.Cell width={9}>
             {annotationSections.map(([field1, field2]) => (
@@ -72,6 +73,7 @@ ConsequenceDetails.propTypes = {
   idField: PropTypes.string.isRequired,
   variant: PropTypes.object,
   idDetails: PropTypes.func,
+  consequenceDetails: PropTypes.func,
   annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)),
   ensemblLink: PropTypes.object,
 }
@@ -134,6 +136,13 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up
   </div>
 )
 
+const transcriptConsequenceDetails = ({ utrannotator }) => utrannotator?.fiveutrConsequence && (
+  <div>
+    <HeaderLabel>UTRAnnotator:</HeaderLabel>
+    {utrannotator.fiveutrConsequence}
+  </div>
+)
+
 const ANNOTATION_SECTIONS = [
   [{ title: 'Codons' }, { title: 'Amino Acids' }],
   [
@@ -166,6 +175,7 @@ const Transcripts = React.memo(({ variant, genesById, ...props }) => (
           variant={variant}
           idField="transcriptId"
           idDetails={transcriptIdDetails}
+          consequenceDetails={transcriptConsequenceDetails}
           annotationSections={ANNOTATION_SECTIONS}
           {...props}
         />

From fc3ca3dbf7c4612a601713df7a014a5adcbeda89 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 5 Jun 2024 15:48:50 -0400
Subject: [PATCH 39/47] do not raise unhandled error on airtable mismatch

---
 seqr/views/apis/summary_data_api_tests.py | 8 ++++----
 seqr/views/utils/airtable_utils.py        | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py
index 96a6f5d580..441bce90db 100644
--- a/seqr/views/apis/summary_data_api_tests.py
+++ b/seqr/views/apis/summary_data_api_tests.py
@@ -679,10 +679,10 @@ def test_sample_metadata_export(self, mock_google_authenticated):
         responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Collaborator'.format(AIRTABLE_URL),
                       json=AIRTABLE_COLLABORATOR_RECORDS, status=200)
         response = self.client.get(include_airtable_url)
-        self.assertEqual(response.status_code, 500)
-        self.assertEqual(
-            response.json()['error'],
-            'Found multiple airtable records for sample NA19675 with mismatched values in field dbgap_study_id')
+        self.assertEqual(response.status_code, 400)
+        self.assertListEqual(
+            response.json()['errors'],
+            ['Found multiple airtable records for sample NA19675 with mismatched values in field dbgap_study_id'])
         self.assertEqual(len(responses.calls), 4)
         first_formula = "OR({CollaboratorSampleID}='NA20885',{CollaboratorSampleID}='NA20888')"
         expected_fields = [
diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py
index f6a80f09ff..027e5785ce 100644
--- a/seqr/views/utils/airtable_utils.py
+++ b/seqr/views/utils/airtable_utils.py
@@ -2,6 +2,7 @@
 from collections import defaultdict
 from django.core.exceptions import PermissionDenied
 
+from seqr.utils.middleware import ErrorsWarningsException
 from seqr.utils.logging_utils import SeqrLogger
 from seqr.views.utils.terra_api_utils import is_google_authenticated
 
@@ -138,7 +139,7 @@ def get_airtable_samples(sample_ids, user, fields, list_fields=None):
             if len(record_field) > 1:
                 error = 'Found multiple airtable records for sample {} with mismatched values in field {}'.format(
                     record_id, field)
-                raise Exception(error)
+                raise ErrorsWarningsException([error])
             if record_field:
                 parsed_record[field] = record_field.pop()
         for field in list_fields:

From 148e8cf80d0599c0552430b397d74cf5d4c2128c Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 6 Jun 2024 10:04:11 -0400
Subject: [PATCH 40/47] fix AoU link

---
 ui/shared/components/panel/variants/Annotations.jsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 3af864824a..446f60b7eb 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -267,7 +267,7 @@ const VARIANT_LINKS = [
   {
     name: 'AoU',
     shouldShow: ({ svType }) => !svType,
-    getHref: ({ chrom, pos, ref, alt }) => `https://databrowser.researchallofus.org/genomic-variants/${chrom}-${pos}-${ref}-${alt}`,
+    getHref: ({ chrom, pos, ref, alt }) => `https://databrowser.researchallofus.org/variants/${chrom}-${pos}-${ref}-${alt}`,
   },
   {
     name: 'Iranome',

From bc1f3154dc3a89746a204b2609e7639541cc22e1 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 6 Jun 2024 14:07:56 -0400
Subject: [PATCH 41/47] show extended intronic splice region

---
 ui/shared/components/panel/variants/Annotations.jsx |  3 ++-
 ui/shared/components/panel/variants/Transcripts.jsx | 13 +++++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index 19daaed793..2cb9f9f759 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -22,7 +22,7 @@ import Modal from '../../modal/Modal'
 import { ButtonLink, HelpIcon } from '../../StyledComponents'
 import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable'
 import { getOtherGeneNames } from '../genes/GeneDetail'
-import Transcripts, { ConsequenceDetails } from './Transcripts'
+import Transcripts, { ConsequenceDetails, ExtendedSpliceLabel } from './Transcripts'
 import VariantGenes, { GeneLabelContent, omimPhenotypesDetail } from './VariantGene'
 import {
   getLocus,
@@ -632,6 +632,7 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
           <Label color="red" horizontal size="tiny">High Constraint Region</Label>
         </span>
       )}
+      <ExtendedSpliceLabel {...mainTranscript} />
       {mainTranscript.utrannotator?.fiveutrConsequence && (
         <div>
           <b>UTRAnnotator: &nbsp;</b>
diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index efdb7be9f6..0449243985 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -136,10 +136,19 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up
   </div>
 )
 
-const transcriptConsequenceDetails = ({ utrannotator }) => utrannotator?.fiveutrConsequence && (
+export const ExtendedSpliceLabel = ({ spliceregion }) => spliceregion?.extended_intronic_splice_region_variant && (
+  <Label size="small" horizontal color="yellow" content="Extended Intronic Splice Region" />
+)
+
+ExtendedSpliceLabel.propTypes = {
+  spliceregion: PropTypes.object,
+}
+
+const transcriptConsequenceDetails = ({ utrannotator, ...transcript }) => (
   <div>
-    <HeaderLabel>UTRAnnotator:</HeaderLabel>
-    {utrannotator.fiveutrConsequence}
+    {utrannotator?.fiveutrConsequence && <HeaderLabel>UTRAnnotator:</HeaderLabel>}
+    {utrannotator?.fiveutrConsequence}
+    <ExtendedSpliceLabel {...transcript} />
   </div>
 )
 

From fa79427a4b5025c149d27ccd2d13b5463700448b Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 6 Jun 2024 14:13:22 -0400
Subject: [PATCH 42/47] annotate transcriptRank in search backend

---
 hail_search/queries/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index efacff45bd..0baec779c5 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -75,7 +75,7 @@ class BaseHailTableQuery(object):
             'response_key': 'transcripts',
             'empty_array': True,
             'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}),
-            'format_array_values': lambda values, *args: values.group_by(lambda t: t.geneId),
+            'format_array_values': lambda values, *args: hl.enumerate(values).starmap(lambda i, v: v.annotate(transcriptRank=i)).group_by(lambda t: t.geneId),
         },
     }
     LIFTOVER_ANNOTATION_FIELDS = {

From 012d559968f3b4b4d6b92430baa186ab9f876722 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 6 Jun 2024 14:37:12 -0400
Subject: [PATCH 43/47] annotate transcriptRank correctly

---
 hail_search/queries/base.py |  2 +-
 hail_search/queries/mito.py |  3 +++
 hail_search/test_search.py  |  2 +-
 hail_search/test_utils.py   | 10 +++++-----
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 0baec779c5..efacff45bd 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -75,7 +75,7 @@ class BaseHailTableQuery(object):
             'response_key': 'transcripts',
             'empty_array': True,
             'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}),
-            'format_array_values': lambda values, *args: hl.enumerate(values).starmap(lambda i, v: v.annotate(transcriptRank=i)).group_by(lambda t: t.geneId),
+            'format_array_values': lambda values, *args: values.group_by(lambda t: t.geneId),
         },
     }
     LIFTOVER_ANNOTATION_FIELDS = {
diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py
index 8b17734dd3..f59811ee54 100644
--- a/hail_search/queries/mito.py
+++ b/hail_search/queries/mito.py
@@ -93,6 +93,9 @@ class MitoHailTableQuery(BaseHailTableQuery):
             **BaseHailTableQuery.ENUM_ANNOTATION_FIELDS['transcripts'],
             'annotate_value': lambda transcript, *args: {'major_consequence': transcript.consequence_terms.first()},
             'drop_fields': ['consequence_terms'],
+            'format_array_values': lambda values, *args: BaseHailTableQuery.ENUM_ANNOTATION_FIELDS['transcripts']['format_array_values'](values).map_values(
+                lambda transcripts: hl.enumerate(transcripts).starmap(lambda i, t: t.annotate(transcriptRank=i))
+            ),
         }
     }
 
diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index cc1586df32..38c099bd1a 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -112,7 +112,7 @@
         'ENSG00000176227': [
             {'aminoAcids': None, 'canonical': 1, 'codons': None, 'geneId': 'ENSG00000176227',
              'hgvsc': 'ENST00000447022.1:n.1354A>G', 'hgvsp': None,
-             'transcriptId': 'ENST00000447022', 'isLofNagnag': None, 'transcriptRank': 1,
+             'transcriptId': 'ENST00000447022', 'isLofNagnag': None, 'transcriptRank': 0,
              'biotype': 'processed_pseudogene', 'lofFilters': None, 'majorConsequence': 'non_coding_transcript_exon_variant'},
         ],
     },
diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py
index 7da21ce4af..920825e7b2 100644
--- a/hail_search/test_utils.py
+++ b/hail_search/test_utils.py
@@ -229,10 +229,10 @@
            {'aminoAcids': 'E/A', 'canonical': None, 'codons': 'gAa/gCa', 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000376592.6:c.1286A>C', 'hgvsp': 'ENSP00000365777.1:p.Glu429Ala', 'transcriptId': 'ENST00000376592', 'isLofNagnag': None, 'transcriptRank': 3, 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'missense_variant'},
            {'aminoAcids': 'E/A', 'canonical': None, 'codons': 'gAa/gCa', 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000423400.7:c.1406A>C', 'hgvsp': 'ENSP00000398908.3:p.Glu469Ala', 'transcriptId': 'ENST00000423400', 'isLofNagnag': None, 'transcriptRank': 4, 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'missense_variant'},
            {'aminoAcids': 'E/A', 'canonical': None, 'codons': 'gAa/gCa', 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641407.1:c.1286A>C', 'hgvsp': 'ENSP00000493098.1:p.Glu429Ala', 'transcriptId': 'ENST00000641407', 'isLofNagnag': None, 'transcriptRank': 5, 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'missense_variant'},
-           {'aminoAcids': 'E/A', 'canonical': None, 'codons': 'gAa/gCa', 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641446.1:c.1286A>C', 'hgvsp': 'ENSP00000493262.1:p.Glu429Ala', 'transcriptId': 'ENST00000641446', 'isLofNagnag': None, 'transcriptRank': 7, 'biotype': 'nonsense_mediated_decay', 'lofFilters': None, 'majorConsequence': 'missense_variant'},
-           {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641747.1:c.*798A>C', 'hgvsp': None, 'transcriptId': 'ENST00000641747', 'isLofNagnag': None, 'transcriptRank': 8, 'biotype': 'nonsense_mediated_decay', 'lofFilters': None, 'majorConsequence': '3_prime_UTR_variant'},
-           {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641759.1:n.1655A>C', 'hgvsp': None, 'transcriptId': 'ENST00000641759', 'isLofNagnag': None, 'transcriptRank': 9, 'biotype': 'retained_intron', 'lofFilters': None, 'majorConsequence': 'non_coding_transcript_exon_variant'},
-           {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641805.1:n.1803A>C', 'hgvsp': None, 'transcriptId': 'ENST00000641805', 'isLofNagnag': None, 'transcriptRank': 10, 'biotype': 'retained_intron', 'lofFilters': None, 'majorConsequence': 'non_coding_transcript_exon_variant'},
+           {'aminoAcids': 'E/A', 'canonical': None, 'codons': 'gAa/gCa', 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641446.1:c.1286A>C', 'hgvsp': 'ENSP00000493262.1:p.Glu429Ala', 'transcriptId': 'ENST00000641446', 'isLofNagnag': None, 'transcriptRank': 6, 'biotype': 'nonsense_mediated_decay', 'lofFilters': None, 'majorConsequence': 'missense_variant'},
+           {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641747.1:c.*798A>C', 'hgvsp': None, 'transcriptId': 'ENST00000641747', 'isLofNagnag': None, 'transcriptRank': 7, 'biotype': 'nonsense_mediated_decay', 'lofFilters': None, 'majorConsequence': '3_prime_UTR_variant'},
+           {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641759.1:n.1655A>C', 'hgvsp': None, 'transcriptId': 'ENST00000641759', 'isLofNagnag': None, 'transcriptRank': 8, 'biotype': 'retained_intron', 'lofFilters': None, 'majorConsequence': 'non_coding_transcript_exon_variant'},
+           {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000641805.1:n.1803A>C', 'hgvsp': None, 'transcriptId': 'ENST00000641805', 'isLofNagnag': None, 'transcriptRank': 9, 'biotype': 'retained_intron', 'lofFilters': None, 'majorConsequence': 'non_coding_transcript_exon_variant'},
        ],
        'ENSG00000277258': [
            {'aminoAcids': 'E/A', 'canonical': None, 'codons': 'gAa/gCa', 'geneId': 'ENSG00000277258', 'hgvsc': 'ENST00000641820.1:c.551A>C', 'hgvsp': 'ENSP00000492937.1:p.Glu184Ala', 'transcriptId': 'ENST00000641820', 'isLofNagnag': None, 'transcriptRank': 0, 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'missense_variant'},
@@ -303,7 +303,7 @@
             {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000097046', 'hgvsc': 'ENST00000426137.1:c.115+890G>A', 'hgvsp': None, 'transcriptId': 'ENST00000426137', 'isLofNagnag': None, 'transcriptRank': 2, 'biotype': 'protein_coding', 'lofFilters': None, 'majorConsequence': 'non_coding_transcript_exon_variant'},
         ],
         'ENSG00000177000': [
-            {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000497611.1:n.501+890G>A', 'hgvsp': None, 'transcriptId': 'ENST00000497611', 'isLofNagnag': None, 'transcriptRank': 3, 'biotype': 'processed_transcript', 'lofFilters': None, 'majorConsequence': 'intron_variant'},
+            {'aminoAcids': None, 'canonical': None, 'codons': None, 'geneId': 'ENSG00000177000', 'hgvsc': 'ENST00000497611.1:n.501+890G>A', 'hgvsp': None, 'transcriptId': 'ENST00000497611', 'isLofNagnag': None, 'transcriptRank': 0, 'biotype': 'processed_transcript', 'lofFilters': None, 'majorConsequence': 'intron_variant'},
         ],
     },
     'mainTranscriptId': 'ENST00000428239',

From c6cc0fc6c75e9fd899474e6fc9b184df60972b43 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 6 Jun 2024 16:32:09 -0400
Subject: [PATCH 44/47] swap snv_indel search class inheritance

---
 hail_search/queries/base.py         |   2 +-
 hail_search/queries/snv_indel.py    | 119 +++-------------------------
 hail_search/queries/snv_indel_37.py | 117 +++++++++++++++++++++++++--
 3 files changed, 120 insertions(+), 118 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index efacff45bd..424bea3c55 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -642,7 +642,7 @@ def _parse_intervals(self, intervals, gene_ids=None, **kwargs):
         return parsed_intervals
 
     def _should_add_chr_prefix(self):
-        return True
+        return self.GENOME_VERSION == GENOME_VERSION_GRCh38
 
     def _filter_by_frequency(self, ht, frequencies, pathogenicity):
         frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS}
diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py
index a95890e038..6567003bb3 100644
--- a/hail_search/queries/snv_indel.py
+++ b/hail_search/queries/snv_indel.py
@@ -1,79 +1,22 @@
 from collections import OrderedDict
-import hail as hl
 
-from hail_search.constants import CLINVAR_KEY, CLINVAR_MITO_KEY, HGMD_KEY, HGMD_PATH_RANGES, \
-    GNOMAD_GENOMES_FIELD, PREFILTER_FREQ_CUTOFF, PATH_FREQ_OVERRIDE_CUTOFF, PATHOGENICTY_SORT_KEY, PATHOGENICTY_HGMD_SORT_KEY, \
-    SCREEN_KEY, SPLICE_AI_FIELD
-from hail_search.queries.base import PredictionPath, QualityFilterFormat
-from hail_search.queries.mito import MitoHailTableQuery
+from hail_search.constants import GENOME_VERSION_GRCh38, SCREEN_KEY, PREFILTER_FREQ_CUTOFF
+from hail_search.queries.base import BaseHailTableQuery, PredictionPath
+from hail_search.queries.snv_indel_37 import SnvIndelHailTableQuery37
 
 
-class SnvIndelHailTableQuery(MitoHailTableQuery):
+class SnvIndelHailTableQuery(SnvIndelHailTableQuery37):
 
-    DATA_TYPE = 'SNV_INDEL'
-
-    GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']}
-    QUALITY_FILTER_FORMAT = {
-        'AB': QualityFilterFormat(override=lambda gt: ~gt.GT.is_het(), scale=100),
-    }
-    POPULATIONS = {
-        'seqr': {'hom': 'hom', 'hemi': None, 'het': None, 'sort': 'callset_af'},
-        'topmed': {'hemi': None},
-        'exac': {
-            'filter_af': 'AF_POPMAX', 'ac': 'AC_Adj', 'an': 'AN_Adj', 'hom': 'AC_Hom', 'hemi': 'AC_Hemi',
-            'het': 'AC_Het',
-        },
-        'gnomad_exomes': {'filter_af': 'AF_POPMAX_OR_GLOBAL', 'het': None, 'sort': 'gnomad_exomes'},
-        GNOMAD_GENOMES_FIELD: {'filter_af': 'AF_POPMAX_OR_GLOBAL', 'het': None, 'sort': 'gnomad'},
-    }
-    PREDICTION_FIELDS_CONFIG_ALL_BUILDS = {
-        'cadd': PredictionPath('cadd', 'PHRED'),
-        'eigen': PredictionPath('eigen', 'Eigen_phred'),
-        'mpc': PredictionPath('mpc', 'MPC'),
-        'primate_ai': PredictionPath('primate_ai', 'score'),
-        SPLICE_AI_FIELD: PredictionPath(SPLICE_AI_FIELD, 'delta_score'),
-        'splice_ai_consequence': PredictionPath(SPLICE_AI_FIELD, 'splice_consequence'),
-        'mut_taster': PredictionPath('dbnsfp', 'MutationTaster_pred'),
-        'polyphen': PredictionPath('dbnsfp', 'Polyphen2_HVAR_score'),
-        'revel': PredictionPath('dbnsfp', 'REVEL_score'),
-        'sift': PredictionPath('dbnsfp', 'SIFT_score'),
-    }
-    PREDICTION_FIELDS_CONFIG_38 = {
+    GENOME_VERSION = GENOME_VERSION_GRCh38
+    PREDICTION_FIELDS_CONFIG = {
+        **SnvIndelHailTableQuery37.PREDICTION_FIELDS_CONFIG,
         'fathmm': PredictionPath('dbnsfp', 'fathmm_MKL_coding_score'),
         'mut_pred': PredictionPath('dbnsfp', 'MutPred_score'),
         'vest': PredictionPath('dbnsfp', 'VEST4_score'),
         'gnomad_noncoding': PredictionPath('gnomad_non_coding_constraint', 'z_score'),
     }
-    PREDICTION_FIELDS_CONFIG = {
-        **PREDICTION_FIELDS_CONFIG_ALL_BUILDS,
-        **PREDICTION_FIELDS_CONFIG_38
-    }
-    PATHOGENICITY_FILTERS = {
-        **MitoHailTableQuery.PATHOGENICITY_FILTERS,
-        HGMD_KEY: ('class', HGMD_PATH_RANGES),
-    }
-    PATHOGENICITY_FIELD_MAP = {}
-    ANNOTATION_OVERRIDE_FIELDS = [SPLICE_AI_FIELD, SCREEN_KEY]
-
-    BASE_ANNOTATION_FIELDS = {
-        k: v for k, v in MitoHailTableQuery.BASE_ANNOTATION_FIELDS.items()
-        if k not in MitoHailTableQuery.MITO_ANNOTATION_FIELDS
-    }
-    ENUM_ANNOTATION_FIELDS = {
-        **MitoHailTableQuery.ENUM_ANNOTATION_FIELDS,
-        'screen': {
-            'response_key': 'screenRegionType',
-            'format_value': lambda value: value.region_types.first(),
-        },
-    }
-    ENUM_ANNOTATION_FIELDS[CLINVAR_KEY] = ENUM_ANNOTATION_FIELDS.pop(CLINVAR_MITO_KEY)
-
-    SORTS = {
-        **MitoHailTableQuery.SORTS,
-        PATHOGENICTY_SORT_KEY: lambda r: [MitoHailTableQuery.CLINVAR_SORT(CLINVAR_KEY, r)],
-        PATHOGENICTY_HGMD_SORT_KEY: lambda r: [MitoHailTableQuery.CLINVAR_SORT(CLINVAR_KEY, r), r.hgmd.class_id],
-    }
-
+    LIFTOVER_ANNOTATION_FIELDS = BaseHailTableQuery.LIFTOVER_ANNOTATION_FIELDS
+    ANNOTATION_OVERRIDE_FIELDS = SnvIndelHailTableQuery37.ANNOTATION_OVERRIDE_FIELDS + [SCREEN_KEY]
     FREQUENCY_PREFILTER_FIELDS = OrderedDict([
         (True, PREFILTER_FREQ_CUTOFF),
         ('is_gt_3_percent', 0.03),
@@ -81,55 +24,11 @@ class SnvIndelHailTableQuery(MitoHailTableQuery):
         ('is_gt_10_percent', 0.1),
     ])
 
-    def _prefilter_entries_table(self, ht, *args, **kwargs):
-        ht = super()._prefilter_entries_table(ht, *args, **kwargs)
-        if 'variant_ht' not in self._load_table_kwargs and not self._load_table_kwargs.get('_filter_intervals'):
-            af_ht = self._get_loaded_filter_ht(
-                GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs)
-            if af_ht:
-                ht = ht.filter(hl.is_missing(af_ht[ht.key]))
-        return ht
-
-    def _get_gnomad_af_prefilter(self, frequencies=None, pathogenicity=None, **kwargs):
-        gnomad_genomes_filter = (frequencies or {}).get(GNOMAD_GENOMES_FIELD, {})
-        af_cutoff = gnomad_genomes_filter.get('af')
-        if af_cutoff is None and gnomad_genomes_filter.get('ac') is not None:
-            af_cutoff = PREFILTER_FREQ_CUTOFF
-        if af_cutoff is None:
-            return False
-
-        af_cutoff_field = self._get_af_prefilter_field(af_cutoff)
-        if af_cutoff_field is None:
-            return False
-
-        af_filter = True if af_cutoff_field is True else lambda ht: ht[af_cutoff_field]
-
-        if af_cutoff < PATH_FREQ_OVERRIDE_CUTOFF:
-            clinvar_path_ht = self._get_loaded_clinvar_prefilter_ht(pathogenicity)
-            if clinvar_path_ht is not False:
-                path_cutoff_field = self._get_af_prefilter_field(PATH_FREQ_OVERRIDE_CUTOFF)
-                non_clinvar_filter = lambda ht: hl.is_missing(clinvar_path_ht[ht.key])
-                if af_filter is not True:
-                    non_clinvar_filter = lambda ht: non_clinvar_filter(ht) & af_filter(ht)
-                af_filter = lambda ht: ht[path_cutoff_field] | non_clinvar_filter(ht)
-
-        return af_filter
-
-    def _get_af_prefilter_field(self, af_cutoff):
-        return next((field for field, cutoff in self.FREQUENCY_PREFILTER_FIELDS.items() if af_cutoff <= cutoff), None)
-
     def _get_annotation_override_filters(self, ht, annotation_overrides):
         annotation_filters = super()._get_annotation_override_filters(ht, annotation_overrides)
 
         if annotation_overrides.get(SCREEN_KEY):
             allowed_consequences = hl.set(self._get_enum_terms_ids(SCREEN_KEY.lower(), 'region_type', annotation_overrides[SCREEN_KEY]))
             annotation_filters.append(allowed_consequences.contains(ht.screen.region_type_ids.first()))
-        if annotation_overrides.get(SPLICE_AI_FIELD):
-            score_filter, _ = self._get_in_silico_filter(ht, SPLICE_AI_FIELD, annotation_overrides[SPLICE_AI_FIELD])
-            annotation_filters.append(score_filter)
 
         return annotation_filters
-
-    @staticmethod
-    def _stat_has_non_ref(s):
-        return (s.het_samples > 0) | (s.hom_samples > 0)
diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py
index d43b92cbe6..df0015e151 100644
--- a/hail_search/queries/snv_indel_37.py
+++ b/hail_search/queries/snv_indel_37.py
@@ -1,19 +1,122 @@
 from collections import OrderedDict
+import hail as hl
 
-from hail_search.constants import GENOME_VERSION_GRCh37, PREFILTER_FREQ_CUTOFF
-from hail_search.queries.snv_indel import SnvIndelHailTableQuery
+from hail_search.constants import CLINVAR_KEY, CLINVAR_MITO_KEY, HGMD_KEY, HGMD_PATH_RANGES, \
+    GNOMAD_GENOMES_FIELD, PREFILTER_FREQ_CUTOFF, PATH_FREQ_OVERRIDE_CUTOFF, PATHOGENICTY_SORT_KEY, PATHOGENICTY_HGMD_SORT_KEY, \
+    SPLICE_AI_FIELD, GENOME_VERSION_GRCh37
+from hail_search.queries.base import PredictionPath, QualityFilterFormat
+from hail_search.queries.mito import MitoHailTableQuery
 
 
-class SnvIndelHailTableQuery37(SnvIndelHailTableQuery):
+class SnvIndelHailTableQuery37(MitoHailTableQuery):
 
+    DATA_TYPE = 'SNV_INDEL'
     GENOME_VERSION = GENOME_VERSION_GRCh37
-    PREDICTION_FIELDS_CONFIG = SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS
+
+    GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']}
+    QUALITY_FILTER_FORMAT = {
+        'AB': QualityFilterFormat(override=lambda gt: ~gt.GT.is_het(), scale=100),
+    }
+    POPULATIONS = {
+        'seqr': {'hom': 'hom', 'hemi': None, 'het': None, 'sort': 'callset_af'},
+        'topmed': {'hemi': None},
+        'exac': {
+            'filter_af': 'AF_POPMAX', 'ac': 'AC_Adj', 'an': 'AN_Adj', 'hom': 'AC_Hom', 'hemi': 'AC_Hemi',
+            'het': 'AC_Het',
+        },
+        'gnomad_exomes': {'filter_af': 'AF_POPMAX_OR_GLOBAL', 'het': None, 'sort': 'gnomad_exomes'},
+        GNOMAD_GENOMES_FIELD: {'filter_af': 'AF_POPMAX_OR_GLOBAL', 'het': None, 'sort': 'gnomad'},
+    }
+    PREDICTION_FIELDS_CONFIG = {
+        'cadd': PredictionPath('cadd', 'PHRED'),
+        'eigen': PredictionPath('eigen', 'Eigen_phred'),
+        'mpc': PredictionPath('mpc', 'MPC'),
+        'primate_ai': PredictionPath('primate_ai', 'score'),
+        SPLICE_AI_FIELD: PredictionPath(SPLICE_AI_FIELD, 'delta_score'),
+        'splice_ai_consequence': PredictionPath(SPLICE_AI_FIELD, 'splice_consequence'),
+        'mut_taster': PredictionPath('dbnsfp', 'MutationTaster_pred'),
+        'polyphen': PredictionPath('dbnsfp', 'Polyphen2_HVAR_score'),
+        'revel': PredictionPath('dbnsfp', 'REVEL_score'),
+        'sift': PredictionPath('dbnsfp', 'SIFT_score'),
+    }
+    PATHOGENICITY_FILTERS = {
+        **MitoHailTableQuery.PATHOGENICITY_FILTERS,
+        HGMD_KEY: ('class', HGMD_PATH_RANGES),
+    }
+    PATHOGENICITY_FIELD_MAP = {}
+    ANNOTATION_OVERRIDE_FIELDS = [SPLICE_AI_FIELD]
+
     LIFTOVER_ANNOTATION_FIELDS = {}
-    ANNOTATION_OVERRIDE_FIELDS = SnvIndelHailTableQuery.ANNOTATION_OVERRIDE_FIELDS[:-1]
+    BASE_ANNOTATION_FIELDS = {
+        k: v for k, v in MitoHailTableQuery.BASE_ANNOTATION_FIELDS.items()
+        if k not in MitoHailTableQuery.MITO_ANNOTATION_FIELDS
+    }
+    ENUM_ANNOTATION_FIELDS = {
+        **MitoHailTableQuery.ENUM_ANNOTATION_FIELDS,
+        'screen': {
+            'response_key': 'screenRegionType',
+            'format_value': lambda value: value.region_types.first(),
+        },
+    }
+    ENUM_ANNOTATION_FIELDS[CLINVAR_KEY] = ENUM_ANNOTATION_FIELDS.pop(CLINVAR_MITO_KEY)
+
+    SORTS = {
+        **MitoHailTableQuery.SORTS,
+        PATHOGENICTY_SORT_KEY: lambda r: [MitoHailTableQuery.CLINVAR_SORT(CLINVAR_KEY, r)],
+        PATHOGENICTY_HGMD_SORT_KEY: lambda r: [MitoHailTableQuery.CLINVAR_SORT(CLINVAR_KEY, r), r.hgmd.class_id],
+    }
+
     FREQUENCY_PREFILTER_FIELDS = OrderedDict([
         (True, PREFILTER_FREQ_CUTOFF),
         ('is_gt_10_percent', 0.1),
     ])
 
-    def _should_add_chr_prefix(self):
-        return False
+    def _prefilter_entries_table(self, ht, *args, **kwargs):
+        ht = super()._prefilter_entries_table(ht, *args, **kwargs)
+        if 'variant_ht' not in self._load_table_kwargs and not self._load_table_kwargs.get('_filter_intervals'):
+            af_ht = self._get_loaded_filter_ht(
+                GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs)
+            if af_ht:
+                ht = ht.filter(hl.is_missing(af_ht[ht.key]))
+        return ht
+
+    def _get_gnomad_af_prefilter(self, frequencies=None, pathogenicity=None, **kwargs):
+        gnomad_genomes_filter = (frequencies or {}).get(GNOMAD_GENOMES_FIELD, {})
+        af_cutoff = gnomad_genomes_filter.get('af')
+        if af_cutoff is None and gnomad_genomes_filter.get('ac') is not None:
+            af_cutoff = PREFILTER_FREQ_CUTOFF  # an AC-only filter falls back to the default AF prefilter cutoff
+        if af_cutoff is None:
+            return False  # no gnomAD genomes frequency filter requested, so no prefilter
+
+        af_cutoff_field = self._get_af_prefilter_field(af_cutoff)
+        if af_cutoff_field is None:
+            return False  # cutoff is above every configured prefilter threshold
+
+        af_filter = True if af_cutoff_field is True else lambda ht: ht[af_cutoff_field]
+
+        if af_cutoff < PATH_FREQ_OVERRIDE_CUTOFF:
+            clinvar_path_ht = self._get_loaded_clinvar_prefilter_ht(pathogenicity)
+            if clinvar_path_ht is not False:
+                path_cutoff_field = self._get_af_prefilter_field(PATH_FREQ_OVERRIDE_CUTOFF)
+                non_clinvar_filter = lambda ht: hl.is_missing(clinvar_path_ht[ht.key])
+                if af_filter is not True:  # bind the current callables as defaults: a bare closure would resolve to the rebound names and recurse forever
+                    non_clinvar_filter = lambda ht, _nc=non_clinvar_filter, _af=af_filter: _nc(ht) & _af(ht)
+                af_filter = lambda ht: ht[path_cutoff_field] | non_clinvar_filter(ht)
+
+        return af_filter
+
+    def _get_af_prefilter_field(self, af_cutoff):
+        return next((field for field, cutoff in self.FREQUENCY_PREFILTER_FIELDS.items() if af_cutoff <= cutoff), None)
+
+    def _get_annotation_override_filters(self, ht, annotation_overrides):
+        annotation_filters = super()._get_annotation_override_filters(ht, annotation_overrides)
+
+        if annotation_overrides.get(SPLICE_AI_FIELD):
+            score_filter, _ = self._get_in_silico_filter(ht, SPLICE_AI_FIELD, annotation_overrides[SPLICE_AI_FIELD])
+            annotation_filters.append(score_filter)
+
+        return annotation_filters
+
+    @staticmethod
+    def _stat_has_non_ref(s):
+        return (s.het_samples > 0) | (s.hom_samples > 0)

From 49de202e3edf1b18f39086aa86872158591947f8 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 7 Jun 2024 10:23:36 -0400
Subject: [PATCH 45/47] add missing import

---
 hail_search/queries/snv_indel.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py
index 6567003bb3..ad56025dfd 100644
--- a/hail_search/queries/snv_indel.py
+++ b/hail_search/queries/snv_indel.py
@@ -1,4 +1,5 @@
 from collections import OrderedDict
+import hail as hl
 
 from hail_search.constants import GENOME_VERSION_GRCh38, SCREEN_KEY, PREFILTER_FREQ_CUTOFF
 from hail_search.queries.base import BaseHailTableQuery, PredictionPath

From f8acf164ef2328d1cc5746b55e2100ab5c2d15af Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 7 Jun 2024 17:13:29 -0400
Subject: [PATCH 46/47] fix conditional

---
 ui/shared/components/panel/variants/Transcripts.jsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx
index 0449243985..353e9854d5 100644
--- a/ui/shared/components/panel/variants/Transcripts.jsx
+++ b/ui/shared/components/panel/variants/Transcripts.jsx
@@ -136,9 +136,9 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up
   </div>
 )
 
-export const ExtendedSpliceLabel = ({ spliceregion }) => spliceregion?.extended_intronic_splice_region_variant && (
+export const ExtendedSpliceLabel = ({ spliceregion }) => (spliceregion?.extended_intronic_splice_region_variant ? (
   <Label size="small" horizontal color="yellow" content="Extended Intronic Splice Region" />
-)
+) : null)
 
 ExtendedSpliceLabel.propTypes = {
   spliceregion: PropTypes.object,

From 163215b796a96b107c384c974408300b1f29ba68 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 11 Jun 2024 11:50:34 -0400
Subject: [PATCH 47/47] bump changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d52b04bd41..984ec30bbb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,8 @@
 # _seqr_ Changes
 
 ## dev
+
+## 6/11/24
 * Add "Partial Phenotype Contribution" functional tag (REQUIRES DB MIGRATION)
 
 ## 5/24/24