From 831cb1ced15193b2bccd9aede9b638e87f2d98c7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 17:43:30 -0400 Subject: [PATCH 01/47] add functional tag --- ...ariantfunctionaldata_functional_data_tag.py | 18 ++++++++++++++++++ seqr/models.py | 4 ++++ 2 files changed, 22 insertions(+) create mode 100644 seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py diff --git a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py new file mode 100644 index 0000000000..027652323a --- /dev/null +++ b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.23 on 2024-05-30 21:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0066_family_post_discovery_mondo_id'), + ] + + operations = [ + migrations.AlterField( + model_name='variantfunctionaldata', + name='functional_data_tag', + field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": 
"#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) 
but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"description": "Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 945cf17894..d04c4a0258 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -935,6 +935,10 @@ class VariantFunctionalData(ModelWithGUID): 'description': 'Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.', 'color': '#E985DC', })), + ('Partial Phenotype Contribution', json.dumps({ + 'description': 'Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.', + 'color': '#1F42D9', + })), )), ) From 0c98322dc475af5de0ca70fc23bab714f5a1cb3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 30 May 2024 17:52:40 -0400 Subject: [PATCH 02/47] add metadata title --- .../0067_alter_variantfunctionaldata_functional_data_tag.py | 4 ++-- seqr/models.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py index 027652323a..e8f2e6358a 100644 --- a/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py +++ b/seqr/migrations/0067_alter_variantfunctionaldata_functional_data_tag.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.23 on 2024-05-30 21:41 +# Generated by Django 3.2.23 on 2024-05-30 21:51 from django.db import migrations, models @@ -13,6 +13,6 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='variantfunctionaldata', name='functional_data_tag', - field=models.TextField(choices=[('Functional Data', (('Biochemical 
Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was 
used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"description": "Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]), + field=models.TextField(choices=[('Functional Data', (('Biochemical Function', '{"description": "Gene product performs a biochemical function shared with other known genes in the disease of interest, or consistent with the phenotype.", "color": "#311B92"}'), ('Protein Interaction', '{"description": "Gene product interacts with proteins previously implicated (genetically or biochemically) in the disease of interest.", "color": "#4A148C"}'), ('Expression', '{"description": "Gene is expressed in tissues relevant to the disease of interest and/or is altered in expression in patients who have the disease.", "color": "#7C4DFF"}'), ('Patient Cells', '{"description": "Gene and/or gene product function is demonstrably altered in patients carrying candidate mutations.", "color": "#B388FF"}'), ('Non-patient cells', '{"description": "Gene and/or gene product 
function is demonstrably altered in human cell culture models carrying candidate mutations.", "color": "#9575CD"}'), ('Animal Model', '{"description": "Non-human animal models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#AA00FF"}'), ('Non-human cell culture model', '{"description": "Non-human cell-culture models with a similarly disrupted copy of the affected gene show a phenotype consistent with human disease state.", "color": "#BA68C8"}'), ('Rescue', '{"description": "The cellular phenotype in patient-derived cells or engineered equivalents can be rescued by addition of the wild-type gene product.", "color": "#663399"}'))), ('Functional Scores', (('Genome-wide Linkage', '{"metadata_title": "LOD Score", "description": "Max LOD score used in analysis to restrict where you looked for causal variants; provide best score available, whether it be a cumulative LOD score across multiple families or just the best family\'s LOD score.", "color": "#880E4F"}'), ('Bonferroni corrected p-value', '{"metadata_title": "P-value", "description": "Bonferroni-corrected p-value for gene if association testing/burden testing/etc was used to identify the gene.", "color": "#E91E63"}'), ('Kindreds w/ Overlapping SV & Similar Phenotype', '{"metadata_title": "#", "description": "Number of kindreds (1+) previously reported/in databases as having structural variant overlapping the gene and a similar phenotype.", "color": "#FF5252"}'))), ('Additional Kindreds (Literature, MME)', (('Additional Unrelated Kindreds w/ Causal Variants in Gene', '{"metadata_title": "# additional families", "description": "Number of additional kindreds with causal variants in this gene (Any other kindreds from collaborators, MME, literature etc). 
Do not count your family in this total.", "color": "#D84315"}'),)), ('Additional Information', (('Incomplete Penetrance', '{"description": "Variant has been shown to be disease-causing (in literature, functional studies, etc.) but one or more individuals in this family with the variant do not present with clinical features of the disorder.", "color": "#E985DC"}'), ('Partial Phenotype Contribution', '{"metadata_title": "HPO Terms", "description": "Variant is believed to be part of the solve, explaining only some of the phenotypes.", "color": "#1F42D9"}')))]), ), ] diff --git a/seqr/models.py b/seqr/models.py index d04c4a0258..1cd1f46b0e 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -936,7 +936,8 @@ class VariantFunctionalData(ModelWithGUID): 'color': '#E985DC', })), ('Partial Phenotype Contribution', json.dumps({ - 'description': 'Variant is believed to be part but not all of the solve, explaining only some of the phenotypes.', + 'metadata_title': 'HPO Terms', + 'description': 'Variant is believed to be part of the solve, explaining only some of the phenotypes.', 'color': '#1F42D9', })), )), From 323e8cd4c0d0020ea90cdc4cc88fbebc7468fdfb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 11:30:41 -0400 Subject: [PATCH 03/47] select HPO terms for phenotype functional tag --- seqr/views/utils/orm_to_json_utils.py | 3 +- ui/redux/selectors.js | 10 +++++++ .../panel/view-fields/TagFieldView.jsx | 28 ++++++++++++++++--- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index daaa38d98a..67a943fc8f 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -441,8 +441,7 @@ def _format_functional_tags(tags): display_data = VariantFunctionalData.FUNCTIONAL_DATA_TAG_LOOKUP[name] tag.update({ 'name': name, - 'metadataTitle': display_data.get('metadata_title', 'Notes'), - 'color': display_data['color'], + **{k: display_data[k] for 
k in ['metadataTitle', 'color']}, }) return tags diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index db076a4937..cf9a0a989d 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -425,6 +425,16 @@ export const getUserOptions = createSelector( ), ) +export const getHpoTermOptionsByFamily = createSelector( + getIndividualsByFamily, + individualsByFamily => Object.entries(individualsByFamily).reduce((acc, [familyGuid, individuals]) => ({ + ...acc, + [familyGuid]: individuals.reduce((fAcc, { features }) => ([...fAcc, ...(features || []).map( + ({ id, label }) => ({ value: id, text: label, description: id }), + )]), []), + }), {}), +) + export const getRnaSeqSignificantJunctionData = createSelector( getGenesById, getIndividualsByGuid, diff --git a/ui/shared/components/panel/view-fields/TagFieldView.jsx b/ui/shared/components/panel/view-fields/TagFieldView.jsx index 9b40e354a5..ab492b93e1 100644 --- a/ui/shared/components/panel/view-fields/TagFieldView.jsx +++ b/ui/shared/components/panel/view-fields/TagFieldView.jsx @@ -1,10 +1,12 @@ import React from 'react' +import { connect } from 'react-redux' import { NavLink } from 'react-router-dom' import PropTypes from 'prop-types' import styled from 'styled-components' import { Popup, Form } from 'semantic-ui-react' import { Field } from 'react-final-form' +import { getHpoTermOptionsByFamily } from 'redux/selectors' import { HorizontalSpacer } from '../../Spacers' import { ColoredLabel, ColoredOutlineLabel } from '../../StyledComponents' import { LargeMultiselect, Multiselect } from '../../form/Inputs' @@ -32,6 +34,15 @@ MultiselectField.propTypes = { input: PropTypes.object, } +const mapHpoDropdownStateToProps = (state, ownProps) => ({ + options: getHpoTermOptionsByFamily(state)[ownProps.metadataId], +}) + +const LIST_FORMAT_PROPS = { + format: val => (val || '').split(', ').filter(v => v), + parse: val => (val || []).join(', '), +} + const METADATA_FIELD_PROPS = { [NOTES_METADATA_TITLE]: { width: 16, 
maxLength: 50, placeholder: 'Enter up to 50 characters' }, Reason: { width: 16, maxLength: 50, placeholder: 'Brief reason for excluding. Enter up to 50 characters' }, @@ -43,12 +54,16 @@ const METADATA_FIELD_PROPS = { addValueOptions: true, options: ['Sanger', 'Segregation', 'SV', 'Splicing'].map(value => ({ value })), placeholder: 'Select test types or add your own', - format: val => (val || '').split(', ').filter(v => v), - parse: val => (val || []).join(', '), + ...LIST_FORMAT_PROPS, + }, + 'HPO Terms': { + width: 16, + component: connect(mapHpoDropdownStateToProps)(MultiselectField), + ...LIST_FORMAT_PROPS, }, } -const MetadataField = React.memo(({ value, name, error }) => { +const MetadataField = React.memo(({ value, name, error, metadataId }) => { if (!value.metadataTitle) { return null } @@ -62,6 +77,7 @@ const MetadataField = React.memo(({ value, name, error }) => { component={Form.Input} label={value.metadataTitle} error={error} + metadataId={metadataId} {...fieldProps} /> @@ -72,6 +88,7 @@ MetadataField.propTypes = { value: PropTypes.object, name: PropTypes.string, error: PropTypes.bool, + metadataId: PropTypes.string, } export const TagFieldDisplay = React.memo(({ @@ -129,6 +146,7 @@ class TagFieldView extends React.PureComponent { noEditTagTypes: PropTypes.arrayOf(PropTypes.string), linkTagType: PropTypes.string, tagLinkUrl: PropTypes.string, + modalId: PropTypes.string, } getSimplifiedProps() { @@ -199,7 +217,7 @@ class TagFieldView extends React.PureComponent { render() { const { - simplifiedValue, field, tagOptions, popup, tagAnnotation, validate, displayMetadata, ...props + simplifiedValue, field, tagOptions, popup, tagAnnotation, validate, displayMetadata, modalId, ...props } = this.props const additionalFields = tagOptions.some(({ metadataTitle }) => metadataTitle) ? 
[{ @@ -208,6 +226,7 @@ class TagFieldView extends React.PureComponent { isArrayField: true, validate: val => ((!val || !val.metadataTitle || val.metadataTitle === NOTES_METADATA_TITLE || val.metadata) ? undefined : 'Required'), component: MetadataField, + metadataId: modalId, }] : [] return ( @@ -216,6 +235,7 @@ class TagFieldView extends React.PureComponent { additionalEditFields={additionalFields} modalStyle={MODAL_STYLE} fieldDisplay={this.fieldDisplay} + modalId={modalId} {...props} {...(simplifiedValue ? this.getSimplifiedProps() : this.getMappedProps())} /> From 97499ea95fe862c1715337393838610873ec4700 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 11:52:08 -0400 Subject: [PATCH 04/47] add partial contribution to gregor report --- seqr/views/apis/report_api.py | 4 ++-- seqr/views/utils/anvil_metadata_utils.py | 7 ++++++- ui/pages/Report/components/VariantMetadata.jsx | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 7316cd48b0..da586f4cdb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -241,7 +241,7 @@ def _add_row(row, family_id, row_type): } GENETIC_FINDINGS_TABLE_COLUMNS = { 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', - 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', + 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained', 'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 'allele_balance_or_heteroplasmy_percentage', 'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery', 'gene_disease_validity', @@ -379,7 +379,7 @@ def _add_row(row, family_id, row_type): elif row_type == DISCOVERY_ROW_TYPE and row: for variant in row: 
genetic_findings_rows.append({ - **variant, 'phenotype_contribution': 'Full', 'variant_type': 'SNV/INDEL', + **variant, 'variant_type': 'SNV/INDEL', }) parse_anvil_metadata( diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 8a8d0cdc95..b16a929dcb 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -328,7 +328,10 @@ def _get_parsed_saved_discovery_variants_by_family( project_saved_variants = SavedVariant.objects.filter( varianttag__variant_tag_type__in=tag_types, family__id__in=families, **(variant_filter or {}), - ).order_by('created_date').distinct().annotate(tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True)) + ).order_by('created_date').distinct().annotate( + tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), + partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), + ) variants = [] gene_ids = set() @@ -348,6 +351,8 @@ def _get_parsed_saved_discovery_variants_by_family( 'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(), 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', + 'phenotype_contribution': 'Partial' if variant.partial_hpo_terms else 'Full', + 'partial_contribution_explained': variant.partial_hpo_terms[0].replace(', ', '|') if variant.partial_hpo_terms else None, **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx index 
b09db6a2ab..7a2e7e03f6 100644 --- a/ui/pages/Report/components/VariantMetadata.jsx +++ b/ui/pages/Report/components/VariantMetadata.jsx @@ -19,6 +19,7 @@ const COLUMNS = [ { name: 'condition_id' }, { name: 'condition_inheritance' }, { name: 'phenotype_contribution' }, + { name: 'partial_contribution_explained' }, { name: 'additional_family_members_with_variant' }, { name: 'method_of_discovery' }, { name: 'Submitted to MME', format: ({ MME }) => (MME ? 'Yes' : 'No') }, From 64a11ff2435aa49ad33def5318f22be3fc2fbacd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 11:57:23 -0400 Subject: [PATCH 05/47] support uncertain contribution --- seqr/views/utils/anvil_metadata_utils.py | 10 ++++++++-- ui/redux/selectors.js | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index b16a929dcb..3b4638b9ea 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -343,6 +343,12 @@ def _get_parsed_saved_discovery_variants_by_family( gene_id = main_transcript.get('geneId') gene_ids.add(gene_id) + partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else '' + phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full' + if partial_hpo_terms == 'Uncertain': + phenotype_contribution = 'Uncertain' + partial_hpo_terms = '' + variants.append({ 'chrom': chrom, 'pos': pos, @@ -351,8 +357,8 @@ def _get_parsed_saved_discovery_variants_by_family( 'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(), 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', - 'phenotype_contribution': 'Partial' if variant.partial_hpo_terms else 'Full', - 'partial_contribution_explained': variant.partial_hpo_terms[0].replace(', ', '|') if variant.partial_hpo_terms else None, + 
'phenotype_contribution': phenotype_contribution, + 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index cf9a0a989d..fbc57692b0 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -431,7 +431,7 @@ export const getHpoTermOptionsByFamily = createSelector( ...acc, [familyGuid]: individuals.reduce((fAcc, { features }) => ([...fAcc, ...(features || []).map( ({ id, label }) => ({ value: id, text: label, description: id }), - )]), []), + )]), [{ value: 'Uncertain' }]), }), {}), ) From ce901d0cc8681caf8bafc895e5737c585088eb90 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:02:19 -0400 Subject: [PATCH 06/47] include phenotype contribution in individual metadata --- ui/pages/Report/components/VariantMetadata.jsx | 2 -- ui/shared/utils/constants.js | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx index 7a2e7e03f6..ee7fe71e10 100644 --- a/ui/pages/Report/components/VariantMetadata.jsx +++ b/ui/pages/Report/components/VariantMetadata.jsx @@ -18,8 +18,6 @@ const COLUMNS = [ { name: 'known_condition_name' }, { name: 'condition_id' }, { name: 'condition_inheritance' }, - { name: 'phenotype_contribution' }, - { name: 'partial_contribution_explained' }, { name: 'additional_family_members_with_variant' }, { name: 'method_of_discovery' }, { name: 'Submitted to MME', format: ({ MME }) => (MME ? 
'Yes' : 'No') }, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 4d107cc89a..78a34356c5 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1867,6 +1867,8 @@ export const VARIANT_METADATA_COLUMNS = [ { name: 'sv_type', fieldName: 'svType', format: ({ svType }) => SVTYPE_LOOKUP[svType] || svType }, { name: 'variant_inheritance' }, { name: 'gene_known_for_phenotype' }, + { name: 'phenotype_contribution' }, + { name: 'partial_contribution_explained' }, { name: 'notes' }, ] From 5fb0015cbb471e64a733d8e6b7cd2df61842763c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:32:57 -0400 Subject: [PATCH 07/47] add tests --- seqr/fixtures/report_variants.json | 26 +++++++++++++++++++++++ seqr/views/apis/report_api_tests.py | 9 +++++--- seqr/views/apis/summary_data_api_tests.py | 15 +++++++++---- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/seqr/fixtures/report_variants.json b/seqr/fixtures/report_variants.json index bae02ef233..cb096d1e70 100644 --- a/seqr/fixtures/report_variants.json +++ b/seqr/fixtures/report_variants.json @@ -139,5 +139,31 @@ "variant_tag_type": 4, "search_hash": null } +}, +{ + "model": "seqr.variantfunctionaldata", + "pk": 29, + "fields": { + "guid": "VFD0000029_1248367227_r0390_10", + "created_date": "2018-05-24T15:34:01.353Z", + "created_by": null, + "last_modified_date": "2024-05-24T15:34:01.365Z", + "saved_variants": [6], + "functional_data_tag": "Partial Phenotype Contribution", + "metadata": "HP:0000501, HP:0000365" + } +}, +{ + "model": "seqr.variantfunctionaldata", + "pk": 30, + "fields": { + "guid": "VFD0000030_1248367227_r0390_10", + "created_date": "2018-05-24T15:34:01.353Z", + "created_by": null, + "last_modified_date": "2024-05-24T15:34:01.365Z", + "saved_variants": [2], + "functional_data_tag": "Partial Phenotype Contribution", + "metadata": "Uncertain" + } } ] \ No newline at end of file diff --git a/seqr/views/apis/report_api_tests.py 
b/seqr/views/apis/report_api_tests.py index 6e3ef0ed8d..c99b6b84c1 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -508,6 +508,7 @@ 'method_of_discovery': 'SR-ES', 'notes': None, 'phenotype_contribution': 'Full', + 'partial_contribution_explained': '', 'phenotype_description': None, 'pmid_id': None, 'seqr_chosen_consequence': None, @@ -612,12 +613,12 @@ ], [ 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', - 'MONDO:0044970', '', 'Full', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', + 'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_249045487', '', 'Candidate', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', - 'Full', '', '', 'SR-ES', '', '', '', '', '', '', '', + 'Partial', 'HP:0000501|HP:0000365', '', 'SR-ES', '', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '', 'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate', @@ -1219,7 +1220,7 @@ def test_variant_metadata(self): 'genetic_findings_id': 'HG00731_1_248367227', 'known_condition_name': 'mitochondrial disease', 'participant_id': 'HG00731', - 'phenotype_contribution': 'Full', + 'phenotype_contribution': 'Uncertain', 'phenotype_description': 'microcephaly; seizures', 'pos': 248367227, 'projectGuid': 'R0001_1kg', @@ -1290,6 +1291,8 @@ def test_variant_metadata(self): 'hgvsp': 'c.1586-17C>G', 'participant_id': 'NA20889', 'pos': 248367227, + 'partial_contribution_explained': 'HP:0000501|HP:0000365', + 
'phenotype_contribution': 'Partial', 'projectGuid': 'R0003_test', 'internal_project_id': 'Test Reprocessed Project', 'ref': 'TC', diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 2c8a9a5354..96a6f5d580 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -107,6 +107,10 @@ 'notes-2': None, 'tags-1': ['Tier 1 - Novel gene and phenotype'], 'tags-2': ['Tier 1 - Novel gene and phenotype'], + 'phenotype_contribution-1': 'Partial', + 'phenotype_contribution-2': 'Full', + 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', + 'partial_contribution_explained-2': '', 'condition_id': 'OMIM:616126', 'condition_inheritance': 'Autosomal recessive', 'known_condition_name': 'Immunodeficiency 38', @@ -153,6 +157,8 @@ 'chrom-1': '1', 'gene_known_for_phenotype-1': 'Candidate', 'tags-1': ['Tier 1 - Novel gene and phenotype'], + 'phenotype_contribution-1': 'Full', + 'partial_contribution_explained-1': '', 'pos-1': 248367227, 'end-1': None, 'ref-1': 'TC', @@ -352,7 +358,8 @@ def test_saved_variants_page(self): response = self.client.get('{}?gene=ENSG00000135953'.format(all_tag_url)) self.assertEqual(response.status_code, 200) expected_variant_guids.add('SV0000002_1248367227_r0390_100') - self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), expected_variant_guids) + report_variants = {'SV0027168_191912632_r0384_rare', 'SV0027167_191912633_r0384_rare', 'SV0027166_191912634_r0384_rare'} + self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), {*report_variants, *expected_variant_guids}) multi_tag_url = reverse(saved_variants_page, args=['Review;Tier 1 - Novel gene and phenotype']) response = self.client.get('{}?gene=ENSG00000135953'.format(multi_tag_url)) @@ -369,7 +376,7 @@ def test_saved_variants_page(self): self.assertEqual(response.status_code, 200) self.assertSetEqual(set(response.json()['savedVariantsByGuid'].keys()), { 
'SV0000001_2103343353_r0390_100', 'SV0000002_1248367227_r0390_100', 'SV0000007_prefix_19107_DEL_r00', - 'SV0000006_1248367227_r0003_tes', + 'SV0000006_1248367227_r0003_tes', *report_variants, }) multi_discovery_tag_url = reverse(saved_variants_page, args=['CMG Discovery Tags;Review']) @@ -707,7 +714,7 @@ def test_sample_metadata_export(self, mock_google_authenticated): # Tests for AnVIL access disabled class LocalSummaryDataAPITest(AuthenticationTestCase, SummaryDataAPITest): - fixtures = ['users', '1kg_project', 'reference_data'] + fixtures = ['users', '1kg_project', 'reference_data', 'report_variants'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = ['NA21234', 'NA21987'] @@ -723,7 +730,7 @@ def assert_has_expected_calls(self, users, skip_group_call_idxs=None): # Test for permissions from AnVIL only class AnvilSummaryDataAPITest(AnvilAuthenticationTestCase, SummaryDataAPITest): - fixtures = ['users', 'social_auth', '1kg_project', 'reference_data'] + fixtures = ['users', 'social_auth', '1kg_project', 'reference_data', 'report_variants'] NUM_MANAGER_SUBMISSIONS = 4 ADDITIONAL_SAMPLES = [] From d9c53dabb4c23807fad51f40e9e660821f2abda1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:36:32 -0400 Subject: [PATCH 08/47] bump changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64626f8078..d52b04bd41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Add "Partial Phenotype Contribution" functional tag (REQUIRES DB MIGRATION) ## 5/24/24 * Adds external_data to Family model (REQUIRES DB MIGRATION) From e03b6bbe57d5284cf228c927c62c191d8fbe3685 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 12:41:08 -0400 Subject: [PATCH 09/47] update ui tests --- .../components/IndividualMetadata.test.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ui/pages/SummaryData/components/IndividualMetadata.test.js 
b/ui/pages/SummaryData/components/IndividualMetadata.test.js index 625bdf05e8..f2e4a34c7c 100644 --- a/ui/pages/SummaryData/components/IndividualMetadata.test.js +++ b/ui/pages/SummaryData/components/IndividualMetadata.test.js @@ -62,6 +62,10 @@ const DATA = [ participant_id: 'NA20889', individual_guid: 'I000017_na20889', proband_relationship: 'Self', + 'phenotype_contribution-1': 'Partial', + 'phenotype_contribution-2': 'Full', + 'partial_contribution_explained-1': 'HP:0000501|HP:0000365', + 'partial_contribution_explained-2': '', }, ] @@ -77,17 +81,18 @@ test('IndividualMetadata render and export', () => { 'filter_flags', 'consanguinity', 'family_history', 'genetic_findings_id-1', 'variant_reference_assembly-1', 'chrom-1', 'pos-1', 'ref-1', 'alt-1', 'gene_of_interest-1', 'gene_id-1', 'seqr_chosen_consequence-1', 'transcript-1', 'hgvsc-1', 'hgvsp-1', 'zygosity-1', 'sv_name-1', 'sv_type-1', 'variant_inheritance-1', 'gene_known_for_phenotype-1', - 'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', + 'phenotype_contribution-1', 'partial_contribution_explained-1', 'notes-1', 'genetic_findings_id-2', 'variant_reference_assembly-2', 'chrom-2', 'pos-2', 'ref-2', 'alt-2', 'gene_of_interest-2', 'gene_id-2', 'seqr_chosen_consequence-2', 'transcript-2', 'hgvsc-2', 'hgvsp-2', - 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', 'notes-2']) + 'zygosity-2', 'sv_name-2', 'sv_type-2', 'variant_inheritance-2', 'gene_known_for_phenotype-2', + 'phenotype_contribution-2', 'partial_contribution_explained-2', 'notes-2']) expect(exportConfig.processRow(DATA[0])).toEqual([ 'Test Reprocessed Project', 'R0003_test', '12', 'F000012_12', 'NA20889', 'I000017_na20889', null, '', '', '', '', 'Self', 'Female', 'Ashkenazi Jewish', undefined, undefined, null, 'Affected', 'HP:0011675 (Arrhythmia)|HP:0001509 ()', '', null, undefined, 'Waiting for data', 'Tier 1', 'Y', 'WES', '2017-02-05', '', undefined, 'Yes', 
'NA20889_1_248367227', undefined, '1', 248367227, 'TC', 'T', 'OR4G11P', 'ENSG00000240361', 'intron_variant', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', undefined, undefined, - 'unknown', 'Candidate', undefined, 'NA20889_1_249045487', undefined, '12', '49045487', undefined, + 'unknown', 'Candidate', 'Partial', 'HP:0000501|HP:0000365', undefined, 'NA20889_1_249045487', undefined, '12', '49045487', undefined, undefined, undefined, undefined, undefined, undefined, undefined, undefined, 'Heterozygous', 'DEL:chr12:49045487-49045898', 'Deletion', - 'unknown', 'Candidate', undefined]) + 'unknown', 'Candidate', 'Full', '', undefined]) }) From fdcdb9b4812b38530279022e03e5347349535c08 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 13:21:06 -0400 Subject: [PATCH 10/47] clean up extra participant fields --- seqr/views/apis/report_api.py | 18 ++++++++++++------ seqr/views/utils/anvil_metadata_utils.py | 23 +++++++++++++++-------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index da586f4cdb..86db286bfb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -408,10 +408,11 @@ def _add_row(row, family_id, row_type): for participant in participant_rows: phenotype_rows += _parse_participant_phenotype_rows(participant) - if not participant[PARTICIPANT_ID_FIELD]: + airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD) + if not airtable_participant_id: continue - airtable_metadata = airtable_metadata_by_participant.get(participant[PARTICIPANT_ID_FIELD]) or {} + airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {} data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( participant, airtable_metadata, data_types, experiment_ids_by_participant, @@ -467,11 +468,11 @@ def _get_individual_data_types(projects): def 
_parse_participant_phenotype_rows(participant): base_phenotype_row = {'participant_id': participant['participant_id'], 'presence': 'Present', 'ontology': 'HPO'} present_rows = [ - dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['features'] or [] + dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant.pop('features') or [] ] base_phenotype_row['presence'] = 'Absent' return present_rows + [ - dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant['absent_features'] or [] + dict(**base_phenotype_row, **_get_phenotype_row(feature)) for feature in participant.pop('absent_features') or [] ] @@ -492,8 +493,10 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) - if participant['analyte_id'] and not has_analyte: - analyte_rows.append(participant) + # TODO constant + analyte_row = {k: participant.pop(k) for k in ['analyte_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status']} + if analyte_row['analyte_id'] and not has_analyte: + analyte_rows.append(analyte_row) def _get_gregor_airtable_data(participants, user): @@ -647,12 +650,15 @@ def _populate_gregor_files(file_data): files.append((file_name, list(table_config.keys()), data)) + expected_columns = {k for d in data for k, v in d.items() if v} # TODO extra_columns = expected_columns.difference(table_config.keys()) if extra_columns: col_summary = ', '.join(sorted(extra_columns)) warnings.insert( 0, f'The following columns are computed for the "{file_name}" table but are missing from the data model: {col_summary}', ) + errors.append(warnings[0]) # TODO + continue invalid_data_type_columns = { col: config['data_type'] for col, config in table_config.items() if config.get('data_type') and config['data_type'] not in DATA_TYPE_VALIDATORS diff --git a/seqr/views/utils/anvil_metadata_utils.py 
b/seqr/views/utils/anvil_metadata_utils.py index 3b4638b9ea..0a31d270a5 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -136,7 +136,8 @@ def _get_family_metadata(family_filter, family_fields, include_metadata, include family_data_by_id = {} for f in family_data: family_id = f.pop('id') - solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(f['analysisStatus'], Individual.UNSOLVED) + analysis_status = f['analysisStatus'] if include_metadata else f.pop('analysisStatus') + solve_status = ANALYSIS_SOLVE_STATUS_LOOKUP.get(analysis_status, Individual.UNSOLVED) f.update({ 'solve_status': Individual.SOLVE_STATUS_LOOKUP[solve_status], **{k: v['format'](f) for k, v in (family_fields or {}).items()}, @@ -230,7 +231,7 @@ def parse_anvil_metadata( subject_row = _get_subject_row( individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, - format_id, + format_id, include_metadata, ) if individual.id in matchmaker_individuals: subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes' @@ -400,7 +401,7 @@ def _get_transcript_field(field, config, transcript): return value -def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id): +def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id, include_metadata): paternal_ids = individual_ids_map.get(individual.father_id, ('', '')) maternal_ids = individual_ids_map.get(individual.mother_id, ('', '')) subject_row = { @@ -414,19 +415,25 @@ def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, indivi 'absent_features': individual.absent_features, 'proband_relationship': Individual.RELATIONSHIP_LOOKUP.get(individual.proband_relationship, ''), 'paternal_id': format_id(paternal_ids[0]), - 'paternal_guid': paternal_ids[1], 'maternal_id': 
format_id(maternal_ids[0]), - 'maternal_guid': maternal_ids[1], } + if include_metadata: + subject_row.update({ + 'paternal_guid': paternal_ids[1], + 'maternal_guid': maternal_ids[1], + }) if airtable_metadata is not None: sequencing = airtable_metadata.get('SequencingProduct') or set() subject_row.update({ - 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', 'dbgap_study_id': airtable_metadata.get('dbgap_study_id', '') if has_dbgap_submission else '', 'dbgap_subject_id': airtable_metadata.get('dbgap_subject_id', '') if has_dbgap_submission else '', - 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( - len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', }) + if include_metadata: + subject_row.update({ + 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', + 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( + len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', + }) if get_additional_individual_fields: subject_row.update(get_additional_individual_fields(individual, airtable_metadata)) return subject_row From 438a9ff7342d98a32b7f0db8e501d66a98081ec6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 13:34:43 -0400 Subject: [PATCH 11/47] clean up extra family fields --- seqr/views/utils/anvil_metadata_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 0a31d270a5..321df328cc 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -202,10 +202,13 @@ def parse_anvil_metadata( family_subject_row, saved_variants, *condition_map, set_conditions_for_variants=proband_only_variants, ) - affected_individuals = [individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED] + affected_individuals = [ + individual for individual in family_individuals if 
individual.affected == Individual.AFFECTED_STATUS_AFFECTED + ] if include_metadata else [] + subject_family_row = {k: family_subject_row.pop(k) for k in ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']} # TODO constant family_row = { - 'family_id': family_subject_row['family_id'], + 'family_id': subject_family_row['family_id'], 'consanguinity': next(( 'Present' if individual.consanguinity else 'None suspected' for individual in family_individuals if individual.consanguinity is not None @@ -235,7 +238,7 @@ def parse_anvil_metadata( ) if individual.id in matchmaker_individuals: subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes' - subject_row.update(family_subject_row) + subject_row.update(subject_family_row) if individual.solve_status: subject_row['solve_status'] = Individual.SOLVE_STATUS_LOOKUP[individual.solve_status] elif individual.affected != Individual.AFFECTED_STATUS_AFFECTED: From d6dccefb0e3a07a3e9b60d92ccb06bef8262deb4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 14:13:25 -0400 Subject: [PATCH 12/47] clean up airtable rows --- seqr/views/apis/report_api.py | 50 ++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 86db286bfb..352948a20c 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -207,9 +207,10 @@ def _add_row(row, family_id, row_type): 'targeted_region_bed_file', 'date_data_generation', 'target_insert_size', 'sequencing_platform', ] EXPERIMENT_COLUMNS = {'analyte_id', 'experiment_sample_id'} -EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id', 'sequencing_event_details'} +EXPERIMENT_TABLE_COLUMNS = {'experiment_dna_short_read_id'} EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS) EXPERIMENT_TABLE_COLUMNS.update(EXPERIMENT_TABLE_AIRTABLE_FIELDS) +EXPERIMENT_RNA_TABLE = 'experiment_rna_short_read' 
EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS = [ 'library_prep_type', 'single_or_paired_ends', 'within_site_batch_name', 'RIN', 'estimated_library_size', 'total_reads', 'percent_rRNA', 'percent_mRNA', '5prime3prime_bias', @@ -219,12 +220,14 @@ def _add_row(row, family_id, row_type): EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS) EXPERIMENT_RNA_TABLE_COLUMNS.update([c for c in EXPERIMENT_TABLE_AIRTABLE_FIELDS if not c.startswith('target')]) EXPERIMENT_LOOKUP_TABLE_COLUMNS = {'experiment_id', 'table_name', 'id_in_table', 'participant_id'} +READ_TABLE = 'aligned_dna_short_read' READ_TABLE_AIRTABLE_FIELDS = [ 'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly', 'mean_coverage', 'alignment_software', 'analysis_details', ] READ_TABLE_COLUMNS = {'aligned_dna_short_read_id', 'experiment_dna_short_read_id'} READ_TABLE_COLUMNS.update(READ_TABLE_AIRTABLE_FIELDS) +READ_RNA_TABLE = 'aligned_rna_short_read' READ_RNA_TABLE_AIRTABLE_ID_FIELDS = ['aligned_rna_short_read_file', 'aligned_rna_short_read_index_file'] READ_RNA_TABLE_AIRTABLE_FIELDS = [ 'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', 'percent_unaligned', @@ -233,12 +236,25 @@ def _add_row(row, family_id, row_type): READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_ID_FIELDS) READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_FIELDS) READ_RNA_TABLE_COLUMNS.update(READ_TABLE_AIRTABLE_FIELDS[2:-1]) +READ_SET_TABLE = 'aligned_dna_short_read_set' READ_SET_TABLE_COLUMNS = {'aligned_dna_short_read_set_id', 'aligned_dna_short_read_id'} +CALLED_TABLE = 'called_variants_dna_short_read' CALLED_VARIANT_FILE_COLUMN = 'called_variants_dna_file' CALLED_TABLE_COLUMNS = { 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum', 'caller_software', 'variant_types', 'analysis_details', } +AIRTABLE_TABLE_COLUMNS = { + EXPERIMENT_TABLE: 
EXPERIMENT_TABLE_COLUMNS, + READ_TABLE: READ_TABLE_COLUMNS, + READ_SET_TABLE: READ_SET_TABLE_COLUMNS, + CALLED_TABLE: CALLED_TABLE_COLUMNS, + EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS, + READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS, +} +RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE} +DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES + GENETIC_FINDINGS_TABLE_COLUMNS = { 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained', @@ -401,8 +417,7 @@ def _add_row(row, family_id, row_type): phenotype_rows = [] analyte_rows = [] - airtable_rows = [] - airtable_rna_rows = [] + airtable_rows = {table: [] for table in AIRTABLE_TABLE_COLUMNS.keys()} experiment_lookup_rows = [] experiment_ids_by_participant = {} for participant in participant_rows: @@ -416,7 +431,7 @@ def _add_row(row, family_id, row_type): data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( participant, airtable_metadata, data_types, experiment_ids_by_participant, - analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows, + analyte_rows, airtable_rows, experiment_lookup_rows, ) # Add experiment IDs @@ -428,14 +443,7 @@ def _add_row(row, family_id, row_type): ('family', GREGOR_FAMILY_TABLE_COLUMNS, list(family_map.values())), (PHENOTYPE_TABLE, PHENOTYPE_TABLE_COLUMNS, phenotype_rows), ('analyte', ANALYTE_TABLE_COLUMNS, analyte_rows), - (EXPERIMENT_TABLE, EXPERIMENT_TABLE_COLUMNS, airtable_rows), - ('aligned_dna_short_read', READ_TABLE_COLUMNS, airtable_rows), - ('aligned_dna_short_read_set', READ_SET_TABLE_COLUMNS, airtable_rows), - ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, [ - row for row in airtable_rows if row.get(CALLED_VARIANT_FILE_COLUMN) - ]), - 
('experiment_rna_short_read', EXPERIMENT_RNA_TABLE_COLUMNS, airtable_rna_rows), - ('aligned_rna_short_read', READ_RNA_TABLE_COLUMNS, airtable_rna_rows), + *[(table, AIRTABLE_TABLE_COLUMNS[table], rows) for table, rows in airtable_rows.items()], (EXPERIMENT_LOOKUP_TABLE, EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows), (FINDINGS_TABLE, GENETIC_FINDINGS_TABLE_COLUMNS, genetic_findings_rows), ] @@ -477,7 +485,7 @@ def _parse_participant_phenotype_rows(participant): def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, - analyte_rows, airtable_rows, airtable_rna_rows, experiment_lookup_rows): + analyte_rows, airtable_rows, experiment_lookup_rows): has_analyte = False # airtable data for data_type in data_types: @@ -488,7 +496,16 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, analyte_rows.append({**participant, **row}) if not is_rna: experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] - (airtable_rna_rows if is_rna else airtable_rows).append(row) + for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES): + if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN): + continue + try: + airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) + except KeyError as e: + # TODO + import pdb; pdb.set_trace() + raise e + experiment_lookup_rows.append( {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) @@ -798,7 +815,10 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e def _get_row_id(row): - id_col = next(col for col in ['genetic_findings_id', 'participant_id', 'experiment_sample_id', 'family_id'] if col in row) + id_col = next(col for col in [ + 'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', + 'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 
'family_id', + ] if col in row) return row[id_col] From c27890849d4180642cc97d302532fab61c984374 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 14:21:55 -0400 Subject: [PATCH 13/47] clean up analyte table --- seqr/views/apis/report_api.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 352948a20c..8549d5cf4b 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -487,31 +487,26 @@ def _parse_participant_phenotype_rows(participant): def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, analyte_rows, airtable_rows, experiment_lookup_rows): has_analyte = False + analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} + participant['participant_id'] = analyte_row['participant_id'] # airtable data for data_type in data_types: if data_type not in airtable_metadata: continue is_rna, row = _get_airtable_row(data_type, airtable_metadata) has_analyte = True - analyte_rows.append({**participant, **row}) + analyte_rows.append({**analyte_row, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}}) if not is_rna: experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES): if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN): continue - try: - airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) - except KeyError as e: - # TODO - import pdb; pdb.set_trace() - raise e + airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) experiment_lookup_rows.append( {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) - # TODO constant - analyte_row = {k: participant.pop(k) for k in ['analyte_id', 'analyte_type', 'primary_biosample', 
'tissue_affected_status']} if analyte_row['analyte_id'] and not has_analyte: analyte_rows.append(analyte_row) From 9947b335ccf559628f220d18af942ad0db8802c9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 14:33:15 -0400 Subject: [PATCH 14/47] clean up extra findings fields --- seqr/views/apis/report_api.py | 1 - seqr/views/apis/summary_data_api.py | 1 - seqr/views/utils/anvil_metadata_utils.py | 23 +++++++++++++++-------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 8549d5cf4b..b072e7b8b4 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -918,7 +918,6 @@ def _add_row(row, family_id, row_type): elif row_type == DISCOVERY_ROW_TYPE: family = families_by_id[family_id] for variant in row: - del variant['gene_ids'] variant_rows.append({ 'MME': variant.pop('variantId') in participant_mme[variant['participant_id']].get('variant_ids', []), 'phenotype_contribution': 'Full', diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 4ebcc4db3e..811052e52a 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -285,7 +285,6 @@ def _add_row(row, family_id, row_type): family_rows_by_id[family_id] = row elif row_type == DISCOVERY_ROW_TYPE: for i, discovery_row in enumerate(row): - del discovery_row['gene_ids'] participant_id = discovery_row.pop('participant_id') parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items()} parsed_row['num_saved_variants'] = len(row) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 321df328cc..7a6e184c08 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -182,7 +182,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - 
list(family_data_by_id.keys()), variant_filter=variant_filter, variant_json_fields=variant_json_fields, + list(family_data_by_id.keys()), include_metadata, variant_filter=variant_filter, variant_json_fields=variant_json_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -325,7 +325,7 @@ def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False): def _get_parsed_saved_discovery_variants_by_family( - families: Iterable[Family], variant_filter: dict, variant_json_fields: list[str], + families: Iterable[Family], include_metadata: bool, variant_filter: dict, variant_json_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) @@ -353,26 +353,32 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' - variants.append({ + variant = { 'chrom': chrom, 'pos': pos, 'variant_reference_assembly': GENOME_VERSION_LOOKUP[variant_json['genomeVersion']], 'gene_id': gene_id, 'gene_ids': [gene_id] if gene_id else variant_json.get('transcripts', {}).keys(), - 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, - }) + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, + } + if include_metadata: + variant.update({ + 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), + 'tags': variant.tags, + }) + variants.append(variant) genes_by_id = get_genes(gene_ids) saved_variants_by_family = 
defaultdict(list) for row in variants: - row[GENE_COLUMN] = genes_by_id.get(row['gene_id'], {}).get('geneSymbol') + gene_id = row['gene_id'] if include_metadata else row.pop('gene_id') + row[GENE_COLUMN] = genes_by_id.get(gene_id, {}).get('geneSymbol') family_id = row.pop('family_id') saved_variants_by_family[family_id].append(row) @@ -586,8 +592,9 @@ def _update_conditions(family_subject_row, variants, omim_conditions, mondo_cond c for mim_number in mim_numbers for c in omim_conditions[mim_number][None] if c['chrom'] == v['chrom'] and c['start'] <= v['pos'] <= c['end'] ] + gene_ids = v.pop('gene_ids') for mim_number in mim_numbers: - for gene_id in v['gene_ids']: + for gene_id in gene_ids: variant_conditions += omim_conditions[mim_number][gene_id] if set_conditions_for_variants: From 7c7f19590efe0e5ecdb3b555c5110decea0b7e75 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 15:03:50 -0400 Subject: [PATCH 15/47] clean up metadata --- seqr/views/apis/report_api.py | 3 +++ seqr/views/apis/report_api_tests.py | 13 +------------ seqr/views/utils/anvil_metadata_utils.py | 6 +++--- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index b072e7b8b4..9638710c32 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -854,6 +854,9 @@ def _add_row(row, family_id, row_type): } f.update(known_ids) individuals_ids -= set(known_ids.values()) + individual = proband or next(iter(individuals_by_id.values()), None) + if individual: + f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) # TODO constant? 
sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index c99b6b84c1..b7cb51a5fd 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -509,10 +509,7 @@ 'notes': None, 'phenotype_contribution': 'Full', 'partial_contribution_explained': '', - 'phenotype_description': None, - 'pmid_id': None, 'seqr_chosen_consequence': None, - 'solve_status': 'Unsolved', 'svName': None, 'svType': None, 'sv_name': None, @@ -1120,11 +1117,10 @@ def test_family_metadata(self): test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000012_12') self.assertDictEqual(test_row, { 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', 'familyGuid': 'F000012_12', 'family_id': '12', 'displayName': '12', - 'solve_status': 'Unsolved', + 'solve_status': 'Partially solved', 'actual_inheritance': 'unknown', 'condition_id': 'OMIM:616126', 'condition_inheritance': 'Autosomal recessive', @@ -1159,7 +1155,6 @@ def test_family_metadata(self): test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') self.assertDictEqual(test_row, { 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nåme with uniçøde', 'familyGuid': 'F000003_3', 'family_id': '3', 'displayName': '3', @@ -1221,10 +1216,8 @@ def test_variant_metadata(self): 'known_condition_name': 'mitochondrial disease', 'participant_id': 'HG00731', 'phenotype_contribution': 'Uncertain', - 'phenotype_description': 'microcephaly; seizures', 'pos': 248367227, 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nåme with uniçøde', 'ref': 'TC', 'tags': ['Known gene for phenotype'], 'variant_inheritance': 'paternal', @@ -1249,10 +1242,8 @@ def test_variant_metadata(self): 
'known_condition_name': 'mitochondrial disease', 'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', 'participant_id': 'HG00731', - 'phenotype_description': 'microcephaly; seizures', 'pos': 1912634, 'projectGuid': 'R0001_1kg', - 'internal_project_id': '1kg project nåme with uniçøde', 'ref': 'C', 'tags': ['Known gene for phenotype'], 'transcript': 'ENST00000371839', @@ -1294,7 +1285,6 @@ def test_variant_metadata(self): 'partial_contribution_explained': 'HP:0000501|HP:0000365', 'phenotype_contribution': 'Partial', 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', 'ref': 'TC', 'seqr_chosen_consequence': 'intron_variant', 'tags': ['Tier 1 - Novel gene and phenotype'], @@ -1322,7 +1312,6 @@ def test_variant_metadata(self): 'participant_id': 'NA20889', 'pos': 249045487, 'projectGuid': 'R0003_test', - 'internal_project_id': 'Test Reprocessed Project', 'ref': None, 'svType': 'DEL', 'sv_name': 'DEL:chr1:249045487-249045898', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 7a6e184c08..65ffa09049 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -353,7 +353,7 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' - variant = { + parsed_variant = { 'chrom': chrom, 'pos': pos, 'variant_reference_assembly': GENOME_VERSION_LOOKUP[variant_json['genomeVersion']], @@ -367,11 +367,11 @@ def _get_parsed_saved_discovery_variants_by_family( **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, } if include_metadata: - variant.update({ + parsed_variant.update({ 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), 'tags': variant.tags, }) - variants.append(variant) + variants.append(parsed_variant) genes_by_id = get_genes(gene_ids) From 
419fdd73b0e04d4af870d19979f5a647bd39041c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 15:36:31 -0400 Subject: [PATCH 16/47] fix anvil export --- seqr/views/apis/report_api.py | 20 +++++++++++----- seqr/views/apis/summary_data_api.py | 7 ++++-- seqr/views/utils/anvil_metadata_utils.py | 29 ++++++++++++------------ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 9638710c32..a00903431d 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -12,7 +12,7 @@ from seqr.utils.middleware import ErrorsWarningsException from seqr.views.utils.airtable_utils import AirtableSession -from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, \ +from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \ FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \ EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs @@ -112,12 +112,13 @@ def anvil_export(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user) parsed_rows = defaultdict(list) + family_diseases = {} def _add_row(row, family_id, row_type): if row_type == DISCOVERY_ROW_TYPE: missing_gene_rows = [ '{chrom}-{pos}-{ref}-{alt}'.format(**discovery_row) for discovery_row in row - if not (discovery_row.get('gene_id') or discovery_row.get('svType'))] + if not (discovery_row.get(GENE_COLUMN) or discovery_row.get('svType'))] if missing_gene_rows: raise ErrorsWarningsException( [f'Discovery variant(s) {", ".join(missing_gene_rows)} in family {family_id} have no associated gene']) @@ -146,19 +147,23 @@ def _add_row(row, family_id, row_type): row.update({ 'project_id': row.pop('internal_project_id'), 'solve_state': 
row.pop('solve_status'), - 'disease_id': row.get('condition_id', '').replace('|', ';'), - 'disease_description': row.get('known_condition_name', '').replace('|', ';'), 'hpo_present': '|'.join([feature['id'] for feature in row.get('features') or []]), 'hpo_absent': '|'.join([feature['id'] for feature in row.get('absent_features') or []]), 'ancestry': row['reported_ethnicity'] or row['reported_race'], }) + if row_type == FAMILY_ROW_TYPE: + family_diseases[row[entity_id_field]] = { + 'disease_id': row.get('condition_id', '').replace('|', ';'), + 'disease_description': row.get('known_condition_name', '').replace('|', ';'), + } parsed_rows[row_type].append(row) max_loaded_date = request.GET.get('loadedBefore') or (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') parse_anvil_metadata( [project], request.user, _add_row, max_loaded_date=max_loaded_date, include_discovery_sample_id=True, - get_additional_individual_fields=lambda individual, *args: { + get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, *args: { 'congenital_status': Individual.ONSET_AGE_LOOKUP[individual.onset_age] if individual.onset_age else 'Unknown', + **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission), }, get_additional_sample_fields=lambda sample, *args: { 'entity:sample_id': sample.individual.individual_id, @@ -173,6 +178,9 @@ def _add_row(row, family_id, row_type): }}, ) + for row in parsed_rows[SUBJECT_ROW_TYPE]: + row.update(family_diseases[row['family_id']]) + return export_multiple_files([ ['{}_PI_Subject'.format(project.name), SUBJECT_TABLE_COLUMNS, parsed_rows[SUBJECT_ROW_TYPE]], ['{}_PI_Sample'.format(project.name), SAMPLE_TABLE_COLUMNS, parsed_rows[SAMPLE_ROW_TYPE]], @@ -530,7 +538,7 @@ def _get_gregor_airtable_data(participants, user): return airtable_metadata_by_participant -def _get_participant_row(individual, airtable_sample): +def _get_participant_row(individual, airtable_sample, *args): participant = { 
'gregor_center': 'BROAD', 'prior_testing': '|'.join([gene.get('gene', gene['comments']) for gene in individual.rejected_genes or []]), diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 811052e52a..2c8663b76d 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -23,7 +23,7 @@ add_individual_hpo_details, INDIVIDUAL_DISPLAY_NAME_EXPR, AIP_TAG_TYPE from seqr.views.utils.permissions_utils import analyst_required, user_is_analyst, get_project_guids_user_can_view, \ login_and_policies_required, get_project_and_check_permissions, get_internal_projects -from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE +from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, DISCOVERY_ROW_TYPE from seqr.views.utils.variant_utils import get_variants_response, bulk_create_tagged_variants, DISCOVERY_CATEGORY from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL @@ -312,11 +312,14 @@ def _add_row(row, family_id, row_type): projects, request.user, _add_row, max_loaded_date=request.GET.get('loadedBefore'), include_metadata=True, omit_airtable=not include_airtable, - get_additional_individual_fields=lambda individual, airtable_metadata: { + get_additional_individual_fields=lambda individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids: { 'Collaborator': (airtable_metadata or {}).get('Collaborator'), 'individual_guid': individual.guid, 'disorders': individual.disorders, 'filter_flags': json.dumps(individual.filter_flags) if individual.filter_flags else '', + 'paternal_guid': paternal_ids[1], + 'maternal_guid': maternal_ids[1], + **anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission), }, ) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 
65ffa09049..52dee02b61 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -234,7 +234,7 @@ def parse_anvil_metadata( subject_row = _get_subject_row( individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, - format_id, include_metadata, + format_id, ) if individual.id in matchmaker_individuals: subject_row['MME'] = matchmaker_individuals[individual.id] if mme_values else 'Yes' @@ -410,7 +410,7 @@ def _get_transcript_field(field, config, transcript): return value -def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id, include_metadata): +def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, individual_ids_map, get_additional_individual_fields, format_id): paternal_ids = individual_ids_map.get(individual.father_id, ('', '')) maternal_ids = individual_ids_map.get(individual.mother_id, ('', '')) subject_row = { @@ -426,28 +426,27 @@ def _get_subject_row(individual, has_dbgap_submission, airtable_metadata, indivi 'paternal_id': format_id(paternal_ids[0]), 'maternal_id': format_id(maternal_ids[0]), } - if include_metadata: - subject_row.update({ - 'paternal_guid': paternal_ids[1], - 'maternal_guid': maternal_ids[1], - }) if airtable_metadata is not None: - sequencing = airtable_metadata.get('SequencingProduct') or set() subject_row.update({ 'dbgap_study_id': airtable_metadata.get('dbgap_study_id', '') if has_dbgap_submission else '', 'dbgap_subject_id': airtable_metadata.get('dbgap_subject_id', '') if has_dbgap_submission else '', }) - if include_metadata: - subject_row.update({ - 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', - 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( - len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', - }) if get_additional_individual_fields: - 
subject_row.update(get_additional_individual_fields(individual, airtable_metadata)) + subject_row.update(get_additional_individual_fields(individual, airtable_metadata, has_dbgap_submission, maternal_ids, paternal_ids)) return subject_row +def anvil_export_airtable_fields(airtable_metadata, has_dbgap_submission): + if airtable_metadata is None: + return {} + sequencing = airtable_metadata.get('SequencingProduct') or set() + return { + 'dbgap_submission': 'Yes' if has_dbgap_submission else 'No', + 'multiple_datasets': 'Yes' if len(sequencing) > 1 or ( + len(sequencing) == 1 and list(sequencing)[0] in MULTIPLE_DATASET_PRODUCTS) else 'No', + } + + def _get_sample_row(sample, participant_id, has_dbgap_submission, airtable_metadata, include_metadata, get_additional_sample_fields=None): sample_row = { 'participant_id': participant_id, From 7facc258821f2566371801010e7a3a1065b9b1b4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 15:45:51 -0400 Subject: [PATCH 17/47] remove hardcoded column lists --- seqr/views/apis/report_api.py | 63 ++++++++---------------- seqr/views/utils/anvil_metadata_utils.py | 4 +- 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index a00903431d..2463e6c0f0 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -196,17 +196,6 @@ def _add_row(row, family_id, row_type): SMID_FIELD = 'SMID' PARTICIPANT_ID_FIELD = 'CollaboratorParticipantID' COLLABORATOR_SAMPLE_ID_FIELD = 'CollaboratorSampleID' -PARTICIPANT_TABLE_COLUMNS = { - 'participant_id', 'internal_project_id', 'gregor_center', 'consent_code', 'recontactable', 'prior_testing', - 'pmid_id', 'family_id', 'paternal_id', 'maternal_id', 'proband_relationship', - 'sex', 'reported_race', 'reported_ethnicity', 'ancestry_detail', 'solve_status', 'missing_variant_case', - 'age_at_last_observation', 'affected_status', 'phenotype_description', 'age_at_enrollment', -} 
-GREGOR_FAMILY_TABLE_COLUMNS = {'family_id', 'consanguinity'} -PHENOTYPE_TABLE_COLUMNS = { - 'phenotype_id', 'participant_id', 'term_id', 'presence', 'ontology', 'additional_details', 'onset_age_range', - 'additional_modifiers', -} ANALYTE_TABLE_COLUMNS = { 'analyte_id', 'participant_id', 'analyte_type', 'primary_biosample', 'tissue_affected_status', } @@ -227,7 +216,6 @@ def _add_row(row, family_id, row_type): EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_COLUMNS) EXPERIMENT_RNA_TABLE_COLUMNS.update(EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS) EXPERIMENT_RNA_TABLE_COLUMNS.update([c for c in EXPERIMENT_TABLE_AIRTABLE_FIELDS if not c.startswith('target')]) -EXPERIMENT_LOOKUP_TABLE_COLUMNS = {'experiment_id', 'table_name', 'id_in_table', 'participant_id'} READ_TABLE = 'aligned_dna_short_read' READ_TABLE_AIRTABLE_FIELDS = [ 'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly', @@ -252,24 +240,6 @@ def _add_row(row, family_id, row_type): 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum', 'caller_software', 'variant_types', 'analysis_details', } -AIRTABLE_TABLE_COLUMNS = { - EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS, - READ_TABLE: READ_TABLE_COLUMNS, - READ_SET_TABLE: READ_SET_TABLE_COLUMNS, - CALLED_TABLE: CALLED_TABLE_COLUMNS, - EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS, - READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS, -} -RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE} -DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES - -GENETIC_FINDINGS_TABLE_COLUMNS = { - 'chrom', 'pos', 'ref', 'alt', 'variant_type', 'variant_reference_assembly', GENE_COLUMN, 'transcript', 'hgvsc', 'hgvsp', - 'hgvs', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', *FINDING_METADATA_COLUMNS[:4], 'phenotype_contribution', 'partial_contribution_explained', - 'genetic_findings_id', 'participant_id', 'experiment_id', 'zygosity', 
'allele_balance_or_heteroplasmy_percentage', - 'variant_inheritance', 'linked_variant', 'additional_family_members_with_variant', 'method_of_discovery', - 'gene_disease_validity', -} RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [ 'reference_assembly_uri', 'tissue_affected_status', 'Primary_Biosample'] @@ -296,6 +266,17 @@ def _add_row(row, family_id, row_type): data_type_columns = set(DATA_TYPE_AIRTABLE_COLUMNS) - NO_DATA_TYPE_FIELDS - set(DATA_TYPE_OMIT[data_type]) AIRTABLE_QUERY_COLUMNS.update({f'{field}_{data_type}' for field in data_type_columns}) +AIRTABLE_TABLE_COLUMNS = { + EXPERIMENT_TABLE: EXPERIMENT_TABLE_COLUMNS, + READ_TABLE: READ_TABLE_COLUMNS, + READ_SET_TABLE: READ_SET_TABLE_COLUMNS, + CALLED_TABLE: CALLED_TABLE_COLUMNS, + EXPERIMENT_RNA_TABLE: EXPERIMENT_RNA_TABLE_COLUMNS, + READ_RNA_TABLE: READ_RNA_TABLE_COLUMNS, +} +RNA_AIRTABLE_TABLES = {EXPERIMENT_RNA_TABLE, READ_RNA_TABLE} +DNA_AIRTABLE_TABLES = set(AIRTABLE_TABLE_COLUMNS.keys()) - RNA_AIRTABLE_TABLES + WARN_MISSING_TABLE_COLUMNS = { PARTICIPANT_TABLE: ['recontactable', 'reported_race', 'affected_status', 'phenotype_description', 'age_at_enrollment'], FINDINGS_TABLE: ['known_condition_name'], @@ -447,13 +428,13 @@ def _add_row(row, family_id, row_type): variant['experiment_id'] = experiment_ids_by_participant.get(variant['participant_id']) file_data = [ - (PARTICIPANT_TABLE, PARTICIPANT_TABLE_COLUMNS, participant_rows), - ('family', GREGOR_FAMILY_TABLE_COLUMNS, list(family_map.values())), - (PHENOTYPE_TABLE, PHENOTYPE_TABLE_COLUMNS, phenotype_rows), - ('analyte', ANALYTE_TABLE_COLUMNS, analyte_rows), - *[(table, AIRTABLE_TABLE_COLUMNS[table], rows) for table, rows in airtable_rows.items()], - (EXPERIMENT_LOOKUP_TABLE, EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows), - (FINDINGS_TABLE, GENETIC_FINDINGS_TABLE_COLUMNS, genetic_findings_rows), + (PARTICIPANT_TABLE, participant_rows), + ('family', list(family_map.values())), + (PHENOTYPE_TABLE, 
phenotype_rows), + ('analyte', analyte_rows), + *[(table, rows) for table, rows in airtable_rows.items()], + (EXPERIMENT_LOOKUP_TABLE, experiment_lookup_rows), + (FINDINGS_TABLE, genetic_findings_rows), ] files, warnings = _populate_gregor_files(file_data) @@ -662,7 +643,7 @@ def _populate_gregor_files(file_data): ) files = [] - for file_name, expected_columns, data in file_data: + for file_name, data in file_data: table_config = table_configs.get(file_name) if not table_config: errors.insert(0, f'No data model found for "{file_name}" table') @@ -670,15 +651,13 @@ def _populate_gregor_files(file_data): files.append((file_name, list(table_config.keys()), data)) - expected_columns = {k for d in data for k, v in d.items() if v} # TODO + expected_columns = {k for d in data for k, v in d.items() if v} extra_columns = expected_columns.difference(table_config.keys()) if extra_columns: col_summary = ', '.join(sorted(extra_columns)) warnings.insert( 0, f'The following columns are computed for the "{file_name}" table but are missing from the data model: {col_summary}', ) - errors.append(warnings[0]) # TODO - continue invalid_data_type_columns = { col: config['data_type'] for col, config in table_config.items() if config.get('data_type') and config['data_type'] not in DATA_TYPE_VALIDATORS @@ -864,7 +843,7 @@ def _add_row(row, family_id, row_type): individuals_ids -= set(known_ids.values()) individual = proband or next(iter(individuals_by_id.values()), None) if individual: - f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) # TODO constant? 
+ f.update({k: individual[k] for k in ['phenotype_description', 'pmid_id', 'solve_status']}) sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 52dee02b61..5e37f7b742 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -100,6 +100,8 @@ Sample.SAMPLE_TYPE_WGS: 'SR-GS', } +FAMILY_INDIVIDUAL_FIELDS = ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status'] + def _format_hgvs(hgvs, *args): return (hgvs or '').split(':')[-1] @@ -206,7 +208,7 @@ def parse_anvil_metadata( individual for individual in family_individuals if individual.affected == Individual.AFFECTED_STATUS_AFFECTED ] if include_metadata else [] - subject_family_row = {k: family_subject_row.pop(k) for k in ['family_id', 'internal_project_id', 'phenotype_description', 'pmid_id', 'solve_status']} # TODO constant + subject_family_row = {k: family_subject_row.pop(k) for k in FAMILY_INDIVIDUAL_FIELDS} family_row = { 'family_id': subject_family_row['family_id'], 'consanguinity': next(( From 576c0c2323be4d99d4227f5aa9056d18ae7a8766 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 16:02:02 -0400 Subject: [PATCH 18/47] correctly include rna airtable field --- seqr/views/apis/report_api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 2463e6c0f0..83e4afb5cb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -226,7 +226,8 @@ def _add_row(row, family_id, row_type): READ_RNA_TABLE = 'aligned_rna_short_read' READ_RNA_TABLE_AIRTABLE_ID_FIELDS = ['aligned_rna_short_read_file', 'aligned_rna_short_read_index_file'] READ_RNA_TABLE_AIRTABLE_FIELDS = [ - 
'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', 'percent_unaligned', + 'gene_annotation', 'alignment_software', 'alignment_log_file', 'percent_uniquely_aligned', 'percent_multimapped', + 'percent_unaligned', 'reference_assembly_uri', ] READ_RNA_TABLE_COLUMNS = {'aligned_rna_short_read_id', 'experiment_rna_short_read_id'} READ_RNA_TABLE_COLUMNS.update(READ_RNA_TABLE_AIRTABLE_ID_FIELDS) @@ -242,7 +243,7 @@ def _add_row(row, family_id, row_type): } RNA_ONLY = EXPERIMENT_RNA_TABLE_AIRTABLE_FIELDS + READ_RNA_TABLE_AIRTABLE_FIELDS + [ - 'reference_assembly_uri', 'tissue_affected_status', 'Primary_Biosample'] + 'tissue_affected_status', 'Primary_Biosample'] DATA_TYPE_OMIT = { 'wgs': ['targeted_regions_method'] + RNA_ONLY, 'wes': RNA_ONLY, 'rna': [ 'targeted_regions_method', 'target_insert_size', 'mean_coverage', 'aligned_dna_short_read_file', From 8c2526146f59c96df2a019cc7ab22f086181f148 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 16:54:51 -0400 Subject: [PATCH 19/47] fix invalid tables test case --- seqr/views/apis/report_api.py | 6 ++---- seqr/views/apis/report_api_tests.py | 8 ++++---- seqr/views/utils/anvil_metadata_utils.py | 14 +++++++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 83e4afb5cb..5fe75e60aa 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -397,7 +397,7 @@ def _add_row(row, family_id, row_type): format_id=_format_gregor_id, get_additional_individual_fields=_get_participant_row, post_process_variant=_post_process_gregor_variant, - variant_filter={'alt__isnull': False}, + include_svs=False, airtable_fields=[SMID_FIELD, PARTICIPANT_ID_FIELD, 'Recontactable'], include_mondo=True, proband_only_variants=True, @@ -414,9 +414,6 @@ def _add_row(row, family_id, row_type): phenotype_rows += _parse_participant_phenotype_rows(participant) 
airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD) - if not airtable_participant_id: - continue - airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {} data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( @@ -479,6 +476,7 @@ def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, has_analyte = False analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} participant['participant_id'] = analyte_row['participant_id'] + # airtable data for data_type in data_types: if data_type not in airtable_metadata: diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index b7cb51a5fd..19164ca14b 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -832,10 +832,10 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'The following entries are missing required "proband_relationship" in the "participant" table: Broad_NA19678, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881', 'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)', 'The following entries have invalid values for "age_at_enrollment" in the "participant" table. Allowed values have data type date. Invalid values: Broad_NA19675_1 (18)', - 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. 
Invalid values: NA20888 (GRCh38), VCGS_FAM203_621_D2 (GRCh38)', - 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: VCGS_FAM203_621_D2', - 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (NA20888, VCGS_FAM203_621_D2)', - 'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: VCGS_FAM203_621_D2 (DOI:10.5281/zenodo.4469317)', + 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: Broad_exome_NA20888_1 (GRCh38), Broad_exome_VCGS_FAM203_621_D2_1 (GRCh38)', + 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: Broad_exome_VCGS_FAM203_621_D2_1', + 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (Broad_exome_NA20888_1, Broad_exome_VCGS_FAM203_621_D2_1)', + 'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: Broad_exome_VCGS_FAM203_621_D2_1 (DOI:10.5281/zenodo.4469317)', 'The following entries have invalid values for "date_data_generation" (from Airtable) in the "experiment_rna_short_read" table. Allowed values have data type float. 
Invalid values: NA19679 (2023-02-11)', 'The following entries are missing required "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_NA19675_1_21_3343353', 'The following entries have non-unique values for "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_exome_VCGS_FAM203_621_D2 (Broad_HG00731_19_1912632, Broad_HG00731_19_1912633, Broad_HG00731_19_1912634, Broad_HG00731_1_248367227)', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 5e37f7b742..c859f35514 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -160,7 +160,7 @@ def parse_anvil_metadata( get_additional_sample_fields: Callable[[Sample, dict], dict] = None, get_additional_individual_fields: Callable[[Individual, dict], dict] = None, individual_samples: dict[Individual, Sample] = None, individual_data_types: dict[str, Iterable[str]] = None, - airtable_fields: Iterable[str] = None, mme_values: dict = None, variant_filter: dict = None, + airtable_fields: Iterable[str] = None, mme_values: dict = None, include_svs: bool = True, variant_json_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, @@ -184,7 +184,7 @@ def parse_anvil_metadata( sample_ids.add(sample.sample_id) saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( - list(family_data_by_id.keys()), include_metadata, variant_filter=variant_filter, variant_json_fields=variant_json_fields, + list(family_data_by_id.keys()), include_metadata, include_svs=include_svs, variant_json_fields=variant_json_fields, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -327,13 +327,13 @@ def _post_process_variant_metadata(v, 
gene_variants, include_parent_mnvs=False): def _get_parsed_saved_discovery_variants_by_family( - families: Iterable[Family], include_metadata: bool, variant_filter: dict, variant_json_fields: list[str], + families: Iterable[Family], include_metadata: bool, include_svs: dict, variant_json_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) project_saved_variants = SavedVariant.objects.filter( varianttag__variant_tag_type__in=tag_types, family__id__in=families, - **(variant_filter or {}), + **({} if include_svs else {'alt__isnull': False}), ).order_by('created_date').distinct().annotate( tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True), partial_hpo_terms=ArrayAgg('variantfunctionaldata__metadata', distinct=True, filter=Q(variantfunctionaldata__functional_data_tag='Partial Phenotype Contribution')), @@ -355,6 +355,10 @@ def _get_parsed_saved_discovery_variants_by_family( phenotype_contribution = 'Uncertain' partial_hpo_terms = '' + variant_fields = ['genotypes'] + if include_svs: + variant_fields += ['svType', 'svName', 'end'] + parsed_variant = { 'chrom': chrom, 'pos': pos, @@ -365,7 +369,7 @@ def _get_parsed_saved_discovery_variants_by_family( 'phenotype_contribution': phenotype_contribution, 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, - **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, + **{k: variant_json.get(k) for k in variant_fields + (variant_json_fields or [])}, **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt']}, } if include_metadata: From 0f8f206c83793ce48f0819b9ec5c7be869d72706 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 17:06:33 -0400 Subject: [PATCH 20/47] fix analyte handling --- seqr/views/apis/report_api.py | 29 +++++++++++++++-------------- 1 file 
changed, 15 insertions(+), 14 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 5fe75e60aa..6d18526b7e 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -412,12 +412,16 @@ def _add_row(row, family_id, row_type): experiment_ids_by_participant = {} for participant in participant_rows: phenotype_rows += _parse_participant_phenotype_rows(participant) + analyte = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} + participant['participant_id'] = analyte['participant_id'] - airtable_participant_id = participant.pop(PARTICIPANT_ID_FIELD) - airtable_metadata = airtable_metadata_by_participant.get(airtable_participant_id) or {} + if not participant[PARTICIPANT_ID_FIELD]: + continue + + airtable_metadata = airtable_metadata_by_participant.get(participant.pop(PARTICIPANT_ID_FIELD)) or {} data_types = grouped_data_type_individuals[participant['participant_id']] _parse_participant_airtable_rows( - participant, airtable_metadata, data_types, experiment_ids_by_participant, + analyte, airtable_metadata, data_types, experiment_ids_by_participant, analyte_rows, airtable_rows, experiment_lookup_rows, ) @@ -471,32 +475,29 @@ def _parse_participant_phenotype_rows(participant): ] -def _parse_participant_airtable_rows(participant, airtable_metadata, data_types, experiment_ids_by_participant, +def _parse_participant_airtable_rows(analyte, airtable_metadata, data_types, experiment_ids_by_participant, analyte_rows, airtable_rows, experiment_lookup_rows): has_analyte = False - analyte_row = {k: participant.pop(k) for k in ANALYTE_TABLE_COLUMNS} - participant['participant_id'] = analyte_row['participant_id'] - # airtable data for data_type in data_types: if data_type not in airtable_metadata: continue is_rna, row = _get_airtable_row(data_type, airtable_metadata) has_analyte = True - analyte_rows.append({**analyte_row, **{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}}) + analyte_rows.append({**analyte, 
**{k: row[k] for k in ANALYTE_TABLE_COLUMNS if k in row}}) if not is_rna: - experiment_ids_by_participant[participant['participant_id']] = row['experiment_dna_short_read_id'] + experiment_ids_by_participant[analyte['participant_id']] = row['experiment_dna_short_read_id'] for table in (RNA_AIRTABLE_TABLES if is_rna else DNA_AIRTABLE_TABLES): if table == CALLED_TABLE and not row.get(CALLED_VARIANT_FILE_COLUMN): continue airtable_rows[table].append({k: row[k] for k in AIRTABLE_TABLE_COLUMNS[table] if k in row}) experiment_lookup_rows.append( - {'participant_id': participant['participant_id'], **_get_experiment_lookup_row(is_rna, row)} + {'participant_id': analyte['participant_id'], **_get_experiment_lookup_row(is_rna, row)} ) - if analyte_row['analyte_id'] and not has_analyte: - analyte_rows.append(analyte_row) + if analyte['analyte_id'] and not has_analyte: + analyte_rows.append(analyte) def _get_gregor_airtable_data(participants, user): @@ -797,8 +798,8 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e def _get_row_id(row): id_col = next(col for col in [ - 'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', - 'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'family_id', + 'genetic_findings_id', 'participant_id', 'experiment_sample_id', 'analyte_id', 'family_id', + 'aligned_dna_short_read_id', 'aligned_rna_short_read_id', 'aligned_dna_short_read_set_id', 'aligned_rna_short_read_set_id', ] if col in row) return row[id_col] From c56a52030f2d1c263e50aa305f3d6af807eba692 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 31 May 2024 17:21:49 -0400 Subject: [PATCH 21/47] remove unused import --- seqr/views/apis/report_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 6d18526b7e..acb45076fb 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -14,7 +14,7 @@ from 
seqr.views.utils.airtable_utils import AirtableSession from seqr.views.utils.anvil_metadata_utils import parse_anvil_metadata, anvil_export_airtable_fields, \ FAMILY_ROW_TYPE, SUBJECT_ROW_TYPE, SAMPLE_ROW_TYPE, DISCOVERY_ROW_TYPE, PARTICIPANT_TABLE, PHENOTYPE_TABLE, \ - EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, FINDING_METADATA_COLUMNS, GENE_COLUMN + EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files_to_gs from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.permissions_utils import analyst_required, get_project_and_check_permissions, \ From bcaba7c1eea1a647249c4a9258776d97de55c91c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 15:12:26 -0400 Subject: [PATCH 22/47] debug code --- hail_search/queries/base.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index efacff45bd..92b6c2367d 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -297,11 +297,14 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) + try: + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing else None, + ) + except Exception as e: + project_ht = None if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) @@ -1078,7 +1081,7 @@ def gene_counts(self): def lookup_variants(self, variant_ids, include_project_data=False, **kwargs): self._parse_intervals(intervals=None, 
variant_ids=variant_ids, variant_keys=variant_ids) - ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions']) + ht = self._read_table('annotations_vep_110.ht', drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) annotation_fields = self.annotation_fields(include_genotype_overrides=False) From 2c7d85ad6ca9eb574a5e29173b0c2effe7f635ca Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 15:34:31 -0400 Subject: [PATCH 23/47] debug code --- hail_search/queries/base.py | 6 ++++-- hail_search/queries/ont_snv_indel.py | 1 + hail_search/queries/snv_indel.py | 1 + hail_search/queries/snv_indel_37.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 92b6c2367d..37d6ef87af 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -35,6 +35,8 @@ def _to_camel_case(snake_case_str): class BaseHailTableQuery(object): + ANNS_HT = 'annotations.ht' + DATA_TYPE = None KEY_FIELD = None LOADED_GLOBALS = None @@ -90,7 +92,7 @@ class BaseHailTableQuery(object): @classmethod def load_globals(cls): - ht_path = cls._get_table_path('annotations.ht') + ht_path = cls._get_table_path(cls.ANNS_HT) ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS)) cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS} @@ -1081,7 +1083,7 @@ def gene_counts(self): def lookup_variants(self, variant_ids, include_project_data=False, **kwargs): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) - ht = self._read_table('annotations_vep_110.ht', drop_globals=['paths', 'versions']) + ht = self._read_table(self.ANNS_HT, drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) annotation_fields = self.annotation_fields(include_genotype_overrides=False) diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py index dc99ad8e18..fac3d12d4d 100644 --- 
a/hail_search/queries/ont_snv_indel.py +++ b/hail_search/queries/ont_snv_indel.py @@ -7,6 +7,7 @@ class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery): DATA_TYPE = 'ONT_SNV_INDEL' + ANNS_HT = 'annotations.ht' CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index a95890e038..5b42570aeb 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -11,6 +11,7 @@ class SnvIndelHailTableQuery(MitoHailTableQuery): DATA_TYPE = 'SNV_INDEL' + ANNS_HT = 'annotations_vep_110.ht' GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']} QUALITY_FILTER_FORMAT = { diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index d43b92cbe6..3c0a9f2aa5 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -5,7 +5,7 @@ class SnvIndelHailTableQuery37(SnvIndelHailTableQuery): - + ANNS_HT = 'annotations.ht' GENOME_VERSION = GENOME_VERSION_GRCh37 PREDICTION_FIELDS_CONFIG = SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS LIFTOVER_ANNOTATION_FIELDS = {} From cf479bc8686ac7aa41f0bc7a721e8dfe7417428c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:02:27 -0400 Subject: [PATCH 24/47] nested struct support --- hail_search/queries/base.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 37d6ef87af..49ea85afba 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -184,22 +184,28 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=Non return cls._enum_field(field, value, enum, **kwargs) - @staticmethod - def _enum_field(field_name, value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, enum_keys=None, include_version=False, **kwargs): + @classmethod + def _enum_field(cls, field_name, value, enum, 
ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, enum_keys=None, include_version=False, **kwargs): annotations = {} drop = [] + (drop_fields or []) value_keys = value.keys() for field in (enum_keys or enum.keys()): field_enum = enum[field] + if field == 'utrannotator': + field = 'utrrannotator' + is_nested_struct = field in value_keys is_array = f'{field}_ids' in value_keys - value_field = f"{field}_id{'s' if is_array else ''}" - drop.append(value_field) - enum_array = hl.array(field_enum) - if is_array: - annotations[f'{field}s'] = value[value_field].map(lambda v: enum_array[v]) + if is_nested_struct: + annotations[field] = cls._enum_field(field, value[field], field_enum, format_value=format_value) else: - annotations[field] = enum_array[value[value_field]] + value_field = f"{field}_id{'s' if is_array else ''}" + drop.append(value_field) + enum_array = hl.array(field_enum) + if is_array: + annotations[f'{field}s'] = value[value_field].map(lambda v: enum_array[v]) + else: + annotations[field] = enum_array[value[value_field]] if include_version: annotations['version'] = ht_globals['versions'][field_name] From e1b154c885d2f5be355c30b9b2530d212abada9f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:08:54 -0400 Subject: [PATCH 25/47] support new loftee format --- .../components/panel/variants/Annotations.jsx | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 3af864824a..14d8d908ed 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -435,29 +435,21 @@ const svSizeDisplay = (size) => { return `${(size / 1000000).toFixed(2) / 1}Mb` } -const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { - const { - rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, 
cpxIntervals, algorithms, bothsidesSupport, - endChrom, - } = variant - const mainTranscript = getVariantMainTranscript(variant) - - const isLofNagnag = mainTranscript.isLofNagnag || mainTranscript.lofFlags === 'NAGNAG_SITE' - const lofFilters = mainTranscript.lofFilters || ( - mainTranscript.lof === 'LC' && mainTranscript.lofFilter && mainTranscript.lofFilter.split(/&|,/g) - ) - const lofDetails = (lofFilters || isLofNagnag) ? [ - ...(lofFilters ? [...new Set(lofFilters)] : []).map((lofFilterKey) => { - const lofFilter = LOF_FILTER_MAP[lofFilterKey] || { message: lofFilterKey } +const getLofDetails = ({ isLofNagnag, lofFilters, lofFilter, lofFlags, lof }) => { + const isNagnag = isLofNagnag || lofFlags === 'NAGNAG_SITE' + const filters = lofFilters || (lof === 'LC' && lofFilter && lofFilter.split(/&|,/g)) + return (filters || isNagnag) ? [ + ...(filters ? [...new Set(filters)] : []).map((lofFilterKey) => { + const filter = LOF_FILTER_MAP[lofFilterKey] || { message: lofFilterKey } return (
- {`LOFTEE: ${lofFilter.title}`} + {`LOFTEE: ${filter.title}`}
- {lofFilter.message} + {filter.message}
) }), - isLofNagnag ? ( + isNagnag ? (
LOFTEE: NAGNAG site
@@ -465,6 +457,15 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
) : null, ] : null +} + +const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { + const { + rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, + endChrom, + } = variant + const mainTranscript = getVariantMainTranscript(variant) + const lofDetails = getLofDetails(mainTranscript.loftee || mainTranscript) const transcriptPopupProps = mainTranscript.transcriptId && { content: , From ed52cddc684f457e2fc482d56e0eb98fe4f4fc74 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:33:02 -0400 Subject: [PATCH 26/47] first pass intron/exon --- ui/shared/components/panel/variants/Transcripts.jsx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index fc1a0523f7..54fb2bb1f4 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -114,8 +114,9 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai Biotype {transcript.biotype}
- cDNA Position - {transcript.cdnaPosition} + Intron/Exon + {transcript.intron && `Intron ${transcript.intron.index} of ${transcript.intron.total}`} + {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index} of ${transcript.exon.total}`}
From b81cbdbe42d04a5ca9e38b5ac91775f4b0af2b42 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:56:05 -0400 Subject: [PATCH 27/47] show AlphaMissense --- ui/shared/components/panel/variants/Predictions.jsx | 10 +++++++++- ui/shared/utils/constants.js | 7 +++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index d6a305a145..b55b9e91d3 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -5,7 +5,7 @@ import { connect } from 'react-redux' import { Icon, Transition, Popup } from 'semantic-ui-react' import { getGenesById } from 'redux/selectors' -import { ORDERED_PREDICTOR_FIELDS, coloredIcon, predictorColorRanges, predictionFieldValue, getVariantMainGeneId } from 'shared/utils/constants' +import { ORDERED_PREDICTOR_FIELDS, coloredIcon, predictorColorRanges, predictionFieldValue, getVariantMainGeneId, getVariantMainTranscript } from 'shared/utils/constants' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { HorizontalSpacer } from '../../Spacers' import { ButtonLink } from '../../StyledComponents' @@ -111,6 +111,14 @@ class Predictions extends React.PureComponent { gene.primateAi.percentile75.toPrecision(3), undefined], } } + const mainTranscript = getVariantMainTranscript(variant) + if (mainTranscript?.alphamissense.pathogenicity) { + genePredictors.alphamissense = { + field: 'alphamissense', + fieldValue: mainTranscript.alphamissense.pathogenicity, + thresholds: [0.34, 0.34, 0.564, 0.564], + } + } const predictorFields = getPredictorFields(variant, predictions, genePredictors) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 4d107cc89a..988938fef7 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1446,6 +1446,7 @@ export const ORDERED_PREDICTOR_FIELDS = [ thresholds: 
[undefined, undefined, 2.18, 4, undefined], requiresCitation: true, }, + { field: 'alphamissense', fieldTitle: 'AlphaMissense', displayOnly: true }, { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } }, { field: 'mitotip', indicatorMap: MITOTIP_MAP, fieldTitle: 'MitoTIP' }, { field: 'hmtvar', thresholds: [undefined, undefined, 0.35, 0.35, undefined], fieldTitle: 'HmtVar' }, @@ -1453,9 +1454,9 @@ export const ORDERED_PREDICTOR_FIELDS = [ export const coloredIcon = color => React.createElement(color.startsWith('#') ? ColoredIcon : Icon, { name: 'circle', size: 'small', color }) export const predictionFieldValue = ( - predictions, { field, thresholds, reverseThresholds, indicatorMap, infoField, infoTitle }, + predictions, { field, fieldValue, thresholds, reverseThresholds, indicatorMap, infoField, infoTitle }, ) => { - let value = predictions[field] + let value = fieldValue || predictions[field] if (value === null || value === undefined) { return { value } } @@ -1487,6 +1488,8 @@ export const predictorColorRanges = (thresholds, requiresCitation, reverseThresh range = ` >= ${thresholds[i - 1]}` } else if (prevUndefined) { range = ` < ${thresholds[i]}` + } else if (thresholds[i - 1] === thresholds[i]) { + return null } else { range = ` ${thresholds[i - 1]} - ${thresholds[i]}` } From f22066919d4316032a952fa427a27398646d4112 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 3 Jun 2024 16:58:00 -0400 Subject: [PATCH 28/47] fix intron exon dispaly --- ui/shared/components/panel/variants/Transcripts.jsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 54fb2bb1f4..56cbb420c1 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -115,8 +115,8 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai 
{transcript.biotype}
Intron/Exon - {transcript.intron && `Intron ${transcript.intron.index} of ${transcript.intron.total}`} - {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index} of ${transcript.exon.total}`} + {transcript.intron && `Intron ${transcript.intron.index}/${transcript.intron.total}`} + {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index}/${transcript.exon.total}`}
From f70b59c8090c3e267c6c298297b05495d32814b3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 11:04:39 -0400 Subject: [PATCH 29/47] remove utrannotator hardcode fix --- hail_search/queries/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 49ea85afba..78523f00de 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -191,8 +191,6 @@ def _enum_field(cls, field_name, value, enum, ht_globals=None, annotate_value=No value_keys = value.keys() for field in (enum_keys or enum.keys()): field_enum = enum[field] - if field == 'utrannotator': - field = 'utrrannotator' is_nested_struct = field in value_keys is_array = f'{field}_ids' in value_keys From 2c5b484be1db7ee01382a90190f8a97f1d89ddb1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 11:46:08 -0400 Subject: [PATCH 30/47] show utrannotator --- .../components/panel/variants/Annotations.jsx | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 14d8d908ed..771e96dd10 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -3,7 +3,7 @@ import PropTypes from 'prop-types' import { connect } from 'react-redux' import { NavLink } from 'react-router-dom' import styled from 'styled-components' -import { Popup, Label, Icon } from 'semantic-ui-react' +import { Popup, Label, Icon, Table } from 'semantic-ui-react' import { getGenesById, @@ -36,6 +36,7 @@ import { import { GENOME_VERSION_37, GENOME_VERSION_38, getVariantMainTranscript, SVTYPE_LOOKUP, SVTYPE_DETAILS, SCREEN_LABELS, } from '../../../utils/constants' +import { camelcaseToTitlecase } from '../../../utils/stringUtils' const OverlappedIntervalLabels = React.memo(({ groupedIntervals, variant, getOverlapArgs, getLabels }) => { const 
chromIntervals = groupedIntervals[variant.chrom] @@ -459,6 +460,60 @@ const getLofDetails = ({ isLofNagnag, lofFilters, lofFilter, lofFlags, lof }) => ] : null } +// Adapted from https://github.com/ImperialCardioGenetics/UTRannotator/blob/master/README.md#the-detailed-annotation-for-each-consequence +const UTR_ANNOTATOR_DESCRIPTIONS = { + AltStop: 'Whether there is an alternative stop codon downstream within 5’ UTR', + AltStopDistanceToCDS: 'The distance between the alternative stop codon (if exists) and CDS', + CapDistanceToStart: 'The distance (number of nucleotides) to the start of 5’UTR', + DistanceToCDS: 'The distance (number of nucleotides) to CDS', + DistanceToStop: 'The distance (number of nucleotides) to the nearest stop codon (scanning through both the 5’UTR and its downstream CDS)', + Evidence: 'Whether the disrupted uORF has any translation evidence', + FrameWithCDS: 'The frame of the uORF with respect to CDS, described by inFrame or outOfFrame', + KozakContext: 'The Kozak context sequence', + KozakStrength: 'The Kozak strength, described by one of the following values: Weak, Moderate or Strong', + StartDistanceToCDS: 'The distance between the disrupting uORF and CDS', + alt_type: 'The type of uORF with the alternative allele, described by one of following: uORF, inframe_oORF or OutOfFrame_oORF', + alt_type_length: 'The length of uORF with the alt allele', + newSTOPDistanceToCDS: 'The distance between the gained uSTOP to the start of the CDS', + ref_StartDistanceToCDS: 'The distance between the uAUG of the disrupting uORF to CDS', + ref_type: 'The type of uORF with the reference allele, described by one of following: uORF, inframe_oORF or OutOfFrame_oORF', + ref_type_length: 'The length of uORF with the reference allele', + type: 'The type of of 5’ UTR ORF, described by one of the following: uORF(with a stop codon in 5’UTR), inframe_oORF (inframe and overlapping with CDS),OutOfFrame_oORF (out of frame and overlapping with CDS)', +} + +const 
UtrAnnotatorDetail = ({ fiveutrConsequence, fiveutrAnnotation, ...counts }) => ( + + + + + + + {Object.entries(counts).map(([field, value]) => ( + + + + + ))} + {Object.entries(fiveutrAnnotation).filter(e => e[1] !== null).map(([field, value]) => ( + + + {camelcaseToTitlecase(field)} + {UTR_ANNOTATOR_DESCRIPTIONS[field] && ( + } content={UTR_ANNOTATOR_DESCRIPTIONS[field]} flowing /> + )} + + + + ))} + +
+) + +UtrAnnotatorDetail.propTypes = { + fiveutrConsequence: PropTypes.string, + fiveutrAnnotation: PropTypes.object, +} + const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => { const { rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, @@ -571,6 +626,22 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} + {mainTranscript.utrannotator?.fiveutrConsequence && ( +
+ UTRAnnotator:   + + {mainTranscript.utrannotator.fiveutrConsequence.replace('5_prime_UTR_', '').replace('_variant', '').replace(/_/g, ' ')} + + } + > + + +
+ )} {variant.screenRegionType && (
From 8ecca190f8d00f9bab018d7272cf9361ebef5a3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 12:31:04 -0400 Subject: [PATCH 31/47] shared array formatting --- hail_search/queries/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 78523f00de..29a18214db 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -76,7 +76,6 @@ class BaseHailTableQuery(object): 'transcripts': { 'response_key': 'transcripts', 'empty_array': True, - 'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}), 'format_array_values': lambda values, *args: values.group_by(lambda t: t.geneId), }, } @@ -168,6 +167,10 @@ def _format_enum_response(self, k, enum): value = lambda r: self._format_enum(r, k, enum, ht_globals=self._globals, **enum_config) return enum_config.get('response_key', _to_camel_case(k)), value + @staticmethod + def _camelcase_value(value): + return value.rename({k: _to_camel_case(k) for k in value.keys()}) + @classmethod def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=None, **kwargs): if hasattr(r, f'{field}_id'): @@ -177,7 +180,7 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_array_values=Non if hasattr(value, 'map'): if empty_array: value = hl.or_else(value, hl.empty_array(value.dtype.element_type)) - value = value.map(lambda x: cls._enum_field(field, x, enum, **kwargs)) + value = value.map(lambda x: cls._enum_field(field, x, enum, **kwargs, format_value=cls._camelcase_value)) if format_array_values: value = format_array_values(value, r) return value From 6bc8099825e20f0968e9472c74c3ea8f6398b40f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 14:34:45 -0400 Subject: [PATCH 32/47] shared consequence details ui --- .../components/panel/variants/Predictions.jsx | 2 +- .../components/panel/variants/Transcripts.jsx | 192 +++++++++++------- 2 files changed, 
114 insertions(+), 80 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index b55b9e91d3..365f508580 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -112,7 +112,7 @@ class Predictions extends React.PureComponent { } } const mainTranscript = getVariantMainTranscript(variant) - if (mainTranscript?.alphamissense.pathogenicity) { + if (mainTranscript?.alphamissense?.pathogenicity) { genePredictors.alphamissense = { field: 'alphamissense', fieldValue: mainTranscript.alphamissense.pathogenicity, diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 56cbb420c1..10275d5212 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -10,6 +10,7 @@ import { VerticalSpacer } from '../../Spacers' import DispatchRequestButton from '../../buttons/DispatchRequestButton' import ShowGeneModal from '../../buttons/ShowGeneModal' import { ProteinSequence, TranscriptLink } from './VariantUtils' +import { toCamelcase, camelcaseToTitlecase } from '../../../utils/stringUtils' const AnnotationSection = styled.div` display: inline-block; @@ -24,6 +25,54 @@ const AnnotationLabel = styled.small` const HeaderLabel = AnnotationLabel.withComponent('span') +const AnnotationDetail = ({ consequence, title, getContent }) => ( + + {title} + {getContent ? getContent(consequence) : consequence[toCamelcase(title)]} +
+
+) + +AnnotationDetail.propTypes = { + consequence: PropTypes.object.isRequired, + title: PropTypes.string.isRequired, + getContent: PropTypes.func, +} + +export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, annotationSections, ...props }) => ( + + + {consequences.map(c => ( + + + + {idDetails && idDetails(c, variant, props)} + + + {c.majorConsequence || c.consequenceTerms.join('; ')} + + + {annotationSections.map(([field1, field2]) => ( + + + {field2 && } + + ))} + + + ))} + +
+) + +ConsequenceDetails.propTypes = { + consequences: PropTypes.arrayOf(PropTypes.object).isRequired, + idField: PropTypes.string.isRequired, + variant: PropTypes.object, + idDetails: PropTypes.func, + annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)), +} + const TRANSCRIPT_LABELS = [ { content: 'Canonical', @@ -42,7 +91,62 @@ const TRANSCRIPT_LABELS = [ }, ] -const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMainTranscript, project }) => ( +const transcriptIdDetails = (transcript, variant, { transcriptsById, project, updateMainTranscript }) => ( +
+ {transcriptsById[transcript.transcriptId]?.refseqId && ( + + )} + {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => ( + shouldShow(transcript, transcriptsById) && ( +
+) + +const ANNOTATION_SECTIONS = [ + [{ title: 'Codons' }, { title: 'Amino Acids' }], + [ + { title: 'Biotype' }, + { + title: 'Intron/Exon', + getContent: c => ['intron', 'exon'].filter(f => c[f]).map(f => `${camelcaseToTitlecase(f)} ${c[f].index}/${c[f].total}`).join(', '), + }, + ], + [ + { title: 'HGVS.C', getContent: transcript => transcript.hgvsc && }, + { title: 'HGVS.P', getContent: transcript => transcript.hgvsp && }, + ], +] + +const Transcripts = React.memo(({ variant, genesById, ...props }) => ( variant.transcripts && Object.entries(variant.transcripts).sort((transcriptsA, transcriptsB) => ( Math.min(...transcriptsA[1].map(t => t.transcriptRank)) - Math.min(...transcriptsB[1].map(t => t.transcriptRank)) )).map(([geneId, geneTranscripts]) => ( @@ -54,84 +158,14 @@ const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMai subheader={`Gene Id: ${geneId}`} /> - - - {geneTranscripts.map(transcript => ( - - - - {transcriptsById[transcript.transcriptId]?.refseqId && ( - - )} -
- {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => ( - shouldShow(transcript, transcriptsById) && ( -
-
- - {transcript.majorConsequence} - - - - Codons - {transcript.codons} -
- Amino Acids - {transcript.aminoAcids} -
-
- - Biotype - {transcript.biotype} -
- Intron/Exon - {transcript.intron && `Intron ${transcript.intron.index}/${transcript.intron.total}`} - {transcript.exon && `${transcript.intron ? ', ' : ''}Exon ${transcript.exon.index}/${transcript.exon.total}`} -
-
- - HGVS.C - {transcript.hgvsc && } -
- HGVS.P - {transcript.hgvsp && } -
-
-
-
- ))} -
-
+
From 2c09ccb38bea46509fcc2a6b1056be7b5f557aed Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 14:57:12 -0400 Subject: [PATCH 33/47] show regluatory features --- .../components/panel/variants/Annotations.jsx | 25 ++++++++++++++++++- .../components/panel/variants/Transcripts.jsx | 9 ++++--- .../panel/variants/VariantUtils.jsx | 6 ++--- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 771e96dd10..126af1af33 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -22,7 +22,7 @@ import Modal from '../../modal/Modal' import { ButtonLink, HelpIcon } from '../../StyledComponents' import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable' import { getOtherGeneNames } from '../genes/GeneDetail' -import Transcripts from './Transcripts' +import Transcripts, { ConsequenceDetails } from './Transcripts' import VariantGenes, { GeneLabelContent, omimPhenotypesDetail } from './VariantGene' import { getLocus, @@ -185,6 +185,9 @@ VariantPosition.propTypes = { svType: PropTypes.string, } +const REGULATORY_FEATURE_SECTIONS = [[{ title: 'Biotype' }]] +const REGULATORY_FEATURE_LINK = { ensemblEntity: 'Regulation', ensemblKey: 'rf' } + const LOF_FILTER_MAP = { END_TRUNC: { title: 'End Truncation', message: 'This variant falls in the last 5% of the transcript' }, INCOMPLETE_CDS: { title: 'Incomplete CDS', message: 'The start or stop codons are not known for this transcript' }, @@ -626,6 +629,26 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} + {variant.sortedRegulatoryFeatureConsequences && ( +
+ Regulatory Feature:   + {variant.sortedRegulatoryFeatureConsequences[0].consequenceTerms[0].replace(/_/g, ' ')} + } + > + + +
+ )} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 10275d5212..8b8a32c1af 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -39,13 +39,15 @@ AnnotationDetail.propTypes = { getContent: PropTypes.func, } -export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, annotationSections, ...props }) => ( +export const ConsequenceDetails = ( + { consequences, variant, idField, idDetails, annotationSections, ensemblLink = {}, ...props }, +) => ( {consequences.map(c => ( - + {idDetails && idDetails(c, variant, props)} @@ -53,7 +55,7 @@ export const ConsequenceDetails = ({ consequences, variant, idField, idDetails, {annotationSections.map(([field1, field2]) => ( - + {field2 && } @@ -71,6 +73,7 @@ ConsequenceDetails.propTypes = { variant: PropTypes.object, idDetails: PropTypes.func, annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)), + ensemblLink: PropTypes.object, } const TRANSCRIPT_LABELS = [ diff --git a/ui/shared/components/panel/variants/VariantUtils.jsx b/ui/shared/components/panel/variants/VariantUtils.jsx index 663ff8b58c..d50b0984c5 100644 --- a/ui/shared/components/panel/variants/VariantUtils.jsx +++ b/ui/shared/components/panel/variants/VariantUtils.jsx @@ -10,10 +10,10 @@ const SequenceContainer = styled.span` color: ${props => props.color || 'inherit'}; ` -export const TranscriptLink = styled.a.attrs(({ variant, transcript }) => ({ +export const TranscriptLink = styled.a.attrs(({ variant, transcript, idField = 'transcriptId', ensemblEntity = 'Transcript', ensemblKey = 't' }) => ({ target: '_blank', - href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 'grch37' : 'useast'}.ensembl.org/Homo_sapiens/Transcript/Summary?t=${transcript.transcriptId}`, - children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? 
transcript.hgvsc.split(':')[0] : transcript.transcriptId, + href: `http://${variant.genomeVersion === GENOME_VERSION_37 ? 'grch37' : 'useast'}.ensembl.org/Homo_sapiens/${ensemblEntity}/Summary?${ensemblKey}=${transcript[idField]}`, + children: transcript.hgvsc?.startsWith(transcript.transcriptId) ? transcript.hgvsc.split(':')[0] : transcript[idField], }))` font-size: 1.3em; font-weight: normal; From ae6550c57d5edd7a0a1aec22e5bc2c1c9cbff4d6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 4 Jun 2024 17:41:22 -0400 Subject: [PATCH 34/47] show motif features --- .../components/panel/variants/Annotations.jsx | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 126af1af33..4f5b65db72 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -185,8 +185,11 @@ VariantPosition.propTypes = { svType: PropTypes.string, } -const REGULATORY_FEATURE_SECTIONS = [[{ title: 'Biotype' }]] const REGULATORY_FEATURE_LINK = { ensemblEntity: 'Regulation', ensemblKey: 'rf' } +const CONSEQUENCE_FEATURES = [ + { name: 'Regulatory', annotationSections: [[{ title: 'Biotype' }]] }, + { name: 'Motif', annotationSections: [] }, +].map(f => ({ ...f, field: `sorted${f.name}FeatureConsequences`, idField: `${f.name.toLowerCase()}FeatureId` })) const LOF_FILTER_MAP = { END_TRUNC: { title: 'End Truncation', message: 'This variant falls in the last 5% of the transcript' }, @@ -629,26 +632,23 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} - {variant.sortedRegulatoryFeatureConsequences && ( + {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => (
- Regulatory Feature:   + {`${name} Feature: `} {variant.sortedRegulatoryFeatureConsequences[0].consequenceTerms[0].replace(/_/g, ' ')} - } + modalName={`${variant.variantId}-${name}`} + title={`${name} Feature Consequences`} + trigger={{variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}} >
- )} + ))} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   From e319f9093d42b6f03358580567178e27bb637621 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 10:55:40 -0400 Subject: [PATCH 35/47] Revert "debug code" This reverts commit bcaba7c1eea1a647249c4a9258776d97de55c91c. --- hail_search/queries/base.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 29a18214db..9d9738c415 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -35,8 +35,6 @@ def _to_camel_case(snake_case_str): class BaseHailTableQuery(object): - ANNS_HT = 'annotations.ht' - DATA_TYPE = None KEY_FIELD = None LOADED_GLOBALS = None @@ -306,14 +304,11 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ project_hts = [] sample_data = {} for project_guid, project_sample_data in project_samples.items(): - try: - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing else None, - ) - except Exception as e: - project_ht = None + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing else None, + ) if project_ht is None: continue project_hts.append(project_ht.select_globals('sample_type', 'family_guids', 'family_samples')) @@ -1090,7 +1085,7 @@ def gene_counts(self): def lookup_variants(self, variant_ids, include_project_data=False, **kwargs): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) - ht = self._read_table(self.ANNS_HT, drop_globals=['paths', 'versions']) + ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) annotation_fields = self.annotation_fields(include_genotype_overrides=False) From 1d2fa049eb25f6462dc4d4af3120d9430541d6ca Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 10:57:44 
-0400 Subject: [PATCH 36/47] Revert "debug code" This reverts commit 2c7d85ad6ca9eb574a5e29173b0c2effe7f635ca. --- hail_search/queries/base.py | 2 +- hail_search/queries/ont_snv_indel.py | 1 - hail_search/queries/snv_indel.py | 1 - hail_search/queries/snv_indel_37.py | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 9d9738c415..341fc8a6a8 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -89,7 +89,7 @@ class BaseHailTableQuery(object): @classmethod def load_globals(cls): - ht_path = cls._get_table_path(cls.ANNS_HT) + ht_path = cls._get_table_path('annotations.ht') ht_globals = hl.eval(hl.read_table(ht_path).globals.select(*cls.GLOBALS)) cls.LOADED_GLOBALS = {k: ht_globals[k] for k in cls.GLOBALS} diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py index fac3d12d4d..dc99ad8e18 100644 --- a/hail_search/queries/ont_snv_indel.py +++ b/hail_search/queries/ont_snv_indel.py @@ -7,7 +7,6 @@ class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery): DATA_TYPE = 'ONT_SNV_INDEL' - ANNS_HT = 'annotations.ht' CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index 5b42570aeb..a95890e038 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -11,7 +11,6 @@ class SnvIndelHailTableQuery(MitoHailTableQuery): DATA_TYPE = 'SNV_INDEL' - ANNS_HT = 'annotations_vep_110.ht' GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']} QUALITY_FILTER_FORMAT = { diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 3c0a9f2aa5..d43b92cbe6 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -5,7 +5,7 @@ class SnvIndelHailTableQuery37(SnvIndelHailTableQuery): - ANNS_HT = 'annotations.ht' + GENOME_VERSION = GENOME_VERSION_GRCh37 PREDICTION_FIELDS_CONFIG = 
SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG_ALL_BUILDS LIFTOVER_ANNOTATION_FIELDS = {} From f6d26b2587157013790200e3e4db88c4a36a9a76 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 15:29:44 -0400 Subject: [PATCH 37/47] adjust annotation order --- .../components/panel/variants/Annotations.jsx | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 4f5b65db72..ce8ff11866 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -632,23 +632,6 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} - {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => ( -
- {`${name} Feature: `} - {variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}} - > - - -
- ))} {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   @@ -673,6 +656,23 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts
)} + {CONSEQUENCE_FEATURES.filter(({ field }) => variant[field]).map(({ field, name, ...props }) => ( +
+ {`${name} Feature: `} + {variant[field][0].consequenceTerms[0].replace(/_/g, ' ')}} + > + + +
+ ))} {mainTranscript.hgvsc && (
HGVS.C From 8d20abcc16f396479b681b869bc8a7e651eabc3a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 15:35:55 -0400 Subject: [PATCH 38/47] show utrannotator in transcript detail --- ui/shared/components/panel/variants/Transcripts.jsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 8b8a32c1af..efdb7be9f6 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -40,7 +40,7 @@ AnnotationDetail.propTypes = { } export const ConsequenceDetails = ( - { consequences, variant, idField, idDetails, annotationSections, ensemblLink = {}, ...props }, + { consequences, variant, idField, idDetails, consequenceDetails, annotationSections, ensemblLink = {}, ...props }, ) => (
@@ -52,6 +52,7 @@ export const ConsequenceDetails = ( {c.majorConsequence || c.consequenceTerms.join('; ')} + {consequenceDetails && consequenceDetails(c)} {annotationSections.map(([field1, field2]) => ( @@ -72,6 +73,7 @@ ConsequenceDetails.propTypes = { idField: PropTypes.string.isRequired, variant: PropTypes.object, idDetails: PropTypes.func, + consequenceDetails: PropTypes.func, annotationSections: PropTypes.arrayOf(PropTypes.arrayOf(PropTypes.object)), ensemblLink: PropTypes.object, } @@ -134,6 +136,13 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up ) +const transcriptConsequenceDetails = ({ utrannotator }) => utrannotator?.fiveutrConsequence && ( +
+ UTRAnnotator: + {utrannotator.fiveutrConsequence} +
+) + const ANNOTATION_SECTIONS = [ [{ title: 'Codons' }, { title: 'Amino Acids' }], [ @@ -166,6 +175,7 @@ const Transcripts = React.memo(({ variant, genesById, ...props }) => ( variant={variant} idField="transcriptId" idDetails={transcriptIdDetails} + consequenceDetails={transcriptConsequenceDetails} annotationSections={ANNOTATION_SECTIONS} {...props} /> From fc3ca3dbf7c4612a601713df7a014a5adcbeda89 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 5 Jun 2024 15:48:50 -0400 Subject: [PATCH 39/47] do not raise unhandled error on airtable mismatch --- seqr/views/apis/summary_data_api_tests.py | 8 ++++---- seqr/views/utils/airtable_utils.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 96a6f5d580..441bce90db 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -679,10 +679,10 @@ def test_sample_metadata_export(self, mock_google_authenticated): responses.add(responses.GET, '{}/app3Y97xtbbaOopVR/Collaborator'.format(AIRTABLE_URL), json=AIRTABLE_COLLABORATOR_RECORDS, status=200) response = self.client.get(include_airtable_url) - self.assertEqual(response.status_code, 500) - self.assertEqual( - response.json()['error'], - 'Found multiple airtable records for sample NA19675 with mismatched values in field dbgap_study_id') + self.assertEqual(response.status_code, 400) + self.assertListEqual( + response.json()['errors'], + ['Found multiple airtable records for sample NA19675 with mismatched values in field dbgap_study_id']) self.assertEqual(len(responses.calls), 4) first_formula = "OR({CollaboratorSampleID}='NA20885',{CollaboratorSampleID}='NA20888')" expected_fields = [ diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py index f6a80f09ff..027e5785ce 100644 --- a/seqr/views/utils/airtable_utils.py +++ b/seqr/views/utils/airtable_utils.py @@ -2,6 +2,7 @@ from collections 
import defaultdict from django.core.exceptions import PermissionDenied +from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.logging_utils import SeqrLogger from seqr.views.utils.terra_api_utils import is_google_authenticated @@ -138,7 +139,7 @@ def get_airtable_samples(sample_ids, user, fields, list_fields=None): if len(record_field) > 1: error = 'Found multiple airtable records for sample {} with mismatched values in field {}'.format( record_id, field) - raise Exception(error) + raise ErrorsWarningsException([error]) if record_field: parsed_record[field] = record_field.pop() for field in list_fields: From 148e8cf80d0599c0552430b397d74cf5d4c2128c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 6 Jun 2024 10:04:11 -0400 Subject: [PATCH 40/47] fix AoU link --- ui/shared/components/panel/variants/Annotations.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 3af864824a..446f60b7eb 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -267,7 +267,7 @@ const VARIANT_LINKS = [ { name: 'AoU', shouldShow: ({ svType }) => !svType, - getHref: ({ chrom, pos, ref, alt }) => `https://databrowser.researchallofus.org/genomic-variants/${chrom}-${pos}-${ref}-${alt}`, + getHref: ({ chrom, pos, ref, alt }) => `https://databrowser.researchallofus.org/variants/${chrom}-${pos}-${ref}-${alt}`, }, { name: 'Iranome', From bc1f3154dc3a89746a204b2609e7639541cc22e1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 6 Jun 2024 14:07:56 -0400 Subject: [PATCH 41/47] show extended intronic splice region --- ui/shared/components/panel/variants/Annotations.jsx | 3 ++- ui/shared/components/panel/variants/Transcripts.jsx | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx 
b/ui/shared/components/panel/variants/Annotations.jsx index 19daaed793..2cb9f9f759 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -22,7 +22,7 @@ import Modal from '../../modal/Modal' import { ButtonLink, HelpIcon } from '../../StyledComponents' import RnaSeqJunctionOutliersTable from '../../table/RnaSeqJunctionOutliersTable' import { getOtherGeneNames } from '../genes/GeneDetail' -import Transcripts, { ConsequenceDetails } from './Transcripts' +import Transcripts, { ConsequenceDetails, ExtendedSpliceLabel } from './Transcripts' import VariantGenes, { GeneLabelContent, omimPhenotypesDetail } from './VariantGene' import { getLocus, @@ -632,6 +632,7 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcripts )} + {mainTranscript.utrannotator?.fiveutrConsequence && (
UTRAnnotator:   diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index efdb7be9f6..0449243985 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -136,10 +136,19 @@ const transcriptIdDetails = (transcript, variant, { transcriptsById, project, up
) -const transcriptConsequenceDetails = ({ utrannotator }) => utrannotator?.fiveutrConsequence && ( +export const ExtendedSpliceLabel = ({ spliceregion }) => spliceregion?.extended_intronic_splice_region_variant && ( +