Skip to content

Commit

Permalink
added parsing funcs for AggDescription in rcv_accession_classificatio…
Browse files Browse the repository at this point in the history
…n table.
  • Loading branch information
larrybabb committed Dec 5, 2024
1 parent 3fe8a29 commit 5eb6101
Show file tree
Hide file tree
Showing 3 changed files with 284 additions and 2 deletions.
45 changes: 45 additions & 0 deletions scripts/parsing-funcs/parse-parseAggDescription-func.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
-- BigQuery JavaScript UDF wrapper around parseAggDescription.
-- Input : `json`, the JSON text of an aggregate classification description.
-- Output: a STRUCT with a single `description` field, an array of structs
--         (assertion type, clinical significance, date last evaluated,
--         submission count, description text).
-- The implementation is not inline: the body only forwards `json` to the
-- parseAggDescription function provided by the parse-utils.js library
-- listed in OPTIONS.
CREATE OR REPLACE FUNCTION `clinvar_ingest.parseAggDescription`(json STRING)
RETURNS STRUCT<
description ARRAY<STRUCT<
clinical_impact_assertion_type STRING,
clinical_impact_clinical_significance STRING,
date_last_evaluated DATE,
num_submissions INT64,
interp_description STRING
>>
>
LANGUAGE js
OPTIONS (
library=['gs://clinvar-ingest/bq-tools/parse-utils.js'])
AS r"""
return parseAggDescription(json);
""";

-- test
-- Smoke test: feeds a two-item "Description" array through the UDF and
-- selects the resulting struct so the parsed fields can be inspected.
WITH x as (
  SELECT
"""
{
"Description": [
{
"@ClinicalImpactAssertionType": "diagnostic",
"@ClinicalImpactClinicalSignificance": "supports diagnosis",
"@DateLastEvaluated": "2024-01-24",
"@SubmissionCount": "1",
"$": "Tier I - Strong"
},
{
"@ClinicalImpactAssertionType": "prognostic",
"@ClinicalImpactClinicalSignificance": "better outcome",
"@DateLastEvaluated": "2024-01-23",
"@SubmissionCount": "1",
"$": "Tier I - Strong"
}
]}
""" as content
),
aggDescriptions as (
  -- The call previously ended with an extra ")" which left the parentheses
  -- unbalanced and made the whole statement fail to parse.
  select `clinvar_ingest.parseAggDescription`(x.content) as aggDescription from x
)
select ad.* from aggDescriptions as ad
;
171 changes: 171 additions & 0 deletions src/parse-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3419,3 +3419,174 @@ function parseTraitSet(json: string): TraitSetOutput {

return buildTraitSetOutput(traitSet);
}



// -- Classification Description interfaces and functions --

// Example of a single classification description item:
// {
//   "@ClinicalImpactAssertionType": "diagnostic",
//   "@ClinicalImpactClinicalSignificance": "supports diagnosis",
//   "@DateLastEvaluated": "2024-01-24",
//   "@SubmissionCount": "1",
//   "$": "Tier I - Strong"
// }

/**
* Represents the input structure for a classification description item,
* as it appears in the parsed JSON. Every field is optional and arrives
* as a string; buildDescriptionItemOutput converts them to typed values.
*/
interface DescriptionItemInput {
// Assertion type attribute, e.g. "diagnostic" or "prognostic".
'@ClinicalImpactAssertionType'?: string;
// Clinical significance attribute, e.g. "supports diagnosis".
'@ClinicalImpactClinicalSignificance'?: string;
// Date string, e.g. "2024-01-24".
'@DateLastEvaluated'?: string;
// Submission count as a decimal string, e.g. "1".
'@SubmissionCount'?: string;
// Text content of the item, e.g. "Tier I - Strong".
'$'?: string;
}

/**
* Represents the output structure for a classification description item.
* Each field is null when the corresponding input field is absent or empty.
*/
interface DescriptionItemOutput {
// Copied from '@ClinicalImpactAssertionType'.
clinical_impact_assertion_type: string | null;
// Copied from '@ClinicalImpactClinicalSignificance'.
clinical_impact_clinical_significance: string | null;
// '@DateLastEvaluated' converted to a Date.
date_last_evaluated: Date | null;
// '@SubmissionCount' parsed as a base-10 integer.
num_submissions: number | null;
// Copied from the '$' text content.
interp_description: string | null;
}

/**
* Shape of the parsed JSON document read by parseDescriptionItems: an
* optional `Description` key holding either one item or an array of items.
*/
interface DescriptionItemData {
Description?: DescriptionItemInput | DescriptionItemInput[];
}

/**
 * Builds a DescriptionItemOutput object based on the provided DescriptionItemInput.
 *
 * String fields are copied through; absent or empty values become null.
 * `@DateLastEvaluated` is converted to a Date and `@SubmissionCount` to a
 * base-10 integer. Values that cannot be converted (an unparseable date or
 * a non-numeric count) are reported as null instead of leaking an
 * "Invalid Date" or NaN into a field whose contract is "value or null".
 *
 * @param item - The DescriptionItemInput object.
 * @returns The corresponding DescriptionItemOutput object.
 */
function buildDescriptionItemOutput(item: DescriptionItemInput): DescriptionItemOutput {
  const rawDate = item['@DateLastEvaluated'];
  const parsedDate = rawDate ? new Date(rawDate) : null;

  const rawCount = item['@SubmissionCount'];
  const parsedCount = rawCount ? parseInt(rawCount, 10) : null;

  return {
    clinical_impact_assertion_type: item['@ClinicalImpactAssertionType'] || null,
    clinical_impact_clinical_significance: item['@ClinicalImpactClinicalSignificance'] || null,
    // An unparseable date string produces a Date whose time value is NaN.
    date_last_evaluated: parsedDate !== null && !isNaN(parsedDate.getTime()) ? parsedDate : null,
    // parseInt returns NaN when the string does not start with digits.
    num_submissions: parsedCount !== null && !isNaN(parsedCount) ? parsedCount : null,
    interp_description: item['$'] || null
  };
}

/**
 * Converts one description item, or a list of them, into a list of
 * DescriptionItemOutput objects. A lone item is treated as a one-element
 * list, so the result is always an array.
 * @param items - A single DescriptionItemInput or an array of them.
 * @returns One DescriptionItemOutput per input item, in input order.
 */
function buildDescriptionItemsOutput(items: DescriptionItemInput | DescriptionItemInput[]): DescriptionItemOutput[] {
  const itemList: DescriptionItemInput[] = Array.isArray(items) ? items : [items];
  const outputs: DescriptionItemOutput[] = [];

  for (const entry of itemList) {
    outputs.push(buildDescriptionItemOutput(entry));
  }

  return outputs;
}

/**
 * Parses a JSON string and converts the contents of its `Description` key
 * into DescriptionItemOutput objects. A document without a usable
 * `Description` yields an empty array.
 * @param json - The JSON input string.
 * @returns An array of DescriptionItemOutput objects.
 * @throws {Error} If the JSON input is invalid.
 */
function parseDescriptionItems(json: string): DescriptionItemOutput[] {
  let parsed: DescriptionItemData;
  try {
    parsed = JSON.parse(json);
  } catch (e) {
    throw new Error('Invalid JSON input');
  }

  // Nothing to convert: hand the builder an empty list.
  if (!parsed || !parsed.Description) {
    return buildDescriptionItemsOutput([]);
  }

  return buildDescriptionItemsOutput(parsed.Description);
}

// below is an example of a JSON object representing an rcv_accession_classification description object
// {
// "Description":
// {
// "@ClinicalImpactAssertionType": "diagnostic",
// "@ClinicalImpactClinicalSignificance": "supports diagnosis",
// "@DateLastEvaluated": "2024-01-24",
// "@SubmissionCount": "1",
// "$": "Tier I - Strong"
// }
// }
//
// a second example of an aggregate classification description object as an array input
//
// {"Description":
// [
// {
// "@ClinicalImpactAssertionType": "diagnostic",
// "@ClinicalImpactClinicalSignificance": "supports diagnosis",
// "@DateLastEvaluated": "2024-01-24",
// "@SubmissionCount": "1",
// "$": "Tier I - Strong"
// },
// {
// "@ClinicalImpactAssertionType": "prognostic",
// "@ClinicalImpactClinicalSignificance": "better outcome",
// "@DateLastEvaluated": "2024-01-24",
// "@SubmissionCount": "1",
// "$": "Tier I - Strong"
// }
// ]
// }

/**
* Represents the input structure for a classification description:
* an object whose optional `Description` key holds either a single
* description item or an array of them.
*/
interface AggDescriptionInput {
Description?: DescriptionItemInput | DescriptionItemInput[];
}

/**
* Represents the output structure for a classification description.
* `description` is null when the input carried no `Description` value.
*/
interface AggDescriptionOutput {
description: Array<DescriptionItemOutput> | null;
}

/**
* Shape assigned to the parsed JSON document in parseAggDescription.
* NOTE(review): this places an AggDescriptionInput (which itself has a
* `Description` key) under `Description`, i.e. two levels of nesting, while
* the JSON examples documented in this file show only one level — confirm
* which shape the real input has.
*/
interface AggDescriptionData {
Description?: AggDescriptionInput;
}

/**
 * Converts an AggDescriptionInput into an AggDescriptionOutput.
 * When the input has no `Description` value the output's `description`
 * is null; otherwise it is the list built from that value.
 * @param item - The AggDescriptionInput object.
 * @returns The corresponding AggDescriptionOutput object.
 */
function buildAggDescriptionOutput(item: AggDescriptionInput): AggDescriptionOutput {
  const rawDescription = item.Description;

  if (!rawDescription) {
    return { description: null };
  }

  return { description: buildDescriptionItemsOutput(rawDescription) };
}

/**
 * Parses the JSON input and returns an AggDescriptionOutput object.
 *
 * The expected document is `{"Description": <item or array of items>}`.
 * The whole parsed object is passed to buildAggDescriptionOutput, which
 * reads its `Description` key. Previously the `Description` value was
 * unwrapped here first and then unwrapped again by the builder, so the
 * documented inputs always produced `{ description: null }`.
 *
 * For backward compatibility a doubly nested document,
 * `{"Description": {"Description": ...}}`, is still accepted.
 *
 * @param json - The JSON input string.
 * @returns An AggDescriptionOutput object; `description` is null when the
 *          document has no `Description` value.
 * @throws {Error} If the JSON input is invalid.
 */
function parseAggDescription(json: string): AggDescriptionOutput {
  let parsed: unknown;
  try {
    parsed = JSON.parse(json);
  } catch (e) {
    throw new Error('Invalid JSON input');
  }

  // JSON null, numbers and strings carry no description at all.
  const root: { Description?: unknown } =
    parsed !== null && typeof parsed === 'object' ? (parsed as { Description?: unknown }) : {};

  // A description item never has its own `Description` key, so a plain
  // object that does have one can only be the old doubly nested wrapper.
  const nested = root.Description;
  const isDoublyNested =
    nested !== null &&
    typeof nested === 'object' &&
    !Array.isArray(nested) &&
    'Description' in nested;

  const aggDescription = (isDoublyNested ? nested : root) as AggDescriptionInput;

  return buildAggDescriptionOutput(aggDescription);
}

70 changes: 68 additions & 2 deletions test/parse-utils.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ const buildTraitsOutput = parseUtils.__get__('buildTraitsOutput');
const parseTraits = parseUtils.__get__('parseTraits');
const buildTraitSetOutput = parseUtils.__get__('buildTraitSetOutput');
const parseTraitSet = parseUtils.__get__('parseTraitSet');
// Pull the description-parsing functions out of the parse-utils module by
// name so the tests below can call them directly.
// NOTE(review): `__get__` is presumably the accessor added by rewire — confirm.
const buildDescriptionItemsOutput = parseUtils.__get__('buildDescriptionItemsOutput');
const buildAggDescriptionOutput = parseUtils.__get__('buildAggDescriptionOutput');
const parseAggDescription = parseUtils.__get__('parseAggDescription');


test('buildGeneListOutput should build GeneListOutput correctly', () => {
const input = { Gene: { '@Symbol': 'Symbol1', 'Name': {'$':'HGNC1'},'@RelationshipType': 'asserted, not computed' } };
Expand Down Expand Up @@ -1195,7 +1199,7 @@ test('parseTraitSet should parse JSON input correctly', () => {
"@Type":"MIM","@ID":"113000","@DB":"OMIM"
}]
}]
}}`;
}}`;
const expectedOutput = {
type: 'Disease',
id: '8827',
Expand Down Expand Up @@ -1243,6 +1247,68 @@ test('parseTraitSet should throw error for invalid JSON input', () => {
});



// {
// "@ClinicalImpactAssertionType": "diagnostic",
// "@ClinicalImpactClinicalSignificance": "supports diagnosis",
// "@DateLastEvaluated": "2024-01-24",
// "@SubmissionCount": "1",
// "$": "Tier I - Strong"
// }
test('buildDescriptionItemsOutput should build DescriptionItemsOutput correctly', () => {
  // A single item (not an array) goes in; a one-element array is expected back.
  const singleItem = {
    '@ClinicalImpactAssertionType': 'diagnostic',
    '@ClinicalImpactClinicalSignificance': 'supports diagnosis',
    '@DateLastEvaluated': '2024-01-24',
    '@SubmissionCount': '1',
    '$': 'Tier I - Strong'
  };

  const actual = buildDescriptionItemsOutput(singleItem);

  expect(actual).toEqual([
    {
      clinical_impact_assertion_type: 'diagnostic',
      clinical_impact_clinical_significance: 'supports diagnosis',
      date_last_evaluated: new Date('2024-01-24T00:00:00.000Z'),
      num_submissions: 1,
      interp_description: 'Tier I - Strong'
    }
  ]);
});


test('buildAggDescriptionOutput should build AggDescriptionOutput correctly', () => {
  // Two raw items under "Description"; both must appear, in order, in the output.
  const diagnosticItem = {
    '@ClinicalImpactAssertionType': 'diagnostic',
    '@ClinicalImpactClinicalSignificance': 'supports diagnosis',
    '@DateLastEvaluated': '2024-01-24',
    '@SubmissionCount': '1',
    '$': 'Tier I - Strong'
  };
  const prognosticItem = {
    '@ClinicalImpactAssertionType': 'prognostic',
    '@ClinicalImpactClinicalSignificance': 'better outcome',
    '@DateLastEvaluated': '2024-01-23',
    '@SubmissionCount': '1',
    '$': 'Tier I - Strong'
  };
  const input = { Description: [diagnosticItem, prognosticItem] };

  const expectedDiagnostic = {
    clinical_impact_assertion_type: 'diagnostic',
    clinical_impact_clinical_significance: 'supports diagnosis',
    date_last_evaluated: new Date('2024-01-24T00:00:00.000Z'),
    num_submissions: 1,
    interp_description: 'Tier I - Strong'
  };
  const expectedPrognostic = {
    clinical_impact_assertion_type: 'prognostic',
    clinical_impact_clinical_significance: 'better outcome',
    date_last_evaluated: new Date('2024-01-23T00:00:00.000Z'),
    num_submissions: 1,
    interp_description: 'Tier I - Strong'
  };

  const actual = buildAggDescriptionOutput(input);

  expect(actual).toEqual({ description: [expectedDiagnostic, expectedPrognostic] });
});

0 comments on commit 5eb6101

Please sign in to comment.