Skip to content

Commit

Permalink
temporal and report table procs updates to support new xml
Browse files Browse the repository at this point in the history
  • Loading branch information
larrybabb committed Dec 23, 2024
1 parent 9cae79b commit 8d4fb96
Show file tree
Hide file tree
Showing 37 changed files with 2,303 additions and 1,820 deletions.
33 changes: 33 additions & 0 deletions scripts/dataset-preparation/00-setup-translation-tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,39 @@ VALUES
('SomaticClinicalImpact', 't3', 'Tier III - Unknown', 1, 'somatic', 20, 20, 'somatic', 20, 20, 'none', 'cg000103', 'inconclusive', 'cg000025', null),
('SomaticClinicalImpact', 't4', 'Tier IV - Benign/Likely benign', 0, 'somatic', 32, 32, 'somatic', 32, 32, 'refutes', 'cg000102', 'likely', 'cg000026', null);

-- Stage-only cleanup: rebuild `clinvar_ingest.clinvar_clinsig_types` so that it
-- keeps only the rows whose statement_type is 'GermlineClassification'.
-- NOTE: `statement_type` is used as the filter but is not in the select list,
-- so the rebuilt table no longer carries that column.
BEGIN
  DECLARE project_id STRING;

  -- INFORMATION_SCHEMA.SCHEMATA.catalog_name is the project that contains the
  -- dataset, which identifies the environment this script is running in.
  SET project_id = (
    SELECT
      catalog_name AS project_id
    FROM `INFORMATION_SCHEMA.SCHEMATA`
    WHERE schema_name = 'clinvar_ingest'
  );

  -- Google Cloud project IDs may only contain lowercase letters, digits and
  -- hyphens, so the previous comparison against 'clingen_stage' (underscore)
  -- could never be true and this branch never ran.
  -- NOTE(review): assumes the stage project ID is 'clingen-stage' -- confirm.
  IF (project_id = 'clingen-stage') THEN
    CREATE OR REPLACE TABLE `clinvar_ingest.clinvar_clinsig_types`
    AS
    SELECT
      code,
      label,
      significance,
      original_proposition_type,
      original_code_order,
      original_description_order,
      gks_proposition_type,
      gks_code_order,
      gks_description_order,
      direction,
      strength_code,
      strength_label,
      classification_code,
      penetrance_level
    FROM `clinvar_ingest.clinvar_clinsig_types`
    WHERE statement_type = 'GermlineClassification';
  END IF;

END;

CREATE OR REPLACE TABLE `clinvar_ingest.clinvar_proposition_types` (
code STRING,
Expand Down
80 changes: 50 additions & 30 deletions scripts/dataset-preparation/02-normalize-ds-v2-proc.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,18 @@ BEGIN
IF NOT column_exists THEN
-- backup the original clinical_assertion table
EXECUTE IMMEDIATE FORMAT("""
CREATE TABLE `%s.backup_clinical_assertion` AS
SELECT * FROM `%s.clinical_assertion`
CREATE TABLE `%s.backup_clinical_assertion`
AS
SELECT
*
FROM `%s.clinical_assertion`
""", schema_name, schema_name);


-- create or replace the clinical_assertion table from the backup
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `%s.clinical_assertion` AS
CREATE OR REPLACE TABLE `%s.clinical_assertion`
AS
SELECT
*,
'GermlineClassification' as statement_type,
Expand All @@ -39,13 +43,17 @@ BEGIN
IF NOT table_exists THEN
-- backup the original rcv_accession table
EXECUTE IMMEDIATE FORMAT("""
CREATE TABLE `%s.backup_rcv_accession` AS
SELECT * FROM `%s.rcv_accession`
CREATE TABLE `%s.backup_rcv_accession`
AS
SELECT
*
FROM `%s.rcv_accession`
""", schema_name, schema_name);

-- create the rcv_accession_classification table from the backup
EXECUTE IMMEDIATE FORMAT("""
CREATE TABLE %s.rcv_accession_classification AS
CREATE TABLE %s.rcv_accession_classification
AS
SELECT
release_date,
id as rcv_id,
Expand All @@ -65,7 +73,8 @@ BEGIN

-- create or replace the rcv_accession_classification table from the backup
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `%s.rcv_accession` AS
CREATE OR REPLACE TABLE `%s.rcv_accession`
AS
SELECT
release_date,
id,
Expand All @@ -87,13 +96,17 @@ BEGIN
IF NOT column_exists THEN
-- backup the original rcv_accession_classification table
EXECUTE IMMEDIATE FORMAT("""
CREATE TABLE `%s.backup_rcv_accession_classification` AS
SELECT * FROM `%s.rcv_accession_classification`
CREATE TABLE `%s.backup_rcv_accession_classification`
AS
SELECT
*
FROM `%s.rcv_accession_classification`
""", schema_name, schema_name);

-- create or replace the rcv_accession_classification table from the backup
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `%s.rcv_accession_classification` AS
CREATE OR REPLACE TABLE `%s.rcv_accession_classification`
AS
SELECT
release_date,
rcv_id,
Expand All @@ -106,25 +119,27 @@ BEGIN
REGEXP_REPLACE(content, r'"Description"\\s*\\:\\s*"[^"]+"\\s*,*\\s*', "")
) as content
FROM `%s.rcv_accession_classification`
WHERE content is not null
WHERE
content is not null
UNION ALL
SELECT
release_date,
rcv_id,
statement_type,
review_status,
[
STRUCT(
clinical_impact_assertion_type,
clinical_impact_clinical_significance,
date_last_evaluated,
num_submissions,
interp_description
)
] as agg_classification,
content
release_date,
rcv_id,
statement_type,
review_status,
[
STRUCT(
clinical_impact_assertion_type,
clinical_impact_clinical_significance,
date_last_evaluated,
num_submissions,
interp_description
)
] as agg_classification,
content
FROM `%s.rcv_accession_classification`
WHERE content is null
WHERE
content is null
""", schema_name, schema_name, schema_name);
END IF;
END IF;
Expand All @@ -137,13 +152,17 @@ BEGIN
IF NOT table_exists THEN
-- backup the original variation_archive table
EXECUTE IMMEDIATE FORMAT("""
CREATE TABLE `%s.backup_variation_archive` AS
SELECT * FROM `%s.variation_archive`
CREATE TABLE `%s.backup_variation_archive`
AS
SELECT
*
FROM `%s.variation_archive`
""", schema_name, schema_name);

-- create the variation_archive_classification table from the backup
EXECUTE IMMEDIATE FORMAT("""
CREATE TABLE %s.variation_archive_classification AS
CREATE TABLE %s.variation_archive_classification
AS
SELECT
id as vcv_id,
'GermlineClassification' AS statement_type,
Expand All @@ -163,7 +182,8 @@ BEGIN

-- create or replace the variation_archive table from the backup
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `%s.variation_archive` AS
CREATE OR REPLACE TABLE `%s.variation_archive`
AS
SELECT
date_created,
record_status,
Expand Down
11 changes: 8 additions & 3 deletions scripts/dataset-preparation/03-scv-summary-proc.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ CREATE OR REPLACE PROCEDURE `clinvar_ingest.scv_summary`(
)
BEGIN
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `%s.scv_summary` AS
CREATE OR REPLACE TABLE `%s.scv_summary`
AS
WITH obs_sample AS (
SELECT
REGEXP_EXTRACT(id, r'^SCV[0-9]+') as id,
Expand Down Expand Up @@ -48,8 +49,12 @@ BEGIN
STRING_AGG(DISTINCT om.method_type, ", " ORDER BY om.method_type) as method_type
FROM
`%s.clinical_assertion` ca
LEFT JOIN obs_sample os ON os.id = ca.id
LEFT JOIN obs_method om ON om.id = ca.id
LEFT JOIN obs_sample os
ON
os.id = ca.id
LEFT JOIN obs_method om
ON
om.id = ca.id
GROUP BY
ca.id
),
Expand Down
14 changes: 10 additions & 4 deletions scripts/dataset-preparation/03-scv-summary-v2-proc.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ CREATE OR REPLACE PROCEDURE `clinvar_ingest.scv_summary_v2`(
)
BEGIN
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `%s.scv_summary` AS
CREATE OR REPLACE TABLE `%s.scv_summary`
AS
WITH obs_sample AS (
SELECT
REGEXP_EXTRACT(id, r'^SCV[0-9]+') as id,
Expand Down Expand Up @@ -47,8 +48,12 @@ BEGIN
STRING_AGG(DISTINCT om.method_type, ", " ORDER BY om.method_type) as method_type
FROM
`%s.clinical_assertion` ca
LEFT JOIN obs_sample os ON os.id = ca.id
LEFT JOIN obs_method om ON om.id = ca.id
LEFT JOIN obs_sample os
ON
os.id = ca.id
LEFT JOIN obs_method om
ON
om.id = ca.id
GROUP BY
ca.id
),
Expand All @@ -59,7 +64,8 @@ BEGIN
FROM
`%s.clinical_assertion` ca
LEFT JOIN UNNEST(ca.interpretation_comments) as c
WHERE ARRAY_LENGTH(ca.interpretation_comments) > 0
WHERE
ARRAY_LENGTH(ca.interpretation_comments) > 0
GROUP BY
id
)
Expand Down
Loading

0 comments on commit 8d4fb96

Please sign in to comment.