-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
started adding clinvar-curator scripts
- Loading branch information
Showing
9 changed files
with
636 additions
and
0 deletions.
There are no files selected for viewing
189 changes: 189 additions & 0 deletions
189
scripts/clinvar-curartion/cvc-annotations-as-of-func.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
-- Table function: curator annotations as of a date, enriched with ClinVar SCV/VCV state.
--
-- Parameters (all three are forwarded unchanged to
-- `clinvar_curator.cvc_baseline_annotations_as_of`):
--   as_of_date         DATE
--   include_finalized  BOOL
--   only_latest        BOOL
--
-- Each baseline annotation row is left-joined to:
--   * the SCV row matching the annotated variation/id/version whose release range
--     contains the annotation's release_date,
--   * the submitter row whose release range contains the annotation's release_date,
--   * the "last" SCV and VCV rows (greatest end_release_date among rows that started
--     on or before the annotation's release_date).
-- Because every enrichment is a LEFT JOIN, the joined columns (and the boolean flags
-- derived from them) can be NULL when no matching row exists.
CREATE OR REPLACE TABLE FUNCTION `clinvar_curator.cvc_annotations_as_of`(as_of_date DATE, include_finalized BOOL, only_latest BOOL) AS (
  WITH
  -- Baseline annotations produced by the companion table function.
  baseline AS (
    SELECT
      src.as_of_date,
      src.release_date,
      src.annotation_id,
      -- variant and vcv
      src.variation_id,
      src.vcv_axn,
      src.vcv_id,
      src.vcv_ver,
      -- scv
      src.scv_id,
      src.scv_ver,
      -- annotation assessment record
      src.curator,
      src.annotated_on,
      src.annotated_date,
      src.annotated_time_utc,
      src.action,
      src.reason,
      src.notes,
      src.submitter_id,
      src.annotation_label,
      src.is_latest,
      -- finalized review info
      src.finalized_review_id,
      src.finalized_reviewer,
      src.finalized_review_status,
      src.finalized_review_notes,
      -- finalized submission batch info
      src.finalized_submission_batch_id,
      src.finalized_review_label,
      src.finalized_review_count,
      -- prior review data
      src.has_prior_scv_id_annotation,
      src.has_prior_scv_ver_annotation,
      src.has_prior_finalized_submission_batch_id,
      src.prior_scv_annotations
    FROM `clinvar_curator.cvc_baseline_annotations_as_of`(as_of_date, include_finalized, only_latest) AS src
  ),

  -- Per annotated SCV id: the greatest end_release_date among SCV rows that started
  -- on or before the annotation's release_date.
  scv_latest_end AS (
    SELECT
      scv.id,
      MAX(scv.end_release_date) AS max_end_release_date
    FROM baseline AS b
    INNER JOIN `clinvar_ingest.clinvar_scvs` AS scv
      ON scv.id = b.scv_id
    WHERE
      scv.start_release_date <= b.release_date
    GROUP BY
      scv.id
  ),

  -- The SCV row(s) carrying that greatest end_release_date.
  scv_last AS (
    SELECT
      le.id,
      scv.version,
      scv.variation_id,
      scv.start_release_date,
      scv.end_release_date,
      scv.deleted_release_date,
      scv.classif_type,
      scv.rank
    FROM scv_latest_end AS le
    INNER JOIN `clinvar_ingest.clinvar_scvs` AS scv
      ON scv.id = le.id
      AND scv.end_release_date = le.max_end_release_date
  ),

  -- Per annotated VCV id: the greatest end_release_date among VCV rows that started
  -- on or before the annotation's release_date.
  vcv_latest_end AS (
    SELECT
      vcv.id,
      MAX(vcv.end_release_date) AS max_end_release_date
    FROM baseline AS b
    INNER JOIN `clinvar_ingest.clinvar_vcvs` AS vcv
      ON vcv.id = b.vcv_id
    WHERE
      vcv.start_release_date <= b.release_date
    GROUP BY
      vcv.id
  ),

  -- The VCV row(s) carrying that greatest end_release_date.
  vcv_last AS (
    SELECT
      le.id,
      vcv.version,
      vcv.variation_id,
      vcv.start_release_date,
      vcv.end_release_date,
      vcv.deleted_release_date,
      vcv.agg_classification,
      vcv.rank
    FROM vcv_latest_end AS le
    INNER JOIN `clinvar_ingest.clinvar_vcvs` AS vcv
      ON vcv.id = le.id
      AND vcv.end_release_date = le.max_end_release_date
  )

  SELECT
    b.as_of_date,
    b.release_date,
    b.annotation_id,
    -- variant and vcv
    b.variation_id,
    b.vcv_axn,
    b.vcv_id,
    b.vcv_ver,
    -- scv
    b.scv_id,
    b.scv_ver,
    -- annotation assessment record
    b.curator,
    b.annotated_on,
    b.annotated_date,
    b.annotated_time_utc,
    b.action,
    b.reason,
    b.notes,
    -- assertion data of the SCV id + version that was originally annotated
    scv.rpt_stmt_type,
    scv.rank,
    scv.classif_type,
    scv.clinsig_type,
    -- submitter taken from the original annotation
    b.submitter_id,
    sub.current_name AS submitter_name,
    sub.current_abbrev AS submitter_abbrev,
    b.annotation_label,
    b.finalized_review_label,

    b.has_prior_scv_id_annotation,
    b.has_prior_scv_ver_annotation,
    b.has_prior_finalized_submission_batch_id,
    b.prior_scv_annotations,

    -- baseline is_latest flag, re-exposed under a more specific name
    b.is_latest AS is_latest_annotation,

    -- version, start release date, rank and classification of the last SCV row
    scv_last.version AS latest_scv_ver,
    scv_last.start_release_date AS latest_scv_release_date,
    scv_last.rank AS latest_scv_rank,
    scv_last.classif_type AS latest_scv_classification,

    -- version and start release date of the last VCV row
    vcv_last.version AS latest_vcv_ver,
    vcv_last.start_release_date AS latest_vcv_release_date,

    -- outdated SCV: a higher version exists, or the SCV now sits on another variation
    (scv_last.version > b.scv_ver OR scv_last.variation_id <> b.variation_id) AS is_outdated_scv,

    -- outdated VCV: a higher version exists
    (vcv_last.version > b.vcv_ver) AS is_outdated_vcv,

    -- deleted SCV: the last SCV row has a deleted_release_date on or before the
    -- annotation's release_date
    (scv_last.deleted_release_date IS NOT NULL AND scv_last.deleted_release_date <= b.release_date) AS is_deleted_scv,
    -- deleted_release_date of the last SCV row (NULL when not deleted)
    scv_last.deleted_release_date AS deleted_scv_release_date,

    -- moved SCV: the last SCV row belongs to a different variation id
    (scv_last.variation_id <> b.variation_id) AS is_moved_scv

  FROM baseline AS b
  -- LEFT rather than INNER JOIN on purpose: an errant annotation that does not line up
  -- with a real SCV would otherwise be inadvertently hidden. As a consequence the scv.*
  -- fields (and likewise the submitter fields) can all be NULL in the result.
  LEFT JOIN `clinvar_ingest.clinvar_scvs` AS scv
    ON scv.id = b.scv_id
    AND scv.version = b.scv_ver
    AND scv.variation_id = b.variation_id
    AND b.release_date >= scv.start_release_date
    AND b.release_date <= scv.end_release_date
  LEFT JOIN `clinvar_ingest.clinvar_submitters` AS sub
    ON sub.id = b.submitter_id
    AND b.release_date >= sub.start_release_date
    AND b.release_date <= sub.end_release_date
  LEFT JOIN scv_last
    ON scv_last.id = b.scv_id
  LEFT JOIN vcv_last
    ON vcv_last.id = b.vcv_id
);
176 changes: 176 additions & 0 deletions
176
scripts/clinvar-curartion/cvc-baseline-annotations-as-of-func.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
-- Table function: baseline curator annotations as of a date.
--
-- Reads `clinvar_curator.clinvar_annotations` rows whose annotation date is on or before
-- as_of_date, pairs each with the release_date returned by
-- `clinvar_ingest.schema_on`(as_of_date), and attaches review / submission information
-- plus a summary of earlier annotations on the same SCV id.
--
-- Parameters:
--   as_of_date      DATE  upper bound (inclusive) on DATE(annotation_date); also passed
--                         to `clinvar_ingest.schema_on`.
--   incl_finalized  BOOL  when FALSE, annotations that have a row in `reviewed_anno`
--                         (i.e. a matching review) are excluded.
--   only_latest     BOOL  when TRUE, only rows flagged is_latest are returned.
--
-- Notes:
--   * has_prior_* flags are FALSE (not NULL) when an annotation has no earlier
--     annotation on the same SCV id; prior_scv_annotations is NULL in that case.
--   * `reviewed_anno` groups by reviewer/status/notes/batch, so an annotation with
--     several distinct review or submission rows yields several output rows.
CREATE OR REPLACE TABLE FUNCTION `clinvar_curator.cvc_baseline_annotations_as_of`(as_of_date DATE, incl_finalized BOOL, only_latest BOOL) AS (
  WITH
  -- Parsed annotation rows (ids/versions split out of the "<id>.<version>" strings).
  anno_base AS (
    SELECT
      rel.release_date,
      -- annotation id is the annotation timestamp in epoch milliseconds, as a string
      CAST(UNIX_MILLIS(a.annotation_date) AS STRING) AS annotation_id,
      a.vcv_id AS vcv_axn,
      SPLIT(a.scv_id, '.')[OFFSET(0)] AS scv_id,
      CAST(SPLIT(a.scv_id, '.')[OFFSET(1)] AS INT64) AS scv_ver,
      CAST(a.variation_id AS STRING) AS variation_id,
      CAST(a.submitter_id AS STRING) AS submitter_id,
      LOWER(a.action) AS action,
      SPLIT(a.curator_email, '@')[OFFSET(0)] AS curator,
      a.annotation_date AS annotated_on,
      DATE(a.annotation_date) AS annotated_date,
      TIME(a.annotation_date) AS annotated_time_utc,
      a.reason,
      a.notes,
      SPLIT(a.vcv_id, '.')[OFFSET(0)] AS vcv_id,
      CAST(SPLIT(a.vcv_id, '.')[OFFSET(1)] AS INT64) AS vcv_ver,
      -- if there are no other annotations for this scv id after this one, when ordered
      -- by annotation date, then it is the latest
      (
        COUNT(a.annotation_date)
          OVER (
            PARTITION BY SPLIT(a.scv_id, '.')[OFFSET(0)]
            ORDER BY a.annotation_date
            ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
          ) = 0
      ) AS is_latest
    FROM `clinvar_curator.clinvar_annotations` AS a
    CROSS JOIN `clinvar_ingest.schema_on`(as_of_date) AS rel
    WHERE
      DATE(a.annotation_date) <= as_of_date
  ),

  -- Same rows plus the human-readable annotation label, computed once so that the
  -- final projection and the prior-annotation summary cannot drift apart.
  anno AS (
    SELECT
      ab.release_date,
      ab.annotation_id,
      ab.vcv_axn,
      ab.scv_id,
      ab.scv_ver,
      ab.variation_id,
      ab.submitter_id,
      ab.action,
      ab.curator,
      ab.annotated_on,
      ab.annotated_date,
      ab.annotated_time_utc,
      ab.reason,
      ab.notes,
      ab.vcv_id,
      ab.vcv_ver,
      ab.is_latest,
      -- "<date> (<curator>) <flag|no chg|n/a or unk>: <first 20 chars of reason>[...]"
      FORMAT(
        '%t (%s) %s: %s',
        ab.annotated_date,
        IFNULL(ab.curator, 'n/a'),
        IF(ab.action = 'flagging candidate', 'flag', IF(ab.action = 'no change', 'no chg', 'n/a or unk')),
        LEFT(IFNULL(ab.reason, 'n/a'), 20) || IF(LENGTH(ab.reason) > 20, '...', '')
      ) AS annotation_label
    FROM anno_base AS ab
  ),

  -- Annotations that have at least one review, with optional submission batch.
  reviewed_anno AS (
    SELECT
      -- review info
      rev.annotation_id,
      COUNT(*) AS annotation_review_count,
      a.scv_id,
      a.scv_ver,
      a.annotated_date,
      rev.reviewer,
      rev.status,
      rev.notes,
      sbm.batch_id,
      -- "<status> (<reviewer>) [*<batch>*][-<n>x?]"; the count suffix appears only
      -- when more than one row was collapsed into this group
      FORMAT(
        '%s (%s) %s%s',
        IFNULL(rev.status, 'n/a'),
        IF(rev.annotation_id IS NULL, NULL, IFNULL(rev.reviewer, 'n/a')),
        IFNULL(FORMAT('*%s*', sbm.batch_id), ''),
        IF(COUNT(*) > 1, FORMAT('-%ix?', COUNT(*)), '')
      ) AS review_label
    FROM `clinvar_curator.cvc_clinvar_reviews` AS rev
    INNER JOIN anno AS a
      ON a.annotation_id = rev.annotation_id
    LEFT JOIN `clinvar_curator.cvc_clinvar_submissions` AS sbm
      ON sbm.annotation_id = rev.annotation_id
    GROUP BY
      rev.annotation_id,
      a.scv_id,
      a.scv_ver,
      a.annotated_date,
      rev.reviewer,
      rev.status,
      rev.notes,
      sbm.batch_id
  ),

  -- Per annotation: summary of earlier annotations on the same SCV id.
  -- Only annotations that HAVE at least one prior appear here (inner join to prior_a).
  ra_priors AS (
    SELECT
      a.annotation_id,
      (COUNTIF(a.scv_id = prior_a.scv_id) > 0) AS has_prior_scv_id_annotation,
      (COUNTIF(a.scv_ver = prior_a.scv_ver) > 0) AS has_prior_scv_ver_annotation,
      (COUNTIF(prior_ra.batch_id IS NOT NULL) > 0) AS has_prior_finalized_submission_batch_id,
      STRING_AGG(
        FORMAT(
          'v%i %s %s',
          prior_a.scv_ver,
          prior_a.annotation_label,
          IF(prior_ra.review_label IS NOT NULL, FORMAT('[ %s ]', prior_ra.review_label), '')
        ),
        '\n'
        -- order by the full timestamp so same-day priors have a deterministic order
        ORDER BY prior_a.annotated_on DESC
      ) AS prior_scv_annotations
    FROM anno AS a
    LEFT JOIN reviewed_anno AS ra
      ON ra.annotation_id = a.annotation_id
    INNER JOIN anno AS prior_a
      ON prior_a.scv_id = a.scv_id
      -- annotation_id is a STRING; compare the underlying epoch millis numerically
      -- instead of lexicographically
      AND UNIX_MILLIS(prior_a.annotated_on) < UNIX_MILLIS(a.annotated_on)
    LEFT JOIN reviewed_anno AS prior_ra
      ON prior_ra.annotation_id = prior_a.annotation_id
    WHERE
      IF(only_latest, a.is_latest, TRUE)
      AND IF(incl_finalized, TRUE, ra.annotation_id IS NULL)
    GROUP BY
      a.annotation_id
  )

  SELECT
    as_of_date,
    a.release_date,
    a.annotation_id,
    -- variant and vcv
    a.variation_id,
    a.vcv_axn,
    a.vcv_id,
    a.vcv_ver,
    -- scv
    a.scv_id,
    a.scv_ver,
    -- annotation assessment record
    a.curator,
    a.annotated_on,
    a.annotated_date,
    a.annotated_time_utc,
    a.action,
    a.reason,
    a.notes,
    a.submitter_id,
    a.annotation_label,
    a.is_latest,
    -- finalized review info
    ra.annotation_id AS finalized_review_id,
    ra.reviewer AS finalized_reviewer,
    ra.status AS finalized_review_status,
    ra.notes AS finalized_review_notes,
    -- finalized submission batch info
    ra.batch_id AS finalized_submission_batch_id,
    ra.review_label AS finalized_review_label,
    ra.annotation_review_count AS finalized_review_count,
    -- prior review data; no ra_priors row means "no prior annotation", so the
    -- flags default to FALSE rather than leaking NULL to callers
    IFNULL(ra_priors.has_prior_scv_id_annotation, FALSE) AS has_prior_scv_id_annotation,
    IFNULL(ra_priors.has_prior_scv_ver_annotation, FALSE) AS has_prior_scv_ver_annotation,
    IFNULL(ra_priors.has_prior_finalized_submission_batch_id, FALSE) AS has_prior_finalized_submission_batch_id,
    ra_priors.prior_scv_annotations
  FROM anno AS a
  LEFT JOIN reviewed_anno AS ra
    ON ra.annotation_id = a.annotation_id
  LEFT JOIN ra_priors
    ON ra_priors.annotation_id = a.annotation_id
  WHERE
    IF(only_latest, a.is_latest, TRUE)
    AND IF(incl_finalized, TRUE, ra.annotation_id IS NULL)
);
Oops, something went wrong.