From 3b4aaddf5e01613966af5ecf106771bca3d2de38 Mon Sep 17 00:00:00 2001 From: Lawrence Babb Date: Tue, 19 Nov 2024 16:38:05 -0500 Subject: [PATCH] add first batch of v2 schema scripts --- scripts/general/all_schemas-func.sql | 4 ++-- scripts/general/all_schemas-v2-func.sql | 23 +++++++++++++++++++ scripts/general/schema-on-v2-func.sql | 15 ++++++++++++ .../general/schemas-on-or-after-v2-func.sql | 17 ++++++++++++++ .../general/schemas-on-or-before-v2-func.sql | 16 +++++++++++++ 5 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 scripts/general/all_schemas-v2-func.sql create mode 100644 scripts/general/schema-on-v2-func.sql create mode 100644 scripts/general/schemas-on-or-after-v2-func.sql create mode 100644 scripts/general/schemas-on-or-before-v2-func.sql diff --git a/scripts/general/all_schemas-func.sql b/scripts/general/all_schemas-func.sql index 83d6794..823b6f9 100644 --- a/scripts/general/all_schemas-func.sql +++ b/scripts/general/all_schemas-func.sql @@ -19,9 +19,9 @@ AS ( FROM INFORMATION_SCHEMA.SCHEMATA iss WHERE ( - REGEXP_CONTAINS(iss.schema_name, r'^clinvar_\d{4}_\d{2}_\d{2}_v\d+_\d+_\d+$') + REGEXP_CONTAINS(iss.schema_name, r'^clinvar_\d{4}_\d{2}_\d{2}_v1_\d+_\d+$') OR - REGEXP_CONTAINS(iss.schema_name, r'^clinvar_\d{4}_\d{2}_\d{2}_v\d+_\d+_\d+_beta\d+$') + REGEXP_CONTAINS(iss.schema_name, r'^clinvar_\d{4}_\d{2}_\d{2}_v1_\d+_\d+_beta\d+$') ) AND iss.schema_name <> "clinvar_2019_06_01_v0" diff --git a/scripts/general/all_schemas-v2-func.sql b/scripts/general/all_schemas-v2-func.sql new file mode 100644 index 0000000..bc6c119 --- /dev/null +++ b/scripts/general/all_schemas-v2-func.sql @@ -0,0 +1,23 @@ +CREATE OR REPLACE TABLE FUNCTION `clinvar_ingest.all_schemas_v2`() +AS ( + -- the state of al clinvar schemas available at the moment + SELECT + r.schema_name, + r.release_date, + LAG(r.release_date, 1, DATE('0001-01-01')) OVER (ORDER BY r.release_date ASC) AS prev_release_date, + LEAD(r.release_date, 1, DATE('9999-12-31')) OVER (ORDER BY r.release_date ASC) AS next_release_date + FROM ( + + SELECT + iss.schema_name, + CAST(REGEXP_REPLACE(iss.schema_name, r'clinvar_(\d{4})_(\d{2})_(\d{2}).*', '\\1-\\2-\\3') as DATE) AS release_date + FROM INFORMATION_SCHEMA.SCHEMATA iss + WHERE + ( + REGEXP_CONTAINS(iss.schema_name, r'^clinvar_\d{4}_\d{2}_\d{2}_v2_\d+_\d+$') + OR + REGEXP_CONTAINS(iss.schema_name, r'^clinvar_\d{4}_\d{2}_\d{2}_v2_\d+_\d+_(alpha|beta)\d*$') + ) + ) r + ORDER BY 2 +); \ No newline at end of file diff --git a/scripts/general/schema-on-v2-func.sql b/scripts/general/schema-on-v2-func.sql new file mode 100644 index 0000000..71fce23 --- /dev/null +++ b/scripts/general/schema-on-v2-func.sql @@ -0,0 +1,15 @@ +CREATE OR REPLACE TABLE FUNCTION `clinvar_ingest.schema_on_v2`(on_date DATE) +AS ( + -- the state of schemas available on a certain date + -- if the date lands on a schema release date then that will be the schema + -- otherwise the schema with the release date just prior to that date will be the schema + SELECT + x.schema_name, + x.release_date, + x.prev_release_date, + x.next_release_date + FROM `clinvar_ingest.all_schemas_v2`() x + WHERE on_date >= x.release_date + ORDER BY 2 DESC + LIMIT 1 +); \ No newline at end of file diff --git a/scripts/general/schemas-on-or-after-v2-func.sql b/scripts/general/schemas-on-or-after-v2-func.sql new file mode 100644 index 0000000..a73b1ba --- /dev/null +++ b/scripts/general/schemas-on-or-after-v2-func.sql @@ -0,0 +1,17 @@ +CREATE OR REPLACE TABLE FUNCTION `clinvar_ingest.schemas_on_or_after_v2`(on_or_after_date DATE) +AS ( + -- the state of schemas available on or after a certain date + -- if the date lands on a schema release date then that will be the first schema + -- if the date is prior to the earliest release date then return all schemas + -- otherwise the schema with the release date just prior to that date will be the first schema + SELECT + x.schema_name, + x.release_date, + x.prev_release_date, + x.next_release_date + FROM `clinvar_ingest.all_schemas_v2`() x + WHERE (on_or_after_date > x.prev_release_date AND on_or_after_date < x.next_release_date) OR on_or_after_date <= x.release_date + ORDER BY 2 +); + +-- select * from `clinvar_ingest.schemas_on_or_after`(DATE('2020-06-01')); \ No newline at end of file diff --git a/scripts/general/schemas-on-or-before-v2-func.sql b/scripts/general/schemas-on-or-before-v2-func.sql new file mode 100644 index 0000000..6b99614 --- /dev/null +++ b/scripts/general/schemas-on-or-before-v2-func.sql @@ -0,0 +1,16 @@ +CREATE OR REPLACE TABLE FUNCTION `clinvar_ingest.schemas_on_or_before_v2`(on_or_before_date DATE) +AS ( + -- the state of schemas available on or before a certain date + -- if the date lands on a schema release date then that will be the last schema + -- otherwise the schema with the release date just prior to that date will be the last schema + SELECT + x.schema_name, + x.release_date, + x.prev_release_date, + x.next_release_date + FROM `clinvar_ingest.all_schemas_v2`() x + WHERE on_or_before_date >= x.release_date + ORDER BY 2 +); + +-- select * from `clinvar_ingest.schemas_on_or_before`(DATE('2020-06-01')); \ No newline at end of file