diff --git a/.github/workflows/prod-hail-search-release.yaml b/.github/workflows/prod-hail-search-release.yaml index 58772f0013..0a2dbb0ee7 100644 --- a/.github/workflows/prod-hail-search-release.yaml +++ b/.github/workflows/prod-hail-search-release.yaml @@ -45,11 +45,28 @@ jobs: persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token fetch-depth: 0 # otherwise, you will failed to push refs to dest repo - - name: Update appVersion in hail-search Chart file + - name: Get latest hail-search version uses: mikefarah/yq@v4.22.1 + id: current with: cmd: > - yq -i '.appVersion = "${{ github.event.workflow_run.head_sha }}"' charts/hail-search/Chart.yaml + yq -r '.version' charts/hail-search/Chart.yaml + + - name: Bump version + id: bump + uses: cbrgm/semver-bump-action@main + with: + current-version: ${{ steps.current.outputs.result }} + bump-level: minor + + - name: Update appVersion and version in hail-search Chart file + uses: mikefarah/yq@v4.22.1 + with: + cmd: > + yq -i ' + .appVersion = "${{ github.event.workflow_run.head_sha }}" | + .version = "${{ steps.bump.outputs.new_version }}" + ' charts/hail-search/Chart.yaml - name: Commit and Push changes uses: Andro999b/push@v1.3 diff --git a/.github/workflows/prod-release.yaml b/.github/workflows/prod-release.yaml index 391929e892..b4ab01854c 100644 --- a/.github/workflows/prod-release.yaml +++ b/.github/workflows/prod-release.yaml @@ -45,11 +45,28 @@ jobs: persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token fetch-depth: 0 # otherwise, you will failed to push refs to dest repo - - name: Update appVersion in seqr Chart file + - name: Get latest seqr version uses: mikefarah/yq@v4.22.1 + id: current with: cmd: > - yq -i '.appVersion = "${{ github.event.workflow_run.head_sha }}"' charts/seqr/Chart.yaml + yq -r '.version' charts/seqr/Chart.yaml + + - name: Bump version + id: bump + uses: cbrgm/semver-bump-action@main + with: + 
current-version: ${{ steps.current.outputs.result }} + bump-level: minor + + - name: Update appVersion and version in seqr Chart file + uses: mikefarah/yq@v4.22.1 + with: + cmd: > + yq -i ' + .appVersion = "${{ github.event.workflow_run.head_sha }}" | + .version = "${{ steps.bump.outputs.new_version }}" + ' charts/seqr/Chart.yaml - name: Commit and Push changes uses: Andro999b/push@v1.3 diff --git a/deploy/LOCAL_INSTALL_HELM.md b/deploy/LOCAL_INSTALL_HELM.md index 4fe8122b6b..1c1bd417da 100644 --- a/deploy/LOCAL_INSTALL_HELM.md +++ b/deploy/LOCAL_INSTALL_HELM.md @@ -76,7 +76,7 @@ loading_pipeline_queue test.vcf.gz ``` - In the top header of *seqr*, click on the **Data Management** button. - In the subheader, click on **Load Data**. -- Type the name of the callset path into the **Callset File Path** text box (without the directory prefix), and select the appropriate Sample Type (WES/WGS) and Genome Version (GRCh37/GRCh38) for your project. The pipeline includes a sequence of validation steps to insure the validity of your VCF, but these may be skipped by enabling the **Skip Callset Validation**option. We strongly recommend leaving validation enabled to ensure the quality of your analysis. +- Select your VCF from the dropdown and select the appropriate Sample Type (WES/WGS) and Genome Version (GRCh37/GRCh38) for your project. The pipeline includes a sequence of validation steps to ensure the validity of your VCF, but these may be skipped by enabling the **Skip Callset Validation** option. We strongly recommend leaving validation enabled to ensure the quality of your analysis. - Click through to the next page and select your project from the **Projects to Load** dropdown, then click **Submit**. - If you wish to check the status of the loading request, you can click through to the **Pipeline Status** tab to view the loading pipeline interface. - Data should be loaded into the search backend automatically, usually within a few hours. 
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc index b8eaa2d478..17e2f8ae70 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.README.txt.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc index 8d6507b010..55d0cdd18d 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt index 5daea17753..22050e815e 100644 --- a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
Written with version 0.2.128-eead8100a1c1 - Created at 2024/08/16 15:39:04 \ No newline at end of file + Created at 2025/01/02 17:23:59 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc index 905a30feec..871216cce1 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/.part-0.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 index 05ec205c54..c33540def9 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/globals/parts/part-0 differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc deleted file mode 100644 index 78fad9791a..0000000000 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.index.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc deleted file mode 100644 index ca274b3389..0000000000 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/.metadata.json.gz.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index 
b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index deleted file mode 100644 index 3d8c9a969b..0000000000 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/index and /dev/null differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz deleted file mode 100644 index 14e2c0d67c..0000000000 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.idx/metadata.json.gz and /dev/null differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.index.crc new file mode 100644 index 0000000000..9e82cfb61e Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.index.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000..e05cfb69de Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/index new file mode 100644 index 0000000000..0734b2c395 Binary files /dev/null and 
b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/index differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/metadata.json.gz new file mode 100644 index 0000000000..879fa5f330 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/index/part-0-d213a419-0c77-4952-b924-b6af300f393b.idx/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz index 9479e06e8a..d5dfa64984 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc index dcbbf269b1..02e4874e3f 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz index 5e76d5dbba..d379d39286 100644 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc deleted file mode 100644 index 9fe4d74694..0000000000 Binary files 
a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-d213a419-0c77-4952-b924-b6af300f393b.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-d213a419-0c77-4952-b924-b6af300f393b.crc new file mode 100644 index 0000000000..433649fc57 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/.part-0-d213a419-0c77-4952-b924-b6af300f393b.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 deleted file mode 100644 index 2389c50627..0000000000 Binary files a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-c11f065f-e1bb-4a1f-9f2d-ad814a396818 and /dev/null differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-d213a419-0c77-4952-b924-b6af300f393b b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-d213a419-0c77-4952-b924-b6af300f393b new file mode 100644 index 0000000000..a53545dfd8 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/lookup.ht/rows/parts/part-0-d213a419-0c77-4952-b924-b6af300f393b differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.README.txt.crc new file mode 100644 index 0000000000..d5ad8f7e2d Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.README.txt.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/._SUCCESS.crc new file mode 100644 index 
0000000000..3b7b044936 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/._SUCCESS.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000..cbb87d8879 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt new file mode 100644 index 0000000000..c7b6eb301a --- /dev/null +++ b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.128-eead8100a1c1 + Created at 2025/01/02 17:57:36 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/_SUCCESS b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/_SUCCESS new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000..52404ebea7 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/metadata.json.gz new file mode 100644 index 0000000000..b8223ee0bc Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/.part-0.crc new file mode 100644 index 0000000000..581ceab411 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/.part-0.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/part-0 new file mode 100644 index 0000000000..2da408adba Binary files /dev/null and 
b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/globals/parts/part-0 differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.index.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.index.crc new file mode 100644 index 0000000000..3b5f783b8e Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.index.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000..8a49d39d80 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/index b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/index new file mode 100644 index 0000000000..bf0064cce9 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/index differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/metadata.json.gz 
b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/metadata.json.gz new file mode 100644 index 0000000000..63e4c6ca64 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/index/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.idx/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/metadata.json.gz new file mode 100644 index 0000000000..e43fa94230 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/.metadata.json.gz.crc new file mode 100644 index 0000000000..3ee859d286 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/metadata.json.gz new file mode 100644 index 0000000000..4b55898614 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/.part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/.part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc new file mode 100644 index 0000000000..006238b850 Binary files /dev/null and 
b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/.part-0-85535ceb-5403-4697-bec1-5eccf7ff958a.crc differ diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a new file mode 100644 index 0000000000..a32ae6a5d6 Binary files /dev/null and b/hail_search/fixtures/GRCh37/SNV_INDEL/projects/WGS/R0004_non_analyst_project.ht/rows/parts/part-0-85535ceb-5403-4697-bec1-5eccf7ff958a differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc index 3b6f654d6c..46b5687838 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc index fee8fcb468..63190d7e43 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/README.txt index 6eb9cb83f0..cd3d982528 100644 --- a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/README.txt +++ b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
- Written with version 0.2.130-bea04d9c79b5 - Created at 2024/11/04 13:45:23 \ No newline at end of file + Written with version 0.2.133-4c60fddb171a + Created at 2025/01/02 12:26:15 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/.index.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/.index.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/.index.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/.metadata.json.gz.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/.metadata.json.gz.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/.metadata.json.gz.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/index similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/index rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/index diff --git 
a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/metadata.json.gz index 8f5109cfbb..3e2c0ff4e9 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/metadata.json.gz and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc index 6a1017e8eb..ebb4a5208f 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz index eeeb787765..affb24f426 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc 
b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc new file mode 100644 index 0000000000..ff3efb7411 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc deleted file mode 100644 index 4155311e5a..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd new file mode 100644 index 0000000000..e03622879d Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-428429a4-e72e-454d-a0c4-7bc8ade5d7fd differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac deleted file mode 100644 index 4317a07a18..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac and /dev/null differ diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index a518120408..0d284c6a59 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -85,6 +85,9 @@ class BaseHailTableQuery(object): SORTS = { XPOS: lambda r: [r.xpos], + 
'family_guid': lambda r: [ + hl.int(r.family_entries.find(hl.is_defined).first().familyGuid.first_match_in('(\d+)').first()) + ], } @classmethod @@ -1222,7 +1225,7 @@ def gene_counts(self): def _filter_variant_ids(self, ht, variant_ids): return ht - def lookup_variants(self, variant_ids): + def lookup_variants(self, variant_ids, additional_annotations=None): self._parse_intervals(intervals=None, variant_ids=variant_ids, variant_keys=variant_ids) ht = self._read_table('annotations.ht', drop_globals=['versions']) ht = self._filter_variant_ids(ht, variant_ids) @@ -1232,6 +1235,8 @@ def lookup_variants(self, variant_ids): k: v for k, v in self.annotation_fields(include_genotype_overrides=False).items() if k not in {FAMILY_GUID_FIELD, GENOTYPES_FIELD} } + if additional_annotations: + annotation_fields.update(additional_annotations) formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=False) return formatted.aggregate(hl.agg.take(formatted.row, len(variant_ids))) @@ -1246,9 +1251,13 @@ def _get_variant_project_data(self, variant_id, **kwargs): return project_data[0] if project_data else {} def lookup_variant(self, variant_id, **kwargs): - variants = self.lookup_variants([variant_id]) + variants = self.lookup_variants([variant_id], additional_annotations=self._lookup_variant_annotations()) if not variants: raise HTTPNotFound() variant = dict(variants[0]) - variant.update(self._get_variant_project_data(variant_id, **kwargs)) + variant.update(self._get_variant_project_data(variant_id, variant=variant, **kwargs)) return variant + + @staticmethod + def _lookup_variant_annotations(): + return {} diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index d55eaf52a6..ed4e598aa1 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -2,7 +2,7 @@ import hail as hl from hail_search.constants import GENOME_VERSION_GRCh38, SCREEN_KEY, PREFILTER_FREQ_CUTOFF, 
ALPHAMISSENSE_SORT, \ - UTR_ANNOTATOR_KEY, EXTENDED_SPLICE_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY + UTR_ANNOTATOR_KEY, EXTENDED_SPLICE_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY, GENOME_VERSION_GRCh37 from hail_search.queries.base import BaseHailTableQuery, PredictionPath from hail_search.queries.snv_indel_37 import SnvIndelHailTableQuery37 @@ -12,6 +12,7 @@ class SnvIndelHailTableQuery(SnvIndelHailTableQuery37): GENOME_VERSION = GENOME_VERSION_GRCh38 + LIFT_GENOME_VERSION = GENOME_VERSION_GRCh37 PREDICTION_FIELDS_CONFIG = { **SnvIndelHailTableQuery37.PREDICTION_FIELDS_CONFIG, 'fathmm': PredictionPath('dbnsfp', 'fathmm_MKL_coding_score'), @@ -96,3 +97,7 @@ def _get_annotation_override_filters(self, ht, annotation_overrides): ) return annotation_filters + + @staticmethod + def _lookup_variant_annotations(): + return {'liftover_locus': lambda r: r.rg37_locus} diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index bd0453b5a5..71323282b1 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -1,9 +1,10 @@ +from aiohttp.web import HTTPNotFound from collections import OrderedDict import hail as hl from hail_search.constants import CLINVAR_KEY, HGMD_KEY, HGMD_PATH_RANGES, \ GNOMAD_GENOMES_FIELD, PREFILTER_FREQ_CUTOFF, PATH_FREQ_OVERRIDE_CUTOFF, PATHOGENICTY_HGMD_SORT_KEY, \ - SPLICE_AI_FIELD, GENOME_VERSION_GRCh37 + SPLICE_AI_FIELD, GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 from hail_search.queries.base import PredictionPath, QualityFilterFormat from hail_search.queries.mito import MitoHailTableQuery @@ -12,6 +13,7 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery): DATA_TYPE = 'SNV_INDEL' GENOME_VERSION = GENOME_VERSION_GRCh37 + LIFT_GENOME_VERSION = GENOME_VERSION_GRCh38 GENOTYPE_FIELDS = {f.lower(): f for f in ['DP', 'GQ', 'AB']} QUALITY_FILTER_FORMAT = { @@ -133,3 +135,26 @@ def _get_annotation_override_filters(self, ht, annotation_overrides): @staticmethod def 
_stat_has_non_ref(s): return (s.het_samples > 0) | (s.hom_samples > 0) + + @staticmethod + def _lookup_variant_annotations(): + return {'liftover_locus': lambda r: r.rg38_locus} + + @classmethod + def _get_lifted_table_path(cls, path): + return f'{cls._get_table_dir(path)}/{cls.LIFT_GENOME_VERSION}/{cls.DATA_TYPE}/{path}' + + def _get_variant_project_data(self, variant_id, variant=None, **kwargs): + project_data = super()._get_variant_project_data(variant_id, **kwargs) + liftover_locus = variant.pop('liftover_locus') + if not liftover_locus: + return project_data + interval = hl.eval(hl.interval(liftover_locus, liftover_locus, includes_start=True, includes_end=True)) + self._load_table_kwargs['_intervals'] = [interval] + self._get_table_path = self._get_lifted_table_path + try: + lift_project_data = super()._get_variant_project_data(variant_id, **kwargs) + except HTTPNotFound: + return project_data + project_data['familyGenotypes'].update(lift_project_data['familyGenotypes']) + return project_data.annotate(liftedFamilyGuids=sorted(lift_project_data['familyGenotypes'].keys())) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 37cccbf4d9..f8c19ca847 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -14,9 +14,9 @@ EXPECTED_SAMPLE_DATA_WITH_SEX, SV_WGS_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, \ MULTI_PROJECT_SAMPLE_TYPES_SAMPLE_DATA, FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, \ VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, \ - VARIANT3_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, \ + VARIANT4_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, \ VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, \ - VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY + VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_WES_ONLY, VARIANT3_WES_ONLY from hail_search.web_app 
import init_web_app, sync_to_async_hail_query from hail_search.queries.base import BaseHailTableQuery @@ -370,10 +370,11 @@ async def test_both_sample_types_search(self): # Variant 1 is de novo in exome but inherited and homozygous in genome. # Variant 2 is inherited and homozygous in exome and de novo and homozygous in genome. - # Variant 3 is inherited in both sample types. Variant 4 is de novo in both sample types. + # Variant 3 is inherited in both sample types. + # Variant 4 is de novo in exome, but inherited in genome in the same parent that has variant 3. inheritance_mode = 'recessive' await self._assert_expected_search( - [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, [VARIANT3_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES]], + [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, [VARIANT3_WES_ONLY, VARIANT4_WES_ONLY]], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS ) @@ -1241,6 +1242,12 @@ async def test_sort(self): sort_metadata={'ENSG00000177000': 3}, ) + await self._assert_expected_search( + [_sorted(MULTI_PROJECT_VARIANT1, [2]), _sorted(MULTI_PROJECT_VARIANT2, [2]), + _sorted(VARIANT3, [2]), _sorted(VARIANT4, [2]), _sorted(PROJECT_2_VARIANT, [11])], + sort='family_guid', sample_data=MULTI_PROJECT_SAMPLE_DATA, + ) + # size sort only applies to SVs, so has no impact on other variant await self._assert_expected_search( [_sorted(GCNV_VARIANT1, [-171766]), _sorted(GCNV_VARIANT2, [-17768]), _sorted(GCNV_VARIANT4, [-14487]), diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 56db8156e9..3cd33653d9 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -380,27 +380,19 @@ 'CAID': 'CA10960369', } -VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT3) -genotypes = VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] -VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] = { - 'I000004_hg00731': [ - 
genotypes['I000004_hg00731'], - {**genotypes['I000004_hg00731'], 'sampleType': 'WGS'} - ], +VARIANT3_WES_ONLY = deepcopy(VARIANT3) +genotypes = VARIANT3_WES_ONLY['genotypes'] +VARIANT3_WES_ONLY['genotypes'] = { + 'I000004_hg00731': [genotypes['I000004_hg00731']], 'I000005_hg00732': [genotypes['I000005_hg00732']], 'I000006_hg00733': [genotypes['I000006_hg00733']], } -VARIANT3_BOTH_SAMPLE_TYPES = deepcopy(VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY) -genotypes = VARIANT3_BOTH_SAMPLE_TYPES['genotypes'] -VARIANT3_BOTH_SAMPLE_TYPES['genotypes']['I000005_hg00732'] = [ - *genotypes['I000005_hg00732'], - {**genotypes['I000005_hg00732'][0], 'sampleType': 'WGS'} -] -VARIANT3_BOTH_SAMPLE_TYPES['genotypes']['I000006_hg00733'] = [ - *genotypes['I000006_hg00733'], - {**genotypes['I000006_hg00733'][0], 'sampleType': 'WGS'} -] +VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT3_WES_ONLY) +genotypes = VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] +VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes']['I000004_hg00731'].append( + {**genotypes['I000004_hg00731'][0], 'sampleType': 'WGS'} +) VARIANT4 = { 'variantId': '1-91511686-T-G', @@ -475,17 +467,20 @@ 'CAID': 'CA341062623', } -VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT4) -genotypes = VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] -VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] = { - 'I000004_hg00731': [ - genotypes['I000004_hg00731'], - {**genotypes['I000004_hg00731'], 'sampleType': 'WGS'} - ], +VARIANT4_WES_ONLY = deepcopy(VARIANT4) +genotypes = VARIANT4_WES_ONLY['genotypes'] +VARIANT4_WES_ONLY['genotypes'] = { + 'I000004_hg00731': [genotypes['I000004_hg00731']], 'I000005_hg00732': [genotypes['I000005_hg00732']], 'I000006_hg00733': [genotypes['I000006_hg00733']], } +VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT4_WES_ONLY) +genotypes = VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] 
+VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes']['I000004_hg00731'].append( + {**genotypes['I000004_hg00731'][0], 'sampleType': 'WGS'} +) + VARIANT4_BOTH_SAMPLE_TYPES = deepcopy(VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY) genotypes = VARIANT4_BOTH_SAMPLE_TYPES['genotypes'] VARIANT4_BOTH_SAMPLE_TYPES['genotypes']['I000005_hg00732'] = [ @@ -494,11 +489,12 @@ ] VARIANT4_BOTH_SAMPLE_TYPES['genotypes']['I000006_hg00733'] = [ *genotypes['I000006_hg00733'], - {**genotypes['I000006_hg00733'][0], 'sampleType': 'WGS'} + {**genotypes['I000006_hg00733'][0], 'numAlt': 2, 'sampleType': 'WGS'} ] VARIANT_LOOKUP_VARIANT = { **VARIANT1, + 'liftedFamilyGuids': ['F000014_14'], 'familyGenotypes': { VARIANT1['familyGuids'][0]: sorted([ {k: v for k, v in g.items() if k != 'individualGuid'} for g in VARIANT1['genotypes'].values() @@ -507,6 +503,10 @@ 'sampleId': 'NA20885', 'sampleType': 'WES', 'familyGuid': 'F000011_11', 'numAlt': 2, 'dp': 6, 'gq': 16, 'ab': 1.0, 'filters': [], }], + 'F000014_14': [{ + 'sampleId': 'NA21234', 'sampleType': 'WGS', 'familyGuid': 'F000014_14', + 'numAlt': 1, 'dp': 27, 'gq': 87, 'ab': 0.531000018119812, 'filters': None, + }], } } for k in {'familyGuids', 'genotypes'}: diff --git a/seqr/fixtures/variant_tag_types.json b/seqr/fixtures/variant_tag_types.json index 8eaae2bdcd..ddc6ed8fbd 100644 --- a/seqr/fixtures/variant_tag_types.json +++ b/seqr/fixtures/variant_tag_types.json @@ -480,7 +480,7 @@ "project": null, "name": "Submit to Clinvar", "category": "Data Sharing", - "description": "By selecting this tag, you are notifying CMG staff that this variant should be submitted to ClinVar. Generally, this is for pathogenic or likely pathogenic variants in known disease genes or for any benign or likely benign variants that are incorrectly annotated in ClinVar. 
Please also add a note that describes supporting evidence for how you interpreted this variant.", + "description": "", "color": "#8A62AE", "order": 25.0 } diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index 243bd092dd..1f66958f69 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -166,6 +166,11 @@ def _load_new_samples(cls, metadata_path, genome_version, dataset_type, run_vers failed_families_by_guid = {f['guid']: f for f in Family.objects.filter( guid__in={family for families in failed_family_samples.values() for family in families} ).values('guid', 'family_id', 'project__name')} + if failed_families_by_guid: + Family.bulk_update( + user=None, update_json={'analysis_status': Family.ANALYSIS_STATUS_LOADING_FAILED}, + guid__in=failed_families_by_guid, analysis_status=Family.ANALYSIS_STATUS_WAITING_FOR_DATA + ) failures_by_project_check = defaultdict(lambda: defaultdict(list)) for check, check_failures in failed_family_samples.items(): for family_guid, failure_data in check_failures.items(): diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 93d701a1f3..99803c3f3a 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -44,12 +44,12 @@ f'
Let us know if you have any questions.

All the best,
The seqr team' INTERNAL_TEXT_EMAIL = """Dear seqr user, -This is to notify you that 2 new WES samples have been loaded in seqr project Test Reprocessed Project +This is to notify you that data for 2 new WES samples has been loaded in seqr project Test Reprocessed Project All the best, The seqr team""" INTERNAL_HTML_EMAIL = f'Dear seqr user,

' \ - f'This is to notify you that 2 new WES samples have been loaded in seqr project ' \ + f'This is to notify you that data for 2 new WES samples has been loaded in seqr project ' \ f'Test Reprocessed Project' \ f'

All the best,
The seqr team' @@ -195,7 +195,7 @@ def mock_metadata_file(index): @mock.patch('seqr.utils.file_utils.os.path.isfile', lambda *args: True) @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HAIL_HOST) @mock.patch('seqr.views.utils.airtable_utils.AIRTABLE_URL', 'http://testairtable') -@mock.patch('seqr.utils.search.add_data_utils.BASE_URL', SEQR_URL) +@mock.patch('seqr.utils.communication_utils.BASE_URL', SEQR_URL) @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL', 'anvil-data-loading') @mock.patch('seqr.utils.search.add_data_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') class CheckNewSamplesTest(AnvilAuthenticationTestCase): @@ -324,7 +324,7 @@ def test_command(self, mock_email, mock_airtable_utils): with self.assertRaises(CommandError) as ce: call_command('check_for_new_samples_from_pipeline', '--genome_version=GRCh37', '--dataset_type=MITO') self.assertEqual(str(ce.exception), 'No successful runs found for genome_version=GRCh37, dataset_type=MITO') - self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/GRCh37/MITO/runs/*/_SUCCESS') + self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/GRCh37/MITO/runs/*/_SUCCESS', recursive=False) self.mock_subprocess.assert_not_called() call_command('check_for_new_samples_from_pipeline') @@ -344,7 +344,7 @@ def test_command(self, mock_email, mock_airtable_utils): iter([json.dumps(METADATA_FILES[i])]) for i in range(len(local_files)) ] call_command('check_for_new_samples_from_pipeline') - self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/*/*/runs/*/_SUCCESS') + self.mock_glob.assert_called_with('/seqr/seqr-hail-search-data/*/*/runs/*/_SUCCESS', recursive=False) self.mock_open.assert_has_calls( [mock.call(path.replace('_SUCCESS', 'metadata.json'), 'r') for path in local_files], any_order=True) self.mock_subprocess.assert_not_called() @@ -462,6 +462,10 @@ def test_command(self, mock_email, 
mock_airtable_utils): {'analysis_status': 'I', 'analysis_status_last_modified_date': None}, {'analysis_status': 'I', 'analysis_status_last_modified_date': None}, ]) + self.assertSetEqual( + set(Family.objects.filter(guid__in=['F000001_1', 'F000002_2', 'F000003_3']).values_list('analysis_status', flat=True)), + {'F'}, + ) self.assertEqual(Family.objects.get(guid='F000014_14').analysis_status, 'Rncc') # Test airtable PDO updates @@ -544,11 +548,11 @@ def test_command(self, mock_email, mock_airtable_utils): self.mock_send_slack.assert_has_calls([ mock.call( 'seqr-data-loading', - f'2 new WES samples are loaded in {SEQR_URL}project/{PROJECT_GUID}/project_page\n```NA20888, NA20889```', + f'2 new WES samples are loaded in <{SEQR_URL}project/{PROJECT_GUID}/project_page|Test Reprocessed Project>\n```NA20888, NA20889```', ), mock.call( 'anvil-data-loading', - f'1 new WES samples are loaded in {SEQR_URL}project/{EXTERNAL_PROJECT_GUID}/project_page', + f'1 new WES samples are loaded in <{SEQR_URL}project/{EXTERNAL_PROJECT_GUID}/project_page|Non-Analyst Project>', ), mock.call( 'seqr_loading_notifications', @@ -588,10 +592,10 @@ def test_command(self, mock_email, mock_airtable_utils): ), mock.call( 'seqr-data-loading', - f'1 new WES SV samples are loaded in {SEQR_URL}project/R0001_1kg/project_page\n```NA20872```', + f'1 new WES SV samples are loaded in <{SEQR_URL}project/R0001_1kg/project_page|1kg project nåme with uniçøde>\n```NA20872```', ), mock.call( 'seqr-data-loading', - f'1 new WES SV samples are loaded in {SEQR_URL}project/{PROJECT_GUID}/project_page\n```NA20889```', + f'1 new WES SV samples are loaded in <{SEQR_URL}project/{PROJECT_GUID}/project_page|Test Reprocessed Project>\n```NA20889```', ), ]) diff --git a/seqr/migrations/0079_alter_family_analysis_status.py b/seqr/migrations/0079_alter_family_analysis_status.py new file mode 100644 index 0000000000..f8922c0148 --- /dev/null +++ b/seqr/migrations/0079_alter_family_analysis_status.py @@ -0,0 +1,18 @@ +# Generated 
by Django 4.2.16 on 2025-01-03 19:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0078_rename_submit_to_clinvar_variantnote_report'), + ] + + operations = [ + migrations.AlterField( + model_name='family', + name='analysis_status', + field=models.CharField(choices=[('S', 'S'), ('S_kgfp', 'S'), ('S_kgdp', 'S'), ('S_ng', 'S'), ('ES', 'E'), ('Sc_kgfp', 'S'), ('Sc_kgdp', 'S'), ('Sc_ng', 'S'), ('Rcpc', 'R'), ('Rncc', 'R'), ('C', 'C'), ('PB', 'P'), ('P', 'P'), ('I', 'A'), ('Q', 'W'), ('F', 'L'), ('N', 'N')], default='Q', max_length=10), + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 1dd9135acc..b52c4a8cca 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -286,6 +286,7 @@ class Family(ModelWithGUID): ANALYSIS_STATUS_PARTIAL_SOLVE = 'P' ANALYSIS_STATUS_PROBABLE_SOLVE = 'PB' ANALYSIS_STATUS_WAITING_FOR_DATA='Q' + ANALYSIS_STATUS_LOADING_FAILED = 'F' SOLVED_ANALYSIS_STATUS_CHOICES = ( ('S', 'Solved'), ('S_kgfp', 'Solved - known gene for phenotype'), @@ -308,6 +309,7 @@ class Family(ModelWithGUID): (ANALYSIS_STATUS_PARTIAL_SOLVE, 'Partial Solve - Analysis in Progress'), (ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS, 'Analysis in Progress'), (ANALYSIS_STATUS_WAITING_FOR_DATA, 'Waiting for data'), + (ANALYSIS_STATUS_LOADING_FAILED, 'Loading failed'), ('N', 'No data expected'), ) SOLVED_ANALYSIS_STATUSES = [status for status, _ in SOLVED_ANALYSIS_STATUS_CHOICES] diff --git a/seqr/urls.py b/seqr/urls.py index 296d496880..126ea39153 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -122,7 +122,7 @@ from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ update_rna_seq, load_rna_seq_sample_data, proxy_to_kibana, load_phenotype_prioritization_data, \ - validate_callset, get_loaded_projects, load_data, proxy_to_luigi + validate_callset, get_loaded_projects, load_data, loading_vcfs, proxy_to_luigi from seqr.views.apis.report_api import \ anvil_export, \ 
family_metadata, \ @@ -332,6 +332,7 @@ 'data_management/update_rna_seq': update_rna_seq, 'data_management/load_rna_seq_sample/(?P[^/]+)': load_rna_seq_sample_data, 'data_management/load_phenotype_prioritization_data': load_phenotype_prioritization_data, + 'data_management/loading_vcfs': loading_vcfs, 'data_management/validate_callset': validate_callset, 'data_management/loaded_projects/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)': get_loaded_projects, 'data_management/load_data': load_data, diff --git a/seqr/utils/communication_utils.py b/seqr/utils/communication_utils.py index 14a6d24aa4..5df9d32825 100644 --- a/seqr/utils/communication_utils.py +++ b/seqr/utils/communication_utils.py @@ -7,6 +7,7 @@ from notifications.signals import notify BASE_EMAIL_TEMPLATE = 'Dear seqr user,\n\n{}\n\nAll the best,\nThe seqr team' +EMAIL_MESSAGE_TEMPLATE = 'This is to notify you that data for {notification} has been loaded in seqr project {project_link}' logger = logging.getLogger(__name__) @@ -55,9 +56,16 @@ def send_html_email(email_body, process_message=None, **kwargs): email_message.send() -def send_project_notification(project, notification, email, subject): +def send_project_notification(project, notification, subject, email_template=None, slack_channel=None, slack_detail=None): users = project.subscribers.user_set.all() - notify.send(project, recipient=users, verb=notification) + notify.send(project, recipient=users, verb=f'Loaded {notification}') + + url = f'{BASE_URL}project/{project.guid}/project_page' + + email = (email_template or EMAIL_MESSAGE_TEMPLATE).format( + notification=notification, + project_link=f'{project.name}', + ) email_kwargs = dict( email_body=BASE_EMAIL_TEMPLATE.format(email), to=list(users.values_list('email', flat=True)), @@ -69,6 +77,14 @@ def send_project_notification(project, notification, email, subject): except Exception as e: logger.error(f'Error sending project email for {project.guid}: {e}', extra={'detail': email_kwargs}) + if slack_channel: + 
slack_message = f'{notification} are loaded in <{url}|{project.name}>' + if slack_detail: + slack_message += f'\n```{slack_detail}```' + safe_post_to_slack(slack_channel, slack_message) + + return url + def _set_bulk_notification_stream(message): set_email_message_stream(message, 'seqr-notifications') diff --git a/seqr/utils/file_utils.py b/seqr/utils/file_utils.py index 76e1258da0..474974d330 100644 --- a/seqr/utils/file_utils.py +++ b/seqr/utils/file_utils.py @@ -48,10 +48,12 @@ def does_file_exist(file_path, user=None): return os.path.isfile(file_path) -def list_files(wildcard_path, user): +def list_files(wildcard_path, user, check_subfolders=False, allow_missing=True): + if check_subfolders: + wildcard_path = f'{wildcard_path.rstrip("/")}/**' if is_google_bucket_file_path(wildcard_path): - return get_gs_file_list(wildcard_path, user, check_subfolders=False, allow_missing=True) - return [file_path for file_path in glob.glob(wildcard_path) if os.path.isfile(file_path)] + return _get_gs_file_list(wildcard_path, user, check_subfolders, allow_missing) + return [file_path for file_path in glob.glob(wildcard_path, recursive=check_subfolders) if os.path.isfile(file_path)] def file_iter(file_path, byte_range=None, raw_content=False, user=None, **kwargs): @@ -91,16 +93,15 @@ def mv_file_to_gs(local_path, gs_path, user=None): run_gsutil_with_wait(command, gs_path, user) -def get_gs_file_list(gs_path, user=None, check_subfolders=True, allow_missing=False): +def _get_gs_file_list(gs_path, user, check_subfolders, allow_missing): gs_path = gs_path.rstrip('/') command = 'ls' if check_subfolders: # If a bucket is empty gsutil throws an error when running ls with ** instead of returning an empty list - subfolders = _run_gsutil_with_stdout(command, gs_path, user) + subfolders = _run_gsutil_with_stdout(command, gs_path.replace('/**', ''), user) if not subfolders: return [] - gs_path = f'{gs_path}/**' all_lines = _run_gsutil_with_stdout(command, gs_path, user, 
allow_missing=allow_missing) return [line for line in all_lines if is_google_bucket_file_path(line)] diff --git a/seqr/utils/file_utils_tests.py b/seqr/utils/file_utils_tests.py index 32a7bbcb91..86ec92523b 100644 --- a/seqr/utils/file_utils_tests.py +++ b/seqr/utils/file_utils_tests.py @@ -1,7 +1,7 @@ import mock from unittest import TestCase -from seqr.utils.file_utils import mv_file_to_gs, get_gs_file_list +from seqr.utils.file_utils import mv_file_to_gs class FileUtilsTest(TestCase): @@ -30,32 +30,3 @@ def test_mv_file_to_gs(self, mock_logger, mock_subproc): mock_subproc.Popen.assert_called_with('gsutil mv /temp_path gs://bucket/target_path', stdout=mock_subproc.PIPE, stderr=mock_subproc.STDOUT, shell=True) # nosec mock_logger.info.assert_called_with('==> gsutil mv /temp_path gs://bucket/target_path', None) process.wait.assert_called_with() - - @mock.patch('seqr.utils.file_utils.subprocess') - @mock.patch('seqr.utils.file_utils.logger') - def test_get_gs_file_list(self, mock_logger, mock_subproc): - with self.assertRaises(Exception) as ee: - get_gs_file_list('/temp_path') - self.assertEqual(str(ee.exception), 'A Google Storage path is expected.') - - process = mock_subproc.Popen.return_value - process.communicate.return_value = b'', b'-bash: gsutil: command not found.\nPlease check the path.\n' - with self.assertRaises(Exception) as ee: - get_gs_file_list('gs://bucket/target_path/', user=None) - self.assertEqual(str(ee.exception), 'Run command failed: -bash: gsutil: command not found. Please check the path.') - mock_subproc.Popen.assert_called_with('gsutil ls gs://bucket/target_path', stdout=mock_subproc.PIPE, - stderr=mock_subproc.PIPE, shell=True) # nosec - mock_logger.info.assert_called_with('==> gsutil ls gs://bucket/target_path', None) - process.communicate.assert_called_with() - - mock_subproc.reset_mock() - mock_logger.reset_mock() - process.communicate.return_value = b'\n\nUpdates are available for some Cloud SDK components. 
To install them,\n' \ - b'please run:\n $ gcloud components update\ngs://bucket/target_path/id_file.txt\n' \ - b'gs://bucket/target_path/data.vcf.gz\n', b'' - file_list = get_gs_file_list('gs://bucket/target_path', user=None) - mock_subproc.Popen.assert_called_with('gsutil ls gs://bucket/target_path/**', stdout=mock_subproc.PIPE, - stderr=mock_subproc.PIPE, shell=True) # nosec - mock_logger.info.assert_called_with('==> gsutil ls gs://bucket/target_path/**', None) - process.communicate.assert_called_with() - self.assertEqual(file_list, ['gs://bucket/target_path/id_file.txt', 'gs://bucket/target_path/data.vcf.gz']) diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index 2864eb3823..c54389cbd2 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -4,7 +4,7 @@ from reference_data.models import GENOME_VERSION_LOOKUP from seqr.models import Sample, Individual, Project -from seqr.utils.communication_utils import send_project_notification, safe_post_to_slack +from seqr.utils.communication_utils import send_project_notification from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException from seqr.utils.search.utils import backend_specific_call @@ -13,7 +13,7 @@ from seqr.views.utils.dataset_utils import match_and_update_search_samples, load_mapping_file from seqr.views.utils.export_utils import write_multiple_files from seqr.views.utils.pedigree_info_utils import get_no_affected_families -from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL, ANVIL_UI_URL, \ +from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, ANVIL_UI_URL, \ SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL logger = SeqrLogger(__name__) @@ -58,55 +58,43 @@ def add_new_es_search_samples(request_json, project, user, notify=False, expecte return inactivated_sample_guids, updated_family_guids, updated_samples -def _format_email(sample_summary, project_link, *args): - 
return f'This is to notify you that {sample_summary} have been loaded in seqr project {project_link}' - - -def _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=_format_email): +def _basic_notify_search_data_loaded(project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=None, slack_channel=None, include_slack_detail=False): previous_loaded_individuals = set(Sample.objects.filter(guid__in=inactivated_sample_guids).values_list('individual_id', flat=True)) new_sample_ids = [sample['sample_id'] for sample in updated_samples if sample['individual_id'] not in previous_loaded_individuals] - url = f'{BASE_URL}project/{project.guid}/project_page' msg_dataset_type = '' if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS else f' {dataset_type}' num_new_samples = len(new_sample_ids) sample_summary = f'{num_new_samples} new {sample_type}{msg_dataset_type} samples' - project_link = f'{project.name}' - email = format_email(sample_summary, project_link, num_new_samples) - - send_project_notification( + return send_project_notification( project, - notification=f'Loaded {sample_summary}', - email=email, + notification=sample_summary, + email_template=format_email(num_new_samples) if format_email else None, subject='New data available in seqr', + slack_channel=slack_channel, + slack_detail=', '.join(sorted(new_sample_ids)) if include_slack_detail else None, ) - return sample_summary, new_sample_ids, url - def notify_search_data_loaded(project, is_internal, dataset_type, sample_type, inactivated_sample_guids, updated_samples, num_samples): if is_internal: - format_email = _format_email + format_email = None else: workspace_name = f'{project.workspace_namespace}/{project.workspace_name}' - def format_email(sample_summary, project_link, num_new_samples): + def format_email(num_new_samples): reload_summary = f' and {num_samples - num_new_samples} re-loaded samples' if 
num_samples > num_new_samples else '' return '\n'.join([ f'We are following up on the request to load data from AnVIL on {project.created_date.date().strftime("%B %d, %Y")}.', - f'We have loaded {sample_summary}{reload_summary} from the AnVIL workspace {workspace_name} to the corresponding seqr project {project_link}.', + f'We have loaded {{notification}}{reload_summary} from the AnVIL workspace {workspace_name} to the corresponding seqr project {{project_link}}.', 'Let us know if you have any questions.', ]) - sample_summary, new_sample_ids, url = _basic_notify_search_data_loaded( + url = _basic_notify_search_data_loaded( project, dataset_type, sample_type, inactivated_sample_guids, updated_samples, format_email=format_email, + slack_channel=SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL if is_internal else SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, + include_slack_detail=is_internal, ) - sample_id_list = f'\n```{", ".join(sorted(new_sample_ids))}```' if is_internal else '' - summary_message = f'{sample_summary} are loaded in {url}{sample_id_list}' - safe_post_to_slack( - SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL if is_internal else SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, - summary_message) - if not is_internal: AirtableSession(user=None, base=AirtableSession.ANVIL_BASE, no_auth=True).safe_patch_records( ANVIL_REQUEST_TRACKING_TABLE, max_records=1, diff --git a/seqr/utils/vcf_utils.py b/seqr/utils/vcf_utils.py index 7a421db930..ad5bf1b856 100644 --- a/seqr/utils/vcf_utils.py +++ b/seqr/utils/vcf_utils.py @@ -1,3 +1,4 @@ +import os import re from collections import defaultdict @@ -107,3 +108,28 @@ def validate_vcf_exists(data_path, user, path_name=None, allowed_exts=None): raise ErrorsWarningsException(['Data file or path {} is not found.'.format(path_name or data_path)]) return file_to_check + + +def get_vcf_list(data_path, user): + file_list = list_files(data_path, user, check_subfolders=True, allow_missing=False) + data_path_list = [path.replace(data_path, '') for 
path in file_list if path.endswith(VCF_FILE_EXTENSIONS)] + return _merge_sharded_vcf(data_path_list) + + +def _merge_sharded_vcf(vcf_files): + files_by_path = defaultdict(list) + + for vcf_file in vcf_files: + subfolder_path, file = vcf_file.rsplit('/', 1) + files_by_path[subfolder_path].append(file) + + # discover the sharded VCF files in each folder, replace the sharded VCF files with a single path with '*' + for subfolder_path, files in files_by_path.items(): + if len(files) < 2: + continue + prefix = os.path.commonprefix(files) + suffix = re.fullmatch(r'{}\d*(?P\D.*)'.format(prefix), files[0]).groupdict()['suffix'] + if all([re.fullmatch(r'{}\d+{}'.format(prefix, suffix), file) for file in files]): + files_by_path[subfolder_path] = [f'{prefix}*{suffix}'] + + return [f'{path}/{file}' for path, files in files_by_path.items() for file in files] diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py index 1791cdc295..f1cf52f371 100644 --- a/seqr/views/apis/anvil_workspace_api.py +++ b/seqr/views/apis/anvil_workspace_api.py @@ -1,8 +1,6 @@ """APIs for management of projects related to AnVIL workspaces.""" import json import time -import os -import re from datetime import datetime from functools import wraps from collections import defaultdict @@ -16,7 +14,6 @@ from seqr.models import Project, CAN_EDIT, Sample, Individual, IgvSample from seqr.views.react_app import render_app_html from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE -from seqr.utils.search.constants import VCF_FILE_EXTENSIONS from seqr.utils.search.utils import get_search_samples from seqr.views.utils.airflow_utils import trigger_airflow_data_loading from seqr.views.utils.json_to_orm_utils import create_model_from_json @@ -27,8 +24,8 @@ from seqr.views.utils.pedigree_info_utils import parse_basic_pedigree_table, JsonConstants from seqr.views.utils.individual_utils import add_or_update_individuals_and_families from 
seqr.utils.communication_utils import send_html_email -from seqr.utils.file_utils import get_gs_file_list -from seqr.utils.vcf_utils import validate_vcf_and_get_samples, validate_vcf_exists +from seqr.utils.file_utils import list_files +from seqr.utils.vcf_utils import validate_vcf_and_get_samples, validate_vcf_exists, get_vcf_list from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException from seqr.views.utils.permissions_utils import is_anvil_authenticated, check_workspace_perm, login_and_policies_required @@ -109,24 +106,23 @@ def grant_workspace_access(request, namespace, name): return create_json_response({'success': True}) -def _get_workspace_files(request, namespace, name, workspace_meta): +def _get_workspace_bucket(namespace, name, workspace_meta): bucket_name = workspace_meta['workspace']['bucketName'] - bucket_path = 'gs://{bucket}'.format(bucket=bucket_name.rstrip('/')) - return bucket_path, get_gs_file_list(bucket_path, request.user) + return 'gs://{bucket}'.format(bucket=bucket_name.rstrip('/')) @anvil_workspace_access_required(meta_fields=['workspace.bucketName']) -def get_anvil_vcf_list(*args): - bucket_path, file_list = _get_workspace_files(*args) - data_path_list = [path.replace(bucket_path, '') for path in file_list if path.endswith(VCF_FILE_EXTENSIONS)] - data_path_list = _merge_sharded_vcf(data_path_list) +def get_anvil_vcf_list(request, *args): + bucket_path = _get_workspace_bucket(*args) + data_path_list = get_vcf_list(bucket_path, request.user) return create_json_response({'dataPathList': data_path_list}) @anvil_workspace_access_required(meta_fields=['workspace.bucketName']) -def get_anvil_igv_options(*args): - bucket_path, file_list = _get_workspace_files(*args) +def get_anvil_igv_options(request, *args): + bucket_path = _get_workspace_bucket(*args) + file_list = list_files(bucket_path, request.user, check_subfolders=True, allow_missing=False) igv_options = [ {'name': path.replace(bucket_path, 
''), 'value': path} for path in file_list if path.endswith(IgvSample.SAMPLE_TYPE_FILE_EXTENSIONS[IgvSample.SAMPLE_TYPE_ALIGNMENT]) @@ -340,22 +336,3 @@ def _wait_for_service_account_access(user, namespace, name): def _get_seqr_project_url(project): return f'{BASE_URL}project/{project.guid}/project_page' - - -def _merge_sharded_vcf(vcf_files): - files_by_path = defaultdict(list) - - for vcf_file in vcf_files: - subfolder_path, file = vcf_file.rsplit('/', 1) - files_by_path[subfolder_path].append(file) - - # discover the sharded VCF files in each folder, replace the sharded VCF files with a single path with '*' - for subfolder_path, files in files_by_path.items(): - if len(files) < 2: - continue - prefix = os.path.commonprefix(files) - suffix = re.fullmatch(r'{}\d*(?P\D.*)'.format(prefix), files[0]).groupdict()['suffix'] - if all([re.fullmatch(r'{}\d+{}'.format(prefix, suffix), file) for file in files]): - files_by_path[subfolder_path] = [f'{prefix}*{suffix}'] - - return [f'{path}/{file}' for path, files in files_by_path.items() for file in files] diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index 928fb22840..07c348e5f5 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -443,6 +443,12 @@ def _test_get_workspace_files(self, url, response_key, expected_files, mock_subp .format(TEST_WORKSPACE_NAMESPACE, TEST_WORKSPACE_NAME1), self.collaborator_user) + # Test gsutil error + mock_subprocess.return_value.communicate.return_value = b'', b'-bash: gsutil: command not found.\nPlease check the path.\n' + response = self.client.get(url, content_type='application/json') + self.assertEqual(response.status_code, 500) + self.assertEqual(response.json()['error'], 'Run command failed: -bash: gsutil: command not found. 
Please check the path.') + # Test empty bucket mock_subprocess.return_value.communicate.return_value = b'', None response = self.client.get(url, content_type='application/json') diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 71a93b96d3..34bdc10cd6 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -9,6 +9,7 @@ import urllib3 from django.contrib.postgres.aggregates import ArrayAgg +from django.core.exceptions import PermissionDenied from django.db.models import Max, F, Q, Count from django.http.response import HttpResponse from django.views.decorators.csrf import csrf_exempt @@ -20,7 +21,7 @@ from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException -from seqr.utils.vcf_utils import validate_vcf_exists +from seqr.utils.vcf_utils import validate_vcf_exists, get_vcf_list from seqr.views.utils.airflow_utils import trigger_airflow_data_loading from seqr.views.utils.airtable_utils import AirtableSession, LOADABLE_PDO_STATUSES, AVAILABLE_PDO_STATUS @@ -30,6 +31,7 @@ from seqr.views.utils.json_utils import create_json_response from seqr.views.utils.json_to_orm_utils import update_model_from_json from seqr.views.utils.permissions_utils import data_manager_required, pm_or_data_manager_required, get_internal_projects +from seqr.views.utils.terra_api_utils import anvil_enabled from seqr.models import Sample, RnaSample, Individual, Project, PhenotypePrioritization @@ -346,16 +348,9 @@ def load_rna_seq_sample_data(request, sample_guid): def _notify_phenotype_prioritization_loaded(project, tool, num_samples): - url = f'{BASE_URL}project/{project.guid}/project_page' - project_link = f'{project.name}' - email = ( - f'This is to notify you that {tool.title()} data for {num_samples} sample(s) ' - f'has been loaded in seqr project {project_link}' - ) send_project_notification( project, - 
notification=f'Loaded {num_samples} {tool.title()} sample(s)', - email=email, + notification=f'{num_samples} {tool.title()} sample(s)', subject=f'New {tool.title()} data available in seqr', ) @@ -445,6 +440,15 @@ def load_phenotype_prioritization_data(request): } +@pm_or_data_manager_required +def loading_vcfs(request): + if anvil_enabled(): + raise PermissionDenied() + return create_json_response({ + 'vcfs': get_vcf_list(LOADING_DATASETS_DIR, request.user), + }) + + @pm_or_data_manager_required def validate_callset(request): request_json = json.loads(request.body) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index b07f6c82e9..86b5f0eb17 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -8,7 +8,7 @@ from seqr.utils.communication_utils import _set_bulk_notification_stream from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, validate_callset, \ + update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, validate_callset, loading_vcfs, \ get_loaded_projects, load_data from seqr.views.utils.orm_to_json_utils import _get_json_for_models from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase, AirtableTest @@ -936,11 +936,11 @@ def test_update_rna_tpm(self, *args, **kwargs): def test_update_rna_splice_outlier(self, *args, **kwargs): self._test_update_rna_seq('splice_outlier', *args, **kwargs) - @mock.patch('seqr.views.utils.dataset_utils.BASE_URL', 'https://test-seqr.org/') + @mock.patch('seqr.utils.communication_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.views.utils.dataset_utils.SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL', 'seqr-data-loading') @mock.patch('seqr.views.utils.file_utils.tempfile.gettempdir', lambda: 'tmp/') 
@mock.patch('seqr.utils.communication_utils.send_html_email') - @mock.patch('seqr.views.utils.dataset_utils.safe_post_to_slack') + @mock.patch('seqr.utils.communication_utils.safe_post_to_slack') @mock.patch('seqr.views.apis.data_manager_api.datetime') @mock.patch('seqr.views.apis.data_manager_api.os.mkdir') @mock.patch('seqr.views.apis.data_manager_api.os.rename') @@ -1099,10 +1099,10 @@ def _test_basic_data_loading(data, num_parsed_samples, num_loaded_samples, new_s mock_send_slack.assert_has_calls([ mock.call( 'seqr-data-loading', - f'0 new RNA {params["message_data_type"]} samples are loaded in \n``````', + f'0 new RNA {params["message_data_type"]} sample(s) are loaded in ', ), mock.call( 'seqr-data-loading', - f'1 new RNA {params["message_data_type"]} samples are loaded in \n```NA20888```', + f'1 new RNA {params["message_data_type"]} sample(s) are loaded in \n```NA20888```', ), ]) self.assertEqual(mock_send_email.call_count, 2) @@ -1262,7 +1262,7 @@ def test_load_rna_seq_sample_data(self): def _join_data(cls, data): return ['\t'.join(line).encode('utf-8') for line in data] - @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://test-seqr.org/') + @mock.patch('seqr.utils.communication_utils.BASE_URL', 'https://test-seqr.org/') @mock.patch('seqr.models.random') @mock.patch('seqr.utils.communication_utils.send_html_email') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1345,10 +1345,10 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema self.assertListEqual(saved_data, EXPECTED_LIRICAL_DATA) mock_subprocess.assert_called_with('gsutil cat gs://seqr_data/lirical_data.tsv.gz | gunzip -c -q - ', stdout=-1, stderr=-2, shell=True) # nosec self._assert_expected_notifications(mock_send_email, [ - {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 1 sample(s)'}, - {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 1 sample(s)', + {'data_type': 
'Lirical', 'user': self.data_manager_user, 'email_body': 'data for 1 Lirical sample(s)'}, + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'data for 1 Lirical sample(s)', 'project_guid': 'R0003_test', 'project_name': 'Test Reprocessed Project'} - ], has_html=True) + ]) # Test uploading new data self.reset_logs() @@ -1376,17 +1376,17 @@ def test_load_phenotype_prioritization_data(self, mock_subprocess, mock_send_ema nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}]) self.assertListEqual(saved_data, EXPECTED_UPDATED_LIRICAL_DATA) self._assert_expected_notifications(mock_send_email, [ - {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'Lirical data for 2 sample(s)'}, - ], has_html=True) + {'data_type': 'Lirical', 'user': self.data_manager_user, 'email_body': 'data for 2 Lirical sample(s)'}, + ]) @staticmethod - def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict], has_html=False): + def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict]): calls = [] for notif_dict in expected_notifs: project_guid = notif_dict.get('project_guid', PROJECT_GUID) project_name = notif_dict.get('project_name', '1kg project nåme with uniçøde') url = f'https://test-seqr.org/project/{project_guid}/project_page' - project_link = f'{project_name}' if has_html else f'<{url}|{project_name}>' + project_link = f'{project_name}' expected_email_body = ( f'Dear seqr user,\n\nThis is to notify you that {notif_dict["email_body"]} ' f'has been loaded in seqr project {project_link}\n\nAll the best,\nThe seqr team' @@ -1401,6 +1401,32 @@ def _assert_expected_notifications(mock_send_email, expected_notifs: list[dict], ) mock_send_email.assert_has_calls(calls) + @mock.patch('seqr.utils.file_utils.os.path.isfile', lambda *args: True) + @mock.patch('seqr.utils.file_utils.glob.glob') + def test_loading_vcfs(self, mock_glob): + url = reverse(loading_vcfs) + self.check_pm_login(url) + + 
mock_glob.return_value = [] + response = self.client.get(url, content_type='application/json') + self._test_expected_vcf_responses(response, mock_glob, url) + + def _test_expected_vcf_responses(self, response, mock_glob, url): + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'vcfs': []}) + mock_glob.assert_called_with('/local_datasets/**', recursive=True) + + mock_glob.return_value = ['/local_datasets/sharded_vcf/part001.vcf', '/local_datasets/sharded_vcf/part002.vcf', '/local_datasets/test.vcf.gz'] + response = self.client.get(url, content_type='application/json') + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), {'vcfs': ['/sharded_vcf/part00*.vcf', '/test.vcf.gz']}) + mock_glob.assert_called_with('/local_datasets/**', recursive=True) + + # test data manager access + self.login_data_manager_user() + response = self.client.get(url, content_type='application/json') + self.assertEqual(response.status_code, 200) + @mock.patch('seqr.utils.file_utils.os.path.isfile') @mock.patch('seqr.utils.file_utils.glob.glob') @mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -1928,3 +1954,6 @@ def _assert_write_pedigree_error(self, response): def _test_no_affected_family(self, url, body): # Sample ID filtering skips the unaffected family pass + + def _test_expected_vcf_responses(self, response, mock_glob, url): + self.assertEqual(response.status_code, 403) diff --git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py index 721018a6f6..a3a88e6ad8 100644 --- a/seqr/views/apis/dataset_api_tests.py +++ b/seqr/views/apis/dataset_api_tests.py @@ -48,7 +48,7 @@ class DatasetAPITest(object): @mock.patch('seqr.models.random.randint') @mock.patch('seqr.utils.communication_utils.logger') @mock.patch('seqr.utils.communication_utils.send_html_email') - @mock.patch('seqr.utils.search.add_data_utils.BASE_URL', 'https://seqr.broadinstitute.org/') + 
@mock.patch('seqr.utils.communication_utils.BASE_URL', 'https://seqr.broadinstitute.org/') @urllib3_responses.activate def test_add_variants_dataset(self, mock_send_email, mock_logger, mock_random): url = reverse(add_variants_dataset_handler, args=[PROJECT_GUID]) @@ -269,7 +269,7 @@ def _assert_expected_notification(self, mock_send_email, sample_type, count, ema project_guid=PROJECT_GUID, project_name='1kg project nåme with uniçøde', recipient='test_user_manager@test.com'): if not email_content: - email_content = f'This is to notify you that {count} new {sample_type} samples have been loaded in seqr project {project_name}' + email_content = f'This is to notify you that data for {count} new {sample_type} samples has been loaded in seqr project {project_name}' mock_send_email.assert_called_once_with( email_body=f'Dear seqr user,\n\n{email_content}\n\nAll the best,\nThe seqr team', subject='New data available in seqr', to=[recipient], process_message=mock.ANY, diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 428e6265fd..1a97c3581f 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -1,7 +1,7 @@ from collections import defaultdict from datetime import datetime, timedelta -from django.db.models import Count, Q, Value +from django.db.models import Count, Q, F, Value from django.contrib.postgres.aggregates import ArrayAgg import json import re @@ -17,12 +17,13 @@ EXPERIMENT_TABLE, EXPERIMENT_LOOKUP_TABLE, FINDINGS_TABLE, GENE_COLUMN, FAMILY_INDIVIDUAL_FIELDS from seqr.views.utils.export_utils import export_multiple_files, write_multiple_files from seqr.views.utils.json_utils import create_json_response +from seqr.views.utils.orm_to_json_utils import get_json_for_queryset from seqr.views.utils.permissions_utils import user_is_analyst, get_project_and_check_permissions, \ get_project_guids_user_can_view, get_internal_projects, pm_or_analyst_required, active_user_has_policies_and_passes_test from 
seqr.views.utils.terra_api_utils import anvil_enabled from seqr.views.utils.variant_utils import DISCOVERY_CATEGORY -from seqr.models import Project, Family, Sample, RnaSample, Individual +from seqr.models import Project, Family, FamilyAnalysedBy, Sample, RnaSample, Individual from settings import GREGOR_DATA_MODEL_URL @@ -890,6 +891,16 @@ def _add_row(row, family_id, row_type): parse_anvil_metadata( projects, user=request.user, add_row=_add_row, omit_airtable=True, include_family_sample_metadata=True, include_no_individual_families=True) + analysed_by = get_json_for_queryset( + FamilyAnalysedBy.objects.filter(family_id__in=families_by_id).order_by('last_modified_date'), + additional_values={'familyId': F('family_id')}, + ) + analysed_by_family_type = defaultdict(lambda: defaultdict(list)) + for fab in analysed_by: + analysed_by_family_type[fab['familyId']][fab['dataType']].append( + f"{fab['createdBy']} ({fab['lastModifiedDate']:%-m/%-d/%Y})" + ) + for family_id, f in families_by_id.items(): individuals_by_id = family_individuals[family_id] proband = next((i for i in individuals_by_id.values() if i['proband_relationship'] == 'Self'), None) @@ -910,6 +921,10 @@ def _add_row(row, family_id, row_type): sorted_samples = sorted(individuals_by_id.values(), key=lambda x: x.get('date_data_generation', '')) earliest_sample = next((s for s in [proband or {}] + sorted_samples if s.get('date_data_generation')), {}) + analysed_by = [ + f'{ANALYSIS_DATA_TYPE_LOOKUP[data_type]}: {", ".join(analysed)}' + for data_type, analysed in analysed_by_family_type[family_id].items() + ] inheritance_models = f.pop('inheritance_models', []) f.update({ 'individual_count': len(individuals_by_id), @@ -920,6 +935,7 @@ def _add_row(row, family_id, row_type): 'genes': '; '.join(sorted(f.get('genes', []))), 'actual_inheritance': 'unknown' if inheritance_models == {'unknown'} else ';'.join( sorted([i for i in inheritance_models if i != 'unknown'])), + 'analysed_by': '; '.join(analysed_by), }) return 
create_json_response({'rows': list(families_by_id.values())}) @@ -933,6 +949,9 @@ def _get_metadata_projects(project_guid, user): return [get_project_and_check_permissions(project_guid, user)] +ANALYSIS_DATA_TYPE_LOOKUP = dict(FamilyAnalysedBy.DATA_TYPE_CHOICES) + + FAMILY_STRUCTURES = { 1: 'singleton', 2: 'duo', diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index db3dbb36f0..704dfd1dc9 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -1221,6 +1221,7 @@ def test_family_metadata(self): 'phenotype_description': None, 'analysisStatus': 'Q', 'analysis_groups': '', + 'analysed_by': '', 'consanguinity': 'Unknown', }) @@ -1234,6 +1235,34 @@ def test_family_metadata(self): 'F000001_1', 'F000002_2', 'F000003_3', 'F000004_4', 'F000005_5', 'F000006_6', 'F000007_7', 'F000008_8', 'F000009_9', 'F000010_10', 'F000011_11', 'F000012_12', 'F000013_13'] self.assertListEqual(sorted([r['familyGuid'] for r in response_json['rows']]), expected_families) + test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000001_1') + self.assertDictEqual(test_row, { + 'projectGuid': 'R0001_1kg', + 'internal_project_id': '1kg project nåme with uniçøde', + 'familyGuid': 'F000001_1', + 'family_id': '1', + 'displayName': '1', + 'solve_status': 'Unsolved', + 'actual_inheritance': 'de novo', + 'date_data_generation': '2017-02-05', + 'data_type': 'WES', + 'proband_id': 'NA19675_1', + 'maternal_id': 'NA19679', + 'paternal_id': 'NA19678', + 'other_individual_ids': '', + 'individual_count': 3, + 'family_structure': 'trio', + 'genes': 'RP11', + 'pmid_id': '34415322', + 'phenotype_description': 'myopathy', + 'analysisStatus': 'Q', + 'analysis_groups': 'Test Group 1', + 'analysed_by': 'WES/WGS: Test No Access User (7/22/2022)', + 'consanguinity': 'Present', + 'condition_id': 'OMIM:615120', + 'known_condition_name': 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', + 
'condition_inheritance': 'Autosomal recessive|X-linked', + }) test_row = next(r for r in response_json['rows'] if r['familyGuid'] == 'F000003_3') self.assertDictEqual(test_row, { 'projectGuid': 'R0001_1kg', @@ -1253,6 +1282,7 @@ def test_family_metadata(self): 'phenotype_description': None, 'analysisStatus': 'Q', 'analysis_groups': 'Accepted; Test Group 1', + 'analysed_by': '', 'consanguinity': 'Unknown', 'condition_id': 'OMIM:615123', 'known_condition_name': '', diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index 56b6a02160..99cf4f345d 100644 --- a/seqr/views/apis/variant_search_api.py +++ b/seqr/views/apis/variant_search_api.py @@ -602,9 +602,11 @@ def _update_lookup_variant(variant, response): for genotype in variant['familyGenotypes'].pop(family_guid) }) - for i, genotypes in enumerate(variant.pop('familyGenotypes').values()): + for i, (unmapped_family_guid, genotypes) in enumerate(variant.pop('familyGenotypes').items()): family_guid = f'F{i}_{variant["variantId"]}' variant['lookupFamilyGuids'].append(family_guid) + if unmapped_family_guid in variant.get('liftedFamilyGuids', []): + variant['liftedFamilyGuids'][variant['liftedFamilyGuids'].index(unmapped_family_guid)] = family_guid for j, genotype in enumerate(genotypes): individual_guid = f'I{j}_{family_guid}' individual = individual_summary_map[(genotype.pop('familyGuid'), genotype.pop('sampleId'))] diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index eef51399c7..c8e03aa305 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -804,12 +804,14 @@ def test_variant_lookup(self, mock_variant_lookup): expected_variant = { **VARIANT_LOOKUP_VARIANT, 'familyGuids': [], - 'lookupFamilyGuids': ['F0_1-10439-AC-A', 'F1_1-10439-AC-A'], + 'lookupFamilyGuids': ['F0_1-10439-AC-A', 'F1_1-10439-AC-A', 'F2_1-10439-AC-A'], + 'liftedFamilyGuids': ['F2_1-10439-AC-A'], 
'genotypes': { 'I0_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 60, 'gq': 20, 'numAlt': 0, 'filters': [], 'sampleType': 'WES'}, 'I1_F0_1-10439-AC-A': {'ab': 0.0, 'dp': 24, 'gq': 0, 'numAlt': 0, 'filters': [], 'sampleType': 'WES'}, 'I2_F0_1-10439-AC-A': {'ab': 0.5, 'dp': 10, 'gq': 99, 'numAlt': 1, 'filters': [], 'sampleType': 'WES'}, 'I0_F1_1-10439-AC-A': {'ab': 1.0, 'dp': 6, 'gq': 16, 'numAlt': 2, 'filters': [], 'sampleType': 'WES'}, + 'I0_F2_1-10439-AC-A': {'ab': 0.531000018119812, 'dp': 27, 'gq': 87, 'numAlt': 1, 'filters': None, 'sampleType': 'WGS'}, }, } del expected_variant['familyGenotypes'] @@ -830,6 +832,11 @@ def test_variant_lookup(self, mock_variant_lookup): 'features': [{'category': 'HP:0001626', 'label': '1 terms'}, {'category': 'Other', 'label': '1 terms'}], 'vlmContactEmail': 'seqr-test@gmail.com,test@broadinstitute.org', }, + 'I0_F2_1-10439-AC-A': { + 'affected': 'A', 'familyGuid': 'F2_1-10439-AC-A', 'features': [], + 'individualGuid': 'I0_F2_1-10439-AC-A', 'sex': 'F', + 'vlmContactEmail': 'vlm@broadinstitute.org', + }, 'I1_F0_1-10439-AC-A': { 'affected': 'N', 'familyGuid': 'F0_1-10439-AC-A', 'features': [], 'individualGuid': 'I1_F0_1-10439-AC-A', 'sex': 'M', @@ -860,7 +867,7 @@ def test_variant_lookup(self, mock_variant_lookup): response_variant['variantId'] = '1-248367227-TC-T' response_variant['genomeVersion'] = '37' - self.login_collaborator() + self.login_manager() response = self.client.get(url.replace("38", "37")) self.assertEqual(response.status_code, 200) @@ -869,9 +876,11 @@ def test_variant_lookup(self, mock_variant_lookup): ('I000005_hg00732', 'I1_F0_1-10439-AC-A', {'sampleId': 'HG00732', 'familyGuid': 'F000002_2'}), ('I000004_hg00731', 'I2_F0_1-10439-AC-A', {'sampleId': 'HG00731', 'familyGuid': 'F000002_2'}), ('I000015_na20885', 'I0_F1_1-10439-AC-A', {'sampleId': 'NA20885', 'familyGuid': 'F000011_11'}), + ('I000018_na21234', 'I0_F2_1-10439-AC-A', {'sampleId': 'NA21234', 'familyGuid': 'F000014_14'}), ] expected_variant.update({ - 
'lookupFamilyGuids': ['F000002_2', 'F000011_11'], + 'lookupFamilyGuids': ['F000002_2', 'F000011_11', 'F000014_14'], + 'liftedFamilyGuids': ['F000014_14'], 'genotypes': { individual_guid: {**expected_variant['genotypes'][anon_individual_guid], **genotype} for individual_guid, anon_individual_guid, genotype in individual_guid_map @@ -881,32 +890,37 @@ def test_variant_lookup(self, mock_variant_lookup): }) expected_body.update({ **{k: {**EXPECTED_SEARCH_RESPONSE[k]} for k in { - 'savedVariantsByGuid', 'variantTagsByGuid', 'variantNotesByGuid', + 'mmeSubmissionsByGuid', 'variantTagsByGuid', 'variantNotesByGuid', }}, **EXPECTED_TRANSCRIPTS_RESPONSE, + 'omimIntervals': {}, + 'savedVariantsByGuid': {'SV0000002_1248367227_r0390_100': EXPECTED_SAVED_VARIANT}, 'variantFunctionalDataByGuid': {}, 'locusListsByGuid': EXPECTED_SEARCH_CONTEXT_RESPONSE['locusListsByGuid'], 'projectsByGuid': { p: {k: mock.ANY for k in PROJECT_TAG_TYPE_FIELDS} - for p in [PROJECT_GUID, 'R0003_test'] + for p in [PROJECT_GUID, 'R0003_test', 'R0004_non_analyst_project'] }, 'familiesByGuid': { f: {k: mock.ANY for k in [*FAMILY_FIELDS, 'individualGuids']} - for f in ['F000002_2', 'F000011_11'] + for f in ['F000002_2', 'F000011_11', 'F000014_14'] }, 'individualsByGuid': { i[0]: {k: mock.ANY for k in [*INDIVIDUAL_FIELDS, 'igvSampleGuids']} - for i in individual_guid_map + for i in individual_guid_map + [('I000019_na21987',)] }, }) expected_body['genesById']['ENSG00000227232'] = expected_pa_gene - del expected_body['savedVariantsByGuid']['SV0000001_2103343353_r0390_100'] + expected_body['mmeSubmissionsByGuid']['MS000018_P0004517'] = expected_body['mmeSubmissionsByGuid'].pop('MS000001_na19675') + expected_body['savedVariantsByGuid']['SV0000006_1248367227_r0004_non'] = mock.ANY + expected_body['variantTagsByGuid']['VT1726970_2103343353_r0004_tes'] = EXPECTED_TAG + expected_body['variantTagsByGuid']['VT1726961_2103343353_r0005_tes'] = EXPECTED_TAG for k in ['VT1708633_2103343353_r0390_100', 
'VT1726961_2103343353_r0390_100']: del expected_body['variantTagsByGuid'][k] self.assertDictEqual(response.json(), expected_body) mock_variant_lookup.assert_called_with( - self.collaborator_user, ('1', 10439, 'AC', 'A'), genome_version='37', + self.manager_user, ('1', 10439, 'AC', 'A'), genome_version='37', ) @mock.patch('seqr.views.apis.variant_search_api.sv_variant_lookup') diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index c113e9985f..f75e93eb00 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -5,7 +5,7 @@ from tqdm import tqdm from seqr.models import Sample, Individual, Family, Project, RnaSample, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier -from seqr.utils.communication_utils import safe_post_to_slack, send_project_notification +from seqr.utils.communication_utils import send_project_notification from seqr.utils.file_utils import file_iter from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException @@ -14,7 +14,7 @@ from seqr.views.utils.permissions_utils import get_internal_projects from seqr.views.utils.json_utils import _to_snake_case, _to_camel_case from reference_data.models import GeneInfo -from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL +from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL logger = SeqrLogger(__name__) @@ -216,7 +216,8 @@ def match_and_update_search_samples( updated_samples = Sample.objects.filter(guid__in=activated_sample_guids) family_guids_to_update = [ - family_guid for family_guid, analysis_status in included_families.items() if analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA + family_guid for family_guid, analysis_status in included_families.items() + if analysis_status in {Family.ANALYSIS_STATUS_WAITING_FOR_DATA, Family.ANALYSIS_STATUS_LOADING_FAILED} ] Family.bulk_update( user, {'analysis_status': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS}, 
guid__in=family_guids_to_update) @@ -557,20 +558,12 @@ def _notify_rna_loading(model_cls, sample_projects, internal_projects): data_type = RNA_MODEL_DISPLAY_NAME[model_cls] for project_agg in sample_projects: new_ids = project_agg["new_sample_ids"] - project_link = f'<{BASE_URL}project/{project_agg["guid"]}/project_page|{project_agg["name"]}>' - safe_post_to_slack( - SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, - f'{len(new_ids)} new RNA {data_type} samples are loaded in {project_link}\n```{", ".join(new_ids)}```' - ) - email = ( - f'This is to notify you that data for {len(new_ids)} new RNA {data_type} sample(s) ' - f'has been loaded in seqr project {project_link}' - ) send_project_notification( project=projects_by_name[project_agg["name"]], - notification=f'Loaded {len(new_ids)} new RNA {data_type} sample(s)', - email=email, + notification=f'{len(new_ids)} new RNA {data_type} sample(s)', subject=f'New RNA {data_type} data available in seqr', + slack_channel=SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, + slack_detail=', '.join(new_ids), ) diff --git a/ui/pages/DataManagement/components/LoadData.jsx b/ui/pages/DataManagement/components/LoadData.jsx index 6dc9bdcdba..e9ad6d7494 100644 --- a/ui/pages/DataManagement/components/LoadData.jsx +++ b/ui/pages/DataManagement/components/LoadData.jsx @@ -48,32 +48,43 @@ const LoadedProjectOptions = props => ( ) +const FILE_PATH_FIELD = { + name: 'filePath', + validate: validators.required, +} + +const CALLSET_PAGE_FIELDS = [ + { + name: 'skipValidation', + label: 'Skip Callset Validation', + component: InlineToggle, + asFormInput: true, + }, + { + ...GENOME_VERSION_FIELD, + component: ButtonRadioGroup, + validate: validators.required, + }, + { + name: 'sampleType', + label: 'Sample Type', + component: ButtonRadioGroup, + options: [SAMPLE_TYPE_EXOME, SAMPLE_TYPE_GENOME].map(value => ({ value, text: value })), + validate: validators.required, + }, +] + const CALLSET_PAGE = { fields: [ { - name: 'filePath', - label: 'Callset 
File Path', - placeholder: 'gs://', - validate: validators.required, - }, - { - name: 'skipValidation', - label: 'Skip Callset Validation', - component: InlineToggle, - asFormInput: true, - }, - { - ...GENOME_VERSION_FIELD, - component: ButtonRadioGroup, - validate: validators.required, - }, - { - name: 'sampleType', - label: 'Sample Type', - component: ButtonRadioGroup, - options: [SAMPLE_TYPE_EXOME, SAMPLE_TYPE_GENOME].map(value => ({ value, text: value })), - validate: validators.required, + label: 'VCF', + component: LoadOptionsSelect, + url: '/api/data_management/loading_vcfs', + optionsResponseKey: 'vcfs', + validationErrorMessage: 'No VCFs found in the loading datasets directory', + ...FILE_PATH_FIELD, }, + ...CALLSET_PAGE_FIELDS, ], submitUrl: '/api/data_management/validate_callset', } @@ -81,7 +92,12 @@ const CALLSET_PAGE = { const MULTI_DATA_TYPE_CALLSET_PAGE = { ...CALLSET_PAGE, fields: [ - ...CALLSET_PAGE.fields, + { + label: 'Callset File Path', + placeholder: 'gs://', + ...FILE_PATH_FIELD, + }, + ...CALLSET_PAGE_FIELDS, { name: 'datasetType', label: 'Dataset Type', diff --git a/ui/pages/Report/components/FamilyMetadata.jsx b/ui/pages/Report/components/FamilyMetadata.jsx index 93bfde2892..9f20b94625 100644 --- a/ui/pages/Report/components/FamilyMetadata.jsx +++ b/ui/pages/Report/components/FamilyMetadata.jsx @@ -15,6 +15,7 @@ const COLUMNS = [ { name: 'paternal_id' }, { name: 'maternal_id' }, { name: 'other_individual_ids' }, + { name: 'analysed_by', style: { minWidth: '400px' } }, ] const FamilyMetadata = props => ( diff --git a/ui/pages/Search/components/SavedSearch.jsx b/ui/pages/Search/components/SavedSearch.jsx index 5ae575497f..a3b163e422 100644 --- a/ui/pages/Search/components/SavedSearch.jsx +++ b/ui/pages/Search/components/SavedSearch.jsx @@ -1,4 +1,5 @@ import React from 'react' +import isEqual from 'lodash/isEqual' import PropTypes from 'prop-types' import { connect } from 'react-redux' import { FormSpy } from 'react-final-form' @@ -31,11 
+32,17 @@ const FormButtonContainer = styled.div` const SUBSCRIPTION = { values: true } +const isSameSearch = ({ locus: locus1, ...search1 }, { locus: locus2, ...search2 }) => ( + isEqual(search1, search2) && ( + locus1?.locusListGuid ? locus1.locusListGuid === locus2.locusListGuid : isEqual(locus1, locus2) + ) +) + const CurrentSavedSearchProvider = ({ element, ...props }) => ( {({ values }) => { const currentSavedSearch = values.search && Object.values(props.savedSearchesByGuid).find( - ({ search }) => search === values.search, + ({ search }) => isSameSearch(search, values.search), ) return React.createElement(element, { currentSavedSearch, search: values.search, ...props }) }} diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index 9e6ad7e00e..0e9ef3e8a0 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -1,7 +1,7 @@ import React from 'react' import { connect } from 'react-redux' import PropTypes from 'prop-types' -import { Grid, Header } from 'semantic-ui-react' +import { Grid, Header, Label } from 'semantic-ui-react' import { RECEIVE_DATA } from 'redux/utils/reducerUtils' import { QueryParamsEditor } from 'shared/components/QueryParamEditor' @@ -14,7 +14,7 @@ import FamilyReads from 'shared/components/panel/family/FamilyReads' import FamilyVariantTags from 'shared/components/panel/variants/FamilyVariantTags' import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/variants/Variants' import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals' -import { GENOME_VERSION_FIELD } from 'shared/utils/constants' +import { GENOME_VERSION_FIELD, GENOME_VERSION_37, GENOME_VERSION_38 } from 'shared/utils/constants' import { sendVlmContactEmail } from '../reducers' import { getVlmDefaultContactEmails, getVlmFamiliesByContactEmail } from '../selectors' @@ -43,10 +43,19 @@ const 
mapContactDispatchToProps = { const ContactButton = connect(null, mapContactDispatchToProps)(SendEmailButton) -const LookupFamilyLayout = ({ topContent, bottomContent, children, ...buttonProps }) => ( +const liftoverGenomeVersion = genomeVersion => ( + genomeVersion === GENOME_VERSION_37 ? GENOME_VERSION_38 : GENOME_VERSION_37 +) + +const LookupFamilyLayout = ({ topContent, bottomContent, hasLiftover, genomeVersion, children, ...buttonProps }) => ( {topContent} + @@ -60,6 +69,8 @@ LookupFamilyLayout.propTypes = { topContent: PropTypes.node, bottomContent: PropTypes.node, children: PropTypes.node, + hasLiftover: PropTypes.bool, + genomeVersion: PropTypes.string, } const InternalFamily = ({ familyGuid, variant, reads, showReads }) => ( @@ -70,6 +81,8 @@ const InternalFamily = ({ familyGuid, variant, reads, showReads }) => ( )} bottomContent={{reads}} + hasLiftover={variant.liftedFamilyGuids?.includes(familyGuid)} + genomeVersion={variant.genomeVersion} > {showReads} @@ -96,6 +109,8 @@ const BaseLookupVariant = ({ variant, familiesByContactEmail, vlmDefaultContactE key={contactEmail} defaultEmail={vlmDefaultContactEmails[contactEmail]} modalId={contactEmail} + hasLiftover={(variant.liftedFamilyGuids || []).some(familyGuid => families.includes(familyGuid))} + genomeVersion={variant.genomeVersion} > {families.map(familyGuid => ( diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 2e92d2746c..c7b9725bba 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -164,6 +164,7 @@ const FAMILY_STATUS_CLOSED = 'C' const FAMILY_STATUS_PARTIAL_SOLVE = 'P' const FAMILY_STATUS_ANALYSIS_IN_PROGRESS = 'I' const FAMILY_STATUS_WAITING_FOR_DATA = 'Q' +const FAMILY_STATUS_LOADING_FAILED = 'F' const FAMILY_STATUS_NO_DATA = 'N' const DEPRECATED_FAMILY_ANALYSIS_STATUS_OPTIONS = [ @@ -184,6 +185,7 @@ export const SELECTABLE_FAMILY_ANALYSIS_STATUS_OPTIONS = [ { value: FAMILY_STATUS_PARTIAL_SOLVE, color: '#288582', name: 'Partial Solve - 
Analysis in Progress' }, { value: FAMILY_STATUS_ANALYSIS_IN_PROGRESS, color: '#4682B4', name: 'Analysis in Progress' }, { value: FAMILY_STATUS_WAITING_FOR_DATA, color: '#FFC107', name: 'Waiting for data' }, + { value: FAMILY_STATUS_LOADING_FAILED, color: '#ba4c12', name: 'Loading failed' }, { value: FAMILY_STATUS_NO_DATA, color: '#646464', name: 'No data expected' }, ] export const ALL_FAMILY_ANALYSIS_STATUS_OPTIONS = [ @@ -1395,7 +1397,7 @@ const VARIANT_SORT_OPTONS = [ ), }, ] -const VARIANT_SEARCH_SORT_OPTONS = VARIANT_SORT_OPTONS.slice(1, VARIANT_SORT_OPTONS.length - 1) +const VARIANT_SEARCH_SORT_OPTONS = VARIANT_SORT_OPTONS.slice(0, VARIANT_SORT_OPTONS.length - 1) export const VARIANT_SORT_LOOKUP = VARIANT_SORT_OPTONS.reduce( (acc, opt) => ({