Merge pull request #43 from friendsofstrandseq/dev

Final results file to check if all outputs were produced, dump config…
friendsofstrandseq · Dec 4, 2023 · 33d8e87 · 33d8e87
2 parents 7aadd43 + 202ad00
commit 33d8e87
Show file tree

Hide file tree

Showing 13 changed files with 884 additions and 190 deletions.
diff --git a/.github/workflows/main.yaml → .github/workflows/main_test.yaml b/.github/workflows/main.yaml → .github/workflows/main_test.yaml
@@ -1,23 +1,16 @@
 name: ashleys-qc-pipeline workflow checks
 
 on:
-  schedule:
-    # Run every Sunday at 00:00 UTC on the master branch
-    - cron:  '0 0 * * 0'
-      # branches:
-      #   - master
   push:
     branches:
-      - '*'
-      - '!master'
-
+      - "**"
 
 jobs:
   # WORK
   Formatting:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
 
       - name: Formatting
         uses: github/super-linter@v4
@@ -30,21 +23,17 @@ jobs:
   Linting:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Linting
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
           directory: .
           snakefile: ./workflow/Snakefile
-          # stagein: "mamba env remove -n snakemake && mamba create -y -n snakemake -c conda-forge -c bioconda unzip snakemake pandas pysam tqdm imagemagick && source activate snakemake && ls -l && pwd"
           args: "--lint --config ashleys_pipeline=True"
   Testing_ashleys:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -61,11 +50,8 @@ jobs:
 
   Testing_ashleys_fastqc_enabled:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -82,11 +68,8 @@ jobs:
 
   Testing_ashleys_ms_norm_enabled:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -102,11 +85,8 @@ jobs:
           args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config multistep_normalisation=True --conda-frontend mamba --report report.zip"
   Testing_ashleys_hg38:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -122,11 +102,8 @@ jobs:
           args: "--cores 1 --use-conda --config reference=hg38 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_ashleys_hg19:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -142,11 +119,8 @@ jobs:
           args: "--cores 1 --use-conda --config reference=hg19 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_ashleys_T2T:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -162,11 +136,8 @@ jobs:
           args: "--cores 1 --use-conda --config reference=T2T use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_ashleys_mm10:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -182,11 +153,8 @@ jobs:
           args: "--cores 1 --use-conda --config reference=mm10 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_jub_nb:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -202,11 +170,8 @@ jobs:
           args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config hand_selection=True --conda-frontend mamba --report report.zip"
   Testing_publishdir:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -222,11 +187,8 @@ jobs:
           args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config publishdir=.tests/data_chr17_publishdir --conda-frontend mamba --report report.zip"
   Testing_list_commands:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:

diff --git a/.tests/config/simple_config.yaml b/.tests/config/simple_config.yaml
@@ -1,28 +1,36 @@
-version: 2.2.2
-# Option to display all potential options - listed in config_metadata.yaml
-list_commands: False
-## Data location - MUST BE AN ABSOULTE PATH (due to snakemake-symlink issues) - PLEASE MODIFY IT
-# input_bam_location: ".tests/data_CHR17"
-data_location: ".tests/data_CHR17"
-# Reference genome used by BWA to map FASTQ files
-# reference: sandbox.zenodo.org/record/1074721/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
-# Enable / Disable download of external files (1000G SNV & Fasta ref genome)
-dl_external_files: False
-# Enable / Disable multistep normalisation
-multistep_normalisation: False
-# Ashleys-qc binary classification threshold
-ashleys_threshold: 0.5
-# Enable / Disable FastQC analysis
-FastQC_analysis: False
-# To be informed of pipeline status
+# --------------------------------------------------------
+# Ashleys-QC pipeline Configuration
+# --------------------------------------------------------
+version: 2.2.3
+
+# Email for notifications about the pipeline's status
 email: ""
 
-############################################################################
-#                          ADVANCED PARAMETERS
-############################################################################
+# List of samples to process if multiple are specified
+samples_to_process: []
+
+# Plate size
+plate_size: 96
+
+# --------------------------------------------------------
+# Data location & I/O
+# --------------------------------------------------------
+
+# Absolute path to the data location (modify as needed)
+data_location: ".tests/data_CHR17"
+
+# Directory to publish important data (e.g., stats, plots, counts). Leave empty if not required.
+publishdir: ""
+
+# --------------------------------------------------------
+# Reference Data Configuration
+# --------------------------------------------------------
 
+# Reference genome used by BWA to map FASTQ files
 reference: "hg38"
 
+# Reference genome files' location
+
 references_data:
   "hg38":
     reference_fasta: ".tests/external_data/chr17.fa.gz"
@@ -31,70 +39,69 @@ references_data:
   "T2T":
     reference_fasta: "workflow/data/ref_genomes/T2T.fa"
 
-# Boolean parameters
-## Is the pipeline called used as a submodule in mosaicatcher-pipeline?
-mosaicatcher_pipeline: False
-## Enable/Disable hand selection through Jupyter Notebook
-hand_selection: False
+# List of chromosomes to process
+chromosomes:
+  - chr17
 
-# Window size used by mosaic binning algorithm
-window: 200000
+# Specify any chromosomes to exclude from processing
+chromosomes_to_exclude: []
 
-plottype_counts:
-  - "raw"
-  - "normalised"
+# --------------------------------------------------------
+# Quality Control Configuration
+# --------------------------------------------------------
 
-alfred_plots:
-  - "dist"
-  - "devi"
+# Threshold for Ashleys-qc binary classification
+ashleys_threshold: 0.5
 
-plate_orientation: landscape
+# Enable or disable MultiQC analysis
+MultiQC: False
+
+# --------------------------------------------------------
+# Counts Configuration
+# --------------------------------------------------------
+
+# Enable or disable multistep normalisation analysis
+multistep_normalisation: False
+
+# Advanced parameters for multi-step normalisation
+multistep_normalisation_options:
+  min_reads_bin: 5
+  n_subsample: 1000
+  min_reads_cell: 100000
+
+# Window size used by the mosaic binning algorithm
+window: 200000
+
+# Enable or disable hand selection through the Jupyter Notebook
+hand_selection: False
+
+# --------------------------------------------------------
+# GENECORE Configuration
+# --------------------------------------------------------
 
-# Chromosomes list to process
-chromosomes:
-  - chr1
-  - chr2
-  - chr3
-  - chr4
-  - chr5
-  - chr6
-  - chr7
-  - chr8
-  - chr9
-  - chr10
-  - chr11
-  - chr12
-  - chr13
-  - chr14
-  - chr15
-  - chr16
-  - chr17
-  - chr18
-  - chr19
-  - chr20
-  - chr21
-  - chr22
-  - chrX
-  - chrY
-
-# GENECORE
 genecore: False
-samples_to_process: []
 genecore_date_folder: ""
-genecore_prefix: "/g/korbel/shared/genecore"
+genecore_prefix: "/g/korbel/STOCKS/Data/Assay/sequencing/2023"
+genecore_regex_element: "PE20"
 
-##### DEV only
+# --------------------------------------------------------
+# Internal Parameters
+# --------------------------------------------------------
+
+# Is the pipeline used as a submodule in mosaicatcher-pipeline?
+mosaicatcher_pipeline: False
 
 # Overwrite Ashleys predictions for GitHub and smoke-dataset testing purposes
 use_light_data: True
 
-# If specified, will copy important data (stats, plots, counts file) to a second place
-publishdir: ""
-
-# Multi-step normalisation advanced parameters
-multistep_normalisation_options:
-  min_reads_bin: 5
-  n_subsample: 1000
-  min_reads_cell: 1000
-# Others
+# For snakemake linting
 abs_path: "/"
+
+# Type of plots for counts
+plottype_counts:
+  - "raw"
+  - "normalised"
+
+# Option to display all potential commands (as listed in config_metadata.yaml)
+list_commands: False
+# --------------------------------------------------------
diff --git a/config/config.yaml b/config/config.yaml
@@ -1,14 +1,17 @@
 # --------------------------------------------------------
 # Ashleys-QC pipeline Configuration
 # --------------------------------------------------------
-version: 2.2.2
+version: 2.2.3
 
 # Email for notifications about the pipeline's status
 email: ""
 
 # List of samples to process if multiple are specified
 samples_to_process: []
 
+# Plate size
+plate_size: 96
+
 # --------------------------------------------------------
 # Data location & I/O
 # --------------------------------------------------------