From d8977fe8e396a9b091ec7947fe7c311628da623f Mon Sep 17 00:00:00 2001
From: Thomas Weber
Date: Mon, 4 Dec 2023 10:44:35 +0000
Subject: [PATCH] Fix merge-blacklist output and dump_config; add an assertion
 cross-checking labels.tsv, selected/ and the scNOVA input lists; read labels
 at a later stage to avoid working on a modified list of cells; other minor
 fixes

---
 .gitignore                                         |   4 +-
 .tests/config/simple_config.yaml                   |   7 +-
 afac/update_timestamps.py                          |  25 +
 config/config.yaml                                 |   3 +
 config/config_metadata.yaml                        |   6 +
 .../Dockerfile-2.2.2.dockerfile                    | 227 +++++++++
 .../Dockerfile-2.2.3.dockerfile                    | 299 ++++++++++++
 .../add_T2T_part_to_Dockerfile.sh                  |  35 ++
 watchdog_pipeline/watchdog_pipeline.py             |  13 +-
 workflow/Snakefile                                 |  26 +-
 workflow/envs/scNOVA/scNOVA_DL.yaml                |   1 +
 workflow/rules/aggregate_fct.smk                   |   2 +-
 workflow/rules/common.smk                          | 450 +++++-------------
 workflow/rules/count.smk                           |   3 +-
 workflow/rules/plots.smk                           |   7 +-
 workflow/rules/regenotyping.smk                    |   1 +
 workflow/rules/scNOVA.smk                          |  19 +
 workflow/rules/utils.smk                           |  15 +
 .../scripts/normalization/merge-blacklist.py       |   3 +-
 .../scNOVA_scripts/assert_list_of_cells.py         |  57 +++
 workflow/scripts/utils/dump_config.py              |  40 +-
 21 files changed, 857 insertions(+), 386 deletions(-)
 create mode 100644 afac/update_timestamps.py
 create mode 100644 github-actions-runner/Dockerfile-2.2.2.dockerfile
 create mode 100644 github-actions-runner/Dockerfile-2.2.3.dockerfile
 create mode 100644 github-actions-runner/add_T2T_part_to_Dockerfile.sh
 create mode 100644 workflow/scripts/scNOVA_scripts/assert_list_of_cells.py

diff --git a/.gitignore b/.gitignore
index 406d336a..a140b637 100644
--- a/.gitignore
+++ b/.gitignore
@@ -218,4 +218,6 @@ LOGS_DEV/
 
 # scTRIP multiplot
 workflow/scripts/plotting/scTRIP_multiplot/scTRIPmultiplot
-workflow/config/scTRIP_multiplot.ok
\ No newline at end of file
+workflow/config/scTRIP_multiplot.ok
+args.output
+scNOVA_env_costea.yaml

diff --git a/.tests/config/simple_config.yaml b/.tests/config/simple_config.yaml
index 48e84da5..884e952f 100644
--- a/.tests/config/simple_config.yaml
+++ b/.tests/config/simple_config.yaml
@@ -3,10 +3,10 @@
 # --------------------------------------------------------
 
 # MosaiCatcher version
-version: 2.2.2
+version: 2.2.3
 
 # Ashleys-QC pipeline version
-ashleys_pipeline_version: 2.2.2
+ashleys_pipeline_version: 2.2.3
 
 # Email for notifications about the pipeline's status
 email: ""
 
@@ -14,6 +14,9 @@ email: ""
 # List of samples to process if multiple are specified
 samples_to_process: []
 
+# Plate size
+plate_size: 96
+
 # --------------------------------------------------------
 # Data location & I/O
 # --------------------------------------------------------

diff --git a/afac/update_timestamps.py b/afac/update_timestamps.py
new file mode 100644
index 00000000..84cb551a
--- /dev/null
+++ b/afac/update_timestamps.py
@@ -0,0 +1,25 @@
+import os, sys
+import time
+from pathlib import Path
+
+
+def update_timestamps(directory):
+    """
+    Update the access and modification times of all files in the given directory and its subdirectories.
+ + :param directory: Path to the directory + """ + for root, dirs, files in os.walk(directory): + for file in files: + if file.endswith(".fastq.gz"): + continue + file_path = Path(root) / file + current_time = time.time() + print(file_path) + os.utime(file_path, (current_time, current_time)) + print(f"Updated timestamp for: {file_path}") + + +# Example usage +directory_path = sys.argv[1] +update_timestamps(directory_path) diff --git a/config/config.yaml b/config/config.yaml index 5a3b5098..3809643d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -14,6 +14,9 @@ email: "" # List of samples to process if multiple are specified samples_to_process: [] +# Plate size +plate_size: 96 + # -------------------------------------------------------- # Data location & I/O # -------------------------------------------------------- diff --git a/config/config_metadata.yaml b/config/config_metadata.yaml index 97c1af02..bff78ba9 100644 --- a/config/config_metadata.yaml +++ b/config/config_metadata.yaml @@ -135,3 +135,9 @@ use_strandscape_labels:: required: False default: False lint_check: False +plate_size:: + desc: "Plate size used for the sequencing (96/384)" + type: int + required: True + default: 96 + lint_check: False diff --git a/github-actions-runner/Dockerfile-2.2.2.dockerfile b/github-actions-runner/Dockerfile-2.2.2.dockerfile new file mode 100644 index 00000000..06f3ea66 --- /dev/null +++ b/github-actions-runner/Dockerfile-2.2.2.dockerfile @@ -0,0 +1,227 @@ +FROM condaforge/mambaforge:latest +LABEL io.github.snakemake.containerized="true" +LABEL io.github.snakemake.conda_env_hash="77eaa388d65d5205b87324fb0adb89561bc0e532a328995990a1d580aeb894ae" + +# Step 1: Retrieve conda environments + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml +# prefix: /conda-envs/5681728a49bd83ceed09ba194330c858 +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - bwa ==0.7.17 +RUN mkdir -p /conda-envs/5681728a49bd83ceed09ba194330c858 +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml +# prefix: /conda-envs/08d4368302a4bdf7eda6b536495efe7d +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - fastqc ==0.11.9 +RUN mkdir -p /conda-envs/08d4368302a4bdf7eda6b536495efe7d +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml + +# Conda environment: +# source: https://raw.githubusercontent.com/friendsofstrandseq/ashleys-qc-pipeline/2.2.2/workflow/envs/ashleys_base.yaml +# prefix: /conda-envs/87c04f5d115eff742eca84455513deba +# name: ashleys_base +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - samtools +# - tabix +# - bwa +# - sambamba +# - mosaicatcher +# # - alfred +# - ashleys-qc +# - pandas +# # PUBLISHDIR +# - rsync +# # MULTIQC +# - multiqc +# # Fix sklearn update +# - scikit-learn=1.2.2 +RUN mkdir -p /conda-envs/87c04f5d115eff742eca84455513deba +ADD https://raw.githubusercontent.com/friendsofstrandseq/ashleys-qc-pipeline/2.2.2/workflow/envs/ashleys_base.yaml /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml + +# Conda environment: +# source: 
https://raw.githubusercontent.com/friendsofstrandseq/ashleys-qc-pipeline/2.2.2/workflow/envs/ashleys_rtools.yaml +# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +# name: rtools +# channels: +# - conda-forge +# - bioconda +# - r +# - anaconda +# dependencies: +# # - bioconductor-biocparallel +# # - bioconductor-bsgenome +# # - bioconductor-bsgenome.hsapiens.ucsc.hg19 +# # - bioconductor-bsgenome.hsapiens.ucsc.hg38 +# # - bioconductor-fastseg +# # - bioconductor-genomicalignments +# - bioconductor-genomicranges +# # - bioconductor-rsamtools +# # - bioconductor-s4vectors +# - r-assertthat +# - r-base +# # - r-biocmanager +# - r-cowplot +# - r-data.table +# # - r-devtools +# # - r-doparallel +# # - r-foreach +# - r-ggplot2 +# # - r-gtools +# - r-reshape2 +# # - r-zoo +# # - r-dplyr +# # - r-mc2d +# # - r-pheatmap +# # - bioconductor-complexheatmap +# # - r-gplots +# - r-scales +# - r-rcolorbrewer +# # - r-stringr +# - r-cairo +# - fonts-anaconda +# # NEW +# - bioconductor-edger +# - r-r.utils +# # PLATE PLOT +# - r-dplyr +# - r-platetools +# - r-viridis +# # GC_correction +# - r-tidyr +# - r-ggpubr +# # SOLVE R lib issue +# - r-stringi=1.7.12 +RUN mkdir -p /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +ADD https://raw.githubusercontent.com/friendsofstrandseq/ashleys-qc-pipeline/2.2.2/workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml + +# Conda environment: +# source: workflow/envs/mc_base.yaml +# prefix: /conda-envs/c80307395eddf442c2fb6870f40d822b +# name: mc-base +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - pandas +# - intervaltree +# - scipy +# - pysam +# - tqdm +# - perl +# - pypdf2 +# - parmap +# # NEW +# - pyyaml +# - seaborn +# - matplotlib +# # SOLVE se-pe detection +# - samtools +# # ArbiGent Hufsah deps +# - pytables +# - xopen +RUN mkdir -p /conda-envs/c80307395eddf442c2fb6870f40d822b +COPY workflow/envs/mc_base.yaml /conda-envs/c80307395eddf442c2fb6870f40d822b/environment.yaml + +# Conda environment: +# source: workflow/envs/mc_bioinfo_tools.yaml +# prefix: /conda-envs/f251d84cdc9f25d0e14b48e780261d66 +# name: mc-bioinfo-tools +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - bcftools +# - freebayes +# - mosaicatcher +# - samtools +# - tabix +# - whatshap +RUN mkdir -p /conda-envs/f251d84cdc9f25d0e14b48e780261d66 +COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e780261d66/environment.yaml + +# Conda environment: +# source: workflow/envs/rtools.yaml +# prefix: /conda-envs/598c87b6c764d05e0c66953cc67f2931 +# name: rtools +# channels: +# - bioconda +# - conda-forge +# - r +# - anaconda +# dependencies: +# # # NEW +# - strandphaser +# # ############### +# - bioconductor-biocparallel +# - bioconductor-bsgenome +# - bioconductor-bsgenome.hsapiens.ucsc.hg38 +# - bioconductor-complexheatmap +# # - bioconductor-fastseg +# - bioconductor-genomicalignments +# - bioconductor-genomicranges +# - bioconductor-rsamtools +# # - bioconductor-s4vectors +# - fonts-anaconda +# - r-assertthat +# - r-base +# - r-biocmanager +# - r-cairo +# - r-cowplot +# - r-data.table +# - r-devtools +# - r-doparallel +# - r-dplyr +# - r-foreach +# - r-ggplot2 +# - r-gplots +# - r-gtools +# - r-mc2d +# - r-rcolorbrewer +# - r-reshape2 +# - r-scales +# - r-stringr +# # SV_CALLS_DEV +# # - r-zoo +# - r-r.utils +# - r-ggnewscale +# # HEATMAP +# - r-tidyr +# # ARBIGENT +# - r-reshape +# - r-optparse +# - r-tidyr +# - r-ggbeeswarm +# - r-pheatmap +# # GC_correction +# - r-ggpubr +# - bioconductor-edger +# # 
SOLVE R lib issue +# - r-stringi=1.7.12 +RUN mkdir -p /conda-envs/598c87b6c764d05e0c66953cc67f2931 +COPY workflow/envs/rtools.yaml /conda-envs/598c87b6c764d05e0c66953cc67f2931/environment.yaml + +# Step 2: Generate conda environments + +RUN mamba env create --prefix /conda-envs/5681728a49bd83ceed09ba194330c858 --file /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml && \ + mamba env create --prefix /conda-envs/08d4368302a4bdf7eda6b536495efe7d --file /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml && \ + mamba env create --prefix /conda-envs/87c04f5d115eff742eca84455513deba --file /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml && \ + mamba env create --prefix /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 --file /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml && \ + mamba env create --prefix /conda-envs/c80307395eddf442c2fb6870f40d822b --file /conda-envs/c80307395eddf442c2fb6870f40d822b/environment.yaml && \ + mamba env create --prefix /conda-envs/f251d84cdc9f25d0e14b48e780261d66 --file /conda-envs/f251d84cdc9f25d0e14b48e780261d66/environment.yaml && \ + mamba env create --prefix /conda-envs/598c87b6c764d05e0c66953cc67f2931 --file /conda-envs/598c87b6c764d05e0c66953cc67f2931/environment.yaml && \ + mamba clean --all -y diff --git a/github-actions-runner/Dockerfile-2.2.3.dockerfile b/github-actions-runner/Dockerfile-2.2.3.dockerfile new file mode 100644 index 00000000..aa4d1c42 --- /dev/null +++ b/github-actions-runner/Dockerfile-2.2.3.dockerfile @@ -0,0 +1,299 @@ +FROM condaforge/mambaforge:latest +LABEL io.github.snakemake.containerized="true" +LABEL io.github.snakemake.conda_env_hash="8c338e2bbe95ae23ac438e1ac650a859ed4dbb9a77747c17f62707ea2f67a667" + +# Step 1: Retrieve conda environments + +# Conda environment: +# source: ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml +# prefix: /conda-envs/87c04f5d115eff742eca84455513deba +# name: ashleys_base +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - samtools +# - tabix +# - bwa +# - sambamba +# - mosaicatcher +# # - alfred +# - ashleys-qc +# - pandas +# # PUBLISHDIR +# - rsync +# # MULTIQC +# - multiqc +# # Fix sklearn update +# - scikit-learn=1.2.2 +RUN mkdir -p /conda-envs/87c04f5d115eff742eca84455513deba +COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_base.yaml /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml + +# Conda environment: +# source: ../ashleys-qc-pipeline/workflow/envs/ashleys_rtools.yaml +# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +# name: rtools +# channels: +# - conda-forge +# - bioconda +# - r +# - anaconda +# dependencies: +# # - bioconductor-biocparallel +# # - bioconductor-bsgenome +# # - bioconductor-bsgenome.hsapiens.ucsc.hg19 +# # - bioconductor-bsgenome.hsapiens.ucsc.hg38 +# # - bioconductor-fastseg +# # - bioconductor-genomicalignments +# - bioconductor-genomicranges +# # - bioconductor-rsamtools +# # - bioconductor-s4vectors +# - r-assertthat +# - r-base +# # - r-biocmanager +# - r-cowplot +# - r-data.table +# # - r-devtools +# # - r-doparallel +# # - r-foreach +# - r-ggplot2 +# # - r-gtools +# - r-reshape2 +# # - r-zoo +# # - r-dplyr +# # - r-mc2d +# # - r-pheatmap +# # - bioconductor-complexheatmap +# # - r-gplots +# - r-scales +# - r-rcolorbrewer +# # - r-stringr +# - r-cairo +# - fonts-anaconda +# # NEW +# - bioconductor-edger +# - r-r.utils +# # PLATE PLOT +# - r-dplyr +# - r-platetools +# - r-viridis +# # GC_correction +# - r-tidyr +# - r-ggpubr +# # SOLVE R lib issue +# - r-stringi=1.7.12 +RUN mkdir -p 
/conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +COPY ../ashleys-qc-pipeline/workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml +# prefix: /conda-envs/5681728a49bd83ceed09ba194330c858 +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - bwa ==0.7.17 +RUN mkdir -p /conda-envs/5681728a49bd83ceed09ba194330c858 +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml +# prefix: /conda-envs/08d4368302a4bdf7eda6b536495efe7d +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - fastqc ==0.11.9 +RUN mkdir -p /conda-envs/08d4368302a4bdf7eda6b536495efe7d +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml + +# Conda environment: +# source: workflow/envs/mc_base.yaml +# prefix: /conda-envs/c80307395eddf442c2fb6870f40d822b +# name: mc-base +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - pandas +# - intervaltree +# - scipy +# - pysam +# - tqdm +# - perl +# - pypdf2 +# - parmap +# # NEW +# - pyyaml +# - seaborn +# - matplotlib +# # SOLVE se-pe detection +# - samtools +# # ArbiGent Hufsah deps +# - pytables +# - xopen +RUN mkdir -p /conda-envs/c80307395eddf442c2fb6870f40d822b +COPY workflow/envs/mc_base.yaml /conda-envs/c80307395eddf442c2fb6870f40d822b/environment.yaml + +# Conda environment: +# source: workflow/envs/mc_bioinfo_tools.yaml +# prefix: /conda-envs/f251d84cdc9f25d0e14b48e780261d66 +# name: mc-bioinfo-tools +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - bcftools +# - freebayes +# - mosaicatcher +# - samtools +# - tabix +# - whatshap +RUN mkdir -p /conda-envs/f251d84cdc9f25d0e14b48e780261d66 +COPY workflow/envs/mc_bioinfo_tools.yaml /conda-envs/f251d84cdc9f25d0e14b48e780261d66/environment.yaml + +# Conda environment: +# source: workflow/envs/rtools.yaml +# prefix: /conda-envs/598c87b6c764d05e0c66953cc67f2931 +# name: rtools +# channels: +# - bioconda +# - conda-forge +# - r +# - anaconda +# dependencies: +# # # NEW +# - strandphaser +# # ############### +# - bioconductor-biocparallel +# - bioconductor-bsgenome +# - bioconductor-bsgenome.hsapiens.ucsc.hg38 +# - bioconductor-complexheatmap +# # - bioconductor-fastseg +# - bioconductor-genomicalignments +# - bioconductor-genomicranges +# - bioconductor-rsamtools +# # - bioconductor-s4vectors +# - fonts-anaconda +# - r-assertthat +# - r-base +# - r-biocmanager +# - r-cairo +# - r-cowplot +# - r-data.table +# - r-devtools +# - r-doparallel +# - r-dplyr +# - r-foreach +# - r-ggplot2 +# - r-gplots +# - r-gtools +# - r-mc2d +# - r-rcolorbrewer +# - r-reshape2 +# - r-scales +# - r-stringr +# # SV_CALLS_DEV +# # - r-zoo +# - r-r.utils +# - r-ggnewscale +# # HEATMAP +# - r-tidyr +# # ARBIGENT +# - r-reshape +# - r-optparse +# - r-tidyr +# - r-ggbeeswarm +# - r-pheatmap +# # GC_correction +# - r-ggpubr +# - bioconductor-edger +# # SOLVE R lib issue +# - r-stringi=1.7.12 +RUN mkdir -p /conda-envs/598c87b6c764d05e0c66953cc67f2931 +COPY workflow/envs/rtools.yaml /conda-envs/598c87b6c764d05e0c66953cc67f2931/environment.yaml + +# Conda environment: +# source: 
workflow/envs/scNOVA/scNOVA_DL.yaml +# prefix: /conda-envs/1ede379ce8d378df7dca25b2bf4111f3 +# name: scNOVA_DL +# channels: +# - conda-forge +# - anaconda +# dependencies: +# - tensorflow=1.15.0 +# - scikit-learn=0.21.3 +# - python=3.7.4 +# - matplotlib=3.1.1 +# - pandas=0.25.3 +# - h5py=2.10.0 +# - numpy +# # scNOVA archive +# - unzip +# # Fix +RUN mkdir -p /conda-envs/1ede379ce8d378df7dca25b2bf4111f3 +COPY workflow/envs/scNOVA/scNOVA_DL.yaml /conda-envs/1ede379ce8d378df7dca25b2bf4111f3/environment.yaml + +# Conda environment: +# source: workflow/envs/scNOVA/scNOVA_R.yaml +# prefix: /conda-envs/193f60d48796dd17eb847ea689b863a9 +# name: scNOVA +# channels: +# - bioconda +# - conda-forge +# - r +# dependencies: +# - bioconductor-deseq2=1.30.0 +# - r-matrixstats=0.58.0 +# - r-pheatmap=1.0.12 +# - r-gplots=3.1.1 +# - r-umap=0.2.7.0 +# - r-rtsne=0.15 +# - r-factoextra=1.0.7 +# - r-pracma=2.3.3 +# - bioconductor-chromvar=1.12.0 +# - r-nabor=0.5.0 +# - bioconductor-motifmatchr=1.12.0 +# - bioconductor-bsgenome.hsapiens.ucsc.hg38=1.4.3 +# - bioconductor-jaspar2016=1.18.0 +# - r-codetools=0.2_18 +# - r-fitdistrplus +# - r-doparallel +# - r-foreach +RUN mkdir -p /conda-envs/193f60d48796dd17eb847ea689b863a9 +COPY workflow/envs/scNOVA/scNOVA_R.yaml /conda-envs/193f60d48796dd17eb847ea689b863a9/environment.yaml + +# Conda environment: +# source: workflow/envs/scNOVA/scNOVA_bioinfo_tools.yaml +# prefix: /conda-envs/ca9641251a8cb0057003875ad776c49f +# name: scNOVA_bioinfo_tools +# channels: +# - conda-forge +# - bioconda +# - anaconda +# dependencies: +# - samtools +# - biobambam +# - bedtools +RUN mkdir -p /conda-envs/ca9641251a8cb0057003875ad776c49f +COPY workflow/envs/scNOVA/scNOVA_bioinfo_tools.yaml /conda-envs/ca9641251a8cb0057003875ad776c49f/environment.yaml + +# Step 2: Generate conda environments + +RUN mamba env create --prefix /conda-envs/87c04f5d115eff742eca84455513deba --file /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml && \ + mamba env create --prefix /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 --file /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml && \ + mamba env create --prefix /conda-envs/5681728a49bd83ceed09ba194330c858 --file /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml && \ + mamba env create --prefix /conda-envs/08d4368302a4bdf7eda6b536495efe7d --file /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml && \ + mamba env create --prefix /conda-envs/c80307395eddf442c2fb6870f40d822b --file /conda-envs/c80307395eddf442c2fb6870f40d822b/environment.yaml && \ + mamba env create --prefix /conda-envs/f251d84cdc9f25d0e14b48e780261d66 --file /conda-envs/f251d84cdc9f25d0e14b48e780261d66/environment.yaml && \ + mamba env create --prefix /conda-envs/598c87b6c764d05e0c66953cc67f2931 --file /conda-envs/598c87b6c764d05e0c66953cc67f2931/environment.yaml && \ + mamba env create --prefix /conda-envs/1ede379ce8d378df7dca25b2bf4111f3 --file /conda-envs/1ede379ce8d378df7dca25b2bf4111f3/environment.yaml && \ + mamba env create --prefix /conda-envs/193f60d48796dd17eb847ea689b863a9 --file /conda-envs/193f60d48796dd17eb847ea689b863a9/environment.yaml && \ + mamba env create --prefix /conda-envs/ca9641251a8cb0057003875ad776c49f --file /conda-envs/ca9641251a8cb0057003875ad776c49f/environment.yaml && \ + mamba clean --all -y +# CUSTOM PART +RUN wget https://zenodo.org/record/7697400/files/BSgenome.T2T.CHM13.V2_1.0.0.tar.gz -P /workflow/data/ref_genomes/ +COPY /workflow/scripts/utils/install_R_package.R /conda-envs/ +RUN chmod -R 0777 
/conda-envs/598c87b6c764d05e0c66953cc67f2931/lib/R/library && /conda-envs/598c87b6c764d05e0c66953cc67f2931/bin/Rscript /conda-envs/install_R_package.R /workflow/data/ref_genomes/BSgenome.T2T.CHM13.V2_1.0.0.tar.gz

diff --git a/github-actions-runner/add_T2T_part_to_Dockerfile.sh b/github-actions-runner/add_T2T_part_to_Dockerfile.sh
new file mode 100644
index 00000000..7c631edd
--- /dev/null
+++ b/github-actions-runner/add_T2T_part_to_Dockerfile.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Check if a Dockerfile path is provided
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <Dockerfile>"
+    exit 1
+fi
+
+DOCKERFILE=$1
+
+# Check if the Dockerfile exists
+if [ ! -f "$DOCKERFILE" ]; then
+    echo "Dockerfile not found: $DOCKERFILE"
+    exit 1
+fi
+
+# Extract the R environment variable
+Renv=$(grep -P "\/rtools.*environment\.yaml" "$DOCKERFILE" | sed "s/\//\t/g" | cut -f 5)
+
+# Check if Renv is extracted
+if [ -z "$Renv" ]; then
+    echo "R environment variable not found in the Dockerfile."
+    exit 1
+fi
+
+# Append custom steps to the Dockerfile
+{
+    echo ""
+    echo "# CUSTOM PART"
+    echo "RUN wget https://zenodo.org/record/7697400/files/BSgenome.T2T.CHM13.V2_1.0.0.tar.gz -P /workflow/data/ref_genomes/"
+    echo "COPY /workflow/scripts/utils/install_R_package.R /conda-envs/"
+    echo "RUN chmod -R 0777 /conda-envs/$Renv/lib/R/library && /conda-envs/$Renv/bin/Rscript /conda-envs/install_R_package.R /workflow/data/ref_genomes/BSgenome.T2T.CHM13.V2_1.0.0.tar.gz"
+} >>"$DOCKERFILE"
+
+echo "Custom steps added to $DOCKERFILE"

diff --git a/watchdog_pipeline/watchdog_pipeline.py b/watchdog_pipeline/watchdog_pipeline.py
index c0f259f9..4c1a6614 100644
--- a/watchdog_pipeline/watchdog_pipeline.py
+++ b/watchdog_pipeline/watchdog_pipeline.py
@@ -46,7 +46,8 @@
 ]
 profile_dry_run = [
     "--profile",
-    "workflow/snakemake_profiles/local/conda_singularity/",
+    "workflow/snakemake_profiles/local/conda/",
+    # "workflow/snakemake_profiles/local/conda_singularity/",
     "-c",
     "1",
 ]
@@ -272,7 +273,7 @@ def check_unprocessed_folder(self):
         # last_message_timestamp = last_message_timestamp
 
         main_df = list()
-        if workflows_data:
+        if len(workflows_data) > 0:
             for plate in total_list_runs:
                 # print(plate)
                 if plate.split("-")[0][:2] == "20":
@@ -383,6 +384,7 @@ def check_unprocessed_folder(self):
         pd.options.display.max_rows = 999
         pd.options.display.max_colwidth = 30
         # pd.options.display.max_columns = 50
+        main_df = pd.DataFrame(main_df)
 
         # main_df.loc[(main_df["labels"] == True) & (main_df["report"] == True), "real_status"] = "Completed"
         main_df.loc[
@@ -418,7 +420,7 @@ def check_unprocessed_folder(self):
         main_df["real_status"] = main_df["real_status"].fillna(
             "Error (to investigate))"
         )
-
+        print(workflows_data["workflows"])
         print(main_df)
 
         dry_run_db = False
@@ -454,6 +456,9 @@ def check_unprocessed_folder(self):
                 e for e in workflows_data["workflows"] if e["id"] == workflow_id
             ]
 
+            print(panoptes_entry)
+            print(panoptes_data)
+
             if panoptes_data:
                 panoptes_data = panoptes_data[0]
                 if "completed_at" not in panoptes_data:
@@ -530,7 +535,7 @@ def check_unprocessed_folder(self):
             for row in main_df.loc[
                 # (main_df["multiqc_scratch"] == False)
                 (main_df["multiqc_scratch"] == False)
-                & (main_df["report"] == False)
+                # & (main_df["report"] == False)
             ].to_dict("records"):
                 logging.info(row)
 
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 5acf31fe..4262bd44 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -19,19 +19,29 @@ if config["ashleys_pipeline"] is True:
 
     module ashleys_qc:
         snakefile:
-        github(
-            "friendsofstrandseq/ashleys-qc-pipeline",
-            
path="workflow/Snakefile", - tag=str(config["ashleys_pipeline_version"]), - ) + "../../ashleys-qc-pipeline/workflow/Snakefile" + # github( + # "friendsofstrandseq/ashleys-qc-pipeline", + # path="workflow/Snakefile", + # tag=str(config["ashleys_pipeline_version"]), + # ) config: config use rule * from ashleys_qc as ashleys_* - localrules: - ashleys_genecore_symlink, - symlink_selected_bam, + if config["ashleys_pipeline_only"] is True: + + localrules: + ashleys_genecore_symlink, + ashleys_symlink_selected_bam, + + else: + + localrules: + ashleys_genecore_symlink, + ashleys_symlink_selected_bam, + symlink_selected_bam, else: diff --git a/workflow/envs/scNOVA/scNOVA_DL.yaml b/workflow/envs/scNOVA/scNOVA_DL.yaml index 8530fdf8..775c36d8 100644 --- a/workflow/envs/scNOVA/scNOVA_DL.yaml +++ b/workflow/envs/scNOVA/scNOVA_DL.yaml @@ -12,3 +12,4 @@ dependencies: - numpy # scNOVA archive - unzip + # Fix diff --git a/workflow/rules/aggregate_fct.smk b/workflow/rules/aggregate_fct.smk index 278d45b9..5de9c6e1 100644 --- a/workflow/rules/aggregate_fct.smk +++ b/workflow/rules/aggregate_fct.smk @@ -169,7 +169,7 @@ def aggregate_cells_scTRIP_multiplot(wildcards): cell_list = df.cell.tolist() return expand( - "{folder}/{sample}/plots/scTRIP_multiplot/{cell}/{chrom}.png", + "{folder}/{sample}/plots/scTRIP_multiplot/{cell}/{chrom}.pdf", folder=config["data_location"], sample=wildcards.sample, cell=cell_list, diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 4c42e1b8..7af4b6b4 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -11,6 +11,12 @@ import os, sys os.environ["LC_CTYPE"] = "C" +# print(config["data_location"]) + +if config["ashleys_pipeline"] is True and config["genecore"] is True: + config["data_location"] = "/".join(config["data_location"].split("/")[:-1]) + + envvars: "LC_CTYPE", @@ -131,6 +137,9 @@ class HandleInput: genecore=False, genecore_path=str, ): + # print(input_path) + # print(genecore_path) + # print("\n") if genecore is False: df_config_files = self.handle_input_data(thisdir=input_path, bam=bam) elif genecore is True: @@ -154,56 +163,69 @@ class HandleInput: Returns: _type_: _description_ """ - complete_df_list = list() + from pprint import pprint + from collections import Counter - # List of folders/files to not consider (restrict to samples only) - l = sorted( - [ - e - for e in os.listdir( - "{genecore_prefix}/{date_folder}".format( - genecore_prefix=config["genecore_prefix"], - date_folder=config["genecore_date_folder"], - ) - ) - if e.endswith(".txt.gz") - ] + directory_path = f"{config['genecore_prefix']}/{config['genecore_date_folder']}" + + l = sorted([e for e in os.listdir(directory_path) if e.endswith(".txt.gz")]) + + complete_df_list = list() + # print(thisdir) + genecore_prefix = config["genecore_prefix"] + date_folder = config["genecore_date_folder"] + # print(f"{genecore_prefix}/{date_folder}") + + # Pattern to extract sample name and index + pattern = re.compile(r"(.*_lane1)(.*?)(iTRU|PE20)(.*?)(\d{2})(?:_1_|_2_)") + + samples = list() + prefixes = list() + indexes = list() + plate_types = list() + d_master = collections.defaultdict( + lambda: { + "indexes": set(), + "file_prefix": "", + "plate_type": "", + "index_pattern": "", + } ) - # print(l) - # Create a list of files to process for each sample - d_master = collections.defaultdict(dict) - sub_l = list() - for j, e in enumerate(l): - sub_l.append(e) - if (j + 1) % 192 == 0: - common_element = findstem(sub_l) - l_elems = common_element.split("lane1") - # print(sub_l) - # 
print(common_element) - # print(l_elems) - # print(l_elems[1].split("{regex_element}".format(regex_element=config["genecore_regex_element"])) - prefix = l_elems[0] - # technician_name = l_elems[0].split("_")[-2] - sample = l_elems[1].split( - "{regex_element}".format( - regex_element=config["genecore_regex_element"] - ) - )[0] - index = l_elems[1].split( - "{regex_element}".format( - regex_element=config["genecore_regex_element"] + + # First pass: Count occurrences of each sample_name + file_counts_per_sample = Counter() + for file_path in l: + match = pattern.search(file_path) + if match: + sample_name = match.group(2) + file_counts_per_sample[sample_name] += 1 + + # Second pass: Process files and determine plate type per sample + for j, file_path in enumerate(sorted(l)): + match = pattern.search(file_path) + if match: + sample_name = match.group(2) + index = match.group(4) + indexes.append(index) + d_master[sample_name]["indexes"].add(index) + file_count = file_counts_per_sample[sample_name] + + # Determine plate type using modulo 96 operation + if file_count % 96 != 0: + raise ValueError( + f"Invalid file count for sample {sample_name} with file count {file_count}. Must be a multiple of 96." ) - )[1] - # pe_index = common_element[-1] - sub_l = list() - - d_master[sample]["prefix"] = prefix - # d_master[sample]["technician_name"] = technician_name - d_master[sample]["index"] = index - d_master[sample]["common_element"] = common_element - # from pprint import pprint - # pprint(d_master) - # exit() + plate_type = int(file_count / 2) + + if (j + 1) % file_count == 0: + prefixes.append(match.group(3)) + d_master[sample_name]["file_prefix"] = match.group(1) + d_master[sample_name]["index_pattern"] = match.group(3) + plate = directory_path.split("/")[-1] + samples.append(sample_name) + plate_types.append(plate_type) + d_master[sample_name]["plate_type"] = plate_type + samples_to_process = ( config["samples_to_process"] if len(config["samples_to_process"]) > 0 @@ -220,8 +242,8 @@ class HandleInput: "{data_location}/{sample}/fastq/{sample}{regex_element}{index}{cell_nb}.{pair}.fastq.gz", data_location=config["data_location"], sample=sample, - regex_element=config["genecore_regex_element"], - index=d_master[sample]["index"], + regex_element=d_master[sample]["index_pattern"], + index=d_master[sample]["indexes"], cell_nb=[str(e).zfill(2) for e in list(range(1, 97))], pair=["1", "2"], ) @@ -229,7 +251,8 @@ class HandleInput: if sample in samples_to_process ] genecore_list = [sub_e for e in genecore_list for sub_e in e] - # pprint(genecore_list) + # pprint(d_master) + complete_df_list = list() for sample in d_master: @@ -248,11 +271,12 @@ class HandleInput: df["Full_path"] = df[["Folder", "File"]].apply( lambda r: f"{r['Folder']}/{r['File']}.fastq.gz", axis=1 ) + df["Genecore_path"] = df["File"].apply( - lambda r: f"{config['genecore_prefix']}/{config['genecore_date_folder']}/{d_master[sample]['prefix']}lane1{r.replace('.', '_')}_sequence.txt.gz" + lambda r: f"{config['genecore_prefix']}/{config['genecore_date_folder']}/{d_master[sample]['file_prefix']}{r.replace('.', '_')}_sequence.txt.gz" ) df["Genecore_file"] = df["File"].apply( - lambda r: f"{d_master[sample]['prefix']}lane1{r.replace('.', '_')}" + lambda r: f"{d_master[sample]['file_prefix']}{r.replace('.', '_')}" ) df["Genecore_file"] = df["Genecore_file"].apply( lambda r: "_".join(r.split("_")[:-1]) @@ -375,12 +399,18 @@ def findstem(arr): # Create configuration file with samples +# print("config['data_location']") +# 
print(config["data_location"]) + c = HandleInput( input_path=config["data_location"], - genecore_path="{genecore_prefix}/{genecore_date_folder}".format( + genecore_path="{genecore_prefix}".format( genecore_prefix=config["genecore_prefix"], - genecore_date_folder=config["genecore_date_folder"], ), + # genecore_path="{genecore_prefix}/{genecore_date_folder}".format( + # genecore_prefix=config["genecore_prefix"], + # genecore_date_folder=config["genecore_date_folder"], + # ), output_path="{data_location}/config/config_df.tsv".format( data_location=config["data_location"] ), @@ -532,8 +562,12 @@ def onsuccess_fct(log): log, "SUCCESS", config, config_metadata ) shell( - 'mail -s "[Snakemake] smk-wf-catalog/mosacaitcher-pipeline v{} - Run on {} - SUCCESS" {} < {}'.format( - config["version"], config["data_location"], config["email"], log_path_new + 'mail -s "[smk-wf-catalog/mosaicatcher-pipeline] v{} - [{}--{}] - SUCCESS" {} < {}'.format( + config["version"], + config["data_location"].split("/")[-1], + ";".join(samples), + config["email"], + log_path_new, ) ) @@ -546,8 +580,12 @@ def onerror_fct(log): log, "ERROR", config, config_metadata ) shell( - 'mail -s "[Snakemake] smk-wf-catalog/mosacaitcher-pipeline v{} - Run on {} - ERRROR" {} < {}'.format( - config["version"], config["data_location"], config["email"], log_path_new + 'mail -s "[smk-wf-catalog/mosaicatcher-pipeline] v{} - [{}--{}] - ERROR" {} < {}'.format( + config["version"], + config["data_location"].split("/")[-1], + ";".join(samples), + config["email"], + log_path_new, ) ) @@ -564,308 +602,26 @@ def get_scnova_final_output(wildcards): # abbreviate_names = False l = [ - # expand( - # "{folder}/{sample}/scNOVA_input_user/{clone}_sv_calls_all_print.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # clone=clones[wildcards.sample], - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_{clone}_orientation_CN_correct0.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_input_user/sv_calls_all_print_CREs.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/{sample}.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/{sample}_sort.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/{sample}_sort_geneid.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sc.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_chr_length_{sample}.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_chr_length_{sample}_sc.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # 
"{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sort.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sort_lab.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sort_lab_final.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_{sample}_{clone}_orientation_norm_qc.pdf", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_{clone}_orientation_norm.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sc_sort.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sc_sort_lab.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Deeptool_Genes_for_CNN_{sample}_sc_sort_lab_final.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_{sample}_{clone}_Resid_orientation_qc.pdf", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_{clone}_Resid_orientation.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_all_orientation_norm_var_GC_CpG_RT_T_comb3_{clone}.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Expression_all_{clone}.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/Features_reshape_all_TSS_matrix_woM_all_RT_{clone}.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train80_output_ypred_{clone}.csv", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train40_output_ypred_{clone}.csv", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train20_output_ypred_{clone}.csv", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train5_output_ypred_{clone}.csv", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train80_output_ypred_{clone}_annot.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # 
"{folder}/{sample}/scNOVA_result_CNN/DNN_train40_output_ypred_{clone}_annot.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train20_output_ypred_{clone}_annot.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_CNN/DNN_train5_output_ypred_{clone}_annot.txt", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_plots/Result_scNOVA_plots_{sample}.pdf", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/result_PLSDA_{sample}.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), expand( "{folder}/{sample}/scNOVA_result_plots/Result_scNOVA_plots_{sample}_alternative_PLSDA.pdf", folder=config["data_location"], sample=wildcards.sample, ), - # expand( - # "{folder}/{sample}/scNOVA_result/{sample}_CREs_2kb.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/{sample}_CREs_2kb_sort.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result/{sample}_CREs_2kb_sort_num.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), expand( "{folder}/{sample}/scNOVA_result/{sample}_CREs_2kb_sort_num_sort_for_chromVAR.txt", folder=config["data_location"], sample=wildcards.sample, ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.W1.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.W2.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.C1.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.C2.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.W.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.C.bam", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.W.bam.bai", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.C.bam.bai", - # cell=cell_per_sample[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_nucleosomes_bam/nucleosome_sampleA/result.H1.bam", - # 
folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_nucleosomes_bam/nucleosome_sampleB/result.H2.bam", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_input_user/strandphaser_output_copy.txt", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), - # expand( - # "{folder}/{sample}/scNOVA_result_haplo/Deeptool_DHS_2kb_H1H2.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), expand( "{folder}/{sample}/scNOVA_result_haplo/Deeptool_DHS_2kb_H1H2_sort.txt", folder=config["data_location"], sample=wildcards.sample, ), - # expand( - # "{folder}/{sample}/scNOVA_result_haplo/Deeptool_Genebody_H1H2.tab", - # folder=config["data_location"], - # sample=wildcards.sample, - # ), expand( "{folder}/{sample}/scNOVA_result_haplo/Deeptool_Genebody_H1H2_sort.txt", folder=config["data_location"], sample=wildcards.sample, ), - # expand( - # "{folder}/{sample}/scNOVA_bam_merge/{clone}.merge.bam", - # clone=clones[wildcards.sample], - # folder=config["data_location"], - # sample=wildcards.sample, - # ), ] l = [sub_e for e in l for sub_e in e] return l @@ -1096,16 +852,26 @@ def get_all_plots(wildcards): ), ) - # Run summary section + # Config section l_outputs.extend( expand( - "{folder}/{sample}/config/run_summary.txt", + "{folder}/{sample}/config/config.yaml", folder=config["data_location"], sample=wildcards.sample, ), ) + # Run summary section + + # l_outputs.extend( + # expand( + # "{folder}/{sample}/config/run_summary.txt", + # folder=config["data_location"], + # sample=wildcards.sample, + # ), + # ) + # from pprint import pprint # pprint(l_outputs) return l_outputs diff --git a/workflow/rules/count.smk b/workflow/rules/count.smk index 080d64b7..f1a0e74e 100755 --- a/workflow/rules/count.smk +++ b/workflow/rules/count.smk @@ -136,6 +136,7 @@ rule symlink_selected_bam: rule remove_unselected_bam: input: + labels="{folder}/{sample}/cell_selection/labels.tsv", bam=unselected_input_bam, bai=unselected_input_bai, output: @@ -196,7 +197,7 @@ if ( "../envs/mc_base.yaml" shell: """ - workflow/scripts/normalization/merge-blacklist.py --merge_distance 500000 {input.norm} --whitelist {input.whitelist} --min_whitelist_interval_size {params.window} > {output.merged} 2>> {log} + workflow/scripts/normalization/merge-blacklist.py --merge_distance 500000 {input.norm} --whitelist {input.whitelist} --min_whitelist_interval_size {params.window} --output {output.merged} """ else: diff --git a/workflow/rules/plots.smk b/workflow/rules/plots.smk index 1acc1e55..221c8610 100644 --- a/workflow/rules/plots.smk +++ b/workflow/rules/plots.smk @@ -17,7 +17,7 @@ if config["ashleys_pipeline"] is False: # "{folder}/{sample}/plots/counts/CountComplete.raw.pdf", report( "{folder}/{sample}/plots/counts/CountComplete.raw.pdf", - category="Mosaic Counts", + category="Mosaic counts", subcategory="{sample}", labels={"Cell": "ALL", "Type": "raw"}, ), @@ -40,7 +40,7 @@ rule divide_pdf: report( "{folder}/{sample}/plots/counts_raw/{cell}.{i, \d+}.pdf", caption="../report/mosaic_counts.rst", - category="Mosaic counts", + category="Mosaic counts cellwise", subcategory="{sample}", labels={"Cell": "{cell}", "Nb": "{i}", "Type": "raw"}, ), @@ -306,7 +306,7 @@ rule scTRIP_multiplot: sv_counts="{folder}/{sample}/mosaiclassifier/sv_calls/stringent_filterTRUE.tsv", output: figure=report( - "{folder}/{sample}/plots/scTRIP_multiplot/{cell}/{chrom}.png", + 
"{folder}/{sample}/plots/scTRIP_multiplot/{cell}/{chrom}.pdf", category="scTRIP multiplot", subcategory="{sample}", labels={"Cell": "{cell}", "Chrom": "{chrom}"}, @@ -315,6 +315,7 @@ rule scTRIP_multiplot: "{folder}/log/scTRIP_multiplot/{sample}/{cell}/{chrom}.log", conda: "../envs/rtools.yaml" + container: None resources: mem_mb=get_mem_mb, shell: diff --git a/workflow/rules/regenotyping.smk b/workflow/rules/regenotyping.smk index ebb451df..2bfae7b0 100644 --- a/workflow/rules/regenotyping.smk +++ b/workflow/rules/regenotyping.smk @@ -6,6 +6,7 @@ rule mergeBams: check=remove_unselected_fct, bam=selected_input_bam, bai=selected_input_bai, + labels="{folder}/{sample}/cell_selection/labels.tsv", output: temp("{folder}/{sample}/merged_bam/merged.raw.bam"), log: diff --git a/workflow/rules/scNOVA.smk b/workflow/rules/scNOVA.smk index 04c108d2..9f6c7c5b 100755 --- a/workflow/rules/scNOVA.smk +++ b/workflow/rules/scNOVA.smk @@ -1,8 +1,24 @@ +rule assert_list_of_cells: + input: + labels="{folder}/{sample}/cell_selection/labels.tsv", + subclone_list="{folder}/{sample}/scNOVA_input_user/input_subclonality.txt", + selected_cells="{folder}/{sample}/selected/", + output: + "{folder}/{sample}/scNOVA_input_user/assert_list_of_cells.txt", + log: + "{folder}/{sample}/log/assert_list_of_cells.log", + conda: + "../envs/mc_base.yaml" + script: + "../scripts/scNOVA_scripts/assert_list_of_cells.py" + + rule filter_sv_calls: log: "{folder}/{sample}/log/filter_sv_calls/{sample}.log", input: "{folder}/{sample}/mosaiclassifier/sv_calls/stringent_filterTRUE.tsv", + "{folder}/{sample}/scNOVA_input_user/assert_list_of_cells.txt", output: "{folder}/{sample}/scNOVA_input_user/sv_calls.tsv", conda: @@ -147,6 +163,7 @@ rule remove_dup: None input: bam="{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark.bam", + assert_list_of_cells="{folder}/{sample}/scNOVA_input_user/assert_list_of_cells.txt", output: bam_uniq="{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam", bam_metrix="{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono.metrix_dup.txt", @@ -272,6 +289,7 @@ rule filter_input_subclonality: None input: "{folder}/{sample}/scNOVA_input_user/input_subclonality.txt", + "{folder}/{sample}/scNOVA_input_user/assert_list_of_cells.txt", output: "{folder}/{sample}/scNOVA_input_user/input_subclonality_{clone}.txt", conda: @@ -973,6 +991,7 @@ rule split_bam_WC: None input: "{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam", + "{folder}/{sample}/scNOVA_input_user/assert_list_of_cells.txt", output: bam_header="{folder}/{sample}/scNOVA_bam_modified/{cell}.header_WC.sam", bam_C1="{folder}/{sample}/scNOVA_bam_modified/{cell}.sc_pre_mono_sort_for_mark_uniq.bam.C1.bam", diff --git a/workflow/rules/utils.smk b/workflow/rules/utils.smk index 39353b67..4eaf8464 100644 --- a/workflow/rules/utils.smk +++ b/workflow/rules/utils.smk @@ -139,3 +139,18 @@ rule samtools_faindex: mem_mb=get_mem_mb_heavy, shell: "samtools faidx {input}" + + +rule save_config: + input: + "config/config.yaml", + output: + "{folder}/{sample}/config/config.yaml", + log: + "{folder}/log/save_config/{sample}.log", + conda: + "../envs/mc_base.yaml" + resources: + mem_mb=get_mem_mb, + script: + "../scripts/utils/dump_config.py" diff --git a/workflow/scripts/normalization/merge-blacklist.py b/workflow/scripts/normalization/merge-blacklist.py index 9a484eec..750d3966 100755 --- a/workflow/scripts/normalization/merge-blacklist.py +++ b/workflow/scripts/normalization/merge-blacklist.py @@ 
-16,6 +16,7 @@ def main(): type=int, help="If the distance between two blacklisted intervals is below this threshold, they are merged.", ) + parser.add_argument("--output", default=None, help="Output file name") parser.add_argument( "--whitelist", default=None, help="TSV file with intervals to be removed from the blacklist (columns: chrom, start, end)." ) @@ -71,7 +72,7 @@ def main(): print("White listing: Removed", additional_whitelist, "bp of sequence for blacklist", file=sys.stderr) - norm_table.to_csv(sys.stdout, index=False, sep="\t") + norm_table.to_csv(args.output, index=False, sep="\t") ## Identify "complex" intervals # segments = calls.groupby(by=['chrom','start','end']).sv_call_name.agg({'is_complex':partial(is_complex, ignore_haplotypes=args.ignore_haplotypes, min_cell_count=args.min_cell_count)}).reset_index().sort_values(['chrom','start','end']) diff --git a/workflow/scripts/scNOVA_scripts/assert_list_of_cells.py b/workflow/scripts/scNOVA_scripts/assert_list_of_cells.py new file mode 100644 index 00000000..651fb7c6 --- /dev/null +++ b/workflow/scripts/scNOVA_scripts/assert_list_of_cells.py @@ -0,0 +1,57 @@ +import pandas as pd +import os + + +def main(labels_file, subclone_file, selected_folder, output_file): + # Read labels.tsv + labels_df = pd.read_csv(labels_file, sep="\t") + labels_cells = set( + labels_df["cell"].str.replace(".sort.mdup.bam", "").values.tolist() + ) + + # Read input_subclonality.txt + input_subclonality = pd.read_csv(subclone_file, sep="\t") + subclone_cells = set(input_subclonality["Filename"].values.tolist()) + + # List files in selected/ folder and process filenames + selected_cells = set( + file.replace(".sort.mdup.bam", "") + for file in os.listdir(selected_folder) + if file.endswith(".sort.mdup.bam") + ) + + # Compare sets + if labels_cells == subclone_cells == selected_cells: + result = "PASS: All cell lists match." + else: + result = "FAIL: Cell lists do not match." 
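+        # The set differences written to the output below pinpoint which cells
+        # diverge between labels.tsv, input_subclonality.txt and selected/.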
+ + # Logging details of the mismatch + with open(output_file, "w") as output: + output.write("Labels cells: {}\n".format(labels_cells)) + output.write("Subclone cells: {}\n".format(subclone_cells)) + output.write("Selected cells: {}\n".format(selected_cells)) + output.write("Discrepancy details:\n") + output.write( + "In labels but not in subclone: {}\n".format(labels_cells - subclone_cells) + ) + output.write( + "In subclone but not in labels: {}\n".format(subclone_cells - labels_cells) + ) + output.write( + "In labels but not in selected: {}\n".format(labels_cells - selected_cells) + ) + output.write( + "In selected but not in labels: {}\n".format(selected_cells - labels_cells) + ) + output.write(result) + + +if __name__ == "__main__": + # Extracting Snakemake input variables + labels_file = snakemake.input.labels + subclone_file = snakemake.input.subclone_list + selected_folder = snakemake.input.selected_cells + output_file = snakemake.output[0] + + main(labels_file, subclone_file, selected_folder, output_file) diff --git a/workflow/scripts/utils/dump_config.py b/workflow/scripts/utils/dump_config.py index 4701706a..6b299ee6 100644 --- a/workflow/scripts/utils/dump_config.py +++ b/workflow/scripts/utils/dump_config.py @@ -1,28 +1,22 @@ -import json -import time +import yaml -timestamp = time.strftime("%Y%m%d-%H%M%S") -configured_samples = [] -for key in config.keys(): - if not key.startswith("sample_description"): - continue - sample = key.split("_", 2)[-1] - configured_samples.append(sample) +def update_config(input_file, output_file): + # Load the existing config file + with open(input_file, "r") as file: + flat_file_config = yaml.safe_load(file) -if configured_samples: - second_dump = "config_{}_{}.json".format(timestamp, "_".join(sorted(configured_samples))) -else: - second_dump = "config_{}.json".format(timestamp) + # Update the config with Snakemake parameters + for key, value in snakemake.config.items(): + flat_file_config[key] = value -with open(output[0], "w") as fake: - _ = fake.write(second_dump + "\n(Full configuration dump)") + # Save the updated config to the output file + with open(output_file, "w") as file: + yaml.dump(flat_file_config, file) -with open(second_dump, "w") as dump: - json.dump( - config, - dump, - ensure_ascii=True, - indent=2, - sort_keys=True, - ) + +if __name__ == "__main__": + input_config = snakemake.input[0] + output_config = snakemake.output[0] + + update_config(input_config, output_config)
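
Note on the new cross-check: assert_list_of_cells.py boils down to set algebra
over three cell lists. A minimal standalone sketch of that comparison, using
hypothetical cell names and run outside Snakemake:

# Standalone sketch of the assert_list_of_cells comparison.
# The three sets stand in for labels.tsv, input_subclonality.txt and the
# selected/ folder; the cell names are hypothetical.
labels_cells = {"BM510_PE20401", "BM510_PE20402", "BM510_PE20403"}
subclone_cells = {"BM510_PE20401", "BM510_PE20402", "BM510_PE20403"}
selected_cells = {"BM510_PE20401", "BM510_PE20402"}

if labels_cells == subclone_cells == selected_cells:
    print("PASS: All cell lists match.")
else:
    print("FAIL: Cell lists do not match.")
    # Set differences identify the diverging cells, mirroring the script's log
    print("In labels but not in selected:", labels_cells - selected_cells)
    print("In selected but not in labels:", selected_cells - labels_cells)

With these inputs the sketch prints FAIL and reports BM510_PE20403 as present
in labels.tsv but missing from selected/, which is exactly the discrepancy the
new rule is meant to surface before scNOVA starts.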