From 8b57dac91884ee0cae74c95f44177c9028db25c0 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 09:16:17 +0000
Subject: [PATCH 01/12] Final results file to check if all outputs were
 produced, dump config update, plot_plate_dev for 384 well plate
 correspondence

---
 .tests/config/simple_config.yaml              | 157 +++----
 config/config.yaml                            |   5 +-
 workflow/Snakefile                            |   2 +-
 .../384_1A3C5E7G_correspondance_table.tsv     | 385 ++++++++++++++++++
 workflow/rules/common.smk                     | 165 +++++---
 workflow/rules/multiqc.smk                    |   4 +-
 workflow/rules/rules.smk                      |  42 +-
 workflow/scripts/plotting/plot_plate.R        |   2 +
 workflow/scripts/plotting/plot_plate_dev.R    | 109 +++++
 workflow/scripts/utils/dump_config.py         |  22 +
 10 files changed, 754 insertions(+), 139 deletions(-)
 create mode 100644 workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv
 create mode 100644 workflow/scripts/plotting/plot_plate_dev.R
 create mode 100644 workflow/scripts/utils/dump_config.py

diff --git a/.tests/config/simple_config.yaml b/.tests/config/simple_config.yaml
index 831386e..ecd378f 100644
--- a/.tests/config/simple_config.yaml
+++ b/.tests/config/simple_config.yaml
@@ -1,28 +1,36 @@
-version: 2.2.2
-# Option to display all potential options - listed in config_metadata.yaml
-list_commands: False
-## Data location - MUST BE AN ABSOULTE PATH (due to snakemake-symlink issues) - PLEASE MODIFY IT
-# input_bam_location: ".tests/data_CHR17"
-data_location: ".tests/data_CHR17"
-# Reference genome used by BWA to map FASTQ files
-# reference: sandbox.zenodo.org/record/1074721/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
-# Enable / Disable download of external files (1000G SNV & Fasta ref genome)
-dl_external_files: False
-# Enable / Disable multistep normalisation
-multistep_normalisation: False
-# Ashleys-qc binary classification threshold
-ashleys_threshold: 0.5
-# Enable / Disable FastQC analysis
-FastQC_analysis: False
-# To be informed of pipeline status
+# --------------------------------------------------------
+# Ashleys-QC pipeline Configuration
+# --------------------------------------------------------
+version: 2.2.3
+
+# Email for notifications about the pipeline's status
 email: ""
 
-############################################################################
-#                          ADVANCED PARAMETERS
-############################################################################
+# List of samples to process if multiple are specified
+samples_to_process: []
+
+# Plate size
+plate_size: 96
+
+# --------------------------------------------------------
+# Data location & I/O
+# --------------------------------------------------------
+
+# Absolute path to the data location (modify as needed)
+data_location: ".tests/data_CHR17"
+
+# Directory to publish important data (e.g., stats, plots, counts). Leave empty if not required.
+publishdir: ""
+
+# --------------------------------------------------------
+# Reference Data Configuration
+# --------------------------------------------------------
 
+# Reference genome used by BWA to map FASTQ files
 reference: "hg38"
 
+# Reference genome files' location
+
 references_data:
   "hg38":
     reference_fasta: ".tests/external_data/chr17.fa.gz"
@@ -31,70 +39,69 @@ references_data:
   "T2T":
     reference_fasta: "workflow/data/ref_genomes/T2T.fa"
 
-# Boolean parameters
-## Is the pipeline called used as a submodule in mosaicatcher-pipeline?
-mosaicatcher_pipeline: False
-## Enable/Disable hand selection through Jupyter Notebook
-hand_selection: False
+# List of chromosomes to process
+chromosomes:
+  - chr17
 
-# Window size used by mosaic binning algorithm
-window: 200000
+# Specify any chromosomes to exclude from processing
+chromosomes_to_exclude: []
 
-plottype_counts:
-  - "raw"
-  - "normalised"
+# --------------------------------------------------------
+# Quality Control Configuration
+# --------------------------------------------------------
 
-alfred_plots:
-  - "dist"
-  - "devi"
+# Threshold for Ashleys-qc binary classification
+ashleys_threshold: 0.5
 
-plate_orientation: landscape
+# Enable or disable the MultiQC report (aggregates the per-cell FastQC results)
+MultiQC: False
+
+# --------------------------------------------------------
+# Counts Configuration
+# --------------------------------------------------------
+
+# Enable or disable multistep normalisation analysis
+multistep_normalisation: False
+
+# Advanced parameters for multi-step normalisation
+multistep_normalisation_options:
+  min_reads_bin: 5
+  n_subsample: 1000
+  min_reads_cell: 100000
+
+# Window size used by the mosaic binning algorithm
+window: 200000
+
+# Enable or disable hand selection through the Jupyter Notebook
+hand_selection: False
+
+# --------------------------------------------------------
+# GENECORE Configuration
+# --------------------------------------------------------
 
-# Chromosomes list to process
-chromosomes:
-  - chr1
-  - chr2
-  - chr3
-  - chr4
-  - chr5
-  - chr6
-  - chr7
-  - chr8
-  - chr9
-  - chr10
-  - chr11
-  - chr12
-  - chr13
-  - chr14
-  - chr15
-  - chr16
-  - chr17
-  - chr18
-  - chr19
-  - chr20
-  - chr21
-  - chr22
-  - chrX
-  - chrY
-
-# GENECORE
 genecore: False
-samples_to_process: []
 genecore_date_folder: ""
-genecore_prefix: "/g/korbel/shared/genecore"
+genecore_prefix: "/g/korbel/STOCKS/Data/Assay/sequencing/2023"
+genecore_regex_element: "PE20"
 
-##### DEV only
+# --------------------------------------------------------
+# Internal Parameters
+# --------------------------------------------------------
 
-# Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purpose
-use_light_data: True
+# Is the pipeline used as a submodule in mosaicatcher-pipeline?
+mosaicatcher_pipeline: False
 
-# If specified, will copy important data (stats, plots, counts file) to a second place
-publishdir: ""
+# Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purpose
+use_light_data: False
 
-# Multi-step normalisation advanced parameters
-multistep_normalisation_options:
-  min_reads_bin: 5
-  n_subsample: 1000
-  min_reads_cell: 1000
-# Others
+# For snakemake linting
 abs_path: "/"
+
+# Type of plots for counts
+plottype_counts:
+  - "raw"
+  - "normalised"
+
+# Option to display all potential commands (as listed in config_metadata.yaml)
+list_commands: False
+# --------------------------------------------------------
diff --git a/config/config.yaml b/config/config.yaml
index 666e437..1d435f5 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,7 +1,7 @@
 # --------------------------------------------------------
 # Ashleys-QC pipeline Configuration
 # --------------------------------------------------------
-version: 2.2.2
+version: 2.2.3
 
 # Email for notifications about the pipeline's status
 email: ""
@@ -9,6 +9,9 @@ email: ""
 # List of samples to process if multiple are specified
 samples_to_process: []
 
+# Plate size
+plate_size: 96
+
 # --------------------------------------------------------
 # Data location & I/O
 # --------------------------------------------------------
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 9798f54..4e4e206 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -29,7 +29,7 @@ if config["list_commands"] is False:
 
     rule all:
         input:
-            get_final_output(),
+            get_final_result(),
 
     if config["email"]:
 
diff --git a/workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv b/workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv
new file mode 100644
index 0000000..583f0ef
--- /dev/null
+++ b/workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv
@@ -0,0 +1,385 @@
+Well_position	index
+A1	iTRU1A01
+C1	iTRU1A02
+E1	iTRU1A03
+G1	iTRU1A04
+I1	iTRU1A05
+K1	iTRU1A06
+M1	iTRU1A07
+O1	iTRU1A08
+A3	iTRU1A09
+C3	iTRU1A10
+E3	iTRU1A11
+G3	iTRU1A12
+I3	iTRU1A13
+K3	iTRU1A14
+M3	iTRU1A15
+O3	iTRU1A16
+A5	iTRU1A17
+C5	iTRU1A18
+E5	iTRU1A19
+G5	iTRU1A20
+I5	iTRU1A21
+K5	iTRU1A22
+M5	iTRU1A23
+O5	iTRU1A24
+A7	iTRU1A25
+C7	iTRU1A26
+E7	iTRU1A27
+G7	iTRU1A28
+I7	iTRU1A29
+K7	iTRU1A30
+M7	iTRU1A31
+O7	iTRU1A32
+A9	iTRU1A33
+C9	iTRU1A34
+E9	iTRU1A35
+G9	iTRU1A36
+I9	iTRU1A37
+K9	iTRU1A38
+M9	iTRU1A39
+O9	iTRU1A40
+A11	iTRU1A41
+C11	iTRU1A42
+E11	iTRU1A43
+G11	iTRU1A44
+I11	iTRU1A45
+K11	iTRU1A46
+M11	iTRU1A47
+O11	iTRU1A48
+A13	iTRU1A49
+C13	iTRU1A50
+E13	iTRU1A51
+G13	iTRU1A52
+I13	iTRU1A53
+K13	iTRU1A54
+M13	iTRU1A55
+O13	iTRU1A56
+A15	iTRU1A57
+C15	iTRU1A58
+E15	iTRU1A59
+G15	iTRU1A60
+I15	iTRU1A61
+K15	iTRU1A62
+M15	iTRU1A63
+O15	iTRU1A64
+A17	iTRU1A65
+C17	iTRU1A66
+E17	iTRU1A67
+G17	iTRU1A68
+I17	iTRU1A69
+K17	iTRU1A70
+M17	iTRU1A71
+O17	iTRU1A72
+A19	iTRU1A73
+C19	iTRU1A74
+E19	iTRU1A75
+G19	iTRU1A76
+I19	iTRU1A77
+K19	iTRU1A78
+M19	iTRU1A79
+O19	iTRU1A80
+A21	iTRU1A81
+C21	iTRU1A82
+E21	iTRU1A83
+G21	iTRU1A84
+I21	iTRU1A85
+K21	iTRU1A86
+M21	iTRU1A87
+O21	iTRU1A88
+A23	iTRU1A89
+C23	iTRU1A90
+E23	iTRU1A91
+G23	iTRU1A92
+I23	iTRU1A93
+K23	iTRU1A94
+M23	iTRU1A95
+O23	iTRU1A96
+A2	iTRU3C01
+C2	iTRU3C02
+E2	iTRU3C03
+G2	iTRU3C04
+I2	iTRU3C05
+K2	iTRU3C06
+M2	iTRU3C07
+O2	iTRU3C08
+A4	iTRU3C09
+C4	iTRU3C10
+E4	iTRU3C11
+G4	iTRU3C12
+I4	iTRU3C13
+K4	iTRU3C14
+M4	iTRU3C15
+O4	iTRU3C16
+A6	iTRU3C17
+C6	iTRU3C18
+E6	iTRU3C19
+G6	iTRU3C20
+I6	iTRU3C21
+K6	iTRU3C22
+M6	iTRU3C23
+O6	iTRU3C24
+A8	iTRU3C25
+C8	iTRU3C26
+E8	iTRU3C27
+G8	iTRU3C28
+I8	iTRU3C29
+K8	iTRU3C30
+M8	iTRU3C31
+O8	iTRU3C32
+A10	iTRU3C33
+C10	iTRU3C34
+E10	iTRU3C35
+G10	iTRU3C36
+I10	iTRU3C37
+K10	iTRU3C38
+M10	iTRU3C39
+O10	iTRU3C40
+A12	iTRU3C41
+C12	iTRU3C42
+E12	iTRU3C43
+G12	iTRU3C44
+I12	iTRU3C45
+K12	iTRU3C46
+M12	iTRU3C47
+O12	iTRU3C48
+A14	iTRU3C49
+C14	iTRU3C50
+E14	iTRU3C51
+G14	iTRU3C52
+I14	iTRU3C53
+K14	iTRU3C54
+M14	iTRU3C55
+O14	iTRU3C56
+A16	iTRU3C57
+C16	iTRU3C58
+E16	iTRU3C59
+G16	iTRU3C60
+I16	iTRU3C61
+K16	iTRU3C62
+M16	iTRU3C63
+O16	iTRU3C64
+A18	iTRU3C65
+C18	iTRU3C66
+E18	iTRU3C67
+G18	iTRU3C68
+I18	iTRU3C69
+K18	iTRU3C70
+M18	iTRU3C71
+O18	iTRU3C72
+A20	iTRU3C73
+C20	iTRU3C74
+E20	iTRU3C75
+G20	iTRU3C76
+I20	iTRU3C77
+K20	iTRU3C78
+M20	iTRU3C79
+O20	iTRU3C80
+A22	iTRU3C81
+C22	iTRU3C82
+E22	iTRU3C83
+G22	iTRU3C84
+I22	iTRU3C85
+K22	iTRU3C86
+M22	iTRU3C87
+O22	iTRU3C88
+A24	iTRU3C89
+C24	iTRU3C90
+E24	iTRU3C91
+G24	iTRU3C92
+I24	iTRU3C93
+K24	iTRU3C94
+M24	iTRU3C95
+O24	iTRU3C96
+B1	iTRUE5E01
+D1	iTRUE5E02
+F1	iTRUE5E03
+H1	iTRUE5E04
+J1	iTRUE5E05
+L1	iTRUE5E06
+N1	iTRUE5E07
+P1	iTRUE5E08
+B3	iTRUE5E09
+D3	iTRUE5E10
+F3	iTRUE5E11
+H3	iTRUE5E12
+J3	iTRUE5E13
+L3	iTRUE5E14
+N3	iTRUE5E15
+P3	iTRUE5E16
+B5	iTRUE5E17
+D5	iTRUE5E18
+F5	iTRUE5E19
+H5	iTRUE5E20
+J5	iTRUE5E21
+L5	iTRUE5E22
+N5	iTRUE5E23
+P5	iTRUE5E24
+B7	iTRUE5E25
+D7	iTRUE5E26
+F7	iTRUE5E27
+H7	iTRUE5E28
+J7	iTRUE5E29
+L7	iTRUE5E30
+N7	iTRUE5E31
+P7	iTRUE5E32
+B9	iTRUE5E33
+D9	iTRUE5E34
+F9	iTRUE5E35
+H9	iTRUE5E36
+J9	iTRUE5E37
+L9	iTRUE5E38
+N9	iTRUE5E39
+P9	iTRUE5E40
+B11	iTRUE5E41
+D11	iTRUE5E42
+F11	iTRUE5E43
+H11	iTRUE5E44
+J11	iTRUE5E45
+L11	iTRUE5E46
+N11	iTRUE5E47
+P11	iTRUE5E48
+B13	iTRUE5E49
+D13	iTRUE5E50
+F13	iTRUE5E51
+H13	iTRUE5E52
+J13	iTRUE5E53
+L13	iTRUE5E54
+N13	iTRUE5E55
+P13	iTRUE5E56
+B15	iTRUE5E57
+D15	iTRUE5E58
+F15	iTRUE5E59
+H15	iTRUE5E60
+J15	iTRUE5E61
+L15	iTRUE5E62
+N15	iTRUE5E63
+P15	iTRUE5E64
+B17	iTRUE5E65
+D17	iTRUE5E66
+F17	iTRUE5E67
+H17	iTRUE5E68
+J17	iTRUE5E69
+L17	iTRUE5E70
+N17	iTRUE5E71
+P17	iTRUE5E72
+B19	iTRUE5E73
+D19	iTRUE5E74
+F19	iTRUE5E75
+H19	iTRUE5E76
+J19	iTRUE5E77
+L19	iTRUE5E78
+N19	iTRUE5E79
+P19	iTRUE5E80
+B21	iTRUE5E81
+D21	iTRUE5E82
+F21	iTRUE5E83
+H21	iTRUE5E84
+J21	iTRUE5E85
+L21	iTRUE5E86
+N21	iTRUE5E87
+P21	iTRUE5E88
+B23	iTRUE5E89
+D23	iTRUE5E90
+F23	iTRUE5E91
+H23	iTRUE5E92
+J23	iTRUE5E93
+L23	iTRUE5E94
+N23	iTRUE5E95
+P23	iTRUE5E96
+B2	iTRUE7G01
+D2	iTRUE7G02
+F2	iTRUE7G03
+H2	iTRUE7G04
+J2	iTRUE7G05
+L2	iTRUE7G06
+N2	iTRUE7G07
+P2	iTRUE7G08
+B4	iTRUE7G09
+D4	iTRUE7G10
+F4	iTRUE7G11
+H4	iTRUE7G12
+J4	iTRUE7G13
+L4	iTRUE7G14
+N4	iTRUE7G15
+P4	iTRUE7G16
+B6	iTRUE7G17
+D6	iTRUE7G18
+F6	iTRUE7G19
+H6	iTRUE7G20
+J6	iTRUE7G21
+L6	iTRUE7G22
+N6	iTRUE7G23
+P6	iTRUE7G24
+B8	iTRUE7G25
+D8	iTRUE7G26
+F8	iTRUE7G27
+H8	iTRUE7G28
+J8	iTRUE7G29
+L8	iTRUE7G30
+N8	iTRUE7G31
+P8	iTRUE7G32
+B10	iTRUE7G33
+D10	iTRUE7G34
+F10	iTRUE7G35
+H10	iTRUE7G36
+J10	iTRUE7G37
+L10	iTRUE7G38
+N10	iTRUE7G39
+P10	iTRUE7G40
+B12	iTRUE7G41
+D12	iTRUE7G42
+F12	iTRUE7G43
+H12	iTRUE7G44
+J12	iTRUE7G45
+L12	iTRUE7G46
+N12	iTRUE7G47
+P12	iTRUE7G48
+B14	iTRUE7G49
+D14	iTRUE7G50
+F14	iTRUE7G51
+H14	iTRUE7G52
+J14	iTRUE7G53
+L14	iTRUE7G54
+N14	iTRUE7G55
+P14	iTRUE7G56
+B16	iTRUE7G57
+D16	iTRUE7G58
+F16	iTRUE7G59
+H16	iTRUE7G60
+J16	iTRUE7G61
+L16	iTRUE7G62
+N16	iTRUE7G63
+P16	iTRUE7G64
+B18	iTRUE7G65
+D18	iTRUE7G66
+F18	iTRUE7G67
+H18	iTRUE7G68
+J18	iTRUE7G69
+L18	iTRUE7G70
+N18	iTRUE7G71
+P18	iTRUE7G72
+B20	iTRUE7G73
+D20	iTRUE7G74
+F20	iTRUE7G75
+H20	iTRUE7G76
+J20	iTRUE7G77
+L20	iTRUE7G78
+N20	iTRUE7G79
+P20	iTRUE7G80
+B22	iTRUE7G81
+D22	iTRUE7G82
+F22	iTRUE7G83
+H22	iTRUE7G84
+J22	iTRUE7G85
+L22	iTRUE7G86
+N22	iTRUE7G87
+P22	iTRUE7G88
+B24	iTRUE7G89
+D24	iTRUE7G90
+F24	iTRUE7G91
+H24	iTRUE7G92
+J24	iTRUE7G93
+L24	iTRUE7G94
+N24	iTRUE7G95
+P24	iTRUE7G96
\ No newline at end of file
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 3ee5a85..4e1f0ba 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -125,47 +125,69 @@ class HandleInput:
         Returns:
             _type_: _description_
         """
-        complete_df_list = list()
+        from pprint import pprint
+        from collections import Counter
 
-        # List of folders/files to not consider (restrict to samples only)
-        l = sorted(
-            [
-                e
-                for e in os.listdir(
-                    "{genecore_prefix}/{date_folder}".format(
-                        genecore_prefix=config["genecore_prefix"],
-                        date_folder=config["genecore_date_folder"],
-                    )
-                )
-                if e.endswith(".txt.gz")
-            ]
+        directory_path = f"{config['genecore_prefix']}/{config['genecore_date_folder']}"
+
+        l = sorted([e for e in os.listdir(directory_path) if e.endswith(".txt.gz")])
+
+        complete_df_list = list()
+        # print(thisdir)
+        genecore_prefix = config["genecore_prefix"]
+        date_folder = config["genecore_date_folder"]
+        # print(f"{genecore_prefix}/{date_folder}")
+
+        # Pattern to extract sample name and index
+        pattern = re.compile(r"(.*_lane1)(.*?)(iTRU|PE20)(.*?)(\d{2})(?:_1_|_2_)")
+
+        samples = list()
+        prefixes = list()
+        indexes = list()
+        plate_types = list()
+        d_master = collections.defaultdict(
+            lambda: {
+                "indexes": set(),
+                "file_prefix": "",
+                "plate_type": "",
+                "index_pattern": "",
+            }
         )
 
-        # Create a list of  files to process for each sample
-        d_master = collections.defaultdict(dict)
-        sub_l = list()
-        for j, e in enumerate(l):
-            # print(j,e)
-            sub_l.append(e)
-            if (j + 1) % 192 == 0:
-                common_element = findstem(sub_l)
-                l_elems = common_element.split("lane1")
-                prefix = l_elems[0]
-                sample = l_elems[1].split(
-                    "{regex_element}".format(
-                        regex_element=config["genecore_regex_element"]
-                    )
-                )[0]
-                index = l_elems[1].split(
-                    "{regex_element}".format(
-                        regex_element=config["genecore_regex_element"]
+        # First pass: Count occurrences of each sample_name
+        file_counts_per_sample = Counter()
+        for file_path in l:
+            match = pattern.search(file_path)
+            if match:
+                sample_name = match.group(2)
+                file_counts_per_sample[sample_name] += 1
+
+        # Second pass: Process files and determine plate type per sample
+        for j, file_path in enumerate(sorted(l)):
+            match = pattern.search(file_path)
+            if match:
+                sample_name = match.group(2)
+                index = match.group(4)
+                indexes.append(index)
+                d_master[sample_name]["indexes"].add(index)
+                file_count = file_counts_per_sample[sample_name]
+
+                # File count must be a multiple of 96; plate type is half of it (presumably two paired files, _1_/_2_, per cell)
+                if file_count % 96 != 0:
+                    raise ValueError(
+                        f"Invalid file count for sample {sample_name} with file count {file_count}. Must be a multiple of 96."
                     )
-                )[1]
-                sub_l = list()
+                plate_type = int(file_count / 2)
+
+                if (j + 1) % file_count == 0:
+                    prefixes.append(match.group(3))
+                    d_master[sample_name]["file_prefix"] = match.group(1)
+                    d_master[sample_name]["index_pattern"] = match.group(3)
+                    plate = directory_path.split("/")[-1]
+                    samples.append(sample_name)
+                    plate_types.append(plate_type)
+                    d_master[sample_name]["plate_type"] = plate_type
 
-                d_master[sample]["prefix"] = prefix
-                d_master[sample]["index"] = index
-                d_master[sample]["common_element"] = common_element
         samples_to_process = (
             config["samples_to_process"]
             if len(config["samples_to_process"]) > 0
@@ -182,8 +204,8 @@ class HandleInput:
                 "{data_location}/{sample}/fastq/{sample}{regex_element}{index}{cell_nb}.{pair}.fastq.gz",
                 data_location=config["data_location"],
                 sample=sample,
-                regex_element=config["genecore_regex_element"],
-                index=d_master[sample]["index"],
+                regex_element=d_master[sample]["index_pattern"],
+                index=d_master[sample]["indexes"],
                 cell_nb=[str(e).zfill(2) for e in list(range(1, 97))],
                 pair=["1", "2"],
             )
@@ -191,7 +213,8 @@ class HandleInput:
             if sample in samples_to_process
         ]
         genecore_list = [sub_e for e in genecore_list for sub_e in e]
-        # pprint(genecore_list)
+        # pprint(d_master)
+
         complete_df_list = list()
 
         for sample in d_master:
@@ -210,11 +233,12 @@ class HandleInput:
                 df["Full_path"] = df[["Folder", "File"]].apply(
                     lambda r: f"{r['Folder']}/{r['File']}.fastq.gz", axis=1
                 )
+
                 df["Genecore_path"] = df["File"].apply(
-                    lambda r: f"{config['genecore_prefix']}/{config['genecore_date_folder']}/{d_master[sample]['prefix']}lane1{r.replace('.', '_')}_sequence.txt.gz"
+                    lambda r: f"{config['genecore_prefix']}/{config['genecore_date_folder']}/{d_master[sample]['file_prefix']}{r.replace('.', '_')}_sequence.txt.gz"
                 )
                 df["Genecore_file"] = df["File"].apply(
-                    lambda r: f"{d_master[sample]['prefix']}lane1{r.replace('.', '_')}"
+                    lambda r: f"{d_master[sample]['file_prefix']}{r.replace('.', '_')}"
                 )
                 df["Genecore_file"] = df["Genecore_file"].apply(
                     lambda r: "_".join(r.split("_")[:-1])
@@ -229,6 +253,7 @@ class HandleInput:
             drop=True
         )
         pd.options.display.max_colwidth = 200
+
         # print(complete_df)
         return complete_df, d_master
 
@@ -314,6 +339,7 @@ class HandleInput:
         complete_df = complete_df.sort_values(by=["Cell", "File"]).reset_index(
             drop=True
         )
+
         return complete_df
 
 
@@ -390,7 +416,7 @@ plottype_counts = (
 # print(plottype_counts)
 
 
-def get_final_output():
+def get_final_output(wildcards):
     """
     Function called by snakemake rule all to run the pipeline
     """
@@ -403,7 +429,7 @@ def get_final_output():
             expand(
                 "{path}/{sample}/multiqc/multiqc_report/multiqc_report.html",
                 path=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
             ),
         )
 
@@ -415,7 +441,7 @@ def get_final_output():
             expand(
                 "{path}/{sample}/cell_selection/labels.tsv",
                 path=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
             )
         )
 
@@ -425,14 +451,14 @@ def get_final_output():
             expand(
                 "{output_folder}/{sample}/plots/counts/CountComplete.{plottype_counts}.pdf",
                 output_folder=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
                 plottype_counts=plottype_counts,
             ),
         )
 
     # Plate plots
 
-    for sample in samples:
+    for sample in [wildcards.sample]:
         if len(cell_per_sample[sample]) in [96, 384]:
             final_list.extend(
                 [
@@ -452,17 +478,43 @@ def get_final_output():
     if config["publishdir"] != "":
         final_list.extend(
             expand(
-                "{folder}/config/publishdir_outputs.ok",
+                "{folder}/{sample}/config/publishdir_outputs.ok",
                 folder=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
             )
         )
 
-    # print(final_list)
+    # Config section
+
+    final_list.extend(
+        expand(
+            "{folder}/{sample}/config/config_ashleys.yaml",
+            folder=config["data_location"],
+            sample=wildcards.sample,
+        ),
+    )
+
     return final_list
 
 
-def publishdir_fct():
+def get_final_result():
+    """
+    Input function for rule all: returns the per-sample 'ashleys_final_results.ok' marker files
+    """
+    final_list = list()
+
+    final_list.extend(
+        expand(
+            "{folder}/{sample}/config/ashleys_final_results.ok",
+            folder=config["data_location"],
+            sample=samples,
+        )
+    )
+
+    return final_list
+
+
+def publishdir_fct(wildcards):
     """
     Restricted for ASHLEYS at the moment
     Backup files on a secondary location
@@ -472,17 +524,18 @@ def publishdir_fct():
         "{folder}/{sample}/cell_selection/labels.tsv",
         "{folder}/{sample}/counts/{sample}.info_raw",
         "{folder}/{sample}/counts/{sample}.txt.raw.gz",
-        "config/config.yaml",
+        "{folder}/{sample}/config/config_ashleys.yaml",
     ]
     final_list = [
-        expand(e, folder=config["data_location"], sample=samples)
+        expand(e, folder=config["data_location"], sample=wildcards.sample)
         for e in list_files_to_copy
     ]
+    final_list = [sub_e for e in final_list for sub_e in e]
     final_list.extend(
         expand(
             "{folder}/{sample}/plots/counts/CountComplete.{plottype_counts}.pdf",
             folder=config["data_location"],
-            sample=samples,
+            sample=wildcards.sample,
             plottype_counts=plottype_counts,
         )
     )
@@ -492,7 +545,7 @@ def publishdir_fct():
             expand(
                 "{folder}/{sample}/plots/plate/ashleys_plate_{plate_plot}.pdf",
                 folder=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
                 plate_plot=["predictions", "probabilities"],
             )
         )
@@ -500,14 +553,14 @@ def publishdir_fct():
             expand(
                 "{folder}/{sample}/cell_selection/labels_positive_control_corrected.tsv",
                 folder=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
             )
         )
         final_list.extend(
             expand(
                 "{folder}/{sample}/config/bypass_cell.txt",
                 folder=config["data_location"],
-                sample=samples,
+                sample=wildcards.sample,
             )
         )
 
diff --git a/workflow/rules/multiqc.smk b/workflow/rules/multiqc.smk
index 0ffb49a..7825a8a 100644
--- a/workflow/rules/multiqc.smk
+++ b/workflow/rules/multiqc.smk
@@ -9,7 +9,7 @@ rule fastqc:
             subcategory="{sample}",
             labels={"Sample": "{sample}", "Cell": "{cell}", "Pair": "{pair}"},
         ),
-        zip="{folder}/{sample}/fastqc/{cell}_{pair}_fastqc.zip",
+        zip="{folder}/{sample}/multiqc/fastqc/{cell}_{pair}_fastqc.zip",
     params:
         "--quiet",
     log:
@@ -137,6 +137,8 @@ rule multiqc:
         ),
     log:
         "{folder}/{sample}/log/multiqc/{sample}.log",
+    resources:
+        mem_mb=get_mem_mb_heavy,
     params:
         multiqc_input=lambda wc, input: "{abs_path}".format(
             abs_path=config["abs_path"]
diff --git a/workflow/rules/rules.smk b/workflow/rules/rules.smk
index 2ba5e2e..144ae97 100644
--- a/workflow/rules/rules.smk
+++ b/workflow/rules/rules.smk
@@ -15,6 +15,7 @@ if config["genecore"] is True and config["genecore_date_folder"]:
 
         localrules:
             genecore_symlink,
+            symlink_bam_ashleys,
 
     rule genecore_symlink:
         input:
@@ -29,11 +30,12 @@ if config["genecore"] is True and config["genecore_date_folder"]:
         log:
             "{folder}/log/genecore_symlink/{sample}/{cell}_{pair}.log",
         shell:
-            "ln -s {input} {output}"
+            "ln -f -s {input} {output}"
 
     ruleorder: genecore_symlink > bwa_strandseq_to_reference_alignment
 
 
+# if config["use_light_data"] is False:
 localrules:
     symlink_bam_ashleys,
 
@@ -134,7 +136,8 @@ rule mark_duplicates:
     conda:
         "../envs/ashleys_base.yaml"
     resources:
-        mem_mb=get_mem_mb,
+        mem_mb=get_mem_mb_heavy,
+        # partition="bigmem",
         time="10:00:00",
     shell:
         "sambamba markdup {input.bam} {output} 2>&1 > {log}"
@@ -300,6 +303,7 @@ if config["use_light_data"] is False:
                 subcategory="{sample}",
                 labels={"Sample": "{sample}", "Plot Type": "Probabilities"},
             ),
+            well_table="{folder}/{sample}/plots/plate/ashleys_well_table.tsv",
         log:
             "{folder}/log/plot_plate/{sample}.log",
         conda:
@@ -326,12 +330,40 @@ if config["publishdir"] != "":
 
     rule publishdir_outputs_ashleys:
         input:
-            list_publishdir=publishdir_fct(),
+            list_publishdir=publishdir_fct,
         output:
-            touch("{folder}/config/publishdir_outputs.ok"),
+            touch("{folder}/{sample}/config/publishdir_outputs.ok"),
         log:
-            "{folder}/log/publishdir_outputs/publishdir_outputs.log",
+            "{folder}/log/publishdir_outputs/{sample}.log",
         conda:
             "../envs/ashleys_base.yaml"
         script:
             "../scripts/utils/publishdir.py"
+
+
+rule save_config:
+    input:
+        "config/config.yaml",
+    output:
+        "{folder}/{sample}/config/config_ashleys.yaml",
+    log:
+        "{folder}/log/save_config/{sample}.log",
+    conda:
+        "../envs/ashleys_base.yaml"
+    resources:
+        mem_mb=get_mem_mb,
+    script:
+        "../scripts/utils/dump_config.py"
+
+
+rule ashleys_final_results:
+    input:
+        get_final_output,
+    output:
+        "{folder}/{sample}/config/ashleys_final_results.ok",
+    log:
+        "{folder}/log/ashleys_final_results/{sample}.log",
+    conda:
+        "../envs/ashleys_base.yaml"
+    shell:
+        "touch {output}"
diff --git a/workflow/scripts/plotting/plot_plate.R b/workflow/scripts/plotting/plot_plate.R
index 69fb05d..c2dbd81 100644
--- a/workflow/scripts/plotting/plot_plate.R
+++ b/workflow/scripts/plotting/plot_plate.R
@@ -62,3 +62,5 @@ raw_map(
     ggtitle(paste0("Sample: ", snakemake@wildcards[["sample"]], " | ASHLEYS probabilities"))
 
 dev.off()
+
+write.table(ashleys_data, file = snakemake@output[["well_table"]], sep = "\t", row.names = FALSE, quote = FALSE)
diff --git a/workflow/scripts/plotting/plot_plate_dev.R b/workflow/scripts/plotting/plot_plate_dev.R
new file mode 100644
index 0000000..ae2bf1f
--- /dev/null
+++ b/workflow/scripts/plotting/plot_plate_dev.R
@@ -0,0 +1,109 @@
+library(platetools)
+library(ggplot2)
+library(viridis)
+library(dplyr)
+library(stringr)
+
+
+# df <- data.frame(vals = rnorm(384),
+#                  well = num_to_well(1:384, plate = 384))
+
+# print(df)
+# stop()
+
+args <- commandArgs(trailingOnly = T)
+
+prefix = args[1]
+
+## collect ASHLEYS predictions (cell_selection/labels.tsv)
+ashleys_data <- read.table(file =  paste0(prefix, "/cell_selection/labels.tsv"), sep = "\t", header = TRUE)
+# ashleys_data <- read.table(file = snakemake@input[["labels"]], sep = "\t", header = TRUE)
+plate_type <- nrow((ashleys_data))
+ashleys_data <- dplyr::arrange(ashleys_data, cell)
+colnames(ashleys_data)[1] <- "ashleys_id"
+
+corr_table_path = "workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv"
+corr_table <- read.table(corr_table_path, header = TRUE, sep = "\t")
+sample <- basename(prefix)
+
+
+# Apply regex and extract groups
+ashleys_data <- ashleys_data %>%
+  mutate(
+    index = str_extract(ashleys_id, "(iTRU|PE20)[A-Za-z0-9]{4,5}")
+  )
+
+
+# View the result
+
+# Well_position <- character()
+
+# if (plate_type == 96) {
+#     for (i in 1:12)
+#     {
+#         for (j in 1:8)
+#         {
+#             tmp <- paste0(LETTERS[j], i)
+#             Well_position <- c(Well_position, tmp)
+#         }
+#     }
+# } else if (plate_type == 384) {
+#     for (i in 1:24)
+#     {
+#         for (j in 1:16)
+#         {
+#             tmp <- paste0(LETTERS[j], i)
+#             Well_position <- c(Well_position, tmp)
+#         }
+#     }
+# }
+
+
+print(corr_table)
+print(ashleys_data)
+ashleys_data <- merge(ashleys_data, corr_table, by.x = "index", by.y = "index", all.x = TRUE)
+
+write.table(ashleys_data, file = paste0(prefix, "/plots/plate/ashleys_well_table.tsv"), sep = "\t", row.names = FALSE, quote = FALSE)
+
+
+ashleys_data <- ashleys_data %>%
+  mutate(
+    Well_row = str_extract(Well_position, "[A-Za-z]+"),
+    Well_col = as.integer(str_extract(Well_position, "\\d+"))
+  ) %>%
+  arrange(Well_row, Well_col)
+
+
+print(ashleys_data)
+
+
+pdf(paste0(prefix, "/plots/plate/ashleys_plate_predictions.pdf"))
+# pdf(snakemake@output[["predictions"]])
+
+
+raw_map(
+    data = ashleys_data$prediction,
+    well = ashleys_data$Well_position,
+    plate = plate_type
+) +
+    scale_fill_distiller(type = "div", palette = "RdYlGn", direction = 1) +
+    # ggtitle(paste0("Sample: TEST | ASHLEYS binary predictions (cutoff=0.5)"))
+    ggtitle(paste0("Sample: ", sample, " | ASHLEYS binary predictions (cutoff=", 0.5, ")"))
+
+dev.off()
+
+# pdf("TEST_ashleys_plate_probabilities.pdf")
+pdf(paste0(prefix, "/plots/plate/ashleys_plate_probabilities.pdf"))
+
+# pdf(snakemake@output[["probabilities"]])
+
+raw_map(
+    data = ashleys_data$probability,
+    well = ashleys_data$Well_position,
+    plate = plate_type
+) +
+    scale_fill_distiller(type = "div", palette = "RdYlGn", direction = 1) +
+    # ggtitle(paste0("Sample: ", "TEST", " | ASHLEYS probabilities"))
+    ggtitle(paste0("Sample: ", sample, " | ASHLEYS probabilities"))
+
+dev.off()
\ No newline at end of file
diff --git a/workflow/scripts/utils/dump_config.py b/workflow/scripts/utils/dump_config.py
new file mode 100644
index 0000000..6b299ee
--- /dev/null
+++ b/workflow/scripts/utils/dump_config.py
@@ -0,0 +1,22 @@
+import yaml
+
+
+def update_config(input_file, output_file):
+    """Load YAML from input_file, overlay snakemake.config on it, and dump the result to output_file."""
+    with open(input_file, "r") as file:
+        # safe_load returns None for an empty document; fall back to an empty mapping
+        flat_file_config = yaml.safe_load(file) or {}
+
+    # `snakemake` is not defined in this file (expected from the Snakemake script runtime); its config wins
+    flat_file_config.update(snakemake.config)
+
+    # Save the updated config to the output file
+    with open(output_file, "w") as file:
+        yaml.dump(flat_file_config, file)
+
+
+if __name__ == "__main__":
+    input_config = snakemake.input[0]
+    output_config = snakemake.output[0]
+
+    update_config(input_config, output_config)

From ad9ec3ecc25ab3467c713ef2d33e94acfdf40f0a Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 09:29:25 +0000
Subject: [PATCH 02/12] Final results file to check if all outputs were
 produced, dump config update, plot_plate_dev for 384 well plate
 correspondence

---
 workflow/rules/gc.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/gc.smk b/workflow/rules/gc.smk
index 85f38bf..854481f 100644
--- a/workflow/rules/gc.smk
+++ b/workflow/rules/gc.smk
@@ -78,7 +78,7 @@ if config["multistep_normalisation"] is True and config["window"] == 200000:
         output:
             "{folder}/{sample}/counts/multistep_normalisation/{sample}.txt.scaled.GC.VST.reformat.gz",
         log:
-            "{folder}/{sample}/log/reformat_ms_norm/{sample}.log"
+            "{folder}/{sample}/log/reformat_ms_norm/{sample}.log",
         conda:
             "../envs/ashleys_base.yaml"
         resources:

From 947b82ce783cdb1a3a321606fe5f15be5d4db6ec Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 09:31:48 +0000
Subject: [PATCH 03/12] Update github CI/CD

---
 .github/workflows/main.yaml | 41 +++----------------------------------
 1 file changed, 3 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 2bceafa..e47052f 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -2,15 +2,11 @@ name: ashleys-qc-pipeline workflow checks
 
 on:
   schedule:
-    # Run every Sunday at 00:00 UTC on the master branch
-    - cron:  '0 0 * * 0'
-      # branches:
-      #   - master
+    - cron: "0 0 * * 0"
   push:
     branches:
-      - '*'
-      - '!master'
-
+      - "*"
+      - "!master"
 
 jobs:
   # WORK
@@ -36,13 +32,9 @@ jobs:
         with:
           directory: .
           snakefile: ./workflow/Snakefile
-          # stagein: "mamba env remove -n snakemake && mamba create -y -n snakemake -c conda-forge -c bioconda unzip snakemake pandas pysam tqdm imagemagick && source activate snakemake && ls -l && pwd"
           args: "--lint --config ashleys_pipeline=True"
   Testing_ashleys:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -61,9 +53,6 @@ jobs:
 
   Testing_ashleys_fastqc_enabled:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -82,9 +71,6 @@ jobs:
 
   Testing_ashleys_ms_norm_enabled:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -102,9 +88,6 @@ jobs:
           args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config multistep_normalisation=True --conda-frontend mamba --report report.zip"
   Testing_ashleys_hg38:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -122,9 +105,6 @@ jobs:
           args: "--cores 1 --use-conda --config reference=hg38 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_ashleys_hg19:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -142,9 +122,6 @@ jobs:
           args: "--cores 1 --use-conda --config reference=hg19 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_ashleys_T2T:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -162,9 +139,6 @@ jobs:
           args: "--cores 1 --use-conda --config reference=T2T use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_ashleys_mm10:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -182,9 +156,6 @@ jobs:
           args: "--cores 1 --use-conda --config reference=mm10 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip"
   Testing_jub_nb:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -202,9 +173,6 @@ jobs:
           args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config hand_selection=True --conda-frontend mamba --report report.zip"
   Testing_publishdir:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data
@@ -222,9 +190,6 @@ jobs:
           args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config publishdir=.tests/data_chr17_publishdir --conda-frontend mamba --report report.zip"
   Testing_list_commands:
     runs-on: ubuntu-latest
-    # needs:
-    #   - Linting
-    #   - Formatting
     steps:
       - uses: actions/checkout@v3.3.0
       - name: Testing data

From 5c447f6c2ffca8fa0154c7326e1af77faefaca4f Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 09:32:10 +0000
Subject: [PATCH 04/12] Update github CI/CD

---
 .github/workflows/main.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index e47052f..888f891 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -6,6 +6,7 @@ on:
   push:
     branches:
       - "*"
+      - "dev"
       - "!master"
 
 jobs:

From 840781d4333a7055841863913a9ba14509c5524f Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 09:33:05 +0000
Subject: [PATCH 05/12] Update github CI/CD

---
 .github/workflows/main.yaml | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 888f891..c760916 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,8 +1,8 @@
 name: ashleys-qc-pipeline workflow checks
 
 on:
-  schedule:
-    - cron: "0 0 * * 0"
+  # schedule:
+  #   - cron: "0 0 * * 0"
   push:
     branches:
       - "*"
@@ -14,7 +14,7 @@ jobs:
   Formatting:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
 
       - name: Formatting
         uses: github/super-linter@v4
@@ -27,7 +27,7 @@ jobs:
   Linting:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Linting
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -37,7 +37,7 @@ jobs:
   Testing_ashleys:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -55,7 +55,7 @@ jobs:
   Testing_ashleys_fastqc_enabled:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -73,7 +73,7 @@ jobs:
   Testing_ashleys_ms_norm_enabled:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -90,7 +90,7 @@ jobs:
   Testing_ashleys_hg38:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -107,7 +107,7 @@ jobs:
   Testing_ashleys_hg19:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -124,7 +124,7 @@ jobs:
   Testing_ashleys_T2T:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -141,7 +141,7 @@ jobs:
   Testing_ashleys_mm10:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -158,7 +158,7 @@ jobs:
   Testing_jub_nb:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -175,7 +175,7 @@ jobs:
   Testing_publishdir:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:
@@ -192,7 +192,7 @@ jobs:
   Testing_list_commands:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4
       - name: Testing data
         uses: snakemake/snakemake-github-action@v1.24.0
         with:

From dc754e63ed08927c195bcc044ddf2b569ac48321 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 09:35:02 +0000
Subject: [PATCH 06/12] Update github CI/CD

---
 .github/workflows/main.yaml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index c760916..a458c0d 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,12 +1,11 @@
 name: ashleys-qc-pipeline workflow checks
 
 on:
-  # schedule:
-  #   - cron: "0 0 * * 0"
+  schedule:
+    - cron: "0 0 * * 0"
   push:
     branches:
-      - "*"
-      - "dev"
+      - "**"
       - "!master"
 
 jobs:

From d642639821e6985c8a107d69fdd100ea693a0252 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 10:44:57 +0000
Subject: [PATCH 07/12] Update github CI/CD

---
 .github/workflows/main.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index a458c0d..e88cc9b 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,8 +1,8 @@
 name: ashleys-qc-pipeline workflow checks
 
 on:
-  schedule:
-    - cron: "0 0 * * 0"
+  # schedule:
+  #   - cron: "0 0 * * 0"
   push:
     branches:
       - "**"

From f33ef1de5e6ca6c52ce730835a093b9623e88e93 Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 10:49:21 +0000
Subject: [PATCH 08/12] Update github CI/CD

---
 .github/workflows/action_test.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 .github/workflows/action_test.yaml

diff --git a/.github/workflows/action_test.yaml b/.github/workflows/action_test.yaml
new file mode 100644
index 0000000..8d8fe4f
--- /dev/null
+++ b/.github/workflows/action_test.yaml
@@ -0,0 +1,22 @@
+name: Example Workflow
+
+on:
+  push:
+    branches:
+      - "**"
+
+jobs:
+  hello_world_job:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+
+      - name: Run a one-line script
+        run: echo "Hello, world! My first GitHub Actions workflow."
+
+      - name: Run a multi-line script
+        run: |
+          echo "This is a multi-line script."
+          echo "You can add more commands here."
+          echo "Each command is run in the shell."

From c0fe9899ceeecacbc29207fe3c23207a35cc9ffb Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 10:51:54 +0000
Subject: [PATCH 09/12] Update github CI/CD

---
 .github/workflows/main.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index e88cc9b..7b63a99 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,11 +1,14 @@
 name: ashleys-qc-pipeline workflow checks
 
 on:
-  # schedule:
-  #   - cron: "0 0 * * 0"
+  schedule:
+    # Run every Sunday at 00:00 UTC on the master branch
+    - cron: "0 0 * * 0"
+      # branches:
+      #   - master
   push:
     branches:
-      - "**"
+      - "*"
       - "!master"
 
 jobs:

From c8d1d04c6c546d03bc65ff4775840afda589e02c Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 11:00:45 +0000
Subject: [PATCH 10/12] Update github CI/CD

---
 .github/workflows/{main.yaml => main_test.yaml} | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)
 rename .github/workflows/{main.yaml => main_test.yaml} (98%)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main_test.yaml
similarity index 98%
rename from .github/workflows/main.yaml
rename to .github/workflows/main_test.yaml
index 7b63a99..c10e18c 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main_test.yaml
@@ -1,15 +1,9 @@
 name: ashleys-qc-pipeline workflow checks
 
 on:
-  schedule:
-    # Run every Sunday at 00:00 UTC on the master branch
-    - cron: "0 0 * * 0"
-      # branches:
-      #   - master
   push:
     branches:
-      - "*"
-      - "!master"
+      - "**"
 
 jobs:
   # WORK

From 7383b815d6ff5feb1bb924c5ee8ade3aa21e633c Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 12:34:33 +0000
Subject: [PATCH 11/12] Update github CI/CD

---
 .tests/config/simple_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.tests/config/simple_config.yaml b/.tests/config/simple_config.yaml
index ecd378f..869dd5c 100644
--- a/.tests/config/simple_config.yaml
+++ b/.tests/config/simple_config.yaml
@@ -92,7 +92,7 @@ genecore_regex_element: "PE20"
 mosaicatcher_pipeline: False
 
 # Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purpose
-use_light_data: False
+use_light_data: True
 
 # For snakemake linting
 abs_path: "/"

From 202ad00b4fedbac69c5226eb5b1aa2f3c5a7002e Mon Sep 17 00:00:00 2001
From: Thomas Weber <thomas.weber@embl.de>
Date: Mon, 4 Dec 2023 13:55:17 +0000
Subject: [PATCH 12/12] Dockerfile

---
 .github/workflows/action_test.yaml            |  22 ----
 .../Dockerfile-2.2.3.dockerfile               | 118 ++++++++++++++++++
 workflow/rules/rules.smk                      |   1 -
 3 files changed, 118 insertions(+), 23 deletions(-)
 delete mode 100644 .github/workflows/action_test.yaml
 create mode 100644 github-actions-runner/Dockerfile-2.2.3.dockerfile

diff --git a/.github/workflows/action_test.yaml b/.github/workflows/action_test.yaml
deleted file mode 100644
index 8d8fe4f..0000000
--- a/.github/workflows/action_test.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Example Workflow
-
-on:
-  push:
-    branches:
-      - "**"
-
-jobs:
-  hello_world_job:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v3
-
-      - name: Run a one-line script
-        run: echo "Hello, world! My first GitHub Actions workflow."
-
-      - name: Run a multi-line script
-        run: |
-          echo "This is a multi-line script."
-          echo "You can add more commands here."
-          echo "Each command is run in the shell."
diff --git a/github-actions-runner/Dockerfile-2.2.3.dockerfile b/github-actions-runner/Dockerfile-2.2.3.dockerfile
new file mode 100644
index 0000000..eba6242
--- /dev/null
+++ b/github-actions-runner/Dockerfile-2.2.3.dockerfile
@@ -0,0 +1,118 @@
+FROM condaforge/mambaforge:latest
+LABEL io.github.snakemake.containerized="true"
+LABEL io.github.snakemake.conda_env_hash="e9bc3082704cbf20eaa004e1360fb45da1359e3288296fb66dfad7e245e22563"
+
+# Step 1: Retrieve conda environments
+
+# Conda environment:
+#   source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml
+#   prefix: /conda-envs/5681728a49bd83ceed09ba194330c858
+#   channels:
+#     - bioconda
+#     - conda-forge
+#     - defaults
+#   dependencies:
+#     - bwa ==0.7.17
+RUN mkdir -p /conda-envs/5681728a49bd83ceed09ba194330c858
+ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml
+
+# Conda environment:
+#   source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml
+#   prefix: /conda-envs/08d4368302a4bdf7eda6b536495efe7d
+#   channels:
+#     - bioconda
+#     - conda-forge
+#     - defaults
+#   dependencies:
+#     - fastqc ==0.11.9
+RUN mkdir -p /conda-envs/08d4368302a4bdf7eda6b536495efe7d
+ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml
+
+# Conda environment:
+#   source: workflow/envs/ashleys_base.yaml
+#   prefix: /conda-envs/87c04f5d115eff742eca84455513deba
+#   name: ashleys_base
+#   channels:
+#     - conda-forge
+#     - bioconda
+#   dependencies:
+#     - samtools
+#     - tabix
+#     - bwa
+#     - sambamba
+#     - mosaicatcher
+#     # - alfred
+#     - ashleys-qc
+#     - pandas
+#     # PUBLISHDIR
+#     - rsync
+#     # MULTIQC
+#     - multiqc
+#     # Fix sklearn update
+#     - scikit-learn=1.2.2
+RUN mkdir -p /conda-envs/87c04f5d115eff742eca84455513deba
+COPY workflow/envs/ashleys_base.yaml /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml
+
+# Conda environment:
+#   source: workflow/envs/ashleys_rtools.yaml
+#   prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
+#   name: rtools
+#   channels:
+#     - conda-forge
+#     - bioconda
+#     - r
+#     - anaconda
+#   dependencies:
+#     # - bioconductor-biocparallel
+#     # - bioconductor-bsgenome
+#     # - bioconductor-bsgenome.hsapiens.ucsc.hg19
+#     # - bioconductor-bsgenome.hsapiens.ucsc.hg38
+#     # - bioconductor-fastseg
+#     # - bioconductor-genomicalignments
+#     - bioconductor-genomicranges
+#     # - bioconductor-rsamtools
+#     # - bioconductor-s4vectors
+#     - r-assertthat
+#     - r-base
+#     # - r-biocmanager
+#     - r-cowplot
+#     - r-data.table
+#     # - r-devtools
+#     # - r-doparallel
+#     # - r-foreach
+#     - r-ggplot2
+#     # - r-gtools
+#     - r-reshape2
+#     # - r-zoo
+#     # - r-dplyr
+#     # - r-mc2d
+#     # - r-pheatmap
+#     # - bioconductor-complexheatmap
+#     # - r-gplots
+#     - r-scales
+#     - r-rcolorbrewer
+#     # - r-stringr
+#     - r-cairo
+#     - fonts-anaconda
+#     # NEW
+#     - bioconductor-edger
+#     - r-r.utils
+#     # PLATE PLOT
+#     - r-dplyr
+#     - r-platetools
+#     - r-viridis
+#     # GC_correction
+#     - r-tidyr
+#     - r-ggpubr
+#     # SOLVE R lib issue
+#     - r-stringi=1.7.12
+RUN mkdir -p /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
+COPY workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml
+
+# Step 2: Generate conda environments
+# One RUN chains all four `mamba env create` calls with &&, so any failure aborts the step; `mamba clean --all -y` runs last in the same layer.
+RUN mamba env create --prefix /conda-envs/5681728a49bd83ceed09ba194330c858 --file /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml && \
+    mamba env create --prefix /conda-envs/08d4368302a4bdf7eda6b536495efe7d --file /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml && \
+    mamba env create --prefix /conda-envs/87c04f5d115eff742eca84455513deba --file /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml && \
+    mamba env create --prefix /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 --file /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml && \
+    mamba clean --all -y
diff --git a/workflow/rules/rules.smk b/workflow/rules/rules.smk
index 144ae97..f8f6ecc 100644
--- a/workflow/rules/rules.smk
+++ b/workflow/rules/rules.smk
@@ -137,7 +137,6 @@ rule mark_duplicates:
         "../envs/ashleys_base.yaml"
     resources:
         mem_mb=get_mem_mb_heavy,
-        # partition="bigmem",
         time="10:00:00",
     shell:
         "sambamba markdup {input.bam} {output} 2>&1 > {log}"