From 8b57dac91884ee0cae74c95f44177c9028db25c0 Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 09:16:17 +0000 Subject: [PATCH 01/12] Final results file to check if all outputs were produced, dump config update, plot_plate_dev for 384 well plate correspondance --- .tests/config/simple_config.yaml | 157 +++---- config/config.yaml | 5 +- workflow/Snakefile | 2 +- .../384_1A3C5E7G_correspondance_table.tsv | 385 ++++++++++++++++++ workflow/rules/common.smk | 165 +++++--- workflow/rules/multiqc.smk | 4 +- workflow/rules/rules.smk | 42 +- workflow/scripts/plotting/plot_plate.R | 2 + workflow/scripts/plotting/plot_plate_dev.R | 109 +++++ workflow/scripts/utils/dump_config.py | 22 + 10 files changed, 754 insertions(+), 139 deletions(-) create mode 100644 workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv create mode 100644 workflow/scripts/plotting/plot_plate_dev.R create mode 100644 workflow/scripts/utils/dump_config.py diff --git a/.tests/config/simple_config.yaml b/.tests/config/simple_config.yaml index 831386e..ecd378f 100644 --- a/.tests/config/simple_config.yaml +++ b/.tests/config/simple_config.yaml @@ -1,28 +1,36 @@ -version: 2.2.2 -# Option to display all potential options - listed in config_metadata.yaml -list_commands: False -## Data location - MUST BE AN ABSOULTE PATH (due to snakemake-symlink issues) - PLEASE MODIFY IT -# input_bam_location: ".tests/data_CHR17" -data_location: ".tests/data_CHR17" -# Reference genome used by BWA to map FASTQ files -# reference: sandbox.zenodo.org/record/1074721/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna -# Enable / Disable download of external files (1000G SNV & Fasta ref genome) -dl_external_files: False -# Enable / Disable multistep normalisation -multistep_normalisation: False -# Ashleys-qc binary classification threshold -ashleys_threshold: 0.5 -# Enable / Disable FastQC analysis -FastQC_analysis: False -# To be informed of pipeline status +# -------------------------------------------------------- +# Ashleys-QC pipeline Configuration +# -------------------------------------------------------- +version: 2.2.3 + +# Email for notifications about the pipeline's status email: "" -############################################################################ -# ADVANCED PARAMETERS -############################################################################ +# List of samples to process if multiple are specified +samples_to_process: [] + +# Plate size +plate_size: 96 + +# -------------------------------------------------------- +# Data location & I/O +# -------------------------------------------------------- + +# Absolute path to the data location (modify as needed) +data_location: ".tests/data_CHR17" + +# Directory to publish important data (e.g., stats, plots, counts). Leave empty if not required. +publishdir: "" + +# -------------------------------------------------------- +# Reference Data Configuration +# -------------------------------------------------------- +# Reference genome used by BWA to map FASTQ files reference: "hg38" +# Reference genome files' location + references_data: "hg38": reference_fasta: ".tests/external_data/chr17.fa.gz" @@ -31,70 +39,69 @@ references_data: "T2T": reference_fasta: "workflow/data/ref_genomes/T2T.fa" -# Boolean parameters -## Is the pipeline called used as a submodule in mosaicatcher-pipeline? -mosaicatcher_pipeline: False -## Enable/Disable hand selection through Jupyter Notebook -hand_selection: False +# List of chromosomes to process +chromosomes: + - chr17 -# Window size used by mosaic binning algorithm -window: 200000 +# Specify any chromosomes to exclude from processing +chromosomes_to_exclude: [] -plottype_counts: - - "raw" - - "normalised" +# -------------------------------------------------------- +# Quality Control Configuration +# -------------------------------------------------------- -alfred_plots: - - "dist" - - "devi" +# Threshold for Ashleys-qc binary classification +ashleys_threshold: 0.5 -plate_orientation: landscape +# Enable or disable FastQC analysis +MultiQC: False + +# -------------------------------------------------------- +# Counts Configuration +# -------------------------------------------------------- + +# Enable or disable multistep normalization analysis +multistep_normalisation: False + +# Advanced parameters for multi-step normalisation +multistep_normalisation_options: + min_reads_bin: 5 + n_subsample: 1000 + min_reads_cell: 100000 + +# Window size used by the mosaic binning algorithm +window: 200000 + +# Enable or disable hand selection through the Jupyter Notebook +hand_selection: False + +# -------------------------------------------------------- +# GENECORE Configuration +# -------------------------------------------------------- -# Chromosomes list to process -chromosomes: - - chr1 - - chr2 - - chr3 - - chr4 - - chr5 - - chr6 - - chr7 - - chr8 - - chr9 - - chr10 - - chr11 - - chr12 - - chr13 - - chr14 - - chr15 - - chr16 - - chr17 - - chr18 - - chr19 - - chr20 - - chr21 - - chr22 - - chrX - - chrY - -# GENECORE genecore: False -samples_to_process: [] genecore_date_folder: "" -genecore_prefix: "/g/korbel/shared/genecore" +genecore_prefix: "/g/korbel/STOCKS/Data/Assay/sequencing/2023" +genecore_regex_element: "PE20" -##### DEV only +# -------------------------------------------------------- +# Internal Parameters +# -------------------------------------------------------- -# Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purpose -use_light_data: True +# Is the pipeline used as a submodule in mosaicatcher-pipeline? +mosaicatcher_pipeline: False -# If specified, will copy important data (stats, plots, counts file) to a second place -publishdir: "" +# Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purpose +use_light_data: False -# Multi-step normalisation advanced parameters -multistep_normalisation_options: - min_reads_bin: 5 - n_subsample: 1000 - min_reads_cell: 1000 -# Others +# For snakemake linting abs_path: "/" + +# Type of plots for counts +plottype_counts: + - "raw" + - "normalised" + +# Option to display all potential commands (as listed in config_metadata.yaml) +list_commands: False +# -------------------------------------------------------- diff --git a/config/config.yaml b/config/config.yaml index 666e437..1d435f5 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,7 +1,7 @@ # -------------------------------------------------------- # Ashleys-QC pipeline Configuration # -------------------------------------------------------- -version: 2.2.2 +version: 2.2.3 # Email for notifications about the pipeline's status email: "" @@ -9,6 +9,9 @@ email: "" # List of samples to process if multiple are specified samples_to_process: [] +# Plate size +plate_size: 96 + # -------------------------------------------------------- # Data location & I/O # -------------------------------------------------------- diff --git a/workflow/Snakefile b/workflow/Snakefile index 9798f54..4e4e206 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -29,7 +29,7 @@ if config["list_commands"] is False: rule all: input: - get_final_output(), + get_final_result(), if config["email"]: diff --git a/workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv b/workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv new file mode 100644 index 0000000..583f0ef --- /dev/null +++ b/workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv @@ -0,0 +1,385 @@ +Well_position index +A1 iTRU1A01 +C1 iTRU1A02 +E1 iTRU1A03 +G1 iTRU1A04 +I1 iTRU1A05 +K1 iTRU1A06 +M1 iTRU1A07 +O1 iTRU1A08 +A3 iTRU1A09 +C3 iTRU1A10 +E3 iTRU1A11 +G3 iTRU1A12 +I3 iTRU1A13 +K3 iTRU1A14 +M3 iTRU1A15 +O3 iTRU1A16 +A5 iTRU1A17 +C5 iTRU1A18 +E5 iTRU1A19 +G5 iTRU1A20 +I5 iTRU1A21 +K5 iTRU1A22 +M5 iTRU1A23 +O5 iTRU1A24 +A7 iTRU1A25 +C7 iTRU1A26 +E7 iTRU1A27 +G7 iTRU1A28 +I7 iTRU1A29 +K7 iTRU1A30 +M7 iTRU1A31 +O7 iTRU1A32 +A9 iTRU1A33 +C9 iTRU1A34 +E9 iTRU1A35 +G9 iTRU1A36 +I9 iTRU1A37 +K9 iTRU1A38 +M9 iTRU1A39 +O9 iTRU1A40 +A11 iTRU1A41 +C11 iTRU1A42 +E11 iTRU1A43 +G11 iTRU1A44 +I11 iTRU1A45 +K11 iTRU1A46 +M11 iTRU1A47 +O11 iTRU1A48 +A13 iTRU1A49 +C13 iTRU1A50 +E13 iTRU1A51 +G13 iTRU1A52 +I13 iTRU1A53 +K13 iTRU1A54 +M13 iTRU1A55 +O13 iTRU1A56 +A15 iTRU1A57 +C15 iTRU1A58 +E15 iTRU1A59 +G15 iTRU1A60 +I15 iTRU1A61 +K15 iTRU1A62 +M15 iTRU1A63 +O15 iTRU1A64 +A17 iTRU1A65 +C17 iTRU1A66 +E17 iTRU1A67 +G17 iTRU1A68 +I17 iTRU1A69 +K17 iTRU1A70 +M17 iTRU1A71 +O17 iTRU1A72 +A19 iTRU1A73 +C19 iTRU1A74 +E19 iTRU1A75 +G19 iTRU1A76 +I19 iTRU1A77 +K19 iTRU1A78 +M19 iTRU1A79 +O19 iTRU1A80 +A21 iTRU1A81 +C21 iTRU1A82 +E21 iTRU1A83 +G21 iTRU1A84 +I21 iTRU1A85 +K21 iTRU1A86 +M21 iTRU1A87 +O21 iTRU1A88 +A23 iTRU1A89 +C23 iTRU1A90 +E23 iTRU1A91 +G23 iTRU1A92 +I23 iTRU1A93 +K23 iTRU1A94 +M23 iTRU1A95 +O23 iTRU1A96 +A2 iTRU3C01 +C2 iTRU3C02 +E2 iTRU3C03 +G2 iTRU3C04 +I2 iTRU3C05 +K2 iTRU3C06 +M2 iTRU3C07 +O2 iTRU3C08 +A4 iTRU3C09 +C4 iTRU3C10 +E4 iTRU3C11 +G4 iTRU3C12 +I4 iTRU3C13 +K4 iTRU3C14 +M4 iTRU3C15 +O4 iTRU3C16 +A6 iTRU3C17 +C6 iTRU3C18 +E6 iTRU3C19 +G6 iTRU3C20 +I6 iTRU3C21 +K6 iTRU3C22 +M6 iTRU3C23 +O6 iTRU3C24 +A8 iTRU3C25 +C8 iTRU3C26 +E8 iTRU3C27 +G8 iTRU3C28 +I8 iTRU3C29 +K8 iTRU3C30 +M8 iTRU3C31 +O8 iTRU3C32 +A10 iTRU3C33 +C10 iTRU3C34 +E10 iTRU3C35 +G10 iTRU3C36 +I10 iTRU3C37 +K10 iTRU3C38 +M10 iTRU3C39 +O10 iTRU3C40 +A12 iTRU3C41 +C12 iTRU3C42 +E12 iTRU3C43 +G12 iTRU3C44 +I12 iTRU3C45 +K12 iTRU3C46 +M12 iTRU3C47 +O12 iTRU3C48 +A14 iTRU3C49 +C14 iTRU3C50 +E14 iTRU3C51 +G14 iTRU3C52 +I14 iTRU3C53 +K14 iTRU3C54 +M14 iTRU3C55 +O14 iTRU3C56 +A16 iTRU3C57 +C16 iTRU3C58 +E16 iTRU3C59 +G16 iTRU3C60 +I16 iTRU3C61 +K16 iTRU3C62 +M16 iTRU3C63 +O16 iTRU3C64 +A18 iTRU3C65 +C18 iTRU3C66 +E18 iTRU3C67 +G18 iTRU3C68 +I18 iTRU3C69 +K18 iTRU3C70 +M18 iTRU3C71 +O18 iTRU3C72 +A20 iTRU3C73 +C20 iTRU3C74 +E20 iTRU3C75 +G20 iTRU3C76 +I20 iTRU3C77 +K20 iTRU3C78 +M20 iTRU3C79 +O20 iTRU3C80 +A22 iTRU3C81 +C22 iTRU3C82 +E22 iTRU3C83 +G22 iTRU3C84 +I22 iTRU3C85 +K22 iTRU3C86 +M22 iTRU3C87 +O22 iTRU3C88 +A24 iTRU3C89 +C24 iTRU3C90 +E24 iTRU3C91 +G24 iTRU3C92 +I24 iTRU3C93 +K24 iTRU3C94 +M24 iTRU3C95 +O24 iTRU3C96 +B1 iTRUE5E01 +D1 iTRUE5E02 +F1 iTRUE5E03 +H1 iTRUE5E04 +J1 iTRUE5E05 +L1 iTRUE5E06 +N1 iTRUE5E07 +P1 iTRUE5E08 +B3 iTRUE5E09 +D3 iTRUE5E10 +F3 iTRUE5E11 +H3 iTRUE5E12 +J3 iTRUE5E13 +L3 iTRUE5E14 +N3 iTRUE5E15 +P3 iTRUE5E16 +B5 iTRUE5E17 +D5 iTRUE5E18 +F5 iTRUE5E19 +H5 iTRUE5E20 +J5 iTRUE5E21 +L5 iTRUE5E22 +N5 iTRUE5E23 +P5 iTRUE5E24 +B7 iTRUE5E25 +D7 iTRUE5E26 +F7 iTRUE5E27 +H7 iTRUE5E28 +J7 iTRUE5E29 +L7 iTRUE5E30 +N7 iTRUE5E31 +P7 iTRUE5E32 +B9 iTRUE5E33 +D9 iTRUE5E34 +F9 iTRUE5E35 +H9 iTRUE5E36 +J9 iTRUE5E37 +L9 iTRUE5E38 +N9 iTRUE5E39 +P9 iTRUE5E40 +B11 iTRUE5E41 +D11 iTRUE5E42 +F11 iTRUE5E43 +H11 iTRUE5E44 +J11 iTRUE5E45 +L11 iTRUE5E46 +N11 iTRUE5E47 +P11 iTRUE5E48 +B13 iTRUE5E49 +D13 iTRUE5E50 +F13 iTRUE5E51 +H13 iTRUE5E52 +J13 iTRUE5E53 +L13 iTRUE5E54 +N13 iTRUE5E55 +P13 iTRUE5E56 +B15 iTRUE5E57 +D15 iTRUE5E58 +F15 iTRUE5E59 +H15 iTRUE5E60 +J15 iTRUE5E61 +L15 iTRUE5E62 +N15 iTRUE5E63 +P15 iTRUE5E64 +B17 iTRUE5E65 +D17 iTRUE5E66 +F17 iTRUE5E67 +H17 iTRUE5E68 +J17 iTRUE5E69 +L17 iTRUE5E70 +N17 iTRUE5E71 +P17 iTRUE5E72 +B19 iTRUE5E73 +D19 iTRUE5E74 +F19 iTRUE5E75 +H19 iTRUE5E76 +J19 iTRUE5E77 +L19 iTRUE5E78 +N19 iTRUE5E79 +P19 iTRUE5E80 +B21 iTRUE5E81 +D21 iTRUE5E82 +F21 iTRUE5E83 +H21 iTRUE5E84 +J21 iTRUE5E85 +L21 iTRUE5E86 +N21 iTRUE5E87 +P21 iTRUE5E88 +B23 iTRUE5E89 +D23 iTRUE5E90 +F23 iTRUE5E91 +H23 iTRUE5E92 +J23 iTRUE5E93 +L23 iTRUE5E94 +N23 iTRUE5E95 +P23 iTRUE5E96 +B2 iTRUE7G01 +D2 iTRUE7G02 +F2 iTRUE7G03 +H2 iTRUE7G04 +J2 iTRUE7G05 +L2 iTRUE7G06 +N2 iTRUE7G07 +P2 iTRUE7G08 +B4 iTRUE7G09 +D4 iTRUE7G10 +F4 iTRUE7G11 +H4 iTRUE7G12 +J4 iTRUE7G13 +L4 iTRUE7G14 +N4 iTRUE7G15 +P4 iTRUE7G16 +B6 iTRUE7G17 +D6 iTRUE7G18 +F6 iTRUE7G19 +H6 iTRUE7G20 +J6 iTRUE7G21 +L6 iTRUE7G22 +N6 iTRUE7G23 +P6 iTRUE7G24 +B8 iTRUE7G25 +D8 iTRUE7G26 +F8 iTRUE7G27 +H8 iTRUE7G28 +J8 iTRUE7G29 +L8 iTRUE7G30 +N8 iTRUE7G31 +P8 iTRUE7G32 +B10 iTRUE7G33 +D10 iTRUE7G34 +F10 iTRUE7G35 +H10 iTRUE7G36 +J10 iTRUE7G37 +L10 iTRUE7G38 +N10 iTRUE7G39 +P10 iTRUE7G40 +B12 iTRUE7G41 +D12 iTRUE7G42 +F12 iTRUE7G43 +H12 iTRUE7G44 +J12 iTRUE7G45 +L12 iTRUE7G46 +N12 iTRUE7G47 +P12 iTRUE7G48 +B14 iTRUE7G49 +D14 iTRUE7G50 +F14 iTRUE7G51 +H14 iTRUE7G52 +J14 iTRUE7G53 +L14 iTRUE7G54 +N14 iTRUE7G55 +P14 iTRUE7G56 +B16 iTRUE7G57 +D16 iTRUE7G58 +F16 iTRUE7G59 +H16 iTRUE7G60 +J16 iTRUE7G61 +L16 iTRUE7G62 +N16 iTRUE7G63 +P16 iTRUE7G64 +B18 iTRUE7G65 +D18 iTRUE7G66 +F18 iTRUE7G67 +H18 iTRUE7G68 +J18 iTRUE7G69 +L18 iTRUE7G70 +N18 iTRUE7G71 +P18 iTRUE7G72 +B20 iTRUE7G73 +D20 iTRUE7G74 +F20 iTRUE7G75 +H20 iTRUE7G76 +J20 iTRUE7G77 +L20 iTRUE7G78 +N20 iTRUE7G79 +P20 iTRUE7G80 +B22 iTRUE7G81 +D22 iTRUE7G82 +F22 iTRUE7G83 +H22 iTRUE7G84 +J22 iTRUE7G85 +L22 iTRUE7G86 +N22 iTRUE7G87 +P22 iTRUE7G88 +B24 iTRUE7G89 +D24 iTRUE7G90 +F24 iTRUE7G91 +H24 iTRUE7G92 +J24 iTRUE7G93 +L24 iTRUE7G94 +N24 iTRUE7G95 +P24 iTRUE7G96 \ No newline at end of file diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 3ee5a85..4e1f0ba 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -125,47 +125,69 @@ class HandleInput: Returns: _type_: _description_ """ - complete_df_list = list() + from pprint import pprint + from collections import Counter - # List of folders/files to not consider (restrict to samples only) - l = sorted( - [ - e - for e in os.listdir( - "{genecore_prefix}/{date_folder}".format( - genecore_prefix=config["genecore_prefix"], - date_folder=config["genecore_date_folder"], - ) - ) - if e.endswith(".txt.gz") - ] + directory_path = f"{config['genecore_prefix']}/{config['genecore_date_folder']}" + + l = sorted([e for e in os.listdir(directory_path) if e.endswith(".txt.gz")]) + + complete_df_list = list() + # print(thisdir) + genecore_prefix = config["genecore_prefix"] + date_folder = config["genecore_date_folder"] + # print(f"{genecore_prefix}/{date_folder}") + + # Pattern to extract sample name and index + pattern = re.compile(r"(.*_lane1)(.*?)(iTRU|PE20)(.*?)(\d{2})(?:_1_|_2_)") + + samples = list() + prefixes = list() + indexes = list() + plate_types = list() + d_master = collections.defaultdict( + lambda: { + "indexes": set(), + "file_prefix": "", + "plate_type": "", + "index_pattern": "", + } ) - # Create a list of files to process for each sample - d_master = collections.defaultdict(dict) - sub_l = list() - for j, e in enumerate(l): - # print(j,e) - sub_l.append(e) - if (j + 1) % 192 == 0: - common_element = findstem(sub_l) - l_elems = common_element.split("lane1") - prefix = l_elems[0] - sample = l_elems[1].split( - "{regex_element}".format( - regex_element=config["genecore_regex_element"] - ) - )[0] - index = l_elems[1].split( - "{regex_element}".format( - regex_element=config["genecore_regex_element"] + # First pass: Count occurrences of each sample_name + file_counts_per_sample = Counter() + for file_path in l: + match = pattern.search(file_path) + if match: + sample_name = match.group(2) + file_counts_per_sample[sample_name] += 1 + + # Second pass: Process files and determine plate type per sample + for j, file_path in enumerate(sorted(l)): + match = pattern.search(file_path) + if match: + sample_name = match.group(2) + index = match.group(4) + indexes.append(index) + d_master[sample_name]["indexes"].add(index) + file_count = file_counts_per_sample[sample_name] + + # Determine plate type using modulo 96 operation + if file_count % 96 != 0: + raise ValueError( + f"Invalid file count for sample {sample_name} with file count {file_count}. Must be a multiple of 96." ) - )[1] - sub_l = list() + plate_type = int(file_count / 2) + + if (j + 1) % file_count == 0: + prefixes.append(match.group(3)) + d_master[sample_name]["file_prefix"] = match.group(1) + d_master[sample_name]["index_pattern"] = match.group(3) + plate = directory_path.split("/")[-1] + samples.append(sample_name) + plate_types.append(plate_type) + d_master[sample_name]["plate_type"] = plate_type - d_master[sample]["prefix"] = prefix - d_master[sample]["index"] = index - d_master[sample]["common_element"] = common_element samples_to_process = ( config["samples_to_process"] if len(config["samples_to_process"]) > 0 @@ -182,8 +204,8 @@ class HandleInput: "{data_location}/{sample}/fastq/{sample}{regex_element}{index}{cell_nb}.{pair}.fastq.gz", data_location=config["data_location"], sample=sample, - regex_element=config["genecore_regex_element"], - index=d_master[sample]["index"], + regex_element=d_master[sample]["index_pattern"], + index=d_master[sample]["indexes"], cell_nb=[str(e).zfill(2) for e in list(range(1, 97))], pair=["1", "2"], ) @@ -191,7 +213,8 @@ class HandleInput: if sample in samples_to_process ] genecore_list = [sub_e for e in genecore_list for sub_e in e] - # pprint(genecore_list) + # pprint(d_master) + complete_df_list = list() for sample in d_master: @@ -210,11 +233,12 @@ class HandleInput: df["Full_path"] = df[["Folder", "File"]].apply( lambda r: f"{r['Folder']}/{r['File']}.fastq.gz", axis=1 ) + df["Genecore_path"] = df["File"].apply( - lambda r: f"{config['genecore_prefix']}/{config['genecore_date_folder']}/{d_master[sample]['prefix']}lane1{r.replace('.', '_')}_sequence.txt.gz" + lambda r: f"{config['genecore_prefix']}/{config['genecore_date_folder']}/{d_master[sample]['file_prefix']}{r.replace('.', '_')}_sequence.txt.gz" ) df["Genecore_file"] = df["File"].apply( - lambda r: f"{d_master[sample]['prefix']}lane1{r.replace('.', '_')}" + lambda r: f"{d_master[sample]['file_prefix']}{r.replace('.', '_')}" ) df["Genecore_file"] = df["Genecore_file"].apply( lambda r: "_".join(r.split("_")[:-1]) @@ -229,6 +253,7 @@ class HandleInput: drop=True ) pd.options.display.max_colwidth = 200 + # print(complete_df) return complete_df, d_master @@ -314,6 +339,7 @@ class HandleInput: complete_df = complete_df.sort_values(by=["Cell", "File"]).reset_index( drop=True ) + return complete_df @@ -390,7 +416,7 @@ plottype_counts = ( # print(plottype_counts) -def get_final_output(): +def get_final_output(wildcards): """ Function called by snakemake rule all to run the pipeline """ @@ -403,7 +429,7 @@ def get_final_output(): expand( "{path}/{sample}/multiqc/multiqc_report/multiqc_report.html", path=config["data_location"], - sample=samples, + sample=wildcards.sample, ), ) @@ -415,7 +441,7 @@ def get_final_output(): expand( "{path}/{sample}/cell_selection/labels.tsv", path=config["data_location"], - sample=samples, + sample=wildcards.sample, ) ) @@ -425,14 +451,14 @@ def get_final_output(): expand( "{output_folder}/{sample}/plots/counts/CountComplete.{plottype_counts}.pdf", output_folder=config["data_location"], - sample=samples, + sample=wildcards.sample, plottype_counts=plottype_counts, ), ) # Plate plots - for sample in samples: + for sample in [wildcards.sample]: if len(cell_per_sample[sample]) in [96, 384]: final_list.extend( [ @@ -452,17 +478,43 @@ def get_final_output(): if config["publishdir"] != "": final_list.extend( expand( - "{folder}/config/publishdir_outputs.ok", + "{folder}/{sample}/config/publishdir_outputs.ok", folder=config["data_location"], - sample=samples, + sample=wildcards.sample, ) ) - # print(final_list) + # Config section + + final_list.extend( + expand( + "{folder}/{sample}/config/config_ashleys.yaml", + folder=config["data_location"], + sample=wildcards.sample, + ), + ) + return final_list -def publishdir_fct(): +def get_final_result(): + """ + Input function of the pipeline, will retrieve all 'end' outputs + """ + final_list = list() + + final_list.extend( + expand( + "{folder}/{sample}/config/ashleys_final_results.ok", + folder=config["data_location"], + sample=samples, + ) + ) + + return final_list + + +def publishdir_fct(wildcards): """ Restricted for ASHLEYS at the moment Backup files on a secondary location @@ -472,17 +524,18 @@ def publishdir_fct(): "{folder}/{sample}/cell_selection/labels.tsv", "{folder}/{sample}/counts/{sample}.info_raw", "{folder}/{sample}/counts/{sample}.txt.raw.gz", - "config/config.yaml", + "{folder}/{sample}/config/config_ashleys.yaml", ] final_list = [ - expand(e, folder=config["data_location"], sample=samples) + expand(e, folder=config["data_location"], sample=wildcards.sample) for e in list_files_to_copy ] + final_list = [sub_e for e in final_list for sub_e in e] final_list.extend( expand( "{folder}/{sample}/plots/counts/CountComplete.{plottype_counts}.pdf", folder=config["data_location"], - sample=samples, + sample=wildcards.sample, plottype_counts=plottype_counts, ) ) @@ -492,7 +545,7 @@ def publishdir_fct(): expand( "{folder}/{sample}/plots/plate/ashleys_plate_{plate_plot}.pdf", folder=config["data_location"], - sample=samples, + sample=wildcards.sample, plate_plot=["predictions", "probabilities"], ) ) @@ -500,14 +553,14 @@ def publishdir_fct(): expand( "{folder}/{sample}/cell_selection/labels_positive_control_corrected.tsv", folder=config["data_location"], - sample=samples, + sample=wildcards.sample, ) ) final_list.extend( expand( "{folder}/{sample}/config/bypass_cell.txt", folder=config["data_location"], - sample=samples, + sample=wildcards.sample, ) ) diff --git a/workflow/rules/multiqc.smk b/workflow/rules/multiqc.smk index 0ffb49a..7825a8a 100644 --- a/workflow/rules/multiqc.smk +++ b/workflow/rules/multiqc.smk @@ -9,7 +9,7 @@ rule fastqc: subcategory="{sample}", labels={"Sample": "{sample}", "Cell": "{cell}", "Pair": "{pair}"}, ), - zip="{folder}/{sample}/fastqc/{cell}_{pair}_fastqc.zip", + zip="{folder}/{sample}/multiqc/fastqc/{cell}_{pair}_fastqc.zip", params: "--quiet", log: @@ -137,6 +137,8 @@ rule multiqc: ), log: "{folder}/{sample}/log/multiqc/{sample}.log", + resources: + mem_mb=get_mem_mb_heavy, params: multiqc_input=lambda wc, input: "{abs_path}".format( abs_path=config["abs_path"] diff --git a/workflow/rules/rules.smk b/workflow/rules/rules.smk index 2ba5e2e..144ae97 100644 --- a/workflow/rules/rules.smk +++ b/workflow/rules/rules.smk @@ -15,6 +15,7 @@ if config["genecore"] is True and config["genecore_date_folder"]: localrules: genecore_symlink, + symlink_bam_ashleys, rule genecore_symlink: input: @@ -29,11 +30,12 @@ if config["genecore"] is True and config["genecore_date_folder"]: log: "{folder}/log/genecore_symlink/{sample}/{cell}_{pair}.log", shell: - "ln -s {input} {output}" + "ln -f -s {input} {output}" ruleorder: genecore_symlink > bwa_strandseq_to_reference_alignment +# if config["use_light_data"] is False: localrules: symlink_bam_ashleys, @@ -134,7 +136,8 @@ rule mark_duplicates: conda: "../envs/ashleys_base.yaml" resources: - mem_mb=get_mem_mb, + mem_mb=get_mem_mb_heavy, + # partition="bigmem", time="10:00:00", shell: "sambamba markdup {input.bam} {output} 2>&1 > {log}" @@ -300,6 +303,7 @@ if config["use_light_data"] is False: subcategory="{sample}", labels={"Sample": "{sample}", "Plot Type": "Probabilities"}, ), + well_table="{folder}/{sample}/plots/plate/ashleys_well_table.tsv", log: "{folder}/log/plot_plate/{sample}.log", conda: @@ -326,12 +330,40 @@ if config["publishdir"] != "": rule publishdir_outputs_ashleys: input: - list_publishdir=publishdir_fct(), + list_publishdir=publishdir_fct, output: - touch("{folder}/config/publishdir_outputs.ok"), + touch("{folder}/{sample}/config/publishdir_outputs.ok"), log: - "{folder}/log/publishdir_outputs/publishdir_outputs.log", + "{folder}/log/publishdir_outputs/{sample}.log", conda: "../envs/ashleys_base.yaml" script: "../scripts/utils/publishdir.py" + + +rule save_config: + input: + "config/config.yaml", + output: + "{folder}/{sample}/config/config_ashleys.yaml", + log: + "{folder}/log/save_config/{sample}.log", + conda: + "../envs/ashleys_base.yaml" + resources: + mem_mb=get_mem_mb, + script: + "../scripts/utils/dump_config.py" + + +rule ashleys_final_results: + input: + get_final_output, + output: + "{folder}/{sample}/config/ashleys_final_results.ok", + log: + "{folder}/log/ashleys_final_results/{sample}.log", + conda: + "../envs/ashleys_base.yaml" + shell: + "touch {output}" diff --git a/workflow/scripts/plotting/plot_plate.R b/workflow/scripts/plotting/plot_plate.R index 69fb05d..c2dbd81 100644 --- a/workflow/scripts/plotting/plot_plate.R +++ b/workflow/scripts/plotting/plot_plate.R @@ -62,3 +62,5 @@ raw_map( ggtitle(paste0("Sample: ", snakemake@wildcards[["sample"]], " | ASHLEYS probabilities")) dev.off() + +write.table(ashleys_data, file = snakemake@output[["well_table"]], sep = "\t", row.names = FALSE, quote = FALSE) diff --git a/workflow/scripts/plotting/plot_plate_dev.R b/workflow/scripts/plotting/plot_plate_dev.R new file mode 100644 index 0000000..ae2bf1f --- /dev/null +++ b/workflow/scripts/plotting/plot_plate_dev.R @@ -0,0 +1,109 @@ +library(platetools) +library(ggplot2) +library(viridis) +library(dplyr) +library(stringr) + + +# df <- data.frame(vals = rnorm(384), +# well = num_to_well(1:384, plate = 384)) + +# print(df) +# stop() + +args <- commandArgs(trailingOnly = T) + +prefix = args[1] + +## collect ASHLEYS prediction and count files +ashleys_data <- read.table(file = paste0(prefix, "/cell_selection/labels.tsv"), sep = "\t", header = TRUE) +# ashleys_data <- read.table(file = snakemake@input[["labels"]], sep = "\t", header = TRUE) +plate_type <- nrow((ashleys_data)) +ashleys_data <- dplyr::arrange(ashleys_data, cell) +colnames(ashleys_data)[1] <- "ashleys_id" + +corr_table_path = "workflow/data/plotting/384_1A3C5E7G_correspondance_table.tsv" +corr_table <- read.table(corr_table_path, header = TRUE, sep = "\t") +sample <- basename(prefix) + + +# Apply regex and extract groups +ashleys_data <- ashleys_data %>% + mutate( + index = str_extract(ashleys_id, "(iTRU|PE20)[A-Za-z0-9]{4,5}") + ) + + +# View the result + +# Well_position <- character() + +# if (plate_type == 96) { +# for (i in 1:12) +# { +# for (j in 1:8) +# { +# tmp <- paste0(LETTERS[j], i) +# Well_position <- c(Well_position, tmp) +# } +# } +# } else if (plate_type == 384) { +# for (i in 1:24) +# { +# for (j in 1:16) +# { +# tmp <- paste0(LETTERS[j], i) +# Well_position <- c(Well_position, tmp) +# } +# } +# } + + +print(corr_table) +print(ashleys_data) +ashleys_data <- merge(ashleys_data, corr_table, by.x = "index", by.y = "index", all.x = TRUE) + +write.table(ashleys_data, file = paste0(prefix, "/plots/plate/ashleys_well_table.tsv"), sep = "\t", row.names = FALSE, quote = FALSE) + + +ashleys_data <- ashleys_data %>% + mutate( + Well_row = str_extract(Well_position, "[A-Za-z]+"), + Well_col = as.integer(str_extract(Well_position, "\\d+")) + ) %>% + arrange(Well_row, Well_col) + + +print(ashleys_data) + + +pdf(paste0(prefix, "/plots/plate/ashleys_plate_predictions.pdf")) +# pdf(snakemake@output[["predictions"]]) + + +raw_map( + data = ashleys_data$prediction, + well = ashleys_data$Well_position, + plate = plate_type +) + + scale_fill_distiller(type = "div", palette = "RdYlGn", direction = 1) + + # ggtitle(paste0("Sample: TEST | ASHLEYS binary predictions (cutoff=0.5)")) + ggtitle(paste0("Sample: ", sample, " | ASHLEYS binary predictions (cutoff=", 0.5, ")")) + +dev.off() + +# pdf("TEST_ashleys_plate_probabilities.pdf") +pdf(paste0(prefix, "/plots/plate/ashleys_plate_probabilities.pdf")) + +# pdf(snakemake@output[["probabilities"]]) + +raw_map( + data = ashleys_data$probability, + well = ashleys_data$Well_position, + plate = plate_type +) + + scale_fill_distiller(type = "div", palette = "RdYlGn", direction = 1) + + # ggtitle(paste0("Sample: ", "TEST", " | ASHLEYS probabilities")) + ggtitle(paste0("Sample: ", sample, " | ASHLEYS probabilities")) + +dev.off() \ No newline at end of file diff --git a/workflow/scripts/utils/dump_config.py b/workflow/scripts/utils/dump_config.py new file mode 100644 index 0000000..6b299ee --- /dev/null +++ b/workflow/scripts/utils/dump_config.py @@ -0,0 +1,22 @@ +import yaml + + +def update_config(input_file, output_file): + # Load the existing config file + with open(input_file, "r") as file: + flat_file_config = yaml.safe_load(file) + + # Update the config with Snakemake parameters + for key, value in snakemake.config.items(): + flat_file_config[key] = value + + # Save the updated config to the output file + with open(output_file, "w") as file: + yaml.dump(flat_file_config, file) + + +if __name__ == "__main__": + input_config = snakemake.input[0] + output_config = snakemake.output[0] + + update_config(input_config, output_config) From ad9ec3ecc25ab3467c713ef2d33e94acfdf40f0a Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 09:29:25 +0000 Subject: [PATCH 02/12] Final results file to check if all outputs were produced, dump config update, plot_plate_dev for 384 well plate correspondance --- workflow/rules/gc.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/gc.smk b/workflow/rules/gc.smk index 85f38bf..854481f 100644 --- a/workflow/rules/gc.smk +++ b/workflow/rules/gc.smk @@ -78,7 +78,7 @@ if config["multistep_normalisation"] is True and config["window"] == 200000: output: "{folder}/{sample}/counts/multistep_normalisation/{sample}.txt.scaled.GC.VST.reformat.gz", log: - "{folder}/{sample}/log/reformat_ms_norm/{sample}.log" + "{folder}/{sample}/log/reformat_ms_norm/{sample}.log", conda: "../envs/ashleys_base.yaml" resources: From 947b82ce783cdb1a3a321606fe5f15be5d4db6ec Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 09:31:48 +0000 Subject: [PATCH 03/12] Update github CI/CD --- .github/workflows/main.yaml | 41 +++---------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 2bceafa..e47052f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -2,15 +2,11 @@ name: ashleys-qc-pipeline workflow checks on: schedule: - # Run every Sunday at 00:00 UTC on the master branch - - cron: '0 0 * * 0' - # branches: - # - master + - cron: "0 0 * * 0" push: branches: - - '*' - - '!master' - + - "*" + - "!master" jobs: # WORK @@ -36,13 +32,9 @@ jobs: with: directory: . snakefile: ./workflow/Snakefile - # stagein: "mamba env remove -n snakemake && mamba create -y -n snakemake -c conda-forge -c bioconda unzip snakemake pandas pysam tqdm imagemagick && source activate snakemake && ls -l && pwd" args: "--lint --config ashleys_pipeline=True" Testing_ashleys: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -61,9 +53,6 @@ jobs: Testing_ashleys_fastqc_enabled: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -82,9 +71,6 @@ jobs: Testing_ashleys_ms_norm_enabled: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -102,9 +88,6 @@ jobs: args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config multistep_normalisation=True --conda-frontend mamba --report report.zip" Testing_ashleys_hg38: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -122,9 +105,6 @@ jobs: args: "--cores 1 --use-conda --config reference=hg38 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip" Testing_ashleys_hg19: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -142,9 +122,6 @@ jobs: args: "--cores 1 --use-conda --config reference=hg19 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip" Testing_ashleys_T2T: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -162,9 +139,6 @@ jobs: args: "--cores 1 --use-conda --config reference=T2T use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip" Testing_ashleys_mm10: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -182,9 +156,6 @@ jobs: args: "--cores 1 --use-conda --config reference=mm10 use_light_data=True chromosomes=[chr17] --conda-frontend mamba --report report.zip" Testing_jub_nb: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -202,9 +173,6 @@ jobs: args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config hand_selection=True --conda-frontend mamba --report report.zip" Testing_publishdir: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data @@ -222,9 +190,6 @@ jobs: args: "--cores 1 --use-conda --configfile .tests/config/simple_config.yaml --config publishdir=.tests/data_chr17_publishdir --conda-frontend mamba --report report.zip" Testing_list_commands: runs-on: ubuntu-latest - # needs: - # - Linting - # - Formatting steps: - uses: actions/checkout@v3.3.0 - name: Testing data From 5c447f6c2ffca8fa0154c7326e1af77faefaca4f Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 09:32:10 +0000 Subject: [PATCH 04/12] Update github CI/CD --- .github/workflows/main.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e47052f..888f891 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -6,6 +6,7 @@ on: push: branches: - "*" + - "dev" - "!master" jobs: From 840781d4333a7055841863913a9ba14509c5524f Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 09:33:05 +0000 Subject: [PATCH 05/12] Update github CI/CD --- .github/workflows/main.yaml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 888f891..c760916 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,8 +1,8 @@ name: ashleys-qc-pipeline workflow checks on: - schedule: - - cron: "0 0 * * 0" + # schedule: + # - cron: "0 0 * * 0" push: branches: - "*" @@ -14,7 +14,7 @@ jobs: Formatting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Formatting uses: github/super-linter@v4 @@ -27,7 +27,7 @@ jobs: Linting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Linting uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -37,7 +37,7 @@ jobs: Testing_ashleys: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -55,7 +55,7 @@ jobs: Testing_ashleys_fastqc_enabled: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -73,7 +73,7 @@ jobs: Testing_ashleys_ms_norm_enabled: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -90,7 +90,7 @@ jobs: Testing_ashleys_hg38: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -107,7 +107,7 @@ jobs: Testing_ashleys_hg19: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -124,7 +124,7 @@ jobs: Testing_ashleys_T2T: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -141,7 +141,7 @@ jobs: Testing_ashleys_mm10: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -158,7 +158,7 @@ jobs: Testing_jub_nb: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -175,7 +175,7 @@ jobs: Testing_publishdir: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: @@ -192,7 +192,7 @@ jobs: Testing_list_commands: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.3.0 + - uses: actions/checkout@v4 - name: Testing data uses: snakemake/snakemake-github-action@v1.24.0 with: From dc754e63ed08927c195bcc044ddf2b569ac48321 Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 09:35:02 +0000 Subject: [PATCH 06/12] Update github CI/CD --- .github/workflows/main.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index c760916..a458c0d 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,12 +1,11 @@ name: ashleys-qc-pipeline workflow checks on: - # schedule: - # - cron: "0 0 * * 0" + schedule: + - cron: "0 0 * * 0" push: branches: - - "*" - - "dev" + - "**" - "!master" jobs: From d642639821e6985c8a107d69fdd100ea693a0252 Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 10:44:57 +0000 Subject: [PATCH 07/12] Update github CI/CD --- .github/workflows/main.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index a458c0d..e88cc9b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,8 +1,8 @@ name: ashleys-qc-pipeline workflow checks on: - schedule: - - cron: "0 0 * * 0" + # schedule: + # - cron: "0 0 * * 0" push: branches: - "**" From f33ef1de5e6ca6c52ce730835a093b9623e88e93 Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 10:49:21 +0000 Subject: [PATCH 08/12] Update github CI/CD --- .github/workflows/action_test.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/action_test.yaml diff --git a/.github/workflows/action_test.yaml b/.github/workflows/action_test.yaml new file mode 100644 index 0000000..8d8fe4f --- /dev/null +++ b/.github/workflows/action_test.yaml @@ -0,0 +1,22 @@ +name: Example Workflow + +on: + push: + branches: + - "**" + +jobs: + hello_world_job: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + + - name: Run a one-line script + run: echo "Hello, world! My first GitHub Actions workflow." + + - name: Run a multi-line script + run: | + echo "This is a multi-line script." + echo "You can add more commands here." + echo "Each command is run in the shell." From c0fe9899ceeecacbc29207fe3c23207a35cc9ffb Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 10:51:54 +0000 Subject: [PATCH 09/12] Update github CI/CD --- .github/workflows/main.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e88cc9b..7b63a99 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,11 +1,14 @@ name: ashleys-qc-pipeline workflow checks on: - # schedule: - # - cron: "0 0 * * 0" + schedule: + # Run every Sunday at 00:00 UTC on the master branch + - cron: "0 0 * * 0" + # branches: + # - master push: branches: - - "**" + - "*" - "!master" jobs: From c8d1d04c6c546d03bc65ff4775840afda589e02c Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 11:00:45 +0000 Subject: [PATCH 10/12] Update github CI/CD --- .github/workflows/{main.yaml => main_test.yaml} | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) rename .github/workflows/{main.yaml => main_test.yaml} (98%) diff --git a/.github/workflows/main.yaml b/.github/workflows/main_test.yaml similarity index 98% rename from .github/workflows/main.yaml rename to .github/workflows/main_test.yaml index 7b63a99..c10e18c 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main_test.yaml @@ -1,15 +1,9 @@ name: ashleys-qc-pipeline workflow checks on: - schedule: - # Run every Sunday at 00:00 UTC on the master branch - - cron: "0 0 * * 0" - # branches: - # - master push: branches: - - "*" - - "!master" + - "**" jobs: # WORK From 7383b815d6ff5feb1bb924c5ee8ade3aa21e633c Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 12:34:33 +0000 Subject: [PATCH 11/12] Update github CI/CD --- .tests/config/simple_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.tests/config/simple_config.yaml b/.tests/config/simple_config.yaml index ecd378f..869dd5c 100644 --- a/.tests/config/simple_config.yaml +++ b/.tests/config/simple_config.yaml @@ -92,7 +92,7 @@ genecore_regex_element: "PE20" mosaicatcher_pipeline: False # Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purpose -use_light_data: False +use_light_data: True # For snakemake linting abs_path: "/" From 202ad00b4fedbac69c5226eb5b1aa2f3c5a7002e Mon Sep 17 00:00:00 2001 From: Thomas Weber Date: Mon, 4 Dec 2023 13:55:17 +0000 Subject: [PATCH 12/12] Dockerfile --- .github/workflows/action_test.yaml | 22 ---- .../Dockerfile-2.2.3.dockerfile | 118 ++++++++++++++++++ workflow/rules/rules.smk | 1 - 3 files changed, 118 insertions(+), 23 deletions(-) delete mode 100644 .github/workflows/action_test.yaml create mode 100644 github-actions-runner/Dockerfile-2.2.3.dockerfile diff --git a/.github/workflows/action_test.yaml b/.github/workflows/action_test.yaml deleted file mode 100644 index 8d8fe4f..0000000 --- a/.github/workflows/action_test.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: Example Workflow - -on: - push: - branches: - - "**" - -jobs: - hello_world_job: - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v3 - - - name: Run a one-line script - run: echo "Hello, world! My first GitHub Actions workflow." - - - name: Run a multi-line script - run: | - echo "This is a multi-line script." - echo "You can add more commands here." - echo "Each command is run in the shell." diff --git a/github-actions-runner/Dockerfile-2.2.3.dockerfile b/github-actions-runner/Dockerfile-2.2.3.dockerfile new file mode 100644 index 0000000..eba6242 --- /dev/null +++ b/github-actions-runner/Dockerfile-2.2.3.dockerfile @@ -0,0 +1,118 @@ +FROM condaforge/mambaforge:latest +LABEL io.github.snakemake.containerized="true" +LABEL io.github.snakemake.conda_env_hash="e9bc3082704cbf20eaa004e1360fb45da1359e3288296fb66dfad7e245e22563" + +# Step 1: Retrieve conda environments + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml +# prefix: /conda-envs/5681728a49bd83ceed09ba194330c858 +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - bwa ==0.7.17 +RUN mkdir -p /conda-envs/5681728a49bd83ceed09ba194330c858 +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml +# prefix: /conda-envs/08d4368302a4bdf7eda6b536495efe7d +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - fastqc ==0.11.9 +RUN mkdir -p /conda-envs/08d4368302a4bdf7eda6b536495efe7d +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/fastqc/environment.yaml /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml + +# Conda environment: +# source: workflow/envs/ashleys_base.yaml +# prefix: /conda-envs/87c04f5d115eff742eca84455513deba +# name: ashleys_base +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - samtools +# - tabix +# - bwa +# - sambamba +# - mosaicatcher +# # - alfred +# - ashleys-qc +# - pandas +# # PUBLISHDIR +# - rsync +# # MULTIQC +# - multiqc +# # Fix sklearn update +# - scikit-learn=1.2.2 +RUN mkdir -p /conda-envs/87c04f5d115eff742eca84455513deba +COPY workflow/envs/ashleys_base.yaml /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml + +# Conda environment: +# source: workflow/envs/ashleys_rtools.yaml +# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +# name: rtools +# channels: +# - conda-forge +# - bioconda +# - r +# - anaconda +# dependencies: +# # - bioconductor-biocparallel +# # - bioconductor-bsgenome +# # - bioconductor-bsgenome.hsapiens.ucsc.hg19 +# # - bioconductor-bsgenome.hsapiens.ucsc.hg38 +# # - bioconductor-fastseg +# # - bioconductor-genomicalignments +# - bioconductor-genomicranges +# # - bioconductor-rsamtools +# # - bioconductor-s4vectors +# - r-assertthat +# - r-base +# # - r-biocmanager +# - r-cowplot +# - r-data.table +# # - r-devtools +# # - r-doparallel +# # - r-foreach +# - r-ggplot2 +# # - r-gtools +# - r-reshape2 +# # - r-zoo +# # - r-dplyr +# # - r-mc2d +# # - r-pheatmap +# # - bioconductor-complexheatmap +# # - r-gplots +# - r-scales +# - r-rcolorbrewer +# # - r-stringr +# - r-cairo +# - fonts-anaconda +# # NEW +# - bioconductor-edger +# - r-r.utils +# # PLATE PLOT +# - r-dplyr +# - r-platetools +# - r-viridis +# # GC_correction +# - r-tidyr +# - r-ggpubr +# # SOLVE R lib issue +# - r-stringi=1.7.12 +RUN mkdir -p /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +COPY workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml + +# Step 2: Generate conda environments + +RUN mamba env create --prefix /conda-envs/5681728a49bd83ceed09ba194330c858 --file /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml && \ + mamba env create --prefix /conda-envs/08d4368302a4bdf7eda6b536495efe7d --file /conda-envs/08d4368302a4bdf7eda6b536495efe7d/environment.yaml && \ + mamba env create --prefix /conda-envs/87c04f5d115eff742eca84455513deba --file /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml && \ + mamba env create --prefix /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 --file /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml && \ + mamba clean --all -y diff --git a/workflow/rules/rules.smk b/workflow/rules/rules.smk index 144ae97..f8f6ecc 100644 --- a/workflow/rules/rules.smk +++ b/workflow/rules/rules.smk @@ -137,7 +137,6 @@ rule mark_duplicates: "../envs/ashleys_base.yaml" resources: mem_mb=get_mem_mb_heavy, - # partition="bigmem", time="10:00:00", shell: "sambamba markdup {input.bam} {output} 2>&1 > {log}"