diff --git a/envs/dummy_env.yaml b/envs/dummy_env.yaml
deleted file mode 100644
index e69de29..0000000
diff --git a/rules/align.smk b/rules/align.smk
deleted file mode 100644
index 70e09e1..0000000
--- a/rules/align.smk
+++ /dev/null
@@ -1,90 +0,0 @@
-# if config['bsjfa']:
-#     BSJFA=config['bsjfa']
-# else:
-#     BSJFA=config['bsjfadefault']
-# check_readaccess(BSJFA)
-PRIMERINFO=config['primertsv']
-check_readaccess(PRIMERINFO)
-REFFA=config['reffa']
-check_readaccess(REFFA)
-
-rule create_primerbed:
-    input:
-        primertsv=PRIMERINFO
-    output:
-        primerbed=join(WORKDIR,"bowtie_index","primers.bed")
-    params:
-        script=join(SCRIPTSDIR,"build_primer_bed.py")
-    envmodules: TOOLS["python37"]["version"]
-    shell:"""
-python {params.script} {input.primertsv} > {output.primerbed}
-"""
-
-rule create_bsjfa:
-    input:
-        bed=rules.create_primerbed.output.primerbed,
-        reffa=REFFA
-    output:
-        bsjfa=join(WORKDIR,"bowtie_index","bsj.fa")
-    params:
-        script=join(SCRIPTSDIR,"generate_bsj_fasta_from_primer_bed.py")
-    container: "docker://nciccbr/ccbr_htseq_0.13.5:latest"
-    shell:"""
-python {params.script} \
---bed {input.bed} \
---reffa {input.reffa} \
---outfa {output.bsjfa}
-"""
-
-rule build_index:
-    input:
-        bsjfa=rules.create_bsjfa.output.bsjfa,
-    output:
-        join(WORKDIR,"bowtie_index","bsj.1.bt2")
-    params:
-        outdir=join(WORKDIR,"bowtie_index")
-    envmodules: TOOLS["bowtie2"]["version"]
-    threads: 2
-    shell: """
-# cp {input.bsjfa} {params.outdir}
-cd {params.outdir}
-bsjfa=$(basename {input.bsjfa})
-bowtie2-build $bsjfa bsj
-# gzip -n -f {input.bsjfa}
-"""
-
-rule align:
-    input:
-        r1=rules.cutadapt.output.of1,
-        r2=rules.cutadapt.output.of2,
-        bt2_index=rules.build_index.output,
-    output:
-        bam=join(WORKDIR,"results","{sample}","bam","{sample}.bam")
-    params:
-        sample="{sample}",
-        workdir=WORKDIR,
-        outdir=join(WORKDIR,"results","{sample}","bowtie2"),
-        peorse=get_peorse,
-        bowtie2_alignment_parameters=config["bowtie2_alignment_parameters"],
-        mem=MEMORY
-    threads: 56
-    envmodules: TOOLS["bowtie2"]["version"], TOOLS["sambamba"]["version"]
-    shell:"""
-bt2_index=$(echo "{input.bt2_index}"|awk -F".1.bt2" '{{print $1}}')
-if [ "{params.peorse}" == "PE" ];then
-    bowtie2 \
-        --threads {threads} \
-        {params.bowtie2_alignment_parameters} \
-        -x $bt2_index \
-        -U {input.r1},{input.r2}
-else
-    bowtie2 \
-        --threads {threads} \
-        {params.bowtie2_alignment_parameters} \
-        -x $bt2_index \
-        -U {input.r1}
-fi \
-| awk -F"\\t" '{{if ($6 ~ /60M/ || $1 ~ /^@/){{print}}}}' \
-| sambamba view --nthreads={threads} -S --format=bam -o=/dev/stdout /dev/stdin \
-| sambamba sort --memory-limit={params.mem}G --tmpdir=/dev/shm --nthreads={threads} --out={output.bam} /dev/stdin
-"""
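Editor's note: the awk filter in rule align keeps SAM header lines plus reads whose CIGAR contains "60M", i.e. a contiguous 60 nt match spanning the two 30 nt flanks (the default --flankmax) on either side of the back-spliced junction. A minimal Python sketch of the same filter, for reference only; the file name is hypothetical, and the substring test deliberately reproduces the awk regex's loose matching (it would also match "160M"):

import re

def passes_bsj_filter(sam_line, match_len=60):
    """Keep SAM header lines and reads whose CIGAR contains '60M',
    i.e. a contiguous match across both flanks of the BSJ."""
    if sam_line.startswith("@"):              # header line
        return True
    fields = sam_line.rstrip("\n").split("\t")
    cigar = fields[5]                          # column 6 of a SAM record = CIGAR
    return "%dM" % match_len in cigar          # mirrors awk's $6 ~ /60M/

# usage sketch, on an uncompressed SAM stream:
# with open("sample.sam") as sam:
#     kept = [line for line in sam if passes_bsj_filter(line)]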
diff --git a/rules/init.smk b/rules/init.smk
deleted file mode 100644
index fb87a25..0000000
--- a/rules/init.smk
+++ /dev/null
@@ -1,96 +0,0 @@
-import shutil
-import sys
-import os
-import pandas as pd
-import yaml
-import glob
-
-CONFIGFILE = str(workflow.overwrite_configfiles[0])
-
-def check_existence(filename):
-    """Checks if file exists on filesystem
-    :param filename: Name of file to check
-    """
-    filename=filename.strip()
-    if not os.path.exists(filename):
-        sys.exit("File: {} does not exist!".format(filename))
-    return True
-
-
-def check_readaccess(filename):
-    """Checks permissions to see if user can read a file
-    :param filename: Name of file to check
-    """
-    filename=filename.strip()
-    check_existence(filename)
-    if not os.access(filename,os.R_OK):
-        sys.exit("File: {} exists, but user cannot read from file due to permissions!".format(filename))
-    return True
-
-
-def check_writeaccess(filename):
-    """Checks permissions to see if user can write to a file
-    :param filename: Name of file to check
-    """
-    filename=filename.strip()
-    check_existence(filename)
-    if not os.access(filename,os.W_OK):
-        sys.exit("File: {} exists, but user cannot write to file due to permissions!".format(filename))
-    return True
-
-
-#
-MEMORY="100"
-# get working dir from config
-WORKDIR = config["workdir"]
-
-# get resources folder
-try:
-    RESOURCESDIR = config["resourcesdir"]
-except KeyError:
-    RESOURCESDIR = join(WORKDIR,"resources")
-
-# get scripts folder
-try:
-    SCRIPTSDIR = config["scriptsdir"]
-except KeyError:
-    SCRIPTSDIR = join(WORKDIR,"scripts")
-
-## Load tools from YAML file
-with open(config["tools"]) as f:
-    TOOLS = yaml.safe_load(f)
-
-if not os.path.exists(join(WORKDIR,"fastqs")):
-    os.mkdir(join(WORKDIR,"fastqs"))
-if not os.path.exists(join(WORKDIR,"results")):
-    os.mkdir(join(WORKDIR,"results"))
-for f in ["samples", "tools", "cluster"]:
-    check_readaccess(config[f])
-
-SAMPLESDF = pd.read_csv(config["samples"],sep="\t",header=0,index_col="sampleName")
-SAMPLES = list(SAMPLESDF.index)
-SAMPLESDF["R1"]=join(RESOURCESDIR,"dummy")
-SAMPLESDF["R2"]=join(RESOURCESDIR,"dummy")
-SAMPLESDF["PEorSE"]="PE"
-
-for sample in SAMPLES:
-    R1file=SAMPLESDF["path_to_R1_fastq"][sample]
-    R2file=SAMPLESDF["path_to_R2_fastq"][sample]
-    # print(sample,R1file,R2file)
-    check_readaccess(R1file)
-    R1filenewname=join(WORKDIR,"fastqs",sample+".R1.fastq.gz")
-    if not os.path.exists(R1filenewname):
-        os.symlink(R1file,R1filenewname)
-    SAMPLESDF.loc[[sample],"R1"]=R1filenewname
-    if str(R2file)!='nan':
-        check_readaccess(R2file)
-        R2filenewname=join(WORKDIR,"fastqs",sample+".R2.fastq.gz")
-        if not os.path.exists(R2filenewname):
-            os.symlink(R2file,R2filenewname)
-        SAMPLESDF.loc[[sample],"R2"]=R2filenewname
-    else:
-        SAMPLESDF.loc[[sample],"PEorSE"]="SE"
-# print(SAMPLESDF)
-# sys.exit()
-
-
diff --git a/rules/quant.smk b/rules/quant.smk
deleted file mode 100644
index 6637037..0000000
--- a/rules/quant.smk
+++ /dev/null
@@ -1,49 +0,0 @@
-BSJFA=rules.create_bsjfa.output.bsjfa
-
-rule salmon:
-    input:
-        bam=rules.align.output.bam,
-        reffa=BSJFA,
-    output:
-        sf=join(WORKDIR,"results","{sample}","salmon","quant.sf")
-    params:
-        salmon_parameters=config["salmon_parameters"]
-    envmodules: TOOLS["salmon"]["version"]
-    shell:"""
-outdir=$(dirname {output.sf})
-salmon quant \
---threads {threads} \
-{params.salmon_parameters} \
--t {input.reffa} \
--a {input.bam} \
---output $outdir
-"""
-
-rule aggregate_quant:
-    input:
-        expand(join(WORKDIR,"results","{sample}","salmon","quant.sf"),sample=SAMPLES)
-    output:
-        mergedquant=join(WORKDIR,"results","mergedquant.tsv"),
-        filteredmergedquant=join(WORKDIR,"results","mergedquant.filtered.tsv")
-    params:
-        rowsumfilter=config['aggregate_quant_rowsum_filter']
-    envmodules: TOOLS["salmon"]["version"]
-    shell:"""
-names=""
-quants=""
-for i in {input};do
-    outdir=$(dirname $i)
-    quants="$quants $outdir"
-    samplename=$(echo $outdir|awk -F"/" '{{print $(NF-1)}}')
-    names="$names $samplename"
-done
-salmon quantmerge --quants $quants --names $names --column numreads -o {output.mergedquant}
-head -n1 {output.mergedquant} > {output.filteredmergedquant}
-tail -n +2 {output.mergedquant} |\
-awk -F"\\t" -v r={params.rowsumfilter} -v OFS="\\t" '{{for(i=2;i<=NF;i++){{sum[NR]=sum[NR]+$i}};if (sum[NR] >= r) {{print sum[NR],$0}}}}' |\
-sort -k1,1gr |\
-cut -f2- >> {output.filteredmergedquant}
-"""
-
-
-
\ No newline at end of file
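Editor's note: the awk/sort/cut pipeline in rule aggregate_quant drops any transcript whose numreads summed across all samples falls below aggregate_quant_rowsum_filter, then sorts the survivors by descending row sum. A pandas sketch of the same filtering, assuming (as quantmerge produces) that the first column holds the transcript name and the remaining columns hold per-sample counts; file names and the default threshold here are illustrative:

import pandas as pd

def filter_merged_quant(mergedquant_tsv, out_tsv, rowsum_filter=10):
    """Keep rows whose per-sample counts sum to >= rowsum_filter,
    sorted by descending row sum (mirrors the awk | sort -k1,1gr | cut pipeline)."""
    df = pd.read_csv(mergedquant_tsv, sep="\t")
    rowsums = df.iloc[:, 1:].sum(axis=1)       # skip the name column
    kept = df[rowsums >= rowsum_filter]
    kept = kept.reindex(rowsums[kept.index].sort_values(ascending=False).index)
    kept.to_csv(out_tsv, sep="\t", index=False)

# filter_merged_quant("mergedquant.tsv", "mergedquant.filtered.tsv", rowsum_filter=10)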
diff --git a/rules/trim.smk b/rules/trim.smk
deleted file mode 100644
index 9fe2ff5..0000000
--- a/rules/trim.smk
+++ /dev/null
@@ -1,51 +0,0 @@
-def get_fastqs(wildcards):
-    d=dict()
-    d["R1"]=SAMPLESDF["R1"][wildcards.sample]
-    d["R2"]=SAMPLESDF["R2"][wildcards.sample]
-    return d
-
-def get_peorse(wildcards):
-    return SAMPLESDF["PEorSE"][wildcards.sample]
-
-rule cutadapt:
-    input:
-        unpack(get_fastqs)
-    output:
-        of1=join(WORKDIR,"results","{sample}","trim","{sample}.R1.trim.fastq.gz"),
-        of2=join(WORKDIR,"results","{sample}","trim","{sample}.R2.trim.fastq.gz")
-    params:
-        sample="{sample}",
-        workdir=WORKDIR,
-        outdir=join(WORKDIR,"results","{sample}"),
-        peorse=get_peorse,
-        adapters=join(RESOURCESDIR,"TruSeq_and_nextera_adapters.consolidated.fa")
-    envmodules: TOOLS["cutadapt"]["version"]
-    threads: 56
-    shell:"""
-if [ ! -d {params.outdir} ];then mkdir {params.outdir};fi
-if [ "{params.peorse}" == "PE" ];then
-    ## Paired-end
-    cutadapt --pair-filter=any \
-    --nextseq-trim=2 \
-    --trim-n \
-    -n 5 -O 5 \
-    -q 10,10 -m 35:35 \
-    -b file:{params.adapters} \
-    -B file:{params.adapters} \
-    -j {threads} \
-    -o {output.of1} -p {output.of2} \
-    {input.R1} {input.R2}
-else
-    ## Single-end
-    cutadapt \
-    --nextseq-trim=2 \
-    --trim-n \
-    -n 5 -O 5 \
-    -q 10,10 -m 35 \
-    -b file:{params.adapters} \
-    -j {threads} \
-    -o {output.of1} \
-    {input.R1}
-    touch {output.of2}
-fi
-"""
\ No newline at end of file
diff --git a/scripts/build_primer_bed.py b/scripts/build_primer_bed.py
deleted file mode 100644
index c667c93..0000000
--- a/scripts/build_primer_bed.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import sys
-primerstsv=open(sys.argv[1]).readlines()
-primerstsv.pop(0)
-primers=dict()
-for i in primerstsv:
-    i=i.strip().split("\t")
-    circRNAnamesplit=i[0].split("_")
-    circRNAname="_".join(circRNAnamesplit[:-1])
-    if not circRNAname in primers:
-        primers[circRNAname]=dict()
-        primers[circRNAname]["coordinates"]=list()
-    ForR=circRNAnamesplit[-1][0]
-    primers[circRNAname]["chrom"]=i[1]
-    strand=i[4]
-    primers[circRNAname]["coordinates"].append(int(i[2]))
-    primers[circRNAname]["coordinates"].append(int(i[3]))
-    primers[circRNAname]["coordinates"].sort()
-    if ForR == "F":
-        primers[circRNAname]["strand"]=strand
-    if ForR == "F" and strand == "+":
-        primers[circRNAname]["ASorSS"]="SS"
-    if ForR == "F" and strand == "-":
-        primers[circRNAname]["ASorSS"]="AS"
-for primer in primers:
-    sname=primer+"_"+primers[primer]["ASorSS"]
-    #sname="_".join([primer,primers[primer]["ASorSS"]])
-    #print(sname)
-    chrom=primers[primer]["chrom"]
-    start=str(primers[primer]["coordinates"][0])
-    end=str(primers[primer]["coordinates"][-1])
-    strand=primers[primer]["strand"]
-    print("\t".join([chrom,start,end,sname,".",strand]))
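Editor's note: build_primer_bed.py collapses each circRNA's F/R primer pair into one BED interval spanning the leftmost to rightmost primer coordinate, tagging the name with _SS or _AS according to the forward primer's strand. A usage sketch with a hypothetical two-primer input (the TSV path and values are illustrative; columns follow the code's expectation of name, chrom, start, end, strand):

import subprocess

# hypothetical primers.tsv (header line is dropped by the script):
#   circA_F   chr1   100   120   +
#   circA_R   chr1   300   320   +
# expected BED line on stdout, spanning both primers:
#   chr1   100   320   circA_SS   .   +
bed = subprocess.run(
    ["python", "scripts/build_primer_bed.py", "primers.tsv"],
    capture_output=True, text=True, check=True,
).stdout
print(bed, end="")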
diff --git a/scripts/gather_cluster_stats.sh b/scripts/gather_cluster_stats.sh
deleted file mode 100644
index 49c326e..0000000
--- a/scripts/gather_cluster_stats.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-## AUTHOR : Vishal N. Koparde, Ph.D., CCBR, NCI
-## DATE : Feb 2021
-## This script gathers cluster-related statistics for jobs run on Biowulf using Snakemake by:
-## > extracting "external" jobids from snakemake.log files
-## > gathering cluster stats for each job using the "jobdata" and "jobhist" commands
-## > sorting output by job submission time
-## > writing a TSV file to stdout
-##
-
-function node2runpartition {
-    node=$1
-    partitions_requested=$2
-    run_partition=$(for p in `echo $partitions_requested|awk '{print $NF}'|tr "," " "`;do if [ "$(freen -N $node|grep $p|awk '{print $1}'|grep $p|wc -l)" == "1" ]; then echo $p;break 1;fi;done)
-    if [ "$run_partition" == "" ];then
-        echo "unknown"
-    else
-        echo "$run_partition"
-    fi
-}
-
-
-function get_jobid_stats {
-jobid=$1
-declare -A jobdataarray
-notaccountablejob=$(jobhist $jobid|grep "No accounting"|wc -l)
-if [ "$notaccountablejob" == "1" ];then
-    jobdataarray["submit_time"]="JOBNOTACCOUNTABLE"
-    jobdataarray["jobid"]="$jobid"
-else
-    jobdata $jobid > ${jobid}.tmp
-    awk -F"\t" '{if (NF==2) {print}}' ${jobid}.tmp > ${jobid}.data && rm -f ${jobid}.tmp
-    while read a b;do
-        jobdataarray["$a"]="$b"
-    done < ${jobid}.data
-    rm -f ${jobid}.data
-    st=${jobdataarray["submit_time"]}
-    jobdataarray["human_submit_time"]=$(date -d @$st|sed "s/ /_/g")
-    jobdataarray["alloc_node_partition"]=$(node2runpartition ${jobdataarray["alloc_node"]} ${jobdataarray["partition"]})
-    jobdataarray["run_node_partition"]=$(node2runpartition ${jobdataarray["node_list"]} ${jobdataarray["partition"]})
-fi
-echo -ne "${jobdataarray["submit_time"]}\t"
-echo -ne "${jobdataarray["human_submit_time"]}\t"
-echo -ne "${jobdataarray["jobid"]}:${jobdataarray["state"]}:${jobdataarray["job_name"]}\t"
-echo -ne "${jobdataarray["alloc_node"]}:${jobdataarray["alloc_node_partition"]}:${jobdataarray["node_list"]}:${jobdataarray["run_node_partition"]}\t"
-echo -ne "${jobdataarray["queued"]}:${jobdataarray["elapsed"]}:${jobdataarray["time_limit"]}\t"
-echo -ne "${jobdataarray["avg_cpus"]}:${jobdataarray["max_cpu_used"]}:${jobdataarray["cpus_per_task"]}\t"
-echo -ne "${jobdataarray["avg_mem"]}:${jobdataarray["max_mem_used"]}:${jobdataarray["total_mem"]}\t"
-echo -ne "${jobdataarray["partition"]}:${jobdataarray["qos"]}\t"
-echo -ne "${jobdataarray["username"]}:${jobdataarray["groupname"]}:${jobdataarray["account"]}\t"
-echo -ne "${jobdataarray["work_dir"]}\t"
-echo -ne "${jobdataarray["std_out"]}\t"
-echo -ne "${jobdataarray["std_err"]}\n"
-}
-
-if [ "$#" != "1" ];then
-    echo "Usage: bash $0 <snakemakelogfile>"
-    exit 1
-fi
-
-snakemakelogfile=$1
-grep "with external jobid" $snakemakelogfile | awk '{print $NF}' | sed "s/['.]//g" | sort | uniq > ${snakemakelogfile}.jobids.lst
-echo -ne "##SubmitTime\tHumanSubmitTime\tJobID:JobState:JobName\tAllocNode:AllocNodePartition:RunNode:RunNodePartition\tQueueTime:RunTime:TimeLimit\tAvgCPU:MaxCPU:CPULimit\tAvgMEM:MaxMEM:MEMLimit\tPartition:QOS\tUsername:Group:Account\tWorkdir\tStdOut\tStdErr\n"
-while read jid;do
-    get_jobid_stats $jid
-done < ${snakemakelogfile}.jobids.lst |sort -k1,1n
-rm -f ${snakemakelogfile}.jobids.lst
\ No newline at end of file
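Editor's note: jobdata and jobhist are Biowulf-specific commands, but the first step of this script, pulling external SLURM job IDs out of a snakemake log, is plain text processing. A Python sketch of that step (the log path in the usage comment is hypothetical):

import re

def external_jobids(snakemake_log):
    """Collect unique external (SLURM) job ids from a snakemake log, mirroring:
    grep "with external jobid" | awk '{print $NF}' | sed "s/['.]//g" | sort | uniq"""
    ids = set()
    with open(snakemake_log) as fh:
        for line in fh:
            if "with external jobid" in line:
                last_field = line.split()[-1]          # awk '{print $NF}'
                ids.add(re.sub(r"['.]", "", last_field))  # sed "s/['.]//g"
    return sorted(ids)

# print("\n".join(external_jobids("snakemake.log")))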
primers[primer]["chrom"]=f[0] - primers[primer]["start"]=int(f[1]) - primers[primer]["end"]=int(f[2]) - primers[primer]["strand"]=f[5] - return primers - -parser = argparse.ArgumentParser(description='Create fasta file from divergent primers bed file') -parser.add_argument('--bed', dest='primerbed', type=str, required=True, - help='Divergent primers bed file with min. 4 columns (chr,start,end,name)') -parser.add_argument('--reffa', dest='reffa', type=str, required=True, - help='reference fasta') -parser.add_argument('--outfa', dest='outfa', type=str, required=True, - help='output fasta') -parser.add_argument('--scanlength', dest='scanlen', type=int, required=False, default=200, - help='scan length...even positive number(default 200)') -parser.add_argument('--flankmax', dest='flankmax', type=int, required=False, default=30, - help='flankmax (default 30)') -args = parser.parse_args() -# sequences = dict( (s.name, s) for s in HTSeq.FastaReader(args.reffa) ) -sequences = dict( (s[1], s[0]) for s in HTSeq.FastaReader(args.reffa, raw_iterator=True) ) -# sequences = dict( (s[1], s[0]) for s in HTSeq.FastaReader(args.reffa) ) -primers = read_bed_file(args.primerbed) -# print(primers) -# for primer in primers: -# print(primers[primer]["chrom"]) -outfastafile = open( args.outfa, "w" ) -offset = "N"*300 -for primer in primers: - seq=sequences[primers[primer]["chrom"]] - for i in range(primers[primer]["start"]-args.scanlen,primers[primer]["start"],1): - for j in range(primers[primer]["end"],primers[primer]["end"]+args.scanlen,1): - k=int((j-i)/2) - if k >= args.flankmax: - bsjflank=seq[ j - args.flankmax : j ] + seq[ i : i + args.flankmax ] - else: - bsjflank=seq[ j-k : j ] + seq[ i : i+k ] - sname = "_".join([primer,str(i),str(j)]) - # print(sname) - # print(bsjflank) - myseq = HTSeq.Sequence( bytes(offset+bsjflank+offset, 'utf-8'), sname) - myseq.write_to_fasta_file(outfastafile) -outfastafile.close() \ No newline at end of file diff --git a/scripts/run_wrapper.bash b/scripts/run_wrapper.bash deleted file mode 100644 index 155e223..0000000 --- a/scripts/run_wrapper.bash +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env bash -# Author: Vishal Koparde, Ph.D. 
diff --git a/scripts/run_wrapper.bash b/scripts/run_wrapper.bash
deleted file mode 100644
index 155e223..0000000
--- a/scripts/run_wrapper.bash
+++ /dev/null
@@ -1,268 +0,0 @@
-#!/usr/bin/env bash
-# Author: Vishal Koparde, Ph.D.
-# CCBR, NCI
-# (c) 2021
-#
-# wrapper script to run the TOBIAS snakemake workflow
-# run tobias
-# https://github.com/loosolab/tobias/
-# ## clone the pipeline to a folder
-# ## git clone https://github.com/loosolab/TOBIAS.git
-
-set -eo pipefail
-module purge
-
-SINGULARITY_BINDS="-B ${PIPELINE_HOME}:${PIPELINE_HOME} -B ${WORKDIR}:${WORKDIR}"
-
-function get_git_commitid_tag() {
-    cd $1
-    gid=$(git rev-parse HEAD)
-    tag=$(git describe --tags $gid 2>/dev/null)
-    echo -ne "$gid\t$tag"
-}
-
-# ## setting PIPELINE_HOME
-PIPELINE_HOME=$(readlink -f $(dirname "$0"))
-echo "Pipeline Dir: $PIPELINE_HOME"
-SNAKEFILE="${PIPELINE_HOME}/Snakefile"
-# get git commit tag
-GIT_COMMIT_TAG=$(get_git_commitid_tag $PIPELINE_HOME)
-echo "Git Commit/Tag: $GIT_COMMIT_TAG"
-
-function usage() { cat << EOF
-run_tobias.sh: run TOBIAS for ATAC seq data
-USAGE:
-  bash run_tobias.sh <MODE>
-Required Positional Argument:
-  MODE: [Type: Str] Valid options:
-    a) init     : initialize workdir
-    b) run      : run with slurm
-    c) reset    : DELETE workdir dir and re-init it
-    d) dryrun   : dry run snakemake to generate DAG
-    e) unlock   : unlock workdir if locked by snakemake
-    f) runlocal : run without submitting to sbatch
-EOF
-}
-
-function err() { cat <<< "
-#
-#
-#
-  $@
-#
-#
-#
-" && usage && exit 1 1>&2; }
-
-function init() {
-
-if [ "$#" -eq "1" ]; then err "init needs an absolute path to the working dir"; fi
-if [ "$#" -gt "2" ]; then err "init takes only one more argument"; fi
-WORKDIR=$2
-x=$(echo $WORKDIR|awk '{print substr($1,1,1)}')
-if [ "$x" != "/" ]; then err "working dir should be supplied as an absolute path"; fi
-echo "Working Dir: $WORKDIR"
-if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; exit 1; fi
-mkdir -p $WORKDIR
-sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/config/config.yaml > $WORKDIR/config.yaml
-
-# create log and stats folders
-if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi
-if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi
-
-echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv"
-
-}
-
-function runcheck(){
-    if [ "$#" -eq "1" ]; then err "absolute path to the working dir needed"; usage; exit 1; fi
-    if [ "$#" -gt "2" ]; then err "too many arguments"; usage; exit 1; fi
-    WORKDIR=$2
-    echo "Working Dir: $WORKDIR"
-    if [ ! -d $WORKDIR ];then err "Folder $WORKDIR does not exist!"; exit 1; fi
-    module load python/3.7
-    module load snakemake/5.24.1
-}
-
-function dryrun() {
-    runcheck "$@"
-    run "--dry-run"
-}
-
-function unlock() {
-    runcheck "$@"
-    run "--unlock"
-}
-
-function runlocal() {
-    runcheck "$@"
-    if [ "$SLURM_JOB_ID" == "" ];then err "runlocal can only be done on an interactive node"; exit 1; fi
-    module load singularity
-    run "local"
-}
-
-function runslurm() {
-    runcheck "$@"
-    run "slurm"
-}
-
-function preruncleanup() {
-    echo "Running..."
-
-    cd $WORKDIR
-    ## check if initialized
-    for f in config.yaml samples.tsv; do
-        if [ ! -f $WORKDIR/$f ]; then err "Error: '${f}' file not found in workdir ... initialize first!";usage && exit 1;fi
-    done
-    ## Archive previous run files
-    if [ -f ${WORKDIR}/snakemake.log ];then
-        modtime=$(stat ${WORKDIR}/snakemake.log |grep Modify|awk '{print $2,$3}'|awk -F"." '{print $1}'|sed "s/ //g"|sed "s/-//g"|sed "s/://g")
-        mv ${WORKDIR}/snakemake.log ${WORKDIR}/stats/snakemake.${modtime}.log
-        if [ -f ${WORKDIR}/snakemake.log.HPC_summary.txt ];then
-            mv ${WORKDIR}/snakemake.log.HPC_summary.txt ${WORKDIR}/stats/snakemake.${modtime}.log.HPC_summary.txt
-        fi
-        if [ -f ${WORKDIR}/snakemake.stats ];then
-            mv ${WORKDIR}/snakemake.stats ${WORKDIR}/stats/snakemake.${modtime}.stats
-        fi
-    fi
-    nslurmouts=$(find ${WORKDIR} -maxdepth 1 -name "slurm-*.out" |wc -l)
-    if [ "$nslurmouts" != "0" ];then
-        for f in $(ls ${WORKDIR}/slurm-*.out);do gzip -n $f;mv ${f}.gz ${WORKDIR}/logs/;done
-    fi
-
-}
-
-function postrun() {
-    bash ${PIPELINE_HOME}/scripts/gather_cluster_stats.sh ${WORKDIR}/snakemake.log > ${WORKDIR}/snakemake.log.HPC_summary.txt
-}
-
-function run() {
-
-    if [ "$1" == "local" ];then
-
-        preruncleanup
-
-        snakemake -s $SNAKEFILE \
-        --directory $WORKDIR \
-        --printshellcmds \
-        --use-singularity \
-        --singularity-args "$SINGULARITY_BINDS" \
-        --use-envmodules \
-        --latency-wait 120 \
-        --configfile ${WORKDIR}/config.yaml \
-        --cores all \
-        --stats ${WORKDIR}/snakemake.stats \
-        2>&1|tee ${WORKDIR}/snakemake.log
-
-        if [ "$?" -eq "0" ];then
-            snakemake -s $SNAKEFILE \
-            --report ${WORKDIR}/runlocal_snakemake_report.html \
-            --directory $WORKDIR \
-            --configfile ${WORKDIR}/config.yaml
-        fi
-
-        postrun
-
-    elif [ "$1" == "slurm" ];then
-
-        preruncleanup
-
-        cat > ${WORKDIR}/submit_script.sbatch << EOF
-#!/bin/bash
-#SBATCH --job-name="insert_jobname_here"
-#SBATCH --mem=10g
-#SBATCH --partition="ccr,norm"
-#SBATCH --time=96:00:00
-#SBATCH --cpus-per-task=2
-
-module load python/3.7
-module load snakemake/5.24.1
-module load singularity
-
-cd \$SLURM_SUBMIT_DIR
-
-snakemake -s $SNAKEFILE \
---directory $WORKDIR \
---use-singularity \
---singularity-args "$SINGULARITY_BINDS" \
---use-envmodules \
---printshellcmds \
---latency-wait 120 \
---configfile ${WORKDIR}/config.yaml \
---cluster-config ${PIPELINE_HOME}/config/cluster.json \
---cluster "sbatch --gres {cluster.gres} --cpus-per-task {cluster.threads} -p {cluster.partition} -t {cluster.time} --mem {cluster.mem} --job-name {cluster.name} --output {cluster.output} --error {cluster.error}" \
--j 500 \
---rerun-incomplete \
---keep-going \
---stats ${WORKDIR}/snakemake.stats \
-2>&1|tee ${WORKDIR}/snakemake.log
-
-if [ "\$?" -eq "0" ];then
-    snakemake -s $SNAKEFILE \
-    --directory $WORKDIR \
-    --report ${WORKDIR}/runslurm_snakemake_report.html \
-    --configfile ${WORKDIR}/config.yaml
-fi
-
-bash ${PIPELINE_HOME}/scripts/gather_cluster_stats.sh ${WORKDIR}/snakemake.log > ${WORKDIR}/snakemake.log.HPC_summary.txt
-
-EOF
-
-        sbatch ${WORKDIR}/submit_script.sbatch
-
-    else
-
-        snakemake $1 -s ${SNAKEFILE} \
-        --directory $WORKDIR \
-        --use-envmodules \
-        --printshellcmds \
-        --latency-wait 120 \
-        --configfile ${WORKDIR}/config.yaml \
-        --cluster-config ${PIPELINE_HOME}/config/cluster.json \
-        --cluster "sbatch --gres {cluster.gres} --cpus-per-task {cluster.threads} -p {cluster.partition} -t {cluster.time} --mem {cluster.mem} --job-name {cluster.name} --output {cluster.output} --error {cluster.error}" \
-        -j 500 \
-        --rerun-incomplete \
-        --keep-going \
-        --stats ${WORKDIR}/snakemake.stats
-
-    fi
-
-}
-
-function reset() {
-
-if [ "$#" -eq "1" ]; then err "cleanup needs an absolute path to the existing working dir"; usage; fi
-if [ "$#" -gt "2" ]; then err "cleanup takes only one more argument"; usage; fi
-WORKDIR=$2
-echo "Working Dir: $WORKDIR"
-if [ ! -d $WORKDIR ];then err "Folder $WORKDIR does not exist!";fi
-echo "Deleting $WORKDIR"
-rm -rf $WORKDIR
-echo "Re-Initializing $WORKDIR"
-init "$@"
-
-}
-
-
-function main(){
-
-    if [ $# -eq 0 ]; then usage; exit 1; fi
-
-    case $1 in
-        init) init "$@" && exit 0;;
-        dryrun) dryrun "$@" && exit 0;;
-        unlock) unlock "$@" && exit 0;;
-        run) runslurm "$@" && exit 0;;
-        runlocal) runlocal "$@" && exit 0;;
-        reset) reset "$@" && exit 0;;
-        -h | --help | help) usage && exit 0;;
-        -* | --*) err "Error: Failed to provide mode: <MODE>."; usage && exit 1;;
-        *) err "Error: Failed to provide a valid mode: <MODE>. '${1}' is not supported."; usage && exit 1;;
-    esac
-}
-
-main "$@"
-
-
-
-
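Editor's note: the wrapper stamps each run with the pipeline's commit id and tag via its get_git_commitid_tag helper. For reference, a minimal Python equivalent (assumes git is on PATH; the tag lookup is allowed to come back empty on untagged commits, just as the bash version's 2>/dev/null permits):

import subprocess

def get_git_commitid_tag(repo_dir):
    """Return (commit id, tag or empty string) for the checkout at repo_dir."""
    gid = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        cwd=repo_dir, capture_output=True, text=True, check=True,
    ).stdout.strip()
    tag = subprocess.run(
        ["git", "describe", "--tags", gid],     # no check=True: untagged is OK
        cwd=repo_dir, capture_output=True, text=True,
    ).stdout.strip()
    return gid, tag

# gid, tag = get_git_commitid_tag(".")
# print("Git Commit/Tag: %s\t%s" % (gid, tag))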