From 9025f400fe72ba93a505abed47d41d406b6eee7e Mon Sep 17 00:00:00 2001 From: Linda Xiang Date: Fri, 10 Jun 2022 10:22:33 -0400 Subject: [PATCH 1/3] [wfpm v0.7.11] started a new version cutadapt@0.2.0 from cutadapt@0.1.1 which was released --- cutadapt/main.nf | 2 +- cutadapt/pkg.json | 4 ++-- cutadapt/tests/checker.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cutadapt/main.nf b/cutadapt/main.nf index d235bf1..c5fcc64 100755 --- a/cutadapt/main.nf +++ b/cutadapt/main.nf @@ -29,7 +29,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' // package version, changed from 3.4.0 so it doesnt match cutadapt +version = '0.2.0' container = [ 'ghcr.io': 'ghcr.io/icgc-argo-workflows/argo-qc-tools.cutadapt' diff --git a/cutadapt/pkg.json b/cutadapt/pkg.json index d84486e..44d154d 100644 --- a/cutadapt/pkg.json +++ b/cutadapt/pkg.json @@ -1,6 +1,6 @@ { "name": "cutadapt", - "version": "0.1.1", + "version": "0.2.0", "description": "CutAdapt tool", "main": "main.nf", "deprecated": false, @@ -37,4 +37,4 @@ "license": "MIT", "bugReport": "https://github.com/icgc-argo-workflows/argo-qc-tools/issues", "homepage": "https://github.com/icgc-argo-workflows/argo-qc-tools#readme" -} +} \ No newline at end of file diff --git a/cutadapt/tests/checker.nf b/cutadapt/tests/checker.nf index dd64844..cc2cd15 100755 --- a/cutadapt/tests/checker.nf +++ b/cutadapt/tests/checker.nf @@ -27,7 +27,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.1' // package version +version = '0.2.0' container = [ 'ghcr.io': 'ghcr.io/icgc-argo-workflows/argo-qc-tools.cutadapt' ] From bce023849e581c0bb96c37617f98012d2d8b0582 Mon Sep 17 00:00:00 2001 From: Linda Xiang Date: Fri, 10 Jun 2022 10:56:14 -0400 Subject: [PATCH 2/3] adjust cutadapt in/out, update tests and check.nf --- cutadapt/Dockerfile | 2 +- cutadapt/main.nf | 25 +++++----- cutadapt/main.py | 46 ++++++++++++------ cutadapt/tests/checker.nf | 15 ++++-- ...expected-TCRBOA7-T-RNA.cutadapt.log.qc.tgz | Bin 1091 -> 0 bytes .../expected/expected.test-job-1.cutadapt.tgz | Bin 0 -> 1146 bytes .../expected/expected.test-job-2.cutadapt.tgz | Bin 0 -> 1021 bytes cutadapt/tests/test-job-1.json | 3 +- cutadapt/tests/test-job-2.json | 8 +++ 9 files changed, 65 insertions(+), 34 deletions(-) delete mode 100644 cutadapt/tests/expected/expected-TCRBOA7-T-RNA.cutadapt.log.qc.tgz create mode 100644 cutadapt/tests/expected/expected.test-job-1.cutadapt.tgz create mode 100644 cutadapt/tests/expected/expected.test-job-2.cutadapt.tgz create mode 100644 cutadapt/tests/test-job-2.json diff --git a/cutadapt/Dockerfile b/cutadapt/Dockerfile index 03a5cd9..5be3c0a 100644 --- a/cutadapt/Dockerfile +++ b/cutadapt/Dockerfile @@ -7,7 +7,7 @@ # Set the base image to Ubuntu FROM ubuntu:20.04 -ARG PACKAGE_VERSION=2.10 +ARG PACKAGE_VERSION=4.0 ARG BUILD_PACKAGES="build-essential" ARG DEBIAN_FRONTEND=noninteractive diff --git a/cutadapt/main.nf b/cutadapt/main.nf index c5fcc64..7bd62ba 100755 --- a/cutadapt/main.nf +++ b/cutadapt/main.nf @@ -29,7 +29,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.2.0' +version = '0.2.0' // package version, changed from 3.4.0 so it doesnt match cutadapt container = [ 'ghcr.io': 'ghcr.io/icgc-argo-workflows/argo-qc-tools.cutadapt' @@ -49,9 +49,9 @@ params.publish_dir = "" // set to empty string will disable publishDir // tool specific params go here, add / change as needed +params.read_group_id = "" params.input_R1="" -params.input_R2="" -params.output_pattern = "*.cutadapt.log.qc.tgz" // output file name pattern +params.input_R2="No_File" params.read1_adapter="AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" params.read2_adapter="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" params.min_length=1 @@ -60,31 +60,35 @@ params.extra_options="" process cutadapt { container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}" - publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir + publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false cpus params.cpus memory "${params.mem} GB" input: - path input_R1 - path input_R2 + tuple val(read_group_id), path(input_R1), path(input_R2) output: - path "output_dir/${params.output_pattern}", emit: output_tgz + path "output_dir/*.cutadapt.tgz", emit: cutadapt_tar + path "output_dir/*.cutadapt.log", emit: cutadapt_log + path "output_dir/*{fq,fastq,fq.gz,fastq.gz}", emit: cutadapt_results script: // add and initialize variables here as needed + arg_input_R2 = input_R2.name != 'No_File' ? "-2 ${input_R2}" : "" + """ mkdir -p output_dir main.py \ - -1 ${input_R1} -2 ${input_R2} \ + -1 ${input_R1} \ + -r ${read_group_id} \ -o output_dir \ -a ${params.read1_adapter} \ -A ${params.read2_adapter} \ -m ${params.min_length} \ - -q ${params.qual_cutoff} ${params.extra_options} + -q ${params.qual_cutoff} ${arg_input_R2} ${params.extra_options} """ } @@ -93,7 +97,6 @@ process cutadapt { // using this command: nextflow run icgc-argo-workflows/argo-qc-tools/cutadapt/main.nf -r cutadapt.v3.4.0 --params-file workflow { cutadapt( - file(params.input_R1), - file(params.input_R2) + tuple(params.read_group_id, file(params.input_R1), file(params.input_R2)) ) } diff --git a/cutadapt/main.py b/cutadapt/main.py index c39b8af..6f6baee 100755 --- a/cutadapt/main.py +++ b/cutadapt/main.py @@ -33,6 +33,7 @@ import re import json import tarfile +import hashlib def run_cmd(cmd): proc = subprocess.Popen( @@ -61,7 +62,7 @@ def prepare_tarball(qc_metrics, logfile): files_to_tar = ['tar_content.json', qc_metrics, logfile] - tarfile_name = f"{os.path.dirname(logfile)}/{os.path.basename(logfile)}.qc.tgz" + tarfile_name = re.sub(r'.log$', r'.tgz', logfile) with tarfile.open(tarfile_name, "w:gz") as tar: for f in files_to_tar: tar.add(f, arcname=os.path.basename(f)) @@ -82,15 +83,17 @@ def prep_qc_metrics(cutadapt_log, tool_ver): }, 'metrics': {} } - with open(cutadapt_log,'r') as l: - log=l.read() - r1_adapt=re.search("Read 1 with adapter:\s+\d+.+(\d+\.\d+)%",log) - r2_adapt=re.search("Read 2 with adapter:\s+\d+.+(\d+\.\d+)%",log) - q_trim=re.search("Quality-trimmed:\s+\d+.+(\d+\.\d+)%",log) - qc_metrics['metrics']['adapter_read1_percent']=float(r1_adapt.group(1)) - qc_metrics['metrics']['adapter_read2_percent']=float(r2_adapt.group(1)) - qc_metrics['metrics']['quality_trimmed_percent']=float(q_trim.group(1)) + # TO UPDATE + # with open(cutadapt_log,'r') as l: + # log=l.read() + # r1_adapt=re.search("Read 1 with adapter:\s+\d+.+(\d+\.\d+)%",log) + # r2_adapt=re.search("Read 2 with adapter:\s+\d+.+(\d+\.\d+)%",log) + # q_trim=re.search("Quality-trimmed:\s+\d+.+(\d+\.\d+)%",log) + + # qc_metrics['metrics']['adapter_read1_percent']=float(r1_adapt.group(1)) + # qc_metrics['metrics']['adapter_read2_percent']=float(r2_adapt.group(1)) + # qc_metrics['metrics']['quality_trimmed_percent']=float(q_trim.group(1)) qc_metrics_file = f"{os.path.dirname(cutadapt_log)}/{os.path.basename(cutadapt_log)}.qc_metrics.json" with open(qc_metrics_file, "w") as j: @@ -110,7 +113,9 @@ def main(): parser.add_argument('-1', '--input-R1', dest='input_R1', type=str, help='Input file read 1', required=True) parser.add_argument('-2', '--input-R2', dest='input_R2', type=str, - help='Input file read 2', required=True) + help='Input file read 2') + parser.add_argument('-r', '--rg_id', dest='rg_id', type=str, + help='Read group ID', required=True) parser.add_argument('-o', '--output-dir', dest='output_dir', type=str, help='Output directory', required=True) parser.add_argument('-a', '--read1-adapter', dest='adapter_R1', type=str, @@ -130,18 +135,27 @@ def main(): if not os.path.isfile(args.input_R1): sys.exit('Error: specified input file %s does not exist or is not accessible!' % args.input_R1) - if not os.path.isfile(args.input_R2): + if args.input_R2 and not os.path.isfile(args.input_R2): sys.exit('Error: specified input file %s does not exist or is not accessible!' % args.input_R2) if not os.path.isdir(args.output_dir): sys.exit('Error: specified output dir %s does not exist or is not accessible!' % args.output_dir) - basename=os.path.basename(args.input_R1) - index_of_dot=basename.index('.') - base=basename[:index_of_dot] + basename_R1=os.path.basename(args.input_R1) + + if args.input_R2: + basename_R2=os.path.basename(args.input_R2) + cmd = f"cutadapt -q {args.min_trim_qual} -m {args.min_trim_len} -a {args.adapter_R1} -A {args.adapter_R2} -o {args.output_dir}/trim_{basename_R1} -p {args.output_dir}/trim_{basename_R2} {args.input_R1} {args.input_R2}" + else: + cmd = f"cutadapt -q {args.min_trim_qual} -m {args.min_trim_len} -a {args.adapter_R1} -o {args.output_dir}/trim_{basename_R1} {args.input_R1}" + - stdout, stderr, returncode = run_cmd(f"cutadapt -q {args.min_trim_qual} -m {args.min_trim_len} -a {args.adapter_R1} -A {args.adapter_R2} -o {args.output_dir}/out.fastq.gz -p {args.output_dir}/out2.fastq.gz {args.input_R1} {args.input_R2}") + stdout, stderr, returncode = run_cmd(cmd) + # in case the rg_id contains filename not friendly characters + friendly_rgid = "".join([ c if re.match(r"[a-zA-Z0-9\.\-_]", c) else "_" for c in args.rg_id ]) + # calculate md5 and add it in the logfile name to avoid name colision + md5sum = hashlib.md5((args.rg_id).encode('utf-8')).hexdigest() - logfile=f"{args.output_dir}/{base}.cutadapt.log" + logfile=f"{args.output_dir}/{friendly_rgid}.{md5sum}.cutadapt.log" with open(logfile,"w") as log: log.write(stdout) diff --git a/cutadapt/tests/checker.nf b/cutadapt/tests/checker.nf index cc2cd15..c80eb48 100755 --- a/cutadapt/tests/checker.nf +++ b/cutadapt/tests/checker.nf @@ -27,7 +27,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.2.0' +version = '0.2.0' // package version container = [ 'ghcr.io': 'ghcr.io/icgc-argo-workflows/argo-qc-tools.cutadapt' ] @@ -38,8 +38,11 @@ params.container_registry = "" params.container_version = "" params.container = "" // tool specific parmas go here, add / change as needed -params.input_file = "" +params.read_group_id = "" +params.input_R1="" +params.input_R2="No_File" params.expected_output = "" + include { cutadapt } from '../main' process file_smart_diff { @@ -56,7 +59,7 @@ process file_smart_diff { tar xvf !{output_tgz} -C actual tar xvf !{expected_tgz} -C expected - export NAME=`basename !{output_tgz} .cutadapt.log.qc.tgz` + export NAME=`basename !{output_tgz} .cutadapt.tgz` diff actual/tar_content.json expected/tar_content.json \ && ( echo "TAR_CONTENT: Test PASSED" && exit 0 ) || ( echo "TAR_CONTENT: Test FAILED. tar_content.json files do not match" && exit 1 ) @@ -74,20 +77,22 @@ process file_smart_diff { } workflow checker { take: + read_group_id input_R1 input_R2 expected_tgz main: cutadapt( - file(params.input_R1), file(params.input_R2) + tuple(read_group_id, input_R1, input_R2) ) file_smart_diff( - cutadapt.out.output_tgz, + cutadapt.out.cutadapt_tar, expected_tgz ) } workflow { checker( + params.read_group_id, file(params.input_R1), file(params.input_R2), file(params.expected_tgz) diff --git a/cutadapt/tests/expected/expected-TCRBOA7-T-RNA.cutadapt.log.qc.tgz b/cutadapt/tests/expected/expected-TCRBOA7-T-RNA.cutadapt.log.qc.tgz deleted file mode 100644 index 2fb77359fbc65b62c8cbf3b06afdfdeeddfce1f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1091 zcmV-J1ibqniwFoKZC7Ce|5QU#LQg?AEmSR1PC+hXb#!55VQ_RVY;R{SabqrYXLy17!$Ad1w>Y;k&?)Yl{WRmC2Ql7Wwa;T}oCY*@gqu zLh5iNfU`TZ$6Wk|q(^$coyT_{WSq+C=v9a|^wg75;ca*`&zR@)Be**{04-}BS9tt? zZRG!i_z()674nSrgVCCj{!-R zI!@z-Cb_yI8%3Ke-BYuAtuaH5=ht_guWiNsSVUU$hR0DE7t-K(xJm|#@;HAZt2#r_M-eBCc5(KrZ2&gC!)gb+ zOjffBL50m0vP!VMM6n?JbYFu%s&T!H^Gx5*aOYgd=P>g zY-{1{G@3@i)J&Ke9Du6;oxNDuYlLH@J%(5#PwRvzhzU^cOX;eaMTn zHhVeel}cn?XXO=GIiN2XB&w2-mFR)2lBp}vBZW#O5JH^yS((*ynHo7ZDIU~tj7SE{ zx@UHn;H1akk7m1Gk(Em=Pl)53ot?qwCAzn|2jm?mQaaA9nmwn6MROOKj$w!J_Sg(v z7**DUZPi-sX|8Ap$Jk{3aaKqYKB)t@aLj(AYm{BWg@} z@Ihm{OusQ6P4R@V2aQH#^w>$}-mL$mNo-(5HZ|U(yR^-QF$7JRFxECvKFjN+EE73? zLE%jxx?iAXC9jrwT!Hv?0>8`hO3$(F_E5TZ+Yn4cP&>{?ML=CboagEmvszY_!h(0G zJHoqRA3gy_UDgcP1UPkhGvE^t)D@l`2*-JcQ8y8niAM^+4zXph>!Tr_^kcE9UtyL1Zd43JV3q;3f~P-+upU*QcEqh)KW_=wbarU{R_tX J2ZsPC002^|CKdny diff --git a/cutadapt/tests/expected/expected.test-job-1.cutadapt.tgz b/cutadapt/tests/expected/expected.test-job-1.cutadapt.tgz new file mode 100644 index 0000000000000000000000000000000000000000..4fcb9bddd1277bf9eef96f8bb2051c5fb8a5707f GIT binary patch literal 1146 zcmV-=1cmz_iwFqrSfgSB|5Qa&R4r0YK`u3AH#0OhF*IW}G&D3gWI1AEI5lQAIXPlB zW@I)pIW{h1b#!55VQ_RVbZ2@1?N{$^+cpr-SMw7}jA><~_)e(0cdo&c13Cc$9Pztiw z$@ed~`Ojiq7*!bhTcwNFI{&V1H_xH#wns$Rw`X2B?B4menPhvwzNYh^q*11q=6gH; zp9nzzGCs?dDbu*>`_K=6J`Jst_W^a(P})Oo$Bu2=qr^LpN6yUkymNP!xWaR3yfje~ zEey@{1>FW|Hd6nvj=b?WLyvzSyH9SqllnSBzY6}hKj(kZ@&8WvfAkRlbC(OZy8!$T ziH3w49MDnIjF%Lw)*6+awoMv3Q*!erZV zv&bko#~go8^s2zT$#IjanMVdz3TuKFQK;s+G!F%3u#YvufbFPNnHr-C*k43#t!HUg zZ^;Ay?pm>;tR1&5(s@*=$7CM`z;0x~tg&Z*di-LEVR-g#$5dHpHpAT&f~g{u2)67{coXdY0;9 znMEbYAII>kDlW|22Wew3o};|3(g^CiLPf|s+_?EN2aSIUvS@*BciR?;vAwi7P?RaZRYh(ZFAbVYGNy=GC+bX7$5oSvj!kM$g4_jr#}hB_xM>WuP6_Zq#muTQOt_4;kF z#Y}t=vxi=hUxZBV2r~Ih$TpDNg-kfNm~GUZW9BqCiTjv!_rp#)>7%r2r@Z025F=fB*mh literal 0 HcmV?d00001 diff --git a/cutadapt/tests/expected/expected.test-job-2.cutadapt.tgz b/cutadapt/tests/expected/expected.test-job-2.cutadapt.tgz new file mode 100644 index 0000000000000000000000000000000000000000..d00c2d87eec671196ed1592e4e1f07153df7456e GIT binary patch literal 1021 zcmV@e_Y=s9lSd!%RZ z%>xhrGEt?LrKZ2ss(h~c_ua0>1K;mT#6H!$?Xi3H?=pw=fc=;1Kg~q0_WE0^|IY+q zaFr|zsjDoh2V)p4e!N^*7awNSlL5X6xSM#c>xOBxNDGJ$eb$s0LmdUaIldS3ti5-Vr$_ChZ+j9Kr>hSs25Vhfd z!~d_G{{zG<`HkMH z5*!**&Pc3^LX;`wSt(&BD!e0Tybp}Qx&j7P0URs=v-xZh&*wAzoyRl$%op(joADx! zXDH8NuoPe#cYD1|vucDlhhn*iFW!Ef1y*+uZI4w*Mm24ooUTQ!ujuAC_$n=OUa3UZ zbyjYmdF0ZP=yj_K7!8L+RT3zrtf zLNgQALQ7cTYQLuHP@)G%gL*4893%#Zw>hY{s?racWiZ5G(csaID$`n)Fx-hQY3nR+ zJUSbH=`{>F>*gK9U5Q%mL{>c%vhBTTrmS{H-M<#;jLY}ih(=wcQ1pfn20a^t*CZFk7$*hPYsC2&j-3l!!|pmuRmzdlTUi}P z$wzehQzj3MyLZOxW4daRw}JN)wJ&vFUhib0`-Qls7*M|r3fiHXIDN10(5TP)9&!7; r&nZKh6CY(pc`HY)-1ZydG%zqQFfcGMFfcGMcxCt#h_ckQ04M+e6#@Kb literal 0 HcmV?d00001 diff --git a/cutadapt/tests/test-job-1.json b/cutadapt/tests/test-job-1.json index 76e96d0..21654a5 100644 --- a/cutadapt/tests/test-job-1.json +++ b/cutadapt/tests/test-job-1.json @@ -1,7 +1,8 @@ { + "read_group_id": "TEST-RNA", "input_R1": "input/TCRBOA7-T-RNA.250reads.read1.fastq.gz", "input_R2": "input/TCRBOA7-T-RNA.250reads.read2.fastq.gz", - "expected_tgz": "expected/expected-TCRBOA7-T-RNA.cutadapt.log.qc.tgz", + "expected_tgz": "expected/expected.test-job-1.cutadapt.tgz", "publish_dir": "outdir", "cpus": 1, "mem": 0.5 diff --git a/cutadapt/tests/test-job-2.json b/cutadapt/tests/test-job-2.json new file mode 100644 index 0000000..aad75e8 --- /dev/null +++ b/cutadapt/tests/test-job-2.json @@ -0,0 +1,8 @@ +{ + "read_group_id": "TEST-RNA", + "input_R1": "input/TCRBOA7-T-RNA.250reads.read1.fastq.gz", + "expected_tgz": "expected/expected.test-job-2.cutadapt.tgz", + "publish_dir": "outdir", + "cpus": 1, + "mem": 0.5 +} From 7bbf9ec961c75f46410e77dacad4445265ec1b3e Mon Sep 17 00:00:00 2001 From: Morgan Taschuk Date: Fri, 24 Jun 2022 15:26:29 -0400 Subject: [PATCH 3/3] Small fix to cutadapt python script to accept single end output; changed output metrics to remove read specific adapter content and quality; update README --- cutadapt/README.md | 15 ++++++------- cutadapt/main.py | 20 +++++++----------- .../expected/expected.test-job-1.cutadapt.tgz | Bin 1146 -> 1152 bytes .../expected/expected.test-job-2.cutadapt.tgz | Bin 1021 -> 1034 bytes 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/cutadapt/README.md b/cutadapt/README.md index 857bea7..cc8994d 100644 --- a/cutadapt/README.md +++ b/cutadapt/README.md @@ -31,11 +31,10 @@ nextflow run main.nf -params-file params.json **Required**: * `input_R1`: Read1 fastq file. This is assumed to be in a gzipped form. -* `input_R2`: Read2 fastq file. This is assumed to be in a gzipped form. * `publish_dir`: the final location for the results. **Optional**: - +* `input_R2`: Read2 fastq file. This is assumed to be in a gzipped form. Default: No_File, which forces cutadapt to run for single end * `read1_adapter` : Override the adapter for read 1. Default: "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC" * `read2_adapter` : Override the adapter for read 2. Default: "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" * `min_length` : set the minimum length permitted for reads. Default: 1 @@ -61,10 +60,7 @@ Examples of these files are located in [tests/expected](tests/expected). ### Metrics Current metrics captured in the qc_metrics file include: -* `adapter_read1_percent `: the percent of read 1's that contained adapter sequence. -* `adapter_read2_percent`: the percent of read 2's that contained adapter sequence. -* `quality_trimmed_percent`: the percent of reads that were trimmed for quality. - By default, this should be 0, because by default, quality trimming is turned off. +* `adapter_percent `: the percent of reads that contained adapter sequence. ## Local Testing @@ -79,7 +75,7 @@ wfpm workon cutadapt 3. Build the Docker container locally ``` cd cutadapt -docker build -t ghcr.io/icgc-argo-qc-wg/argo-qc-tools.cutadapt:3.4.0 . +docker build -t ghcr.io/icgc-argo-qc-workflows/argo-qc-tools.cutadapt:0.2.1 . ``` 4. Run the tests ``` @@ -92,6 +88,7 @@ If everything works correctly, you should see something like the following: Validating package: /Users/mtaschuk/git/argo-qc-tools/cutadapt Pakcage valid. Testing package: /Users/mtaschuk/git/argo-qc-tools/cutadapt -[1/1] Testing: /Users/mtaschuk/git/argo-qc-tools/cutadapt/tests/test-job-1.json. PASSED -Tested package: cutadapt, PASSED: 1, FAILED: 0 +[1/2] Testing: /Users/mtaschuk/git/argo-qc-tools/cutadapt/tests/test-job-1.json. PASSED +[2/2] Testing: /Users/mtaschuk/git/argo-qc-tools/cutadapt/tests/test-job-2.json. PASSED +Tested package: cutadapt, PASSED: 2, FAILED: 0 ``` diff --git a/cutadapt/main.py b/cutadapt/main.py index 6f6baee..d2d23d5 100755 --- a/cutadapt/main.py +++ b/cutadapt/main.py @@ -84,16 +84,12 @@ def prep_qc_metrics(cutadapt_log, tool_ver): 'metrics': {} } - # TO UPDATE - # with open(cutadapt_log,'r') as l: - # log=l.read() - # r1_adapt=re.search("Read 1 with adapter:\s+\d+.+(\d+\.\d+)%",log) - # r2_adapt=re.search("Read 2 with adapter:\s+\d+.+(\d+\.\d+)%",log) - # q_trim=re.search("Quality-trimmed:\s+\d+.+(\d+\.\d+)%",log) - - # qc_metrics['metrics']['adapter_read1_percent']=float(r1_adapt.group(1)) - # qc_metrics['metrics']['adapter_read2_percent']=float(r2_adapt.group(1)) - # qc_metrics['metrics']['quality_trimmed_percent']=float(q_trim.group(1)) + with open(cutadapt_log,'r') as l: + log=l.read() + adapt=re.search("Read 1 with adapter:\s+\d+.+(\d+\.\d+)%",log) + if adapt is None: + adapt=re.search("Reads with adapters:\s+\d+.+(\d+\.\d+)%",log) + qc_metrics['metrics']['adapter_percent']=float(adapt.group(1)) qc_metrics_file = f"{os.path.dirname(cutadapt_log)}/{os.path.basename(cutadapt_log)}.qc_metrics.json" with open(qc_metrics_file, "w") as j: @@ -141,11 +137,11 @@ def main(): sys.exit('Error: specified output dir %s does not exist or is not accessible!' % args.output_dir) basename_R1=os.path.basename(args.input_R1) - + if args.input_R2: basename_R2=os.path.basename(args.input_R2) cmd = f"cutadapt -q {args.min_trim_qual} -m {args.min_trim_len} -a {args.adapter_R1} -A {args.adapter_R2} -o {args.output_dir}/trim_{basename_R1} -p {args.output_dir}/trim_{basename_R2} {args.input_R1} {args.input_R2}" - else: + else: cmd = f"cutadapt -q {args.min_trim_qual} -m {args.min_trim_len} -a {args.adapter_R1} -o {args.output_dir}/trim_{basename_R1} {args.input_R1}" diff --git a/cutadapt/tests/expected/expected.test-job-1.cutadapt.tgz b/cutadapt/tests/expected/expected.test-job-1.cutadapt.tgz index 4fcb9bddd1277bf9eef96f8bb2051c5fb8a5707f..c0dcfe860bd1febe2e508453b8ba8d5da234e907 100644 GIT binary patch delta 1109 zcmV-b1giV`2!II(ABzYGoD8;+2Rwh_Zre5x&R_E>4hDuLKtz+GBwIEDOf^>T$Jz#o z{u_!!$J!)Ok?1<<(8t;f^vQaX?0CtNBH5Cl%XkTF@_0NR@8j{K>3|M?I*u-Xkx?S6 zy=M{D$$j6931`=C^qf1cy$6?jFCeR0M-?vr)SLW|h&_T_r@1`hp6fAx=sSNS>e-C> z{)p@udG&&ae;%ts%RW>)#fh5QhI>jQ_P#8M1TJ#$FNSvf=9K2Jn{2QU6g^b&AA}Lk?wqRDNzbe)n|P zIA~|dYF^4J#&;zS2x>RxhQu`;v6%T zRx+_9X09~_%qWLgRT3zr7;g(?7_!v8BOnH9?_2$9$H_MHTU>_fF zKOAgl*^irr!FKZ&k(O|VG5(&Yb%Ak{<2F>ij5Mqz<^&(2P%l-b?{dmuA9I8byIJch z)mj#?Uq)T77ircU$%FCDy<$dLH*b@qv#6H$<;Z1+E@N<3UT0{$b9LBw+YQ-w40S79 zht`E8lfT4-D>#2bpe^kCKJ|B5y}1(i?p=uwo1yjyyv1GU7WFX%9UO>hdmv8b#Yz^j z96z8aClFqhXj#egRTfnsew@IsvN+dEe7ZY0iglDXSsFp}R;USihX*%b<)H9KK^B#0 zM&2t5Y5`G}sWqNxSyc+}2f{5M!yN;g0B08OI(z~)v+RGaV^2UZOE_&u5b_4`X|R#8 zOd4D(nd36a_Pn7_fMAS?L{PCg#YsczHqF@>Hp&p!usda;3OP`E*__mNlzhOTKc;fs zgu8WKZ_-thtlfAoR;xnyFn~;eYf=oOTvMnSxA+z0U%(l%J$INYUvTtK% b-iJnv7%^hRh!G=3UYC3YyI_S204M+e0)#RU delta 1103 zcmV-V1hD&n3Hk^JABzYG?pULd2Rwi8Zre5x&sXy)4hDuLKt%J8BwI29Of^>T!`cRk zz6Xj#$J!*(k?1;Ek;mE#?8$bL?089&BH0q7?Q}M* zJO7^uK>sp6%atk9xa#}R4}U%lt&{fwb<|MWLvF{8ZQG;7JC8@s%=Nr;cb2%qb7{OZ zQ4%c-&GZG`25B}@|FDj{@i>1&kAEM#Pj0%C`Z_|t3jVi0=YP@h|4#US^br4ZmkYPM z0Q?W+WnT9GjsLmechA3&t`Hsn?~MPA))}&Mtz)Z*a#eHnWDR)3$|^--$1a<) zJ{oTtj^AD#KHYRg-SL0N|KBwKbLzRC<2W7vcl`et|G((}e`!ni9{+zc2TdLS?*jjM zS^<7H=mJtZtWquZ_^^J<|=`-07jXF3ii1a@YkQ!p!Of5)pp=rJ9v<%#nPw)O32aC z5u7d&lJXi*CnVG+${M$3&I%T75-RcGo?zN|Z9A!5c?)%eyefY`^IXda_9YcR9Bf93 z=Z(T-+i|nVC^*L)e^2zPz`V(Eld73V2387df)`P!=DIWw1!b_0HNt@Hs8yL7qYBtx zL~X5SX;yE^1OM(?v7)RUw=U9oRH?^m%jeiQVXn}6zy{4dN z5M`NO;f_{isqy_lxaDEGKCm%xX7RSc#~_#`+J+c|WR`!ln}#Ih4dPQ{qhgiRxRxr% zX_C!;!ciR?;vAwi7P?RaZRYh(ZFAbVYGNy=GC+bX7$5oSvj!kM$g4_jr#}hB_xM>WuP6_Zq#m zuTQOt_4+?;u*FP#5wnM0kza&N?g%pZOvpBn+=Wayx0r3zonz)SIEnk1b@#(gI_ad7 VPCDtNlTN-b`5U5v0;K>b00065Fr5GZ diff --git a/cutadapt/tests/expected/expected.test-job-2.cutadapt.tgz b/cutadapt/tests/expected/expected.test-job-2.cutadapt.tgz index d00c2d87eec671196ed1592e4e1f07153df7456e..c1bc90ce43359ad1705567d82e27174290f6527f 100644 GIT binary patch delta 985 zcmV;~119|a2Z{&>ABzYGq71f?2SI<&SMwDYcH@T+fh>HwM&6wOMV21gyu$ zz8Uj_qip6~Hwe8GxIK9W99qp49{<=uej)H;sB}?@GdBt&mdujSr%95qz>R;%iN#kB zeEgeC)>_n>{@lv?sp>xp`Wl}_QC}k7vF5#yeNg`a^Vtco&#C_Nf|v53zqb1SjR1_V zvrQ$mDzbJwhw=LR%e8aydPzeu^MjciWML2lah_adak!0=2E{}28TgD{RQ|Njs0IDhQ_8~?lE?EU)p z{K)q$|34f5Ybi@)XCvdN=9Ms9oeme?F*(k^6{;bodI*vcK4ugSimr$EV zs0{l`T=1!9)Gdu${vXkR z;`b_D;?HWGu5p;I({zdQG6hEhrm;EbOzr&UoOv^b@NoSmJ)89wIHKu~7x^3>0(wm@I>j!< zL!;C_tZWd&1be4%PR+4kcEXlx0atk1FS)#_(SyOD-g6B%0)v0UZVuYLRQe&a3?>*X z8XVkEh1Q~mNyGc3ZHv-)bUJ_EYnZsK-#3JN#aq$vLOm3+7rroCu9`vjuS0s{BLACp zqw7Yf3zHQI5i4!7hDs*%@{g4_ZxOL>D&L?|WknuzNp=;|EU0$k>} z{lHy-#~iO8cnk2E;|JZqC*&pO$>d06A~*R}qQYbHVk-4ss9}}7L8spq;>NgpZ@eAS)snIce3VJaL#;>Rwh@^gRs5D>K%*`w z=$2aIkHQf$|A>u35{%rDOBu>8iBM+L?c}7ByLn5T78Vv378Vv378Vv3Ul{%bwG3$S H04M+e1B&*8 delta 972 zcmV;-12g=J2>k~KABzYGEL)?I2SI=LSMw@e_Y=s9lSd!&D7@XZ4c z|1wdfmZhe@)T(^0`uE+g#slB)O2j_ZyzQ}j_3tu=^??1C>OajyuJ-y{tN+ggU~rWz z3#qFtsRv^iEPlLPSQj5=)RO_e2)LVguIq+rv`Rv6?MKndU#Gqu`83&Uk&2zBx!TZ^ zk@g@B-ZYl)G?u9G&utILt*L+1=K(eRZ}|T{|GS|dg}&kc{{f!nW&iK^pWFP)`nMh5 zwhjNk82@Xfa%AU5##Si`*>H7y-0+skvHz2->I{h;hg`}A*m>J>{ORiO`PLA%;eW&b zublq_*9mwS8U8o?{|Ns->HmM5`N2K@XP)C3{(q7E-)1%7_ilFqsU3d~ncl+rjozvf z92!#2NUVxNlquv{DPbonyd!A54~)UO0tQwA94rB|`D_u-=QI4B$20uQ7x4m{@gk0A zD9>WB6kr;6d%aAvYJ@k3V!4Pf-hP|~R(B9>k5x!UHEo`pu0^e{=;k;0DlKwesYKRw zR&Jnqtf zLNgQALQ7cTYQLuHP@)G%gL*4893%#Zw>hY{s?racWiZ5G(cpj4jVjYxmN49jE@|s5 zZ#+61f9W+0IqT*f!(EA5?nG8S6teBTX{M}pN8P^`>5R+tZ`K=K?xQXYqliXbq*Iu6 znZf;PQ+z3}_OeXm_$yA>1Qs_t)U4!YpNk5d?P2FJxwJaR?jZ3qqV)r7pq zlThpnP+uFX8-v#*7sVJS1=MTA`52C!3tYqQI!jf`k