Skip to content

Commit

Permalink
Merge branch 'fix-hs-metrics' into 'develop'
Browse files Browse the repository at this point in the history
simplify interface and correct output of HsMetrics

See merge request tron/tron-bam-preprocessing!20
  • Loading branch information
Pablo Riesgo Ferreiro committed Jun 23, 2021
2 parents 4fcdcfb + a2fe200 commit 6be9be4
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 28 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2019 TRON
Copyright (c) 2019-2021 TRON

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ test:
nextflow main.nf -profile test,conda --skip_deduplication --output output/test4
nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false
nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test7 --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test8 --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment
nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment --input_files false --input_bam test_data/TESTX_S1_L001.bam

check:
Expand Down Expand Up @@ -48,6 +48,8 @@ check:
test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; }
test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup.hs_metrics.txt || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample1/metrics/TESTX_S1_L001.prepared.dedup_metrics.txt || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; }
test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; }
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ Optional input:
* --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
* --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
* --intervals: path to an intervals file to collect HS metrics from; this can be built with Picard's BedToIntervalList (default: None)
* --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
* --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
* --collect_hs_metrics_min_base_quality: minimum base quality for a base to contribute coverage (default: 20).
* --collect_hs_metrics_min_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).
* --skip_bqsr: optionally skip BQSR (default: false)
Expand Down
29 changes: 9 additions & 20 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ params.dbsnp = false
params.known_indels1 = false
params.known_indels2 = false
params.intervals = false
params.hs_metrics_target_coverage = false
params.hs_metrics_per_base_coverage = false
params.skip_bqsr = false
params.skip_realignment = false
params.skip_deduplication = false
Expand Down Expand Up @@ -136,7 +134,7 @@ if (!params.skip_deduplication) {
cpus "${params.mark_duplicates_cpus}"
memory "${params.mark_duplicates_memory}"
tag "${name}"
publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics"
publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics.txt"

input:
set name, bam_name, type, file(bam) from prepared_bams
Expand All @@ -145,10 +143,10 @@ if (!params.skip_deduplication) {
set val(name), val(bam_name), val(type),
file("${bam.baseName}.dedup.bam"), file("${bam.baseName}.dedup.bam.bai") into deduplicated_bams,
deduplicated_bams_for_metrics, deduplicated_bams_for_hs_metrics
file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics
file("${bam.baseName}.dedup_metrics.txt") optional true

script:
dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics"
dedup_metrics = params.skip_metrics ? "": "--metrics-file ${bam.baseName}.dedup_metrics.txt"
remove_duplicates = params.remove_duplicates ? "--remove-all-duplicates true" : "--remove-all-duplicates false"
"""
mkdir tmp
Expand All @@ -157,9 +155,7 @@ if (!params.skip_deduplication) {
--java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=tmp' \
--input ${bam} \
--output ${bam.baseName}.dedup.bam \
--conf 'spark.executor.cores=${task.cpus}' \
${remove_duplicates} \
${dedup_metrics}
--conf 'spark.executor.cores=${task.cpus}' ${remove_duplicates} ${dedup_metrics}
"""
}
}
Expand Down Expand Up @@ -202,18 +198,11 @@ if (! params.skip_metrics) {
set name, bam_name, type, file(bam), file(bai) from deduplicated_bams_for_hs_metrics

output:
file("*_metrics") optional true into txt_hs_metrics
file("*.pdf") optional true into pdf_hs_metrics
file(params.hs_metrics_target_coverage) optional true into target_hs_metrics
file(params.hs_metrics_per_base_coverage) optional true into per_base_hs_metrics
file("*_metrics") optional true
file("*.pdf") optional true
file("${bam.baseName}.hs_metrics.txt")

script:
hs_metrics_target_coverage= params.hs_metrics_target_coverage ?
"--PER_TARGET_COVERAGE ${params.hs_metrics_target_coverage} --REFERENCE_SEQUENCE ${params.reference}" :
""
hs_metrics_per_base_coverage= params.hs_metrics_per_base_coverage ?
"--PER_BASE_COVERAGE ${params.hs_metrics_per_base_coverage}" :
""
minimum_base_quality = params.collect_hs_metrics_min_base_quality ?
"--MINIMUM_BASE_QUALITY ${params.collect_hs_metrics_min_base_quality}" : ""
minimum_mapping_quality = params.collect_hs_metrics_min_mapping_quality ?
Expand All @@ -224,10 +213,10 @@ if (! params.skip_metrics) {
gatk CollectHsMetrics \
--java-options '-Xmx${params.metrics_memory} -Djava.io.tmpdir=tmp' \
--INPUT ${bam} \
--OUTPUT ${bam.baseName} \
--OUTPUT ${bam.baseName}.hs_metrics.txt \
--TARGET_INTERVALS ${params.intervals} \
--BAIT_INTERVALS ${params.intervals} \
${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} ${minimum_base_quality} ${minimum_mapping_quality}
${minimum_base_quality} ${minimum_mapping_quality}
"""
}
}
Expand Down
4 changes: 1 addition & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']

cleanup = true

VERSION = '1.5.0'
VERSION = '1.6.0'
DOI = 'https://zenodo.org/badge/latestdoi/358400957'

manifest {
Expand Down Expand Up @@ -83,8 +83,6 @@ Optional input:
* --known_indels1: path to a VCF of known indels (optional to perform realignment around indels)
* --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels)
* --intervals: path to an intervals file to collect HS metrics from; this can be built with Picard's BedToIntervalList (default: None)
* --hs_metrics_target_coverage: name of output file for target HS metrics (default: None)
* --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None)
* --collect_hs_metrics_min_base_quality: minimum base quality for a base to contribute coverage (default: 20).
* --collect_hs_metrics_min_mapping_quality: minimum mapping quality for a read to contribute coverage (default: 20).
* --skip_bqsr: optionally skip BQSR (default: false)
Expand Down

0 comments on commit 6be9be4

Please sign in to comment.