Skip to content

Commit

Permalink
HC annot fix (#54)
Browse files Browse the repository at this point in the history
* added make_bamout variable, added -GQB (GVCF GQ band) arguments, removed AS_StandardAnnotation for VCF mode

* Removed disk size suggestion from README

* minor edit to HC task to set vcf_basename from within the task
  • Loading branch information
bshifaw authored Jun 3, 2020
1 parent 1654241 commit ae4c938
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ it easier to configure the workflow.*
#### Requirements/expectations
- One or more GVCFs produced by HaplotypeCaller in GVCF mode
- Bare minimum 50 samples. Gene panels are not supported.
- When determining disk size in the JSON, use the guideline below
- small_disk = (num_gvcfs / 10) + 10
- medium_disk = (num_gvcfs * 15) + 10
- huge_disk = num_gvcfs + 10

### Outputs
- A VCF file and its index, filtered using variant quality score recalibration
Expand Down
21 changes: 17 additions & 4 deletions haplotypecaller-gvcf-gatk4.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ workflow HaplotypeCallerGvcf_GATK4 {
File scattered_calling_intervals_list

Boolean make_gvcf = true
String gatk_docker = "broadinstitute/gatk:4.1.4.0"
Boolean make_bamout = false
String gatk_docker = "broadinstitute/gatk:4.1.7.0"
String gatk_path = "/gatk/gatk"
String gitc_docker = "broadinstitute/genomes-in-the-cloud:2.3.1-1500064817"
String samtools_path = "samtools"
Expand Down Expand Up @@ -89,6 +90,7 @@ workflow HaplotypeCallerGvcf_GATK4 {
ref_fasta_index = ref_fasta_index,
hc_scatter = hc_divisor,
make_gvcf = make_gvcf,
make_bamout = make_bamout,
docker = gatk_docker,
gatk_path = gatk_path
}
Expand Down Expand Up @@ -168,6 +170,7 @@ task HaplotypeCaller {
File ref_fasta_index
Float? contamination
Boolean make_gvcf
Boolean make_bamout
Int hc_scatter

String gatk_path
Expand All @@ -188,7 +191,10 @@ task HaplotypeCaller {

Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB")
Int disk_size = ceil(((size(input_bam, "GB") + 30) / hc_scatter) + ref_size) + 20


String vcf_basename = if make_gvcf then basename(output_filename, ".gvcf") else basename(output_filename, ".vcf")
String bamout_arg = if make_bamout then "-bamout ~{vcf_basename}.bamout.bam" else ""

parameter_meta {
input_bam: {
description: "a bam file",
Expand All @@ -208,8 +214,14 @@ task HaplotypeCaller {
-I ~{input_bam} \
-L ~{interval_list} \
-O ~{output_filename} \
-contamination ~{default=0 contamination} ~{true="-ERC GVCF" false="" make_gvcf} \
-G StandardAnnotation -G AS_StandardAnnotation -G StandardHCAnnotation
-contamination ~{default=0 contamination} \
-G StandardAnnotation -G StandardHCAnnotation ~{true="-G AS_StandardAnnotation" false="" make_gvcf} \
-GQB 10 -GQB 20 -GQB 30 -GQB 40 -GQB 50 -GQB 60 -GQB 70 -GQB 80 -GQB 90 \
~{true="-ERC GVCF" false="" make_gvcf} \
~{bamout_arg}

# Cromwell doesn't like optional task outputs, so we have to touch this file.
touch ~{vcf_basename}.bamout.bam
}
runtime {
docker: docker
Expand All @@ -220,6 +232,7 @@ task HaplotypeCaller {
output {
File output_vcf = "~{output_filename}"
File output_vcf_index = "~{output_filename}.tbi"
File bamout = "~{vcf_basename}.bamout.bam"
}
}
# Merge GVCFs generated per-interval for the same sample
Expand Down

0 comments on commit ae4c938

Please sign in to comment.