From 753a7d41fb1a47c93e73f3a0dec628701fa23d0b Mon Sep 17 00:00:00 2001 From: Jeongu Park Date: Sat, 2 Mar 2019 23:03:06 +0900 Subject: [PATCH] 02 Mar 2019 manuals update --- manuals/RNA/{cufflinks.sh => 04_Cufflinks.sh} | 0 manuals/RNA/samtools.sh | 2 + manuals/VariantCall/03_pindel.sh | 2 - manuals/VariantCall/05_union.sh | 10 +++-- manuals/VariantCall/06_remove_same_pos.py | 4 +- manuals/VariantCall/07_VEP.sh | 22 +++++++++- manuals/VariantCall/08_Variant_coverage.sh | 31 ++++++++++++++ manuals/VariantCall/09_add_to_vcf.sh | 33 +++++++++++++++ manuals/WES/07_Haplotypecall.sh | 25 +++++++++--- manuals/WES/08_Mutect2.sh | 33 --------------- manuals/WES/08_VEP.sh | 34 ++++++++++++++++ manuals/WES/09_HLAtyping.sh | 17 -------- manuals/WES/10_VariantFiltration.sh | 9 ----- manuals/pVACseq/00_phased_vcf.sh | 40 +++++++++++++++++++ manuals/pVACseq/01_pvacseq.sh | 22 ++++++++++ manuals/pvacseq.sh | 11 ----- 16 files changed, 211 insertions(+), 84 deletions(-) rename manuals/RNA/{cufflinks.sh => 04_Cufflinks.sh} (100%) create mode 100644 manuals/RNA/samtools.sh create mode 100644 manuals/VariantCall/08_Variant_coverage.sh create mode 100644 manuals/VariantCall/09_add_to_vcf.sh delete mode 100644 manuals/WES/08_Mutect2.sh create mode 100644 manuals/WES/08_VEP.sh delete mode 100644 manuals/WES/09_HLAtyping.sh delete mode 100644 manuals/WES/10_VariantFiltration.sh create mode 100644 manuals/pVACseq/00_phased_vcf.sh create mode 100644 manuals/pVACseq/01_pvacseq.sh delete mode 100644 manuals/pvacseq.sh diff --git a/manuals/RNA/cufflinks.sh b/manuals/RNA/04_Cufflinks.sh similarity index 100% rename from manuals/RNA/cufflinks.sh rename to manuals/RNA/04_Cufflinks.sh diff --git a/manuals/RNA/samtools.sh b/manuals/RNA/samtools.sh new file mode 100644 index 0000000..02274eb --- /dev/null +++ b/manuals/RNA/samtools.sh @@ -0,0 +1,2 @@ +samtools sort -o /home/01_Neoantigen/01_RNA/03_Align_hisat2/Cufflinks_RNA-Tumor.sortd.sam /home/01_Neoantigen/01_RNA/03_Align_hisat2/Cufflinks_RNA-Tumor.sam +samtools view -Su /home/01_Neoantigen/01_RNA/03_Align_hisat2/Cufflinks_RNA-Tumor.sortd.sam > /home/01_Neoantigen/01_RNA/03_Align_hisat2/Cufflinks_RNA-Tumor.sorted.bam diff --git a/manuals/VariantCall/03_pindel.sh b/manuals/VariantCall/03_pindel.sh index 0451ef9..a55e7af 100644 --- a/manuals/VariantCall/03_pindel.sh +++ b/manuals/VariantCall/03_pindel.sh @@ -7,5 +7,3 @@ vcf_D='/home/01_Neoantigen/02_VariantCall/03_pindel/variant_pindel_D.vcf' #samtools faidx ${ref} pindel -f ${ref} -i ${config} -c ALL -o ${pindel_output} -T 3 -pindel2vcf -p ${pindel_output}_SI -r ${ref} -R UCSC_hg38 -d 2013_12 -v ${vcf_SI} -pindel2vcf -p ${pindel_output}_D -r ${ref} -R UCSC_hg38 -d 2013_12 -v ${vcf_D} diff --git a/manuals/VariantCall/05_union.sh b/manuals/VariantCall/05_union.sh index 4bf0860..4f87ff6 100644 --- a/manuals/VariantCall/05_union.sh +++ b/manuals/VariantCall/05_union.sh @@ -2,9 +2,11 @@ path=/home/01_Neoantigen/02_VariantCall gatk=/home/01_Neoantigen/tools/gatk-4.1.0.0/gatk +vt=/home/01_Neoantigen/tools/vt/vt gatk_bundle=/home/01_Neoantigen/reference/gatk_bundle outdir=${path}/05_union + mkdir -p ${outdir} cp ${path}/01_mutect2/03_mutect2_filtered_PASS.vcf.gz* ${outdir} @@ -12,8 +14,10 @@ cp ${path}/02_varscan/01_varscan.Somatic.hc.sorted.vcf.gz* ${outdir} cp ${path}/04_strelka/results/variants/strelka_PASS.vcf.gz* ${outdir} -${gatk} MergeVcfs -I ${outdir}/01_varscan.Somatic.hc.sorted.vcf.gz -I ${outdir}/03_mutect2_filtered_PASS.vcf.gz -I ${outdir}/strelka_PASS.vcf.gz -O ${outdir}/vcf_union.vcf -D ${gatk_bundle}/hg38.dict - - +${gatk} MergeVcfs -I ${outdir}/01_varscan.Somatic.hc.sorted.vcf.gz -I ${outdir}/03_mutect2_filtered_PASS.vcf.gz -I ${outdir}/strelka_PASS.vcf.gz -I ${outdir}/indel.filter.output.vcf.gz -I ${outdir}/snp.filter.output.vcf.gz -O ${outdir}/vcf_union.vcf -D ${gatk_bundle}/hg38.dict +#Normalization +${gatk} LeftAlignAndTrimVariants -V ${outdir}/vcf_union.vcf -R ${gatk_bundle}/hg38.fa -O ${outdir}/vcf_union_normalized.vcf +#split multi-allelic variants to biallelic +${vt} decompose ${outdir}/vcf_union_normalized.vcf -o ${outdir}vcf_union_normalized_vt.vcf diff --git a/manuals/VariantCall/06_remove_same_pos.py b/manuals/VariantCall/06_remove_same_pos.py index e9cbb99..366173f 100644 --- a/manuals/VariantCall/06_remove_same_pos.py +++ b/manuals/VariantCall/06_remove_same_pos.py @@ -1,9 +1,9 @@ import sys import re -if len(sys.argv) != 3 : +if len(sys.argv) != 2 : print ('#########################################################################################') - print ('python 06_remove_same_pos.py /home/01_Neoantigen/02_VariantCall/05_union/vcf_union.vcf') + print ('python 06_remove_same_pos.py /home/01_Neoantigen/02_VariantCall/05_union/vcf_union_normalized_vt.vcf') print ('#########################################################################################') exit() diff --git a/manuals/VariantCall/07_VEP.sh b/manuals/VariantCall/07_VEP.sh index 37385bc..948318c 100644 --- a/manuals/VariantCall/07_VEP.sh +++ b/manuals/VariantCall/07_VEP.sh @@ -1,10 +1,28 @@ #!/bin/bash -VEP=/home/01_Neoantigen/tools/ensembl-vep/vep path=/home/01_Neoantigen/02_VariantCall indir=${path}/05_union outdir=${path}/06_annotate_VEP +ref=/home/01_Neoantigen/reference/gatk_bundle/hg38.fa mkdir -p ${outdir} -${VEP} -i ${indir}/vcf_union_removed.vcf -o ${outdir}/union_annotated.vcf --fork 3 --cache +vep \ +--input_file ${indir}/vcf_union_normalized_vt_removed.vcf \ +--output_file ${outdir}/union_annotated.vcf \ +--format vcf \ +--vcf \ +--symbol \ +--transcript_version \ +--offline \ +--terms SO \ +--plugin Downstream \ +--plugin Wildtype \ +--dir_plugin /home/shinjae325/.conda/envs/neoantigen/share/ensembl-vep-95.2-0 \ +--flag_pick \ +--tsl \ +--hgvs \ +--fasta ${ref} \ +--pick \ +--cache \ +--fork 6 diff --git a/manuals/VariantCall/08_Variant_coverage.sh b/manuals/VariantCall/08_Variant_coverage.sh new file mode 100644 index 0000000..f8db101 --- /dev/null +++ b/manuals/VariantCall/08_Variant_coverage.sh @@ -0,0 +1,31 @@ +#!/bin/bash +path=/home/01_Neoantigen +indir=${path}/02_VariantCall/06_annotate_VEP +outdir=${path}/02_VariantCall/07_Variant_coverage +bam_readcount=${path}/tools/bam-readcount/bin/bam-readcount +ref=${path}/reference/gatk_bundle/hg38.fa +normal_WES=${path}/01_WES_Normal/06_BQSR/WES-Normal_dedup_bqsr.bam +tumor_WES=${path}/01_WES_Tumor/06_BQSR/WES-Tumor_dedup_bqsr.bam +tumor_RNA=${path}/01_RNA/03_Align_hisat2/Cufflinks_RNA-Tumor.sorted.bam +#tumor_RNA=${path}/01_RNA/04_StringTie/RNA-Tumor.sorted.bam + + +mkdir -p ${outdir} + +#split snp and indel from vcf files +bcftools filter -e'%TYPE="indel"' ${indir}/union_annotated.vcf > ${outdir}/union_annotated_only_snp.vcf +bcftools filter -e'%TYPE="snp"' ${indir}/union_annotated.vcf > ${outdir}/union_annotated_only_indel.vcf + +#Get position for snp, indel +sed '/^#/ d' ${outdir}/union_annotated_only_indel.vcf | awk '{print $1,$2,$2}' > ${outdir}/indel.positions +sed '/^#/ d' ${outdir}/union_annotated_only_snp.vcf | awk '{print $1,$2,$2}' > ${outdir}/snp.positions + +#bam-readcount +${bam_readcount} -f ${ref} ${normal_WES} -i -b 20 -l ${outdir}/indel.positions > ${outdir}/Normal_WES_indel_readcount +${bam_readcount} -f ${ref} ${normal_WES} -b 20 -l ${outdir}/snp.positions > ${outdir}/Normal_WES_snp_readcount +${bam_readcount} -f ${ref} ${tumor_WES} -i -b 20 -l ${outdir}/indel.positions > ${outdir}/Tumor_WES_indel_readcount +${bam_readcount} -f ${ref} ${tumor_WES} -b 20 -l ${outdir}/snp.positions > ${outdir}/Tumor_WES_snp_readcount +${bam_readcount} -f ${ref} ${normal_WES} -i -b 20 -l ${outdir}/indel.positions > ${outdir}/Tumor_RNA_indel_readcount +${bam_readcount} -f ${ref} ${normal_WES} -b 20 -l ${outdir}/snp.positions > ${outdir}/Tumor_RNA_snp_readcount + + diff --git a/manuals/VariantCall/09_add_to_vcf.sh b/manuals/VariantCall/09_add_to_vcf.sh new file mode 100644 index 0000000..678ceb6 --- /dev/null +++ b/manuals/VariantCall/09_add_to_vcf.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +path=/home/01_Neoantigen +cov_indir=${path}/02_VariantCall/07_Variant_coverage +vcf_indir=${path}/02_VariantCall/06_annotate_VEP +rna_dir=${path}/01_RNA/04_StringTie + + +outdir=${path}/02_VariantCall/08_add_coverage_vcf +outfn1=01_cov_Nor_indel.vcf +outfn2=02_cov_Nor_snp.vcf +outfn3=03_cov_Nor_Tum_indel.vcf +outfn4=04_cov_Nor_Tum_snp.vcf +outfn5=05_cov_Nor_Tum_RNA_indel.vcf +outfn6=06_cov_Nor_Tum_RNA_snp_final.vcf +outfn7=07_somatic.vcf + +#mkdir -p ${outdir} + + +# add coverage information to vcf +#vcf-readcount-annotator ${vcf_indir}/union_annotated.vcf ${cov_indir}/Normal_WES_indel_readcount DNA -s NORMAL -t indel -o ${outdir}/${outfn1} +#vcf-readcount-annotator ${outdir}/${outfn1} ${cov_indir}/Normal_WES_snp_readcount DNA -s NORMAL -t snv -o ${outdir}/${outfn2} +#vcf-readcount-annotator ${outdir}/${outfn2} ${cov_indir}/Tumor_WES_indel_readcount DNA -s TUMOR -t indel -o ${outdir}/${outfn3} +#vcf-readcount-annotator ${outdir}/${outfn3} ${cov_indir}/Tumor_WES_snp_readcount DNA -s TUMOR -t snv -o ${outdir}/${outfn4} +#vcf-readcount-annotator ${outdir}/${outfn4} ${cov_indir}/Tumor_RNA_indel_readcount RNA -s TUMOR -t indel -o ${outdir}/${outfn5} +#vcf-readcount-annotator ${outdir}/${outfn5} ${cov_indir}/Tumor_RNA_snp_readcount RNA -s TUMOR -t snv -o ${outdir}/${outfn6} + +# add expression information to vcf +vcf-expression-annotator -s TUMOR -o ${outdir}/${outfn7} ${outdir}/${outfn6} ${rna_dir}/transcripts.gtf stringtie transcript + +bgzip -c ${outdir}/07_somatic.vcf > ${outdir}/07_somatic.vcf.gz +tabix -p vcf ${outdir}/07_somatic.vcf.gz diff --git a/manuals/WES/07_Haplotypecall.sh b/manuals/WES/07_Haplotypecall.sh index d4860ad..f523625 100644 --- a/manuals/WES/07_Haplotypecall.sh +++ b/manuals/WES/07_Haplotypecall.sh @@ -4,12 +4,27 @@ home=/home/01_Neoantigen gatk=${home}/tools/gatk-4.1.0.0/gatk refdir=${home}/reference gatk_bundle=${refdir}/gatk_bundle -inputdir=${home}/01_WES_Normal/06_BQSR -outdir=${home}/02_VariantCall/HaplotypeCall +inputdir=${home}/02_WES_Merge/02_Sort +outdir=${home}/02_VariantCall/HaplotypeCall_merge mkdir -p ${outdir} +# HaplotypeCall ${gatk} HaplotypeCaller \ --I ${inputdir}/WES-Normal_dedup_bqsr.bam \ --O ${outdir}/WES-Normal_haplotypecall.vcf \ ---emit-ref-confidence GVCF -R ${gatk_bundle}/hg38.fa + -I ${inputdir}/Merge_sorted.bam \ + -O ${outdir}/germline_haplotypecall.vcf \ + --emit-ref-confidence GVCF \ + -R ${gatk_bundle}/hg38.fa \ + +# GenotypeGVCFs +${gatk} GenotypeGVCFs \ + --variant ${outdir}/germline_haplotypecall.vcf \ + -R ${gatk_bundle}/hg38.fa \ + -O ${outdir}/germline_haplotypecall_gvcf.vcf + +# Variant Filtration +${gatk} VariantFiltration \ + --R ${gatk_bundle}/hg38.fa \ + --V ${outdir}/germline_haplotypecall_gvcf.vcf \ + --filter-name "FS" --filter "FS > 30.0" --filter-name "QD" --filter "QD < 2.0" \ + -O ${outdir}/germline_haplotypecall_filt.vcf diff --git a/manuals/WES/08_Mutect2.sh b/manuals/WES/08_Mutect2.sh deleted file mode 100644 index d24052d..0000000 --- a/manuals/WES/08_Mutect2.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -home=/home/01_Neoantigen -gatk=${home}/tools/gatk-4.1.0.0/gatk -refdir=${home}/reference -gatk_bundle=${refdir}/gatk_bundle -normaldir=${home}/01_WES_Normal/06_BQSR/Normal_dedup_bqsr.bam -tumordir=${home}/01_WES_Tumor/03_BQSR/tumor_dedup_bqsr.bam - -outdir=${home}/01_WES_Normal/08_Mutect2 - -mkdir -p ${outdir} - -# replace read groups of normal and tumor WES bam file -#$gatk AddOrReplaceReadGroups -I $normaldir -O $outdir/normal_reheader.bam -RGID Normal_sample -RGLB Normal_sample -RGPL ILLUMINA -RGPM HISEQ2500 -RGSM Normal_sample -RGPU Patient16 -#$gatk AddOrReplaceReadGroups -I $tumordir -O $outdir/tumor_reheader.bam -RGID Tumor_sample -RGLB Tumor_sample -RGPL ILLUMINA -RGPM HISEQ2500 -RGSM Tumor_sample -RGPU Patient16 - -# check replaced bam file header -#samtools_0.1.18 view -H $outdir/normal_reheader.bam -#samtools_0.1.18 view -H $outdir/tumor_reheader.bam - -# indexing -#samtools_0.1.18 index $outdir/normal_reheader.bam -#samtools_0.1.18 index $outdir/tumor_reheader.bam - -# run Mutect2 -${gatk} Mutect2 -I ${outdir}/normal_reheader.bam -I $outdir/tumor_reheader.bam --normal Normal_sample --tumor Tumor_sample -O $outdir/somatic_mutect2.vcf -R $hg38_bundle/hg38.fa - - -${gatk} HaplotypeCaller \ --I ${inpudir}/WES-${1}_dedup_bqsr.bam \ --O ${outdir}/WES-${i}_haplocall.vcf \ ---emit-ref-confidence GVCF -R ${gatk_bundle}/hg38.fa diff --git a/manuals/WES/08_VEP.sh b/manuals/WES/08_VEP.sh new file mode 100644 index 0000000..e96d32a --- /dev/null +++ b/manuals/WES/08_VEP.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +indir=/home/01_Neoantigen/02_VariantCall/HaplotypeCall +ref=/home/01_Neoantigen/reference/gatk_bundle/hg38.fa + +vep \ +--input_file ${indir}/Germline_haplotypecall_PASS.vcf \ +--output_file ${indir}/Germline_haplotypecall_PASS_vep.vcf \ +--format vcf \ +--vcf \ +--symbol \ +--transcript_version \ +--offline \ +--terms SO \ +--plugin Downstream \ +--plugin Wildtype \ +--dir_plugin /home/shinjae325/.conda/envs/neoantigen/share/ensembl-vep-95.2-0 \ +--flag_pick \ +--tsl \ +--hgvs \ +--fasta ${ref} \ +--pick \ +--cache \ +--fork 6 + +#filter non coding region variants + +filter_vep \ +--format vcf \ +--ontology \ +--filter "Consequence is coding_sequence_variant" \ +-o ${indir}/Germline_haplotypecall_PASS_vep_CDS.vcf \ +-i ${indir}/Germline_haplotypecall_PASS_vep.vcf + diff --git a/manuals/WES/09_HLAtyping.sh b/manuals/WES/09_HLAtyping.sh deleted file mode 100644 index 2cdf45e..0000000 --- a/manuals/WES/09_HLAtyping.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -path=/home/00_TeamProject/01_WES_Normal -tool=/home/00_TeamProject/tools/HLAminer-1.4/HLAminer_v1.4 -hlaminer=${tool}/bin/HLAminer.pl -hla_ref=${tool}/database/HLA_ABC_GEN.fasta -hla_pd=${tool}/database/hla_nom_p.txt -inputdir=${path}/03_BWA -outdir=${path}/09_HLAtyping - -mkdir -p ${outdir} - -### Predict HLA -echo "Predicting HLA..." -${hlaminer} -a ${outdir}/Normal.sam -e 0 -h ${hla_ref} -p ${hla_pd} -z 200 -i 99 -q 30 -s 500 -n 0 - - diff --git a/manuals/WES/10_VariantFiltration.sh b/manuals/WES/10_VariantFiltration.sh deleted file mode 100644 index dca4c9e..0000000 --- a/manuals/WES/10_VariantFiltration.sh +++ /dev/null @@ -1,9 +0,0 @@ -cd ~/projects - -mkdir 11_VariantFiltration - -gatk VariantFiltration --R ~/datasets/Homo_Sapiens/ucsc.hg19.fasta \ ---V 10_GenotypeGVCF/NIST7035_haplocall.vcf \ ---filter-name "FS" --filter "FS > 30.0" --filter-name "QD" --filter "QD < 2.0" \ --O 11_VariantFiltration/NIST7035_haplocall_filt.vcf - diff --git a/manuals/pVACseq/00_phased_vcf.sh b/manuals/pVACseq/00_phased_vcf.sh new file mode 100644 index 0000000..d3dc376 --- /dev/null +++ b/manuals/pVACseq/00_phased_vcf.sh @@ -0,0 +1,40 @@ +#!/bin/bash +path=/home/01_Neoantigen +gatk_bundle=${path}/reference/gatk_bundle +gatk=${path}/tools/GenomeAnalysisTK-3.8.1/GenomeAnalysisTK.jar +picard=/home/01_Neoantigen/tools/picard/build/libs/picard.jar + +germline_dir=${path}/02_VariantCall/HaplotypeCall +somatic_dir=${path}/02_VariantCall/08_add_coverage_vcf + +tumor_dir=/home/01_Neoantigen/01_WES_Tumor/06_BQSR +outdir=${path}/02_VariantCall/09_pvacseq + + +# combine somatic and germline variants using gatk 3.* +java -jar ${gatk} -T CombineVariants \ +-R ${gatk_bundle}/hg38.fa \ +--variant ${germline_dir}/Germline_haplotypecall_PASS_vep_CDS.vcf \ +--variant ${somatic_dir}/07_somatic.vcf \ +-o ${outdir}/00_combined_germline_somatic.vcf \ +--assumeIdenticalSamples + + +# Sort combiend vcf using picard +java -jar ${picard} SortVcf \ +I=${outdir}/00_combined_germline_somatic.vcf \ +O=${outdir}/01_combined_germline_somatic_sorted.vcf \ +SEQUENCE_DICTIONARY=${gatk_bundle}/hg38.dict + +# Phase variants using gatk's ReadBackedPhasing +java -jar ${gatk} -T ReadBackedPhasing \ +-R ${gatk_bundle}/hg38.fa \ +-I ${tumor_dir}/WES-Tumor_dedup_bqsr.bam \ +--variant ${outdir}/01_combined_germline_somatic_sorted.vcf \ +-L ${outdir}/01_combined_germline_somatic_sorted.vcf \ +-o ${outdir}/02_phased.vcf + + +bgzip -c ${outdir}/02_phased.vcf > ${outdir}/02_phased.vcf.gz +tabix -p vcf ${outdir}/02_phased.vcf.gz + diff --git a/manuals/pVACseq/01_pvacseq.sh b/manuals/pVACseq/01_pvacseq.sh new file mode 100644 index 0000000..6af0503 --- /dev/null +++ b/manuals/pVACseq/01_pvacseq.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +path=/home/01_Neoantigen +outdir=${path}/02_VariantCall/09_pvacseq +indir=${path}/02_VariantCall/08_add_coverage_vcf +iedb=${path}/tools/IEDB +HLA=HLA-A*02:06,HLA-A*26:02,HLA-B*55:02,HLA-B*40:02,HLA-C*03:04,HLA-C*01:02 + +mkdir -p ${outdir} + +pvacseq run ${indir}/07_somatic.vcf.gz Tumor ${HLA} NetMHC PickPocket ${outdir}/Result \ + -e 8,9,10 \ + --iedb-install-directory ${iedb} \ + -i ${outdir}/additional_input_file_list.yaml \ + -p ${outdir}/02_phased.vcf.gz + --normal-sample-name Normal \ + --top-score-metric lowest \ + --net-chop-method cterm \ + --netmhc-stab -d full \ + -t 7 + + diff --git a/manuals/pvacseq.sh b/manuals/pvacseq.sh deleted file mode 100644 index 6f204fe..0000000 --- a/manuals/pvacseq.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -pvacseq run \ -00_example_data/input.vcf \ -Test \ -HLA-G*01:09,HLA-E*01:01,H2-IAb \ -NetMHC PickPocket NNalign hycho \ --e 9,10 \ --i 00_example_data/additional_input_file_list.yaml --tdna-vaf 20 \ ---net-chop-method cterm --netmhc-stab \ ---top-score-metric=lowest -d full --keep-tmp-files