Skip to content

Commit

Permalink
02 Mar 2019 manuals update
Browse files Browse the repository at this point in the history
  • Loading branch information
jeongu committed Mar 2, 2019
1 parent 3831fcc commit 753a7d4
Show file tree
Hide file tree
Showing 16 changed files with 211 additions and 84 deletions.
File renamed without changes.
2 changes: 2 additions & 0 deletions manuals/RNA/samtools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Sort the RNA tumor alignment and convert the sorted SAM into the BAM that
# later steps read.
align_dir=/home/01_Neoantigen/01_RNA/03_Align_hisat2
sample=Cufflinks_RNA-Tumor

# Step 1: sort. The intermediate keeps its original "sortd" spelling so the
# file name on disk does not change.
samtools sort \
  -o "${align_dir}/${sample}.sortd.sam" \
  "${align_dir}/${sample}.sam"

# Step 2: SAM -> BAM on stdout (-u requests uncompressed BAM output).
samtools view -S -u "${align_dir}/${sample}.sortd.sam" \
  > "${align_dir}/${sample}.sorted.bam"
2 changes: 0 additions & 2 deletions manuals/VariantCall/03_pindel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,3 @@ vcf_D='/home/01_Neoantigen/02_VariantCall/03_pindel/variant_pindel_D.vcf'

# NOTE(review): ${ref}, ${config}, ${pindel_output}, ${vcf_SI} and ${vcf_D} are
# assigned earlier in the file, outside this hunk. The hunk is reported as
# "0 additions & 2 deletions", but the rendered diff does not mark which two
# lines the commit removes - confirm against the repository.

# Disabled: index the reference FASTA with samtools.
#samtools faidx ${ref}
# Run pindel against the reference (-f) with the config file (-i); -o sets the
# output prefix that the pindel2vcf calls below extend with _SI and _D.
pindel -f ${ref} -i ${config} -c ALL -o ${pindel_output} -T 3
# Convert the ${pindel_output}_SI and ${pindel_output}_D results to VCF.
pindel2vcf -p ${pindel_output}_SI -r ${ref} -R UCSC_hg38 -d 2013_12 -v ${vcf_SI}
pindel2vcf -p ${pindel_output}_D -r ${ref} -R UCSC_hg38 -d 2013_12 -v ${vcf_D}
10 changes: 7 additions & 3 deletions manuals/VariantCall/05_union.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,22 @@

# Build the union call set from the Mutect2, VarScan and Strelka VCFs, normalize
# it with GATK LeftAlignAndTrimVariants and split multi-allelic records with vt.
#
# Final output: ${outdir}/vcf_union_normalized_vt.vcf

path=/home/01_Neoantigen/02_VariantCall
gatk=/home/01_Neoantigen/tools/gatk-4.1.0.0/gatk
vt=/home/01_Neoantigen/tools/vt/vt
gatk_bundle=/home/01_Neoantigen/reference/gatk_bundle
outdir=${path}/05_union

# Files produced by this step, named once so every command agrees on them.
union_vcf=${outdir}/vcf_union.vcf
normalized_vcf=${outdir}/vcf_union_normalized.vcf
decomposed_vcf=${outdir}/vcf_union_normalized_vt.vcf

mkdir -p "${outdir}"

# The trailing glob copies every file sharing the .vcf.gz prefix
# (presumably the VCF plus its index).
cp "${path}/01_mutect2/03_mutect2_filtered_PASS.vcf.gz"* "${outdir}"
cp "${path}/02_varscan/01_varscan.Somatic.hc.sorted.vcf.gz"* "${outdir}"
cp "${path}/04_strelka/results/variants/strelka_PASS.vcf.gz"* "${outdir}"

# NOTE(review): both MergeVcfs calls write ${union_vcf}; when the second one
# succeeds it overwrites the result of the first. Confirm whether the
# three-caller merge is still wanted.
"${gatk}" MergeVcfs \
  -I "${outdir}/01_varscan.Somatic.hc.sorted.vcf.gz" \
  -I "${outdir}/03_mutect2_filtered_PASS.vcf.gz" \
  -I "${outdir}/strelka_PASS.vcf.gz" \
  -O "${union_vcf}" \
  -D "${gatk_bundle}/hg38.dict"

# NOTE(review): indel.filter.output.vcf.gz and snp.filter.output.vcf.gz are not
# copied into ${outdir} by this script - confirm where they come from.
"${gatk}" MergeVcfs \
  -I "${outdir}/01_varscan.Somatic.hc.sorted.vcf.gz" \
  -I "${outdir}/03_mutect2_filtered_PASS.vcf.gz" \
  -I "${outdir}/strelka_PASS.vcf.gz" \
  -I "${outdir}/indel.filter.output.vcf.gz" \
  -I "${outdir}/snp.filter.output.vcf.gz" \
  -O "${union_vcf}" \
  -D "${gatk_bundle}/hg38.dict"

#Normalization
"${gatk}" LeftAlignAndTrimVariants \
  -V "${union_vcf}" \
  -R "${gatk_bundle}/hg38.fa" \
  -O "${normalized_vcf}"

#split multi-allelic variants to biallelic
# Fix: the output used to be "${outdir}vcf_union_normalized_vt.vcf" (no slash),
# which wrote the file next to 05_union/ instead of inside it.
"${vt}" decompose "${normalized_vcf}" -o "${decomposed_vcf}"
4 changes: 2 additions & 2 deletions manuals/VariantCall/06_remove_same_pos.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import sys
import re

# Usage guard: when the number of command-line arguments does not match, print
# an example invocation between two banner lines and exit.
# NOTE(review): this span is a rendered diff without +/- markers, so two
# versions of the argv check and of the example path appear back to back and
# the original indentation is lost - confirm the final form in the repository.
if len(sys.argv) != 3 :
if len(sys.argv) != 2 :
print ('#########################################################################################')
print ('python 06_remove_same_pos.py /home/01_Neoantigen/02_VariantCall/05_union/vcf_union.vcf')
print ('python 06_remove_same_pos.py /home/01_Neoantigen/02_VariantCall/05_union/vcf_union_normalized_vt.vcf')
print ('#########################################################################################')
exit()

Expand Down
22 changes: 20 additions & 2 deletions manuals/VariantCall/07_VEP.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
#!/bin/bash
# Annotate the union VCF with Ensembl VEP.
#
# Input : ${indir}/vcf_union_normalized_vt_removed.vcf
#         (presumably the output of 06_remove_same_pos.py - confirm)
# Output: ${outdir}/union_annotated.vcf

# NOTE(review): VEP points at a standalone install, but the call below uses the
# `vep` found on PATH together with a conda plugin directory. The variable is
# kept unchanged; confirm which install is intended.
VEP=/home/01_Neoantigen/tools/ensembl-vep/vep
path=/home/01_Neoantigen/02_VariantCall
indir=${path}/05_union
outdir=${path}/06_annotate_VEP
ref=/home/01_Neoantigen/reference/gatk_bundle/hg38.fa

mkdir -p "${outdir}"

# Single VEP run. An older `${VEP} -i ${indir}/vcf_union_removed.vcf` call that
# wrote the same output file was dropped: VEP stops with an error when the
# output file already exists (unless --force_overwrite is given), so running it
# first made this annotation fail.
vep \
  --input_file "${indir}/vcf_union_normalized_vt_removed.vcf" \
  --output_file "${outdir}/union_annotated.vcf" \
  --format vcf \
  --vcf \
  --symbol \
  --transcript_version \
  --offline \
  --terms SO \
  --plugin Downstream \
  --plugin Wildtype \
  --dir_plugin /home/shinjae325/.conda/envs/neoantigen/share/ensembl-vep-95.2-0 \
  --flag_pick \
  --tsl \
  --hgvs \
  --fasta "${ref}" \
  --pick \
  --cache \
  --fork 6

31 changes: 31 additions & 0 deletions manuals/VariantCall/08_Variant_coverage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
# Collect bam-readcount output at every annotated variant position for the
# normal WES, tumor WES and tumor RNA BAMs.
#
# Input : ${indir}/union_annotated.vcf
# Output: ${outdir}/{Normal_WES,Tumor_WES,Tumor_RNA}_{indel,snp}_readcount

path=/home/01_Neoantigen
indir=${path}/02_VariantCall/06_annotate_VEP
outdir=${path}/02_VariantCall/07_Variant_coverage
bam_readcount=${path}/tools/bam-readcount/bin/bam-readcount
ref=${path}/reference/gatk_bundle/hg38.fa
normal_WES=${path}/01_WES_Normal/06_BQSR/WES-Normal_dedup_bqsr.bam
tumor_WES=${path}/01_WES_Tumor/06_BQSR/WES-Tumor_dedup_bqsr.bam
tumor_RNA=${path}/01_RNA/03_Align_hisat2/Cufflinks_RNA-Tumor.sorted.bam
#tumor_RNA=${path}/01_RNA/04_StringTie/RNA-Tumor.sorted.bam

#######################################
# Run bam-readcount for one BAM and one variant type.
# Globals:   bam_readcount, ref, outdir (read)
# Arguments: $1 - BAM file to count reads in
#            $2 - variant type, "indel" or "snp"; selects
#                 ${outdir}/<type>.positions as the site list
#            $3 - sample label used as the output file prefix
# Outputs:   writes ${outdir}/<label>_<type>_readcount
# Returns:   exit status of bam-readcount
#######################################
run_readcount() {
  local bam=$1
  local variant_type=$2
  local label=$3
  local -a mode=()

  # As in the original commands, only the indel runs pass -i.
  if [[ "${variant_type}" == "indel" ]]; then
    mode=(-i)
  fi

  "${bam_readcount}" -f "${ref}" "${bam}" "${mode[@]}" -b 20 \
    -l "${outdir}/${variant_type}.positions" \
    > "${outdir}/${label}_${variant_type}_readcount"
}

#######################################
# Produce the six readcount files (three samples x indel/snp).
# Globals: normal_WES, tumor_WES, tumor_RNA (read)
#######################################
run_all_readcounts() {
  local variant_type
  for variant_type in indel snp; do
    run_readcount "${normal_WES}" "${variant_type}" Normal_WES
    run_readcount "${tumor_WES}" "${variant_type}" Tumor_WES
    # Fix: the Tumor_RNA files used to be generated from ${normal_WES}, so the
    # "RNA" counts were a copy of the normal WES counts.
    run_readcount "${tumor_RNA}" "${variant_type}" Tumor_RNA
  done
}

mkdir -p "${outdir}"

#split snp and indel from vcf files
# Each file is built by excluding the other type.
bcftools filter -e'%TYPE="indel"' "${indir}/union_annotated.vcf" > "${outdir}/union_annotated_only_snp.vcf"
bcftools filter -e'%TYPE="snp"' "${indir}/union_annotated.vcf" > "${outdir}/union_annotated_only_indel.vcf"

#Get position for snp, indel
# Drop header lines, then print chromosome, position, position for each record.
sed '/^#/ d' "${outdir}/union_annotated_only_indel.vcf" | awk '{print $1,$2,$2}' > "${outdir}/indel.positions"
sed '/^#/ d' "${outdir}/union_annotated_only_snp.vcf" | awk '{print $1,$2,$2}' > "${outdir}/snp.positions"

#bam-readcount
run_all_readcounts


33 changes: 33 additions & 0 deletions manuals/VariantCall/09_add_to_vcf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Adds StringTie transcript expression values to the coverage-annotated VCF,
# then bgzip-compresses and tabix-indexes the result.

path=/home/01_Neoantigen
cov_indir=${path}/02_VariantCall/07_Variant_coverage
vcf_indir=${path}/02_VariantCall/06_annotate_VEP
rna_dir=${path}/01_RNA/04_StringTie

# Output directory and the file name written by each annotation stage.
outdir=${path}/02_VariantCall/08_add_coverage_vcf
outfn1=01_cov_Nor_indel.vcf
outfn2=02_cov_Nor_snp.vcf
outfn3=03_cov_Nor_Tum_indel.vcf
outfn4=04_cov_Nor_Tum_snp.vcf
outfn5=05_cov_Nor_Tum_RNA_indel.vcf
outfn6=06_cov_Nor_Tum_RNA_snp_final.vcf
outfn7=07_somatic.vcf

# NOTE(review): directory creation and stages 1-6 below are disabled, so this
# script expects ${outdir}/${outfn6} to exist already.
#mkdir -p ${outdir}

# Stages 1-6 (disabled): chain the readcount files into the VCF, one sample and
# variant type at a time.
#vcf-readcount-annotator ${vcf_indir}/union_annotated.vcf ${cov_indir}/Normal_WES_indel_readcount DNA -s NORMAL -t indel -o ${outdir}/${outfn1}
#vcf-readcount-annotator ${outdir}/${outfn1} ${cov_indir}/Normal_WES_snp_readcount DNA -s NORMAL -t snv -o ${outdir}/${outfn2}
#vcf-readcount-annotator ${outdir}/${outfn2} ${cov_indir}/Tumor_WES_indel_readcount DNA -s TUMOR -t indel -o ${outdir}/${outfn3}
#vcf-readcount-annotator ${outdir}/${outfn3} ${cov_indir}/Tumor_WES_snp_readcount DNA -s TUMOR -t snv -o ${outdir}/${outfn4}
#vcf-readcount-annotator ${outdir}/${outfn4} ${cov_indir}/Tumor_RNA_indel_readcount RNA -s TUMOR -t indel -o ${outdir}/${outfn5}
#vcf-readcount-annotator ${outdir}/${outfn5} ${cov_indir}/Tumor_RNA_snp_readcount RNA -s TUMOR -t snv -o ${outdir}/${outfn6}

# Stage 7: annotate the TUMOR sample with expression from the StringTie GTF.
vcf-expression-annotator \
  -s TUMOR \
  -o "${outdir}/${outfn7}" \
  "${outdir}/${outfn6}" \
  "${rna_dir}/transcripts.gtf" \
  stringtie transcript

# Compress the stage-7 VCF and build its tabix index.
bgzip -c "${outdir}/${outfn7}" > "${outdir}/${outfn7}.gz"
tabix -p vcf "${outdir}/${outfn7}.gz"
25 changes: 20 additions & 5 deletions manuals/WES/07_Haplotypecall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,27 @@ home=/home/01_Neoantigen
# Germline calling on the merged, sorted WES BAM:
#   HaplotypeCaller (GVCF mode) -> GenotypeGVCFs -> VariantFiltration.
# ${home} is expected to be set before this point in the file.
#
# Fixes relative to the previous text:
#   * inputdir/outdir were assigned twice; only the later (merge) values took
#     effect, so the overridden assignments are dropped.
#   * Leftover WES-Normal arguments ended the HaplotypeCaller command early,
#     which turned the "-I .../Merge_sorted.bam ..." lines into a separate,
#     non-existent command, so germline_haplotypecall.vcf was never written.
#   * The dangling "\" after the last HaplotypeCaller argument is removed.
#   * --R/--V are written as -R/-V to match the other GATK calls.
gatk=${home}/tools/gatk-4.1.0.0/gatk
refdir=${home}/reference
gatk_bundle=${refdir}/gatk_bundle
inputdir=${home}/02_WES_Merge/02_Sort
outdir=${home}/02_VariantCall/HaplotypeCall_merge

# Reference and the file handed from each stage to the next.
reference=${gatk_bundle}/hg38.fa
gvcf=${outdir}/germline_haplotypecall.vcf
genotyped_vcf=${outdir}/germline_haplotypecall_gvcf.vcf
filtered_vcf=${outdir}/germline_haplotypecall_filt.vcf

mkdir -p "${outdir}"

# HaplotypeCall
"${gatk}" HaplotypeCaller \
  -I "${inputdir}/Merge_sorted.bam" \
  -O "${gvcf}" \
  --emit-ref-confidence GVCF \
  -R "${reference}"

# GenotypeGVCFs
"${gatk}" GenotypeGVCFs \
  --variant "${gvcf}" \
  -R "${reference}" \
  -O "${genotyped_vcf}"

# Variant Filtration
# Each --filter-name labels records matching the expression that follows it.
"${gatk}" VariantFiltration \
  -R "${reference}" \
  -V "${genotyped_vcf}" \
  --filter-name "FS" --filter "FS > 30.0" --filter-name "QD" --filter "QD < 2.0" \
  -O "${filtered_vcf}"
33 changes: 0 additions & 33 deletions manuals/WES/08_Mutect2.sh

This file was deleted.

34 changes: 34 additions & 0 deletions manuals/WES/08_VEP.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Annotates the germline PASS VCF with VEP, then keeps only records whose
# consequence matches coding_sequence_variant.

indir=/home/01_Neoantigen/02_VariantCall/HaplotypeCall
ref=/home/01_Neoantigen/reference/gatk_bundle/hg38.fa

# Input, annotated and filtered file names all share one stem.
stem=${indir}/Germline_haplotypecall_PASS

# VEP options, one per line, in the order they are passed.
annotate_opts=(
  --input_file "${stem}.vcf"
  --output_file "${stem}_vep.vcf"
  --format vcf
  --vcf
  --symbol
  --transcript_version
  --offline
  --terms SO
  --plugin Downstream
  --plugin Wildtype
  --dir_plugin /home/shinjae325/.conda/envs/neoantigen/share/ensembl-vep-95.2-0
  --flag_pick
  --tsl
  --hgvs
  --fasta "${ref}"
  --pick
  --cache
  --fork 6
)
vep "${annotate_opts[@]}"

#filter non coding region variants
# --ontology asks filter_vep to match the consequence term through the ontology.
select_opts=(
  --format vcf
  --ontology
  --filter "Consequence is coding_sequence_variant"
  -o "${stem}_vep_CDS.vcf"
  -i "${stem}_vep.vcf"
)
filter_vep "${select_opts[@]}"

17 changes: 0 additions & 17 deletions manuals/WES/09_HLAtyping.sh

This file was deleted.

9 changes: 0 additions & 9 deletions manuals/WES/10_VariantFiltration.sh

This file was deleted.

40 changes: 40 additions & 0 deletions manuals/pVACseq/00_phased_vcf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
# Build the phased VCF that pVACseq takes through its -p option:
#   1. combine the germline and somatic VCFs (GATK 3 CombineVariants)
#   2. sort the combined VCF (Picard SortVcf)
#   3. phase it against the tumor BAM (GATK 3 ReadBackedPhasing)
#   4. bgzip + tabix the phased VCF
#
# Output: ${outdir}/02_phased.vcf.gz (+ tabix index)

path=/home/01_Neoantigen
gatk_bundle=${path}/reference/gatk_bundle
gatk=${path}/tools/GenomeAnalysisTK-3.8.1/GenomeAnalysisTK.jar
picard=${path}/tools/picard/build/libs/picard.jar

germline_dir=${path}/02_VariantCall/HaplotypeCall
somatic_dir=${path}/02_VariantCall/08_add_coverage_vcf

tumor_dir=${path}/01_WES_Tumor/06_BQSR
outdir=${path}/02_VariantCall/09_pvacseq

# Fix: this script writes into ${outdir} but never created it; only
# 01_pvacseq.sh did, and that script consumes the output of this one.
mkdir -p "${outdir}"

# combine somatic and germline variants using gatk 3.*
java -jar "${gatk}" -T CombineVariants \
  -R "${gatk_bundle}/hg38.fa" \
  --variant "${germline_dir}/Germline_haplotypecall_PASS_vep_CDS.vcf" \
  --variant "${somatic_dir}/07_somatic.vcf" \
  -o "${outdir}/00_combined_germline_somatic.vcf" \
  --assumeIdenticalSamples

# Sort combined vcf using picard
java -jar "${picard}" SortVcf \
  I="${outdir}/00_combined_germline_somatic.vcf" \
  O="${outdir}/01_combined_germline_somatic_sorted.vcf" \
  SEQUENCE_DICTIONARY="${gatk_bundle}/hg38.dict"

# Phase variants using gatk's ReadBackedPhasing
# The sorted VCF is passed both as the variants to phase (--variant) and as the
# interval list (-L).
java -jar "${gatk}" -T ReadBackedPhasing \
  -R "${gatk_bundle}/hg38.fa" \
  -I "${tumor_dir}/WES-Tumor_dedup_bqsr.bam" \
  --variant "${outdir}/01_combined_germline_somatic_sorted.vcf" \
  -L "${outdir}/01_combined_germline_somatic_sorted.vcf" \
  -o "${outdir}/02_phased.vcf"

# Compress and index the phased VCF.
bgzip -c "${outdir}/02_phased.vcf" > "${outdir}/02_phased.vcf.gz"
tabix -p vcf "${outdir}/02_phased.vcf.gz"

22 changes: 22 additions & 0 deletions manuals/pVACseq/01_pvacseq.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Run pVACseq on the annotated somatic VCF for sample "Tumor", using the
# NetMHC and PickPocket prediction algorithms.
#
# Input : ${indir}/07_somatic.vcf.gz, ${outdir}/02_phased.vcf.gz,
#         ${outdir}/additional_input_file_list.yaml
# Output: ${outdir}/Result

path=/home/01_Neoantigen
outdir=${path}/02_VariantCall/09_pvacseq
indir=${path}/02_VariantCall/08_add_coverage_vcf
iedb=${path}/tools/IEDB
# Quoted, and expanded quoted below, so the '*' in the allele names can never
# be glob-expanded against files in the working directory.
HLA='HLA-A*02:06,HLA-A*26:02,HLA-B*55:02,HLA-B*40:02,HLA-C*03:04,HLA-C*01:02'

mkdir -p "${outdir}"

# All arguments live in one array so that every option reaches the same
# pvacseq invocation.
# Fix: the old text had no "\" after the -p line, so --normal-sample-name and
# every option after it ran as a separate, non-existent command and was never
# applied to pvacseq.
pvacseq_args=(
  "${indir}/07_somatic.vcf.gz" Tumor "${HLA}" NetMHC PickPocket "${outdir}/Result"
  -e 8,9,10
  --iedb-install-directory "${iedb}"
  -i "${outdir}/additional_input_file_list.yaml"
  -p "${outdir}/02_phased.vcf.gz"
  --normal-sample-name Normal
  --top-score-metric lowest
  --net-chop-method cterm
  --netmhc-stab
  -d full
  -t 7
)

pvacseq run "${pvacseq_args[@]}"


11 changes: 0 additions & 11 deletions manuals/pvacseq.sh

This file was deleted.

0 comments on commit 753a7d4

Please sign in to comment.