-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_htseq_icgc_gene_per_lib.sh
executable file
·77 lines (67 loc) · 3.29 KB
/
run_htseq_icgc_gene_per_lib.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
set -e
anno=PATH_TO_ANNOTATION/gencode.v19.annotation.hs37d5_chr.gtf
outdir=OUTDIR
meta=METADATA_FILE
aligndir=ALIGNMENT_DIR
mkdir -p $outdir
Q="-q batch"
mem=50gb
if [ ! -z "$1" ]
then
file=$1
filebase=$(basename $file)
aid=$(echo $filebase | cut -f 1 -d '.')
stranded=$(grep $aid $meta | cut -f 31)
if [ -f ${file%bam}done ]
then
### per read group
for lib in $(samtools view -H $file | grep -e "^@RG" | cut -f 2 | cut -f 2- -d ":")
do
donefile="${outdir}/${filebase%bam}${lib}.count.done"
logfile="${outdir}/${filebase%bam}${lib}.count.log"
outfile="${outdir}/${filebase%bam}${lib}.count.txt"
if [ ! -f $donefile ]
then
if [ "$stranded" == "fr-firststrand" ]
then
cd $(pwd); umask 077; samtools view -F 4 -r $lib $file | htseq-count -m intersection-nonempty --stranded=reverse --idattr gene_id -r pos - $anno > $outfile && touch $donefile
elif [ "$stranded" == "fr-secondstrand" ]
then
cd $(pwd); umask 077; samtools view -F 4 -r $lib $file | htseq-count -m intersection-nonempty --stranded=yes --idattr gene_id -r pos - $anno > $outfile && touch $donefile
else
cd $(pwd); umask 077; samtools view -F 4 -r $lib $file | htseq-count -m intersection-nonempty --stranded=no --idattr gene_id -r pos - $anno > $outfile && touch $donefile
fi
fi
done
fi
else
for file in $(ls -1 ${aligndir}/*.bam)
do
filebase=$(basename $file)
aid=$(echo $filebase | cut -f 1 -d '.')
stranded=$(grep $aid $meta | cut -f 31)
if [ -f ${file%bam}done ]
then
### per read group
for lib in $(samtools view -H $file | grep -e "^@RG" | cut -f 2 | cut -f 2- -d ":")
do
donefile="${outdir}/${filebase%bam}${lib}.count.done"
logfile="${outdir}/${filebase%bam}${lib}.count.log"
outfile="${outdir}/${filebase%bam}${lib}.count.txt"
if [ ! -f $donefile ]
then
if [ "$stranded" == "fr-firststrand" ]
then
echo "umask 077; samtools view -F 4 -r $lib $file | htseq-count -m intersection-nonempty --stranded=reverse --idattr gene_id -r pos - $anno > $outfile && touch $donefile" | qsub -l nodes=1:ppn=1,mem=$mem,vmem=$mem,pmem=$mem,walltime=24:00:00 -N icgc_htseq -j oe -o $logfile $Q
elif [ "$stranded" == "fr-secondstrand" ]
then
echo "umask 077; samtools view -F 4 -r $lib $file | htseq-count -m intersection-nonempty --stranded=yes --idattr gene_id -r pos - $anno > $outfile && touch $donefile" | qsub -l nodes=1:ppn=1,mem=$mem,vmem=$mem,pmem=$mem,walltime=24:00:00 -N icgc_htseq -j oe -o $logfile $Q
else
echo "umask 077; samtools view -F 4 -r $lib $file | htseq-count -m intersection-nonempty --stranded=no --idattr gene_id -r pos - $anno > $outfile && touch $donefile" | qsub -l nodes=1:ppn=1,mem=$mem,vmem=$mem,pmem=$mem,walltime=24:00:00 -N icgc_htseq -j oe -o $logfile $Q
fi
fi
done
fi
done
fi