Skip to content

Commit

Permalink
fix: run python3 -E to make sure pythonpath is unset
Browse files Browse the repository at this point in the history
  • Loading branch information
kelly-sovacool committed Nov 5, 2024
1 parent e450d1b commit 652d8ca
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 92 deletions.
4 changes: 2 additions & 2 deletions workflow/rules/findcircrna.smk
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def get_per_sample_files_to_merge(wildcards):
# 2. parse the back_spliced_junction BED from above along with known splicing annotations to CircExplorer2 'parse' to create
# a. circularRNA_known.txt ... circRNAs around known gene exons
# b. low_conf_circularRNA_known.txt .... circRNAs with low confidence
# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python -E script
# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script
# to create an aggregated list of BSJs with following columns:
# | # | ColName |
# |---|-------------|
Expand Down Expand Up @@ -1283,7 +1283,7 @@ rule merge_per_sample:
container: config['containers']['star_ucsc_cufflinks']
shell:
"""
python3 {params.script} \\
python3 -E {params.script} \\
--pyscript {params.pyscript} \\
--dcc {params.ndcc} \\
--mapsplice {params.nmapsplice} \\
Expand Down
20 changes: 10 additions & 10 deletions workflow/rules/post_findcircrna_processing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ BSJbedbn=$(basename {output.BSJbed})
if [ "{params.peorse}" == "PE" ];then
python3 {params.scriptpe} \\
python3 -E {params.scriptpe} \\
--inbam {input.chimericbam} \\
--sample_counts_table {input.countstable} \\
--plusbam {params.tmpdir}/{params.sample}.BSJ.plus.bam \\
Expand All @@ -91,7 +91,7 @@ python3 {params.scriptpe} \\
else
python3 {params.scriptse} \\
python3 -E {params.scriptse} \\
--inbam {input.chimericbam} \\
--sample_counts_table {input.countstable} \\
--plusbam {params.tmpdir}/{params.sample}.BSJ.plus.bam \\
Expand Down Expand Up @@ -127,7 +127,7 @@ for i in $(echo {params.viruses}|tr ',' ' ');do
bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir}
done
python3 {params.flankscript} --reffa {params.reffa} --inbsjbedgz {params.tmpdir}/${{BSJbedbn}} --outbsjbedgz {output.BSJbed}
python3 -E {params.flankscript} --reffa {params.reffa} --inbsjbedgz {params.tmpdir}/${{BSJbedbn}} --outbsjbedgz {output.BSJbed}
rm -rf {params.tmpdir}
"""
Expand Down Expand Up @@ -283,12 +283,12 @@ rule create_circExplorer_merged_found_counts_table:
"""
set -exo pipefail
mkdir -p {params.tmpdir}
python3 {params.pythonscript} \\
python3 -E {params.pythonscript} \\
-b {input.bsj_found_counts} \\
-l {input.linear_spliced_counts} \\
-o {output.found_counts_table}
python3 {params.pythonscript2} \\
python3 -E {params.pythonscript2} \\
--annotationcounts {input.annotation_counts} \\
--allfoundcounts {output.found_counts_table} \\
--countstable {output.count_counts_table}
Expand Down Expand Up @@ -324,9 +324,9 @@ if RUN_MAPSPLICE:
for bamfile in {input};do
bamfile_bn=$(basename $bamfile)
if [ "{params.peorse}" == "PE" ];then
echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > {params.tmpdir}/${{bamfile_bn}}.counts"
echo "python3 -E {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > {params.tmpdir}/${{bamfile_bn}}.counts"
else
echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > {params.tmpdir}/${{bamfile_bn}}.counts"
echo "python3 -E {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > {params.tmpdir}/${{bamfile_bn}}.counts"
fi
done > {params.tmpdir}/do_bamstats
parallel -j 2 < {params.tmpdir}/do_bamstats
Expand Down Expand Up @@ -374,9 +374,9 @@ else:
for bamfile in {input};do
bamfile_bn=$(basename $bamfile)
if [ "{params.peorse}" == "PE" ];then
echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > {params.tmpdir}/${{bamfile_bn}}.counts"
echo "python3 -E {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} --pe > {params.tmpdir}/${{bamfile_bn}}.counts"
else
echo "python3 {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > {params.tmpdir}/${{bamfile_bn}}.counts"
echo "python3 -E {params.bash2nreads_pyscript} --inbam $bamfile --regions {params.regions} > {params.tmpdir}/${{bamfile_bn}}.counts"
fi
done > {params.tmpdir}/do_bamstats
parallel -j 2 < {params.tmpdir}/do_bamstats
Expand Down Expand Up @@ -450,7 +450,7 @@ rule create_hq_bams:
outdir=$(dirname {output.outbam})
if [ ! -d $outdir ];then mkdir -p $outdir;fi
cd $outdir
python3 {params.script} \\
python3 -E {params.script} \\
-i {input.inbam} \\
-t {input.countstable} \\
-o {output.outbam} \\
Expand Down
26 changes: 13 additions & 13 deletions workflow/scripts/_create_circExplorer_linear_bam.v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ start0=$(date +%s.%N)
# _bedintersect_to_rid2jid.py
# also requires these tools
# bedtools
# python3 with pysam library
# python3 -E with pysam library
# samtools
# bedSort from ucsc tools
# ucsc
Expand Down Expand Up @@ -126,14 +126,14 @@ start=$(date +%s.%N)

if [ "$peorse" == "PE" ];then

python3 ${SCRIPT_DIR}/filter_bam.py \
python3 -E ${SCRIPT_DIR}/filter_bam.py \
--inbam $non_chimeric_star2p_bam \
--outbam $filtered_bam \
--pe

else

python3 ${SCRIPT_DIR}/filter_bam.py \
python3 -E ${SCRIPT_DIR}/filter_bam.py \
--inbam $non_chimeric_star2p_bam \
--outbam $filtered_bam \

Expand Down Expand Up @@ -183,7 +183,7 @@ bedSort ${tmpdir}/BSJ.ends.bed ${tmpdir}/BSJ.ends.bed
printtime $SCRIPT_NAME $start0 $start "finding max readlength"
start=$(date +%s.%N)

python3 ${SCRIPT_DIR}/bam_get_max_readlen.py -i $filtered_bam > ${tmpdir}/${sample_name}.maxrl
python3 -E ${SCRIPT_DIR}/bam_get_max_readlen.py -i $filtered_bam > ${tmpdir}/${sample_name}.maxrl
maxrl=$(cat ${tmpdir}/${sample_name}.maxrl)
two_maxrl=$((maxrl*2))

Expand Down Expand Up @@ -226,7 +226,7 @@ pigz -p4 -f ${rid2jidgzip%.*} && rm -f ${rid2jidgzip%.*}.tmp
printtime $SCRIPT_NAME $start0 $start "filter linear and spliced readids to those near BSJs only; creating counts table"
start=$(date +%s.%N)

python3 ${SCRIPT_DIR}/_filter_linear_spliced_readids_w_rid2jid.py \
python3 -E ${SCRIPT_DIR}/_filter_linear_spliced_readids_w_rid2jid.py \
--linearin ${tmpdir}/${sample_name}.linear.readids.gz \
--splicedin ${tmpdir}/${sample_name}.spliced.readids.gz \
--rid2jid ${rid2jidgzip} \
Expand Down Expand Up @@ -254,10 +254,10 @@ splicedbam_bn=$(basename $splicedbam)
linearbam_all_bn=$(basename $linearbam_all)
splicedbam_all_bn=$(basename $splicedbam_all)

echo "python3 ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${linearbam_bn} --readids $linearrids" >> ${tmpdir}/para2
echo "python3 ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${splicedbam_bn} --readids $splicedrids" >> ${tmpdir}/para2
echo "python3 ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${linearbam_all_bn} --readids ${tmpdir}/${sample_name}.linear.readids.gz" >> ${tmpdir}/para2
echo "python3 ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${splicedbam_all_bn} --readids ${tmpdir}/${sample_name}.spliced.readids.gz" >> ${tmpdir}/para2
echo "python3 -E ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${linearbam_bn} --readids $linearrids" >> ${tmpdir}/para2
echo "python3 -E ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${splicedbam_bn} --readids $splicedrids" >> ${tmpdir}/para2
echo "python3 -E ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${linearbam_all_bn} --readids ${tmpdir}/${sample_name}.linear.readids.gz" >> ${tmpdir}/para2
echo "python3 -E ${SCRIPT_DIR}/filter_bam_by_readids.py --inputBAM $filtered_bam --outputBAM ${tmpdir}/${splicedbam_all_bn} --readids ${tmpdir}/${sample_name}.spliced.readids.gz" >> ${tmpdir}/para2

parallel -j 4 < ${tmpdir}/para2

Expand All @@ -283,10 +283,10 @@ samtools sort -l 9 -T ${tmpdir}/sorttmp --write-index -@${threads} -O BAM -o ${s

if [ -f ${tmpdir}/para3 ];then rm -f ${tmpdir}/para3;fi

echo "python3 ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $linearbam --sample_name $sample_name --regions $regions --prefix linear_BSJ --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $splicedbam --sample_name $sample_name --regions $regions --prefix spliced_BSJ --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $linearbam_all --sample_name $sample_name --regions $regions --prefix linear --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $splicedbam_all --sample_name $sample_name --regions $regions --prefix spliced --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 -E ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $linearbam --sample_name $sample_name --regions $regions --prefix linear_BSJ --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 -E ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $splicedbam --sample_name $sample_name --regions $regions --prefix spliced_BSJ --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 -E ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $linearbam_all --sample_name $sample_name --regions $regions --prefix linear --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3
echo "python3 -E ${SCRIPT_DIR}/bam_split_by_regions.py --inbam $splicedbam_all --sample_name $sample_name --regions $regions --prefix spliced --outdir $outdir --host $host --additives $additives --viruses $viruses" >> ${tmpdir}/para3

parallel -j 4 < ${tmpdir}/para3

Expand Down
210 changes: 143 additions & 67 deletions workflow/scripts/_make_merge_per_sample_sh.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,154 @@
import argparse
from os.path import join

def _get_counts_file_path(sampledir,s,prog):
if prog=="circExplorer":
return join(sampledir,"circExplorer",s+".circExplorer.counts_table.tsv")
if prog=="circExplorerbwa":
return join(sampledir,"circExplorer_BWA",s+".circExplorer_bwa.annotation_counts.tsv")
elif prog=="ciri":
return join(sampledir,"ciri",s+".ciri.out.filtered")
elif prog=="dcc":
return join(sampledir,"DCC",s+".dcc.counts_table.tsv.filtered")
elif prog=="mapsplice":
return join(sampledir,"MapSplice",s+".mapsplice.counts_table.tsv.filtered")
elif prog=="nclscan":
return join(sampledir,"NCLscan",s+".nclscan.counts_table.tsv.filtered")
elif prog=="circrnafinder":
return join(sampledir,"circRNA_finder",s+".circRNA_finder.counts_table.tsv.filtered")
elif prog=="findcirc":
return join(sampledir,"find_circ",s+".find_circ.bed.filtered")

def _get_counts_file_path(sampledir, s, prog):
if prog == "circExplorer":
return join(sampledir, "circExplorer", s + ".circExplorer.counts_table.tsv")
if prog == "circExplorerbwa":
return join(
sampledir, "circExplorer_BWA", s + ".circExplorer_bwa.annotation_counts.tsv"
)
elif prog == "ciri":
return join(sampledir, "ciri", s + ".ciri.out.filtered")
elif prog == "dcc":
return join(sampledir, "DCC", s + ".dcc.counts_table.tsv.filtered")
elif prog == "mapsplice":
return join(sampledir, "MapSplice", s + ".mapsplice.counts_table.tsv.filtered")
elif prog == "nclscan":
return join(sampledir, "NCLscan", s + ".nclscan.counts_table.tsv.filtered")
elif prog == "circrnafinder":
return join(
sampledir, "circRNA_finder", s + ".circRNA_finder.counts_table.tsv.filtered"
)
elif prog == "findcirc":
return join(sampledir, "find_circ", s + ".find_circ.bed.filtered")

def main() :
parser = argparse.ArgumentParser(description='Merge per sample Counts from different circRNA detection tools')

def main():
parser = argparse.ArgumentParser(
description="Merge per sample Counts from different circRNA detection tools"
)
# INPUTS
parser.add_argument('--pyscript', dest='pyscript', type=str, required=True,
help='python script to be run')
parser.add_argument('--sampledir', dest='sampledir', type=str, required=True,
help='generally <workdir>/<results>/<sampleName>')
parser.add_argument('--dcc', dest='dcc', type=int, required=False, default=0,
help='n_run_dcc')
parser.add_argument('--mapsplice', dest='mapsplice', type=int, required=False,default=0,
help='n_run_mapslice')
parser.add_argument('--findcirc', dest='findcirc', type=int, required=False,default=0,
help='n_run_findcirc')
parser.add_argument('--nclscan', dest='nclscan', type=int, required=False,default=0,
help='n_run_nclscan')
parser.add_argument('--circrnafinder', dest='circrnafinder', type=int, required=False, default=0,
help='n_run_circrnafinder')
parser.add_argument('--samplename', dest='samplename', type=str, required=True,
help='Sample Name')
parser.add_argument('--min_read_count_reqd', dest='minreads', type=int, required=False, default=2,
help='Read count threshold..circRNA with lower than this number of read support are excluded! (default=2)')
parser.add_argument('--hqcc', dest='hqcc', type=str, required=False, default="circExplorer,circExplorer_bwa",
help='Comma separated list of high confidence core callers (default="circExplorer,circExplorer_bwa")')
parser.add_argument('--hqccpn', dest='hqccpn', type=int, required=False, default=1,
help='Define n:high confidence core callers plus n callers are required to call this circRNA HQ (default 1)')
parser.add_argument("--reffa",dest="reffa",required=True,type=str,
help="reference fasta file path")
parser.add_argument('--pyscriptoutfile',required=True,help='merged table')
parser.add_argument(
"--pyscript",
dest="pyscript",
type=str,
required=True,
help="python script to be run",
)
parser.add_argument(
"--sampledir",
dest="sampledir",
type=str,
required=True,
help="generally <workdir>/<results>/<sampleName>",
)
parser.add_argument(
"--dcc", dest="dcc", type=int, required=False, default=0, help="n_run_dcc"
)
parser.add_argument(
"--mapsplice",
dest="mapsplice",
type=int,
required=False,
default=0,
help="n_run_mapslice",
)
parser.add_argument(
"--findcirc",
dest="findcirc",
type=int,
required=False,
default=0,
help="n_run_findcirc",
)
parser.add_argument(
"--nclscan",
dest="nclscan",
type=int,
required=False,
default=0,
help="n_run_nclscan",
)
parser.add_argument(
"--circrnafinder",
dest="circrnafinder",
type=int,
required=False,
default=0,
help="n_run_circrnafinder",
)
parser.add_argument(
"--samplename", dest="samplename", type=str, required=True, help="Sample Name"
)
parser.add_argument(
"--min_read_count_reqd",
dest="minreads",
type=int,
required=False,
default=2,
help="Read count threshold..circRNA with lower than this number of read support are excluded! (default=2)",
)
parser.add_argument(
"--hqcc",
dest="hqcc",
type=str,
required=False,
default="circExplorer,circExplorer_bwa",
help='Comma separated list of high confidence core callers (default="circExplorer,circExplorer_bwa")',
)
parser.add_argument(
"--hqccpn",
dest="hqccpn",
type=int,
required=False,
default=1,
help="Define n:high confidence core callers plus n callers are required to call this circRNA HQ (default 1)",
)
parser.add_argument(
"--reffa",
dest="reffa",
required=True,
type=str,
help="reference fasta file path",
)
parser.add_argument("--pyscriptoutfile", required=True, help="merged table")
# OUTPUTS
parser.add_argument('--outscript',required=True,help='output bash script')
parser.add_argument("--outscript", required=True, help="output bash script")
args = parser.parse_args()

sd=args.sampledir
sn=args.samplename
parameters=""
parameters+=" --circExplorer "+_get_counts_file_path(sd,sn,'circExplorer')
parameters+=" --ciri "+_get_counts_file_path(sd,sn,'ciri')
parameters+=" --circExplorerbwa "+_get_counts_file_path(sd,sn,'circExplorerbwa')
if args.dcc==1: parameters+=" --dcc "+_get_counts_file_path(sd,sn,'dcc')
if args.mapsplice==1: parameters+=" --mapsplice "+_get_counts_file_path(sd,sn,'mapsplice')
if args.nclscan==1: parameters+=" --nclscan "+_get_counts_file_path(sd,sn,'nclscan')
if args.circrnafinder==1: parameters+=" --circrnafinder "+_get_counts_file_path(sd,sn,'circrnafinder')
if args.findcirc==1: parameters+=" --findcirc "+_get_counts_file_path(sd,sn,'findcirc')
parameters+=" --reffa "+args.reffa
parameters+=" --min_read_count_reqd "+str(args.minreads)
parameters+=" --samplename "+sn
parameters+=" --hqcc "+args.hqcc
parameters+=" --hqccpn "+str(args.hqccpn)
parameters+=" -o "+args.pyscriptoutfile

with open(args.outscript,'w') as outscript:
outscript.write("python3 "+args.pyscript+parameters)

sd = args.sampledir
sn = args.samplename
parameters = ""
parameters += " --circExplorer " + _get_counts_file_path(sd, sn, "circExplorer")
parameters += " --ciri " + _get_counts_file_path(sd, sn, "ciri")
parameters += " --circExplorerbwa " + _get_counts_file_path(
sd, sn, "circExplorerbwa"
)
if args.dcc == 1:
parameters += " --dcc " + _get_counts_file_path(sd, sn, "dcc")
if args.mapsplice == 1:
parameters += " --mapsplice " + _get_counts_file_path(sd, sn, "mapsplice")
if args.nclscan == 1:
parameters += " --nclscan " + _get_counts_file_path(sd, sn, "nclscan")
if args.circrnafinder == 1:
parameters += " --circrnafinder " + _get_counts_file_path(
sd, sn, "circrnafinder"
)
if args.findcirc == 1:
parameters += " --findcirc " + _get_counts_file_path(sd, sn, "findcirc")
parameters += " --reffa " + args.reffa
parameters += " --min_read_count_reqd " + str(args.minreads)
parameters += " --samplename " + sn
parameters += " --hqcc " + args.hqcc
parameters += " --hqccpn " + str(args.hqccpn)
parameters += " -o " + args.pyscriptoutfile

with open(args.outscript, "w") as outscript:
outscript.write("python3 -E " + args.pyscript + parameters)
outscript.close()


if __name__ == "__main__":
main()
main()

0 comments on commit 652d8ca

Please sign in to comment.