Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
egaffo committed Mar 23, 2020
2 parents d52c3f4 + 281abae commit fbd367c
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/ccp_check_indexes.scons
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ if genome_indexes_to_build:
indexes['BOWTIE2'] = [env['BOWTIE2_INDEX'] + suffix for suffix \
in bowtie2_index_suffixes]

if any([f in env['CIRCRNA_METHODS'] for f in ['circexplorer2_star']]):
if any([f in env['CIRCRNA_METHODS'] for f in ['circexplorer2_star', 'dcc', 'circrna_finder']]):
if env['STAR_INDEX'] == '':
#index dir
env.Replace(STAR_INDEX = os.path.dirname(os.path.abspath(str(indexes['STAR'][0]))))
Expand Down
2 changes: 1 addition & 1 deletion src/ccp_circexplorer2.scons
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ if env['ALIGNER'].lower() == 'segemehl':

## for segemehl >= v0.3.0 modify the input BED file
fixed_bed_cmd = '''grep ';B\\|C;' ${SOURCES} | cut -f1,2,3,6 | sort | '''\
'''uniq -c | sed -r 's/.*([0-9]+) ([^\\t]+)\\t([^\\t]+)'''\
'''uniq -c | sed -r 's/ *([0-9]+) ([^\\t]+)\\t([^\\t]+)'''\
'''\\t([^\\t]+)\\t([^\\t]+).*/echo "\\2\\t$$((\\3+1))\\t\\4\\tsplits:'''\
'''\\1:\\1:\\1:C:P\\t0\\t\\5"/e' > $TARGET'''

Expand Down
17 changes: 3 additions & 14 deletions src/ccp_collect_circrnas.scons
Original file line number Diff line number Diff line change
Expand Up @@ -136,21 +136,10 @@ unique_circ = env.Command('unique_circ_ids.gtf.gz',
unique_circ_cmd)

## translate backsplice intervals into start and stop single nucleotide intervals
## mind that we are writing a newline at the end of the file that will end up
## in the head of the sorted final file. This breaks bedtools intersect with
## -sorted option. Thus, we need to get rid of the newline by using grep -v '^$'
snp_unique_circ_cmd = '''zcat ${SOURCES[0]} | '''\
'''sed -r 's/([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)\\t'''\
'''([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)\\t'''\
'''([^\\t]+)\\t([^\\t]+)\\tgene_id "([^"]+)";'''\
'''/echo -e "\\1\\t\\2\\tstart\\t'''\
'''\\4\\t$$((\\4))\\t\\6\\t'''\
'''\\7\\t\\8\\tgene_id @\\9@;\\n'''\
'''\\1\\t\\2\\tstop\\t'''\
'''$$((\\5))\\t\\5\\t\\6\\t'''\
'''\\7\\t\\8\\tgene_id @\\9@;"/e' | '''\
'''sed -r 's/@/"/g' | sort -k1,1 -k4,4n -k5,5n | '''\
'''grep -v '^$' | gzip -c > $TARGET '''
'''split_start_end_gtf.py -t - | '''\
'''sort -k1,1 -k4,4n -k5,5n | '''\
'''gzip -c > $TARGET '''
snp_unique_circ = env.Command('sn_unique_circ.gtf.gz',
[unique_circ],
snp_unique_circ_cmd)
Expand Down
2 changes: 1 addition & 1 deletion src/circRNAs_analysis.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ shared.counts.table <- crossprod(x = table(circ.per.method[, .(circ_id, method)]
## order rows and columns alphabetically
shared.counts.table <-
shared.counts.table[sort(colnames(shared.counts.table)),
sort(colnames(shared.counts.table))]
sort(colnames(shared.counts.table)), drop = F]
## save table
write.csv(x = data.frame(shared.counts.table),
Expand Down
61 changes: 61 additions & 0 deletions src/split_start_end_gtf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python

import argparse, sys


def split_start_end(line, trim=False):
    """Convert one GTF record into its 'start' and 'stop' point records.

    The feature column (3rd) is replaced by ``start`` / ``stop`` and both
    coordinate columns (4th and 5th) are set to the original start, or to
    the original end, respectively. All other columns are copied.

    Args:
        line: One line of GTF text, with or without a trailing newline.
        trim: If true, reduce the attribute column to the ``gene_id`` entry
            only. When no ``gene_id`` entry exists the attribute column is
            kept unchanged.

    Returns:
        A list with two newline-terminated lines (start record first, stop
        record second), or an empty list for blank and ``#`` comment lines.

    Raises:
        ValueError: If the line has fewer than 9 tab-separated fields.
    """
    # Drop the line ending up front so every output line gets exactly one
    # newline, even when the last input record has none.
    record = line.rstrip('\n')

    # Blank lines and header/comment lines carry no feature to split.
    if not record.strip() or record.startswith('#'):
        return []

    fields = record.split('\t')
    if len(fields) < 9:
        raise ValueError('Malformed GTF line (expected 9 tab-separated '
                         'fields, found %d): %r' % (len(fields), line))

    attribute = fields[8]
    if trim:
        for field in attribute.split(';'):
            # Match the key as a whole token so that e.g. 'other_gene_id'
            # is not mistaken for 'gene_id'.
            if 'gene_id' in field.split():
                # Entries after the first one start with a blank: strip it.
                attribute = field.strip() + ';'

    out_lines = []
    for label, position in (('start', fields[3]), ('stop', fields[4])):
        out_fields = (fields[0:2] + [label, position, position] +
                      fields[5:8] + [attribute])
        out_lines.append('\t'.join(out_fields) + '\n')
    return out_lines


def main(argv=None):
    """Command line entry point.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.
    """
    desc = ''
    parser = argparse.ArgumentParser(description = desc)
    # nargs='?' makes the positional optional, so the '-' default can
    # actually apply when no input file is given.
    parser.add_argument('infile',
                        nargs = '?',
                        default = '-',
                        help = 'Input GTF file. Set - for stdin stream.')
    parser.add_argument('-o', '--outfile',
                        type = str,
                        default = '-',
                        help = 'Output filename. Default to stdout.')
    parser.add_argument('-t', '--trim',
                        action = 'store_true',
                        help = 'Trim the GTF attribute field and keep only '\
                               'gene_id.')

    args = parser.parse_args(argv)

    infile = sys.stdin if args.infile == '-' else open(args.infile, 'r')
    try:
        outfile = sys.stdout if args.outfile == '-' else open(args.outfile, 'w')
        try:
            for inline in infile:
                outfile.writelines(split_start_end(inline, args.trim))
        finally:
            # Only close what was opened here; the standard streams are
            # just flushed.
            if outfile is sys.stdout:
                outfile.flush()
            else:
                outfile.close()
    finally:
        if infile is not sys.stdin:
            infile.close()


if __name__ == '__main__':
    main()


0 comments on commit fbd367c

Please sign in to comment.