Skip to content

Commit

Permalink
default to discarding noncanonical junctions
Browse files Browse the repository at this point in the history
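Replaced the opt-in --remove_noncanonical flag with the opt-out --allow_noncanonical flag, so noncanonical splice junctions are now discarded unless --allow_noncanonical is given.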
  • Loading branch information
maschon0 committed Sep 25, 2023
1 parent cd192ad commit c8742cc
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 16 deletions.
2 changes: 1 addition & 1 deletion bookend/core/argument_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def run(self):
bam_to_elr_parser.add_argument("--record_artifacts", dest='RECORD_ARTIFACTS', default=False, action='store_true', help="Reports artifact-masked S/E labels as >/].")
bam_to_elr_parser.add_argument("--split", dest='SPLIT', default=False, action='store_true', help="Separate reads into different files by their multimapping number.")
bam_to_elr_parser.add_argument("--untrimmed", dest='UNTRIMMED', default=False, action='store_true', help="(overrides -s -c -e) End labels were not trimmed from input reads prior to alignment.")
bam_to_elr_parser.add_argument("--remove_noncanonical", dest='REMOVE_NONCANONICAL', default=False, action='store_true', help="Require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
bam_to_elr_parser.add_argument("--allow_noncanonical", dest='ALLOW_NONCANONICAL', default=False, action='store_true', help="Do not require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
bam_to_elr_parser.add_argument("--sj_shift", dest='SJ_SHIFT', default=0, type=int, help="Shift up to this many bases to find a canonical splice junction")
bam_to_elr_parser.add_argument("--minlen_strict", dest='MINLEN_STRICT', default=18, type=int, help="Keep reads down to this length only if perfectly aligned.")
bam_to_elr_parser.add_argument("--minlen_loose", dest='MINLEN_LOOSE', default=25, type=int, help="Keep reads down to this length if they passed alignment parameters.")
Expand Down
11 changes: 4 additions & 7 deletions bookend/core/bam_to_elr.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, args):
self.input = args['INPUT']
self.error_rate = args['ERROR_RATE']
self.max_intron = args['MAX_INTRON']
self.remove_noncanonical = args['REMOVE_NONCANONICAL']
self.allow_noncanonical = args['ALLOW_NONCANONICAL']
if self.start or self.end or self.capped:
self.stranded = True

Expand Down Expand Up @@ -92,7 +92,7 @@ def __init__(self, args):
'mismatch_rate':self.mismatch_rate,
'error_rate' : self.error_rate,
'sj_shift':self.sj_shift,
'remove_noncanonical':self.remove_noncanonical,
'remove_noncanonical':not self.allow_noncanonical,
'labels_are_trimmed':not self.untrimmed,
'quality_filter':True,
'reference':self.reference,
Expand All @@ -114,7 +114,6 @@ def __init__(self, args):
self.config_dict['max_headclip'] = 10
self.config_dict['quality_filter'] = False
self.config_dict['error_rate'] = 0.2
self.config_dict['remove_noncanonical'] = True
self.config_dict['remove_gapped_termini'] = True
if self.untrimmed:
self.config_dict['max_headclip'] = 120
Expand All @@ -129,12 +128,10 @@ def __init__(self, args):
self.config_dict['s_tag'] = True
self.config_dict['e_tag'] = True
self.config_dict['quality_filter'] = False
self.config_dict['remove_noncanonical'] = True
self.config_dict['remove_gapped_termini'] = True
elif self.data_type.upper() in ['ONT-RNA','ONT_RNA','DIRECT_RNA', 'DIRECT-RNA']:
"""Reads are from Oxford Nanopore direct RNA kit, downstream of basecalling."""
self.config_dict['stranded'] = True
self.config_dict['remove_noncanonical'] = True
self.config_dict['remove_gapped_termini'] = True
self.config_dict['labels_are_trimmed'] = False
self.config_dict['quality_filter'] = False
Expand Down Expand Up @@ -250,15 +247,15 @@ def display_options(self):
options_string += " *** Filters ***\n"
options_string += " --record_artifacts: {}\n".format(self.record_artifacts)
options_string += " --mismatch_rate: {}\n".format(self.mismatch_rate)
options_string += " --remove_noncanonical: {}\n".format(self.remove_noncanonical)
options_string += " --allow_noncanonical: {}\n".format(self.allow_noncanonical)
options_string += " Perfect minlen (--minlen_strict): {}\n".format(self.minlen_strict)
options_string += " Relaxed minlen (--minlen_loose): {}\n".format(self.minlen_loose)
options_string += " Secondary alignments (--secondary): {}\n".format(self.secondary)

if not self.genome:
options_string += "\nWARNING: cap detection and artifact masking can only be done if a reference genome is provided."
options_string += "\nProvide a genome fasta with --genome /path/to/fasta"
if self.remove_noncanonical:
if self.allow_noncanonical:
options_string += "\nWARNING: noncanonical splice junctions can only be detected if --genome is provided."

if not self.splice and not self.reference and self.sj_shift:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def run(self):
bam_to_elr_parser.add_argument("--record_artifacts", dest='RECORD_ARTIFACTS', default=False, action='store_true', help="Reports artifact-masked S/E labels as >/].")
bam_to_elr_parser.add_argument("--split", dest='SPLIT', default=False, action='store_true', help="Separate reads into different files by their multimapping number.")
bam_to_elr_parser.add_argument("--untrimmed", dest='UNTRIMMED', default=False, action='store_true', help="(overrides -s -c -e) End labels were not trimmed from input reads prior to alignment.")
bam_to_elr_parser.add_argument("--remove_noncanonical", dest='REMOVE_NONCANONICAL', default=False, action='store_true', help="Require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
bam_to_elr_parser.add_argument("--allow_noncanonical", dest='ALLOW_NONCANONICAL', default=False, action='store_true', help="Do not require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
bam_to_elr_parser.add_argument("--sj_shift", dest='SJ_SHIFT', default=0, type=int, help="Shift up to this many bases to find a canonical splice junction")
bam_to_elr_parser.add_argument("--minlen_strict", dest='MINLEN_STRICT', default=18, type=int, help="Keep reads down to this length only if perfectly aligned.")
bam_to_elr_parser.add_argument("--minlen_loose", dest='MINLEN_LOOSE', default=25, type=int, help="Keep reads down to this length if they passed alignment parameters.")
Expand Down
11 changes: 4 additions & 7 deletions build/lib.linux-x86_64-3.7/bookend/core/bam_to_elr.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, args):
self.input = args['INPUT']
self.error_rate = args['ERROR_RATE']
self.max_intron = args['MAX_INTRON']
self.remove_noncanonical = args['REMOVE_NONCANONICAL']
self.allow_noncanonical = args['ALLOW_NONCANONICAL']
if self.start or self.end or self.capped:
self.stranded = True

Expand Down Expand Up @@ -92,7 +92,7 @@ def __init__(self, args):
'mismatch_rate':self.mismatch_rate,
'error_rate' : self.error_rate,
'sj_shift':self.sj_shift,
'remove_noncanonical':self.remove_noncanonical,
'remove_noncanonical':not self.allow_noncanonical,
'labels_are_trimmed':not self.untrimmed,
'quality_filter':True,
'reference':self.reference,
Expand All @@ -114,7 +114,6 @@ def __init__(self, args):
self.config_dict['max_headclip'] = 10
self.config_dict['quality_filter'] = False
self.config_dict['error_rate'] = 0.2
self.config_dict['remove_noncanonical'] = True
self.config_dict['remove_gapped_termini'] = True
if self.untrimmed:
self.config_dict['max_headclip'] = 120
Expand All @@ -129,12 +128,10 @@ def __init__(self, args):
self.config_dict['s_tag'] = True
self.config_dict['e_tag'] = True
self.config_dict['quality_filter'] = False
self.config_dict['remove_noncanonical'] = True
self.config_dict['remove_gapped_termini'] = True
elif self.data_type.upper() in ['ONT-RNA','ONT_RNA','DIRECT_RNA', 'DIRECT-RNA']:
"""Reads are from Oxford Nanopore direct RNA kit, downstream of basecalling."""
self.config_dict['stranded'] = True
self.config_dict['remove_noncanonical'] = True
self.config_dict['remove_gapped_termini'] = True
self.config_dict['labels_are_trimmed'] = False
self.config_dict['quality_filter'] = False
Expand Down Expand Up @@ -250,15 +247,15 @@ def display_options(self):
options_string += " *** Filters ***\n"
options_string += " --record_artifacts: {}\n".format(self.record_artifacts)
options_string += " --mismatch_rate: {}\n".format(self.mismatch_rate)
options_string += " --remove_noncanonical: {}\n".format(self.remove_noncanonical)
options_string += " --allow_noncanonical: {}\n".format(self.allow_noncanonical)
options_string += " Perfect minlen (--minlen_strict): {}\n".format(self.minlen_strict)
options_string += " Relaxed minlen (--minlen_loose): {}\n".format(self.minlen_loose)
options_string += " Secondary alignments (--secondary): {}\n".format(self.secondary)

if not self.genome:
options_string += "\nWARNING: cap detection and artifact masking can only be done if a reference genome is provided."
options_string += "\nProvide a genome fasta with --genome /path/to/fasta"
if self.remove_noncanonical:
if self.allow_noncanonical:
options_string += "\nWARNING: noncanonical splice junctions can only be detected if --genome is provided."

if not self.splice and not self.reference and self.sj_shift:
Expand Down
Binary file modified dist/bookend_rna-1.2.1-py3.7-linux-x86_64.egg
Binary file not shown.

0 comments on commit c8742cc

Please sign in to comment.