Default to discarding noncanonical junctions

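Replaced --remove_noncanonical with --allow_noncanonical. Noncanonical splice junctions are now discarded by default for every data type (the per-data-type overrides that forced removal are dropped); pass --allow_noncanonical to keep them.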
Gregor-Mendel-Institute · Sep 25, 2023 · c8742cc · c8742cc
1 parent cd192ad
commit c8742cc
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 16 deletions.
diff --git a/bookend/core/argument_parsers.py b/bookend/core/argument_parsers.py
@@ -124,7 +124,7 @@ def run(self):
 bam_to_elr_parser.add_argument("--record_artifacts", dest='RECORD_ARTIFACTS', default=False, action='store_true', help="Reports artifact-masked S/E labels as >/].")
 bam_to_elr_parser.add_argument("--split", dest='SPLIT', default=False, action='store_true', help="Separate reads into different files by their multimapping number.")
 bam_to_elr_parser.add_argument("--untrimmed", dest='UNTRIMMED', default=False, action='store_true', help="(overrides -s -c -e) End labels were not trimmed from input reads prior to alignment.")
-bam_to_elr_parser.add_argument("--remove_noncanonical", dest='REMOVE_NONCANONICAL', default=False, action='store_true', help="Require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
+bam_to_elr_parser.add_argument("--allow_noncanonical", dest='ALLOW_NONCANONICAL', default=False, action='store_true', help="Do not require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
 bam_to_elr_parser.add_argument("--sj_shift", dest='SJ_SHIFT', default=0, type=int, help="Shift up to this many bases to find a canonical splice junction")
 bam_to_elr_parser.add_argument("--minlen_strict", dest='MINLEN_STRICT', default=18, type=int, help="Keep reads down to this length only if perfectly aligned.")
 bam_to_elr_parser.add_argument("--minlen_loose", dest='MINLEN_LOOSE', default=25, type=int, help="Keep reads down to this length if they passed alignment parameters.")

diff --git a/bookend/core/bam_to_elr.py b/bookend/core/bam_to_elr.py
@@ -43,7 +43,7 @@ def __init__(self, args):
         self.input = args['INPUT']
         self.error_rate = args['ERROR_RATE']
         self.max_intron = args['MAX_INTRON']
-        self.remove_noncanonical = args['REMOVE_NONCANONICAL']
+        self.allow_noncanonical = args['ALLOW_NONCANONICAL']
         if self.start or self.end or self.capped:
             self.stranded = True
 
@@ -92,7 +92,7 @@ def __init__(self, args):
             'mismatch_rate':self.mismatch_rate,
             'error_rate' : self.error_rate,
             'sj_shift':self.sj_shift,
-            'remove_noncanonical':self.remove_noncanonical,
+            'remove_noncanonical':not self.allow_noncanonical,
             'labels_are_trimmed':not self.untrimmed,
             'quality_filter':True,
             'reference':self.reference,
@@ -114,7 +114,6 @@ def __init__(self, args):
                 self.config_dict['max_headclip'] = 10
                 self.config_dict['quality_filter'] = False
                 self.config_dict['error_rate'] = 0.2
-                self.config_dict['remove_noncanonical'] = True
                 self.config_dict['remove_gapped_termini'] = True
                 if self.untrimmed:
                     self.config_dict['max_headclip'] = 120
@@ -129,12 +128,10 @@ def __init__(self, args):
                 self.config_dict['s_tag'] = True
                 self.config_dict['e_tag'] = True
                 self.config_dict['quality_filter'] = False
-                self.config_dict['remove_noncanonical'] = True
                 self.config_dict['remove_gapped_termini'] = True
             elif self.data_type.upper() in ['ONT-RNA','ONT_RNA','DIRECT_RNA', 'DIRECT-RNA']:
                 """Reads are from Oxford Nanopore direct RNA kit, downstream of basecalling."""
                 self.config_dict['stranded'] = True
-                self.config_dict['remove_noncanonical'] = True
                 self.config_dict['remove_gapped_termini'] = True
                 self.config_dict['labels_are_trimmed'] = False
                 self.config_dict['quality_filter'] = False
@@ -250,15 +247,15 @@ def display_options(self):
         options_string += "  *** Filters ***\n"
         options_string += "  --record_artifacts:                  {}\n".format(self.record_artifacts)
         options_string += "  --mismatch_rate:                     {}\n".format(self.mismatch_rate)
-        options_string += "  --remove_noncanonical:               {}\n".format(self.remove_noncanonical)
+        options_string += "  --allow_noncanonical:               {}\n".format(self.allow_noncanonical)
         options_string += "  Perfect minlen (--minlen_strict):    {}\n".format(self.minlen_strict)
         options_string += "  Relaxed minlen (--minlen_loose):     {}\n".format(self.minlen_loose)
         options_string += "  Secondary alignments (--secondary):  {}\n".format(self.secondary)
 
         if not self.genome:
             options_string += "\nWARNING: cap detection and artifact masking can only be done if a reference genome is provided."
             options_string += "\nProvide a genome fasta with --genome /path/to/fasta"
-            if self.remove_noncanonical:
+            if self.allow_noncanonical:
                 options_string += "\nWARNING: noncanonical splice junctions can only be detected if --genome is provided."
 
         if not self.splice and not self.reference and self.sj_shift:

diff --git a/build/lib.linux-x86_64-3.7/bookend/core/argument_parsers.py b/build/lib.linux-x86_64-3.7/bookend/core/argument_parsers.py
@@ -124,7 +124,7 @@ def run(self):
 bam_to_elr_parser.add_argument("--record_artifacts", dest='RECORD_ARTIFACTS', default=False, action='store_true', help="Reports artifact-masked S/E labels as >/].")
 bam_to_elr_parser.add_argument("--split", dest='SPLIT', default=False, action='store_true', help="Separate reads into different files by their multimapping number.")
 bam_to_elr_parser.add_argument("--untrimmed", dest='UNTRIMMED', default=False, action='store_true', help="(overrides -s -c -e) End labels were not trimmed from input reads prior to alignment.")
-bam_to_elr_parser.add_argument("--remove_noncanonical", dest='REMOVE_NONCANONICAL', default=False, action='store_true', help="Require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
+bam_to_elr_parser.add_argument("--allow_noncanonical", dest='ALLOW_NONCANONICAL', default=False, action='store_true', help="Do not require canonical splice junction motifs (GT/AG, GC/AG, AT/AC).")
 bam_to_elr_parser.add_argument("--sj_shift", dest='SJ_SHIFT', default=0, type=int, help="Shift up to this many bases to find a canonical splice junction")
 bam_to_elr_parser.add_argument("--minlen_strict", dest='MINLEN_STRICT', default=18, type=int, help="Keep reads down to this length only if perfectly aligned.")
 bam_to_elr_parser.add_argument("--minlen_loose", dest='MINLEN_LOOSE', default=25, type=int, help="Keep reads down to this length if they passed alignment parameters.")

diff --git a/build/lib.linux-x86_64-3.7/bookend/core/bam_to_elr.py b/build/lib.linux-x86_64-3.7/bookend/core/bam_to_elr.py
@@ -43,7 +43,7 @@ def __init__(self, args):
         self.input = args['INPUT']
         self.error_rate = args['ERROR_RATE']
         self.max_intron = args['MAX_INTRON']
-        self.remove_noncanonical = args['REMOVE_NONCANONICAL']
+        self.allow_noncanonical = args['ALLOW_NONCANONICAL']
         if self.start or self.end or self.capped:
             self.stranded = True
 
@@ -92,7 +92,7 @@ def __init__(self, args):
             'mismatch_rate':self.mismatch_rate,
             'error_rate' : self.error_rate,
             'sj_shift':self.sj_shift,
-            'remove_noncanonical':self.remove_noncanonical,
+            'remove_noncanonical':not self.allow_noncanonical,
             'labels_are_trimmed':not self.untrimmed,
             'quality_filter':True,
             'reference':self.reference,
@@ -114,7 +114,6 @@ def __init__(self, args):
                 self.config_dict['max_headclip'] = 10
                 self.config_dict['quality_filter'] = False
                 self.config_dict['error_rate'] = 0.2
-                self.config_dict['remove_noncanonical'] = True
                 self.config_dict['remove_gapped_termini'] = True
                 if self.untrimmed:
                     self.config_dict['max_headclip'] = 120
@@ -129,12 +128,10 @@ def __init__(self, args):
                 self.config_dict['s_tag'] = True
                 self.config_dict['e_tag'] = True
                 self.config_dict['quality_filter'] = False
-                self.config_dict['remove_noncanonical'] = True
                 self.config_dict['remove_gapped_termini'] = True
             elif self.data_type.upper() in ['ONT-RNA','ONT_RNA','DIRECT_RNA', 'DIRECT-RNA']:
                 """Reads are from Oxford Nanopore direct RNA kit, downstream of basecalling."""
                 self.config_dict['stranded'] = True
-                self.config_dict['remove_noncanonical'] = True
                 self.config_dict['remove_gapped_termini'] = True
                 self.config_dict['labels_are_trimmed'] = False
                 self.config_dict['quality_filter'] = False
@@ -250,15 +247,15 @@ def display_options(self):
         options_string += "  *** Filters ***\n"
         options_string += "  --record_artifacts:                  {}\n".format(self.record_artifacts)
         options_string += "  --mismatch_rate:                     {}\n".format(self.mismatch_rate)
-        options_string += "  --remove_noncanonical:               {}\n".format(self.remove_noncanonical)
+        options_string += "  --allow_noncanonical:               {}\n".format(self.allow_noncanonical)
         options_string += "  Perfect minlen (--minlen_strict):    {}\n".format(self.minlen_strict)
         options_string += "  Relaxed minlen (--minlen_loose):     {}\n".format(self.minlen_loose)
         options_string += "  Secondary alignments (--secondary):  {}\n".format(self.secondary)
 
         if not self.genome:
             options_string += "\nWARNING: cap detection and artifact masking can only be done if a reference genome is provided."
             options_string += "\nProvide a genome fasta with --genome /path/to/fasta"
-            if self.remove_noncanonical:
+            if self.allow_noncanonical:
                 options_string += "\nWARNING: noncanonical splice junctions can only be detected if --genome is provided."
 
         if not self.splice and not self.reference and self.sj_shift:

diff --git a/dist/bookend_rna-1.2.1-py3.7-linux-x86_64.egg b/dist/bookend_rna-1.2.1-py3.7-linux-x86_64.egg