Finish Release-1.0.0_beta4

Runtime improvement to rule-based filtering. Modified rules for selecting filtering input sets. Bug fixes for junctools.
EI-CoreBioinformatics · Nov 16, 2016 · 83ec874 · 83ec874
2 parents a46cbde + 8b45672
commit 83ec874
Show file tree

Hide file tree

Showing 22 changed files with 264 additions and 345 deletions.
diff --git a/configure.ac b/configure.ac
@@ -4,7 +4,7 @@
 
 # Autoconf setup
 AC_PREREQ([2.68])
-AC_INIT([portcullis],[1.0.0_beta3],[daniel.mapleson@earlham.ac.uk],[portcullis],[http://www.earlham.ac.uk])
+AC_INIT([portcullis],[1.0.0_beta4],[daniel.mapleson@earlham.ac.uk],[portcullis],[http://www.earlham.ac.uk])
 AC_CONFIG_SRCDIR([src/portcullis.cc])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_MACRO_DIR([m4])

diff --git a/data/selftrain_initial_neg.layer1.json b/data/selftrain_initial_neg.layer1.json
@@ -1,93 +1,16 @@
 {
 	"parameters": {
-                "M1-canonical_ss": {
-			"operator": "in",
-			"value": ["N", "S"]
-		},
-                "M2-nb_reads": {
-                        "operator": "lte",
-                        "value": 1
-                },                
-		"M3-nb_dist_aln": {
-			"operator": "gte",
-			"value": 2
-		},
-		"M4-nb_rel_aln": {
-                        "operator": "eq",
-                        "value": 0
-                },
-                "M11-entropy": {
-			"operator": "lt",
-			"value": 1.0
-		},
-                "M11-entropy.2": {
-			"operator": "gt",
-			"value": 2.0
-		},
-                "M11-entropy.3": {
-			"operator": "eq",
-			"value": 0.0
-		},
                 "M12-maxmmes": {
                         "operator": "lt",
-                        "value": 7
-                },
-                "M12-maxmmes.2": {
-                        "operator": "lt",
-                        "value": 10
-                },
-                "M12-maxmmes.3": {
-                        "operator": "lt",
-                        "value": 20
-                },
-                "M8-max_min_anc": {
-                        "operator": "lt",
-                        "value": 16
-                },
-		"Suspect": {
-			"operator": "eq",
-			"value": 1
-		},
-                "PFP": {
-			"operator": "eq",
-			"value": 1
-		},
-                "M13-hamming5p": {
-                        "operator": "lte",
-                        "value": 2
+                        "value": 15
                 },
-                "M14-hamming3p": {
-                        "operator": "lte",
-                        "value": 2
-                },
-                "M19-mean_mismatches": {
-			"operator": "gte",
-			"value": 5.0
-		},
-                "M19-mean_mismatches.2": {
-			"operator": "gte",
-			"value": 2.0
-		},
                 "M20-nb_usrs": {
-                        "operator": "eq",
-                        "value": 0
-                },
-                "M20-nb_usrs.2": {
-                        "operator": "eq",
-                        "value": 1
-                },
-                "M21-nb_msrs": {
-                        "operator": "gte",
+                        "operator": "lte",
                         "value": 1
                 },
                 "M22-rel2raw": {
-                        "operator": "lt",
-                        "value": 0.9
-                },
-                "M22-rel2raw.2": {
                         "operator": "eq",
-                        "value": 0.0
+                        "value": 0
                 }
 	},
-	"expression": "( M20-nb_usrs & M21-nb_msrs & M12-maxmmes.2 & M22-rel2raw.2 )"  
-}
+	"expression": "( M12-maxmmes & M20-nb_usrs & M22-rel2raw )" }
diff --git a/data/selftrain_initial_neg.layer2.json b/data/selftrain_initial_neg.layer2.json
@@ -4,90 +4,22 @@
 			"operator": "in",
 			"value": ["N", "S"]
 		},
-                "M2-nb_reads": {
-                        "operator": "lte",
-                        "value": 1
-                },                
-		"M3-nb_dist_aln": {
-			"operator": "gte",
-			"value": 2
-		},
-		"M4-nb_rel_aln": {
-                        "operator": "eq",
-                        "value": 0
-                },
-                "M11-entropy": {
-			"operator": "lt",
-			"value": 1.0
-		},
-                "M11-entropy.2": {
-			"operator": "gt",
-			"value": 2.0
-		},
-                "M11-entropy.3": {
-			"operator": "eq",
-			"value": 0.0
-		},
+	    "M22-rel2raw": {
+		"operator": "lt",
+		"value": 0.5
+	    },
+	    "M19-mean_mismatches": {
+		"operator": "gte",
+		"value": 1
+	    },
                 "M12-maxmmes": {
                         "operator": "lt",
-                        "value": 7
-                },
-                "M12-maxmmes.2": {
-                        "operator": "lt",
-                        "value": 10
-                },
-                "M12-maxmmes.3": {
-                        "operator": "lt",
-                        "value": 20
-                },
-                "M8-max_min_anc": {
-                        "operator": "lt",
-                        "value": 16
-                },
-		"Suspect": {
-			"operator": "eq",
-			"value": 1
-		},
-                "PFP": {
-			"operator": "eq",
-			"value": 1
-		},
-                "M13-hamming5p": {
-                        "operator": "lte",
-                        "value": 2
+                        "value": 15
                 },
-                "M14-hamming3p": {
-                        "operator": "lte",
-                        "value": 2
-                },
-                "M19-mean_mismatches": {
-			"operator": "gte",
-			"value": 5.0
-		},
-                "M19-mean_mismatches.2": {
-			"operator": "gte",
-			"value": 2.0
-		},
                 "M20-nb_usrs": {
-                        "operator": "eq",
-                        "value": 0
-                },
-                "M20-nb_usrs.2": {
-                        "operator": "eq",
-                        "value": 1
-                },
-                "M21-nb_msrs": {
-                        "operator": "gte",
+                        "operator": "lte",
                         "value": 1
-                },
-                "M22-rel2raw": {
-                        "operator": "lt",
-                        "value": 0.9
-                },
-                "M22-rel2raw.2": {
-                        "operator": "eq",
-                        "value": 0.0
                 }
 	},
-	"expression": "( M20-nb_usrs.2 & M1-canonical_ss & M12-maxmmes.2 )"  
+	"expression": "( M20-nb_usrs & M12-maxmmes & M22-rel2raw & (M1-canonical_ss | M19-mean_mismatches ) )"  
 }
diff --git a/data/selftrain_initial_neg.layer3.json b/data/selftrain_initial_neg.layer3.json
@@ -4,14 +4,10 @@
 			"operator": "in",
 			"value": ["N", "S"]
 		},
-                "M11-entropy": {
-			"operator": "gt",
-			"value": 3.0
-		},
-                "Suspect": {
+                "PFP": {
 			"operator": "eq",
 			"value": 1
 		}
 	},
-	"expression": "( M1-canonical_ss & Suspect & M11-entropy )"  
+	"expression": "( M1-canonical_ss & PFP )"  
 }
diff --git a/data/selftrain_initial_neg.layer4.json b/data/selftrain_initial_neg.layer4.json
@@ -2,11 +2,11 @@
 	"parameters": {
                 "M12-maxmmes": {
                         "operator": "lt",
-                        "value": 7
+                        "value": 15
                 },
                 "M22-rel2raw": {
                         "operator": "lt",
-                        "value": 0.5
+                        "value": 0.3
                 }
 	},
 	"expression": "( M12-maxmmes & M22-rel2raw )"

diff --git a/data/selftrain_initial_neg.layer5.json b/data/selftrain_initial_neg.layer5.json
@@ -1,13 +1,20 @@
 {
 	"parameters": {
-                "M1-canonical_ss": {
-			"operator": "in",
-			"value": ["N"]
-		},
-                "M22-rel2raw": {
-                        "operator": "eq",
-                        "value": 0.0
-                }
+	    "M4-nb_rel_aln": {
+		"operator": "lt",
+		"value": 1
+	    },
+	    "M11-entropy": {
+		"operator": "eq",
+		"value": 0},
+	    "M17-primary_junc":
+	    { "operator": "eq",
+	      "value": 0
+	    },
+	    "Suspect": {
+		"operator": "eq",
+		"value": 1
+	    }
 	},
-	"expression": "( M1-canonical_ss & M22-rel2raw )"  
+	"expression": "( M4-nb_rel_aln & M11-entropy & M17-primary_junc & Suspect )"  
 }
diff --git a/data/selftrain_initial_neg.layer6.json b/data/selftrain_initial_neg.layer6.json
diff --git a/data/selftrain_initial_neg.layer7.json b/data/selftrain_initial_neg.layer7.json
@@ -1,9 +1,5 @@
 {
 	"parameters": {
-                "M2-nb_reads": {
-                        "operator": "gt",
-                        "value": 100
-                },
                 "M22-rel2raw": {
                         "operator": "eq",
                         "value": 0.0
@@ -17,5 +13,5 @@
                         "value": 3
                 }
 	},
-	"expression": "( M2-nb_reads & M22-rel2raw & M13-hamming5p & M14-hamming3p )"  
+	"expression": "( M22-rel2raw & M13-hamming5p & M14-hamming3p )"  
 }
diff --git a/data/selftrain_initial_pos.layer1.json b/data/selftrain_initial_pos.layer1.json
diff --git a/data/selftrain_initial_pos.layer2.json b/data/selftrain_initial_pos.layer2.json
diff --git a/data/selftrain_initial_pos.layer3.json b/data/selftrain_initial_pos.layer3.json
@@ -71,7 +71,11 @@
         "M22-rel2raw.2": {
             "operator": "gte",
             "value": 0.75
-        }
+        },
+	"M17-primary_junc": {
+	    "operator": "eq",
+	    "value": 1
+	}
     },
-    "expression": "( M1-canonical_ss ) | ( M1-canonical_ss.2 & M22-rel2raw & M13-hamming5p & M14-hamming3p ) | ( M1-canonical_ss.3 & M22-rel2raw.2 & M13-hamming5p.2 & M14-hamming3p.2 & M19-mean_mismatches & M11-entropy.2 )"
+    "expression": "(( M1-canonical_ss ) | ( M1-canonical_ss.2 & M22-rel2raw & M13-hamming5p & M14-hamming3p ) | ( M1-canonical_ss.3 & M22-rel2raw.2 & M13-hamming5p.2 & M14-hamming3p.2 & M19-mean_mismatches & M11-entropy.2 )) & (M17-primary_junc)"
 }
diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -53,9 +53,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.0.0_beta3'
+version = '1.0.0_beta4'
 # The full version, including alpha/beta/rc tags.
-release = '1.0.0_beta3'
+release = '1.0.0_beta4'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/lib/include/portcullis/rule_parser.hpp b/lib/include/portcullis/rule_parser.hpp
@@ -139,19 +139,28 @@ enum Operator {
     NOT_IN
 };
 
+const std::map<string, Operator> String2OperatorMap = {
+    {"EQ", Operator::EQ},
+    {"GT", Operator::GT},
+    {"LT", Operator::LT},
+    {"GTE", Operator::GTE},
+    {"LTE", Operator::LTE},
+    {"IN", Operator::IN},
+    {"NOT_IN", Operator::NOT_IN}
+};
+
 typedef unordered_map<string, pair<Operator, double>> NumericFilterMap;
 typedef unordered_map<string, pair<Operator, unordered_set<string>>> SetFilterMap;
 typedef map<Intron, vector<string>, IntronComparator> JuncResultMap;
 
-Operator stringToOp(const string& str);
 
 string opToString(const Operator op);
 
 bool isNumericOp(Operator op);
 
 struct eval : boost::static_visitor<bool> {
 
-    eval(const NumericFilterMap& _numericmap, const SetFilterMap& _stringmap, const JunctionPtr _junc, JuncResultMap* _juncMap);
+    eval(const NumericFilterMap& _numericmap, const SetFilterMap& _stringmap, const unordered_map<string, uint16_t>& _namemap, const JunctionPtr _junc, JuncResultMap* _juncMap);
 
     //
     bool operator()(const var& v) const;
@@ -182,6 +191,7 @@ struct eval : boost::static_visitor<bool> {
 
     NumericFilterMap numericmap;
     SetFilterMap stringmap;
+    unordered_map<string, uint16_t> namemap;
     JunctionPtr junc;
     JuncResultMap* juncMap;
 
@@ -227,11 +237,14 @@ class RuleFilter {
      * @param param Value
      * @return True if parameter passes operation and threshold, false otherwise
      */
-    static bool parse(const string& expression, JunctionPtr junc, NumericFilterMap& numericFilters, SetFilterMap& stringFilters, JuncResultMap* results);
+    static bool parse(const string& expression, JunctionPtr junc, 
+            NumericFilterMap& numericFilters, SetFilterMap& stringFilters, 
+            const unordered_map<string, uint16_t>& namemap, JuncResultMap* results);
 
 public:
 
-    static map<string,int> filter(const path& ruleFile, const JunctionList& all, JunctionList& pass, JunctionList& fail, const string& prefix, JuncResultMap& resultMap);
+    static map<string,int> filter(const path& ruleFile, const JunctionList& all, 
+            JunctionList& pass, JunctionList& fail, const string& prefix, JuncResultMap& resultMap);
 
     static void saveResults(const path& outputFile, const JunctionSystem& js, JuncResultMap& results);
 };