Skip to content

Commit

Permalink
Merge 'Cache LIKE regexes' from RJ Barman
Browse files Browse the repository at this point in the history
This PR adds a regex cache to `ProgramState` so that we can re-use
already constructed regexes while processing LIKE expressions. I didn't
find anywhere else that seemed like a good fit to put an execution-time
only cache like this, so let me know if there's a better spot.
To best match sqlite, I added the constant mask into the `Function`
instruction (this indicates whether the first argument to the function
was determined to be constant at compile time), and decide whether to
use the cache based on its value. I've left the value for
`constant_mask` as 0 on every other kind of `Function` instruction. That
seemed to be the safest choice, as that appears to be what has been
implicitly done up to this point. Happy to change that if you'd advise
otherwise.

Fixes #168
Closes #320
  • Loading branch information
penberg committed Sep 12, 2024
2 parents c3a57d4 + e67f1e9 commit b7926df
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 20 deletions.
19 changes: 19 additions & 0 deletions core/translate/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,9 @@ pub fn translate_condition_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
// Only constant patterns for LIKE are supported currently, so this
// is always 1
constant_mask: 1,
func: crate::vdbe::Func::Scalar(ScalarFunc::Like),
start_reg: pattern_reg,
dest: cur_reg,
Expand Down Expand Up @@ -655,6 +658,7 @@ pub fn translate_expr(
let regs = program.alloc_register();
translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Json(j),
Expand All @@ -673,6 +677,7 @@ pub fn translate_expr(
}

program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand Down Expand Up @@ -734,6 +739,7 @@ pub fn translate_expr(
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand Down Expand Up @@ -811,6 +817,9 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
// Only constant patterns for LIKE are supported currently, so this
// is always 1
constant_mask: 1,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand Down Expand Up @@ -847,6 +856,7 @@ pub fn translate_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand All @@ -862,6 +872,7 @@ pub fn translate_expr(
}
let regs = program.alloc_register();
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand All @@ -883,6 +894,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Date),
Expand Down Expand Up @@ -934,6 +946,7 @@ pub fn translate_expr(
}

program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: str_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Substring),
Expand All @@ -958,6 +971,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand All @@ -979,6 +993,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Time),
Expand Down Expand Up @@ -1012,6 +1027,7 @@ pub fn translate_expr(
}
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand Down Expand Up @@ -1045,6 +1061,7 @@ pub fn translate_expr(
}

program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Min),
Expand Down Expand Up @@ -1078,6 +1095,7 @@ pub fn translate_expr(
}

program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: target_register + 1,
dest: target_register,
func: crate::vdbe::Func::Scalar(ScalarFunc::Max),
Expand Down Expand Up @@ -1114,6 +1132,7 @@ pub fn translate_expr(
cursor_hint,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: func_reg,
dest: target_register,
func: crate::vdbe::Func::Scalar(srf),
Expand Down
3 changes: 2 additions & 1 deletion core/vdbe/explain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,12 +533,13 @@ pub fn insn_to_str(
"".to_string(),
),
Insn::Function {
constant_mask,
start_reg,
dest,
func,
} => (
"Function",
1,
*constant_mask,
*start_reg as i32,
*dest as i32,
OwnedValue::Text(Rc::new(func.to_string())),
Expand Down
77 changes: 59 additions & 18 deletions core/vdbe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,10 @@ pub enum Insn {

// Function
Function {
// constant_mask: i32, // P1, not used for now
start_reg: usize, // P2, start of argument registers
dest: usize, // P3
func: Func, // P4
constant_mask: i32, // P1
start_reg: usize, // P2, start of argument registers
dest: usize, // P3
func: Func, // P4
},

InitCoroutine {
Expand Down Expand Up @@ -383,6 +383,7 @@ pub struct ProgramState {
cursors: RefCell<BTreeMap<CursorID, Box<dyn Cursor>>>,
registers: Vec<OwnedValue>,
ended_coroutine: bool, // flag to notify yield coroutine finished
regex_cache: HashMap<String, Regex>,
}

impl ProgramState {
Expand All @@ -395,6 +396,7 @@ impl ProgramState {
cursors,
registers,
ended_coroutine: false,
regex_cache: HashMap::new(),
}
}

Expand Down Expand Up @@ -1173,6 +1175,7 @@ impl Program {
}
}
Insn::Function {
constant_mask,
func,
start_reg,
dest,
Expand Down Expand Up @@ -1208,11 +1211,16 @@ impl Program {
start_reg,
state.registers.len()
);
let pattern = state.registers[start_reg].clone();
let text = state.registers[start_reg + 1].clone();
let pattern = &state.registers[start_reg];
let text = &state.registers[start_reg + 1];
let result = match (pattern, text) {
(OwnedValue::Text(pattern), OwnedValue::Text(text)) => {
OwnedValue::Integer(exec_like(&pattern, &text) as i64)
let cache = if *constant_mask > 0 {
Some(&mut state.regex_cache)
} else {
None
};
OwnedValue::Integer(exec_like(cache, pattern, text) as i64)
}
_ => {
unreachable!("Like on non-text registers");
Expand Down Expand Up @@ -1699,10 +1707,26 @@ fn exec_char(values: Vec<OwnedValue>) -> OwnedValue {
OwnedValue::Text(Rc::new(result))
}

// Implements LIKE pattern matching.
fn exec_like(pattern: &str, text: &str) -> bool {
let re = Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap();
re.is_match(text)
/// Compiles a SQL `LIKE` pattern into a [`Regex`] that must match the whole input.
///
/// `%` is translated to `.*` (any run of characters) and `_` to `.` (exactly one
/// character). The result is wrapped in `^…$` because `LIKE` compares against the
/// entire text, whereas `Regex::is_match` on an unanchored expression succeeds on
/// any substring. The `(?s)` flag lets `.` match `\n` too, so the wildcards also
/// span line breaks.
///
/// NOTE(review): every other character of `pattern` is handed to the regex engine
/// unescaped, so regex metacharacters (`.`, `*`, `[`, …) keep their regex meaning.
///
/// # Panics
///
/// Panics if the translated pattern is not a valid regular expression.
fn construct_like_regex(pattern: &str) -> Regex {
    // `str::replace` already yields an owned `String`; no further conversion needed.
    let body = pattern.replace('%', ".*").replace('_', ".");
    Regex::new(&format!("(?s)^{}$", body)).unwrap()
}

// Evaluates `text LIKE pattern`.
//
// When `regex_cache` is supplied, the compiled regex is looked up by the raw
// pattern string and stored there on a miss, so later calls with the same
// pattern skip compilation. Without a cache the regex is built on every call.
fn exec_like(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, text: &str) -> bool {
    // No cache available: compile, match, discard.
    let cache = match regex_cache {
        Some(cache) => cache,
        None => return construct_like_regex(pattern).is_match(text),
    };

    // Cache hit: reuse the previously compiled regex.
    if let Some(cached) = cache.get(pattern) {
        return cached.is_match(text);
    }

    // Cache miss: compile once, evaluate, then remember it for next time.
    let compiled = construct_like_regex(pattern);
    let matched = compiled.is_match(text);
    cache.insert(pattern.to_string(), compiled);
    matched
}

fn exec_minmax<'a>(
Expand Down Expand Up @@ -1876,7 +1900,7 @@ mod tests {
};
use mockall::{mock, predicate};
use rand::{rngs::mock::StepRng, thread_rng};
use std::{cell::Ref, rc::Rc};
use std::{cell::Ref, collections::HashMap, rc::Rc};

mock! {
Cursor {
Expand Down Expand Up @@ -2224,12 +2248,29 @@ mod tests {
}

#[test]
fn test_like() {
assert!(exec_like("a%", "aaaa"));
assert!(exec_like("%a%a", "aaaa"));
assert!(exec_like("%a.a", "aaaa"));
assert!(exec_like("a.a%", "aaaa"));
assert!(!exec_like("%a.ab", "aaaa"));
fn test_like_no_cache() {
assert!(exec_like(None, "a%", "aaaa"));
assert!(exec_like(None, "%a%a", "aaaa"));
assert!(exec_like(None, "%a.a", "aaaa"));
assert!(exec_like(None, "a.a%", "aaaa"));
assert!(!exec_like(None, "%a.ab", "aaaa"));
}

#[test]
fn test_like_with_cache() {
    // (pattern, text, expected result) evaluated against one shared cache.
    let cases = [
        ("a%", "aaaa", true),
        ("%a%a", "aaaa", true),
        ("%a.a", "aaaa", true),
        ("a.a%", "aaaa", true),
        ("%a.ab", "aaaa", false),
    ];
    let mut cache = HashMap::new();

    // The first pass fills the cache; the second pass runs the same checks
    // again after the values have been cached.
    for _pass in 0..2 {
        for &(pattern, text, expected) in cases.iter() {
            assert_eq!(
                exec_like(Some(&mut cache), pattern, text),
                expected,
                "pattern {:?} against {:?}",
                pattern,
                text
            );
        }
    }
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion sqlite3/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,7 @@ fn sqlite3_errstr_impl(rc: i32) -> *const std::ffi::c_char {
"datatype mismatch", // SQLITE_MISMATCH
"bad parameter or other API misuse", // SQLITE_MISUSE
#[cfg(feature = "lfs")]
"", // SQLITE_NOLFS
"", // SQLITE_NOLFS
#[cfg(not(feature = "lfs"))]
"large file support is disabled", // SQLITE_NOLFS
"authorization denied", // SQLITE_AUTH
Expand Down

0 comments on commit b7926df

Please sign in to comment.