diff --git a/CHANGELOG.md b/CHANGELOG.md index b42cc4bf..ac6e980f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ - Fixed stack overflows when handling very deep `Tree` objects - Added a non-recursive `Drop` implementation - Rewrote `Context::import` to use the heap instead of stack +- Updated `Context::import` to cache the `TreeOp → Node` mapping, which is a + minor optimization (probably only relevant for unreasonably large `Tree` + objects) # 0.2.5 The highlight of this release is native Windows support (including JIT diff --git a/fidget/src/core/context/mod.rs b/fidget/src/core/context/mod.rs index 8b8826cf..56494c9a 100644 --- a/fidget/src/core/context/mod.rs +++ b/fidget/src/core/context/mod.rs @@ -24,9 +24,10 @@ pub use tree::{Tree, TreeOp}; use crate::Error; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::fmt::Write; use std::io::{BufRead, BufReader, Read}; +use std::sync::Arc; use ordered_float::OrderedFloat; @@ -945,64 +946,114 @@ impl Context { // stack for actions, and a second stack for return values). enum Action<'a> { /// Pushes `Up(op)` followed by `Down(c)` for each child - Down(&'a TreeOp), + Down(&'a Arc), /// Consumes imported trees from the stack and pushes a new tree - Up(&'a TreeOp), + Up(&'a Arc), /// Pops the latest axis frame Pop, } let mut axes = vec![(self.x(), self.y(), self.z())]; - let mut todo = vec![Action::Down(tree)]; + let mut todo = vec![Action::Down(tree.arc())]; let mut stack = vec![]; + // Cache of TreeOp -> Node mapping under a particular frame (axes) + // + // This isn't required for correctness, but can be a speed optimization + // (because it means we don't have to walk the same tree twice). + let mut seen = HashMap::new(); + while let Some(t) = todo.pop() { match t { Action::Down(t) => { - todo.push(Action::Up(t)); - match t { - TreeOp::Const(..) | TreeOp::Input(..) => (), - TreeOp::Unary(_op, arg) => todo.push(Action::Down(arg)), + // If we've already seen this TreeOp with these axes, then + // we can return the previous Node. + if matches!( + t.as_ref(), + TreeOp::Unary(..) | TreeOp::Binary(..) + ) { + if let Some(p) = + seen.get(&(*axes.last().unwrap(), Arc::as_ptr(t))) + { + stack.push(*p); + continue; + } + } + match t.as_ref() { + TreeOp::Const(c) => { + stack.push(self.constant(*c)); + } + TreeOp::Input(s) => { + let axes = axes.last().unwrap(); + stack.push(match *s { + "X" => axes.0, + "Y" => axes.1, + "Z" => axes.2, + s => panic!("invalid tree input string {s:?}"), + }); + } + TreeOp::Unary(_op, arg) => { + todo.push(Action::Up(t)); + todo.push(Action::Down(arg)); + } TreeOp::Binary(_op, lhs, rhs) => { + todo.push(Action::Up(t)); todo.push(Action::Down(lhs)); todo.push(Action::Down(rhs)); } TreeOp::RemapAxes { target: _, x, y, z } => { // Action::Up(t) does the remapping and target eval + todo.push(Action::Up(t)); todo.push(Action::Down(x)); todo.push(Action::Down(y)); todo.push(Action::Down(z)); } } } - Action::Up(t) => match t { - TreeOp::Const(c) => stack.push(self.constant(*c)), - TreeOp::Input(s) => { - let axes = axes.last().unwrap(); - stack.push(match *s { - "X" => axes.0, - "Y" => axes.1, - "Z" => axes.2, - s => panic!("invalid tree input string {s:?}"), - }); - } - TreeOp::Unary(op, ..) => { - let arg = stack.pop().unwrap(); - stack.push(self.op_unary(arg, *op).unwrap()); - } - TreeOp::Binary(op, ..) => { - let lhs = stack.pop().unwrap(); - let rhs = stack.pop().unwrap(); - stack.push(self.op_binary(lhs, rhs, *op).unwrap()); + Action::Up(t) => { + match t.as_ref() { + TreeOp::Const(..) | TreeOp::Input(..) => unreachable!(), + TreeOp::Unary(op, ..) => { + let arg = stack.pop().unwrap(); + let out = self.op_unary(arg, *op).unwrap(); + stack.push(out); + } + TreeOp::Binary(op, ..) => { + let lhs = stack.pop().unwrap(); + let rhs = stack.pop().unwrap(); + let out = self.op_binary(lhs, rhs, *op).unwrap(); + if Arc::strong_count(t) > 1 { + seen.insert( + (*axes.last().unwrap(), Arc::as_ptr(t)), + out, + ); + } + stack.push(out); + } + TreeOp::RemapAxes { target, .. } => { + let x = stack.pop().unwrap(); + let y = stack.pop().unwrap(); + let z = stack.pop().unwrap(); + axes.push((x, y, z)); + todo.push(Action::Pop); + todo.push(Action::Down(target)); + } } - TreeOp::RemapAxes { target, .. } => { - let x = stack.pop().unwrap(); - let y = stack.pop().unwrap(); - let z = stack.pop().unwrap(); - axes.push((x, y, z)); - todo.push(Action::Pop); - todo.push(Action::Down(target)); + // Update the cache with the new tree, if relevant + // + // The `strong_count` check is a rough heuristic to avoid + // caching if there's only a single owner of the tree. This + // isn't perfect, but it doesn't need to be for correctness. + if matches!( + t.as_ref(), + TreeOp::Unary(..) | TreeOp::Binary(..) + ) && Arc::strong_count(t) > 1 + { + seen.insert( + (*axes.last().unwrap(), Arc::as_ptr(t)), + *stack.last().unwrap(), + ); } - }, + } Action::Pop => { axes.pop().unwrap(); } diff --git a/fidget/src/core/context/tree.rs b/fidget/src/core/context/tree.rs index af46f6ad..ef272b2b 100644 --- a/fidget/src/core/context/tree.rs +++ b/fidget/src/core/context/tree.rs @@ -129,6 +129,11 @@ impl Tree { Arc::as_ptr(&self.0) } + /// Borrow the inner `Arc` + pub(crate) fn arc(&self) -> &Arc { + &self.0 + } + /// Remaps the axes of the given tree /// /// The remapping is lazy; it is not evaluated until the tree is imported @@ -382,4 +387,56 @@ mod test { assert_eq!(ctx.eval_xyz(v_, 2.0, 2.0, 1.0).unwrap(), 17.0); assert_eq!(ctx.eval_xyz(v_, 2.0, 2.0, 2.0).unwrap(), 20.0); } + + #[test] + fn tree_import_cache() { + let mut x = Tree::x(); + for _ in 0..100_000 { + x += 1.0; + } + let mut ctx = Context::new(); + let start = std::time::Instant::now(); + ctx.import(&x); + let small = start.elapsed(); + + // Build a new tree with 4 copies of the original + let x = x.clone() * x.clone() * x.clone() * x; + let mut ctx = Context::new(); + let start = std::time::Instant::now(); + ctx.import(&x); + let large = start.elapsed(); + + assert!( + large.as_millis() < small.as_millis() * 2, + "tree import cache failed: {large:?} is much larger than {small:?}" + ); + } + + #[test] + fn tree_import_nocache() { + let mut x = Tree::x(); + for _ in 0..100_000 { + x += 1.0; + } + let mut ctx = Context::new(); + let start = std::time::Instant::now(); + ctx.import(&x); + let small = start.elapsed(); + + // Build a new tree with 4 remapped versions of the original + let x = x.remap_xyz(Tree::y(), Tree::z(), Tree::x()) + * x.remap_xyz(Tree::z(), Tree::x(), Tree::y()) + * x.remap_xyz(Tree::y(), Tree::x(), Tree::z()) + * x; + let mut ctx = Context::new(); + let start = std::time::Instant::now(); + ctx.import(&x); + let large = start.elapsed(); + + assert!( + large.as_millis() > small.as_millis() * 2, + "tree import cache failed: + {large:?} is not much larger than {small:?}" + ); + } }