Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Introduce TypeKey instead of String keys for named types #590

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@

# Changelog

## Unreleased

### Candid

* [BREAKING]: type representation was optimized to improve performance:
* In `Type::Var(var)` `var` now has type `TypeKey` instead of `String`. Calling `var.as_str()` returns `&str` and `var.to_string()` returns a `String`. The string representation of indexed variables remains `table{index}` to maintain compatibility with previous versions.
* `TypeEnv` now contains a `HashMap` instead of `BTreeMap`. Code that relied on the iteration order of the map (e.g. `env.0.iter()`) should make use of the newly added `TypeEnv::to_sorted_iter()` method which returns types sorted by their keys.

## 2025-01-15

### Candid 0.10.12
Expand Down
1 change: 1 addition & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ignore-interior-mutability = ["candid::types::type_key::TypeKey"]
2 changes: 2 additions & 0 deletions rust/candid/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ candid_derive = { path = "../candid_derive", version = "=0.6.6" }
ic_principal = { path = "../ic_principal", version = "0.1.0" }
binread = { version = "2.2", features = ["debug_template"] }
byteorder = "1.5.0"
foldhash = "0.1.3"
hashbrown = "0.15"
leb128 = "0.2.5"
paste = "1.0"
hex.workspace = true
Expand Down
13 changes: 5 additions & 8 deletions rust/candid/src/binary_parser.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::types::internal::{Field, Function, Label, Type, TypeInner};
use crate::types::internal::{Field, Function, Label, Type, TypeInner, TypeKey};
use crate::types::type_env::TypeMap;
use crate::types::{FuncMode, TypeEnv};
use anyhow::{anyhow, Context, Result};
use binread::io::{Read, Seek};
Expand Down Expand Up @@ -135,17 +136,14 @@ pub struct PrincipalBytes {
pub inner: Vec<u8>,
}

fn index_to_var(ind: i64) -> String {
format!("table{ind}")
}
impl IndexType {
fn to_type(&self, len: u64) -> Result<Type> {
Ok(match self.index {
v if v >= 0 => {
if v >= len as i64 {
return Err(anyhow!("type index {} out of range", v));
}
TypeInner::Var(index_to_var(v))
TypeInner::Var(TypeKey::indexed(v))
}
-1 => TypeInner::Null,
-2 => TypeInner::Bool,
Expand Down Expand Up @@ -233,13 +231,12 @@ impl ConsType {
}
impl Table {
fn to_env(&self, len: u64) -> Result<TypeEnv> {
use std::collections::BTreeMap;
let mut env = BTreeMap::new();
let mut env = TypeMap::default();
for (i, t) in self.table.iter().enumerate() {
let ty = t
.to_type(len)
.with_context(|| format!("Invalid table entry {i}: {t:?}"))?;
env.insert(index_to_var(i as i64), ty);
env.insert(TypeKey::indexed(i as i64), ty);
}
// validate method has func type
for t in env.values() {
Expand Down
2 changes: 1 addition & 1 deletion rust/candid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ pub mod pretty;

// Candid hash function comes from
// https://caml.inria.fr/pub/papers/garrigue-polymorphic_variants-ml98.pdf
// Not public API. Only used by tests.
// Not public API.
// Remember to update the same function in candid_derive if you change this function.
#[doc(hidden)]
#[inline]
Expand Down
8 changes: 4 additions & 4 deletions rust/candid/src/pretty/candid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ pub fn pp_ty_inner(ty: &TypeInner) -> RcDoc {
Text => str("text"),
Reserved => str("reserved"),
Empty => str("empty"),
Var(ref s) => str(s),
Var(ref s) => str(s.as_str()),
Principal => str("principal"),
Opt(ref t) => kwd("opt").append(pp_ty(t)),
Vec(ref t) if matches!(t.as_ref(), Nat8) => str("blob"),
Expand All @@ -116,7 +116,7 @@ pub fn pp_ty_inner(ty: &TypeInner) -> RcDoc {
let doc = pp_args(args).append(" ->").append(RcDoc::space());
match t.as_ref() {
Service(ref serv) => doc.append(pp_service(serv)),
Var(ref s) => doc.append(s),
Var(ref s) => doc.append(s.as_str()),
_ => unreachable!(),
}
}
Expand Down Expand Up @@ -188,9 +188,9 @@ fn pp_service(serv: &[(String, Type)]) -> RcDoc {
}

fn pp_defs(env: &TypeEnv) -> RcDoc {
lines(env.0.iter().map(|(id, ty)| {
lines(env.to_sorted_iter().map(|(id, ty)| {
kwd("type")
.append(ident(id))
.append(ident(id.as_str()))
.append(kwd("="))
.append(pp_ty(ty))
.append(";")
Expand Down
33 changes: 20 additions & 13 deletions rust/candid/src/types/internal.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
use super::CandidType;
use crate::idl_hash;
use crate::types::type_env::TypeMap;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::fmt;
use std::fmt::Debug;
use std::hash::Hash;

pub use crate::types::type_key::TypeKey;

// This is a re-implementation of std::any::TypeId to get rid of 'static constraint.
// The current TypeId doesn't consider lifetime while computing the hash, which is
Expand All @@ -24,7 +29,7 @@ impl TypeId {
impl std::fmt::Display for TypeId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let name = NAME.with(|n| n.borrow_mut().get(self));
write!(f, "{name}")
f.write_str(&name)
}
}
pub fn type_of<T>(_: &T) -> TypeId {
Expand Down Expand Up @@ -113,8 +118,9 @@ impl TypeContainer {
}
let id = ID.with(|n| n.borrow().get(t).cloned());
if let Some(id) = id {
self.env.0.insert(id.to_string(), res);
TypeInner::Var(id.to_string())
let type_key = TypeKey::from(id.to_string());
self.env.0.insert(type_key.clone(), res);
TypeInner::Var(type_key)
} else {
// if the type is part of an enum, the id won't be recorded.
// we want to inline the type in this case.
Expand All @@ -133,17 +139,18 @@ impl TypeContainer {
.into();
let id = ID.with(|n| n.borrow().get(t).cloned());
if let Some(id) = id {
self.env.0.insert(id.to_string(), res);
TypeInner::Var(id.to_string())
let type_key = TypeKey::from(id.to_string());
self.env.0.insert(type_key.clone(), res);
TypeInner::Var(type_key)
} else {
return res;
}
}
TypeInner::Knot(id) => {
let name = id.to_string();
let ty = ENV.with(|e| e.borrow().get(id).unwrap().clone());
self.env.0.insert(id.to_string(), ty);
TypeInner::Var(name)
let type_key = TypeKey::from(id.to_string());
self.env.0.insert(type_key.clone(), ty);
TypeInner::Var(type_key)
}
TypeInner::Func(func) => TypeInner::Func(Function {
modes: func.modes.clone(),
Expand Down Expand Up @@ -187,7 +194,7 @@ pub enum TypeInner {
Reserved,
Empty,
Knot(TypeId), // For recursive types from Rust
Var(String), // For variables from Candid file
Var(TypeKey), // For variables from Candid file
Unknown,
Opt(Type),
Vec(Type),
Expand Down Expand Up @@ -248,12 +255,12 @@ impl Type {
pub fn is_blob(&self, env: &crate::TypeEnv) -> bool {
self.as_ref().is_blob(env)
}
pub fn subst(&self, tau: &std::collections::BTreeMap<String, String>) -> Self {
pub fn subst(&self, tau: &TypeMap<TypeKey>) -> Self {
use TypeInner::*;
match self.as_ref() {
Var(id) => match tau.get(id) {
None => Var(id.to_string()),
Some(new_id) => Var(new_id.to_string()),
None => Var(id.clone()),
Some(new_id) => Var(new_id.clone()),
},
Opt(t) => Opt(t.subst(tau)),
Vec(t) => Vec(t.subst(tau)),
Expand Down Expand Up @@ -330,7 +337,7 @@ pub fn text_size(t: &Type, limit: i32) -> Result<i32, ()> {
Reserved => 8,
Principal => 9,
Knot(_) => 10,
Var(id) => id.len() as i32,
Var(id) => id.as_str().len() as i32,
Opt(t) => 4 + text_size(t, limit - 4)?,
Vec(t) => 4 + text_size(t, limit - 4)?,
Record(fs) | Variant(fs) => {
Expand Down
1 change: 1 addition & 0 deletions rust/candid/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod result;

pub mod arc;
pub mod rc;
mod type_key;

pub trait CandidType {
// memoized type derivation
Expand Down
54 changes: 35 additions & 19 deletions rust/candid/src/types/type_env.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
use crate::types::internal::TypeKey;
use crate::types::{Function, Type, TypeInner};
use crate::{Error, Result};
use std::collections::BTreeMap;
use foldhash::fast::FixedState;
use hashbrown::HashMap;

pub type TypeMap<V> = HashMap<TypeKey, V, FixedState>;

#[derive(Debug, Clone, Default)]
pub struct TypeEnv(pub BTreeMap<String, Type>);
pub struct TypeEnv(pub TypeMap<Type>);

impl TypeEnv {
pub fn new() -> Self {
TypeEnv(BTreeMap::new())
TypeEnv(TypeMap::default())
}

pub fn merge<'a>(&'a mut self, env: &TypeEnv) -> Result<&'a mut Self> {
for (k, v) in &env.0 {
let entry = self.0.entry(k.to_string()).or_insert_with(|| v.clone());
for (k, v) in env.0.iter() {
let entry = self.0.entry(k.clone()).or_insert_with(|| v.clone());
if *entry != *v {
return Err(Error::msg("inconsistent binding"));
}
}
Ok(self)
}
pub fn merge_type(&mut self, env: TypeEnv, ty: Type) -> Type {
let tau: BTreeMap<String, String> = env
let tau: TypeMap<TypeKey> = env
.0
.keys()
.filter(|k| self.0.contains_key(*k))
.map(|k| (k.clone(), format!("{k}/1")))
.map(|k| (k.clone(), format!("{k}/1").into()))
.collect();
for (k, t) in env.0 {
let t = t.subst(&tau);
Expand All @@ -35,13 +40,14 @@ impl TypeEnv {
}
ty.subst(&tau)
}
pub fn find_type(&self, name: &str) -> Result<&Type> {
match self.0.get(name) {
None => Err(Error::msg(format!("Unbound type identifier {name}"))),
pub fn find_type(&self, key: &TypeKey) -> Result<&Type> {
match self.0.get(key) {
None => Err(Error::msg(format!("Unbound type identifier {key}"))),
Some(t) => Ok(t),
}
}
pub fn rec_find_type(&self, name: &str) -> Result<&Type> {

pub fn rec_find_type(&self, name: &TypeKey) -> Result<&Type> {
let t = self.find_type(name)?;
match t.as_ref() {
TypeInner::Var(id) => self.rec_find_type(id),
Expand Down Expand Up @@ -82,8 +88,8 @@ impl TypeEnv {
}
fn is_empty<'a>(
&'a self,
res: &mut BTreeMap<&'a str, Option<bool>>,
id: &'a str,
res: &mut HashMap<&'a TypeKey, Option<bool>, FixedState>,
id: &'a TypeKey,
) -> Result<bool> {
match res.get(id) {
None => {
Expand Down Expand Up @@ -116,24 +122,34 @@ impl TypeEnv {
}
}
pub fn replace_empty(&mut self) -> Result<()> {
let mut res = BTreeMap::new();
for name in self.0.keys() {
self.is_empty(&mut res, name)?;
let mut res = HashMap::default();
for key in self.0.keys() {
self.is_empty(&mut res, key)?;
}
let ids: Vec<_> = res
.iter()
.into_iter()
.filter(|(_, v)| matches!(v, Some(true)))
.map(|(id, _)| id.to_string())
.map(|(id, _)| id.to_owned())
.collect();
for id in ids {
self.0.insert(id, TypeInner::Empty.into());
}
Ok(())
}

/// Creates an iterator that iterates over the types in the order of keys.
///
/// The implementation collects elements into a temporary vector and sorts the vector.
pub fn to_sorted_iter(&self) -> impl Iterator<Item = (&TypeKey, &Type)> {
let mut vec: Vec<_> = self.0.iter().collect();
vec.sort_unstable_by_key(|elem| elem.0);
vec.into_iter()
}
}

impl std::fmt::Display for TypeEnv {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (k, v) in &self.0 {
for (k, v) in self.to_sorted_iter() {
writeln!(f, "type {k} = {v}")?;
}
Ok(())
Expand Down
Loading
Loading