diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bfdbe9..b0bb24d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,3 +30,7 @@
 - implemented `Tokenizer::seek`
 - renamed `Tokenizer::tokenize_all` to `Tokenizer::consume_all`
+
+## [12.1.0] - 2024-03-12
+
+- replace `lazy_static` with `OnceLock`, removing a dependency
diff --git a/Cargo.lock b/Cargo.lock
index 5710378..80300a2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -241,11 +241,10 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
 
 [[package]]
 name = "jayce"
-version = "12.0.0"
+version = "12.1.0"
 dependencies = [
  "bytecount",
  "criterion",
- "lazy_static",
  "regex",
 ]
 
@@ -258,12 +257,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "lazy_static"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
-
 [[package]]
 name = "libc"
 version = "0.2.153"
diff --git a/Cargo.toml b/Cargo.toml
index 82eafc1..c08d778 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "jayce"
-version = "12.0.0"
+version = "12.1.0"
 edition = "2021"
 description = "jayce is a tokenizer 🌌"
 repository = "https://github.com/AuracleTech/jayce"
@@ -8,7 +8,6 @@ license = "MIT"
 
 [dependencies]
 bytecount = "0.6.7"
-lazy_static = "1.4.0"
 regex = "1.10.3"
 
 [dev-dependencies]
diff --git a/README.md b/README.md
index 9b350de..7697c65 100644
--- a/README.md
+++ b/README.md
@@ -6,25 +6,28 @@ jayce is a tokenizer 🌌
 
 ```rust
 use jayce::{Duo, Tokenizer};
+use std::sync::OnceLock;
 
 const SOURCE: &str = "Excalibur = 5000$; // Your own language!";
 
-lazy_static::lazy_static! {
-    static ref DUOS: Vec<Duo<&'static str>> = vec![
-        Duo::new("whitespace", r"^[^\S\n]+", false),
-        Duo::new("commentLine", r"^//(.*)", false),
-        Duo::new("commentBlock", r"^/\*(.|\n)*?\*/", false),
-        Duo::new("newline", r"^\n", false),
-
-        Duo::new("price", r"^[0-9]+\$", true),
-        Duo::new("semicolon", r"^;", true),
-        Duo::new("operator", r"^=", true),
-        Duo::new("name", r"^[a-zA-Z_]+", true)
-    ];
+fn duos() -> &'static Vec<Duo<&'static str>> {
+    static DUOS: OnceLock<Vec<Duo<&'static str>>> = OnceLock::new();
+    DUOS.get_or_init(|| {
+        vec![
+            Duo::new("whitespace", r"^[^\S\n]+", false),
+            Duo::new("commentLine", r"^//(.*)", false),
+            Duo::new("commentBlock", r"^/\*(.|\n)*?\*/", false),
+            Duo::new("newline", r"^\n", false),
+            Duo::new("price", r"^[0-9]+\$", true),
+            Duo::new("semicolon", r"^;", true),
+            Duo::new("operator", r"^=", true),
+            Duo::new("name", r"^[a-zA-Z_]+", true),
+        ]
+    })
 }
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let mut tokenizer = Tokenizer::new(SOURCE, &DUOS);
+    let mut tokenizer = Tokenizer::new(SOURCE, duos());
 
     while let Some(token) = tokenizer.consume()? {
         println!("{:?}", token);
diff --git a/TODOS.md b/TODOS.md
index 3199047..02395f1 100644
--- a/TODOS.md
+++ b/TODOS.md
@@ -1,9 +1,5 @@
 # TODO
 
-- [ ] replace `lazy_static` by `LazyLock` when available in stable std release
-
-#### new features
-
 - [ ] multi-threading support
 - [ ] improve performance and precision by removing `^`
 - [ ] parsing the whole file at once
diff --git a/benches/initialization.rs b/benches/initialization.rs
index 8f183d6..3de4f09 100644
--- a/benches/initialization.rs
+++ b/benches/initialization.rs
@@ -1,12 +1,12 @@
 #[allow(unused_imports)]
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use jayce::{internal::DUOS_RUST, Tokenizer};
+use jayce::{internal::duos_rust, Tokenizer};
 
 const EMPTY_SOURCE: &str = "";
 
 fn criterion_benchmark(c: &mut Criterion) {
     c.bench_function("initialization", |b| {
-        b.iter(|| black_box(Tokenizer::new(EMPTY_SOURCE, &DUOS_RUST)))
+        b.iter(|| black_box(Tokenizer::new(EMPTY_SOURCE, duos_rust())))
     });
 }
 
diff --git a/benches/tokenize.rs b/benches/tokenize.rs
index cfbdd91..a14f510 100644
--- a/benches/tokenize.rs
+++ b/benches/tokenize.rs
@@ -1,6 +1,6 @@
 #[allow(unused_imports)]
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use jayce::{internal::DUOS_RUST, Tokenizer};
+use jayce::{internal::duos_rust, Tokenizer};
 
 fn criterion_benchmark(c: &mut Criterion) {
     let current_dir = std::env::current_dir()
@@ -28,7 +28,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         |b: &mut criterion::Bencher<'_>| {
             b.iter(|| {
                 for (_, source) in files.iter() {
-                    let mut tokenizer = Tokenizer::new(source, &DUOS_RUST);
+                    let mut tokenizer = Tokenizer::new(source, duos_rust());
                     let tokens = tokenizer.consume_all().unwrap();
                     black_box(tokens);
                 }
@@ -38,7 +38,7 @@ fn criterion_benchmark(c: &mut Criterion) {
 
     let mut total = 0;
     for (_, source) in files.iter() {
-        let mut tokenizer = Tokenizer::new(source, &DUOS_RUST);
+        let mut tokenizer = Tokenizer::new(source, duos_rust());
         total += tokenizer.consume_all().unwrap().len();
     }
     println!("Amount of tokens created : {}", total);
diff --git a/examples/example.rs b/examples/example.rs
index 9fab559..9422ce2 100644
--- a/examples/example.rs
+++ b/examples/example.rs
@@ -1,23 +1,26 @@
 use jayce::{Duo, Tokenizer};
+use std::sync::OnceLock;
 
 const SOURCE: &str = "Excalibur = 5000$; // Your own language!";
 
-lazy_static::lazy_static! {
-    static ref DUOS: Vec<Duo<&'static str>> = vec![
-        Duo::new("whitespace", r"^[^\S\n]+", false),
-        Duo::new("commentLine", r"^//(.*)", false),
-        Duo::new("commentBlock", r"^/\*(.|\n)*?\*/", false),
-        Duo::new("newline", r"^\n", false),
-
-        Duo::new("price", r"^[0-9]+\$", true),
-        Duo::new("semicolon", r"^;", true),
-        Duo::new("operator", r"^=", true),
-        Duo::new("name", r"^[a-zA-Z_]+", true)
-    ];
+fn duos() -> &'static Vec<Duo<&'static str>> {
+    static DUOS: OnceLock<Vec<Duo<&'static str>>> = OnceLock::new();
+    DUOS.get_or_init(|| {
+        vec![
+            Duo::new("whitespace", r"^[^\S\n]+", false),
+            Duo::new("commentLine", r"^//(.*)", false),
+            Duo::new("commentBlock", r"^/\*(.|\n)*?\*/", false),
+            Duo::new("newline", r"^\n", false),
+            Duo::new("price", r"^[0-9]+\$", true),
+            Duo::new("semicolon", r"^;", true),
+            Duo::new("operator", r"^=", true),
+            Duo::new("name", r"^[a-zA-Z_]+", true),
+        ]
+    })
 }
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let mut tokenizer = Tokenizer::new(SOURCE, &DUOS);
+    let mut tokenizer = Tokenizer::new(SOURCE, duos());
 
     while let Some(token) = tokenizer.consume()? {
         println!("{:?}", token);
diff --git a/examples/multiple_files.rs b/examples/multiple_files.rs
index 975e045..b27900f 100644
--- a/examples/multiple_files.rs
+++ b/examples/multiple_files.rs
@@ -1,4 +1,4 @@
-use jayce::{internal::DUOS_RUST, Tokenizer};
+use jayce::{internal::duos_rust, Tokenizer};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     let current_dir = std::env::current_dir()
@@ -24,7 +24,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let mut total_token_count = 0;
 
     for (filename, source) in files.iter() {
-        let mut tokenizer = Tokenizer::new(source, &DUOS_RUST);
+        let mut tokenizer = Tokenizer::new(source, duos_rust());
 
         let tokens_for_file = tokenizer.consume_all()?.len();
diff --git a/src/internal.rs b/src/internal.rs
index d3081f4..d05b3ab 100644
--- a/src/internal.rs
+++ b/src/internal.rs
@@ -1,3 +1,5 @@
+use std::sync::OnceLock;
+
 use crate::Duo;
 
 #[derive(Debug, PartialEq, Clone, Copy)]
@@ -36,9 +38,10 @@ pub enum KindsRust {
     MacroExclamation,
 }
 
-lazy_static::lazy_static! {
-    pub static ref DUOS_RUST: Vec<Duo<KindsRust>> = vec![
-        Duo::new(KindsRust::Whitespace, r"^[^\S\n]+", false),
+pub fn duos_rust() -> &'static Vec<Duo<KindsRust>> {
+    static DUOS_RUST: OnceLock<Vec<Duo<KindsRust>>> = OnceLock::new();
+    DUOS_RUST.get_or_init(|| {vec![
+        Duo::new(KindsRust::Whitespace, r"^[^\S\n]+", false),
         Duo::new(KindsRust::CommentLine, r"^//(.*)", false),
         Duo::new(KindsRust::CommentBlock, r"^/\*(.|\n)*?\*/", false),
         Duo::new(KindsRust::Newline, r"^\n", false),
@@ -72,6 +75,46 @@ lazy_static::lazy_static! {
         Duo::new(KindsRust::Caret, r"^\^", true),
         Duo::new(KindsRust::TempBorrow, r"^&", true),
         Duo::new(KindsRust::Question, r"^\?", true),
-        Duo::new(KindsRust::MacroExclamation, r"^!", true),
-    ];
+        Duo::new(KindsRust::MacroExclamation, r"^!", true)
+    ]})
 }
+
+// lazy_static::lazy_static! {
+//     pub static ref DUOS_RUST: Vec<Duo<KindsRust>> = vec![
+//         Duo::new(KindsRust::Whitespace, r"^[^\S\n]+", false),
+//         Duo::new(KindsRust::CommentLine, r"^//(.*)", false),
+//         Duo::new(KindsRust::CommentBlock, r"^/\*(.|\n)*?\*/", false),
+//         Duo::new(KindsRust::Newline, r"^\n", false),
+//         Duo::new(
+//             KindsRust::Keyword,
+//             r"^(mut|let|if|else|fn|struct|enum|match|use|mod|pub|crate|impl|trait|for|while|loop|break|continue|return|as|const|static|type|where|unsafe|extern|ref|self|super|in|move|dyn|abstract|async|await|become|box|do|final|macro|override|priv|typeof|unsized|virtual|yield)\b",
+//             true
+//         ),
+//         Duo::new(KindsRust::String, r#"^"[^"]*""#, true),
+//         Duo::new(KindsRust::Char, r"^'(.|\\n)'", true),
+//         Duo::new(KindsRust::Lifetime, r"^'(?:[a-z_][a-z0-9_]*|static)\b", true),
+//         Duo::new(KindsRust::Operator, r"^(=|\+|-|\*|/|%)", true),
+//         Duo::new(KindsRust::Identifier, r"^[a-zA-Z_][a-zA-Z0-9_]*", true),
+//         Duo::new(KindsRust::Integer, r"^\d+", true),
+//         Duo::new(KindsRust::Float, r"^\d+\.\d+", true),
+//         Duo::new(KindsRust::DoubleColon, r"^::", true),
+//         Duo::new(KindsRust::Semicolon, r"^;", true),
+//         Duo::new(KindsRust::OpenBrace, r"^\{", true),
+//         Duo::new(KindsRust::CloseBrace, r"^\}", true),
+//         Duo::new(KindsRust::OpenParen, r"^\(", true),
+//         Duo::new(KindsRust::CloseParen, r"^\)", true),
+//         Duo::new(KindsRust::OpenBracket, r"^\[", true),
+//         Duo::new(KindsRust::CloseBracket, r"^\]", true),
+//         Duo::new(KindsRust::Comma, r"^,", true),
+//         Duo::new(KindsRust::Hash, r"^#", true),
+//         Duo::new(KindsRust::Dot, r"^\.", true),
+//         Duo::new(KindsRust::Colon, r"^:", true),
+//         Duo::new(KindsRust::Pipe, r"^\|", true),
+//         Duo::new(KindsRust::OpenAngle, r"^<", true),
+//         Duo::new(KindsRust::CloseAngle, r"^>", true),
+//         Duo::new(KindsRust::Caret, r"^\^", true),
+//         Duo::new(KindsRust::TempBorrow, r"^&", true),
+//         Duo::new(KindsRust::Question, r"^\?", true),
+//         Duo::new(KindsRust::MacroExclamation, r"^!", true),
+//     ];
+// }
diff --git a/tests/custom_duo_type.rs b/tests/custom_duo_type.rs
index c84d2d2..bf9870d 100644
--- a/tests/custom_duo_type.rs
+++ b/tests/custom_duo_type.rs
@@ -1,18 +1,21 @@
 use jayce::{Duo, Token, Tokenizer};
-use lazy_static::lazy_static;
+use std::sync::OnceLock;
 
 const SOURCE: &str = "Excalibur = 5000$";
 
-lazy_static! {
-    static ref DUOS: Vec> = vec![
-        Duo::new(1, r"^\s+", false),
-        Duo::new(2, r"^//(.*)", false),
-        Duo::new(3, r"^/\*(.|\n)*?\*/", false),
-        Duo::new(4, r"^\n", false),
-        Duo::new(5, r"^[0-9]+\$", true),
-        Duo::new(6, r"^=", true),
-        Duo::new(7, r"^[a-zA-Z_]+", true)
-    ];
+fn duos() -> &'static Vec> {
+    static DUOS: OnceLock>> = OnceLock::new();
+    DUOS.get_or_init(|| {
+        vec![
+            Duo::new(1, r"^\s+", false),
+            Duo::new(2, r"^//(.*)", false),
+            Duo::new(3, r"^/\*(.|\n)*?\*/", false),
+            Duo::new(4, r"^\n", false),
+            Duo::new(5, r"^[0-9]+\$", true),
+            Duo::new(6, r"^=", true),
+            Duo::new(7, r"^[a-zA-Z_]+", true),
+        ]
+    })
 }
 
 const EXPECTED: [Token; 3] = [
@@ -35,7 +38,7 @@
 
 #[test]
 fn custom_duos() {
-    let tokens = Tokenizer::new(SOURCE, &DUOS).consume_all().unwrap();
+    let tokens = Tokenizer::new(SOURCE, duos()).consume_all().unwrap();
     assert_eq!(tokens, EXPECTED);
     assert_eq!(tokens.len(), EXPECTED.len());
 }
diff --git a/tests/failed_match.rs b/tests/failed_match.rs
index de8a135..dc647df 100644
--- a/tests/failed_match.rs
+++ b/tests/failed_match.rs
@@ -1,10 +1,10 @@
-use jayce::{internal::DUOS_RUST, Tokenizer};
+use jayce::{internal::duos_rust, Tokenizer};
 
 const SOURCE: &str = "🦀";
 
 #[test]
 #[should_panic(expected = "Failed to match")]
 fn failed_match() {
-    let mut tokenizer = Tokenizer::new(SOURCE, &DUOS_RUST);
+    let mut tokenizer = Tokenizer::new(SOURCE, duos_rust());
     let _ = tokenizer.consume().unwrap();
 }
diff --git a/tests/multiline.rs b/tests/multiline.rs
index 2689b74..956f914 100644
--- a/tests/multiline.rs
+++ b/tests/multiline.rs
@@ -1,4 +1,5 @@
 use jayce::{Duo, Token, Tokenizer};
+use std::sync::OnceLock;
 
 const SOURCE: &str = r#"let kind_cat = "I calmly pet my cute cats"
 pancake_icecream
 very_multiline
 "#;
@@ -7,18 +8,20 @@
 
-lazy_static::lazy_static! {
-    static ref DUOS: Vec<Duo<&'static str>> = vec![
-        Duo::new("whitespace", r"^[^\S\n]+", true),
-        Duo::new("comment_line", r"^//(.*)", true),
-        Duo::new("comment_block", r"^/\*(.|\n)*?\*/", true),
-        Duo::new("newline", r"^\n", true),
-
-        Duo::new("operator", r"^=", true),
-        Duo::new("keyword", r"^let", true),
-        Duo::new("string", r#"^"[^"]*""#, true),
-        Duo::new("identifier", r"^[a-z_]+", true)
-    ];
+fn duos() -> &'static Vec<Duo<&'static str>> {
+    static DUOS: OnceLock<Vec<Duo<&'static str>>> = OnceLock::new();
+    DUOS.get_or_init(|| {
+        vec![
+            Duo::new("whitespace", r"^[^\S\n]+", true),
+            Duo::new("comment_line", r"^//(.*)", true),
+            Duo::new("comment_block", r"^/\*(.|\n)*?\*/", true),
+            Duo::new("newline", r"^\n", true),
+            Duo::new("operator", r"^=", true),
+            Duo::new("keyword", r"^let", true),
+            Duo::new("string", r#"^"[^"]*""#, true),
+            Duo::new("identifier", r"^[a-z_]+", true),
+        ]
+    })
 }
 
 const EXPECTED: [Token<&'static str>; 14] = [
@@ -96,7 +99,7 @@ const EXPECTED: [Token<&'static str>; 14] = [
 
 #[test]
 fn multiline() {
-    let tokens = Tokenizer::new(SOURCE, &DUOS).consume_all().unwrap();
+    let tokens = Tokenizer::new(SOURCE, duos()).consume_all().unwrap();
     assert_eq!(tokens, EXPECTED);
     assert_eq!(tokens.len(), EXPECTED.len());
 }
diff --git a/tests/peek.rs b/tests/peek.rs
index 53a7ff2..8edff48 100644
--- a/tests/peek.rs
+++ b/tests/peek.rs
@@ -1,4 +1,5 @@
 use jayce::{Duo, Token, Tokenizer};
+use std::sync::OnceLock;
 
 const SOURCE: &str = r#"let kind_cat = "I calmly pet my cute cats"
 pancake_icecream
 very_multiline
 "#;
@@ -7,18 +8,20 @@
 
-lazy_static::lazy_static! {
-    static ref DUOS: Vec<Duo<&'static str>> = vec![
-        Duo::new("whitespace", r"^[^\S\n]+", true),
-        Duo::new("comment_line", r"^//(.*)", true),
-        Duo::new("comment_block", r"^/\*(.|\n)*?\*/", true),
-        Duo::new("newline", r"^\n", true),
-
-        Duo::new("operator", r"^=", true),
-        Duo::new("keyword", r"^let", true),
-        Duo::new("string", r#"^"[^"]*""#, true),
-        Duo::new("identifier", r"^[a-z_]+", true)
-    ];
+fn duos() -> &'static Vec<Duo<&'static str>> {
+    static DUOS: OnceLock<Vec<Duo<&'static str>>> = OnceLock::new();
+    DUOS.get_or_init(|| {
+        vec![
+            Duo::new("whitespace", r"^[^\S\n]+", true),
+            Duo::new("comment_line", r"^//(.*)", true),
+            Duo::new("comment_block", r"^/\*(.|\n)*?\*/", true),
+            Duo::new("newline", r"^\n", true),
+            Duo::new("operator", r"^=", true),
+            Duo::new("keyword", r"^let", true),
+            Duo::new("string", r#"^"[^"]*""#, true),
+            Duo::new("identifier", r"^[a-z_]+", true),
+        ]
+    })
 }
 
 const EXPECTED: [Token<&'static str>; 14] = [
@@ -96,7 +99,7 @@ const EXPECTED: [Token<&'static str>; 14] = [
 
 #[test]
 fn peek_test() {
-    let mut tokenizer = Tokenizer::new(SOURCE, &DUOS);
+    let mut tokenizer = Tokenizer::new(SOURCE, duos());
     let mut tokens = Vec::new();
 
     if let Some(token) = tokenizer.peek().unwrap() {
diff --git a/tests/tokenize_all.rs b/tests/tokenize_all.rs
index 5aa1de0..056edaf 100644
--- a/tests/tokenize_all.rs
+++ b/tests/tokenize_all.rs
@@ -1,4 +1,5 @@
 use jayce::{Duo, Token, Tokenizer};
+use std::sync::OnceLock;
 
 const SOURCE: &str = "abc 123 xyz456 // comment";
 const SOURCE_PANIC: &str = "🦀";
@@ -12,14 +13,16 @@
 pub enum Kinds {
     Whitespace,
     CommentLine,
     Alpha,
     Numeric,
 }
 
-lazy_static::lazy_static! {
-    pub static ref DUOS: Vec<Duo<Kinds>> = vec![
-        Duo::new(Kinds::CommentLine, r"^//.*$", false),
-        Duo::new(Kinds::Whitespace, r"^\s+", false),
-
-        Duo::new(Kinds::Alpha, r"^[a-zA-Z]+", true),
-        Duo::new(Kinds::Numeric, r"^\d+", true),
-    ];
+fn duos() -> &'static Vec<Duo<Kinds>> {
+    static DUOS: OnceLock<Vec<Duo<Kinds>>> = OnceLock::new();
+    DUOS.get_or_init(|| {
+        vec![
+            Duo::new(Kinds::CommentLine, r"^//.*$", false),
+            Duo::new(Kinds::Whitespace, r"^\s+", false),
+            Duo::new(Kinds::Alpha, r"^[a-zA-Z]+", true),
+            Duo::new(Kinds::Numeric, r"^\d+", true),
+        ]
+    })
 }
 
 const EXPECTED: [Token<Kinds>; 4] = [
@@ -47,7 +50,7 @@ const EXPECTED: [Token<Kinds>; 4] = [
 
 #[test]
 fn tokenize_all() {
-    let tokens = Tokenizer::new(SOURCE, &DUOS).consume_all().unwrap();
+    let tokens = Tokenizer::new(SOURCE, duos()).consume_all().unwrap();
     assert_eq!(tokens, EXPECTED);
     assert_eq!(tokens.len(), EXPECTED.len());
 }
@@ -55,6 +58,6 @@ fn tokenize_all() {
 #[test]
 #[should_panic(expected = "Failed to match at line")]
 fn tokenize_all_should_panic() {
-    let mut tokenizer = Tokenizer::new(SOURCE_PANIC, &DUOS);
+    let mut tokenizer = Tokenizer::new(SOURCE_PANIC, duos());
     let _ = tokenizer.consume_all().unwrap();
 }
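Every file in this diff applies the same substitution: the `lazy_static!` block becomes a plain function that owns a `static` `std::sync::OnceLock` and hands out a `&'static` reference, with `get_or_init` running the initializer exactly once on first use. Below is a minimal, self-contained sketch of that pattern; the `Pattern` struct is a hypothetical stand-in for jayce's `Duo`, not part of the crate.

```rust
use std::sync::OnceLock;

// Hypothetical stand-in for jayce's `Duo`: a token kind paired with a regex
// source and a flag saying whether matches should be emitted as tokens.
#[derive(Debug)]
struct Pattern {
    kind: &'static str,
    regex: &'static str,
    emit: bool,
}

// Same shape as the `duos()` / `duos_rust()` helpers added in this diff:
// the `OnceLock` lives inside the function, `get_or_init` runs the closure
// only on the first call, and every caller gets the same `&'static Vec`.
fn patterns() -> &'static Vec<Pattern> {
    static PATTERNS: OnceLock<Vec<Pattern>> = OnceLock::new();
    PATTERNS.get_or_init(|| {
        vec![
            Pattern { kind: "whitespace", regex: r"^[^\S\n]+", emit: false },
            Pattern { kind: "name", regex: r"^[a-zA-Z_]+", emit: true },
        ]
    })
}

fn main() {
    // Repeated calls observe the exact same allocation; the closure ran once.
    assert!(std::ptr::eq(patterns(), patterns()));
    println!("{:?}", patterns());
}
```

For context on the dropped TODO entry: it had been waiting on `std::sync::LazyLock`, which became stable in Rust 1.80, a few months after this release. With `LazyLock` the wrapper function is unnecessary and the same data can be declared directly, for example `static DUOS: LazyLock<Vec<Duo<&'static str>>> = LazyLock::new(|| vec![/* ... */]);`. At the time of 12.1.0, `OnceLock` was the available stable option.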