Implement peek method
AuracleTech committed Mar 11, 2024
1 parent 95fbc8a commit 8e5ba82
Showing 7 changed files with 174 additions and 16 deletions.
4 changes: 2 additions & 2 deletions benches/tokenize.rs
@@ -29,7 +29,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
for (_, source) in files.iter() {
let mut tokenizer = Tokenizer::new(source, &DUOS_RUST);
let tokens = tokenizer.tokenize_all().unwrap();
let tokens = tokenizer.consume_all().unwrap();
black_box(tokens);
}
})
@@ -39,7 +39,7 @@ fn criterion_benchmark(c: &mut Criterion) {
let mut total = 0;
for (_, source) in files.iter() {
let mut tokenizer = Tokenizer::new(source, &DUOS_RUST);
total += tokenizer.tokenize_all().unwrap().len();
total += tokenizer.consume_all().unwrap().len();
}
println!("Amount of tokens created : {}", total);
}
2 changes: 1 addition & 1 deletion examples/multiple_files.rs
@@ -26,7 +26,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
for (filename, source) in files.iter() {
let mut tokenizer = Tokenizer::new(source, &DUOS_RUST);

let tokens_for_file = tokenizer.tokenize_all()?.len();
let tokens_for_file = tokenizer.consume_all()?.len();

println!("File {} has {} tokens", filename, tokens_for_file);

40 changes: 32 additions & 8 deletions src/lib.rs
@@ -20,19 +20,23 @@ impl<T> Duo<T> {
pub struct Tokenizer<'a, T> {
source: &'a str,
duos: &'a [Duo<T>],
cursor: usize,
line: usize,
column: usize,
pub cursor: usize,
pub line: usize,
pub column: usize,
next: Option<Token<'a, T>>,
}

#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Token<'a, T> {
pub kind: &'a T,
pub value: &'a str,
pub pos: (usize, usize),
}

impl<'a, T> Tokenizer<'a, T> {
impl<'a, T> Tokenizer<'a, T>
where
T: Clone,
{
#[inline]
pub fn new(source: &'a str, duos: &'a [Duo<T>]) -> Self {
Self {
@@ -41,10 +45,12 @@ impl<'a, T> Tokenizer<'a, T> {
cursor: 0,
line: 1,
column: 1,
next: None,
}
}

pub fn consume(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
// OPTIMIZE might inline the advance method
fn advance(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
while self.cursor < self.source.len() {
let mut matched = false;

@@ -86,8 +92,26 @@
Ok(None)
}

pub fn tokenize_all(&mut self) -> Result<Vec<Token<'a, T>>, Box<dyn std::error::Error>> {
let mut tokens = Vec::new();
pub fn peek(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
if self.next.is_none() {
self.next = self.advance()?;
}

Ok(self.next.clone())
}

pub fn consume(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
if self.next.is_none() {
self.next = self.advance()?;
}

let result = Ok(self.next.take());
self.next = self.advance()?;
result
}

pub fn consume_all(&mut self) -> Result<Vec<Token<'a, T>>, Box<dyn std::error::Error>> {
let mut tokens: Vec<Token<'_, T>> = Vec::new();
while let Some(token) = self.consume()? {
tokens.push(token);
}
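Taken together, the src/lib.rs changes above give the tokenizer one token of lookahead: peek fills the cached next slot without consuming it, consume returns the cached token and prefetches the following one, and consume_all (formerly tokenize_all) drains the source. A minimal usage sketch of the new API — the duos and the source string here are illustrative, modeled on the test fixtures below; only Duo::new, Tokenizer::new, peek, and consume are taken from the crate itself:

use jayce::{Duo, Tokenizer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Illustrative duos, mirroring the patterns used in the tests below.
    let duos = vec![
        Duo::new("whitespace", r"^[^\S\n]+", true),
        Duo::new("keyword", r"^let", true),
        Duo::new("identifier", r"^[a-z_]+", true),
    ];
    let mut tokenizer = Tokenizer::new("let cat", &duos);

    // Repeated peeks return the same buffered token and do not advance.
    let first = tokenizer.peek()?;
    assert_eq!(first, tokenizer.peek()?);

    // consume returns the buffered token and prefetches the next one.
    while let Some(token) = tokenizer.consume()? {
        println!("{:?} {:?} at {:?}", token.kind, token.value, token.pos);
    }

    // Once the source is exhausted, both methods keep returning Ok(None).
    assert_eq!(tokenizer.peek()?, None);
    assert_eq!(tokenizer.consume()?, None);
    Ok(())
}

Note that advance is private after this commit, so callers interact with the tokenizer only through peek, consume, and consume_all.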
2 changes: 1 addition & 1 deletion tests/custom_duo_type.rs
@@ -35,7 +35,7 @@ const EXPECTED: [Token<u64>; 3] = [

#[test]
fn custom_duos() {
let tokens = Tokenizer::new(SOURCE, &DUOS).tokenize_all().unwrap();
let tokens = Tokenizer::new(SOURCE, &DUOS).consume_all().unwrap();
assert_eq!(tokens, EXPECTED);
assert_eq!(tokens.len(), EXPECTED.len());
}
2 changes: 1 addition & 1 deletion tests/multiline.rs
@@ -96,7 +96,7 @@ const EXPECTED: [Token<&'static str>; 14] = [

#[test]
fn multiline() {
let tokens = Tokenizer::new(SOURCE, &DUOS).tokenize_all().unwrap();
let tokens = Tokenizer::new(SOURCE, &DUOS).consume_all().unwrap();
assert_eq!(tokens, EXPECTED);
assert_eq!(tokens.len(), EXPECTED.len());
}
134 changes: 134 additions & 0 deletions tests/peek.rs
@@ -0,0 +1,134 @@
use jayce::{Duo, Token, Tokenizer};

const SOURCE: &str = r#"let kind_cat = "I calmly pet my cute cats"
pancake_icecream
very_multiline
"#;

lazy_static::lazy_static! {
static ref DUOS: Vec<Duo<&'static str>> = vec![
Duo::new("whitespace", r"^[^\S\n]+", true),
Duo::new("comment_line", r"^//(.*)", true),
Duo::new("comment_block", r"^/\*(.|\n)*?\*/", true),
Duo::new("newline", r"^\n", true),

Duo::new("operator", r"^=", true),
Duo::new("keyword", r"^let", true),
Duo::new("string", r#"^"[^"]*""#, true),
Duo::new("identifier", r"^[a-z_]+", true)
];
}

const EXPECTED: [Token<&'static str>; 14] = [
Token {
kind: &"keyword",
value: "let",
pos: (1, 1),
},
Token {
kind: &"whitespace",
value: " ",
pos: (1, 4),
},
Token {
kind: &"identifier",
value: "kind_cat",
pos: (1, 5),
},
Token {
kind: &"whitespace",
value: " ",
pos: (1, 13),
},
Token {
kind: &"operator",
value: "=",
pos: (1, 14),
},
Token {
kind: &"whitespace",
value: " ",
pos: (1, 15),
},
Token {
kind: &"string",
value: "\"I calmly pet my cute cats\"",
pos: (1, 16),
},
Token {
kind: &"newline",
value: "\n",
pos: (1, 43),
},
Token {
kind: &"identifier",
value: "pancake_icecream",
pos: (2, 1),
},
Token {
kind: &"newline",
value: "\n",
pos: (2, 17),
},
Token {
kind: &"newline",
value: "\n",
pos: (3, 1),
},
Token {
kind: &"identifier",
value: "very_multiline",
pos: (4, 1),
},
Token {
kind: &"newline",
value: "\n",
pos: (4, 15),
},
Token {
kind: &"newline",
value: "\n",
pos: (5, 1),
},
];

#[test]
fn peek_test() {
let mut tokenizer = Tokenizer::new(SOURCE, &DUOS);
let mut tokens = Vec::new();

if let Some(token) = tokenizer.peek().unwrap() {
assert_eq!(token, EXPECTED[0]);
}
if let Some(token) = tokenizer.peek().unwrap() {
assert_eq!(token, EXPECTED[0]);
}

for expected in EXPECTED.iter() {
let peeked = tokenizer.peek().unwrap();
let consumed = tokenizer.consume().unwrap();
assert_eq!(peeked, consumed);
tokens.push(consumed.unwrap());
if consumed.is_some() {
assert_eq!(consumed.unwrap(), *expected);
} else {
panic!("Early termination, expected token.")
}
}

let token = tokenizer.consume().unwrap();
assert_eq!(token, None);
let token = tokenizer.peek().unwrap();
assert_eq!(token, None);
let token = tokenizer.consume().unwrap();
assert_eq!(token, None);
let token = tokenizer.peek().unwrap();
assert_eq!(token, None);
let token = tokenizer.consume().unwrap();
assert_eq!(token, None);

assert_eq!(tokens, EXPECTED);
assert_eq!(tokens.len(), EXPECTED.len());
}
6 changes: 3 additions & 3 deletions tests/tokenize_all.rs
@@ -3,7 +3,7 @@ use jayce::{Duo, Token, Tokenizer};
const SOURCE: &str = "abc 123 xyz456 // comment";
const SOURCE_PANIC: &str = "🦀";

#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Clone)]
pub enum Kinds {
CommentLine,
Whitespace,
@@ -47,7 +47,7 @@ const EXPECTED: [Token<Kinds>; 4] = [

#[test]
fn tokenize_all() {
let tokens = Tokenizer::new(SOURCE, &DUOS).tokenize_all().unwrap();
let tokens = Tokenizer::new(SOURCE, &DUOS).consume_all().unwrap();
assert_eq!(tokens, EXPECTED);
assert_eq!(tokens.len(), EXPECTED.len());
}
@@ -56,5 +56,5 @@ fn tokenize_all() {
#[should_panic(expected = "Failed to match at line")]
fn tokenize_all_should_panic() {
let mut tokenizer = Tokenizer::new(SOURCE_PANIC, &DUOS);
let _ = tokenizer.tokenize_all().unwrap();
let _ = tokenizer.consume_all().unwrap();
}
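A side effect of the lookahead buffer is visible in the Kinds derive above: peek hands back a clone of the buffered token, so Tokenizer now carries a T: Clone bound, Token additionally derives Copy, and custom kind types such as Kinds have to derive Clone to keep compiling.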
