// Patch summary (unified git diff; this copy is collapsed onto a few long lines):
//   * Cargo.toml: bumps the `charred` package version from 2.0.0 to 2.1.0.
//   * src/lexer.rs: each checker is now called with `tokens.last()` as a second argument,
//     and an `EOFToken` is pushed onto `tokens` just before `Ok(tokens)`.
//   * src/token.rs: `TokenCheckerFn` gains an `Option<&'a Token>` parameter (commented
//     "last token"); adds the unit struct `EOFToken`.
//   * src/token_reader.rs (new file): `TokenReader` holds a token vector, an index and a
//     stored EOF token; `peek`/`consume` fall back to that stored token via
//     `unwrap_or(&self.eof)` when the index is out of range. `new` appends an `EOFToken`
//     when the input does not already end with one.
//   * src/lib.rs, src/tests/*: registers the new modules, adapts the checker closures to the
//     two-argument form, and adds tests for `TokenReader`.
// NOTE(review): generic arguments look stripped in this copy (`Vec`, `peek_is::()`,
//   `consume_as::()`, `Pin>>`) - presumably lost in extraction; confirm against the
//   original commit before relying on this text.
// NOTE(review): `TokenReader::consume` increments `index` on every call, including calls that
//   already fall back to the stored EOF token, and `seek` stores any index unchecked.
// NOTE(review): the extra `TokenCheckerFn` parameter means existing checker closures need the
//   two-argument form (as the test changes show) - confirm a minor version bump is intended.
diff --git a/Cargo.toml b/Cargo.toml index 7fd123b..fdb47a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "charred" -version = "2.0.0" +version = "2.1.0" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" diff --git a/src/lexer.rs b/src/lexer.rs index d59062a..fdfe1df 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,6 +1,6 @@ use crate::error::TapeResult; use crate::input_reader::InputReader; -use crate::token::{Token, TokenCheckerFn, UnknownToken}; +use crate::token::{EOFToken, Token, TokenCheckerFn, UnknownToken}; pub struct Lexer { reader: InputReader, @@ -22,7 +22,7 @@ impl Lexer { let mut found = false; for checker_fn in &self.checkers { - if let Some(token) = checker_fn.as_ref()(&mut self.reader).await? { + if let Some(token) = checker_fn.as_ref()(&mut self.reader, tokens.last()).await? { tokens.push(token); found = true; break; @@ -34,6 +34,7 @@ impl Lexer { tokens.push(Token::new(UnknownToken(self.reader.consume().await?))) } } + tokens.push(Token::new(EOFToken)); Ok(tokens) } diff --git a/src/lib.rs b/src/lib.rs index 96a4bb0..9e665dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod error; pub mod input_reader; pub mod lexer; pub mod token; +pub mod token_reader; #[cfg(test)] mod tests; diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 90b79ae..016cbd6 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,3 +1,4 @@ mod test_input; mod test_lexer; mod test_token; +mod test_token_reader; diff --git a/src/tests/test_lexer.rs b/src/tests/test_lexer.rs index b1362de..606c04c 100644 --- a/src/tests/test_lexer.rs +++ b/src/tests/test_lexer.rs @@ -52,9 +52,9 @@ async fn parse_string_token(reader: &mut InputReader) -> TapeResult = vec![ - Arc::new(|reader| Box::pin(parse_number_token(reader))), - Arc::new(|reader| Box::pin(parse_whitespace_token(reader))), - Arc::new(|reader| Box::pin(parse_string_token(reader))), + Arc::new(|reader, _| Box::pin(parse_number_token(reader))), + Arc::new(|reader, 
_| Box::pin(parse_whitespace_token(reader))), + Arc::new(|reader, _| Box::pin(parse_string_token(reader))), ]; let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); let mut lexer = Lexer::new(input_reader, checkers); @@ -75,8 +75,8 @@ async fn it_scans() { #[tokio::test] async fn it_falls_back_to_unknown() { let checkers: Vec = vec![ - Arc::new(|reader| Box::pin(parse_number_token(reader))), - Arc::new(|reader| Box::pin(parse_string_token(reader))), + Arc::new(|reader, _| Box::pin(parse_number_token(reader))), + Arc::new(|reader, _| Box::pin(parse_string_token(reader))), ]; let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); let mut lexer = Lexer::new(input_reader, checkers); diff --git a/src/tests/test_token_reader.rs b/src/tests/test_token_reader.rs new file mode 100644 index 0000000..0b7dd0f --- /dev/null +++ b/src/tests/test_token_reader.rs @@ -0,0 +1,57 @@ +use crate::token::{EOFToken, Token}; +use crate::token_reader::TokenReader; + +struct AToken; +struct BToken; +struct CToken; + +fn get_reader() -> TokenReader { + let tokens = vec![ + Token::new(AToken), + Token::new(BToken), + Token::new(AToken), + Token::new(CToken), + Token::new(CToken), + Token::new(EOFToken), + ]; + + TokenReader::new(tokens) +} + +#[test] +fn peek_does_not_consume() { + let reader = get_reader(); + + assert!(reader.peek_is::()); + assert!(!reader.peek_is::()); + assert!(reader.peek_is::()); +} + +#[test] +fn consume_does_consume() { + let mut reader = get_reader(); + assert!(reader.consume_as::().is_some()); + assert!(reader.consume_as::().is_some()); + assert!(reader.consume_as::().is_some()); + assert!(reader.consume_as::().is_some()); +} + +#[test] +fn check_eof_works() { + let mut reader = get_reader(); + reader.seek(4); + assert!(!reader.check_eof()); + reader.seek(5); + assert!(reader.check_eof()); +} +#[test] +fn peek_and_consume_returns_eof_on_input_end() { + let mut reader = get_reader(); + reader.seek(4); + 
assert!(reader.consume_as::().is_none()); + assert!(reader.consume_as::().is_some()); + assert!(reader.consume_as::().is_some()); + assert!(reader.consume_as::().is_some()); + reader.seek(0); + assert!(reader.consume_as::().is_none()); +} diff --git a/src/token.rs b/src/token.rs index 0b8973f..0eebbd0 100644 --- a/src/token.rs +++ b/src/token.rs @@ -8,6 +8,7 @@ use std::sync::Arc; pub type TokenCheckerFn = Arc< dyn for<'a> Fn( &'a mut InputReader, + Option<&'a Token>, // last token ) -> Pin>> + Send + 'a>> + Send + Sync, @@ -45,3 +46,6 @@ impl Token { /// Parsed when no other matching token was found for the character pub struct UnknownToken(pub char); + +/// End Of File Token +pub struct EOFToken; diff --git a/src/token_reader.rs b/src/token_reader.rs new file mode 100644 index 0000000..d428c6c --- /dev/null +++ b/src/token_reader.rs @@ -0,0 +1,64 @@ +use crate::token::{EOFToken, Token}; + +pub struct TokenReader { + tokens: Vec, + index: usize, + eof: Token, +} + +impl TokenReader { + /// Creates a new token reader + pub fn new(mut tokens: Vec) -> Self { + if tokens.last().is_none() || !tokens.last().unwrap().is::() { + // ensure that the last token always is an EOF Token + tokens.push(Token::new(EOFToken)); + } + Self { + tokens, + index: 0, + eof: Token::new(EOFToken), + } + } + + /// Peeks the next token + #[inline] + pub fn peek(&self) -> &Token { + self.tokens.get(self.index).unwrap_or(&self.eof) + } + + /// Checks if the next token is of a specific type without consuming it + #[inline] + pub fn peek_is(&self) -> bool { + self.peek().is::() + } + + /// Peeks the next token and tries to return it as a concrete type + #[inline] + pub fn peek_as(&self) -> Option<&T> { + self.peek().try_as::() + } + + /// Consumes the next token and returns it + pub fn consume(&mut self) -> &Token { + self.index += 1; + self.tokens.get(self.index - 1).unwrap_or(&self.eof) + } + + /// Consumes the next token and tries to return it as the specified type + #[inline] + pub fn 
consume_as(&mut self) -> Option<&T> { + self.consume().try_as::() + } + + /// Seeks to the given index + #[inline] + pub fn seek(&mut self, to_index: usize) { + self.index = to_index + } + + /// Returns if EOF has been reached + #[inline] + pub fn check_eof(&self) -> bool { + self.peek_is::() + } +}