diff --git a/Cargo.toml b/Cargo.toml index 230c6a6..7fd123b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,6 @@ repository = "https://github.com/Trivernis/charred-rs" [dependencies] thiserror = "1.0.24" log = "0.4.14" -async-trait = "0.1.50" [dependencies.tokio] version = "1.5.0" diff --git a/src/input_reader.rs b/src/input_reader.rs index 65d820c..2ad1acc 100644 --- a/src/input_reader.rs +++ b/src/input_reader.rs @@ -78,4 +78,9 @@ impl InputReader { Ok(()) } + + /// Returns the current index + pub fn index(&self) -> usize { + self.index + } } diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..d59062a --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,40 @@ +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use crate::token::{Token, TokenCheckerFn, UnknownToken}; + +pub struct Lexer { + reader: InputReader, + checkers: Vec, +} + +impl Lexer { + /// Creates a new lexer with provided checker functions + pub fn new(reader: InputReader, checkers: Vec) -> Self { + Self { reader, checkers } + } + + /// Scans for tokens + pub async fn scan(&mut self) -> TapeResult> { + let mut tokens = Vec::new(); + + while !self.reader.check_eof().await { + let index = self.reader.index(); + let mut found = false; + + for checker_fn in &self.checkers { + if let Some(token) = checker_fn.as_ref()(&mut self.reader).await? { + tokens.push(token); + found = true; + break; + } else { + self.reader.seek_to(index).await?; + } + } + if !found { + tokens.push(Token::new(UnknownToken(self.reader.consume().await?))) + } + } + + Ok(tokens) + } +} diff --git a/src/lib.rs b/src/lib.rs index d06b43c..96a4bb0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod error; pub mod input_reader; +pub mod lexer; pub mod token; #[cfg(test)] diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 124aebd..90b79ae 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,2 +1,3 @@ mod test_input; +mod test_lexer; mod test_token; diff --git a/src/tests/test_lexer.rs b/src/tests/test_lexer.rs new file mode 100644 index 0000000..b0fa225 --- /dev/null +++ b/src/tests/test_lexer.rs @@ -0,0 +1,91 @@ +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use crate::lexer::Lexer; +use crate::token::{Token, TokenCheckerFn, UnknownToken}; +use std::io::Cursor; +use std::sync::Arc; + +struct NumberToken(i32); +struct StringToken(String); +struct WhiteSpaceToken; + +async fn parse_number_token(reader: &mut InputReader) -> TapeResult> { + let mut num = String::new(); + while !reader.check_eof().await && reader.peek().await?.is_numeric() { + num.push(reader.consume().await?); + } + if num.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(NumberToken(num.parse::().unwrap())))) + } +} + +async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult> { + let mut count = 0; + while !reader.check_eof().await && reader.peek().await?.is_whitespace() { + reader.consume().await?; + count += 1; + } + if count > 0 { + Ok(Some(Token::new(WhiteSpaceToken))) + } else { + Ok(None) + } +} + +async fn parse_string_token(reader: &mut InputReader) -> TapeResult> { + let mut value = String::new(); + while !reader.check_eof().await + && !reader.peek().await?.is_numeric() + && !reader.peek().await?.is_whitespace() + { + value.push(reader.consume().await?); + } + if value.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(StringToken(value)))) + } +} + +#[tokio::test] +async fn it_scans() { + let checkers: Vec = vec![ + Arc::new(|reader| Box::pin(parse_number_token(reader))), + Arc::new(|reader| Box::pin(parse_whitespace_token(reader))), + Arc::new(|reader| Box::pin(parse_string_token(reader))), + ]; + let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); + let mut lexer = Lexer::new(input_reader, checkers); + let tokens = lexer.scan().await.unwrap(); + assert!(!tokens.is_empty()); + let mut tokens = tokens.into_iter(); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); +} + +#[tokio::test] +async fn it_falls_back_to_unknown() { + let checkers: Vec = vec![ + Arc::new(|reader| Box::pin(parse_number_token(reader))), + Arc::new(|reader| Box::pin(parse_string_token(reader))), + ]; + let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); + let mut lexer = Lexer::new(input_reader, checkers); + let tokens = lexer.scan().await.unwrap(); + assert!(!tokens.is_empty()); + let mut tokens = tokens.into_iter(); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); +} diff --git a/src/tests/test_token.rs b/src/tests/test_token.rs index ba474e2..16ce5e3 100644 --- a/src/tests/test_token.rs +++ b/src/tests/test_token.rs @@ -1,41 +1,37 @@ use crate::error::TapeResult; use crate::input_reader::InputReader; -use crate::token::{ProtoToken, Token}; -use async_trait::async_trait; +use crate::token::Token; use std::io::Cursor; #[derive(Debug)] struct TestToken(i32); -#[async_trait] -impl ProtoToken for TestToken { - async fn try_parse(reader: &mut InputReader) -> TapeResult> { - let mut num = String::new(); - while !reader.check_eof().await && reader.peek().await?.is_numeric() { - num.push(reader.consume().await?); - } - if num.is_empty() { - Ok(None) - } else { - Ok(Some(Token::new(TestToken(num.parse::().unwrap())))) - } +async fn parse_test_token(reader: &mut InputReader) -> TapeResult> { + let mut num = String::new(); + while !reader.check_eof().await && reader.peek().await?.is_numeric() { + num.push(reader.consume().await?); + } + if num.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(TestToken(num.parse::().unwrap())))) } } #[tokio::test] async fn it_parses() { let mut reader = InputReader::new(Cursor::new("128")); - let token = TestToken::try_parse(&mut reader).await.unwrap(); + let token = parse_test_token(&mut reader).await.unwrap(); assert!(token.is_some()); let token = token.unwrap().try_into::().unwrap(); assert_eq!(token.0, 128); let mut reader = InputReader::new(Cursor::new("string a12 24\n")); - let token = TestToken::try_parse(&mut reader).await.unwrap(); + let token = parse_test_token(&mut reader).await.unwrap(); assert!(token.is_none()); reader.seek_to(8).await.unwrap(); - let token = TestToken::try_parse(&mut reader).await.unwrap(); + let token = parse_test_token(&mut reader).await.unwrap(); assert!(token.is_some()); let token = token.unwrap().try_into::().unwrap(); assert_eq!(token.0, 12); diff --git a/src/token.rs b/src/token.rs index ccc57e3..0b8973f 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,13 +1,17 @@ use crate::error::TapeResult; use crate::input_reader::InputReader; -use async_trait::async_trait; use std::any::{Any, TypeId}; +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; -#[async_trait] -pub trait ProtoToken { - /// Tries parsing the token - async fn try_parse(reader: &mut InputReader) -> TapeResult>; -} +pub type TokenCheckerFn = Arc< + dyn for<'a> Fn( + &'a mut InputReader, + ) -> Pin>> + Send + 'a>> + + Send + + Sync, +>; pub struct Token { inner: Box, @@ -38,3 +42,6 @@ impl Token { self.inner.as_ref().type_id() == TypeId::of::() } } + +/// Parsed when no other matching token was found for the character +pub struct UnknownToken(pub char);