diff --git a/Cargo.toml b/Cargo.toml index eea0eac..7fd123b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "charred" -version = "0.3.6" +version = "2.0.0" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" @@ -12,3 +12,13 @@ repository = "https://github.com/Trivernis/charred-rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +thiserror = "1.0.24" +log = "0.4.14" + +[dependencies.tokio] +version = "1.5.0" +features = ["io-util", "io-std", "fs"] + +[dev-dependencies.tokio] +version = "1.5.0" +features = ["rt-multi-thread", "macros"] \ No newline at end of file diff --git a/README.md b/README.md index 3b733fd..7fd0d3f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,86 @@ # Charred -This library provides a CharTapeMachine that can be used for building parsers with direct string access rather than token stream access. +This crate provides a generic asynchronous lexer that reads its input through tokio (files or any other `AsyncBufRead` source). +Tokens are parsed by user-provided async closures. 
+ +## Usage + +```rust +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use crate::lexer::Lexer; +use crate::token::{Token, TokenCheckerFn}; +use std::io::Cursor; +use std::sync::Arc; + +struct NumberToken(i32); +struct StringToken(String); +struct WhiteSpaceToken; + +async fn parse_number_token(reader: &mut InputReader) -> TapeResult> { + let mut num = String::new(); + while !reader.check_eof().await && reader.peek().await?.is_numeric() { + num.push(reader.consume().await?); + } + if num.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(NumberToken(num.parse::().unwrap())))) + } +} + +async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult> { + let mut count = 0; + while !reader.check_eof().await && reader.peek().await?.is_whitespace() { + reader.consume().await?; + count += 1; + } + if count > 0 { + Ok(Some(Token::new(WhiteSpaceToken))) + } else { + Ok(None) + } +} + +async fn parse_string_token(reader: &mut InputReader) -> TapeResult> { + let mut value = String::new(); + while !reader.check_eof().await + && !reader.peek().await?.is_numeric() + && !reader.peek().await?.is_whitespace() + { + value.push(reader.consume().await?); + } + if value.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(StringToken(value)))) + } +} + +#[tokio::main] +async fn main() { + // functions that try to parse the token into an object + let checkers: Vec = vec![ + Arc::new(|reader| Box::pin(parse_number_token(reader))), + Arc::new(|reader| Box::pin(parse_whitespace_token(reader))), + Arc::new(|reader| Box::pin(parse_string_token(reader))), + ]; + // input reader encapsulates (almost) any type that implements AsyncBufRead + let input_reader = InputReader::new(Cursor::new("Word 12")); + let mut lexer = Lexer::new(input_reader, checkers); + + // scan starts scanning the provided input + let tokens = lexer.scan().await.unwrap(); + assert!(!tokens.is_empty()); + + let mut tokens = tokens.into_iter(); + // use the is, try_as and try_into 
methods on the token type to get the underlying value + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); +} +``` + +## License + +Apache-2.0 diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..a0902fe --- /dev/null +++ b/src/error.rs @@ -0,0 +1,12 @@ +use thiserror::Error; + +pub type TapeResult = Result; + +#[derive(Debug, Error)] +pub enum TapeError { + #[error("IO Error: {0}")] + TokioIoError(#[from] tokio::io::Error), + + #[error("Unexpected EOF")] + EOF, +} diff --git a/src/input_reader.rs b/src/input_reader.rs new file mode 100644 index 0000000..2ad1acc --- /dev/null +++ b/src/input_reader.rs @@ -0,0 +1,86 @@ +use crate::error::{TapeError, TapeResult}; +use std::io::ErrorKind; +use tokio::io::{AsyncBufRead, AsyncBufReadExt}; + +/// An Input reader to asynchronously read a type +/// that implements AsyncBufRead and AsyncSeek. +pub struct InputReader { + inner: Box, + buf: String, + index: usize, +} + +impl InputReader { + pub fn new(inner: T) -> Self { + Self { + inner: Box::new(inner), + buf: String::new(), + index: 0, + } + } + + /// Reads the next char consuming it in the process + #[inline] + pub async fn consume(&mut self) -> TapeResult { + self.read_next().await + } + + /// Returns the next char without forwarding + #[inline] + pub async fn peek(&mut self) -> TapeResult { + let char = self.read_next().await?; + self.seek_to(self.index - 1).await?; + + Ok(char) + } + + /// Returns if EOF has been reached + #[inline] + pub async fn check_eof(&mut self) -> bool { + if let Err(TapeError::EOF) = self.peek().await { + true + } else { + false + } + } + + /// Reads the next char returning \x00 for EOF + async fn read_next(&mut self) -> TapeResult { + self.seek_to(self.index + 1).await?; + let result = self + .buf + .get(self.index - 1..self.index) + .ok_or(TapeError::EOF)? 
+ .chars() + .next() + .ok_or(TapeError::EOF); + + result + } + + /// Seeks to a given index + pub async fn seek_to(&mut self, to_index: usize) -> TapeResult<()> { + while to_index >= self.buf.len() { + let mut line = String::new(); + self.inner.read_line(&mut line).await.map_err(|e| { + if e.kind() == ErrorKind::UnexpectedEof { + TapeError::EOF + } else { + TapeError::TokioIoError(e) + } + })?; + if line.is_empty() { + break; + } + self.buf.push_str(&line); + } + self.index = to_index; + + Ok(()) + } + + /// Returns the current index + pub fn index(&self) -> usize { + self.index + } +} diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..d59062a --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,40 @@ +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use crate::token::{Token, TokenCheckerFn, UnknownToken}; + +pub struct Lexer { + reader: InputReader, + checkers: Vec, +} + +impl Lexer { + /// Creates a new lexer with provided checker functions + pub fn new(reader: InputReader, checkers: Vec) -> Self { + Self { reader, checkers } + } + + /// Scans for tokens + pub async fn scan(&mut self) -> TapeResult> { + let mut tokens = Vec::new(); + + while !self.reader.check_eof().await { + let index = self.reader.index(); + let mut found = false; + + for checker_fn in &self.checkers { + if let Some(token) = checker_fn.as_ref()(&mut self.reader).await? 
{ + tokens.push(token); + found = true; + break; + } else { + self.reader.seek_to(index).await?; + } + } + if !found { + tokens.push(Token::new(UnknownToken(self.reader.consume().await?))) + } + } + + Ok(tokens) + } +} diff --git a/src/lib.rs b/src/lib.rs index d7c7077..96a4bb0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,82 +1,7 @@ -pub mod tapemachine; +pub mod error; +pub mod input_reader; +pub mod lexer; +pub mod token; #[cfg(test)] -mod tests { - use crate::tapemachine::TapeResult; - use crate::tapemachine::{CharTapeMachine, TapeError}; - - const TEST_STRING: &str = "TEST STRING 1234 \\l \\n"; - - #[test] - fn it_returns_the_next_char() { - let mut ctm = CharTapeMachine::new(TEST_STRING.chars().collect()); - let test_chars: Vec = TEST_STRING.chars().collect(); - - let mut next = ctm.next_char().unwrap(); - assert_eq!(next, *test_chars.get(1).unwrap()); - - next = ctm.next_char().unwrap(); - assert_eq!(next, *test_chars.get(2).unwrap()); - - let _ = ctm.next_char().unwrap(); - let _ = ctm.next_char().unwrap(); - let _ = ctm.next_char().unwrap(); - next = ctm.next_char().unwrap(); - assert_eq!(next, *test_chars.get(6).unwrap()); - } - - #[test] - fn it_rewinds() { - let mut ctm = CharTapeMachine::new(TEST_STRING.chars().collect()); - let test_chars: Vec = TEST_STRING.chars().collect(); - - ctm.next_char().unwrap(); - ctm.next_char().unwrap(); - assert_eq!(ctm.next_char(), Some(*test_chars.get(3).unwrap())); - - ctm.rewind(1); - assert_eq!(ctm.next_char(), Some(*test_chars.get(2).unwrap())); - } - - #[test] - fn it_seeks() { - let mut ctm = CharTapeMachine::new(TEST_STRING.chars().collect()); - let test_chars: Vec = TEST_STRING.chars().collect(); - - assert_eq!(ctm.next_char(), Some(*test_chars.get(1).unwrap())); - ctm.seek_one().unwrap(); - assert_eq!(ctm.next_char(), Some(*test_chars.get(3).unwrap())); - ctm.seek_one().unwrap(); - ctm.seek_whitespace(); - assert_eq!(ctm.next_char(), Some(*test_chars.get(6).unwrap())); - } - - #[test] - fn it_asserts_chars() 
-> TapeResult<()> { - let mut ctm = CharTapeMachine::new(TEST_STRING.chars().collect()); - ctm.assert_any(&['A', 'B', 'T'], None)?; - ctm.seek_one().unwrap(); - ctm.assert_char(&'E', None)?; - ctm.seek_one().unwrap(); - ctm.assert_str_sequence("ST ", None)?; - ctm.seek_one().unwrap(); - ctm.assert_any_sequence(&[&['C'], &['A'], &['A', 'B'], &['S', 'T', 'R']], None)?; - - if let Ok(_) = - ctm.assert_any_sequence(&[&['C'], &['A'], &['A', 'B'], &['S', 'T', 'R']], None) - { - Err(TapeError::new(0)) - } else { - Ok(()) - } - } - - #[test] - fn it_checks_eof() -> TapeResult<()> { - let mut ctm = CharTapeMachine::new(TEST_STRING.chars().collect()); - let _ = ctm.get_string_until_any(&['n'], &[]); - assert!(ctm.check_eof()); - - Ok(()) - } -} +mod tests; diff --git a/src/tapemachine.rs b/src/tapemachine.rs deleted file mode 100644 index 0868682..0000000 --- a/src/tapemachine.rs +++ /dev/null @@ -1,424 +0,0 @@ -use std::error::Error; -use std::fmt::{self, Display, Formatter}; - -#[derive(Debug)] -pub struct TapeError { - index: usize, -} - -impl TapeError { - pub fn new(index: usize) -> Self { - Self { index } - } - - /// Returns the index the error occured on - pub fn get_index(&self) -> usize { - self.index - } -} - -impl Display for TapeError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "Tape Error at: {}", self.index) - } -} - -impl Error for TapeError {} - -pub type TapeResult = Result; - -const ESCAPE: char = '\\'; - -pub struct CharTapeMachine { - index: usize, - text: Vec, - current_char: char, - previous_char: char, -} - -impl CharTapeMachine { - pub fn new(text: Vec) -> Self { - let current_char = if text.len() > 0 { - *text.first().unwrap() - } else { - ' ' - }; - Self { - text, - index: 0, - previous_char: current_char, - current_char, - } - } - - #[inline] - pub fn get_text(&self) -> Vec { - self.text.clone() - } - - #[inline] - pub fn get_index(&self) -> usize { - self.index - } - - /// returns the current char - #[inline] - pub fn 
get_current(&self) -> char { - self.current_char - } - - /// Creates an error at the current position - #[inline] - pub fn err(&self) -> TapeError { - TapeError::new(self.index) - } - - /// Returns the next char - /// if there is any - pub fn next_char(&mut self) -> Option { - if self.index < self.text.len() { - self.index += 1; - self.previous_char = self.current_char; - self.current_char = *self.text.get(self.index)?; - - Some(self.current_char) - } else { - None - } - } - - /// Peeks the next available char - #[inline] - pub fn peek_char(&mut self) -> Option { - Some(*self.text.get(self.index + 1)?) - } - - /// Rewinds to a given index - #[inline] - pub fn rewind(&mut self, index: usize) { - if self.text.len() > index { - self.index = index; - self.current_char = *self.text.get(index).unwrap(); - if self.index > 0 { - self.previous_char = *self.text.get(index - 1).unwrap(); - } - } - } - - /// Rewinds to a given index and returns an error - #[inline] - pub fn rewind_with_error(&mut self, index: usize) -> TapeError { - self.rewind(index); - TapeError::new(index) - } - - /// Seeks one character or returns an error - /// if there is no next character - #[inline] - pub fn seek_one(&mut self) -> TapeResult<()> { - if let Some(_) = self.next_char() { - Ok(()) - } else { - Err(TapeError::new(self.index)) - } - } - - /// Seeks one character and returns - /// if it seeked or an error occurred - #[inline] - pub fn try_seek(&mut self) -> bool { - self.seek_one().is_ok() - } - - /// Seeks any character of the given group until none is encountered anymore - pub fn seek_any(&mut self, chars: &[char]) -> TapeResult<()> { - while self.check_any(chars) { - self.seek_one()?; - } - - Ok(()) - } - - /// Seeks until it encounters a non whitespace character - pub fn seek_whitespace(&mut self) { - if self.current_char.is_whitespace() { - while let Some(next) = self.next_char() { - if !next.is_whitespace() || self.check_escaped() { - break; - } - } - } - } - - /// Checks if the machine 
has reached the eof - pub fn check_eof(&self) -> bool { - self.index >= self.text.len() - } - - /// checks if the current char is escaped - #[inline] - pub fn check_escaped(&mut self) -> bool { - let start = self.index; - - let escaped = if self.previous_char == ESCAPE { - self.rewind(start - 1); - !self.check_escaped() - } else { - false - }; - self.rewind(start); - - escaped - } - - /// Returns true if the given character is equal to the current one - /// and the current character is not escaped - #[inline] - pub fn check_char(&mut self, value: &char) -> bool { - self.current_char == *value && !self.check_escaped() - } - - /// Checks if one of the given chars matches the current one - #[inline] - pub fn check_any(&mut self, chars: &[char]) -> bool { - !self.check_escaped() && chars.contains(&self.current_char) - } - - /// checks if the next characters match a given sequence of characters - pub fn check_sequence(&mut self, sequence: &[char]) -> bool { - let start_index = self.index; - - if self.check_escaped() { - self.rewind(start_index); - - false - } else { - for sq_character in sequence { - if self.current_char != *sq_character { - self.rewind(start_index); - return false; - } - if self.next_char() == None { - self.rewind(start_index); - return false; - } - } - if self.index > 0 { - self.rewind(self.index - 1); - } - true - } - } - - // checks if the next characters mach a string sequence - pub fn check_str_sequence(&mut self, sequence: &str) -> bool { - let start_index = self.index; - - if self.check_escaped() { - self.rewind(start_index); - - false - } else { - let matches = sequence.chars().all(|sq_character| { - if self.current_char != sq_character { - self.rewind(start_index); - return false; - } - if self.next_char() == None { - self.rewind(start_index); - return false; - } - true - }); - if !matches { - false - } else { - if self.index > 0 { - self.rewind(self.index - 1); - } - true - } - } - } - - /// checks if the next characters match any given 
sequence - #[inline] - pub fn check_any_sequence(&mut self, sequences: &[&[char]]) -> bool { - for seq in sequences { - if self.check_sequence(*seq) { - return true; - } - } - - false - } - - /// checks if the next characters match any given sequence of strings - #[inline] - pub fn check_any_str_sequence(&mut self, sequences: &[&str]) -> bool { - for str_seq in sequences { - if self.check_str_sequence(str_seq) { - return true; - } - } - - false - } - - /// returns an error on the current position and optionally rewinds - /// if a rewind index is given - #[inline] - pub fn assert_error(&mut self, rewind_index: Option) -> TapeError { - if let Some(index) = rewind_index { - self.rewind_with_error(index) - } else { - TapeError::new(self.index) - } - } - - /// returns an error if the given char doesn't match the current one and rewinds - /// if a rewind index is given - #[inline] - pub fn assert_char(&mut self, value: &char, rewind_index: Option) -> TapeResult<()> { - if self.check_char(value) { - Ok(()) - } else { - Err(self.assert_error(rewind_index)) - } - } - - /// returns an error if the current char doesn't match any of the given group - #[inline] - pub fn assert_any(&mut self, chars: &[char], rewind_index: Option) -> TapeResult<()> { - if self.check_any(chars) { - Ok(()) - } else { - Err(self.assert_error(rewind_index)) - } - } - - /// returns an error if the next chars don't match a special sequence - #[inline] - pub fn assert_sequence( - &mut self, - sequence: &[char], - rewind_index: Option, - ) -> TapeResult<()> { - if self.check_sequence(sequence) { - Ok(()) - } else { - Err(self.assert_error(rewind_index)) - } - } - - /// returns an error if the next chars don't match a special sequence - #[inline] - pub fn assert_str_sequence( - &mut self, - sequence: &str, - rewind_index: Option, - ) -> TapeResult<()> { - if self.check_str_sequence(sequence) { - Ok(()) - } else { - Err(self.assert_error(rewind_index)) - } - } - - /// returns an error if the next chars 
don't match any given sequence - pub fn assert_any_sequence( - &mut self, - sequences: &[&[char]], - rewind_index: Option, - ) -> TapeResult<()> { - if self.check_any_sequence(sequences) { - Ok(()) - } else { - Err(self.assert_error(rewind_index)) - } - } - - /// returns an error if the next chars don't match any given sequence - pub fn assert_any_str_sequence( - &mut self, - sequences: &[&str], - rewind_index: Option, - ) -> TapeResult<()> { - if self.check_any_str_sequence(sequences) { - Ok(()) - } else { - Err(self.assert_error(rewind_index)) - } - } - - /// returns the string until any given character is matched is matched. - /// rewinds with error if it encounters a character form the error group - #[inline] - pub fn get_string_until_any(&mut self, until: &[char], err_at: &[char]) -> TapeResult { - let start_index = self.index; - - self.get_string_until_any_or_rewind(until, err_at, start_index) - } - - /// Returns the string until it encounters a given sequence or rewinds with error - /// if it encounters an err sequence - pub fn get_string_until_sequence( - &mut self, - until: &[&[char]], - err_at: &[&[char]], - ) -> Result { - let start_index = self.index; - let mut result = String::new(); - - if self.check_any_sequence(until) { - return Ok(result); - } else if self.check_any_sequence(err_at) { - return Err(TapeError::new(self.index)); - } - - result.push(self.current_char); - while let Some(ch) = self.next_char() { - if self.check_any_sequence(until) || self.check_any_sequence(err_at) { - break; - } - result.push(ch); - } - - if self.check_any_sequence(err_at) { - Err(self.rewind_with_error(start_index)) - } else { - Ok(result) - } - } - - /// returns the string until a special char is found - /// or rewinds if an err_at char is found - pub fn get_string_until_any_or_rewind( - &mut self, - until: &[char], - err_at: &[char], - rewind_index: usize, - ) -> TapeResult { - let mut result = String::new(); - - if self.check_any(until) { - return Ok(result); - } 
else if self.check_any(err_at) { - return Err(self.rewind_with_error(rewind_index)); - } - - result.push(self.current_char); - while let Some(ch) = self.next_char() { - if self.check_any(until) || self.check_any(err_at) { - break; - } - result.push(ch); - } - - if self.check_any(err_at) { - Err(self.rewind_with_error(rewind_index)) - } else { - Ok(result) - } - } -} diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..90b79ae --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1,3 @@ +mod test_input; +mod test_lexer; +mod test_token; diff --git a/src/tests/test_input.rs b/src/tests/test_input.rs new file mode 100644 index 0000000..703c43e --- /dev/null +++ b/src/tests/test_input.rs @@ -0,0 +1,41 @@ +use crate::error::{TapeError, TapeResult}; +use crate::input_reader::InputReader; +use std::io::Cursor; + +fn get_reader() -> InputReader { + let data = "ABCDEFG HIJKLMNOP 12345567890\nSecond Line\n\n"; + InputReader::new(Cursor::new(data)) +} + +#[tokio::test] +async fn it_peeks() { + let mut reader = get_reader(); + assert_eq!(reader.peek().await.unwrap(), 'A'); + assert_eq!(reader.peek().await.unwrap(), 'A'); + assert_eq!(reader.peek().await.unwrap(), 'A'); +} + +#[tokio::test] +async fn it_consumes() { + let mut reader = get_reader(); + assert_eq!(reader.consume().await.unwrap(), 'A'); + assert_eq!(reader.consume().await.unwrap(), 'B'); + assert_eq!(reader.consume().await.unwrap(), 'C'); +} + +#[tokio::test] +async fn it_checks_for_eof() { + let mut reader = get_reader(); + assert!(!is_eof(reader.seek_to(29).await)); + assert!(!reader.check_eof().await); + assert!(!is_eof(reader.seek_to(47).await)); + assert!(is_eof(reader.consume().await.map(|_| ()))); + assert!(reader.check_eof().await); +} + +fn is_eof(result: TapeResult<()>) -> bool { + match result { + Err(TapeError::EOF) => true, + _ => false, + } +} diff --git a/src/tests/test_lexer.rs b/src/tests/test_lexer.rs new file mode 100644 index 0000000..b1362de --- /dev/null +++ 
b/src/tests/test_lexer.rs @@ -0,0 +1,93 @@ +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use crate::lexer::Lexer; +use crate::token::{Token, TokenCheckerFn, UnknownToken}; +use std::io::Cursor; +use std::sync::Arc; + +struct NumberToken(i32); +struct StringToken(String); +struct WhiteSpaceToken; + +async fn parse_number_token(reader: &mut InputReader) -> TapeResult> { + let mut num = String::new(); + while !reader.check_eof().await && reader.peek().await?.is_numeric() { + num.push(reader.consume().await?); + } + if num.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(NumberToken(num.parse::().unwrap())))) + } +} + +async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult> { + let mut count = 0; + while !reader.check_eof().await && reader.peek().await?.is_whitespace() { + reader.consume().await?; + count += 1; + } + if count > 0 { + Ok(Some(Token::new(WhiteSpaceToken))) + } else { + Ok(None) + } +} + +async fn parse_string_token(reader: &mut InputReader) -> TapeResult> { + let mut value = String::new(); + while !reader.check_eof().await + && !reader.peek().await?.is_numeric() + && !reader.peek().await?.is_whitespace() + { + value.push(reader.consume().await?); + } + if value.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(StringToken(value)))) + } +} + +#[tokio::test] +async fn it_scans() { + let checkers: Vec = vec![ + Arc::new(|reader| Box::pin(parse_number_token(reader))), + Arc::new(|reader| Box::pin(parse_whitespace_token(reader))), + Arc::new(|reader| Box::pin(parse_string_token(reader))), + ]; + let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); + let mut lexer = Lexer::new(input_reader, checkers); + + let tokens = lexer.scan().await.unwrap(); + assert!(!tokens.is_empty()); + + let mut tokens = tokens.into_iter(); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + 
assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); +} + +#[tokio::test] +async fn it_falls_back_to_unknown() { + let checkers: Vec = vec![ + Arc::new(|reader| Box::pin(parse_number_token(reader))), + Arc::new(|reader| Box::pin(parse_string_token(reader))), + ]; + let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); + let mut lexer = Lexer::new(input_reader, checkers); + let tokens = lexer.scan().await.unwrap(); + assert!(!tokens.is_empty()); + let mut tokens = tokens.into_iter(); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); + assert!(tokens.next().unwrap().is::()); +} diff --git a/src/tests/test_token.rs b/src/tests/test_token.rs new file mode 100644 index 0000000..16ce5e3 --- /dev/null +++ b/src/tests/test_token.rs @@ -0,0 +1,52 @@ +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use crate::token::Token; +use std::io::Cursor; + +#[derive(Debug)] +struct TestToken(i32); + +async fn parse_test_token(reader: &mut InputReader) -> TapeResult> { + let mut num = String::new(); + while !reader.check_eof().await && reader.peek().await?.is_numeric() { + num.push(reader.consume().await?); + } + if num.is_empty() { + Ok(None) + } else { + Ok(Some(Token::new(TestToken(num.parse::().unwrap())))) + } +} + +#[tokio::test] +async fn it_parses() { + let mut reader = InputReader::new(Cursor::new("128")); + let token = parse_test_token(&mut reader).await.unwrap(); + assert!(token.is_some()); + let token = token.unwrap().try_into::().unwrap(); + assert_eq!(token.0, 128); + + let mut reader = InputReader::new(Cursor::new("string a12 24\n")); + let token = parse_test_token(&mut reader).await.unwrap(); + assert!(token.is_none()); + reader.seek_to(8).await.unwrap(); + + let 
token = parse_test_token(&mut reader).await.unwrap(); + assert!(token.is_some()); + let token = token.unwrap().try_into::().unwrap(); + assert_eq!(token.0, 12); +} + +#[test] +fn it_converts() { + let token = Token::new(TestToken(12)); + assert!(token.is::()); + + let test_token = token.try_as::(); + assert!(test_token.is_some()); + assert_eq!(test_token.unwrap().0, 12); + + let test_token = token.try_into::(); + assert!(test_token.is_some()); + assert_eq!(test_token.unwrap().0, 12); +} diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..0b8973f --- /dev/null +++ b/src/token.rs @@ -0,0 +1,47 @@ +use crate::error::TapeResult; +use crate::input_reader::InputReader; +use std::any::{Any, TypeId}; +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; + +pub type TokenCheckerFn = Arc< + dyn for<'a> Fn( + &'a mut InputReader, + ) -> Pin>> + Send + 'a>> + + Send + + Sync, +>; + +pub struct Token { + inner: Box, +} + +impl Token { + /// Constructs a new token + pub fn new(inner: A) -> Self { + Self { + inner: Box::new(inner), + } + } + + /// Tries downcasting the value to a concrete type + pub fn try_as(&self) -> Option<&T> { + self.inner.downcast_ref::() + } + + pub fn try_into(self) -> Option { + match self.inner.downcast() { + Ok(value) => Some(*value), + Err(_) => None, + } + } + + /// Checks if the inner value is of a given concrete type + pub fn is(&self) -> bool { + self.inner.as_ref().type_id() == TypeId::of::() + } +} + +/// Parsed when no other matching token was found for the character +pub struct UnknownToken(pub char);