Add token reader and access to last token in checker function

Signed-off-by: trivernis <trivernis@protonmail.com>
master
trivernis 4 years ago
parent 652045becb
commit 8cb21fcaca
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

@ -1,6 +1,6 @@
[package] [package]
name = "charred" name = "charred"
version = "2.0.0" version = "2.1.0"
authors = ["trivernis <trivernis@protonmail.com>"] authors = ["trivernis <trivernis@protonmail.com>"]
edition = "2018" edition = "2018"
license-file = "LICENSE" license-file = "LICENSE"

@ -1,6 +1,6 @@
use crate::error::TapeResult; use crate::error::TapeResult;
use crate::input_reader::InputReader; use crate::input_reader::InputReader;
use crate::token::{Token, TokenCheckerFn, UnknownToken}; use crate::token::{EOFToken, Token, TokenCheckerFn, UnknownToken};
pub struct Lexer { pub struct Lexer {
reader: InputReader, reader: InputReader,
@ -22,7 +22,7 @@ impl Lexer {
let mut found = false; let mut found = false;
for checker_fn in &self.checkers { for checker_fn in &self.checkers {
if let Some(token) = checker_fn.as_ref()(&mut self.reader).await? { if let Some(token) = checker_fn.as_ref()(&mut self.reader, tokens.last()).await? {
tokens.push(token); tokens.push(token);
found = true; found = true;
break; break;
@ -34,6 +34,7 @@ impl Lexer {
tokens.push(Token::new(UnknownToken(self.reader.consume().await?))) tokens.push(Token::new(UnknownToken(self.reader.consume().await?)))
} }
} }
tokens.push(Token::new(EOFToken));
Ok(tokens) Ok(tokens)
} }

@ -2,6 +2,7 @@ pub mod error;
pub mod input_reader; pub mod input_reader;
pub mod lexer; pub mod lexer;
pub mod token; pub mod token;
pub mod token_reader;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;

@ -1,3 +1,4 @@
mod test_input; mod test_input;
mod test_lexer; mod test_lexer;
mod test_token; mod test_token;
mod test_token_reader;

@ -52,9 +52,9 @@ async fn parse_string_token(reader: &mut InputReader) -> TapeResult<Option<Token
#[tokio::test] #[tokio::test]
async fn it_scans() { async fn it_scans() {
let checkers: Vec<TokenCheckerFn> = vec![ let checkers: Vec<TokenCheckerFn> = vec![
Arc::new(|reader| Box::pin(parse_number_token(reader))), Arc::new(|reader, _| Box::pin(parse_number_token(reader))),
Arc::new(|reader| Box::pin(parse_whitespace_token(reader))), Arc::new(|reader, _| Box::pin(parse_whitespace_token(reader))),
Arc::new(|reader| Box::pin(parse_string_token(reader))), Arc::new(|reader, _| Box::pin(parse_string_token(reader))),
]; ];
let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok"));
let mut lexer = Lexer::new(input_reader, checkers); let mut lexer = Lexer::new(input_reader, checkers);
@ -75,8 +75,8 @@ async fn it_scans() {
#[tokio::test] #[tokio::test]
async fn it_falls_back_to_unknown() { async fn it_falls_back_to_unknown() {
let checkers: Vec<TokenCheckerFn> = vec![ let checkers: Vec<TokenCheckerFn> = vec![
Arc::new(|reader| Box::pin(parse_number_token(reader))), Arc::new(|reader, _| Box::pin(parse_number_token(reader))),
Arc::new(|reader| Box::pin(parse_string_token(reader))), Arc::new(|reader, _| Box::pin(parse_string_token(reader))),
]; ];
let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok")); let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok"));
let mut lexer = Lexer::new(input_reader, checkers); let mut lexer = Lexer::new(input_reader, checkers);

@ -0,0 +1,57 @@
use crate::token::{EOFToken, Token};
use crate::token_reader::TokenReader;
// Distinct unit structs used as token payload types, so the tests in this
// file can tell tokens apart by type.
struct AToken;
struct BToken;
struct CToken;
/// Builds the shared test fixture: a reader over the token sequence
/// A, B, A, C, C followed by an explicit EOF token.
fn get_reader() -> TokenReader {
    TokenReader::new(vec![
        Token::new(AToken),
        Token::new(BToken),
        Token::new(AToken),
        Token::new(CToken),
        Token::new(CToken),
        Token::new(EOFToken),
    ])
}
/// Peeking repeatedly must keep reporting the same leading `AToken`.
#[test]
fn peek_does_not_consume() {
    let reader = get_reader();

    let first_peek_is_a = reader.peek_is::<AToken>();
    assert!(first_peek_is_a);

    let peek_is_b = reader.peek_is::<BToken>();
    assert!(!peek_is_b);

    // Still the first token: neither of the peeks above advanced the reader.
    let second_peek_is_a = reader.peek_is::<AToken>();
    assert!(second_peek_is_a);
}
/// Consuming must advance through the fixture in order: A, B, A, C.
#[test]
fn consume_does_consume() {
    let mut reader = get_reader();

    let first_is_a = reader.consume_as::<AToken>().is_some();
    assert!(first_is_a);

    let second_is_b = reader.consume_as::<BToken>().is_some();
    assert!(second_is_b);

    let third_is_a = reader.consume_as::<AToken>().is_some();
    assert!(third_is_a);

    let fourth_is_c = reader.consume_as::<CToken>().is_some();
    assert!(fourth_is_c);
}
/// `check_eof` must be false on the last regular token and true on the EOF token.
#[test]
fn check_eof_works() {
    let mut reader = get_reader();

    // Index 4 holds the final `CToken` of the fixture.
    reader.seek(4);
    let eof_on_last_token = reader.check_eof();
    assert!(!eof_on_last_token);

    // Index 5 holds the explicit `EOFToken`.
    reader.seek(5);
    let eof_on_eof_token = reader.check_eof();
    assert!(eof_on_eof_token);
}
/// Once the input end is reached, both `peek` and `consume` must keep
/// yielding an `EOFToken`; seeking back makes regular tokens readable again.
#[test]
fn peek_and_consume_returns_eof_on_input_end() {
    let mut reader = get_reader();

    // Index 4 holds the last regular token (`CToken`), so neither peeking
    // nor consuming reports EOF yet.
    reader.seek(4);
    assert!(!reader.peek_is::<EOFToken>());
    assert!(reader.consume_as::<EOFToken>().is_none());

    // Index 5 is the explicit EOF token; every later position is out of
    // range and falls back to EOF as well.
    assert!(reader.peek_is::<EOFToken>());
    assert!(reader.consume_as::<EOFToken>().is_some());
    assert!(reader.peek_is::<EOFToken>());
    assert!(reader.consume_as::<EOFToken>().is_some());
    assert!(reader.peek_is::<EOFToken>());
    assert!(reader.consume_as::<EOFToken>().is_some());

    // Seeking back to the start leaves the EOF state again.
    reader.seek(0);
    assert!(!reader.peek_is::<EOFToken>());
    assert!(reader.consume_as::<EOFToken>().is_none());
}

@ -8,6 +8,7 @@ use std::sync::Arc;
pub type TokenCheckerFn = Arc< pub type TokenCheckerFn = Arc<
dyn for<'a> Fn( dyn for<'a> Fn(
&'a mut InputReader, &'a mut InputReader,
Option<&'a Token>, // last token
) -> Pin<Box<dyn Future<Output = TapeResult<Option<Token>>> + Send + 'a>> ) -> Pin<Box<dyn Future<Output = TapeResult<Option<Token>>> + Send + 'a>>
+ Send + Send
+ Sync, + Sync,
@ -45,3 +46,6 @@ impl Token {
/// Parsed when no other matching token was found for the character /// Parsed when no other matching token was found for the character
pub struct UnknownToken(pub char); pub struct UnknownToken(pub char);
/// End Of File Token
pub struct EOFToken;

@ -0,0 +1,64 @@
use crate::token::{EOFToken, Token};
/// Cursor over a sequence of tokens.
///
/// The reader tracks its position as an index into the token vector. Reading
/// at a position outside the vector yields an `EOFToken` instead of failing.
pub struct TokenReader {
    /// Tokens to read; `new` guarantees the last entry is an `EOFToken`.
    tokens: Vec<Token>,
    /// Position of the token returned by the next `peek`/`consume`.
    index: usize,
    /// Fallback token returned for positions outside `tokens`.
    eof: Token,
}

impl TokenReader {
    /// Creates a new token reader positioned at the first token.
    ///
    /// An `EOFToken` is appended when `tokens` is empty or does not already
    /// end with one.
    pub fn new(mut tokens: Vec<Token>) -> Self {
        let ends_with_eof = tokens.last().map_or(false, |last| last.is::<EOFToken>());
        if !ends_with_eof {
            // ensure that the last token always is an EOF Token
            tokens.push(Token::new(EOFToken));
        }

        Self {
            tokens,
            index: 0,
            eof: Token::new(EOFToken),
        }
    }

    /// Peeks the next token without advancing the reader.
    ///
    /// Returns the EOF fallback token when the position is out of range.
    #[inline]
    pub fn peek(&self) -> &Token {
        self.tokens.get(self.index).unwrap_or(&self.eof)
    }

    /// Checks if the next token is of a specific type without consuming it
    #[inline]
    pub fn peek_is<T: 'static>(&self) -> bool {
        self.peek().is::<T>()
    }

    /// Peeks the next token and tries to return it as a concrete type
    #[inline]
    pub fn peek_as<T: 'static>(&self) -> Option<&T> {
        self.peek().try_as::<T>()
    }

    /// Consumes the next token and returns it.
    ///
    /// Returns the EOF fallback token when the position is out of range.
    pub fn consume(&mut self) -> &Token {
        let current = self.index;
        // Saturate rather than overflow: after `seek(usize::MAX)` a plain
        // `+= 1` would panic in debug builds and wrap around to index 0 in
        // release builds, silently rewinding the reader.
        self.index = current.saturating_add(1);
        self.tokens.get(current).unwrap_or(&self.eof)
    }

    /// Consumes the next token and tries to return it as the specified type.
    ///
    /// The token is consumed even when it is not of type `T`.
    #[inline]
    pub fn consume_as<T: 'static>(&mut self) -> Option<&T> {
        self.consume().try_as::<T>()
    }

    /// Seeks to the given index.
    ///
    /// The index is not validated; out-of-range positions read as EOF.
    #[inline]
    pub fn seek(&mut self, to_index: usize) {
        self.index = to_index
    }

    /// Returns if EOF has been reached
    #[inline]
    pub fn check_eof(&self) -> bool {
        self.peek_is::<EOFToken>()
    }
}
Loading…
Cancel
Save