Add lexer

Signed-off-by: trivernis <trivernis@protonmail.com>
pull/1/head
trivernis 3 years ago
parent 7298b79c5a
commit 554e3f353d
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

@ -14,7 +14,6 @@ repository = "https://github.com/Trivernis/charred-rs"
[dependencies]
thiserror = "1.0.24"
log = "0.4.14"
async-trait = "0.1.50"
[dependencies.tokio]
version = "1.5.0"

@ -78,4 +78,9 @@ impl InputReader {
Ok(())
}
/// Returns the current index
///
/// This is the value of the reader's `index` field, returned by copy.
/// A caller can remember it and later pass it to `seek_to` to rewind the
/// reader to the same position.
pub fn index(&self) -> usize {
self.index
}
}

@ -0,0 +1,40 @@
use crate::error::TapeResult;
use crate::input_reader::InputReader;
use crate::token::{Token, TokenCheckerFn, UnknownToken};
/// Turns the input of an [`InputReader`] into a list of [`Token`]s by
/// running a set of checker functions against it.
pub struct Lexer {
/// Input source the tokens are read from.
reader: InputReader,
/// Checker functions that are tried, in order, at each input position.
checkers: Vec<TokenCheckerFn>,
}
impl Lexer {
/// Creates a new lexer with provided checker functions
pub fn new(reader: InputReader, checkers: Vec<TokenCheckerFn>) -> Self {
Self { reader, checkers }
}
/// Scans for tokens
pub async fn scan(&mut self) -> TapeResult<Vec<Token>> {
let mut tokens = Vec::new();
while !self.reader.check_eof().await {
let index = self.reader.index();
let mut found = false;
for checker_fn in &self.checkers {
if let Some(token) = checker_fn.as_ref()(&mut self.reader).await? {
tokens.push(token);
found = true;
break;
} else {
self.reader.seek_to(index).await?;
}
}
if !found {
tokens.push(Token::new(UnknownToken(self.reader.consume().await?)))
}
}
Ok(tokens)
}
}

@ -1,5 +1,6 @@
pub mod error;
pub mod input_reader;
pub mod lexer;
pub mod token;
#[cfg(test)]

@ -1,2 +1,3 @@
mod test_input;
mod test_lexer;
mod test_token;

@ -0,0 +1,91 @@
use crate::error::TapeResult;
use crate::input_reader::InputReader;
use crate::lexer::Lexer;
use crate::token::{Token, TokenCheckerFn, UnknownToken};
use std::io::Cursor;
use std::sync::Arc;
/// Test token carrying the `i32` value parsed by `parse_number_token`.
struct NumberToken(i32);
/// Test token carrying the characters collected by `parse_string_token`
/// (a run of characters that are neither numeric nor whitespace).
struct StringToken(String);
/// Test token produced by `parse_whitespace_token` for a run of one or more
/// whitespace characters; it carries no data.
struct WhiteSpaceToken;
/// Tries to read a run of ASCII decimal digits from `reader` and turn it into
/// a [`NumberToken`].
///
/// Characters are consumed while the reader is not at EOF and the next
/// character is an ASCII digit.
///
/// # Returns
/// * `Ok(Some(token))` when at least one digit was read and the digits fit
///   into an `i32`.
/// * `Ok(None)` when there is no digit at the current position, or when the
///   digits do not fit into an `i32`. In the second case the digits have
///   already been consumed; the caller is expected to rewind the reader, as
///   `Lexer::scan` does after every `None`.
///
/// # Errors
/// Propagates any error returned by the reader's `peek` or `consume`.
async fn parse_number_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
    let mut digits = String::new();
    // `char::is_numeric` also accepts characters such as '²' or '٣', which
    // `str::parse::<i32>` rejects; gating on ASCII digits keeps the collected
    // text parseable.
    while !reader.check_eof().await && reader.peek().await?.is_ascii_digit() {
        digits.push(reader.consume().await?);
    }
    // An empty string fails to parse too, so a single `ok()` covers both the
    // "no digits" case and the "does not fit into i32" case without panicking.
    Ok(digits
        .parse::<i32>()
        .ok()
        .map(|value| Token::new(NumberToken(value))))
}
/// Consumes a run of whitespace characters from `reader`.
///
/// Returns `Ok(Some(_))` holding a [`WhiteSpaceToken`] when at least one
/// whitespace character was consumed and `Ok(None)` otherwise. Errors from
/// the reader's `peek` or `consume` are propagated.
async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
    let mut consumed = 0;
    loop {
        // Stop at EOF (checked first, so `peek` is never called there) or at
        // the first non-whitespace character.
        if reader.check_eof().await || !reader.peek().await?.is_whitespace() {
            break;
        }
        reader.consume().await?;
        consumed += 1;
    }

    if consumed <= 0 {
        return Ok(None);
    }
    Ok(Some(Token::new(WhiteSpaceToken)))
}
/// Collects a run of characters that are neither numeric nor whitespace.
///
/// Returns `Ok(Some(_))` holding a [`StringToken`] with the collected text,
/// or `Ok(None)` when nothing was collected. Errors from the reader's `peek`
/// or `consume` are propagated.
async fn parse_string_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
    let mut text = String::new();
    loop {
        if reader.check_eof().await {
            break;
        }
        if reader.peek().await?.is_numeric() {
            break;
        }
        if reader.peek().await?.is_whitespace() {
            break;
        }
        text.push(reader.consume().await?);
    }

    if text.is_empty() {
        return Ok(None);
    }
    Ok(Some(Token::new(StringToken(text))))
}
/// Scans "The Alphabet 12 ok" with number, whitespace and string checkers and
/// verifies the kinds of the first seven tokens.
#[tokio::test]
async fn it_scans() {
    let checkers: Vec<TokenCheckerFn> = vec![
        Arc::new(|input| Box::pin(parse_number_token(input))),
        Arc::new(|input| Box::pin(parse_whitespace_token(input))),
        Arc::new(|input| Box::pin(parse_string_token(input))),
    ];
    let source = Cursor::new("The Alphabet 12 ok");
    let mut lexer = Lexer::new(InputReader::new(source), checkers);

    let scanned = lexer.scan().await.unwrap();
    assert!(!scanned.is_empty());

    let mut remaining = scanned.into_iter();
    // "The"
    assert!(remaining.next().unwrap().is::<StringToken>());
    // " "
    assert!(remaining.next().unwrap().is::<WhiteSpaceToken>());
    // "Alphabet"
    assert!(remaining.next().unwrap().is::<StringToken>());
    // " "
    assert!(remaining.next().unwrap().is::<WhiteSpaceToken>());
    // "12"
    assert!(remaining.next().unwrap().is::<NumberToken>());
    // " "
    assert!(remaining.next().unwrap().is::<WhiteSpaceToken>());
    // "ok"
    assert!(remaining.next().unwrap().is::<StringToken>());
}
/// Scans "The Alphabet 12 ok" without a whitespace checker and verifies that
/// each space between the words is reported as an `UnknownToken`.
#[tokio::test]
async fn it_falls_back_to_unknown() {
    let checkers: Vec<TokenCheckerFn> = vec![
        Arc::new(|input| Box::pin(parse_number_token(input))),
        Arc::new(|input| Box::pin(parse_string_token(input))),
    ];
    let source = Cursor::new("The Alphabet 12 ok");
    let mut lexer = Lexer::new(InputReader::new(source), checkers);

    let scanned = lexer.scan().await.unwrap();
    assert!(!scanned.is_empty());

    let mut remaining = scanned.into_iter();
    // "The"
    assert!(remaining.next().unwrap().is::<StringToken>());
    // " " has no matching checker
    assert!(remaining.next().unwrap().is::<UnknownToken>());
    // "Alphabet"
    assert!(remaining.next().unwrap().is::<StringToken>());
    // " "
    assert!(remaining.next().unwrap().is::<UnknownToken>());
    // "12"
    assert!(remaining.next().unwrap().is::<NumberToken>());
    // " "
    assert!(remaining.next().unwrap().is::<UnknownToken>());
    // "ok"
    assert!(remaining.next().unwrap().is::<StringToken>());
}

@ -1,41 +1,37 @@
use crate::error::TapeResult;
use crate::input_reader::InputReader;
use crate::token::{ProtoToken, Token};
use async_trait::async_trait;
use crate::token::Token;
use std::io::Cursor;
/// Test token carrying the `i32` value parsed from a run of numeric characters.
#[derive(Debug)]
struct TestToken(i32);
#[async_trait]
impl ProtoToken for TestToken {
async fn try_parse(reader: &mut InputReader) -> TapeResult<Option<Token>> {
let mut num = String::new();
while !reader.check_eof().await && reader.peek().await?.is_numeric() {
num.push(reader.consume().await?);
}
if num.is_empty() {
Ok(None)
} else {
Ok(Some(Token::new(TestToken(num.parse::<i32>().unwrap()))))
}
/// Tries to read a run of ASCII decimal digits from `reader` and turn it into
/// a [`TestToken`].
///
/// Characters are consumed while the reader is not at EOF and the next
/// character is an ASCII digit.
///
/// # Returns
/// * `Ok(Some(token))` when at least one digit was read and the digits fit
///   into an `i32`.
/// * `Ok(None)` when there is no digit at the current position, or when the
///   digits do not fit into an `i32`. In the second case the digits have
///   already been consumed, so the caller has to rewind the reader itself.
///
/// # Errors
/// Propagates any error returned by the reader's `peek` or `consume`.
async fn parse_test_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
    let mut digits = String::new();
    // `char::is_numeric` also accepts characters such as '²' or '٣', which
    // `str::parse::<i32>` rejects; gating on ASCII digits keeps the collected
    // text parseable.
    while !reader.check_eof().await && reader.peek().await?.is_ascii_digit() {
        digits.push(reader.consume().await?);
    }
    // An empty string fails to parse too, so a single `ok()` covers both the
    // "no digits" case and the "does not fit into i32" case without panicking.
    Ok(digits
        .parse::<i32>()
        .ok()
        .map(|value| Token::new(TestToken(value))))
}
#[tokio::test]
async fn it_parses() {
let mut reader = InputReader::new(Cursor::new("128"));
let token = TestToken::try_parse(&mut reader).await.unwrap();
let token = parse_test_token(&mut reader).await.unwrap();
assert!(token.is_some());
let token = token.unwrap().try_into::<TestToken>().unwrap();
assert_eq!(token.0, 128);
let mut reader = InputReader::new(Cursor::new("string a12 24\n"));
let token = TestToken::try_parse(&mut reader).await.unwrap();
let token = parse_test_token(&mut reader).await.unwrap();
assert!(token.is_none());
reader.seek_to(8).await.unwrap();
let token = TestToken::try_parse(&mut reader).await.unwrap();
let token = parse_test_token(&mut reader).await.unwrap();
assert!(token.is_some());
let token = token.unwrap().try_into::<TestToken>().unwrap();
assert_eq!(token.0, 12);

@ -1,13 +1,17 @@
use crate::error::TapeResult;
use crate::input_reader::InputReader;
use async_trait::async_trait;
use std::any::{Any, TypeId};
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
/// Trait for token types that can try to parse themselves from an
/// [`InputReader`].
#[async_trait]
pub trait ProtoToken {
/// Tries parsing the token
///
/// Reads from `reader` and returns a `TapeResult` wrapping an optional
/// [`Token`].
/// NOTE(review): presumably `Ok(None)` means "no token of this kind at the
/// current position" — confirm against the implementors.
async fn try_parse(reader: &mut InputReader) -> TapeResult<Option<Token>>;
}
/// Reference-counted (`Arc`) handle to an async token checker function.
///
/// A checker receives a mutable borrow of an [`InputReader`] and returns a
/// pinned, boxed future that may hold that borrow for `'a` and resolves to
/// `TapeResult<Option<Token>>`. Both the function object (`Send + Sync`) and
/// the returned future (`Send`) carry thread-safety bounds.
pub type TokenCheckerFn = Arc<
dyn for<'a> Fn(
&'a mut InputReader,
) -> Pin<Box<dyn Future<Output = TapeResult<Option<Token>>> + Send + 'a>>
+ Send
+ Sync,
>;
pub struct Token {
inner: Box<dyn Any>,
@ -38,3 +42,6 @@ impl Token {
self.inner.as_ref().type_id() == TypeId::of::<T>()
}
}
/// Parsed when no other matching token was found for the character
///
/// The public field holds the character the token stands for.
pub struct UnknownToken(pub char);

Loading…
Cancel
Save