commit
652045becb
@ -1,3 +1,86 @@
|
||||
# Charred
|
||||
|
||||
This library provides a CharTapeMachine that can be used for building parsers with direct string access rather than token stream access.
|
||||
This crate provides a generic asynchronous lexer that operates on files with tokio.
|
||||
Tokens are parsed with provided async closures.
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
use crate::error::TapeResult;
|
||||
use crate::input_reader::InputReader;
|
||||
use crate::lexer::Lexer;
|
||||
use crate::token::{Token, TokenCheckerFn};
|
||||
use std::io::Cursor;
|
||||
use std::sync::Arc;
|
||||
|
||||
// Payload of a token made of digits; holds the parsed value.
struct NumberToken(i32);
|
||||
// Payload of a token made of non-numeric, non-whitespace characters.
struct StringToken(String);
|
||||
// Marker for a run of one or more whitespace characters.
struct WhiteSpaceToken;
|
||||
|
||||
async fn parse_number_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut num = String::new();
|
||||
while !reader.check_eof().await && reader.peek().await?.is_numeric() {
|
||||
num.push(reader.consume().await?);
|
||||
}
|
||||
if num.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(Token::new(NumberToken(num.parse::<i32>().unwrap()))))
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut count = 0;
|
||||
while !reader.check_eof().await && reader.peek().await?.is_whitespace() {
|
||||
reader.consume().await?;
|
||||
count += 1;
|
||||
}
|
||||
if count > 0 {
|
||||
Ok(Some(Token::new(WhiteSpaceToken)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_string_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut value = String::new();
|
||||
while !reader.check_eof().await
|
||||
&& !reader.peek().await?.is_numeric()
|
||||
&& !reader.peek().await?.is_whitespace()
|
||||
{
|
||||
value.push(reader.consume().await?);
|
||||
}
|
||||
if value.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(Token::new(StringToken(value))))
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
// functions that try to parse the token into an object
|
||||
let checkers: Vec<TokenCheckerFn> = vec![
|
||||
Arc::new(|reader| Box::pin(parse_number_token(reader))),
|
||||
Arc::new(|reader| Box::pin(parse_whitespace_token(reader))),
|
||||
Arc::new(|reader| Box::pin(parse_string_token(reader))),
|
||||
];
|
||||
// input reader encapsulates (almost) any type that implements AsyncBufRead
|
||||
let input_reader = InputReader::new(Cursor::new("Word 12"));
|
||||
let mut lexer = Lexer::new(input_reader, checkers);
|
||||
|
||||
// scan starts scanning the provided input
|
||||
let tokens = lexer.scan().await.unwrap();
|
||||
assert!(!tokens.is_empty());
|
||||
|
||||
let mut tokens = tokens.into_iter();
|
||||
// use the is, try_as and try_into methods on the token type to get the underlying value
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
assert!(tokens.next().unwrap().is::<WhiteSpaceToken>());
|
||||
assert!(tokens.next().unwrap().is::<NumberToken>());
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Apache-2.0
|
||||
|
@ -0,0 +1,12 @@
|
||||
use thiserror::Error;
|
||||
|
||||
/// Result alias whose error type is always [`TapeError`].
pub type TapeResult<T> = Result<T, TapeError>;
|
||||
|
||||
/// Errors reported while reading input.
#[derive(Debug, Error)]
|
||||
pub enum TapeError {
|
||||
/// An I/O error from tokio; `#[from]` lets `?` convert it automatically.
#[error("IO Error: {0}")]
|
||||
TokioIoError(#[from] tokio::io::Error),
|
||||
|
||||
/// No further character was available where one was requested.
#[error("Unexpected EOF")]
|
||||
EOF,
|
||||
}
|
@ -0,0 +1,86 @@
|
||||
use crate::error::{TapeError, TapeResult};
|
||||
use std::io::ErrorKind;
|
||||
use tokio::io::{AsyncBufRead, AsyncBufReadExt};
|
||||
|
||||
/// An Input reader to asynchronously read a type
|
||||
/// that implements AsyncBufRead and AsyncSeek.
|
||||
pub struct InputReader {
|
||||
inner: Box<dyn AsyncBufRead + Unpin + Send>,
|
||||
buf: String,
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl InputReader {
|
||||
pub fn new<T: AsyncBufRead + Unpin + Send + 'static>(inner: T) -> Self {
|
||||
Self {
|
||||
inner: Box::new(inner),
|
||||
buf: String::new(),
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the next char consuming it in the process
|
||||
#[inline]
|
||||
pub async fn consume(&mut self) -> TapeResult<char> {
|
||||
self.read_next().await
|
||||
}
|
||||
|
||||
/// Returns the next char without forwarding
|
||||
#[inline]
|
||||
pub async fn peek(&mut self) -> TapeResult<char> {
|
||||
let char = self.read_next().await?;
|
||||
self.seek_to(self.index - 1).await?;
|
||||
|
||||
Ok(char)
|
||||
}
|
||||
|
||||
/// Returns if EOF has been reached
|
||||
#[inline]
|
||||
pub async fn check_eof(&mut self) -> bool {
|
||||
if let Err(TapeError::EOF) = self.peek().await {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the next char returning \x00 for EOF
|
||||
async fn read_next(&mut self) -> TapeResult<char> {
|
||||
self.seek_to(self.index + 1).await?;
|
||||
let result = self
|
||||
.buf
|
||||
.get(self.index - 1..self.index)
|
||||
.ok_or(TapeError::EOF)?
|
||||
.chars()
|
||||
.next()
|
||||
.ok_or(TapeError::EOF);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Seeks to a given index
|
||||
pub async fn seek_to(&mut self, to_index: usize) -> TapeResult<()> {
|
||||
while to_index >= self.buf.len() {
|
||||
let mut line = String::new();
|
||||
self.inner.read_line(&mut line).await.map_err(|e| {
|
||||
if e.kind() == ErrorKind::UnexpectedEof {
|
||||
TapeError::EOF
|
||||
} else {
|
||||
TapeError::TokioIoError(e)
|
||||
}
|
||||
})?;
|
||||
if line.is_empty() {
|
||||
break;
|
||||
}
|
||||
self.buf.push_str(&line);
|
||||
}
|
||||
self.index = to_index;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the current index
|
||||
pub fn index(&self) -> usize {
|
||||
self.index
|
||||
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
use crate::error::TapeResult;
|
||||
use crate::input_reader::InputReader;
|
||||
use crate::token::{Token, TokenCheckerFn, UnknownToken};
|
||||
|
||||
pub struct Lexer {
|
||||
reader: InputReader,
|
||||
checkers: Vec<TokenCheckerFn>,
|
||||
}
|
||||
|
||||
impl Lexer {
|
||||
/// Creates a new lexer with provided checker functions
|
||||
pub fn new(reader: InputReader, checkers: Vec<TokenCheckerFn>) -> Self {
|
||||
Self { reader, checkers }
|
||||
}
|
||||
|
||||
/// Scans for tokens
|
||||
pub async fn scan(&mut self) -> TapeResult<Vec<Token>> {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while !self.reader.check_eof().await {
|
||||
let index = self.reader.index();
|
||||
let mut found = false;
|
||||
|
||||
for checker_fn in &self.checkers {
|
||||
if let Some(token) = checker_fn.as_ref()(&mut self.reader).await? {
|
||||
tokens.push(token);
|
||||
found = true;
|
||||
break;
|
||||
} else {
|
||||
self.reader.seek_to(index).await?;
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
tokens.push(Token::new(UnknownToken(self.reader.consume().await?)))
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
@ -1,82 +1,7 @@
|
||||
pub mod tapemachine;
|
||||
pub mod error;
|
||||
pub mod input_reader;
|
||||
pub mod lexer;
|
||||
pub mod token;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::tapemachine::TapeResult;
    use crate::tapemachine::{CharTapeMachine, TapeError};

    const TEST_STRING: &str = "TEST STRING 1234 \\l \\n";

    /// Returns the chars of the shared test string.
    fn test_chars() -> Vec<char> {
        TEST_STRING.chars().collect()
    }

    /// Builds a tape machine positioned at the start of the test string.
    fn machine() -> CharTapeMachine {
        CharTapeMachine::new(test_chars())
    }

    #[test]
    fn it_returns_the_next_char() {
        let mut ctm = machine();
        let expected = test_chars();

        assert_eq!(ctm.next_char().unwrap(), expected[1]);
        assert_eq!(ctm.next_char().unwrap(), expected[2]);

        // skip three chars, the one after that sits at index 6
        for _ in 0..3 {
            ctm.next_char().unwrap();
        }
        assert_eq!(ctm.next_char().unwrap(), expected[6]);
    }

    #[test]
    fn it_rewinds() {
        let mut ctm = machine();
        let expected = test_chars();

        ctm.next_char().unwrap();
        ctm.next_char().unwrap();
        assert_eq!(ctm.next_char(), Some(expected[3]));

        ctm.rewind(1);
        assert_eq!(ctm.next_char(), Some(expected[2]));
    }

    #[test]
    fn it_seeks() {
        let mut ctm = machine();
        let expected = test_chars();

        assert_eq!(ctm.next_char(), Some(expected[1]));
        ctm.seek_one().unwrap();
        assert_eq!(ctm.next_char(), Some(expected[3]));
        ctm.seek_one().unwrap();
        ctm.seek_whitespace();
        assert_eq!(ctm.next_char(), Some(expected[6]));
    }

    #[test]
    fn it_asserts_chars() -> TapeResult<()> {
        let candidates: [&[char]; 4] = [&['C'], &['A'], &['A', 'B'], &['S', 'T', 'R']];
        let mut ctm = machine();

        ctm.assert_any(&['A', 'B', 'T'], None)?;
        ctm.seek_one().unwrap();
        ctm.assert_char(&'E', None)?;
        ctm.seek_one().unwrap();
        ctm.assert_str_sequence("ST ", None)?;
        ctm.seek_one().unwrap();
        ctm.assert_any_sequence(&candidates, None)?;

        // the tape now rests on the end of the match, so a second attempt must fail
        match ctm.assert_any_sequence(&candidates, None) {
            Ok(()) => Err(TapeError::new(0)),
            Err(_) => Ok(()),
        }
    }

    #[test]
    fn it_checks_eof() -> TapeResult<()> {
        let mut ctm = machine();
        let _ = ctm.get_string_until_any(&['n'], &[]);
        assert!(ctm.check_eof());

        Ok(())
    }
}
|
||||
mod tests;
|
||||
|
@ -1,424 +0,0 @@
|
||||
use std::error::Error;
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
||||
/// Error produced by [`CharTapeMachine`]; carries the tape index it refers to.
#[derive(Debug)]
pub struct TapeError {
    index: usize,
}

impl TapeError {
    /// Creates an error that points at `index`.
    pub fn new(index: usize) -> Self {
        Self { index }
    }

    /// Returns the index the error occurred on
    pub fn get_index(&self) -> usize {
        self.index
    }
}

impl Display for TapeError {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "Tape Error at: {}", self.index)
    }
}

impl Error for TapeError {}

/// Result alias whose error type is always [`TapeError`].
pub type TapeResult<T> = Result<T, TapeError>;

/// Character that escapes the character following it.
const ESCAPE: char = '\\';

/// A cursor over a fixed sequence of chars with escape-aware checks.
///
/// The machine starts on the first char. `index` may become equal to the text
/// length once the end has been passed; `current_char` then keeps the last
/// char of the text.
pub struct CharTapeMachine {
    index: usize,
    text: Vec<char>,
    current_char: char,
    previous_char: char,
}

impl CharTapeMachine {
    /// Creates a machine positioned on the first char of `text`.
    ///
    /// For an empty text the current char is a space.
    pub fn new(text: Vec<char>) -> Self {
        let current_char = text.first().copied().unwrap_or(' ');
        Self {
            text,
            index: 0,
            previous_char: current_char,
            current_char,
        }
    }

    /// Returns a copy of the whole text
    #[inline]
    pub fn get_text(&self) -> Vec<char> {
        self.text.clone()
    }

    /// Returns the current index
    #[inline]
    pub fn get_index(&self) -> usize {
        self.index
    }

    /// returns the current char
    #[inline]
    pub fn get_current(&self) -> char {
        self.current_char
    }

    /// Creates an error at the current position
    #[inline]
    pub fn err(&self) -> TapeError {
        TapeError::new(self.index)
    }

    /// Advances by one and returns the char at the new position
    /// if there is any.
    ///
    /// Stepping past the last char still advances the index (which is what
    /// `check_eof` looks at) but returns `None`.
    pub fn next_char(&mut self) -> Option<char> {
        if self.index >= self.text.len() {
            return None;
        }
        self.index += 1;
        self.previous_char = self.current_char;
        self.current_char = *self.text.get(self.index)?;

        Some(self.current_char)
    }

    /// Peeks the next available char
    #[inline]
    pub fn peek_char(&mut self) -> Option<char> {
        self.text.get(self.index + 1).copied()
    }

    /// Rewinds to a given index.
    ///
    /// Does nothing when `index` is outside of the text.
    #[inline]
    pub fn rewind(&mut self, index: usize) {
        if let Some(&current) = self.text.get(index) {
            self.index = index;
            self.current_char = current;
            // At the start of the tape nothing precedes the current char;
            // mirror `new` instead of keeping a stale value around.
            self.previous_char = match index.checked_sub(1) {
                Some(previous) => self.text[previous],
                None => current,
            };
        }
    }

    /// Rewinds to a given index and returns an error
    #[inline]
    pub fn rewind_with_error(&mut self, index: usize) -> TapeError {
        self.rewind(index);
        TapeError::new(index)
    }

    /// Seeks one character or returns an error
    /// if there is no next character
    #[inline]
    pub fn seek_one(&mut self) -> TapeResult<()> {
        if self.next_char().is_some() {
            Ok(())
        } else {
            Err(TapeError::new(self.index))
        }
    }

    /// Seeks one character and returns
    /// if it seeked or an error occurred
    #[inline]
    pub fn try_seek(&mut self) -> bool {
        self.seek_one().is_ok()
    }

    /// Seeks any character of the given group until none is encountered anymore.
    ///
    /// Returns an error when the end of the text is reached while the current
    /// char still belongs to the group.
    pub fn seek_any(&mut self, chars: &[char]) -> TapeResult<()> {
        while self.check_any(chars) {
            self.seek_one()?;
        }

        Ok(())
    }

    /// Seeks until it encounters a non whitespace character.
    ///
    /// Only moves when the current char is whitespace; an escaped char also
    /// stops the search.
    pub fn seek_whitespace(&mut self) {
        if self.current_char.is_whitespace() {
            while let Some(next) = self.next_char() {
                if !next.is_whitespace() || self.check_escaped() {
                    break;
                }
            }
        }
    }

    /// Checks if the machine has reached the eof
    pub fn check_eof(&self) -> bool {
        self.index >= self.text.len()
    }

    /// checks if the current char is escaped
    ///
    /// A char is escaped when it is preceded by an odd number of consecutive
    /// escape chars. The tape position is not changed.
    #[inline]
    pub fn check_escaped(&mut self) -> bool {
        // Nothing precedes the first char, so it can never be escaped.
        if self.index == 0 || self.previous_char != ESCAPE {
            return false;
        }
        let end = self.index.min(self.text.len());
        let preceding_escapes = self.text[..end]
            .iter()
            .rev()
            .take_while(|&&c| c == ESCAPE)
            .count();

        preceding_escapes % 2 == 1
    }

    /// Returns true if the given character is equal to the current one
    /// and the current character is not escaped
    #[inline]
    pub fn check_char(&mut self, value: &char) -> bool {
        self.current_char == *value && !self.check_escaped()
    }

    /// Checks if one of the given chars matches the current one
    #[inline]
    pub fn check_any(&mut self, chars: &[char]) -> bool {
        !self.check_escaped() && chars.contains(&self.current_char)
    }

    /// Shared matcher behind `check_sequence` and `check_str_sequence`.
    ///
    /// On success the tape rests on the last char of the sequence; on failure
    /// it is rewound to where it started. A match is only reported when a char
    /// follows the sequence.
    fn match_chars<I>(&mut self, sequence: I) -> bool
    where
        I: Iterator<Item = char>,
    {
        if self.check_escaped() {
            return false;
        }
        let start_index = self.index;
        let mut matched_any = false;

        for expected in sequence {
            if self.current_char != expected || self.next_char().is_none() {
                self.rewind(start_index);
                return false;
            }
            matched_any = true;
        }
        // Step back onto the last matched char. An empty sequence consumed
        // nothing, so there is nothing to step back over.
        if matched_any {
            self.rewind(self.index - 1);
        }

        true
    }

    /// checks if the next characters match a given sequence of characters
    pub fn check_sequence(&mut self, sequence: &[char]) -> bool {
        self.match_chars(sequence.iter().copied())
    }

    /// checks if the next characters match a string sequence
    pub fn check_str_sequence(&mut self, sequence: &str) -> bool {
        self.match_chars(sequence.chars())
    }

    /// checks if the next characters match any given sequence
    #[inline]
    pub fn check_any_sequence(&mut self, sequences: &[&[char]]) -> bool {
        sequences.iter().any(|seq| self.check_sequence(seq))
    }

    /// checks if the next characters match any given sequence of strings
    #[inline]
    pub fn check_any_str_sequence(&mut self, sequences: &[&str]) -> bool {
        sequences
            .iter()
            .any(|str_seq| self.check_str_sequence(str_seq))
    }

    /// returns an error on the current position and optionally rewinds
    /// if a rewind index is given
    #[inline]
    pub fn assert_error(&mut self, rewind_index: Option<usize>) -> TapeError {
        match rewind_index {
            Some(index) => self.rewind_with_error(index),
            None => TapeError::new(self.index),
        }
    }

    /// Turns the outcome of a check into a result, building the error with
    /// `assert_error` when the check failed.
    fn require(&mut self, matched: bool, rewind_index: Option<usize>) -> TapeResult<()> {
        if matched {
            Ok(())
        } else {
            Err(self.assert_error(rewind_index))
        }
    }

    /// returns an error if the given char doesn't match the current one and rewinds
    /// if a rewind index is given
    #[inline]
    pub fn assert_char(&mut self, value: &char, rewind_index: Option<usize>) -> TapeResult<()> {
        let matched = self.check_char(value);
        self.require(matched, rewind_index)
    }

    /// returns an error if the current char doesn't match any of the given group
    #[inline]
    pub fn assert_any(&mut self, chars: &[char], rewind_index: Option<usize>) -> TapeResult<()> {
        let matched = self.check_any(chars);
        self.require(matched, rewind_index)
    }

    /// returns an error if the next chars don't match a special sequence
    #[inline]
    pub fn assert_sequence(
        &mut self,
        sequence: &[char],
        rewind_index: Option<usize>,
    ) -> TapeResult<()> {
        let matched = self.check_sequence(sequence);
        self.require(matched, rewind_index)
    }

    /// returns an error if the next chars don't match a special string sequence
    #[inline]
    pub fn assert_str_sequence(
        &mut self,
        sequence: &str,
        rewind_index: Option<usize>,
    ) -> TapeResult<()> {
        let matched = self.check_str_sequence(sequence);
        self.require(matched, rewind_index)
    }

    /// returns an error if the next chars don't match any given sequence
    pub fn assert_any_sequence(
        &mut self,
        sequences: &[&[char]],
        rewind_index: Option<usize>,
    ) -> TapeResult<()> {
        let matched = self.check_any_sequence(sequences);
        self.require(matched, rewind_index)
    }

    /// returns an error if the next chars don't match any given string sequence
    pub fn assert_any_str_sequence(
        &mut self,
        sequences: &[&str],
        rewind_index: Option<usize>,
    ) -> TapeResult<()> {
        let matched = self.check_any_str_sequence(sequences);
        self.require(matched, rewind_index)
    }

    /// returns the string until any given character is matched.
    /// rewinds with error if it encounters a character from the error group
    #[inline]
    pub fn get_string_until_any(&mut self, until: &[char], err_at: &[char]) -> TapeResult<String> {
        let start_index = self.index;

        self.get_string_until_any_or_rewind(until, err_at, start_index)
    }

    /// Returns the string until it encounters a given sequence or rewinds with error
    /// if it encounters an err sequence
    pub fn get_string_until_sequence(
        &mut self,
        until: &[&[char]],
        err_at: &[&[char]],
    ) -> Result<String, TapeError> {
        let start_index = self.index;
        let mut result = String::new();

        if self.check_any_sequence(until) {
            return Ok(result);
        } else if self.check_any_sequence(err_at) {
            return Err(TapeError::new(self.index));
        }

        result.push(self.current_char);
        while let Some(ch) = self.next_char() {
            if self.check_any_sequence(until) || self.check_any_sequence(err_at) {
                break;
            }
            result.push(ch);
        }

        // the loop stops on either kind of sequence, so find out which one it was
        if self.check_any_sequence(err_at) {
            Err(self.rewind_with_error(start_index))
        } else {
            Ok(result)
        }
    }

    /// returns the string until a special char is found
    /// or rewinds to `rewind_index` if an err_at char is found
    pub fn get_string_until_any_or_rewind(
        &mut self,
        until: &[char],
        err_at: &[char],
        rewind_index: usize,
    ) -> TapeResult<String> {
        let mut result = String::new();

        if self.check_any(until) {
            return Ok(result);
        } else if self.check_any(err_at) {
            return Err(self.rewind_with_error(rewind_index));
        }

        result.push(self.current_char);
        while let Some(ch) = self.next_char() {
            if self.check_any(until) || self.check_any(err_at) {
                break;
            }
            result.push(ch);
        }

        // the loop stops on either group, so find out which one it was
        if self.check_any(err_at) {
            Err(self.rewind_with_error(rewind_index))
        } else {
            Ok(result)
        }
    }
}
|
@ -0,0 +1,3 @@
|
||||
mod test_input;
|
||||
mod test_lexer;
|
||||
mod test_token;
|
@ -0,0 +1,41 @@
|
||||
use crate::error::{TapeError, TapeResult};
|
||||
use crate::input_reader::InputReader;
|
||||
use std::io::Cursor;
|
||||
|
||||
fn get_reader() -> InputReader {
|
||||
let data = "ABCDEFG HIJKLMNOP 12345567890\nSecond Line\n\n";
|
||||
InputReader::new(Cursor::new(data))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn it_peeks() {
|
||||
let mut reader = get_reader();
|
||||
assert_eq!(reader.peek().await.unwrap(), 'A');
|
||||
assert_eq!(reader.peek().await.unwrap(), 'A');
|
||||
assert_eq!(reader.peek().await.unwrap(), 'A');
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn it_consumes() {
|
||||
let mut reader = get_reader();
|
||||
assert_eq!(reader.consume().await.unwrap(), 'A');
|
||||
assert_eq!(reader.consume().await.unwrap(), 'B');
|
||||
assert_eq!(reader.consume().await.unwrap(), 'C');
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn it_checks_for_eof() {
|
||||
let mut reader = get_reader();
|
||||
assert!(!is_eof(reader.seek_to(29).await));
|
||||
assert!(!reader.check_eof().await);
|
||||
assert!(!is_eof(reader.seek_to(47).await));
|
||||
assert!(is_eof(reader.consume().await.map(|_| ())));
|
||||
assert!(reader.check_eof().await);
|
||||
}
|
||||
|
||||
fn is_eof(result: TapeResult<()>) -> bool {
|
||||
match result {
|
||||
Err(TapeError::EOF) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
@ -0,0 +1,93 @@
|
||||
use crate::error::TapeResult;
|
||||
use crate::input_reader::InputReader;
|
||||
use crate::lexer::Lexer;
|
||||
use crate::token::{Token, TokenCheckerFn, UnknownToken};
|
||||
use std::io::Cursor;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Payload of a token made of digits; holds the parsed value.
struct NumberToken(i32);
|
||||
/// Payload of a token made of non-numeric, non-whitespace characters.
struct StringToken(String);
|
||||
/// Marker for a run of one or more whitespace characters.
struct WhiteSpaceToken;
|
||||
|
||||
async fn parse_number_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut num = String::new();
|
||||
while !reader.check_eof().await && reader.peek().await?.is_numeric() {
|
||||
num.push(reader.consume().await?);
|
||||
}
|
||||
if num.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(Token::new(NumberToken(num.parse::<i32>().unwrap()))))
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut count = 0;
|
||||
while !reader.check_eof().await && reader.peek().await?.is_whitespace() {
|
||||
reader.consume().await?;
|
||||
count += 1;
|
||||
}
|
||||
if count > 0 {
|
||||
Ok(Some(Token::new(WhiteSpaceToken)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_string_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut value = String::new();
|
||||
while !reader.check_eof().await
|
||||
&& !reader.peek().await?.is_numeric()
|
||||
&& !reader.peek().await?.is_whitespace()
|
||||
{
|
||||
value.push(reader.consume().await?);
|
||||
}
|
||||
if value.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(Token::new(StringToken(value))))
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn it_scans() {
|
||||
let checkers: Vec<TokenCheckerFn> = vec![
|
||||
Arc::new(|reader| Box::pin(parse_number_token(reader))),
|
||||
Arc::new(|reader| Box::pin(parse_whitespace_token(reader))),
|
||||
Arc::new(|reader| Box::pin(parse_string_token(reader))),
|
||||
];
|
||||
let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok"));
|
||||
let mut lexer = Lexer::new(input_reader, checkers);
|
||||
|
||||
let tokens = lexer.scan().await.unwrap();
|
||||
assert!(!tokens.is_empty());
|
||||
|
||||
let mut tokens = tokens.into_iter();
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
assert!(tokens.next().unwrap().is::<WhiteSpaceToken>());
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
assert!(tokens.next().unwrap().is::<WhiteSpaceToken>());
|
||||
assert!(tokens.next().unwrap().is::<NumberToken>());
|
||||
assert!(tokens.next().unwrap().is::<WhiteSpaceToken>());
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn it_falls_back_to_unknown() {
|
||||
let checkers: Vec<TokenCheckerFn> = vec![
|
||||
Arc::new(|reader| Box::pin(parse_number_token(reader))),
|
||||
Arc::new(|reader| Box::pin(parse_string_token(reader))),
|
||||
];
|
||||
let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok"));
|
||||
let mut lexer = Lexer::new(input_reader, checkers);
|
||||
let tokens = lexer.scan().await.unwrap();
|
||||
assert!(!tokens.is_empty());
|
||||
let mut tokens = tokens.into_iter();
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
assert!(tokens.next().unwrap().is::<UnknownToken>());
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
assert!(tokens.next().unwrap().is::<UnknownToken>());
|
||||
assert!(tokens.next().unwrap().is::<NumberToken>());
|
||||
assert!(tokens.next().unwrap().is::<UnknownToken>());
|
||||
assert!(tokens.next().unwrap().is::<StringToken>());
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
use crate::error::TapeResult;
|
||||
use crate::input_reader::InputReader;
|
||||
use crate::token::Token;
|
||||
use std::io::Cursor;
|
||||
|
||||
/// Token payload used by the tests in this file; wraps the parsed number.
#[derive(Debug)]
|
||||
struct TestToken(i32);
|
||||
|
||||
async fn parse_test_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
|
||||
let mut num = String::new();
|
||||
while !reader.check_eof().await && reader.peek().await?.is_numeric() {
|
||||
num.push(reader.consume().await?);
|
||||
}
|
||||
if num.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(Token::new(TestToken(num.parse::<i32>().unwrap()))))
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn it_parses() {
|
||||
let mut reader = InputReader::new(Cursor::new("128"));
|
||||
let token = parse_test_token(&mut reader).await.unwrap();
|
||||
assert!(token.is_some());
|
||||
let token = token.unwrap().try_into::<TestToken>().unwrap();
|
||||
assert_eq!(token.0, 128);
|
||||
|
||||
let mut reader = InputReader::new(Cursor::new("string a12 24\n"));
|
||||
let token = parse_test_token(&mut reader).await.unwrap();
|
||||
assert!(token.is_none());
|
||||
reader.seek_to(8).await.unwrap();
|
||||
|
||||
let token = parse_test_token(&mut reader).await.unwrap();
|
||||
assert!(token.is_some());
|
||||
let token = token.unwrap().try_into::<TestToken>().unwrap();
|
||||
assert_eq!(token.0, 12);
|
||||
}
|
||||
|
||||
#[test]
fn it_converts() {
    let token = Token::new(TestToken(12));
    assert!(token.is::<TestToken>());

    // borrowing access leaves the token usable afterwards
    let borrowed = token.try_as::<TestToken>().map(|inner| inner.0);
    assert_eq!(borrowed, Some(12));

    // owning access consumes the token
    let owned = token.try_into::<TestToken>().map(|inner| inner.0);
    assert_eq!(owned, Some(12));
}
|
@ -0,0 +1,47 @@
|
||||
use crate::error::TapeResult;
|
||||
use crate::input_reader::InputReader;
|
||||
use std::any::{Any, TypeId};
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Shared, thread-safe function that tries to read one token from an
/// `InputReader`.
///
/// The returned boxed future borrows the reader for its whole lifetime and
/// resolves to `Ok(Some(token))`, `Ok(None)` or an error.
pub type TokenCheckerFn = Arc<
|
||||
dyn for<'a> Fn(
|
||||
&'a mut InputReader,
|
||||
) -> Pin<Box<dyn Future<Output = TapeResult<Option<Token>>> + Send + 'a>>
|
||||
+ Send
|
||||
+ Sync,
|
||||
>;
|
||||
|
||||
/// A type-erased value; the concrete type is recovered with `is`, `try_as`
/// or `try_into`.
pub struct Token {
    inner: Box<dyn Any>,
}

impl Token {
    /// Constructs a new token
    pub fn new<A: Any>(inner: A) -> Self {
        let inner: Box<dyn Any> = Box::new(inner);
        Self { inner }
    }

    /// Borrows the inner value as `T`, or returns `None` if it has another type
    pub fn try_as<T: 'static>(&self) -> Option<&T> {
        (*self.inner).downcast_ref::<T>()
    }

    /// Consumes the token and returns the inner value as `T`,
    /// or `None` if it has another type
    pub fn try_into<T: 'static>(self) -> Option<T> {
        self.inner.downcast::<T>().ok().map(|boxed| *boxed)
    }

    /// Checks if the inner value is of a given concrete type
    pub fn is<T: 'static>(&self) -> bool {
        // ask the stored value, not the box around it, for its type id
        TypeId::of::<T>() == (*self.inner).type_id()
    }
}
|
||||
|
||||
/// Fallback token holding a single character; produced when no other
/// matching token was found for that character.
|
||||
pub struct UnknownToken(pub char);
|
Loading…
Reference in New Issue