From 8abf53cd6f55371b245c04d4b9f5dff63f1774d9 Mon Sep 17 00:00:00 2001
From: trivernis
Date: Fri, 23 Apr 2021 12:21:11 +0200
Subject: [PATCH] Update README

Signed-off-by: trivernis
---
 README.md               | 85 ++++++++++++++++++++++++++++++++++++++++-
 src/tests/test_lexer.rs |  2 +
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3b733fd..7fd0d3f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,86 @@
 # Charred
 
-This library provides a CharTapeMachine that can be used for building parsers with direct string access rather than token stream access.
+This crate provides a generic asynchronous lexer that operates on files with tokio.
+Tokens are parsed with provided async closures.
+
+## Usage
+
+```rust
+use crate::error::TapeResult;
+use crate::input_reader::InputReader;
+use crate::lexer::Lexer;
+use crate::token::{Token, TokenCheckerFn};
+use std::io::Cursor;
+use std::sync::Arc;
+
+struct NumberToken(i32);
+struct StringToken(String);
+struct WhiteSpaceToken;
+
+async fn parse_number_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
+    let mut num = String::new();
+    while !reader.check_eof().await && reader.peek().await?.is_numeric() {
+        num.push(reader.consume().await?);
+    }
+    if num.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(Token::new(NumberToken(num.parse::<i32>().unwrap()))))
+    }
+}
+
+async fn parse_whitespace_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
+    let mut count = 0;
+    while !reader.check_eof().await && reader.peek().await?.is_whitespace() {
+        reader.consume().await?;
+        count += 1;
+    }
+    if count > 0 {
+        Ok(Some(Token::new(WhiteSpaceToken)))
+    } else {
+        Ok(None)
+    }
+}
+
+async fn parse_string_token(reader: &mut InputReader) -> TapeResult<Option<Token>> {
+    let mut value = String::new();
+    while !reader.check_eof().await
+        && !reader.peek().await?.is_numeric()
+        && !reader.peek().await?.is_whitespace()
+    {
+        value.push(reader.consume().await?);
+    }
+    if value.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(Token::new(StringToken(value))))
+    }
+}
+
+#[tokio::main]
+async fn main() {
+    // functions that try to parse the token into an object
+    let checkers: Vec<TokenCheckerFn> = vec![
+        Arc::new(|reader| Box::pin(parse_number_token(reader))),
+        Arc::new(|reader| Box::pin(parse_whitespace_token(reader))),
+        Arc::new(|reader| Box::pin(parse_string_token(reader))),
+    ];
+    // input reader encapsulates (almost) any type that implements AsyncBufRead
+    let input_reader = InputReader::new(Cursor::new("Word 12"));
+    let mut lexer = Lexer::new(input_reader, checkers);
+
+    // scan starts scanning the provided input
+    let tokens = lexer.scan().await.unwrap();
+    assert!(!tokens.is_empty());
+
+    let mut tokens = tokens.into_iter();
+    // use the is, try_as and try_into methods on the token type to get the underlying value
+    assert!(tokens.next().unwrap().is::<StringToken>());
+    assert!(tokens.next().unwrap().is::<WhiteSpaceToken>());
+    assert!(tokens.next().unwrap().is::<NumberToken>());
+}
+```
+
+## License
+
+Apache-2.0
diff --git a/src/tests/test_lexer.rs b/src/tests/test_lexer.rs
index b0fa225..b1362de 100644
--- a/src/tests/test_lexer.rs
+++ b/src/tests/test_lexer.rs
@@ -58,8 +58,10 @@ async fn it_scans() {
     ];
     let input_reader = InputReader::new(Cursor::new("The Alphabet 12 ok"));
     let mut lexer = Lexer::new(input_reader, checkers);
+
     let tokens = lexer.scan().await.unwrap();
     assert!(!tokens.is_empty());
+
     let mut tokens = tokens.into_iter();
     assert!(tokens.next().unwrap().is::<StringToken>());
     assert!(tokens.next().unwrap().is::<WhiteSpaceToken>());