From 077684c68119a61736f4d859e44de19eab7d8d87 Mon Sep 17 00:00:00 2001 From: trivernis Date: Mon, 3 Aug 2020 11:02:05 +0200 Subject: [PATCH] Add string sequence matching --- Cargo.toml | 2 +- src/lib.rs | 31 ++++++++++++++++++-- src/tapemachine.rs | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7213771..00ab8c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "charred" -version = "0.2.2" +version = "0.3.0" authors = ["trivernis "] edition = "2018" license-file = "LICENSE" diff --git a/src/lib.rs b/src/lib.rs index f25262f..b0d0a3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,12 @@ +#![feature(test)] +extern crate test; pub mod tapemachine; #[cfg(test)] mod tests { - use crate::tapemachine::{CharTapeMachine, TapeError}; use crate::tapemachine::TapeResult; + use crate::tapemachine::{CharTapeMachine, TapeError}; + use test::Bencher; const TEST_STRING: &str = "TEST STRING 1234 \\l \\n"; @@ -58,11 +61,13 @@ mod tests { ctm.seek_one().unwrap(); ctm.assert_char(&'E', None)?; ctm.seek_one().unwrap(); - ctm.assert_sequence(&['S', 'T', ' '], None)?; + ctm.assert_str_sequence("ST ", None)?; ctm.seek_one().unwrap(); ctm.assert_any_sequence(&[&['C'], &['A'], &['A', 'B'], &['S', 'T', 'R']], None)?; - if let Ok(_) = ctm.assert_any_sequence(&[&['C'], &['A'], &['A', 'B'], &['S', 'T', 'R']], None) { + if let Ok(_) = + ctm.assert_any_sequence(&[&['C'], &['A'], &['A', 'B'], &['S', 'T', 'R']], None) + { Err(TapeError::new(0)) } else { Ok(()) @@ -77,4 +82,24 @@ mod tests { Ok(()) } + + #[bench] + fn bench_assert_seek(b: &mut Bencher) { + let mut ctm = CharTapeMachine::new(TEST_STRING.chars().collect()); + b.iter(|| { + ctm.check_char(&'T'); + ctm.seek_one().unwrap(); + ctm.check_char(&'E'); + ctm.seek_one().unwrap(); + ctm.check_char(&'F'); + ctm.seek_one().unwrap(); + ctm.check_any(&['A', 'B', 'C', 'D', 'E', '2']); + ctm.seek_one().unwrap(); + 
ctm.seek_whitespace(); + ctm.check_sequence(&['S', 'T', 'R', 'I', 'N', 'T']); + ctm.check_sequence(&['S', 'T', 'R', 'I', 'N', 'G']); + ctm.check_eof(); + ctm.rewind(0); + }) + } } diff --git a/src/tapemachine.rs b/src/tapemachine.rs index f9c15c5..2b5160f 100644 --- a/src/tapemachine.rs +++ b/src/tapemachine.rs @@ -185,6 +185,37 @@ impl CharTapeMachine { } } + // checks if the next characters match a string sequence + pub fn check_str_sequence(&mut self, sequence: &str) -> bool { + let start_index = self.index; + + if self.check_escaped() { + self.rewind(start_index); + + false + } else { + let matches = sequence.chars().all(|sq_character| { + if self.current_char != sq_character { + self.rewind(start_index); + return false; + } + if self.next_char() == None { + self.rewind(start_index); + return false; + } + true + }); + if !matches { + false + } else { + if self.index > 0 { + self.rewind(self.index - 1); + } + true + } + } + } + /// checks if the next characters match any given sequence #[inline] pub fn check_any_sequence(&mut self, sequences: &[&[char]]) -> bool { @@ -197,6 +228,18 @@ impl CharTapeMachine { false } + /// checks if the next characters match any given sequence of strings + #[inline] + pub fn check_any_str_sequence(&mut self, sequences: &[&str]) -> bool { + for str_seq in sequences { + if self.check_str_sequence(str_seq) { + return true; + } + } + + false + } + /// returns an error on the current position and optionally rewinds /// if a rewind index is given #[inline] @@ -243,6 +286,20 @@ impl CharTapeMachine { } } + /// returns an error if the next chars don't match a special sequence + #[inline] + pub fn assert_str_sequence( + &mut self, + sequence: &str, + rewind_index: Option<usize>, + ) -> TapeResult<()> { + if self.check_str_sequence(sequence) { + Ok(()) + } else { + Err(self.assert_error(rewind_index)) + } + } + /// returns an error if the next chars don't match any given sequence pub fn assert_any_sequence( &mut self, @@ -256,6 +313,19 @@ impl 
CharTapeMachine { } } + /// returns an error if the next chars don't match any given sequence of strings + pub fn assert_any_str_sequence( + &mut self, + sequences: &[&str], + rewind_index: Option<usize>, + ) -> TapeResult<()> { + if self.check_any_str_sequence(sequences) { + Ok(()) + } else { + Err(self.assert_error(rewind_index)) + } + } + /// returns the string until any given character is matched is matched. /// rewinds with error if it encounters a character form the error group #[inline]