diff --git a/Cargo.toml b/Cargo.toml
index c81ab60..e8409d7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ scraper = "0.12.0"
 thiserror = "1.0.24"
 lazy_static = "1.4.0"
 serde = {version = "1.0.125", features = ["serde_derive"]}
+trigram = "0.4.4"
 
 [dev-dependencies]
 tokio = {version = "1.5.0", features = ["macros", "rt-multi-thread"]}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 3981d8c..66c2b5a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,8 +2,10 @@
 mod archive;
 mod comic;
 mod error;
+mod search;
 #[cfg(test)]
 mod tests;
 
 pub use archive::*;
 pub use comic::*;
+pub use search::*;
diff --git a/src/search.rs b/src/search.rs
new file mode 100644
index 0000000..0ca19a1
--- /dev/null
+++ b/src/search.rs
@@ -0,0 +1,53 @@
+use crate::error::XKCDResult;
+use crate::get_archive;
+use std::cmp::Ordering;
+
+/// Searches for comics whose title contains at least one word of the query.
+///
+/// The comparison is case-insensitive. The query is split on whitespace, so
+/// an empty or blank query yields no results instead of the whole archive.
+///
+/// Returns the error of `get_archive` if the archive cannot be retrieved.
+pub async fn search(query: &str) -> XKCDResult<Vec<(String, u32)>> {
+    let archive = get_archive().await?;
+    let words: Vec<String> = query.split_whitespace().map(str::to_lowercase).collect();
+    let entries = archive
+        .into_iter()
+        .filter(|(key, _)| {
+            // Lowercase the title once per entry rather than once per word.
+            let key = key.to_lowercase();
+            words.iter().any(|w| key.contains(w.as_str()))
+        })
+        .collect();
+
+    Ok(entries)
+}
+
+/// Searches for a comic with a fuzzy compare function that assigns a score
+/// to each match.
+/// The threshold defines the minimum score that still counts as a match.
+///
+/// Titles and query are compared in lowercase and the results are ordered
+/// from the highest score to the lowest.
+///
+/// # Errors
+/// Returns the error of `get_archive` if the archive cannot be retrieved.
+pub async fn search_fuzzy(query: &str, threshold: f32) -> XKCDResult<Vec<(String, u32)>> {
+    let archive = get_archive().await?;
+    let query = query.to_lowercase();
+
+    let mut entries: Vec<(f32, String, u32)> = archive
+        .into_iter()
+        .map(|(key, id)| (trigram::similarity(&query, &key.to_lowercase()), key, id))
+        .filter(|(score, _, _)| *score >= threshold)
+        .collect();
+    // Sort ascending by score, then reverse so that the best match comes first.
+    // Scores that cannot be ordered are treated as equal.
+    entries.sort_by(|(s1, _, _), (s2, _, _)| s1.partial_cmp(s2).unwrap_or(Ordering::Equal));
+    entries.reverse();
+
+    Ok(entries
+        .into_iter()
+        .map(|(_, title, id)| (title, id))
+        .collect())
+}
diff --git a/src/tests.rs b/src/tests.rs
index a34fc7f..8aecad5 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -1,4 +1,4 @@
-use crate::{get_archive, get_comic, get_latest_comic};
+use crate::{get_archive, get_comic, get_latest_comic, search, search_fuzzy};
 
 #[tokio::test]
 async fn it_retrieves_the_archive() {
@@ -15,3 +15,17 @@ async fn it_retrieves_a_comic() {
 async fn it_retrieves_the_latest_comic() {
     assert!(get_latest_comic().await.is_ok())
 }
+
+#[tokio::test]
+async fn it_searches_for_comics() {
+    let search_results = search("Phone").await.unwrap();
+    assert!(!search_results.is_empty());
+    assert!(search_results.len() < 1000);
+}
+
+#[tokio::test]
+async fn it_fuzzy_searches_for_comics() {
+    let search_results = search_fuzzy("Phone", 0.5).await.unwrap();
+    assert!(!search_results.is_empty());
+    assert!(search_results.len() < 1000);
+}