Add search functions

Signed-off-by: trivernis <trivernis@protonmail.com>
main
trivernis 4 years ago
parent 5683137f7d
commit 3e3186caa7
Signed by: Trivernis
GPG Key ID: DFFFCC2C7A02DB45

@ -12,6 +12,7 @@ scraper = "0.12.0"
thiserror = "1.0.24" thiserror = "1.0.24"
lazy_static = "1.4.0" lazy_static = "1.4.0"
serde = {version = "1.0.125", features = ["serde_derive"]} serde = {version = "1.0.125", features = ["serde_derive"]}
trigram = "0.4.4"
[dev-dependencies] [dev-dependencies]
tokio = {version = "1.5.0", features = ["macros", "rt-multi-thread"]} tokio = {version = "1.5.0", features = ["macros", "rt-multi-thread"]}

@ -2,8 +2,10 @@ mod archive;
mod comic; mod comic;
mod error; mod error;
mod search;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
pub use archive::*; pub use archive::*;
pub use comic::*; pub use comic::*;
pub use search::*;

@ -0,0 +1,53 @@
use crate::error::XKCDResult;
use crate::get_archive;
use std::cmp::Ordering;
/// Searches the archive for comics whose title contains at least one word
/// of the query.
///
/// The query is split on whitespace and every word is compared
/// case-insensitively against each archive title. A title matches as soon as
/// it contains any one of the words as a substring.
///
/// A query without any words (empty or whitespace only) yields no results.
///
/// # Errors
///
/// Propagates the error returned by [`get_archive`] when the archive cannot
/// be retrieved.
pub async fn search(query: &str) -> XKCDResult<Vec<(String, u32)>> {
    let archive = get_archive().await?;
    // `split_whitespace` never yields empty words. Splitting on a single
    // space did so for empty queries and repeated spaces, and an empty word
    // is a substring of every title, so such queries matched everything.
    let words: Vec<String> = query
        .split_whitespace()
        .map(|word| word.to_lowercase())
        .collect();
    let entries = archive
        .into_iter()
        .filter(|(title, _)| {
            // Lowercase the title once per entry instead of once per word.
            let title = title.to_lowercase();
            words.iter().any(|word| title.contains(word.as_str()))
        })
        .collect();
    Ok(entries)
}
/// Searches the archive with a fuzzy comparison that assigns a score to
/// every title.
///
/// Each lowercased title is scored against the lowercased query with
/// [`trigram::similarity`]. Entries scoring below `threshold` are dropped and
/// the remaining ones are returned ordered from highest to lowest score.
///
/// # Errors
///
/// Propagates the error returned by [`get_archive`] when the archive cannot
/// be retrieved.
pub async fn search_fuzzy(query: &str, threshold: f32) -> XKCDResult<Vec<(String, u32)>> {
    let archive = get_archive().await?;
    let query = query.to_lowercase();
    let mut entries: Vec<(f32, String, u32)> = archive
        .into_iter()
        .map(|(title, id)| (trigram::similarity(&query, &title.to_lowercase()), title, id))
        // A NaN score (or NaN threshold) fails this comparison, so no NaN
        // score ever reaches the sort below.
        .filter(|(score, _, _)| *score >= threshold)
        .collect();
    // Stable ascending sort. `partial_cmp` only returns `None` for NaN, which
    // the filter above has already excluded.
    entries.sort_by(|(a, _, _), (b, _, _)| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    Ok(entries
        .into_iter()
        // Walk the ascending list backwards so the best matches come first.
        .rev()
        .map(|(_, title, id)| (title, id))
        .collect())
}

@ -1,4 +1,4 @@
use crate::{get_archive, get_comic, get_latest_comic}; use crate::{get_archive, get_comic, get_latest_comic, search, search_fuzzy};
#[tokio::test] #[tokio::test]
async fn it_retrieves_the_archive() { async fn it_retrieves_the_archive() {
@ -15,3 +15,17 @@ async fn it_retrieves_a_comic() {
async fn it_retrieves_the_latest_comic() { async fn it_retrieves_the_latest_comic() {
assert!(get_latest_comic().await.is_ok()) assert!(get_latest_comic().await.is_ok())
} }
/// Checks that a plain word search returns some comics, but fewer than 1000.
#[tokio::test]
async fn it_searches_for_comics() {
    let hit_count = search("Phone").await.unwrap().len();
    // At least one comic has to match the query.
    assert!(hit_count > 0);
    // Presumably guards against a search that matches (nearly) the whole
    // archive; verify against the actual archive size.
    assert!(hit_count < 1000);
}
/// Checks that a fuzzy search with a threshold of 0.5 returns some comics,
/// but fewer than 1000.
#[tokio::test]
async fn it_fuzzy_searches_for_comics() {
    let hit_count = search_fuzzy("Phone", 0.5).await.unwrap().len();
    // At least one comic has to reach the threshold.
    assert!(hit_count > 0);
    // Presumably guards against a threshold check that lets (nearly) the
    // whole archive through; verify against the actual archive size.
    assert!(hit_count < 1000);
}

Loading…
Cancel
Save