From e0618b5781472b9874be4b09c928dd48867ba8db Mon Sep 17 00:00:00 2001 From: trivernis Date: Fri, 29 Oct 2021 21:26:36 +0200 Subject: [PATCH] Add basic sorting by tags and file properties It's currently very slow. I have to write a bulk query to retrieve all files with tags to speed it up. Signed-off-by: trivernis --- mediarepo-daemon/Cargo.lock | 9 +- .../mediarepo-database/src/queries/mod.rs | 0 .../mediarepo-database/src/queries/tags.rs | 0 mediarepo-daemon/mediarepo-socket/Cargo.lock | 9 +- mediarepo-daemon/mediarepo-socket/Cargo.toml | 3 +- .../mediarepo-socket/src/namespaces/files.rs | 132 +++++++++++++++++- 6 files changed, 145 insertions(+), 8 deletions(-) create mode 100644 mediarepo-daemon/mediarepo-database/src/queries/mod.rs create mode 100644 mediarepo-daemon/mediarepo-database/src/queries/tags.rs diff --git a/mediarepo-daemon/Cargo.lock b/mediarepo-daemon/Cargo.lock index 9f2f1c3..4fb499c 100644 --- a/mediarepo-daemon/Cargo.lock +++ b/mediarepo-daemon/Cargo.lock @@ -278,6 +278,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "compare" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3" + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -842,7 +848,7 @@ dependencies = [ [[package]] name = "mediarepo-api" version = "0.1.0" -source = "git+https://github.com/Trivernis/mediarepo-api.git?rev=6cb2b0e467b3554b07d04f98d9244d8e4952db68#6cb2b0e467b3554b07d04f98d9244d8e4952db68" +source = "git+https://github.com/Trivernis/mediarepo-api.git?rev=476b9d152457f78c73f6f6a36c2421cbce9c9194#476b9d152457f78c73f6f6a36c2421cbce9c9194" dependencies = [ "chrono", "serde", @@ -900,6 +906,7 @@ name = "mediarepo-socket" version = "0.1.0" dependencies = [ "chrono", + "compare", "mediarepo-api", "mediarepo-core", "mediarepo-model", diff --git a/mediarepo-daemon/mediarepo-database/src/queries/mod.rs b/mediarepo-daemon/mediarepo-database/src/queries/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/mediarepo-daemon/mediarepo-database/src/queries/tags.rs b/mediarepo-daemon/mediarepo-database/src/queries/tags.rs new file mode 100644 index 0000000..e69de29 diff --git a/mediarepo-daemon/mediarepo-socket/Cargo.lock b/mediarepo-daemon/mediarepo-socket/Cargo.lock index 8616341..416b4da 100644 --- a/mediarepo-daemon/mediarepo-socket/Cargo.lock +++ b/mediarepo-daemon/mediarepo-socket/Cargo.lock @@ -234,6 +234,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "compare" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3" + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -764,7 +770,7 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "mediarepo-api" version = "0.1.0" -source = "git+https://github.com/Trivernis/mediarepo-api.git?rev=6cb2b0e467b3554b07d04f98d9244d8e4952db68#6cb2b0e467b3554b07d04f98d9244d8e4952db68" +source = "git+https://github.com/Trivernis/mediarepo-api.git?rev=476b9d152457f78c73f6f6a36c2421cbce9c9194#476b9d152457f78c73f6f6a36c2421cbce9c9194" dependencies = [ "chrono", "serde", @@ -822,6 +828,7 @@ name = "mediarepo-socket" version = "0.1.0" dependencies = [ "chrono", + "compare", "mediarepo-api", "mediarepo-core", "mediarepo-model", diff --git a/mediarepo-daemon/mediarepo-socket/Cargo.toml b/mediarepo-daemon/mediarepo-socket/Cargo.toml index 9faa014..733d461 100644 --- a/mediarepo-daemon/mediarepo-socket/Cargo.toml +++ b/mediarepo-daemon/mediarepo-socket/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" [dependencies] serde = "1.0.130" tracing = "0.1.29" +compare = "0.1.0" [dependencies.mediarepo-core] path = "../mediarepo-core" @@ -29,4 +30,4 @@ features = ["tokio-executor"] [dependencies.mediarepo-api] git = "https://github.com/Trivernis/mediarepo-api.git" -rev = "6cb2b0e467b3554b07d04f98d9244d8e4952db68" \ No newline at end of file +rev = "476b9d152457f78c73f6f6a36c2421cbce9c9194" \ No newline at end of file diff --git a/mediarepo-daemon/mediarepo-socket/src/namespaces/files.rs b/mediarepo-daemon/mediarepo-socket/src/namespaces/files.rs index c2ccb6e..e03ad82 100644 --- a/mediarepo-daemon/mediarepo-socket/src/namespaces/files.rs +++ b/mediarepo-daemon/mediarepo-socket/src/namespaces/files.rs @@ -1,11 +1,17 @@ use crate::from_model::FromModel; use crate::utils::{file_by_identifier, get_repo_from_context}; +use compare::Compare; use mediarepo_api::types::files::{ AddFileRequest, FileMetadataResponse, FindFilesByTagsRequest, GetFileThumbnailsRequest, - ReadFileRequest, ThumbnailMetadataResponse, + ReadFileRequest, SortDirection, SortKey, ThumbnailMetadataResponse, }; -use mediarepo_core::error::RepoError; +use mediarepo_core::error::{RepoError, RepoResult}; +use mediarepo_core::futures::future; use mediarepo_core::rmp_ipc::prelude::*; +use mediarepo_model::file::File; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::iter::FromIterator; use std::path::PathBuf; use tokio::io::AsyncReadExt; @@ -34,6 +40,7 @@ impl FilesNamespace { async fn all_files(ctx: &Context, event: Event) -> IPCResult<()> { let repo = get_repo_from_context(ctx).await; let files = repo.files().await?; + let responses: Vec = files .into_iter() .map(FileMetadataResponse::from_model) @@ -49,10 +56,43 @@ impl FilesNamespace { /// Searches for files by tags #[tracing::instrument(skip_all)] async fn find_files(ctx: &Context, event: Event) -> IPCResult<()> { - let tags = event.data::()?; + let req = event.data::()?; let repo = get_repo_from_context(ctx).await; - let tags = tags.tags.into_iter().map(|t| (t.name, t.negate)).collect(); - let files = repo.find_files_by_tags(tags).await?; + let tags = req.tags.into_iter().map(|t| (t.name, t.negate)).collect(); + let mut files = repo.find_files_by_tags(tags).await?; + + let files_nsp: HashMap> = HashMap::from_iter( + future::join_all(files.iter().map(|f| { + let file = f.clone(); + async move { + let result: RepoResult<(String, HashMap)> = + Ok((f.hash().clone(), get_namespaces_for_file(&file).await?)); + result + } + })) + .await + .into_iter() + .filter_map(|r| match r { + Ok(value) => Some(value), + Err(e) => { + tracing::error!("{:?}", e); + None + } + }), + ); + + let sort_expression = req.sort_expression; + + files.sort_by(|a, b| { + compare_files( + a, + files_nsp.get(a.hash()).unwrap(), + b, + files_nsp.get(b.hash()).unwrap(), + &sort_expression, + ) + }); + let responses: Vec = files .into_iter() .map(FileMetadataResponse::from_model) @@ -151,3 +191,85 @@ impl FilesNamespace { Ok(()) } } + +#[tracing::instrument(level = "trace", skip_all)] +fn compare_files( + file_a: &File, + nsp_a: &HashMap, + file_b: &File, + nsp_b: &HashMap, + expression: &Vec, +) -> Ordering { + let cmp_date = compare::natural(); + for sort_key in expression { + let ordering = match sort_key { + SortKey::Namespace(namespace) => adjust_for_dir( + compare_opts(nsp_a.get(&namespace.tag), nsp_b.get(&namespace.tag)), + &namespace.direction, + ), + SortKey::FileName(direction) => adjust_for_dir( + compare_opts(file_a.name().clone(), file_b.name().clone()), + direction, + ), + SortKey::FileSize(_direction) => { + Ordering::Equal // TODO: Retrieve file size + } + SortKey::FileImportedTime(direction) => adjust_for_dir( + cmp_date.compare(file_a.import_time(), file_b.import_time()), + direction, + ), + SortKey::FileCreatedTime(direction) => adjust_for_dir( + cmp_date.compare(file_a.creation_time(), file_b.creation_time()), + direction, + ), + SortKey::FileChangeTime(direction) => adjust_for_dir( + cmp_date.compare(file_a.change_time(), file_b.change_time()), + direction, + ), + SortKey::FileType(direction) => adjust_for_dir( + compare_opts(file_a.mime_type().clone(), file_b.mime_type().clone()), + direction, + ), + SortKey::NumTags(_) => { + Ordering::Equal // TODO: Count tags + } + }; + if !ordering.is_eq() { + return ordering; + } + } + + Ordering::Equal +} + +async fn get_namespaces_for_file(file: &File) -> RepoResult> { + let tags = file.tags().await?; + let namespaces: HashMap = + HashMap::from_iter(tags.into_iter().filter_map(|tag| { + let namespace = tag.namespace()?; + Some((namespace.name().clone(), tag.name().clone())) + })); + + Ok(namespaces) +} + +fn compare_opts(opt_a: Option, opt_b: Option) -> Ordering { + let cmp = compare::natural(); + if let (Some(a), Some(b)) = (&opt_a, &opt_b) { + cmp.compare(a, b) + } else if opt_a.is_some() { + Ordering::Greater + } else if opt_b.is_some() { + Ordering::Less + } else { + Ordering::Equal + } +} + +fn adjust_for_dir(ordering: Ordering, direction: &SortDirection) -> Ordering { + if *direction == SortDirection::Descending { + ordering.reverse() + } else { + ordering + } +}