Add implementation to walk directories and read data in parallel

main
trivernis 11 months ago
commit 9cdcbfe7c6
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG Key ID: DFFFCC2C7A02DB45

1
.gitignore vendored

@ -0,0 +1 @@
/target

1822
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -0,0 +1,19 @@
[package]
name = "viki"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-walkdir = "0.2.0"
clap = { version = "4.3.8", features = ["derive"] }
futures = "0.3.28"
globset = { version = "0.4.10", features = ["serde", "serde1"] }
miette = { version = "5.9.0", features = ["serde", "fancy"] }
serde = { version = "1.0.164", features = ["derive"] }
tera = "1.19.0"
tokio = { version = "1.29.0", features = ["rt-multi-thread", "net", "macros", "sync", "fs", "io-std", "io-util", "time", "process"] }
toml = "0.7.5"
tracing = { version = "0.1.37", features = ["async-await", "release_max_level_debug"] }
tracing-subscriber = { version = "0.3.17", features = ["serde", "env-filter"] }

@ -0,0 +1,22 @@
use std::path::PathBuf;
use clap::{Parser, Subcommand};
// Top-level command line arguments of the binary.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments into help/about text, which would change the CLI output.
#[derive(Clone, Debug, Parser)]
#[command(infer_subcommands = true)]
pub struct Args {
    // The subcommand to execute.
    #[command(subcommand)]
    pub command: Command,
}
// Every subcommand the CLI understands.
//
// The `///` comment on each variant is picked up by clap as that
// subcommand's help text, so it is part of the program's output.
#[derive(Debug, Clone, Subcommand)]
pub enum Command {
    /// Builds the project
    Build(BuildArgs),
}
#[derive(Clone, Debug, Parser)]
pub struct BuildArgs {
#[clap(default_value = ".")]
pub directory: PathBuf,
}

@ -0,0 +1,120 @@
use std::path::{Path, PathBuf};
use async_walkdir::WalkDir;
use futures::StreamExt;
use globset::{Glob, GlobSetBuilder};
use miette::{Context, IntoDiagnostic, Result};
use tokio::fs;
use super::IndexData;
/// Loader that collects folder data from a directory tree.
///
/// Construct it with [`DirLoader::new`] and call
/// [`DirLoader::read_content`] to perform the actual walk.
pub struct DirLoader {
    /// Root directory the walk starts from.
    base_path: PathBuf,
}
/// Everything that was loaded for a single directory containing an index file.
#[derive(Debug, Clone)]
pub struct FolderData {
    /// Path of the directory itself.
    pub path: PathBuf,
    /// Parsed contents of the directory's `_index.md` file.
    pub index: IndexData,
    /// Files directly inside the directory that matched the index's include
    /// globs and none of its exclude globs.
    pub pages: Vec<PathBuf>,
}
impl DirLoader {
pub fn new(base_path: PathBuf) -> Self {
Self { base_path }
}
/// Asynchronously reads all the entries at the given content location
#[tracing::instrument(level = "trace", skip(self))]
pub async fn read_content(&self) -> Result<Vec<FolderData>> {
let mut entries = WalkDir::new(&self.base_path);
let mut paths = Vec::new();
paths.push(self.base_path.to_owned());
while let Some(res) = entries.next().await {
match res {
Ok(entry) => {
let entry_path = entry.path();
if entry_path.is_dir() {
paths.push(entry_path)
}
}
Err(e) => return Err(e).into_diagnostic(),
}
}
let results = futures::future::join_all(paths.into_iter().map(Self::read_dir)).await;
let mut folder_data = Vec::new();
for res in results {
match res {
Ok(Some(data)) => folder_data.push(data),
Err(e) => return Err(e),
_ => {}
}
}
Ok(folder_data)
}
#[tracing::instrument(level = "trace")]
async fn read_dir(path: PathBuf) -> Result<Option<FolderData>> {
let index_path = path.join("_index.md");
if !index_path.exists() {
return Ok(None);
}
let index_data = read_index_data(&index_path).await?;
let pages = find_pages(&path, &index_data).await?;
Ok(Some(FolderData {
path,
index: index_data,
pages,
}))
}
}
/// Reads the file at `path` and deserializes its contents as TOML into
/// [`IndexData`].
///
/// # Errors
///
/// Returns an error if the file cannot be read or if its contents are not
/// valid TOML for [`IndexData`]. Both errors name the offending file, since
/// many index files may be loaded concurrently.
#[tracing::instrument(level = "trace")]
async fn read_index_data(path: &Path) -> Result<IndexData> {
    let index_str = fs::read_to_string(path)
        .await
        .into_diagnostic()
        .with_context(|| format!("reading index file {}", path.display()))?;

    toml::from_str(&index_str)
        .into_diagnostic()
        .with_context(|| format!("parsing index file {}", path.display()))
}
/// Lists the files directly inside `dir` that match the include globs of
/// `index_data` and none of its exclude globs.
///
/// Only the direct children of `dir` are inspected. The globs are matched
/// against each entry's full path as returned by the directory listing.
///
/// # Errors
///
/// Returns an error if a glob set cannot be built or if reading the
/// directory fails.
#[tracing::instrument(level = "trace")]
async fn find_pages(dir: &Path, index_data: &IndexData) -> Result<Vec<PathBuf>> {
    let include_set = build_glob_set(&index_data.include_files)
        .build()
        .into_diagnostic()?;
    let excluded_set = build_glob_set(&index_data.excluded_files)
        .build()
        .into_diagnostic()?;

    let mut read_dir = fs::read_dir(dir).await.into_diagnostic()?;
    let mut pages = Vec::new();

    while let Some(entry) = read_dir.next_entry().await.into_diagnostic()? {
        let entry_path = entry.path();

        // Run the in-memory glob checks first so the file system is only
        // queried for entries that could actually become pages.
        if !include_set.is_match(&entry_path) || excluded_set.is_match(&entry_path) {
            continue;
        }

        // Non-blocking equivalent of `Path::is_file`: follows symlinks and
        // treats any metadata error as "not a file".
        let is_file = fs::metadata(&entry_path)
            .await
            .map(|meta| meta.is_file())
            .unwrap_or(false);
        if is_file {
            pages.push(entry_path);
        }
    }

    Ok(pages)
}
/// Creates a [`GlobSetBuilder`] containing a clone of every glob in `globs`.
///
/// The builder is returned unbuilt; the caller invokes `build()` and handles
/// the resulting error.
#[tracing::instrument(level = "trace")]
fn build_glob_set(globs: &[Glob]) -> GlobSetBuilder {
    let mut builder = GlobSetBuilder::new();
    for glob in globs {
        builder.add(glob.clone());
    }
    builder
}

@ -0,0 +1,19 @@
use std::collections::HashMap;
use globset::Glob;
use serde::Deserialize;
/// Settings deserialized from a directory's index file.
#[derive(Debug, Clone, Deserialize)]
pub struct IndexData {
    /// Default template used for rendering.
    pub default_template: Option<String>,

    /// Globs selecting the files that are included for rendering.
    pub include_files: Vec<Glob>,

    /// Globs selecting the files that are explicitly excluded from rendering.
    pub excluded_files: Vec<Glob>,

    /// Maps file path globs to the template used to render matching files.
    pub templates: HashMap<Glob, String>,
}

@ -0,0 +1,7 @@
mod dir_loader;
mod index;
mod page;
pub use dir_loader::*;
pub use index::*;
pub use page::*;

@ -0,0 +1,12 @@
use serde::Deserialize;
/// Metadata deserialized for a single page.
#[derive(Debug, Clone, Deserialize)]
pub struct PageMetadata {
    /// Template used to render this page, if one is set.
    pub template: Option<String>,

    /// All remaining data of this page; passed to the templates when
    /// rendering. `flatten` collects every key other than `template` here.
    #[serde(flatten)]
    pub data: toml::Value,
}

@ -0,0 +1,7 @@
pub mod args;
pub mod data;
/// Program entry point, run on the tokio runtime.
///
/// Currently only prints a greeting to standard output.
#[tokio::main]
async fn main() {
    let greeting = "Hello, world!";
    println!("{greeting}");
}
Loading…
Cancel
Save