From 4fc991fdeca5db36bd7be7197510e62a019e1677 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 16 Feb 2022 07:57:20 -0600 Subject: [PATCH] migrate grammar fetching/building code into helix-loader crate This is a rather large refactor that moves most of the code for loading, fetching, and building grammars into a new helix-loader module. This works well with the [[grammars]] syntax for languages.toml defined earlier: we only have to depend on the types for GrammarConfiguration in helix-loader and can leave all the [[language]] entries for helix-core. --- Cargo.lock | 19 +- Cargo.toml | 1 + book/src/guides/adding_languages.md | 10 +- docs/architecture.md | 16 +- helix-core/Cargo.toml | 6 +- helix-core/src/config.rs | 27 +- helix-core/src/indent.rs | 2 - helix-core/src/lib.rs | 141 ----------- helix-core/src/path.rs | 5 +- helix-core/src/syntax.rs | 91 +------ helix-loader/Cargo.toml | 23 ++ helix-loader/build.rs | 6 + .../src/grammar.rs | 230 ++++++++++++------ helix-loader/src/lib.rs | 161 ++++++++++++ helix-term/Cargo.toml | 1 + helix-term/build.rs | 5 - helix-term/src/application.rs | 6 +- helix-term/src/commands/typed.rs | 2 +- helix-term/src/health.rs | 14 +- helix-term/src/lib.rs | 1 - helix-term/src/main.rs | 10 +- helix-term/src/ui/mod.rs | 4 +- languages.toml | 12 +- 23 files changed, 419 insertions(+), 374 deletions(-) create mode 100644 helix-loader/Cargo.toml create mode 100644 helix-loader/build.rs rename helix-term/src/grammars.rs => helix-loader/src/grammar.rs (58%) create mode 100644 helix-loader/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 469a58624..de4b2279a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -358,12 +358,11 @@ dependencies = [ name = "helix-core" version = "0.6.0" dependencies = [ - "anyhow", "arc-swap", "chrono", "encoding_rs", "etcetera", - "libloading", + "helix-loader", "log", "once_cell", "quickcheck", @@ -397,6 +396,21 @@ dependencies = [ "which", ] +[[package]] +name = "helix-loader" +version = "0.6.0" +dependencies 
= [ + "anyhow", + "cc", + "etcetera", + "libloading", + "once_cell", + "serde", + "threadpool", + "toml", + "tree-sitter", +] + [[package]] name = "helix-lsp" version = "0.6.0" @@ -432,6 +446,7 @@ dependencies = [ "grep-searcher", "helix-core", "helix-dap", + "helix-loader", "helix-lsp", "helix-tui", "helix-view", diff --git a/Cargo.toml b/Cargo.toml index 0847e6ba4..780811f78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "helix-tui", "helix-lsp", "helix-dap", + "helix-loader", "xtask", ] diff --git a/book/src/guides/adding_languages.md b/book/src/guides/adding_languages.md index 51256288f..e000770c6 100644 --- a/book/src/guides/adding_languages.md +++ b/book/src/guides/adding_languages.md @@ -51,16 +51,16 @@ Grammar configuration takes these keys: | Key | Description | | --- | ----------- | | `name` | The name of the tree-sitter grammar | -| `path` | A path within the grammar directory which should be built. Some grammar repositories host multiple grammars (for example `tree-sitter-typescript` and `tree-sitter-ocaml`) in subdirectories. This key is used to point `hx --build-grammars` to the correct path for compilation. When ommitted, the root of the grammar directory is used | | `source` | The method of fetching the grammar - a table with a schema defined below | Where `source` is a table with either these keys when using a grammar from a git repository: -| Key | Description | -| --- | ----------- | -| `git` | A git remote URL from which the grammar should be cloned | -| `rev` | The revision (commit hash or tag) which should be fetched | +| Key | Description | +| --- | ----------- | +| `git` | A git remote URL from which the grammar should be cloned | +| `rev` | The revision (commit hash or tag) which should be fetched | +| `subpath` | A path within the grammar directory which should be built. Some grammar repositories host multiple grammars (for example `tree-sitter-typescript` and `tree-sitter-ocaml`) in subdirectories. 
This key is used to point `hx --build-grammars` to the correct path for compilation. When omitted, the root of the repository is used | Or a `path` key with an absolute path to a locally available grammar directory. diff --git a/docs/architecture.md b/docs/architecture.md index 3c743eca2..33624aac2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,11 +1,13 @@ -| Crate | Description | -| ----------- | ----------- | -| helix-core | Core editing primitives, functional. | -| helix-lsp | Language server client | -| helix-view | UI abstractions for use in backends, imperative shell. | -| helix-term | Terminal UI | -| helix-tui | TUI primitives, forked from tui-rs, inspired by Cursive | +| Crate | Description | +| ----------- | ----------- | +| helix-core | Core editing primitives, functional. | +| helix-lsp | Language server client | +| helix-dap | Debug Adapter Protocol (DAP) client | +| helix-loader | Functions for building, fetching, and loading external resources | +| helix-view | UI abstractions for use in backends, imperative shell. | +| helix-term | Terminal UI | +| helix-tui | TUI primitives, forked from tui-rs, inspired by Cursive | This document contains a high-level overview of Helix internals. 
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 5582d38b4..8152da574 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -13,6 +13,8 @@ include = ["src/**/*", "README.md"] [features] [dependencies] +helix-loader = { version = "0.6", path = "../helix-loader" } + ropey = "1.3" smallvec = "1.8" smartstring = "1.0.0" @@ -33,13 +35,11 @@ toml = "0.5" similar = "2.1" -etcetera = "0.3" encoding_rs = "0.8" chrono = { version = "0.4", default-features = false, features = ["alloc", "std"] } -libloading = "0.7" -anyhow = "1" +etcetera = "0.3" [dev-dependencies] quickcheck = { version = "1", default-features = false } diff --git a/helix-core/src/config.rs b/helix-core/src/config.rs index d4ebee1f8..f399850e6 100644 --- a/helix-core/src/config.rs +++ b/helix-core/src/config.rs @@ -1,33 +1,10 @@ -use crate::merge_toml_values; - -/// Default bultin-in languages.toml. -pub fn default_lang_config() -> toml::Value { - toml::from_slice(include_bytes!("../../languages.toml")) - .expect("Could not parse bultin-in languages.toml to valid toml") -} - -/// User configured languages.toml file, merged with the default config. -pub fn user_lang_config() -> Result { - let def_lang_conf = default_lang_config(); - let data = std::fs::read(crate::config_dir().join("languages.toml")); - let user_lang_conf = match data { - Ok(raw) => { - let value = toml::from_slice(&raw)?; - merge_toml_values(def_lang_conf, value) - } - Err(_) => def_lang_conf, - }; - - Ok(user_lang_conf) -} - /// Syntax configuration loader based on built-in languages.toml. pub fn default_syntax_loader() -> crate::syntax::Configuration { - default_lang_config() + helix_loader::default_lang_config() .try_into() .expect("Could not serialize built-in languages.toml") } /// Syntax configuration loader based on user configured languages.toml. 
pub fn user_syntax_loader() -> Result { - user_lang_config()?.try_into() + helix_loader::user_lang_config()?.try_into() } diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index ee9cbb165..30f4a3405 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -444,8 +444,6 @@ where debugger: None, auto_pairs: None, }], - grammar: vec![], - grammar_selection: None, }); // set runtime path so we can find the queries diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index c3a349c12..1f43c2667 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -33,9 +33,6 @@ pub mod unicode { pub use unicode_width as width; } -static RUNTIME_DIR: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(runtime_dir); - pub fn find_first_non_whitespace_char(line: RopeSlice) -> Option { line.chars().position(|ch| !ch.is_whitespace()) } @@ -85,144 +82,6 @@ pub fn find_root(root: Option<&str>, root_markers: &[String]) -> Option std::path::PathBuf { - if let Ok(dir) = std::env::var("HELIX_RUNTIME") { - return dir.into(); - } - - const RT_DIR: &str = "runtime"; - let conf_dir = config_dir().join(RT_DIR); - if conf_dir.exists() { - return conf_dir; - } - - if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") { - // this is the directory of the crate being run by cargo, we need the workspace path so we take the parent - return std::path::PathBuf::from(dir).parent().unwrap().join(RT_DIR); - } - - // fallback to location of the executable being run - std::env::current_exe() - .ok() - .and_then(|path| path.parent().map(|path| path.to_path_buf().join(RT_DIR))) - .unwrap() -} - -pub fn config_dir() -> std::path::PathBuf { - // TODO: allow env var override - let strategy = choose_base_strategy().expect("Unable to find the config directory!"); - let mut path = strategy.config_dir(); - path.push("helix"); - path -} - -pub fn cache_dir() -> std::path::PathBuf { - // TODO: allow env var override - let strategy = choose_base_strategy().expect("Unable to find 
the config directory!"); - let mut path = strategy.cache_dir(); - path.push("helix"); - path -} - -pub fn config_file() -> std::path::PathBuf { - config_dir().join("config.toml") -} - -pub fn lang_config_file() -> std::path::PathBuf { - config_dir().join("languages.toml") -} - -pub fn log_file() -> std::path::PathBuf { - cache_dir().join("helix.log") -} - -// right overrides left -pub fn merge_toml_values(left: toml::Value, right: toml::Value) -> toml::Value { - use toml::Value; - - fn get_name(v: &Value) -> Option<&str> { - v.get("name").and_then(Value::as_str) - } - - match (left, right) { - (Value::Array(mut left_items), Value::Array(right_items)) => { - left_items.reserve(right_items.len()); - for rvalue in right_items { - let lvalue = get_name(&rvalue) - .and_then(|rname| left_items.iter().position(|v| get_name(v) == Some(rname))) - .map(|lpos| left_items.remove(lpos)); - let mvalue = match lvalue { - Some(lvalue) => merge_toml_values(lvalue, rvalue), - None => rvalue, - }; - left_items.push(mvalue); - } - Value::Array(left_items) - } - (Value::Table(mut left_map), Value::Table(right_map)) => { - for (rname, rvalue) in right_map { - match left_map.remove(&rname) { - Some(lvalue) => { - let merged_value = merge_toml_values(lvalue, rvalue); - left_map.insert(rname, merged_value); - } - None => { - left_map.insert(rname, rvalue); - } - } - } - Value::Table(left_map) - } - // Catch everything else we didn't handle, and use the right value - (_, value) => value, - } -} - -#[cfg(test)] -mod merge_toml_tests { - use super::merge_toml_values; - - #[test] - fn language_tomls() { - use toml::Value; - - const USER: &str = " - [[language]] - name = \"nix\" - test = \"bbb\" - indent = { tab-width = 4, unit = \" \", test = \"aaa\" } - "; - - let base: Value = toml::from_slice(include_bytes!("../../languages.toml")) - .expect("Couldn't parse built-in languages config"); - let user: Value = toml::from_str(USER).unwrap(); - - let merged = merge_toml_values(base, user); - let 
languages = merged.get("language").unwrap().as_array().unwrap(); - let nix = languages - .iter() - .find(|v| v.get("name").unwrap().as_str().unwrap() == "nix") - .unwrap(); - let nix_indent = nix.get("indent").unwrap(); - - // We changed tab-width and unit in indent so check them if they are the new values - assert_eq!( - nix_indent.get("tab-width").unwrap().as_integer().unwrap(), - 4 - ); - assert_eq!(nix_indent.get("unit").unwrap().as_str().unwrap(), " "); - // We added a new keys, so check them - assert_eq!(nix.get("test").unwrap().as_str().unwrap(), "bbb"); - assert_eq!(nix_indent.get("test").unwrap().as_str().unwrap(), "aaa"); - // We didn't change comment-token so it should be same - assert_eq!(nix.get("comment-token").unwrap().as_str().unwrap(), "#"); - } -} - -pub use etcetera::home_dir; - -use etcetera::base_strategy::{choose_base_strategy, BaseStrategy}; - pub use ropey::{Rope, RopeBuilder, RopeSlice}; // pub use tendril::StrTendril as Tendril; diff --git a/helix-core/src/path.rs b/helix-core/src/path.rs index a66444651..e0c3bef65 100644 --- a/helix-core/src/path.rs +++ b/helix-core/src/path.rs @@ -1,9 +1,10 @@ +use etcetera::home_dir; use std::path::{Component, Path, PathBuf}; /// Replaces users home directory from `path` with tilde `~` if the directory /// is available, otherwise returns the path unchanged. 
pub fn fold_home_dir(path: &Path) -> PathBuf { - if let Ok(home) = super::home_dir() { + if let Ok(home) = home_dir() { if path.starts_with(&home) { // it's ok to unwrap, the path starts with home dir return PathBuf::from("~").join(path.strip_prefix(&home).unwrap()); @@ -20,7 +21,7 @@ pub fn expand_tilde(path: &Path) -> PathBuf { let mut components = path.components().peekable(); if let Some(Component::Normal(c)) = components.peek() { if c == &"~" { - if let Ok(home) = super::home_dir() { + if let Ok(home) = home_dir() { // it's ok to unwrap, the path starts with `~` return home.join(path.strip_prefix("~").unwrap()); } diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 28aa31f99..6ae46d4ff 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -7,10 +7,6 @@ use crate::{ Rope, RopeSlice, Tendril, }; -use anyhow::{Context, Result}; -use libloading::{Library, Symbol}; -use tree_sitter::Language; - use arc_swap::{ArcSwap, Guard}; use slotmap::{DefaultKey as LayerId, HopSlotMap}; @@ -27,33 +23,7 @@ use std::{ use once_cell::sync::{Lazy, OnceCell}; use serde::{Deserialize, Serialize}; -#[cfg(unix)] -pub const DYLIB_EXTENSION: &str = "so"; - -#[cfg(windows)] -pub const DYLIB_EXTENSION: &str = "dll"; - -fn replace_dashes_with_underscores(name: &str) -> String { - name.replace('-', "_") -} - -pub fn get_language(runtime_path: &std::path::Path, name: &str) -> Result { - let name = name.to_ascii_lowercase(); - let mut library_path = runtime_path.join("grammars").join(&name); - library_path.set_extension(DYLIB_EXTENSION); - - let library = unsafe { Library::new(&library_path) } - .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?; - let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(&name)); - let language = unsafe { - let language_fn: Symbol Language> = library - .get(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; - 
language_fn() - }; - std::mem::forget(library); - Ok(language) -} +use helix_loader::grammar::{get_language, load_runtime_file}; fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> where @@ -81,19 +51,8 @@ where } #[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", deny_unknown_fields)] pub struct Configuration { - #[serde(rename = "use-grammars")] - pub grammar_selection: Option, pub language: Vec, - pub grammar: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "lowercase", untagged)] -pub enum GrammarSelection { - Only(HashSet), - Except(HashSet), } // largely based on tree-sitter/cli/src/loader.rs @@ -279,29 +238,6 @@ pub struct IndentQuery { pub outdent: HashSet, } -#[derive(Debug, Serialize, Deserialize)] -pub struct GrammarConfiguration { - #[serde(rename = "name")] - pub grammar_id: String, // c-sharp, rust - pub source: GrammarSource, - pub path: Option, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -#[serde(untagged)] -pub enum GrammarSource { - Local { - path: String, - }, - Git { - #[serde(rename = "git")] - remote: String, - #[serde(rename = "rev")] - revision: String, - }, -} - #[derive(Debug)] pub struct TextObjectQuery { pub query: Query, @@ -398,14 +334,6 @@ impl TextObjectQuery { } } -pub fn load_runtime_file(language: &str, filename: &str) -> Result { - let path = crate::RUNTIME_DIR - .join("queries") - .join(language) - .join(filename); - std::fs::read_to_string(&path) -} - fn read_query(language: &str, filename: &str) -> String { static INHERITS_REGEX: Lazy = Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()]+)\s*").unwrap()); @@ -451,12 +379,9 @@ impl LanguageConfiguration { if highlights_query.is_empty() { None } else { - let language = get_language( - &crate::RUNTIME_DIR, - self.grammar.as_deref().unwrap_or(&self.language_id), - ) - .map_err(|e| log::info!("{}", e)) - .ok()?; + let language = 
get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) + .map_err(|e| log::info!("{}", e)) + .ok()?; let config = HighlightConfiguration::new( language, &highlights_query, @@ -2116,13 +2041,9 @@ mod test { .map(String::from) .collect(); - let loader = Loader::new(Configuration { - language: vec![], - grammar: vec![], - grammar_selection: None, - }); + let loader = Loader::new(Configuration { language: vec![] }); - let language = get_language(&crate::RUNTIME_DIR, "Rust").unwrap(); + let language = get_language("Rust").unwrap(); let config = HighlightConfiguration::new( language, &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") diff --git a/helix-loader/Cargo.toml b/helix-loader/Cargo.toml new file mode 100644 index 000000000..21b37333a --- /dev/null +++ b/helix-loader/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "helix-loader" +version = "0.6.0" +description = "A post-modern text editor." +authors = ["Blaž Hrastnik "] +edition = "2021" +license = "MPL-2.0" +categories = ["editor"] +repository = "https://github.com/helix-editor/helix" +homepage = "https://helix-editor.com" + +[dependencies] +anyhow = "1" +serde = { version = "1.0", features = ["derive"] } +toml = "0.5" +etcetera = "0.3" +tree-sitter = "0.20" +libloading = "0.7" +once_cell = "1.9" + +# cloning/compiling tree-sitter grammars +cc = { version = "1" } +threadpool = { version = "1.0" } diff --git a/helix-loader/build.rs b/helix-loader/build.rs new file mode 100644 index 000000000..e0ebd1c48 --- /dev/null +++ b/helix-loader/build.rs @@ -0,0 +1,6 @@ +fn main() { + println!( + "cargo:rustc-env=BUILD_TARGET={}", + std::env::var("TARGET").unwrap() + ); +} diff --git a/helix-term/src/grammars.rs b/helix-loader/src/grammar.rs similarity index 58% rename from helix-term/src/grammars.rs rename to helix-loader/src/grammar.rs index 2e0be4bcb..61ef464fc 100644 --- a/helix-term/src/grammars.rs +++ b/helix-loader/src/grammar.rs @@ -1,63 +1,155 @@ use anyhow::{anyhow, 
Context, Result}; +use libloading::{Library, Symbol}; +use serde::{Deserialize, Serialize}; use std::fs; use std::time::SystemTime; use std::{ + collections::HashSet, path::{Path, PathBuf}, process::Command, sync::mpsc::channel, }; +use tree_sitter::Language; -use helix_core::syntax::{GrammarConfiguration, GrammarSelection, GrammarSource, DYLIB_EXTENSION}; +#[cfg(unix)] +const DYLIB_EXTENSION: &str = "so"; + +#[cfg(windows)] +const DYLIB_EXTENSION: &str = "dll"; + +#[derive(Debug, Serialize, Deserialize)] +struct Configuration { + #[serde(rename = "use-grammars")] + pub grammar_selection: Option, + pub grammar: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "lowercase", untagged)] +pub enum GrammarSelection { + Only(HashSet), + Except(HashSet), +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct GrammarConfiguration { + #[serde(rename = "name")] + pub grammar_id: String, + pub source: GrammarSource, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "lowercase", untagged)] +pub enum GrammarSource { + Local { + path: String, + }, + Git { + #[serde(rename = "git")] + remote: String, + #[serde(rename = "rev")] + revision: String, + subpath: Option, + }, +} const BUILD_TARGET: &str = env!("BUILD_TARGET"); const REMOTE_NAME: &str = "origin"; +pub fn get_language(name: &str) -> Result { + let name = name.to_ascii_lowercase(); + let mut library_path = crate::runtime_dir().join("grammars").join(&name); + library_path.set_extension(DYLIB_EXTENSION); + + let library = unsafe { Library::new(&library_path) } + .with_context(|| format!("Error opening dynamic library {library_path:?}"))?; + let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_")); + let language = unsafe { + let language_fn: Symbol Language> = library + .get(language_fn_name.as_bytes()) + .with_context(|| format!("Failed to load symbol {language_fn_name}"))?; + language_fn() + }; + std::mem::forget(library); + 
Ok(language) +} + pub fn fetch_grammars() -> Result<()> { - run_parallel(get_grammar_configs(), fetch_grammar, "fetch") + run_parallel(get_grammar_configs()?, fetch_grammar, "fetch") } pub fn build_grammars() -> Result<()> { - run_parallel(get_grammar_configs(), build_grammar, "build") + run_parallel(get_grammar_configs()?, build_grammar, "build") +} + +// Returns the set of grammar configurations the user requests. +// Grammars are configured in the default and user `languages.toml` and are +// merged. The `grammar_selection` key of the config is then used to filter +// down all grammars into a subset of the user's choosing. +fn get_grammar_configs() -> Result> { + let config: Configuration = crate::user_lang_config() + .context("Could not parse languages.toml")? + .try_into()?; + + let grammars = match config.grammar_selection { + Some(GrammarSelection::Only(selections)) => config + .grammar + .into_iter() + .filter(|grammar| selections.contains(&grammar.grammar_id)) + .collect(), + Some(GrammarSelection::Except(rejections)) => config + .grammar + .into_iter() + .filter(|grammar| !rejections.contains(&grammar.grammar_id)) + .collect(), + None => config.grammar, + }; + + Ok(grammars) } fn run_parallel(grammars: Vec, job: F, action: &'static str) -> Result<()> where F: Fn(GrammarConfiguration) -> Result<()> + std::marker::Send + 'static + Copy, { - let mut n_jobs = 0; let pool = threadpool::Builder::new().build(); let (tx, rx) = channel(); for grammar in grammars { let tx = tx.clone(); - n_jobs += 1; pool.execute(move || { - let grammar_id = grammar.grammar_id.clone(); - job(grammar).unwrap_or_else(|err| { - eprintln!("Failed to {} grammar '{}'\n{}", action, grammar_id, err) - }); - - // report progress - tx.send(1).unwrap(); + tx.send(job(grammar)).unwrap(); }); } pool.join(); - if rx.try_iter().sum::() == n_jobs { - Ok(()) + // TODO: print all failures instead of the first one found. 
+ if let Some(failure) = rx.try_iter().find_map(|result| result.err()) { + Err(anyhow!( + "Failed to {} some grammar(s).\n{}", + action, + failure + )) } else { - Err(anyhow!("Failed to {} some grammar(s).", action)) + Ok(()) } } fn fetch_grammar(grammar: GrammarConfiguration) -> Result<()> { - if let GrammarSource::Git { remote, revision } = grammar.source { - let grammar_dir = helix_core::runtime_dir() + if let GrammarSource::Git { + remote, revision, .. + } = grammar.source + { + let grammar_dir = crate::runtime_dir() .join("grammars/sources") - .join(grammar.grammar_id.clone()); + .join(&grammar.grammar_id); - fs::create_dir_all(grammar_dir.clone()).expect("Could not create grammar directory"); + fs::create_dir_all(&grammar_dir).context(format!( + "Could not create grammar directory {:?}", + grammar_dir + ))?; // create the grammar dir contains a git directory if !grammar_dir.join(".git").is_dir() { @@ -65,12 +157,12 @@ fn fetch_grammar(grammar: GrammarConfiguration) -> Result<()> { } // ensure the remote matches the configured remote - if get_remote_url(&grammar_dir).map_or(true, |s| s.trim_end() != remote) { + if get_remote_url(&grammar_dir).map_or(true, |s| s != remote) { set_remote(&grammar_dir, &remote)?; } // ensure the revision matches the configured revision - if get_revision(&grammar_dir).map_or(true, |s| s.trim_end() != revision) { + if get_revision(&grammar_dir).map_or(true, |s| s != revision) { // Fetch the exact revision from the remote. // Supported by server-side git since v2.5.0 (July 2015), // enabled by default on major git hosts. @@ -94,33 +186,38 @@ fn fetch_grammar(grammar: GrammarConfiguration) -> Result<()> { // Sets the remote for a repository to the given URL, creating the remote if // it does not yet exist. 
-fn set_remote(repository: &Path, remote_url: &str) -> Result { - git(repository, ["remote", "set-url", REMOTE_NAME, remote_url]) - .or_else(|_| git(repository, ["remote", "add", REMOTE_NAME, remote_url])) +fn set_remote(repository_dir: &Path, remote_url: &str) -> Result { + git( + repository_dir, + ["remote", "set-url", REMOTE_NAME, remote_url], + ) + .or_else(|_| git(repository_dir, ["remote", "add", REMOTE_NAME, remote_url])) } -fn get_remote_url(repository: &Path) -> Option { - git(repository, ["remote", "get-url", REMOTE_NAME]).ok() +fn get_remote_url(repository_dir: &Path) -> Option { + git(repository_dir, ["remote", "get-url", REMOTE_NAME]).ok() } -fn get_revision(repository: &Path) -> Option { - git(repository, ["rev-parse", "HEAD"]).ok() +fn get_revision(repository_dir: &Path) -> Option { + git(repository_dir, ["rev-parse", "HEAD"]).ok() } // A wrapper around 'git' commands which returns stdout in success and a // helpful error message showing the command, stdout, and stderr in error. 
-fn git(repository: &Path, args: I) -> Result +fn git(repository_dir: &Path, args: I) -> Result where I: IntoIterator, S: AsRef, { let output = Command::new("git") .args(args) - .current_dir(repository) + .current_dir(repository_dir) .output()?; if output.status.success() { - Ok(String::from_utf8_lossy(&output.stdout).into_owned()) + Ok(String::from_utf8_lossy(&output.stdout) + .trim_end() + .to_owned()) } else { // TODO: figure out how to display the git command using `args` Err(anyhow!( @@ -132,52 +229,37 @@ where } fn build_grammar(grammar: GrammarConfiguration) -> Result<()> { - let grammar_dir = if let GrammarSource::Local { ref path } = grammar.source { - PathBuf::from(path) + println!("{:#?}", grammar); + let grammar_dir = if let GrammarSource::Local { path } = &grammar.source { + PathBuf::from(&path) } else { - helix_core::runtime_dir() + crate::runtime_dir() .join("grammars/sources") - .join(grammar.grammar_id.clone()) + .join(&grammar.grammar_id) }; - grammar_dir.read_dir().with_context(|| { - format!( - "The directory {:?} is empty, you probably need to use 'hx --fetch-grammars'?", - grammar_dir - ) + let grammar_dir_entries = grammar_dir.read_dir().with_context(|| { + format!("Failed to read directory {grammar_dir:?}. Did you use 'hx --fetch-grammars'?") })?; - let path = match grammar.path { - Some(ref subpath) => grammar_dir.join(subpath), - None => grammar_dir, + if grammar_dir_entries.count() == 0 { + return Err(anyhow!( + "Directory {grammar_dir:?} is empty. Did you use 'hx --fetch-grammars'?" + )); + }; + + let path = match &grammar.source { + GrammarSource::Git { + subpath: Some(subpath), + .. + } => grammar_dir.join(subpath), + _ => grammar_dir, } .join("src"); build_tree_sitter_library(&path, grammar) } -// Returns the set of grammar configurations the user requests. -// Grammars are configured in the default and user `languages.toml` and are -// merged. 
The `grammar_selection` key of the config is then used to filter -// down all grammars into a subset of the user's choosing. -fn get_grammar_configs() -> Vec { - let config = helix_core::config::user_syntax_loader().expect("Could not parse languages.toml"); - - match config.grammar_selection { - Some(GrammarSelection::Only(selections)) => config - .grammar - .into_iter() - .filter(|grammar| selections.contains(&grammar.grammar_id)) - .collect(), - Some(GrammarSelection::Except(rejections)) => config - .grammar - .into_iter() - .filter(|grammar| !rejections.contains(&grammar.grammar_id)) - .collect(), - None => config.grammar, - } -} - fn build_tree_sitter_library(src_path: &Path, grammar: GrammarConfiguration) -> Result<()> { let header_path = src_path; let parser_path = src_path.join("parser.c"); @@ -193,8 +275,8 @@ fn build_tree_sitter_library(src_path: &Path, grammar: GrammarConfiguration) -> None } }; - let parser_lib_path = helix_core::runtime_dir().join("../runtime/grammars"); - let mut library_path = parser_lib_path.join(grammar.grammar_id.clone()); + let parser_lib_path = crate::runtime_dir().join("grammars"); + let mut library_path = parser_lib_path.join(&grammar.grammar_id); library_path.set_extension(DYLIB_EXTENSION); let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) @@ -210,7 +292,7 @@ fn build_tree_sitter_library(src_path: &Path, grammar: GrammarConfiguration) -> let mut config = cc::Build::new(); config .cpp(true) - .opt_level(2) + .opt_level(3) .cargo_metadata(false) .host(BUILD_TARGET) .target(BUILD_TARGET); @@ -245,7 +327,7 @@ fn build_tree_sitter_library(src_path: &Path, grammar: GrammarConfiguration) -> .arg(header_path) .arg("-o") .arg(&library_path) - .arg("-O2"); + .arg("-O3"); if let Some(scanner_path) = scanner_path.as_ref() { if scanner_path.extension() == Some("c".as_ref()) { command.arg("-xc").arg("-std=c99").arg(scanner_path); @@ -294,3 +376,13 @@ fn needs_recompile( fn mtime(path: &Path) -> Result { 
Ok(fs::metadata(path)?.modified()?) }
+
+/// Reads `filename` from the `queries/<language>/` directory under
+/// `RUNTIME_DIR` and returns its contents as a string.
+pub fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> {
+    let path = crate::RUNTIME_DIR
+        .join("queries")
+        .join(language)
+        .join(filename);
+    std::fs::read_to_string(&path)
+}
diff --git a/helix-loader/src/lib.rs b/helix-loader/src/lib.rs
new file mode 100644
index 000000000..a2c4d96f0
--- /dev/null
+++ b/helix-loader/src/lib.rs
@@ -0,0 +1,205 @@
+pub mod grammar;
+
+use etcetera::base_strategy::{choose_base_strategy, BaseStrategy};
+
+/// Runtime directory, computed by [`runtime_dir`] on first access.
+pub static RUNTIME_DIR: once_cell::sync::Lazy<std::path::PathBuf> =
+    once_cell::sync::Lazy::new(runtime_dir);
+
+/// Locates the `runtime` directory.
+///
+/// Candidates are tried in this order:
+/// 1. the value of the `HELIX_RUNTIME` environment variable, used as-is;
+/// 2. `runtime` inside [`config_dir`], if that path exists;
+/// 3. `runtime` inside the parent of the directory named by the
+///    `CARGO_MANIFEST_DIR` environment variable;
+/// 4. `runtime` inside the directory that contains the running executable.
+///
+/// # Panics
+///
+/// Panics if [`config_dir`] panics, if `CARGO_MANIFEST_DIR` is set but has no
+/// parent, or if the executable path is unavailable or has no parent.
+pub fn runtime_dir() -> std::path::PathBuf {
+    if let Ok(dir) = std::env::var("HELIX_RUNTIME") {
+        return dir.into();
+    }
+
+    const RT_DIR: &str = "runtime";
+    let conf_dir = config_dir().join(RT_DIR);
+    if conf_dir.exists() {
+        return conf_dir;
+    }
+
+    if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") {
+        // this is the directory of the crate being run by cargo, we need the workspace path so we take the parent
+        return std::path::PathBuf::from(dir).parent().unwrap().join(RT_DIR);
+    }
+
+    // fallback to location of the executable being run
+    std::env::current_exe()
+        .ok()
+        .and_then(|path| path.parent().map(|path| path.to_path_buf().join(RT_DIR)))
+        .unwrap()
+}
+
+/// Returns the `helix` directory inside the config directory of the base
+/// strategy chosen by `etcetera`.
+///
+/// # Panics
+///
+/// Panics if `choose_base_strategy` returns an error.
+pub fn config_dir() -> std::path::PathBuf {
+    // TODO: allow env var override
+    let strategy = choose_base_strategy().expect("Unable to find the config directory!");
+    let mut path = strategy.config_dir();
+    path.push("helix");
+    path
+}
+
+/// Returns the `helix` directory inside the cache directory of the base
+/// strategy chosen by `etcetera`.
+///
+/// # Panics
+///
+/// Panics if `choose_base_strategy` returns an error.
+pub fn cache_dir() -> std::path::PathBuf {
+    // TODO: allow env var override
+    let strategy = choose_base_strategy().expect("Unable to find the cache directory!");
+    let mut path = strategy.cache_dir();
+    path.push("helix");
+    path
+}
+
+/// Path of `config.toml` inside [`config_dir`].
+pub fn config_file() -> std::path::PathBuf {
+    config_dir().join("config.toml")
+}
+
+/// Path of `languages.toml` inside [`config_dir`].
+pub fn lang_config_file() -> std::path::PathBuf {
+    config_dir().join("languages.toml")
+}
+
+/// Path of `helix.log` inside [`cache_dir`].
+pub fn log_file() -> std::path::PathBuf {
+    cache_dir().join("helix.log")
+}
+
+/// Default built-in languages.toml, embedded at compile time.
+///
+/// # Panics
+///
+/// Panics if the embedded file is not valid TOML.
+pub fn default_lang_config() -> toml::Value {
+    toml::from_slice(include_bytes!("../../languages.toml"))
+        .expect("Could not parse built-in languages.toml to valid toml")
+}
+
+/// User configured languages.toml file, merged with the default config.
+///
+/// If the user file cannot be read, the default config is returned unchanged.
+/// A user file that fails to parse as TOML is returned as an error.
+pub fn user_lang_config() -> Result<toml::Value, toml::de::Error> {
+    let def_lang_conf = default_lang_config();
+    let data = std::fs::read(lang_config_file());
+    let user_lang_conf = match data {
+        Ok(raw) => {
+            let value = toml::from_slice(&raw)?;
+            merge_toml_values(def_lang_conf, value)
+        }
+        // Best effort: every read error falls back to the defaults.
+        Err(_) => def_lang_conf,
+    };
+
+    Ok(user_lang_conf)
+}
+
+/// Merges two TOML values; `right` overrides `left`.
+///
+/// * Arrays: each `right` item whose `name` string matches a `left` item is
+///   merged with that item (recursively); the result is appended to the end of
+///   the array. Items without a match are appended as they are.
+/// * Tables: keys present on both sides are merged recursively; other keys
+///   from `right` are inserted.
+/// * Any other combination yields `right`.
+pub fn merge_toml_values(left: toml::Value, right: toml::Value) -> toml::Value {
+    use toml::Value;
+
+    fn get_name(v: &Value) -> Option<&str> {
+        v.get("name").and_then(Value::as_str)
+    }
+
+    match (left, right) {
+        (Value::Array(mut left_items), Value::Array(right_items)) => {
+            left_items.reserve(right_items.len());
+            for rvalue in right_items {
+                let lvalue = get_name(&rvalue)
+                    .and_then(|rname| left_items.iter().position(|v| get_name(v) == Some(rname)))
+                    .map(|lpos| left_items.remove(lpos));
+                let mvalue = match lvalue {
+                    Some(lvalue) => merge_toml_values(lvalue, rvalue),
+                    None => rvalue,
+                };
+                left_items.push(mvalue);
+            }
+            Value::Array(left_items)
+        }
+        (Value::Table(mut left_map), Value::Table(right_map)) => {
+            for (rname, rvalue) in right_map {
+                match left_map.remove(&rname) {
+                    Some(lvalue) => {
+                        let merged_value = merge_toml_values(lvalue, rvalue);
+                        left_map.insert(rname, merged_value);
+                    }
+                    None => {
+                        left_map.insert(rname, rvalue);
+                    }
+                }
+            }
+            Value::Table(left_map)
+        }
+        // Catch everything else we didn't handle, and use the right value
+        (_, value) => value,
+    }
+}
+
+#[cfg(test)]
+mod merge_toml_tests {
+    use super::merge_toml_values;
+
+    #[test]
+    fn language_tomls() {
+        use toml::Value;
+
+        const USER: &str = "
+        [[language]]
+        name = \"nix\"
+        test = \"bbb\"
+        indent = { tab-width = 4, unit = \" \", test = \"aaa\" }
+        ";
+
+        let base: Value = toml::from_slice(include_bytes!("../../languages.toml"))
+            .expect("Couldn't parse built-in languages config");
+        let user: Value = toml::from_str(USER).unwrap();
+
+        let merged = merge_toml_values(base, user);
+        let languages = merged.get("language").unwrap().as_array().unwrap();
+        let nix = languages
+            .iter()
+            .find(|v| v.get("name").unwrap().as_str().unwrap() == "nix")
+            .unwrap();
+        let nix_indent = nix.get("indent").unwrap();
+
+        // We changed tab-width and unit in indent so check them if they are the new values
+        assert_eq!(
+            nix_indent.get("tab-width").unwrap().as_integer().unwrap(),
+            4
+        );
+        assert_eq!(nix_indent.get("unit").unwrap().as_str().unwrap(), " ");
+        // We added new keys, so check them
+        assert_eq!(nix.get("test").unwrap().as_str().unwrap(), "bbb");
+        assert_eq!(nix_indent.get("test").unwrap().as_str().unwrap(), "aaa");
+        // We didn't change comment-token so it should be same
+        assert_eq!(nix.get("comment-token").unwrap().as_str().unwrap(), "#");
+    }
+}
diff --git a/helix-term/Cargo.toml b/helix-term/Cargo.toml index 93d50d7e3..86d72561a 100644 --- a/helix-term/Cargo.toml +++ b/helix-term/Cargo.toml @@ -26,6 +26,7 @@ helix-core = { version = "0.6", path = "../helix-core" } helix-view = { version = "0.6", path = "../helix-view" } helix-lsp = { version = "0.6", path = "../helix-lsp" } helix-dap = { version = "0.6", path = "../helix-dap" } +helix-loader = { version = "0.6", path = "../helix-loader" } anyhow = "1" once_cell = "1.10" diff --git a/helix-term/build.rs b/helix-term/build.rs index 7303041cd..b5d62b285 100644 --- a/helix-term/build.rs +++ b/helix-term/build.rs @@ -14,10 +14,5 @@ fn main() { None => env!("CARGO_PKG_VERSION").into(), }; - println!( - "cargo:rustc-env=BUILD_TARGET={}", - std::env::var("TARGET").unwrap() - ); -
println!("cargo:rustc-env=VERSION_AND_GIT_HASH={}", version); } diff --git a/helix-term/src/application.rs b/helix-term/src/application.rs index e885bc49b..269ce13d1 100644 --- a/helix-term/src/application.rs +++ b/helix-term/src/application.rs @@ -61,10 +61,10 @@ impl Application { let mut compositor = Compositor::new()?; let size = compositor.size(); - let conf_dir = helix_core::config_dir(); + let conf_dir = helix_loader::config_dir(); let theme_loader = - std::sync::Arc::new(theme::Loader::new(&conf_dir, &helix_core::runtime_dir())); + std::sync::Arc::new(theme::Loader::new(&conf_dir, &helix_loader::runtime_dir())); let true_color = config.editor.true_color || crate::true_color(); let theme = config @@ -109,7 +109,7 @@ impl Application { compositor.push(editor_view); if args.load_tutor { - let path = helix_core::runtime_dir().join("tutor.txt"); + let path = helix_loader::runtime_dir().join("tutor.txt"); editor.open(path, Action::VerticalSplit)?; // Unset path to prevent accidentally saving to the original tutor file. doc_mut!(editor).set_path(None)?; diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index 4cc996d65..3301d1486 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -828,7 +828,7 @@ fn tutor( _args: &[Cow], _event: PromptEvent, ) -> anyhow::Result<()> { - let path = helix_core::runtime_dir().join("tutor.txt"); + let path = helix_loader::runtime_dir().join("tutor.txt"); cx.editor.open(path, Action::Replace)?; // Unset path to prevent accidentally saving to the original tutor file. 
doc_mut!(cx.editor).set_path(None)?; diff --git a/helix-term/src/health.rs b/helix-term/src/health.rs index 5ef20d934..f13d35f09 100644 --- a/helix-term/src/health.rs +++ b/helix-term/src/health.rs @@ -1,8 +1,6 @@ use crossterm::style::{Color, Print, Stylize}; -use helix_core::{ - config::{default_syntax_loader, user_syntax_loader}, - syntax::load_runtime_file, -}; +use helix_core::config::{default_syntax_loader, user_syntax_loader}; +use helix_loader::grammar::load_runtime_file; #[derive(Copy, Clone)] pub enum TsFeature { @@ -43,10 +41,10 @@ impl TsFeature { /// Display general diagnostics. pub fn general() { - let config_file = helix_core::config_file(); - let lang_file = helix_core::lang_config_file(); - let log_file = helix_core::log_file(); - let rt_dir = helix_core::runtime_dir(); + let config_file = helix_loader::config_file(); + let lang_file = helix_loader::lang_config_file(); + let log_file = helix_loader::log_file(); + let rt_dir = helix_loader::runtime_dir(); if config_file.exists() { println!("Config file: {}", config_file.display()); diff --git a/helix-term/src/lib.rs b/helix-term/src/lib.rs index 227479988..fc8e934e1 100644 --- a/helix-term/src/lib.rs +++ b/helix-term/src/lib.rs @@ -7,7 +7,6 @@ pub mod commands; pub mod compositor; pub mod config; pub mod health; -pub mod grammars; pub mod job; pub mod keymap; pub mod ui; diff --git a/helix-term/src/main.rs b/helix-term/src/main.rs index 67f88b4e1..a69e121b1 100644 --- a/helix-term/src/main.rs +++ b/helix-term/src/main.rs @@ -40,7 +40,7 @@ fn main() -> Result<()> { #[tokio::main] async fn main_impl() -> Result { - let logpath = helix_core::log_file(); + let logpath = helix_loader::log_file(); let parent = logpath.parent().unwrap(); if !parent.exists() { std::fs::create_dir_all(parent).ok(); @@ -105,21 +105,21 @@ FLAGS: } if args.fetch_grammars { - helix_term::grammars::fetch_grammars()?; + helix_loader::grammar::fetch_grammars()?; return Ok(0); } if args.build_grammars { - 
helix_term::grammars::build_grammars()?; + helix_loader::grammar::build_grammars()?; return Ok(0); } - let conf_dir = helix_core::config_dir(); + let conf_dir = helix_loader::config_dir(); if !conf_dir.exists() { std::fs::create_dir_all(&conf_dir).ok(); } - let config = match std::fs::read_to_string(helix_core::config_file()) { + let config = match std::fs::read_to_string(helix_loader::config_file()) { Ok(config) => toml::from_str(&config) .map(merge_keys) .unwrap_or_else(|err| { diff --git a/helix-term/src/ui/mod.rs b/helix-term/src/ui/mod.rs index d46de2d39..6299a4739 100644 --- a/helix-term/src/ui/mod.rs +++ b/helix-term/src/ui/mod.rs @@ -218,9 +218,9 @@ pub mod completers { } pub fn theme(_editor: &Editor, input: &str) -> Vec { - let mut names = theme::Loader::read_names(&helix_core::runtime_dir().join("themes")); + let mut names = theme::Loader::read_names(&helix_loader::runtime_dir().join("themes")); names.extend(theme::Loader::read_names( - &helix_core::config_dir().join("themes"), + &helix_loader::config_dir().join("themes"), )); names.push("default".into()); names.push("base16_default".into()); diff --git a/languages.toml b/languages.toml index 18b45ea60..181820112 100644 --- a/languages.toml +++ b/languages.toml @@ -302,8 +302,7 @@ indent = { tab-width = 2, unit = " " } [[grammar]] name = "typescript" -source = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "3e897ea5925f037cfae2e551f8e6b12eec2a201a" } -path = "typescript" +source = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "3e897ea5925f037cfae2e551f8e6b12eec2a201a", subpath = "typescript" } [[language]] name = "tsx" @@ -317,8 +316,7 @@ indent = { tab-width = 2, unit = " " } [[grammar]] name = "tsx" -source = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "3e897ea5925f037cfae2e551f8e6b12eec2a201a" } -path = "tsx" +source = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = 
"3e897ea5925f037cfae2e551f8e6b12eec2a201a", subpath = "tsx" } [[language]] name = "css" @@ -522,8 +520,7 @@ indent = { tab-width = 2, unit = " " } [[grammar]] name = "ocaml" -source = { git = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "23d419ba45789c5a47d31448061557716b02750a" } -path = "ocaml" +source = { git = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "23d419ba45789c5a47d31448061557716b02750a", subpath = "ocaml" } [[language]] name = "ocaml-interface" @@ -536,8 +533,7 @@ indent = { tab-width = 2, unit = " "} [[grammar]] name = "ocaml-interface" -source = { git = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "23d419ba45789c5a47d31448061557716b02750a" } -path = "interface" +source = { git = "https://github.com/tree-sitter/tree-sitter-ocaml", rev = "23d419ba45789c5a47d31448061557716b02750a", subpath = "interface" } [[language]] name = "lua"