diff --git a/Cargo.lock b/Cargo.lock index 7156fc27e..c10757a2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1441,6 +1441,7 @@ dependencies = [ "serde", "serde_json", "slotmap", + "spellbook", "tempfile", "thiserror", "tokio", @@ -2167,6 +2168,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spellbook" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82fee930378e69527fbeb7e5f737e3ddaf32e675a04fc9375de676aa8de84693" +dependencies = [ + "ahash", + "hashbrown", +] + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 763992480..57885c672 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ tree-sitter = { version = "0.22" } nucleo = "0.5.0" slotmap = "1.0.7" thiserror = "1.0" +spellbook = "0.1" [workspace.package] version = "24.7.0" diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs index 99e911544..0ccd173ea 100644 --- a/helix-loader/src/grammar.rs +++ b/helix-loader/src/grammar.rs @@ -592,6 +592,6 @@ fn mtime(path: &Path) -> Result { /// Gives the contents of a file from a language's `runtime/queries/` /// directory pub fn load_runtime_file(language: &str, filename: &str) -> Result { - let path = crate::runtime_file(&PathBuf::new().join("queries").join(language).join(filename)); + let path = crate::runtime_file(PathBuf::new().join("queries").join(language).join(filename)); std::fs::read_to_string(path) } diff --git a/helix-loader/src/lib.rs b/helix-loader/src/lib.rs index f36c76c4f..1a6210529 100644 --- a/helix-loader/src/lib.rs +++ b/helix-loader/src/lib.rs @@ -107,7 +107,8 @@ fn find_runtime_file(rel_path: &Path) -> Option { /// The valid runtime directories are searched in priority order and the first /// file found to exist is returned, otherwise the path to the final attempt /// that failed. 
-pub fn runtime_file(rel_path: &Path) -> PathBuf {
+pub fn runtime_file<P: AsRef<Path>>(rel_path: P) -> PathBuf {
+    let rel_path = rel_path.as_ref();
     find_runtime_file(rel_path).unwrap_or_else(|| {
         RUNTIME_DIRS
             .last()
@@ -132,6 +133,18 @@ pub fn cache_dir() -> PathBuf {
     path
 }
 
+/// Returns helix's directory for persistent state, e.g. the personal dictionary.
+///
+/// Base strategies that have no dedicated state directory use the data directory
+/// instead.
+pub fn state_dir() -> PathBuf {
+    let strategy = choose_base_strategy().expect("Unable to find the state directory!");
+    // `state_dir` is optional and not provided by every base strategy.
+    let mut path = strategy.state_dir().unwrap_or_else(|| strategy.data_dir());
+    path.push("helix");
+    path
+}
+
 pub fn config_file() -> PathBuf {
     CONFIG_FILE.get().map(|path| path.to_path_buf()).unwrap()
 }
@@ -152,6 +165,12 @@ pub fn default_log_file() -> PathBuf {
     cache_dir().join("helix.log")
 }
 
+// HACK: there should be a personal dictionary per-locale.
+/// Returns the path of the personal dictionary file inside [`state_dir`].
+pub fn personal_dictionary_file() -> PathBuf {
+    state_dir().join("personal-dictionary.txt")
+}
+
 /// Merge two TOML documents, merging values from `right` onto `left`
 ///
 /// When an array exists in both `left` and `right`, `right`'s array is
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 6e037a471..d7561bd45 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -567,6 +567,7 @@ impl MappableCommand {
         command_palette, "Open command palette",
         goto_word, "Jump to a two-character label",
         extend_to_word, "Extend to a two-character label",
+        add_word_to_personal_dictionary, "Add the word under the primary cursor to the personal dictionary for the current locale",
     );
 }
 
@@ -6298,3 +6299,80 @@ fn jump_to_word(cx: &mut Context, behaviour: Movement) {
     }
     jump_to_label(cx, words, behaviour)
 }
+
+// HACK: this should be folded into code actions.
+/// Prompts for a word and adds it to the personal dictionary.
+///
+/// The prompt is pre-filled with the primary selection, or with the word around it when
+/// that selection is only one character long. On validation the trimmed input is added
+/// to the in-memory dictionary and then appended to the personal dictionary file; a
+/// failure of either step is reported with `set_error`.
+fn add_word_to_personal_dictionary(cx: &mut Context) {
+    let (view, doc) = current_ref!(cx.editor);
+    let text = doc.text().slice(..);
+    let selection = doc.selection(view.id).primary();
+    let range = if selection.len() == 1 {
+        textobject::textobject_word(text, selection, textobject::TextObject::Inside, 1, false)
+    } else {
+        selection
+    };
+    let word = range.fragment(text);
+
+    let prompt = ui::Prompt::new(
+        "add-word:".into(),
+        None,
+        ui::completers::none,
+        move |cx, input: &str, event: PromptEvent| {
+            /// Appends `word` on its own line to the personal dictionary file.
+            fn append_word(word: &str) -> std::io::Result<()> {
+                use std::io::Write;
+                let path = helix_loader::personal_dictionary_file();
+                // `OpenOptions::create` only creates the file itself, so make sure the
+                // directory it lives in exists first.
+                if let Some(dir) = path.parent() {
+                    std::fs::create_dir_all(dir)?;
+                }
+                let mut file = std::fs::OpenOptions::new()
+                    .create(true)
+                    .append(true)
+                    .open(path)?;
+                file.write_all(word.as_bytes())?;
+                file.write_all(helix_core::NATIVE_LINE_ENDING.as_str().as_bytes())?;
+                file.sync_data()?;
+
+                Ok(())
+            }
+
+            if event != PromptEvent::Validate {
+                return;
+            }
+
+            // Entries are trimmed when the file is read back, so trim here as well and
+            // ignore input that is blank.
+            let input = input.trim();
+            if input.is_empty() {
+                return;
+            }
+
+            if let Err(err) = cx.editor.dictionary.add(input) {
+                cx.editor.set_error(format!(
+                    "Failed to add \"{input}\" to the dictionary: {err}"
+                ));
+                return;
+            }
+
+            if let Err(err) = append_word(input) {
+                cx.editor.set_error(format!(
+                    "Failed to persist \"{input}\" to the on-disk dictionary: {err}"
+                ));
+                return;
+            }
+
+            cx.editor
+                .set_status(format!("Added \"{input}\" to the dictionary"));
+        },
+    )
+    .with_line(word.into(), cx.editor);
+
+    cx.push_layer(Box::new(prompt));
+}
diff --git a/helix-term/src/keymap/default.rs b/helix-term/src/keymap/default.rs
index 5a3e8eed4..7d4e48435 100644
--- a/helix-term/src/keymap/default.rs
+++ b/helix-term/src/keymap/default.rs
@@ -229,6 +229,7 @@ pub fn default() -> HashMap {
             "D" => workspace_diagnostics_picker,
             "g" => changed_file_picker,
             "a" => code_action,
+            "A" => add_word_to_personal_dictionary,
             "'" => last_picker,
             "G" => { "Debug (experimental)" sticky=true
                 "l" => dap_launch,
diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
index f7541fe25..5efa773b9 100644
--- a/helix-term/src/ui/editor.rs
+++ b/helix-term/src/ui/editor.rs
@@ -21,6 +21,7 @@ use helix_core::{
     unicode::width::UnicodeWidthStr,
     visual_offset_from_block, Change, Position, Range, Selection, Transaction,
 };
+use helix_stdx::rope::RopeSliceExt;
 use helix_view::{
     annotations::diagnostics::DiagnosticFilter,
     document::{Mode, SavePoint, SCRATCH_BUFFER_NAME},
@@ -28,7 +29,7 @@ use helix_view::{
     graphics::{Color, CursorKind, Modifier, Rect, Style},
     input::{KeyEvent, MouseButton, MouseEvent, MouseEventKind},
     keyboard::{KeyCode, KeyModifiers},
-    Document, Editor, Theme, View,
+    Dictionary, Document, Editor, Theme, View,
 };
 use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc, sync::Arc};
 
@@ -144,6 +145,10 @@ impl EditorView {
             }
             overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic));
         }
+        let spell = Self::doc_spell_highlights(&editor.dictionary, doc, view, theme);
+        if !spell.is_empty() {
+            overlay_highlights = Box::new(syntax::merge(overlay_highlights, spell));
+        }
 
         if is_focused {
             let highlights = syntax::merge(
@@ -460,6 +465,69 @@ impl EditorView {
         ]
     }
 
+    /// Get highlight spans for misspelled words in the visible part of a document.
+    ///
+    /// Word-like tokens between the first and the estimated last line of the view are
+    /// checked against `dict`. Every token it rejects yields a char range tagged with
+    /// the theme's `diagnostic.error` scope; a theme without that scope yields no spans.
+    pub fn doc_spell_highlights(
+        dict: &Dictionary,
+        doc: &Document,
+        view: &View,
+        theme: &Theme,
+    ) -> Vec<(usize, std::ops::Range<usize>)> {
+        // This is **very** ***very*** naive and not at all reflective of what the actual
+        // integration will look like. Doing this per-render is very needlessly expensive.
+        // Instead it should be done in the background and possibly incrementally (only
+        // re-checking ranges that are affected by document changes). However regex-cursor
+        // is very fast and so is spellbook (degenerate cases max out at 1μs in a release
+        // build on my machine, i.e. a worst case throughput of 2 million words / second) so
+        // this is suitable for my testing. I mostly want to find cases where spellbook's
+        // results are surprising.
+        // Also we want to use tree-sitter to mark nodes as ones that should be spellchecked
+        // and maybe specify strategies for doing tokenization (try to tokenize prose vs.
+        // programming languages).
+        // Plus these should really be proper diagnostics so that we can pull them up in the
+        // diagnostics picker and jump to them.
+        use helix_stdx::rope::Regex;
+        use once_cell::sync::Lazy;
+        use std::borrow::Cow;
+        static WORDS: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap());
+
+        // Rendering must not panic because a theme lacks the scope: without a style to
+        // apply there is nothing to highlight.
+        let Some(error) = theme.find_scope_index("diagnostic.error") else {
+            return Vec::new();
+        };
+        let mut spans = Vec::new();
+
+        let text = doc.text().slice(..);
+        let start = text.line_to_char(text.char_to_line(doc.view_offset(view.id).anchor));
+        let end = text.line_to_char(view.estimate_last_doc_line(doc) + 1);
+
+        for match_ in WORDS.find_iter(text.regex_input_at(start..end)) {
+            // Every part of `WORDS` is optional, so it also matches the empty string
+            // between words. Those matches are not words: skip them.
+            if match_.start() == match_.end() {
+                continue;
+            }
+            let range = text.byte_to_char(match_.start())..text.byte_to_char(match_.end());
+            // TODO: consider how to allow passing the RopeSlice to spellbook:
+            // * Use an Input trait like regex-cursor?
+            // * Accept `impl Iterator`?
+            // * Maybe spellbook should have an internal `String` buffer and it should try to copy
+            //   the word into that? Only in the best case do you not have to allocate at all.
+            //   Maybe we should use a single string buffer and perform all changes to the string
+            //   in-place instead of using `replace` from the stdlib and Cows.
+            let word = Cow::from(text.slice(range.clone()));
+            if !dict.check(&word) {
+                spans.push((error, range))
+            }
+        }
+
+        spans
+    }
+
     /// Get highlight spans for selections in a document view.
     pub fn doc_selection_highlights(
         mode: Mode,
diff --git a/helix-view/Cargo.toml b/helix-view/Cargo.toml
index ddfa9f7e4..2163f0a6f 100644
--- a/helix-view/Cargo.toml
+++ b/helix-view/Cargo.toml
@@ -52,6 +52,8 @@ log = "~0.4"
 parking_lot = "0.12.3"
 thiserror.workspace = true
 
+spellbook.workspace = true
+
 [target.'cfg(windows)'.dependencies]
 clipboard-win = { version = "5.4", features = ["std"] }
 
diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index 1708b3b4e..c44a13d97 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs
@@ -10,7 +10,7 @@ use crate::{
     register::Registers,
     theme::{self, Theme},
     tree::{self, Tree},
-    Document, DocumentId, View, ViewId,
+    Dictionary, Document, DocumentId, View, ViewId,
 };
 use dap::StackFrame;
 use helix_vcs::DiffProviderRegistry;
@@ -1078,6 +1078,9 @@ pub struct Editor {
 
     pub mouse_down_range: Option,
     pub cursor_cache: CursorCache,
+
+    /// HACK: the dictionary used for spell checking; built in `Editor::new`.
+    pub dictionary: Dictionary,
 }
 
 pub type Motion = Box;
@@ -1157,6 +1160,43 @@ impl Editor {
         // HAXX: offset the render area height by 1 to account for prompt/commandline
         area.height -= 1;
 
+        // HACK: what's the right interface for Spellbook to expose so we don't have to
+        // read these entire files into strings? (See associated TODO in Spellbook.)
+        // NOTE(review): a missing or malformed en_US dictionary still aborts startup;
+        // making spell checking optional needs `Editor::dictionary` to become an `Option`.
+        let aff =
+            std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.aff"))
+                .expect("failed to read runtime file dictionaries/en_US/en_US.aff");
+        let dic =
+            std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.dic"))
+                .expect("failed to read runtime file dictionaries/en_US/en_US.dic");
+        // HACK: All this stuff should happen off the main thread.
+        let mut dictionary =
+            Dictionary::new(&aff, &dic).expect("failed to parse the en_US dictionary");
+        // A personal dictionary that cannot be opened is ignored.
+        if let Ok(file) = std::fs::File::open(helix_loader::personal_dictionary_file()) {
+            use std::io::{BufRead as _, BufReader};
+            let reader = BufReader::with_capacity(8 * 1024, file);
+            for line in reader.lines() {
+                // The file is user-editable, so bad content must not take down the editor.
+                let line = match line {
+                    Ok(line) => line,
+                    Err(err) => {
+                        log::warn!("Failed to read the personal dictionary: {err}");
+                        break;
+                    }
+                };
+                let line = line.trim();
+                if line.is_empty() {
+                    continue;
+                }
+
+                if let Err(err) = dictionary.add(line) {
+                    log::warn!("Skipping personal dictionary entry \"{line}\": {err}");
+                }
+            }
+        }
+
         Self {
             mode: Mode::Normal,
             tree: Tree::new(area),
@@ -1195,6 +1235,7 @@ impl Editor {
             handlers,
             mouse_down_range: None,
             cursor_cache: CursorCache::default(),
+            dictionary,
         }
     }
 
diff --git a/helix-view/src/lib.rs b/helix-view/src/lib.rs
index d54b49ef5..c57eff189 100644
--- a/helix-view/src/lib.rs
+++ b/helix-view/src/lib.rs
@@ -75,5 +75,6 @@ pub fn align_view(doc: &mut Document, view: &View, align: Align) {
 pub use document::Document;
 pub use editor::Editor;
 use helix_core::char_idx_at_visual_offset;
+pub use spellbook::Dictionary;
 pub use theme::Theme;
 pub use view::View;