Naive spellbook integration

spellbook
Michael Davis 3 months ago
parent 84fbadbdde
commit 794b2bef34
No known key found for this signature in database

11
Cargo.lock generated

@ -1441,6 +1441,7 @@ dependencies = [
"serde",
"serde_json",
"slotmap",
"spellbook",
"tempfile",
"thiserror",
"tokio",
@ -2167,6 +2168,16 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "spellbook"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82fee930378e69527fbeb7e5f737e3ddaf32e675a04fc9375de676aa8de84693"
dependencies = [
"ahash",
"hashbrown",
]
[[package]]
name = "static_assertions"
version = "1.1.0"

@ -42,6 +42,7 @@ tree-sitter = { version = "0.22" }
nucleo = "0.5.0"
slotmap = "1.0.7"
thiserror = "1.0"
spellbook = "0.1"
[workspace.package]
version = "24.7.0"

@ -592,6 +592,6 @@ fn mtime(path: &Path) -> Result<SystemTime> {
/// Gives the contents of a file from a language's `runtime/queries/<lang>`
/// directory
///
/// The relative path `queries/<language>/<filename>` is resolved through
/// `crate::runtime_file` and the resulting file is read into a `String`.
///
/// # Errors
///
/// Returns the `std::io::Error` produced by `std::fs::read_to_string` when the
/// resolved path cannot be read.
pub fn load_runtime_file(language: &str, filename: &str) -> Result<String, std::io::Error> {
// NOTE(review): the two `let path` lines below look like the removed/added pair of a
// diff hunk (`&PathBuf` vs. `PathBuf` argument) - confirm. As written, the second
// binding shadows the first.
let path = crate::runtime_file(&PathBuf::new().join("queries").join(language).join(filename));
let path = crate::runtime_file(PathBuf::new().join("queries").join(language).join(filename));
std::fs::read_to_string(path)
}

@ -107,7 +107,8 @@ fn find_runtime_file(rel_path: &Path) -> Option<PathBuf> {
/// The valid runtime directories are searched in priority order and the first
/// file found to exist is returned, otherwise the path to the final attempt
/// that failed.
pub fn runtime_file(rel_path: &Path) -> PathBuf {
pub fn runtime_file<P: AsRef<Path>>(rel_path: P) -> PathBuf {
let rel_path = rel_path.as_ref();
find_runtime_file(rel_path).unwrap_or_else(|| {
RUNTIME_DIRS
.last()
@ -132,6 +133,14 @@ pub fn cache_dir() -> PathBuf {
path
}
/// Returns the directory in which Helix keeps its persistent state: the base
/// strategy's state directory with a `helix` component appended.
///
/// When the chosen base strategy does not define a state directory, the
/// strategy's data directory is used instead so that callers always get a
/// usable path.
///
/// # Panics
///
/// Panics if no base directory strategy can be chosen.
pub fn state_dir() -> PathBuf {
    let strategy = choose_base_strategy().expect("Unable to find the state directory!");
    // `state_dir` is optional: not every base strategy has a notion of a state
    // directory. Fall back to the data directory instead of unwrapping.
    let mut path = strategy
        .state_dir()
        .unwrap_or_else(|| strategy.data_dir());
    path.push("helix");
    path
}
/// Returns an owned copy of the configuration file path held in `CONFIG_FILE`.
///
/// # Panics
///
/// Panics if `CONFIG_FILE.get()` yields `None`.
pub fn config_file() -> PathBuf {
    let stored = CONFIG_FILE.get().unwrap();
    stored.to_path_buf()
}
@ -152,6 +161,11 @@ pub fn default_log_file() -> PathBuf {
cache_dir().join("helix.log")
}
// HACK: there should be a personal dictionary per-locale.
/// Returns the path of the personal dictionary: `personal-dictionary.txt`
/// inside the directory returned by `state_dir`.
pub fn personal_dictionary_file() -> PathBuf {
    let mut path = state_dir();
    path.push("personal-dictionary.txt");
    path
}
/// Merge two TOML documents, merging values from `right` onto `left`
///
/// When an array exists in both `left` and `right`, `right`'s array is

@ -567,6 +567,7 @@ impl MappableCommand {
command_palette, "Open command palette",
goto_word, "Jump to a two-character label",
extend_to_word, "Extend to a two-character label",
add_word_to_personal_dictionary, "Add the word under the primary cursor to the personal dictionary for the current locale",
);
}
@ -6298,3 +6299,61 @@ fn jump_to_word(cx: &mut Context, behaviour: Movement) {
}
jump_to_label(cx, words, behaviour)
}
// HACK: this should be folded into code actions.
/// Opens an `add-word:` prompt pre-filled with the text of the primary
/// selection and, once the prompt is validated, adds the entered word to the
/// editor's in-memory dictionary and appends it to the on-disk personal
/// dictionary.
///
/// Failures of either step are reported through `cx.editor.set_error`; on
/// success a status message is shown.
fn add_word_to_personal_dictionary(cx: &mut Context) {
    let (view, doc) = current_ref!(cx.editor);
    let text = doc.text().slice(..);
    let selection = doc.selection(view.id).primary();
    // A selection of length 1 is expanded to the word around it (presumably
    // this is the "bare cursor" case); anything else is used as selected.
    let range = if selection.len() == 1 {
        textobject::textobject_word(text, selection, textobject::TextObject::Inside, 1, false)
    } else {
        selection
    };
    let word = range.fragment(text);

    let prompt = ui::Prompt::new(
        "add-word:".into(),
        None,
        ui::completers::none,
        move |cx, input: &str, event: PromptEvent| {
            /// Appends `word` followed by the native line ending to the
            /// personal dictionary file, creating the file and its parent
            /// directory when they do not exist yet.
            fn append_word(word: &str) -> std::io::Result<()> {
                use std::io::Write;

                // Use the same path the editor reads the dictionary from.
                let path = helix_loader::personal_dictionary_file();
                // `OpenOptions::create` only creates the file itself, so make
                // sure the containing directory is there first.
                if let Some(parent) = path.parent() {
                    std::fs::create_dir_all(parent)?;
                }
                let mut file = std::fs::OpenOptions::new()
                    .create(true)
                    .append(true)
                    .open(&path)?;
                file.write_all(word.as_bytes())?;
                file.write_all(helix_core::NATIVE_LINE_ENDING.as_str().as_bytes())?;
                file.sync_data()?;
                Ok(())
            }

            if event != PromptEvent::Validate {
                return;
            }

            // The personal dictionary is read back line by line with each line
            // trimmed and blank lines skipped, so normalise the input the same
            // way to keep the in-memory and on-disk dictionaries in agreement.
            let input = input.trim();
            if input.is_empty() {
                return;
            }

            if let Err(err) = cx.editor.dictionary.add(input) {
                cx.editor.set_error(format!(
                    "Failed to add \"{input}\" to the dictionary: {err}"
                ));
                return;
            }

            if let Err(err) = append_word(input) {
                cx.editor.set_error(format!(
                    "Failed to persist \"{input}\" to the on-disk dictionary: {err}"
                ));
                return;
            }

            cx.editor
                .set_status(format!("Added \"{input}\" to the dictionary"));
        },
    )
    .with_line(word.into(), cx.editor);
    cx.push_layer(Box::new(prompt));
}

@ -229,6 +229,7 @@ pub fn default() -> HashMap<Mode, KeyTrie> {
"D" => workspace_diagnostics_picker,
"g" => changed_file_picker,
"a" => code_action,
"A" => add_word_to_personal_dictionary,
"'" => last_picker,
"G" => { "Debug (experimental)" sticky=true
"l" => dap_launch,

@ -21,6 +21,7 @@ use helix_core::{
unicode::width::UnicodeWidthStr,
visual_offset_from_block, Change, Position, Range, Selection, Transaction,
};
use helix_stdx::rope::RopeSliceExt;
use helix_view::{
annotations::diagnostics::DiagnosticFilter,
document::{Mode, SavePoint, SCRATCH_BUFFER_NAME},
@ -28,7 +29,7 @@ use helix_view::{
graphics::{Color, CursorKind, Modifier, Rect, Style},
input::{KeyEvent, MouseButton, MouseEvent, MouseEventKind},
keyboard::{KeyCode, KeyModifiers},
Document, Editor, Theme, View,
Dictionary, Document, Editor, Theme, View,
};
use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc, sync::Arc};
@ -144,6 +145,10 @@ impl EditorView {
}
overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic));
}
let spell = Self::doc_spell_highlights(&editor.dictionary, doc, view, theme);
if !spell.is_empty() {
overlay_highlights = Box::new(syntax::merge(overlay_highlights, spell));
}
if is_focused {
let highlights = syntax::merge(
@ -460,6 +465,55 @@ impl EditorView {
]
}
/// Get highlight spans for words in the viewed part of a document that the
/// dictionary does not accept.
///
/// Words are found with a regex between the start of the line containing the
/// view's offset anchor and the end of the view's estimated last line. Every
/// word for which `dict.check` returns `false` produces one
/// `(scope index, char range)` pair using the theme's `diagnostic.error`
/// scope. Spans are returned in document order.
///
/// Returns an empty vector when the theme has no scope for
/// `diagnostic.error`.
pub fn doc_spell_highlights(
    dict: &Dictionary,
    doc: &Document,
    view: &View,
    theme: &Theme,
) -> Vec<(usize, std::ops::Range<usize>)> {
    // This is **very** ***very*** naive and not at all reflective of what the actual
    // integration will look like. Doing this per-render is very needlessly expensive.
    // Instead it should be done in the background and possibly incrementally (only
    // re-checking ranges that are affected by document changes). However regex-cursor
    // is very fast and so is spellbook (degenerate cases max out at 1μs in a release
    // build on my machine, i.e. a worst case throughput of 2 million words / second) so
    // this is suitable for my testing. I mostly want to find cases where spellbook's
    // results are surprising.
    // Also we want to use tree-sitter to mark nodes as ones that should be spellchecked
    // and maybe specify strategies for doing tokenization (try to tokenize prose vs.
    // programming languages).
    // Plus these should really be proper diagnostics so that we can pull them up in the
    // diagnostics picker and jump to them.
    use helix_stdx::rope::Regex;
    use once_cell::sync::Lazy;
    use std::borrow::Cow;
    static WORDS: Lazy<Regex> =
        Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap());

    // Without a scope to paint with there is nothing to highlight. Rendering
    // must not panic just because a theme omits the scope.
    let error = match theme.find_scope_index("diagnostic.error") {
        Some(scope) => scope,
        None => return Vec::new(),
    };

    let mut spans = Vec::new();
    let text = doc.text().slice(..);
    let start = text.line_to_char(text.char_to_line(doc.view_offset(view.id).anchor));
    let end = text.line_to_char(view.estimate_last_doc_line(doc) + 1);

    for match_ in WORDS.find_iter(text.regex_input_at(start..end)) {
        // Every part of `WORDS` is optional, so it also produces zero-length
        // matches between words. Those are not words: skip them.
        if match_.start() == match_.end() {
            continue;
        }
        // Match offsets are in bytes; highlight spans are in chars.
        let range = text.byte_to_char(match_.start())..text.byte_to_char(match_.end());
        // TODO: consider how to allow passing the RopeSlice to spellbook:
        // * Use an Input trait like regex-cursor?
        // * Accept `impl Iterator<Item = char>`?
        // * Maybe spellbook should have an internal `String` buffer and it should try to copy
        //   the word into that? Only in the best case do you not have to allocate at all.
        //   Maybe we should use a single string buffer and perform all changes to the string
        //   in-place instead of using `replace` from the stdlib and Cows.
        let word = Cow::from(text.slice(range.clone()));
        if !dict.check(&word) {
            spans.push((error, range));
        }
    }

    spans
}
/// Get highlight spans for selections in a document view.
pub fn doc_selection_highlights(
mode: Mode,

@ -52,6 +52,8 @@ log = "~0.4"
parking_lot = "0.12.3"
thiserror.workspace = true
spellbook.workspace = true
[target.'cfg(windows)'.dependencies]
clipboard-win = { version = "5.4", features = ["std"] }

@ -10,7 +10,7 @@ use crate::{
register::Registers,
theme::{self, Theme},
tree::{self, Tree},
Document, DocumentId, View, ViewId,
Dictionary, Document, DocumentId, View, ViewId,
};
use dap::StackFrame;
use helix_vcs::DiffProviderRegistry;
@ -1078,6 +1078,9 @@ pub struct Editor {
pub mouse_down_range: Option<Range>,
pub cursor_cache: CursorCache,
/// HACK: a single spell-checking dictionary stored directly on the editor.
pub dictionary: Dictionary,
}
pub type Motion = Box<dyn Fn(&mut Editor)>;
@ -1157,6 +1160,30 @@ impl Editor {
// HAXX: offset the render area height by 1 to account for prompt/commandline
area.height -= 1;
// HACK: what's the right interface for Spellbook to expose so we don't have to
// read these entire files into strings? (See associated TODO in Spellbook.)
let aff =
std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.aff"))
.unwrap();
let dic =
std::fs::read_to_string(helix_loader::runtime_file("dictionaries/en_US/en_US.dic"))
.unwrap();
// HACK: All this stuff should happen off the main thread.
let mut dictionary = Dictionary::new(&aff, &dic).unwrap();
if let Ok(file) = std::fs::File::open(helix_loader::personal_dictionary_file()) {
use std::io::{BufRead as _, BufReader};
let reader = BufReader::with_capacity(8 * 1024, file);
for line in reader.lines() {
let line = line.unwrap();
let line = line.trim();
if line.is_empty() {
continue;
}
dictionary.add(line).unwrap();
}
}
Self {
mode: Mode::Normal,
tree: Tree::new(area),
@ -1195,6 +1222,7 @@ impl Editor {
handlers,
mouse_down_range: None,
cursor_cache: CursorCache::default(),
dictionary,
}
}

@ -75,5 +75,6 @@ pub fn align_view(doc: &mut Document, view: &View, align: Align) {
pub use document::Document;
pub use editor::Editor;
use helix_core::char_idx_at_visual_offset;
pub use spellbook::Dictionary;
pub use theme::Theme;
pub use view::View;

Loading…
Cancel
Save