WIP: The proper Spellbook integration patch

Prior integration patches went about it in the simplest and most direct
way, which is unfortunately completely different from how it _should_
look. Some bad prior art:

* Checking was done during each render only on the current viewport
* Dictionary loading was hard-coded and done during `Editor::new`
* The UX for suggestions was not hooked into code actions
* Same for "Add {word} to dictionary"

Ultimately this is still very half-baked. Big parts are still to do:

* Run a tree-sitter query to discover parts of the document that need to
  be checked. Look at the queries used in Codebook - I believe we want
  to follow that strategy at least partially. It uses different captures
  to control the strategy used to parse the captured content. (For
  example capturing strings)
* Support multiple dictionaries at once. Not totally sure what this
  looks like yet, other than `dictionaries.iter().any(..)`.
* Figure out how many configuration levers we need. Again, Codebook is
  likely to be good inspiration here.
This commit is contained in:
Michael Davis 2025-03-20 20:52:51 -04:00
parent a8c82ea5e1
commit 67be15e749
No known key found for this signature in database
16 changed files with 443 additions and 7 deletions

12
Cargo.lock generated
View file

@ -1486,6 +1486,7 @@ dependencies = [
"nucleo",
"once_cell",
"open",
"parking_lot",
"pulldown-cmark",
"same-file",
"serde",
@ -1560,6 +1561,7 @@ dependencies = [
"serde",
"serde_json",
"slotmap",
"spellbook",
"tempfile",
"thiserror 2.0.12",
"tokio",
@ -2443,6 +2445,16 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "spellbook"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e3f1f8dbec9f9cb947f7401ac2ad7e50c5e7e53cea31bf5ee223f34277446ca"
dependencies = [
"foldhash",
"hashbrown 0.15.2",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"

View file

@ -47,6 +47,7 @@ unicode-segmentation = "1.2"
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
foldhash = "0.1"
parking_lot = "0.12"
spellbook = "0.3.1"
[workspace.package]
version = "25.1.1"

View file

@ -21,7 +21,7 @@ use globset::{GlobBuilder, GlobMatcher};
use crate::{
indent::{IndentStyle, MAX_INDENT},
LineEnding,
LineEnding, SpellingLanguage,
};
/// Configuration declared for a path in `.editorconfig` files.
@ -31,7 +31,7 @@ pub struct EditorConfig {
pub tab_width: Option<NonZeroU8>,
pub line_ending: Option<LineEnding>,
pub encoding: Option<&'static Encoding>,
// pub spelling_language: Option<SpellingLanguage>,
pub spelling_language: Option<SpellingLanguage>,
pub trim_trailing_whitespace: Option<bool>,
pub insert_final_newline: Option<bool>,
pub max_line_length: Option<NonZeroU16>,
@ -144,6 +144,7 @@ impl EditorConfig {
"utf-16be" => Some(encoding_rs::UTF_16BE),
_ => None,
});
let spelling_language = pairs.get("spelling_language").and_then(|s| s.parse().ok());
let trim_trailing_whitespace =
pairs
.get("trim_trailing_whitespace")
@ -170,6 +171,7 @@ impl EditorConfig {
tab_width,
line_ending,
encoding,
spelling_language,
trim_trailing_whitespace,
insert_final_newline,
max_line_length,

View file

@ -1,3 +1,5 @@
use std::fmt;
pub use encoding_rs as encoding;
pub mod auto_pairs;
@ -72,3 +74,48 @@ pub use line_ending::{LineEnding, NATIVE_LINE_ENDING};
pub use transaction::{Assoc, Change, ChangeSet, Deletion, Operation, Transaction};
pub use uri::Uri;
/// A language to use for spell checking.
///
/// This is defined in the form `"ab_CD"` where `a`, `b`, `C` and `D` are all ASCII letters.
/// The first two letters declare the ISO 639 language code and the latter two are the ISO 3166
/// territory identifier. The territory identifier is optional, so a language may just be `"ab"`.
///
/// NOTE: parsing is not implemented yet, so [`SpellingLanguage::EN_US`] is currently the only
/// value that can be constructed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SpellingLanguage([u8; 5]);

impl SpellingLanguage {
    /// American English (`"en_US"`).
    pub const EN_US: Self = Self(*b"en_US");

    /// Returns the identifier as a string slice, for example `"en_US"`.
    pub fn as_str(&self) -> &str {
        // SAFETY: `.0` is all ASCII bytes which is valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(&self.0) }
    }
}

impl fmt::Display for SpellingLanguage {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}

/// The error returned when a string cannot be parsed as a [`SpellingLanguage`].
///
/// Holds the rejected input so that it can be reported back to the user.
#[derive(Debug)]
pub struct ParseSpellingLanguageError(String);

impl std::str::FromStr for SpellingLanguage {
    type Err = ParseSpellingLanguageError;

    /// Parses a language identifier.
    ///
    /// # Errors
    ///
    /// Returns [`ParseSpellingLanguageError`] for every input other than `"en_US"`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // TODO: some parsing.
        if s == Self::EN_US.as_str() {
            Ok(Self::EN_US)
        } else {
            Err(ParseSpellingLanguageError(s.to_owned()))
        }
    }
}

impl fmt::Display for ParseSpellingLanguageError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The separator shown here must be the underscore that `SpellingLanguage` documents and
        // that `EN_US` uses; advertising 'ab-CD' would send users to a form that never parses.
        write!(f, "expected ISO639 language code and optional ISO3166 territory code ('ab' or 'ab_CD'), found '{}'", self.0)
    }
}

impl std::error::Error for ParseSpellingLanguageError {}

View file

@ -132,6 +132,15 @@ pub fn cache_dir() -> PathBuf {
path
}
/// Returns the directory in which Helix keeps persistent state (a `helix` directory inside the
/// base strategy's state directory).
///
/// `state_dir` on the base strategy returns an `Option`, so a strategy may not define a state
/// directory at all. In that case the strategy's data directory is used instead of panicking.
///
/// # Panics
///
/// Panics if no base strategy can be determined.
pub fn state_dir() -> PathBuf {
    let strategy = choose_base_strategy().expect("could not determine XDG strategy");
    let mut path = strategy
        .state_dir()
        .unwrap_or_else(|| strategy.data_dir());
    path.push("helix");
    path
}
pub fn config_file() -> PathBuf {
CONFIG_FILE.get().map(|path| path.to_path_buf()).unwrap()
}
@ -152,6 +161,11 @@ pub fn default_log_file() -> PathBuf {
cache_dir().join("helix.log")
}
// TODO: personal dictionary per language.
/// Returns the path of the personal dictionary: `personal-dictionary.txt` inside
/// [`state_dir`].
pub fn personal_dictionary_file() -> PathBuf {
    let mut path = state_dir();
    path.push("personal-dictionary.txt");
    path
}
/// Merge two TOML documents, merging values from `right` onto `left`
///
/// When an array exists in both `left` and `right`, `right`'s array is

View file

@ -91,6 +91,8 @@ serde = { version = "1.0", features = ["derive"] }
grep-regex = "0.1.13"
grep-searcher = "0.1.14"
parking_lot.workspace = true
[target.'cfg(not(windows))'.dependencies] # https://github.com/vorner/signal-hook/issues/100
signal-hook-tokio = { version = "0.3", features = ["futures-v0_3"] }
libc = "0.2.171"

View file

@ -18,20 +18,25 @@ mod diagnostics;
mod document_colors;
mod signature_help;
mod snippet;
mod spelling;
pub fn setup(config: Arc<ArcSwap<Config>>) -> Handlers {
events::register();
let event_tx = completion::CompletionHandler::new(config).spawn();
let completion_tx = completion::CompletionHandler::new(config).spawn();
let signature_hints = SignatureHelpHandler::new().spawn();
let auto_save = AutoSaveHandler::new().spawn();
let document_colors = DocumentColorsHandler::default().spawn();
let spelling = helix_view::handlers::spelling::SpellingHandler::new(
spelling::SpellingHandler::default().spawn(),
);
let handlers = Handlers {
completions: helix_view::handlers::completion::CompletionHandler::new(event_tx),
completions: helix_view::handlers::completion::CompletionHandler::new(completion_tx),
signature_hints,
auto_save,
document_colors,
spelling,
};
helix_view::handlers::register_hooks(&handlers);
@ -41,5 +46,6 @@ pub fn setup(config: Arc<ArcSwap<Config>>) -> Handlers {
diagnostics::register_hooks(&handlers);
snippet::register_hooks(&handlers);
document_colors::register_hooks(&handlers);
spelling::register_hooks(&handlers);
handlers
}

View file

@ -0,0 +1,208 @@
use std::{borrow::Cow, collections::HashSet, future::Future, sync::Arc, time::Duration};
use anyhow::Result;
use helix_core::{Rope, SpellingLanguage};
use helix_event::{cancelable_future, register_hook, send_blocking};
use helix_stdx::rope::{Regex, RopeSliceExt as _};
use helix_view::{
diagnostic::DiagnosticProvider,
editor::Severity,
events::{DocumentDidChange, DocumentDidOpen},
handlers::{spelling::SpellingEvent, Handlers},
Diagnostic, Dictionary, DocumentId, Editor,
};
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use tokio::time::Instant;
use crate::job;
const PROVIDER: DiagnosticProvider = DiagnosticProvider::Spelling;
/// Async hook state for spell checking: remembers which documents changed so that they can be
/// re-checked together once the debounce timer fires.
#[derive(Debug, Default)]
pub(super) struct SpellingHandler {
// Filled on `SpellingEvent::DocumentChanged` and emptied by `finish_debounce`.
changed_docs: HashSet<DocumentId>,
}
impl helix_event::AsyncHook for SpellingHandler {
type Event = SpellingEvent;
fn handle_event(&mut self, event: Self::Event, timeout: Option<Instant>) -> Option<Instant> {
match event {
SpellingEvent::DictionaryLoaded { language } => {
job::dispatch_blocking(move |editor, _compositor| {
let docs: Vec<_> = editor
.documents
.iter()
.filter_map(|(&doc_id, doc)| {
(doc.spelling_language() == Some(language)).then_some(doc_id)
})
.collect();
for doc in docs {
check_document(editor, doc);
}
});
timeout
}
SpellingEvent::DocumentOpened { doc } => {
job::dispatch_blocking(move |editor, _compositor| {
check_document(editor, doc);
});
timeout
}
SpellingEvent::DocumentChanged { doc } => {
self.changed_docs.insert(doc);
Some(Instant::now() + Duration::from_secs(3))
}
}
}
fn finish_debounce(&mut self) {
let docs = std::mem::take(&mut self.changed_docs);
job::dispatch_blocking(move |editor, _compositor| {
for doc in docs {
check_document(editor, doc);
}
});
}
}
fn check_document(editor: &mut Editor, doc_id: DocumentId) {
let Some(doc) = editor.documents.get(&doc_id) else {
return;
};
let Some(language) = doc.spelling_language() else {
return;
};
let Some(dictionary) = editor.dictionaries.get(&language).cloned() else {
if editor
.handlers
.spelling
.loading_dictionaries
.insert(language)
{
load_dictionary(language);
}
return;
};
let uri = doc.uri();
let future = check_text(dictionary, doc.text().clone());
let cancel = editor.handlers.spelling.open_request(doc_id);
tokio::spawn(async move {
match cancelable_future(future, cancel).await {
Some(Ok(diagnostics)) => {
job::dispatch_blocking(move |editor, _compositor| {
editor.handlers.spelling.requests.remove(&doc_id);
editor.handle_diagnostics(&PROVIDER, uri, None, diagnostics);
});
}
Some(Err(err)) => log::error!("spelling background job failed: {err}"),
None => (),
}
});
}
fn load_dictionary(language: SpellingLanguage) {
tokio::task::spawn_blocking(move || {
let aff = std::fs::read_to_string(helix_loader::runtime_file(format!(
"dictionaries/{language}/{language}.aff"
)))
.unwrap();
let dic = std::fs::read_to_string(helix_loader::runtime_file(format!(
"dictionaries/{language}/{language}.dic"
)))
.unwrap();
let mut dictionary = Dictionary::new(&aff, &dic).unwrap();
// TODO: personal dictionaries should be namespaced under runtime directories under the
// language.
if let Ok(file) = std::fs::File::open(helix_loader::personal_dictionary_file()) {
use std::io::{BufRead as _, BufReader};
let reader = BufReader::with_capacity(8 * 1024, file);
for line in reader.lines() {
let line = line.unwrap();
let line = line.trim();
if line.is_empty() {
continue;
}
dictionary.add(line).unwrap();
}
}
job::dispatch_blocking(move |editor, _compositor| {
let was_removed = editor
.handlers
.spelling
.loading_dictionaries
.remove(&language);
// Other processes should respect that a dictionary is loading and not change
// `loading_dictionaries`. So this should always be true.
debug_assert!(was_removed);
editor
.dictionaries
.insert(language, Arc::new(RwLock::new(dictionary)));
send_blocking(
&editor.handlers.spelling.event_tx,
SpellingEvent::DictionaryLoaded { language },
);
})
});
}
/// Spell checks `text` against `dictionary` on a blocking thread.
///
/// Returns a future resolving to one diagnostic per word that the dictionary rejects, with
/// ranges expressed in character indices. The future resolves to an error if the blocking task
/// could not be joined.
///
/// The dictionary read lock is held for the whole scan.
fn check_text(
    dictionary: Arc<RwLock<Dictionary>>,
    text: Rope,
) -> impl Future<Output = Result<Vec<Diagnostic>, tokio::task::JoinError>> {
    tokio::task::spawn_blocking(move || {
        static WORDS: Lazy<Regex> = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap());

        let dict = dictionary.read();
        let text = text.slice(..);
        let mut diagnostics = Vec::new();
        for match_ in WORDS.find_iter(text.regex_input()) {
            // Every part of `WORDS` is optional, so it also matches the empty string wherever
            // no word starts. Those matches contain nothing to check.
            if match_.start() == match_.end() {
                continue;
            }
            let word = Cow::from(text.byte_slice(match_.range()));
            if dict.check(&word) {
                continue;
            }
            diagnostics.push(Diagnostic {
                // The regex reports byte offsets; diagnostics use character offsets.
                range: helix_view::Range::Document(helix_stdx::Range {
                    start: text.byte_to_char(match_.start()),
                    end: text.byte_to_char(match_.end()),
                }),
                message: format!("Possible spelling issue '{word}'"),
                severity: Some(Severity::Error),
                code: None,
                provider: PROVIDER,
                tags: Default::default(),
                source: None,
                data: None,
            });
        }
        diagnostics
    })
}
/// Registers the hooks that forward document events to the spelling handler.
///
/// Events are only forwarded for documents that have a spelling language.
pub(super) fn register_hooks(handlers: &Handlers) {
    let opened_tx = handlers.spelling.event_tx.clone();
    let changed_tx = handlers.spelling.event_tx.clone();

    register_hook!(move |event: &mut DocumentDidOpen<'_>| {
        let doc = doc!(event.editor, &event.doc);
        if doc.spelling_language().is_none() {
            return Ok(());
        }
        send_blocking(
            &opened_tx,
            SpellingEvent::DocumentOpened { doc: event.doc },
        );
        Ok(())
    });

    register_hook!(move |event: &mut DocumentDidChange<'_>| {
        if event.doc.spelling_language().is_none() {
            return Ok(());
        }
        let doc = event.doc.id();
        send_blocking(&changed_tx, SpellingEvent::DocumentChanged { doc });
        Ok(())
    });
}

View file

@ -51,6 +51,7 @@ log = "~0.4"
parking_lot.workspace = true
thiserror.workspace = true
spellbook.workspace = true
[target.'cfg(windows)'.dependencies]
clipboard-win = { version = "5.4", features = ["std"] }

View file

@ -214,6 +214,7 @@ impl Editor {
}
.boxed()
})
.chain(self.spelling_actions())
.collect();
if futures.is_empty() {

View file

@ -44,14 +44,14 @@ pub enum DiagnosticProvider {
/// not clear the pull diagnostics and vice-versa.
identifier: Option<Arc<str>>,
},
// Future internal features can go here...
Spelling,
}
impl DiagnosticProvider {
    /// Returns the ID of the language server behind this provider, or `None` when the provider
    /// is not a language server.
    pub fn language_server_id(&self) -> Option<LanguageServerId> {
        if let Self::Lsp { server_id, .. } = self {
            Some(*server_id)
        } else {
            None
        }
    }
}

View file

@ -1706,6 +1706,10 @@ impl Document {
current_revision
}
/// The language used to spell check this document, or `None` to disable spell checking.
///
/// NOTE: this is currently hard-coded, so every document reports `en_US`.
pub fn spelling_language(&self) -> Option<helix_core::SpellingLanguage> {
Some(helix_core::SpellingLanguage::EN_US)
}
/// Corresponding language scope name. Usually `source.<lang>`.
pub fn language_scope(&self) -> Option<&str> {
self.language

View file

@ -23,6 +23,7 @@ use helix_vcs::DiffProviderRegistry;
use futures_util::stream::select_all::SelectAll;
use futures_util::{future, StreamExt};
use helix_lsp::{Call, LanguageServerId};
use parking_lot::RwLock;
use tokio_stream::wrappers::UnboundedReceiverStream;
use std::{
@ -47,7 +48,7 @@ pub use helix_core::diagnostic::Severity;
use helix_core::{
auto_pairs::AutoPairs,
syntax::{self, AutoPairConfig, IndentationHeuristic, LanguageServerFeature, SoftWrap},
Change, LineEnding, Position, Range, Selection, Uri, NATIVE_LINE_ENDING,
Change, LineEnding, Position, Range, Selection, SpellingLanguage, Uri, NATIVE_LINE_ENDING,
};
use helix_dap as dap;
use helix_stdx::path::canonicalize;
@ -1118,8 +1119,12 @@ pub struct Editor {
pub mouse_down_range: Option<Range>,
pub cursor_cache: CursorCache,
pub dictionaries: Dictionaries,
}
/// Loaded spell checking dictionaries keyed by language. Each dictionary sits behind an
/// `Arc<RwLock<..>>` so it can be handed to background tasks and still be modified later.
type Dictionaries = HashMap<SpellingLanguage, Arc<RwLock<spellbook::Dictionary>>>;
pub type Motion = Box<dyn Fn(&mut Editor)>;
#[derive(Debug)]
@ -1240,6 +1245,7 @@ impl Editor {
handlers,
mouse_down_range: None,
cursor_cache: CursorCache::default(),
dictionaries: HashMap::new(),
}
}

View file

@ -1,5 +1,6 @@
use completion::{CompletionEvent, CompletionHandler};
use helix_event::send_blocking;
use spelling::SpellingHandler;
use tokio::sync::mpsc::Sender;
use crate::handlers::lsp::SignatureHelpInvoked;
@ -9,6 +10,7 @@ pub mod completion;
pub mod dap;
pub mod diagnostics;
pub mod lsp;
pub mod spelling;
#[derive(Debug)]
pub enum AutoSaveEvent {
@ -22,6 +24,7 @@ pub struct Handlers {
pub signature_hints: Sender<lsp::SignatureHelpEvent>,
pub auto_save: Sender<AutoSaveEvent>,
pub document_colors: Sender<lsp::DocumentColorsEvent>,
pub spelling: SpellingHandler,
}
impl Handlers {

View file

@ -0,0 +1,128 @@
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
};
use futures_util::{future::BoxFuture, FutureExt as _};
use helix_core::{SpellingLanguage, Tendril, Transaction};
use helix_event::{TaskController, TaskHandle};
use tokio::sync::mpsc::Sender;
use crate::{diagnostic::DiagnosticProvider, Action, DocumentId, Editor};
const ACTION_PRIORITY: u8 = 0;
/// Editor-side state of the spelling integration.
#[derive(Debug)]
pub struct SpellingHandler {
/// Channel on which `SpellingEvent`s are sent.
pub event_tx: Sender<SpellingEvent>,
/// Controller of the most recently opened check request per document.
pub requests: HashMap<DocumentId, TaskController>,
/// Languages for which a dictionary load has been requested.
pub loading_dictionaries: HashSet<SpellingLanguage>,
}
impl SpellingHandler {
pub fn new(event_tx: Sender<SpellingEvent>) -> Self {
Self {
event_tx,
requests: Default::default(),
loading_dictionaries: Default::default(),
}
}
pub fn open_request(&mut self, document: DocumentId) -> TaskHandle {
let mut controller = TaskController::new();
let handle = controller.restart();
self.requests.insert(document, controller);
handle
}
}
/// Events consumed by the spelling async hook.
#[derive(Debug)]
pub enum SpellingEvent {
/*
DictionaryUpdated {
word: String,
language: SpellingLanguage,
},
*/
/// The dictionary for `language` was loaded and stored in the editor.
DictionaryLoaded { language: SpellingLanguage },
/// A document with a spelling language was opened.
DocumentOpened { doc: DocumentId },
/// A document with a spelling language was modified.
DocumentChanged { doc: DocumentId },
}
impl Editor {
/// Builds the spelling code actions for the primary selection of the current document.
///
/// Returns `None` when the document has no spelling language or when the dictionary for
/// that language is not present in `self.dictionaries`. Otherwise returns a future that
/// computes the actions on a blocking thread. For every spelling diagnostic that overlaps
/// the primary selection there is one "Replace" action per suggestion, followed by one
/// "Add to dictionary" action.
pub(crate) fn spelling_actions(
&self,
) -> Option<BoxFuture<'static, anyhow::Result<Vec<Action>>>> {
let (view, doc) = current_ref!(self);
let doc_id = doc.id();
let view_id = view.id;
let language = doc.spelling_language()?;
// TODO: consider fixes for all selections?
let range = doc.selection(view_id).primary();
let text = doc.text().clone();
let dictionary = self.dictionaries.get(&language)?.clone();
// TODO: can do this faster with partition_point + take_while
// Ranges of the spelling diagnostics that overlap the primary selection.
let selected_diagnostics: Vec<_> = doc
.diagnostics()
.iter()
.filter(|d| {
range.overlaps(&helix_core::Range::new(d.range.start, d.range.end))
&& d.inner.provider == DiagnosticProvider::Spelling
})
.map(|d| d.range)
.collect();
// Suggestions are computed off the main thread, on an owned copy of the text and a
// shared handle to the dictionary.
let future = tokio::task::spawn_blocking(move || {
let text = text.slice(..);
let dictionary = dictionary.read();
// Reused across diagnostics to hold the suggestions for the current word.
let mut suggest_buffer = Vec::new();
selected_diagnostics
.into_iter()
.flat_map(|range| {
suggest_buffer.clear();
let word = Cow::from(text.slice(range.start..range.end));
dictionary.suggest(&word, &mut suggest_buffer);
// One action per suggestion plus the "Add to dictionary" action.
let mut actions = Vec::with_capacity(suggest_buffer.len() + 1);
actions.extend(
suggest_buffer.drain(..).map(|suggestion| {
Action::new(
format!("Replace '{word}' with '{suggestion}'"),
ACTION_PRIORITY,
move |editor| {
// Replaces the diagnostic's range with the suggestion and commits the
// change to the document history.
// NOTE(review): `range` was captured when the actions were computed;
// confirm that the document cannot change before the action runs.
let doc = doc_mut!(editor, &doc_id);
let view = view_mut!(editor, view_id);
let transaction = Transaction::change(
doc.text(),
[(range.start, range.end, Some(Tendril::from(suggestion.as_str())))].into_iter(),
);
doc.apply(&transaction, view_id);
doc.append_changes_to_history(view);
// TODO: get rid of the diagnostic for this word.
},
)
})
);
// The action closure below must own the word, so convert the `Cow` to a `String`.
let word = word.to_string();
actions.push(Action::new(
format!("Add '{word}' to dictionary '{language}'"),
ACTION_PRIORITY,
move |editor| {
let Some(dictionary) = editor.dictionaries.get(&language) else {
log::error!("Failed to add '{word}' to dictionary '{language}' because the dictionary does not exist");
return;
};
// TODO: fire an event?
// Only the in-memory dictionary is updated here; nothing in this closure
// writes the word to disk.
let mut dictionary = dictionary.write();
if let Err(err) = dictionary.add(&word) {
log::error!("Failed to add '{word}' to dictionary '{language}': {err}");
}
}
));
actions
})
.collect()
});
// `?` converts a failure to join the blocking task into an `anyhow` error.
Some(async move { Ok(future.await?) }.boxed())
}
}

View file

@ -71,5 +71,6 @@ pub use document::Document;
pub use editor::Editor;
use helix_core::char_idx_at_visual_offset;
pub use helix_core::uri::DocumentId;
pub use spellbook::Dictionary;
pub use theme::Theme;
pub use view::View;