diff --git a/Cargo.lock b/Cargo.lock index b0e34b3f5..265ca217c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1486,6 +1486,7 @@ dependencies = [ "nucleo", "once_cell", "open", + "parking_lot", "pulldown-cmark", "same-file", "serde", @@ -1560,6 +1561,7 @@ dependencies = [ "serde", "serde_json", "slotmap", + "spellbook", "tempfile", "thiserror 2.0.12", "tokio", @@ -2443,6 +2445,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spellbook" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3f1f8dbec9f9cb947f7401ac2ad7e50c5e7e53cea31bf5ee223f34277446ca" +dependencies = [ + "foldhash", + "hashbrown 0.15.2", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index 4b9e8fea6..e7fb42030 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ unicode-segmentation = "1.2" ropey = { version = "1.6.1", default-features = false, features = ["simd"] } foldhash = "0.1" parking_lot = "0.12" +spellbook = "0.3.1" [workspace.package] version = "25.1.1" diff --git a/helix-core/src/editor_config.rs b/helix-core/src/editor_config.rs index 714f577c5..f1ba195f0 100644 --- a/helix-core/src/editor_config.rs +++ b/helix-core/src/editor_config.rs @@ -21,7 +21,7 @@ use globset::{GlobBuilder, GlobMatcher}; use crate::{ indent::{IndentStyle, MAX_INDENT}, - LineEnding, + LineEnding, SpellingLanguage, }; /// Configuration declared for a path in `.editorconfig` files. 
@@ -31,7 +31,7 @@ pub struct EditorConfig { pub tab_width: Option, pub line_ending: Option, pub encoding: Option<&'static Encoding>, - // pub spelling_language: Option, + pub spelling_language: Option, pub trim_trailing_whitespace: Option, pub insert_final_newline: Option, pub max_line_length: Option, @@ -144,6 +144,7 @@ impl EditorConfig { "utf-16be" => Some(encoding_rs::UTF_16BE), _ => None, }); + let spelling_language = pairs.get("spelling_language").and_then(|s| s.parse().ok()); let trim_trailing_whitespace = pairs .get("trim_trailing_whitespace") @@ -170,6 +171,7 @@ impl EditorConfig { tab_width, line_ending, encoding, + spelling_language, trim_trailing_whitespace, insert_final_newline, max_line_length, diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 22ec1d653..81c03bca6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -1,3 +1,5 @@ +use std::fmt; + pub use encoding_rs as encoding; pub mod auto_pairs; @@ -72,3 +74,48 @@ pub use line_ending::{LineEnding, NATIVE_LINE_ENDING}; pub use transaction::{Assoc, Change, ChangeSet, Deletion, Operation, Transaction}; pub use uri::Uri; + +/// A language to use for spell checking. +/// +/// This is defined in the form `"ab_CD"` where `a`, `b`, `C` and `D` are all ASCII alphabetic. +/// The first two letters declare the ISO 639 language code and the latter two are the ISO 3166 +/// territory identifier. The territory identifier is optional, so a language may just be `"ab"`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SpellingLanguage([u8; 5]); + +impl SpellingLanguage { + pub const EN_US: Self = Self(*b"en_US"); + + pub fn as_str(&self) -> &str { + // SAFETY: `.0` is all ASCII bytes which is valid UTF-8.
+ unsafe { std::str::from_utf8_unchecked(&self.0) } + } +} + +impl fmt::Display for SpellingLanguage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +#[derive(Debug)] +pub struct ParseSpellingLanguageError(String); + +impl std::str::FromStr for SpellingLanguage { + type Err = ParseSpellingLanguageError; + + fn from_str(s: &str) -> Result { + // TODO: some parsing. + if s.as_bytes() == Self::EN_US.0 { + Ok(Self::EN_US) + } else { + Err(ParseSpellingLanguageError(s.to_owned())) + } + } +} + +impl fmt::Display for ParseSpellingLanguageError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "expected ISO639 language code and optional ISO3166 territory code ('ab' or 'ab-CD'), found '{}'", self.0) + } +} diff --git a/helix-loader/src/lib.rs b/helix-loader/src/lib.rs index ae9ffe550..5ceefe39e 100644 --- a/helix-loader/src/lib.rs +++ b/helix-loader/src/lib.rs @@ -132,6 +132,15 @@ pub fn cache_dir() -> PathBuf { path } +pub fn state_dir() -> PathBuf { + let strategy = choose_base_strategy().expect("could not determine XDG strategy"); + let mut path = strategy + .state_dir() + .expect("state_dir is always Some for default base strategy"); + path.push("helix"); + path +} + pub fn config_file() -> PathBuf { CONFIG_FILE.get().map(|path| path.to_path_buf()).unwrap() } @@ -152,6 +161,11 @@ pub fn default_log_file() -> PathBuf { cache_dir().join("helix.log") } +// TODO: personal dictionary per language. 
+pub fn personal_dictionary_file() -> PathBuf { + state_dir().join("personal-dictionary.txt") +} + /// Merge two TOML documents, merging values from `right` onto `left` /// /// When an array exists in both `left` and `right`, `right`'s array is diff --git a/helix-term/Cargo.toml b/helix-term/Cargo.toml index 9ea2d4589..aa46658f2 100644 --- a/helix-term/Cargo.toml +++ b/helix-term/Cargo.toml @@ -91,6 +91,8 @@ serde = { version = "1.0", features = ["derive"] } grep-regex = "0.1.13" grep-searcher = "0.1.14" +parking_lot.workspace = true + [target.'cfg(not(windows))'.dependencies] # https://github.com/vorner/signal-hook/issues/100 signal-hook-tokio = { version = "0.3", features = ["futures-v0_3"] } libc = "0.2.171" diff --git a/helix-term/src/handlers.rs b/helix-term/src/handlers.rs index c7d71526c..1d0d2551a 100644 --- a/helix-term/src/handlers.rs +++ b/helix-term/src/handlers.rs @@ -18,20 +18,25 @@ mod diagnostics; mod document_colors; mod signature_help; mod snippet; +mod spelling; pub fn setup(config: Arc>) -> Handlers { events::register(); - let event_tx = completion::CompletionHandler::new(config).spawn(); + let completion_tx = completion::CompletionHandler::new(config).spawn(); let signature_hints = SignatureHelpHandler::new().spawn(); let auto_save = AutoSaveHandler::new().spawn(); let document_colors = DocumentColorsHandler::default().spawn(); + let spelling = helix_view::handlers::spelling::SpellingHandler::new( + spelling::SpellingHandler::default().spawn(), + ); let handlers = Handlers { - completions: helix_view::handlers::completion::CompletionHandler::new(event_tx), + completions: helix_view::handlers::completion::CompletionHandler::new(completion_tx), signature_hints, auto_save, document_colors, + spelling, }; helix_view::handlers::register_hooks(&handlers); @@ -41,5 +46,6 @@ pub fn setup(config: Arc>) -> Handlers { diagnostics::register_hooks(&handlers); snippet::register_hooks(&handlers); document_colors::register_hooks(&handlers); + 
spelling::register_hooks(&handlers); handlers } diff --git a/helix-term/src/handlers/spelling.rs b/helix-term/src/handlers/spelling.rs new file mode 100644 index 000000000..5957b3666 --- /dev/null +++ b/helix-term/src/handlers/spelling.rs @@ -0,0 +1,208 @@ +use std::{borrow::Cow, collections::HashSet, future::Future, sync::Arc, time::Duration}; + +use anyhow::Result; +use helix_core::{Rope, SpellingLanguage}; +use helix_event::{cancelable_future, register_hook, send_blocking}; +use helix_stdx::rope::{Regex, RopeSliceExt as _}; +use helix_view::{ + diagnostic::DiagnosticProvider, + editor::Severity, + events::{DocumentDidChange, DocumentDidOpen}, + handlers::{spelling::SpellingEvent, Handlers}, + Diagnostic, Dictionary, DocumentId, Editor, +}; +use once_cell::sync::Lazy; +use parking_lot::RwLock; +use tokio::time::Instant; + +use crate::job; + +const PROVIDER: DiagnosticProvider = DiagnosticProvider::Spelling; + +#[derive(Debug, Default)] +pub(super) struct SpellingHandler { + changed_docs: HashSet, +} + +impl helix_event::AsyncHook for SpellingHandler { + type Event = SpellingEvent; + + fn handle_event(&mut self, event: Self::Event, timeout: Option) -> Option { + match event { + SpellingEvent::DictionaryLoaded { language } => { + job::dispatch_blocking(move |editor, _compositor| { + let docs: Vec<_> = editor + .documents + .iter() + .filter_map(|(&doc_id, doc)| { + (doc.spelling_language() == Some(language)).then_some(doc_id) + }) + .collect(); + for doc in docs { + check_document(editor, doc); + } + }); + timeout + } + SpellingEvent::DocumentOpened { doc } => { + job::dispatch_blocking(move |editor, _compositor| { + check_document(editor, doc); + }); + timeout + } + SpellingEvent::DocumentChanged { doc } => { + self.changed_docs.insert(doc); + Some(Instant::now() + Duration::from_secs(3)) + } + } + } + + fn finish_debounce(&mut self) { + let docs = std::mem::take(&mut self.changed_docs); + job::dispatch_blocking(move |editor, _compositor| { + for doc in docs { + 
check_document(editor, doc); + } + }); + } +} + +fn check_document(editor: &mut Editor, doc_id: DocumentId) { + let Some(doc) = editor.documents.get(&doc_id) else { + return; + }; + let Some(language) = doc.spelling_language() else { + return; + }; + let Some(dictionary) = editor.dictionaries.get(&language).cloned() else { + if editor + .handlers + .spelling + .loading_dictionaries + .insert(language) + { + load_dictionary(language); + } + return; + }; + + let uri = doc.uri(); + let future = check_text(dictionary, doc.text().clone()); + let cancel = editor.handlers.spelling.open_request(doc_id); + + tokio::spawn(async move { + match cancelable_future(future, cancel).await { + Some(Ok(diagnostics)) => { + job::dispatch_blocking(move |editor, _compositor| { + editor.handlers.spelling.requests.remove(&doc_id); + editor.handle_diagnostics(&PROVIDER, uri, None, diagnostics); + }); + } + Some(Err(err)) => log::error!("spelling background job failed: {err}"), + None => (), + } + }); +} + +fn load_dictionary(language: SpellingLanguage) { + tokio::task::spawn_blocking(move || { + let aff = std::fs::read_to_string(helix_loader::runtime_file(format!( + "dictionaries/{language}/{language}.aff" + ))) + .unwrap(); + let dic = std::fs::read_to_string(helix_loader::runtime_file(format!( + "dictionaries/{language}/{language}.dic" + ))) + .unwrap(); + + let mut dictionary = Dictionary::new(&aff, &dic).unwrap(); + // TODO: personal dictionaries should be namespaced under runtime directories under the + // language. 
+ if let Ok(file) = std::fs::File::open(helix_loader::personal_dictionary_file()) { + use std::io::{BufRead as _, BufReader}; + let reader = BufReader::with_capacity(8 * 1024, file); + for line in reader.lines() { + let line = line.unwrap(); + let line = line.trim(); + if line.is_empty() { + continue; + } + dictionary.add(line).unwrap(); + } + } + + job::dispatch_blocking(move |editor, _compositor| { + let was_removed = editor + .handlers + .spelling + .loading_dictionaries + .remove(&language); + // Other processes should respect that a dictionary is loading and not change + // `loading_dictionaries`. So this should always be true. + debug_assert!(was_removed); + editor + .dictionaries + .insert(language, Arc::new(RwLock::new(dictionary))); + send_blocking( + &editor.handlers.spelling.event_tx, + SpellingEvent::DictionaryLoaded { language }, + ); + }) + }); +} + +fn check_text( + dictionary: Arc>, + text: Rope, +) -> impl Future, tokio::task::JoinError>> { + tokio::task::spawn_blocking(move || { + static WORDS: Lazy = Lazy::new(|| Regex::new(r#"[0-9A-Z]*(['-]?[a-z]+)*"#).unwrap()); + + let dict = dictionary.read(); + let text = text.slice(..); + let mut diagnostics = Vec::new(); + for match_ in WORDS.find_iter(text.regex_input()) { + let word = Cow::from(text.byte_slice(match_.range())); + if !dict.check(&word) { + diagnostics.push(Diagnostic { + range: helix_view::Range::Document(helix_stdx::Range { + start: text.byte_to_char(match_.start()), + end: text.byte_to_char(match_.end()), + }), + message: format!("Possible spelling issue '{word}'"), + severity: Some(Severity::Error), + code: None, + provider: PROVIDER, + tags: Default::default(), + source: None, + data: None, + }); + } + } + diagnostics + }) +} + +pub(super) fn register_hooks(handlers: &Handlers) { + let tx = handlers.spelling.event_tx.clone(); + register_hook!(move |event: &mut DocumentDidOpen<'_>| { + let doc = doc!(event.editor, &event.doc); + if doc.spelling_language().is_some() { + 
send_blocking(&tx, SpellingEvent::DocumentOpened { doc: event.doc }); + } + Ok(()) + }); + + let tx = handlers.spelling.event_tx.clone(); + register_hook!(move |event: &mut DocumentDidChange<'_>| { + if event.doc.spelling_language().is_some() { + send_blocking( + &tx, + SpellingEvent::DocumentChanged { + doc: event.doc.id(), + }, + ); + } + Ok(()) + }); +} diff --git a/helix-view/Cargo.toml b/helix-view/Cargo.toml index bcee1a0a7..e538d4e06 100644 --- a/helix-view/Cargo.toml +++ b/helix-view/Cargo.toml @@ -51,6 +51,7 @@ log = "~0.4" parking_lot.workspace = true thiserror.workspace = true +spellbook.workspace = true [target.'cfg(windows)'.dependencies] clipboard-win = { version = "5.4", features = ["std"] } diff --git a/helix-view/src/action.rs b/helix-view/src/action.rs index 5fc57bfc4..49352fbb0 100644 --- a/helix-view/src/action.rs +++ b/helix-view/src/action.rs @@ -214,6 +214,7 @@ impl Editor { } .boxed() }) + .chain(self.spelling_actions()) .collect(); if futures.is_empty() { diff --git a/helix-view/src/diagnostic.rs b/helix-view/src/diagnostic.rs index 414eb2023..9674713d9 100644 --- a/helix-view/src/diagnostic.rs +++ b/helix-view/src/diagnostic.rs @@ -44,14 +44,14 @@ pub enum DiagnosticProvider { /// not clear the pull diagnostics and vice-versa. identifier: Option>, }, - // Future internal features can go here... + Spelling, } impl DiagnosticProvider { pub fn language_server_id(&self) -> Option { match self { Self::Lsp { server_id, .. } => Some(*server_id), - // _ => None, + _ => None, } } } diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 6ea31cd9f..3430f1c1f 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -1706,6 +1706,10 @@ impl Document { current_revision } + pub fn spelling_language(&self) -> Option { + Some(helix_core::SpellingLanguage::EN_US) + } + /// Corresponding language scope name. Usually `source.`. 
pub fn language_scope(&self) -> Option<&str> { self.language diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 3822aaf32..016094e9d 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -23,6 +23,7 @@ use helix_vcs::DiffProviderRegistry; use futures_util::stream::select_all::SelectAll; use futures_util::{future, StreamExt}; use helix_lsp::{Call, LanguageServerId}; +use parking_lot::RwLock; use tokio_stream::wrappers::UnboundedReceiverStream; use std::{ @@ -47,7 +48,7 @@ pub use helix_core::diagnostic::Severity; use helix_core::{ auto_pairs::AutoPairs, syntax::{self, AutoPairConfig, IndentationHeuristic, LanguageServerFeature, SoftWrap}, - Change, LineEnding, Position, Range, Selection, Uri, NATIVE_LINE_ENDING, + Change, LineEnding, Position, Range, Selection, SpellingLanguage, Uri, NATIVE_LINE_ENDING, }; use helix_dap as dap; use helix_stdx::path::canonicalize; @@ -1118,8 +1119,12 @@ pub struct Editor { pub mouse_down_range: Option, pub cursor_cache: CursorCache, + + pub dictionaries: Dictionaries, } +type Dictionaries = HashMap>>; + pub type Motion = Box; #[derive(Debug)] @@ -1240,6 +1245,7 @@ impl Editor { handlers, mouse_down_range: None, cursor_cache: CursorCache::default(), + dictionaries: HashMap::new(), } } diff --git a/helix-view/src/handlers.rs b/helix-view/src/handlers.rs index 258ed89e5..30a6ff9c2 100644 --- a/helix-view/src/handlers.rs +++ b/helix-view/src/handlers.rs @@ -1,5 +1,6 @@ use completion::{CompletionEvent, CompletionHandler}; use helix_event::send_blocking; +use spelling::SpellingHandler; use tokio::sync::mpsc::Sender; use crate::handlers::lsp::SignatureHelpInvoked; @@ -9,6 +10,7 @@ pub mod completion; pub mod dap; pub mod diagnostics; pub mod lsp; +pub mod spelling; #[derive(Debug)] pub enum AutoSaveEvent { @@ -22,6 +24,7 @@ pub struct Handlers { pub signature_hints: Sender, pub auto_save: Sender, pub document_colors: Sender, + pub spelling: SpellingHandler, } impl Handlers { diff --git 
a/helix-view/src/handlers/spelling.rs b/helix-view/src/handlers/spelling.rs new file mode 100644 index 000000000..49fd78123 --- /dev/null +++ b/helix-view/src/handlers/spelling.rs @@ -0,0 +1,128 @@ +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, +}; + +use futures_util::{future::BoxFuture, FutureExt as _}; +use helix_core::{SpellingLanguage, Tendril, Transaction}; +use helix_event::{TaskController, TaskHandle}; +use tokio::sync::mpsc::Sender; + +use crate::{diagnostic::DiagnosticProvider, Action, DocumentId, Editor}; + +const ACTION_PRIORITY: u8 = 0; + +#[derive(Debug)] +pub struct SpellingHandler { + pub event_tx: Sender, + pub requests: HashMap, + pub loading_dictionaries: HashSet, +} + +impl SpellingHandler { + pub fn new(event_tx: Sender) -> Self { + Self { + event_tx, + requests: Default::default(), + loading_dictionaries: Default::default(), + } + } + + pub fn open_request(&mut self, document: DocumentId) -> TaskHandle { + let mut controller = TaskController::new(); + let handle = controller.restart(); + self.requests.insert(document, controller); + handle + } +} + +#[derive(Debug)] +pub enum SpellingEvent { + /* + DictionaryUpdated { + word: String, + language: SpellingLanguage, + }, + */ + DictionaryLoaded { language: SpellingLanguage }, + DocumentOpened { doc: DocumentId }, + DocumentChanged { doc: DocumentId }, +} + +impl Editor { + pub(crate) fn spelling_actions( + &self, + ) -> Option>>> { + let (view, doc) = current_ref!(self); + let doc_id = doc.id(); + let view_id = view.id; + let language = doc.spelling_language()?; + // TODO: consider fixes for all selections? 
+ let range = doc.selection(view_id).primary(); + let text = doc.text().clone(); + let dictionary = self.dictionaries.get(&language)?.clone(); + // TODO: can do this faster with partition_point + take_while + let selected_diagnostics: Vec<_> = doc + .diagnostics() + .iter() + .filter(|d| { + range.overlaps(&helix_core::Range::new(d.range.start, d.range.end)) + && d.inner.provider == DiagnosticProvider::Spelling + }) + .map(|d| d.range) + .collect(); + + let future = tokio::task::spawn_blocking(move || { + let text = text.slice(..); + let dictionary = dictionary.read(); + let mut suggest_buffer = Vec::new(); + selected_diagnostics + .into_iter() + .flat_map(|range| { + suggest_buffer.clear(); + let word = Cow::from(text.slice(range.start..range.end)); + dictionary.suggest(&word, &mut suggest_buffer); + + let mut actions = Vec::with_capacity(suggest_buffer.len() + 1); + actions.extend( + suggest_buffer.drain(..).map(|suggestion| { + Action::new( + format!("Replace '{word}' with '{suggestion}'"), + ACTION_PRIORITY, + move |editor| { + let doc = doc_mut!(editor, &doc_id); + let view = view_mut!(editor, view_id); + let transaction = Transaction::change( + doc.text(), + [(range.start, range.end, Some(Tendril::from(suggestion.as_str())))].into_iter(), + ); + doc.apply(&transaction, view_id); + doc.append_changes_to_history(view); + // TODO: get rid of the diagnostic for this word. + }, + ) + }) + ); + let word = word.to_string(); + actions.push(Action::new( + format!("Add '{word}' to dictionary '{language}'"), + ACTION_PRIORITY, + move |editor| { + let Some(dictionary) = editor.dictionaries.get(&language) else { + log::error!("Failed to add '{word}' to dictionary '{language}' because the dictionary does not exist"); + return; + }; + // TODO: fire an event? 
+ let mut dictionary = dictionary.write(); + if let Err(err) = dictionary.add(&word) { + log::error!("Failed to add '{word}' to dictionary '{language}': {err}"); + } + } + )); + actions + }) + .collect() + }); + Some(async move { Ok(future.await?) }.boxed()) + } +} diff --git a/helix-view/src/lib.rs b/helix-view/src/lib.rs index 1c0d7e083..a9cf5c683 100644 --- a/helix-view/src/lib.rs +++ b/helix-view/src/lib.rs @@ -71,5 +71,6 @@ pub use document::Document; pub use editor::Editor; use helix_core::char_idx_at_visual_offset; pub use helix_core::uri::DocumentId; +pub use spellbook::Dictionary; pub use theme::Theme; pub use view::View;