From 979a0e45dbe0ef943fa561fabc87fec072bbf6c8 Mon Sep 17 00:00:00 2001
From: Michael Davis
Date: Wed, 13 Nov 2024 20:08:01 -0500
Subject: [PATCH] Add a workspace syntax symbol picker

---
 Cargo.lock                        |   1 +
 book/src/generated/static-cmd.md  |   4 +-
 helix-loader/src/lib.rs           |   9 +-
 helix-term/Cargo.toml             |   2 +
 helix-term/src/commands.rs        |  16 ++
 helix-term/src/commands/syntax.rs | 340 +++++++++++++++++++++++++++++-
 helix-term/src/keymap/default.rs  |   2 +-
 7 files changed, 367 insertions(+), 7 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 559e9eb8c..652ad4579 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1368,6 +1368,7 @@ dependencies = [
  "chrono",
  "content_inspector",
  "crossterm",
+ "dashmap",
  "fern",
  "futures-util",
  "grep-regex",
diff --git a/book/src/generated/static-cmd.md b/book/src/generated/static-cmd.md
index e13d0506f..986279126 100644
--- a/book/src/generated/static-cmd.md
+++ b/book/src/generated/static-cmd.md
@@ -106,7 +106,7 @@
 | `symbol_picker` | Open symbol picker |  |
 | `changed_file_picker` | Open changed file picker | normal: `` <space>g ``, select: `` <space>g `` |
 | `select_references_to_symbol_under_cursor` | Select symbol references | normal: `` <space>h ``, select: `` <space>h `` |
-| `workspace_symbol_picker` | Open workspace symbol picker | normal: `` <space>S ``, select: `` <space>S `` |
+| `workspace_symbol_picker` | Open workspace symbol picker |  |
 | `diagnostics_picker` | Open diagnostic picker | normal: `` <space>d ``, select: `` <space>d `` |
 | `workspace_diagnostics_picker` | Open workspace diagnostic picker | normal: `` <space>D ``, select: `` <space>D `` |
 | `last_picker` | Open last picker | normal: `` <space>' ``, select: `` <space>' `` |
@@ -295,4 +295,6 @@
 | `goto_next_tabstop` | goto next snippet placeholder |  |
 | `goto_prev_tabstop` | goto next snippet placeholder |  |
 | `syntax_symbol_picker` | Open a picker of symbols from the syntax tree |  |
+| `syntax_workspace_symbol_picker` | Open a picker of symbols for the workspace based on syntax trees |  |
 | `lsp_or_syntax_symbol_picker` | Open an LSP symbol picker if available, or syntax otherwise | normal: `` <space>s ``, select: `` <space>s `` |
+| `lsp_or_syntax_workspace_symbol_picker` | Open a workspace LSP symbol picker if available, or syntax workspace symbol picker otherwise | normal: `` <space>S ``, select: `` <space>S `` |
diff --git a/helix-loader/src/lib.rs b/helix-loader/src/lib.rs
index 0e7c134d0..4ea6ce1ff 100644
--- a/helix-loader/src/lib.rs
+++ b/helix-loader/src/lib.rs
@@ -230,7 +230,12 @@ pub fn merge_toml_values(left: toml::Value, right: toml::Value, merge_depth: usi
 /// Otherwise (workspace, false) is returned
 pub fn find_workspace() -> (PathBuf, bool) {
     let current_dir = current_working_dir();
-    for ancestor in current_dir.ancestors() {
+    find_workspace_in(current_dir)
+}
+
+pub fn find_workspace_in(dir: impl AsRef<Path>) -> (PathBuf, bool) {
+    let dir = dir.as_ref();
+    for ancestor in dir.ancestors() {
         if ancestor.join(".git").exists()
             || ancestor.join(".svn").exists()
             || ancestor.join(".jj").exists()
@@ -240,7 +245,7 @@ pub fn find_workspace() -> (PathBuf, bool) {
         }
     }
 
-    (current_dir, true)
+    (dir.to_owned(), true)
 }
 
 fn default_config_file() -> PathBuf {
diff --git a/helix-term/Cargo.toml b/helix-term/Cargo.toml
index 5b46a261c..279a51ad5 100644
--- a/helix-term/Cargo.toml
+++ b/helix-term/Cargo.toml
@@ -72,6 +72,8 @@ serde = { version = "1.0", features = ["derive"] }
 grep-regex = "0.1.13"
 grep-searcher = "0.1.14"
 
+dashmap = "6.0"
+
 [target.'cfg(not(windows))'.dependencies]  # https://github.com/vorner/signal-hook/issues/100
 signal-hook-tokio = { version = "0.3", features = ["futures-v0_3"] }
 libc = "0.2.168"
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 820636525..0f0885c1b 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -590,7 +590,9 @@ impl MappableCommand {
         goto_next_tabstop, "goto next snippet placeholder",
         goto_prev_tabstop, "goto next snippet placeholder",
         syntax_symbol_picker, "Open a picker of symbols from the syntax tree",
+        syntax_workspace_symbol_picker, "Open a picker of symbols for the workspace based on syntax trees",
         lsp_or_syntax_symbol_picker, "Open an LSP symbol picker if available, or syntax otherwise",
+        lsp_or_syntax_workspace_symbol_picker, "Open a workspace LSP symbol picker if available, or syntax workspace symbol picker otherwise",
     );
 }
 
@@ -6520,3 +6522,17 @@ fn lsp_or_syntax_symbol_picker(cx: &mut Context) {
             .set_error("No language server supporting document symbols or syntax info available");
     }
 }
+
+fn lsp_or_syntax_workspace_symbol_picker(cx: &mut Context) {
+    let doc = doc!(cx.editor);
+
+    if doc
+        .language_servers_with_feature(LanguageServerFeature::WorkspaceSymbols)
+        .next()
+        .is_some()
+    {
+        lsp::workspace_symbol_picker(cx);
+    } else {
+        syntax_workspace_symbol_picker(cx);
+    }
+}
diff --git a/helix-term/src/commands/syntax.rs b/helix-term/src/commands/syntax.rs
index a86dbc4b9..9a37ac810 100644
--- a/helix-term/src/commands/syntax.rs
+++ b/helix-term/src/commands/syntax.rs
@@ -1,7 +1,38 @@
-use helix_core::{tree_sitter::Query, Selection, Uri};
-use helix_view::{align_view, Align, DocumentId};
+use std::{
+    collections::HashSet,
+    path::{Path, PathBuf},
+    sync::Arc,
+};
 
-use crate::ui::{overlay::overlaid, picker::PathOrId, Picker, PickerColumn};
+use arc_swap::ArcSwapAny;
+use dashmap::DashMap;
+use futures_util::FutureExt;
+use grep_regex::RegexMatcherBuilder;
+use grep_searcher::{sinks, BinaryDetection, SearcherBuilder};
+use helix_core::{
+    syntax::{LanguageConfiguration, Loader},
+    tree_sitter::Query,
+    Rope, RopeSlice, Selection, Syntax, Uri,
+};
+use helix_stdx::{
+    path,
+    rope::{self, RopeSliceExt},
+};
+use helix_view::{
+    align_view,
+    document::{from_reader, SCRATCH_BUFFER_NAME},
+    Align, Document, DocumentId, Editor,
+};
+use ignore::{DirEntry, WalkBuilder, WalkState};
+
+use crate::{
+    filter_picker_entry,
+    ui::{
+        overlay::overlaid,
+        picker::{Injector, PathOrId},
+        Picker, PickerColumn,
+    },
+};
 
 use super::Context;
 
@@ -142,3 +173,306 @@ pub fn syntax_symbol_picker(cx: &mut Context) {
 
cx.push_layer(Box::new(overlaid(picker))); } + +pub fn syntax_workspace_symbol_picker(cx: &mut Context) { + fn symbols_matching_pattern<'a>( + syntax: &'a Syntax, + symbols: &'a Query, + text: RopeSlice<'a>, + doc: &'a UriOrDocumentId, + pattern: &'a rope::Regex, + ) -> impl Iterator + 'a { + syntax + .captures(symbols, text, None) + .filter_map(move |(match_, capture_index)| { + let capture = match_.captures[capture_index]; + let kind = definition_symbol_kind_for_capture(symbols, capture_index)?; + let node = capture.node; + if !pattern.is_match(text.regex_input_at_bytes(node.start_byte()..node.end_byte())) + { + return None; + } + let start = text.byte_to_char(node.start_byte()); + let end = text.byte_to_char(node.end_byte()); + Some(Symbol { + kind, + name: text.slice(start..end).to_string(), + start, + end, + start_line: text.char_to_line(start), + end_line: text.char_to_line(end), + doc: doc.clone(), + }) + }) + } + + #[derive(Debug)] + struct SearchState { + searcher_builder: SearcherBuilder, + walk_builder: WalkBuilder, + regex_matcher_builder: RegexMatcherBuilder, + search_root: PathBuf, + /// A cache of files that have been parsed in prior searches. + syntax_cache: DashMap)>>, + } + + let mut searcher_builder = SearcherBuilder::new(); + searcher_builder.binary_detection(BinaryDetection::quit(b'\x00')); + + // Search from the workspace that the currently focused document is within. This behaves like global + // search most of the time but helps when you have two projects open in splits. 
+ let search_root = if let Some(path) = doc!(cx.editor).path() { + helix_loader::find_workspace_in(path).0 + } else { + helix_loader::find_workspace().0 + }; + + let absolute_root = search_root + .canonicalize() + .unwrap_or_else(|_| search_root.clone()); + + let config = cx.editor.config(); + let dedup_symlinks = config.file_picker.deduplicate_links; + + let mut walk_builder = WalkBuilder::new(&search_root); + walk_builder + .hidden(config.file_picker.hidden) + .parents(config.file_picker.parents) + .ignore(config.file_picker.ignore) + .follow_links(config.file_picker.follow_symlinks) + .git_ignore(config.file_picker.git_ignore) + .git_global(config.file_picker.git_global) + .git_exclude(config.file_picker.git_exclude) + .max_depth(config.file_picker.max_depth) + .filter_entry(move |entry| filter_picker_entry(entry, &absolute_root, dedup_symlinks)) + .add_custom_ignore_filename(helix_loader::config_dir().join("ignore")) + .add_custom_ignore_filename(".helix/ignore"); + + let mut regex_matcher_builder = RegexMatcherBuilder::new(); + regex_matcher_builder.case_smart(config.search.smart_case); + let state = SearchState { + searcher_builder, + walk_builder, + regex_matcher_builder, + search_root, + syntax_cache: DashMap::default(), + }; + let reg = cx.register.unwrap_or('/'); + cx.editor.registers.last_search_register = reg; + let columns = vec![ + PickerColumn::new("kind", |symbol: &Symbol, _| symbol.kind.as_str().into()), + PickerColumn::new("name", |symbol: &Symbol, _| symbol.name.as_str().into()) + .without_filtering(), + PickerColumn::new("path", |symbol: &Symbol, state: &SearchState| { + match &symbol.doc { + UriOrDocumentId::Uri(uri) => { + if let Some(path) = uri.as_path() { + let path = if let Ok(stripped) = path.strip_prefix(&state.search_root) { + stripped + } else { + path + }; + path.to_string_lossy().into() + } else { + uri.to_string().into() + } + } + // This picker only uses `Id` for scratch buffers for better display. 
+ UriOrDocumentId::Id(_) => SCRATCH_BUFFER_NAME.into(), + } + }), + ]; + + let get_symbols = |query: &str, + editor: &mut Editor, + state: Arc, + injector: &Injector<_, _>| { + if query.len() < 3 { + return async { Ok(()) }.boxed(); + } + // Attempt to find the symbol in any open documents. + let pattern = match rope::Regex::new(query) { + Ok(pattern) => pattern, + Err(err) => return async { Err(anyhow::anyhow!(err)) }.boxed(), + }; + for doc in editor.documents() { + let Some(syntax) = doc.syntax() else { continue }; + let Some(symbols_query) = doc + .language_config() + .and_then(|config| config.symbols_query()) + else { + continue; + }; + let text = doc.text().slice(..); + let uri_or_id = doc + .uri() + .map(UriOrDocumentId::Uri) + .unwrap_or_else(|| UriOrDocumentId::Id(doc.id())); + for symbol in symbols_matching_pattern( + syntax, + symbols_query, + text.slice(..), + &uri_or_id, + &pattern, + ) { + if injector.push(symbol).is_err() { + return async { Ok(()) }.boxed(); + } + } + } + if !state.search_root.exists() { + return async { Err(anyhow::anyhow!("Current working directory does not exist")) } + .boxed(); + } + let matcher = match state.regex_matcher_builder.build(query) { + Ok(matcher) => { + // Clear any "Failed to compile regex" errors out of the statusline. 
+ editor.clear_status(); + matcher + } + Err(err) => { + log::info!( + "Failed to compile search pattern in workspace symbol search: {}", + err + ); + return async { Err(anyhow::anyhow!("Failed to compile regex")) }.boxed(); + } + }; + let pattern = Arc::from(pattern); + let injector = injector.clone(); + let loader = editor.syn_loader.clone(); + let documents: HashSet<_> = editor + .documents() + .filter_map(Document::path) + .cloned() + .collect(); + async move { + let searcher = state.searcher_builder.build(); + state.walk_builder.build_parallel().run(|| { + let mut searcher = searcher.clone(); + let matcher = matcher.clone(); + let injector = injector.clone(); + let loader = loader.clone(); + let documents = &documents; + let pattern = pattern.clone(); + let syntax_cache = &state.syntax_cache; + Box::new(move |entry: Result| -> WalkState { + let entry = match entry { + Ok(entry) => entry, + Err(_) => return WalkState::Continue, + }; + match entry.file_type() { + Some(entry) if entry.is_file() => {} + // skip everything else + _ => return WalkState::Continue, + }; + let path = entry.path(); + // If this document is open, skip it because we've already processed it above. + if documents.contains(path) { + return WalkState::Continue; + }; + let mut quit = false; + let sink = sinks::UTF8(|_line, _content| { + if !syntax_cache.contains_key(path) { + // Read the file into a Rope and attempt to recognize the language + // and parse it with tree-sitter. Save the Rope and Syntax for future + // queries. + syntax_cache.insert(path.to_path_buf(), syntax_for_path(path, &loader)); + }; + let entry = syntax_cache.get(path).unwrap(); + let Some((text, syntax, lang_config)) = entry.value() else { + // If the file couldn't be parsed, move on. 
+ return Ok(false); + }; + let Some(query) = lang_config.symbols_query() else { + return Ok(false); + }; + let uri = Uri::from(path::normalize(path)); + for symbol in symbols_matching_pattern( + syntax, + query, + text.slice(..), + &UriOrDocumentId::Uri(uri), + &pattern, + ) { + if injector.push(symbol).is_err() { + quit = true; + break; + } + } + // Quit after seeing the first regex match. We only care to find files + // that contain the pattern and then we run the symbols query within + // those. The location and contents of a match are irrelevant - it's + // only important _if_ a file matches. + Ok(false) + }); + if let Err(err) = searcher.search_path(&matcher, path, sink) { + log::info!("Workspace syntax search error: {}, {}", path.display(), err); + } + if quit { + WalkState::Quit + } else { + WalkState::Continue + } + }) + }); + Ok(()) + } + .boxed() + }; + let picker = Picker::new( + columns, + 1, // name + [], + state, + move |cx, symbol, action| { + let doc_id = match &symbol.doc { + UriOrDocumentId::Id(id) => *id, + UriOrDocumentId::Uri(uri) => match cx.editor.open(uri.as_path().expect(""), action) { + Ok(id) => id, + Err(e) => { + cx.editor + .set_error(format!("Failed to open file '{uri:?}': {e}")); + return; + } + } + }; + let doc = doc_mut!(cx.editor, &doc_id); + let view = view_mut!(cx.editor); + let len_chars = doc.text().len_chars(); + if symbol.start >= len_chars || symbol.end > len_chars { + cx.editor.set_error("The location you jumped to does not exist anymore because the file has changed."); + return; + } + doc.set_selection(view.id, Selection::single(symbol.start, symbol.end)); + if action.align_view(view, doc.id()) { + align_view(doc, view, Align::Center) + } + }, + ) + .with_dynamic_query(get_symbols, Some(275)) + .with_preview(move |_editor, symbol| { + Some(( + symbol.doc.path_or_id()?, + Some((symbol.start_line, symbol.end_line)), + )) + }) + .truncate_start(false); + cx.push_layer(Box::new(overlaid(picker))); +} + +/// Create a Rope and 
language config for a given existing path without creating a full Document. +fn syntax_for_path( + path: &Path, + loader: &Arc>>, +) -> Option<(Rope, Syntax, Arc)> { + let mut file = std::fs::File::open(path).ok()?; + let (rope, _encoding, _has_bom) = from_reader(&mut file, None).ok()?; + let text = rope.slice(..); + let lang_config = loader + .load() + .language_config_for_file_name(path) + .or_else(|| loader.load().language_config_for_shebang(text))?; + let highlight_config = lang_config.highlight_config(&loader.load().scopes())?; + Syntax::new(text, highlight_config, loader.clone()).map(|syntax| (rope, syntax, lang_config)) +} diff --git a/helix-term/src/keymap/default.rs b/helix-term/src/keymap/default.rs index 20643e760..34b31fa9a 100644 --- a/helix-term/src/keymap/default.rs +++ b/helix-term/src/keymap/default.rs @@ -225,7 +225,7 @@ pub fn default() -> HashMap { "b" => buffer_picker, "j" => jumplist_picker, "s" => lsp_or_syntax_symbol_picker, - "S" => workspace_symbol_picker, + "S" => lsp_or_syntax_workspace_symbol_picker, "d" => diagnostics_picker, "D" => workspace_diagnostics_picker, "g" => changed_file_picker,