diff --git a/book/src/editor.md b/book/src/editor.md index 79f7284ce..3fe650e09 100644 --- a/book/src/editor.md +++ b/book/src/editor.md @@ -60,6 +60,7 @@ | `jump-label-alphabet` | The characters that are used to generate two character jump labels. Characters at the start of the alphabet are used first. | `"abcdefghijklmnopqrstuvwxyz"` | `end-of-line-diagnostics` | Minimum severity of diagnostics to render at the end of the line. Set to `disable` to disable entirely. Refer to the setting about `inline-diagnostics` for more details | "disable" | `clipboard-provider` | Which API to use for clipboard interaction. One of `pasteboard` (MacOS), `wayland`, `x-clip`, `x-sel`, `win-32-yank`, `termux`, `tmux`, `windows`, `termcode`, `none`, or a custom command set. | Platform and environment specific. | +| `editor-config` | Whether to read settings from [EditorConfig](https://editorconfig.org) files | `true` | ### `[editor.clipboard-provider]` Section diff --git a/helix-core/src/editor_config.rs b/helix-core/src/editor_config.rs new file mode 100644 index 000000000..714f577c5 --- /dev/null +++ b/helix-core/src/editor_config.rs @@ -0,0 +1,333 @@ +//! Support for [EditorConfig](https://EditorConfig.org) configuration loading. +//! +//! EditorConfig is an editor-agnostic format for specifying configuration in an INI-like, human +//! friendly syntax in `.editorconfig` files (which are intended to be checked into VCS). This +//! module provides functions to search for all `.editorconfig` files that apply to a given path +//! and returns an `EditorConfig` type containing any specified configuration options. +//! +//! At time of writing, this module follows the [spec](https://spec.editorconfig.org/) at +//! version 0.17.2. 
+ +use std::{ + collections::HashMap, + fs, + num::{NonZeroU16, NonZeroU8}, + path::Path, + str::FromStr, +}; + +use encoding_rs::Encoding; +use globset::{GlobBuilder, GlobMatcher}; + +use crate::{ + indent::{IndentStyle, MAX_INDENT}, + LineEnding, +}; + +/// Configuration declared for a path in `.editorconfig` files. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct EditorConfig { + pub indent_style: Option, + pub tab_width: Option, + pub line_ending: Option, + pub encoding: Option<&'static Encoding>, + // pub spelling_language: Option, + pub trim_trailing_whitespace: Option, + pub insert_final_newline: Option, + pub max_line_length: Option, +} + +impl EditorConfig { + /// Finds any configuration in `.editorconfig` files which applies to the given path. + /// + /// If no configuration applies then `EditorConfig::default()` is returned. + pub fn find(path: &Path) -> Self { + let mut configs = Vec::new(); + // + for ancestor in path.ancestors() { + let editor_config_file = ancestor.join(".editorconfig"); + let Ok(contents) = fs::read_to_string(&editor_config_file) else { + continue; + }; + let ini = match contents.parse::() { + Ok(ini) => ini, + Err(err) => { + log::warn!("Ignoring EditorConfig file at '{editor_config_file:?}' because a glob failed to compile: {err}"); + continue; + } + }; + let is_root = ini.pairs.get("root").map(AsRef::as_ref) == Some("true"); + configs.push((ini, ancestor)); + // > The search shall stop if an EditorConfig file is found with the `root` key set to + // > `true` in the preamble or when reaching the root filesystem directory. + if is_root { + break; + } + } + + let mut pairs = Pairs::new(); + // Reverse the configuration stack so that the `.editorconfig` files closest to `path` + // are applied last and overwrite settings in files closer to the search ceiling. 
+ // + // > If multiple EditorConfig files have matching sections, the pairs from the closer + // > EditorConfig file are read last, so pairs in closer files take precedence. + for (config, dir) in configs.into_iter().rev() { + let relative_path = path.strip_prefix(dir).expect("dir is an ancestor of path"); + + for section in config.sections { + if section.glob.is_match(relative_path) { + log::info!( + "applying EditorConfig from section '{}' in file {:?}", + section.glob.glob(), + dir.join(".editorconfig") + ); + pairs.extend(section.pairs); + } + } + } + + Self::from_pairs(pairs) + } + + fn from_pairs(pairs: Pairs) -> Self { + enum IndentSize { + Tab, + Spaces(NonZeroU8), + } + + // + let indent_size = pairs.get("indent_size").and_then(|value| { + if value.as_ref() == "tab" { + Some(IndentSize::Tab) + } else if let Ok(spaces) = value.parse::() { + Some(IndentSize::Spaces(spaces)) + } else { + None + } + }); + let tab_width = pairs + .get("tab_width") + .and_then(|value| value.parse::().ok()) + .or(match indent_size { + Some(IndentSize::Spaces(spaces)) => Some(spaces), + _ => None, + }); + let indent_style = pairs + .get("indent_style") + .and_then(|value| match value.as_ref() { + "tab" => Some(IndentStyle::Tabs), + "space" => { + let spaces = match indent_size { + Some(IndentSize::Spaces(spaces)) => spaces.get(), + Some(IndentSize::Tab) => tab_width.map(|n| n.get()).unwrap_or(4), + None => 4, + }; + Some(IndentStyle::Spaces(spaces.clamp(1, MAX_INDENT))) + } + _ => None, + }); + let line_ending = pairs + .get("end_of_line") + .and_then(|value| match value.as_ref() { + "lf" => Some(LineEnding::LF), + "crlf" => Some(LineEnding::Crlf), + #[cfg(feature = "unicode-lines")] + "cr" => Some(LineEnding::CR), + _ => None, + }); + let encoding = pairs.get("charset").and_then(|value| match value.as_ref() { + "latin1" => Some(encoding_rs::WINDOWS_1252), + "utf-8" => Some(encoding_rs::UTF_8), + // `utf-8-bom` is intentionally ignored. + // > `utf-8-bom` is discouraged. 
+ "utf-16le" => Some(encoding_rs::UTF_16LE), + "utf-16be" => Some(encoding_rs::UTF_16BE), + _ => None, + }); + let trim_trailing_whitespace = + pairs + .get("trim_trailing_whitespace") + .and_then(|value| match value.as_ref() { + "true" => Some(true), + "false" => Some(false), + _ => None, + }); + let insert_final_newline = pairs + .get("insert_final_newline") + .and_then(|value| match value.as_ref() { + "true" => Some(true), + "false" => Some(false), + _ => None, + }); + // This option is not in the spec but is supported by some editors. + // + let max_line_length = pairs + .get("max_line_length") + .and_then(|value| value.parse::().ok()); + + Self { + indent_style, + tab_width, + line_ending, + encoding, + trim_trailing_whitespace, + insert_final_newline, + max_line_length, + } + } +} + +type Pairs = HashMap, Box>; + +#[derive(Debug)] +struct Section { + glob: GlobMatcher, + pairs: Pairs, +} + +#[derive(Debug, Default)] +struct Ini { + pairs: Pairs, + sections: Vec
, +} + +impl FromStr for Ini { + type Err = globset::Error; + + fn from_str(source: &str) -> Result { + // + let mut ini = Ini::default(); + // > EditorConfig files are in an INI-like file format. To read an EditorConfig file, take + // > one line at a time, from beginning to end. For each line: + for full_line in source.lines() { + // > 1. Remove all leading and trailing whitespace. + let line = full_line.trim(); + // > 2. Process the remaining text as specified for its type below. + // > The types of lines are: + // > * Blank: contains nothing. Blank lines are ignored. + if line.is_empty() { + continue; + } + // > * Comment: starts with a ';' or '#'. Comment lines are ignored. + if line.starts_with([';', '#']) { + continue; + } + if let Some(section) = line.strip_prefix('[').and_then(|s| s.strip_suffix(']')) { + // > * Section Header: starts with a `[` and ends with a `]`. These lines define + // > globs... + + // + // We need to modify the glob string slightly since EditorConfig's glob flavor + // doesn't match `globset`'s exactly. `globset` only allows '**' at the beginning + // or end of a glob or between two '/'s. (This replacement is not very fancy but + // should cover most practical cases.) + let mut glob_str = section.replace("**.", "**/*."); + if !is_glob_relative(section) { + glob_str.insert_str(0, "**/"); + } + let glob = GlobBuilder::new(&glob_str) + .literal_separator(true) + .backslash_escape(true) + .build()?; + ini.sections.push(Section { + glob: glob.compile_matcher(), + pairs: Pairs::new(), + }); + } else if let Some((key, value)) = line.split_once('=') { + // > * Key-Value Pair (or Pair): contains a key and a value, separated by an `=`. + // > * Key: The part before the first `=` on the line. + // > * Value: The part, if any, after the first `=` on the line. + // > * Keys and values are trimmed of leading and trailing whitespace, but + // > include any whitespace that is between non-whitespace characters. 
+ // > * If a value is not provided, then the value is an empty string. + let key = key.trim().to_lowercase().into_boxed_str(); + let value = value.trim().to_lowercase().into_boxed_str(); + if let Some(section) = ini.sections.last_mut() { + section.pairs.insert(key, value); + } else { + ini.pairs.insert(key, value); + } + } + } + Ok(ini) + } +} + +/// Determines whether a glob is relative to the directory of the config file. +fn is_glob_relative(source: &str) -> bool { + // > If the glob contains a path separator (a `/` not inside square brackets), then the + // > glob is relative to the directory level of the particular `.editorconfig` file itself. + let mut idx = 0; // start of the unscanned text that lies outside `[...]` + while let Some(open) = source[idx..].find('[').map(|open| idx + open) { + if source[idx..open].contains('/') { + return true; + } + idx = source[open..] + .find(']') + .map_or(source.len(), |close| open + close); // `close` is relative to `open` + } + source[idx..].contains('/') +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn is_glob_relative_test() { + assert!(is_glob_relative("subdir/*.c")); + assert!(!is_glob_relative("*.txt")); + assert!(!is_glob_relative("[a/b].c")); + } + + fn editor_config(path: impl AsRef, source: &str) -> EditorConfig { + let path = path.as_ref(); + let ini = source.parse::().unwrap(); + let pairs = ini + .sections + .into_iter() + .filter(|section| section.glob.is_match(path)) + .fold(Pairs::new(), |mut acc, section| { + acc.extend(section.pairs); + acc + }); + EditorConfig::from_pairs(pairs) + } + + #[test] + fn parse_test() { + let source = r#" + [*] + indent_style = space + + [Makefile] + indent_style = tab + + [docs/**.txt] + insert_final_newline = true + "#; + + assert_eq!( + editor_config("a.txt", source), + EditorConfig { + indent_style: Some(IndentStyle::Spaces(4)), + ..Default::default() + } + ); + assert_eq!( + editor_config("pkg/Makefile", source), + EditorConfig { + indent_style: Some(IndentStyle::Tabs), + ..Default::default() + } + ); + assert_eq!( +
editor_config("docs/config/editor.txt", source), + EditorConfig { + indent_style: Some(IndentStyle::Spaces(4)), + insert_final_newline: Some(true), + ..Default::default() + } + ); + } +} diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 02107b33f..3fcddfcd1 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -10,6 +10,7 @@ pub mod config; pub mod diagnostic; pub mod diff; pub mod doc_formatter; +pub mod editor_config; pub mod fuzzy; pub mod graphemes; pub mod history; diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index fd741bf9e..b570e50bc 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -326,13 +326,13 @@ fn write_impl(cx: &mut compositor::Context, path: Option<&str>, force: bool) -> let jobs = &mut cx.jobs; let (view, doc) = current!(cx.editor); - if config.trim_trailing_whitespace { + if doc.trim_trailing_whitespace() { trim_trailing_whitespace(doc, view.id); } if config.trim_final_newlines { trim_final_newlines(doc, view.id); } - if config.insert_final_newline { + if doc.insert_final_newline() { insert_final_newline(doc, view.id); } @@ -738,13 +738,13 @@ pub fn write_all_impl( let doc = doc_mut!(cx.editor, &doc_id); let view = view_mut!(cx.editor, target_view); - if config.trim_trailing_whitespace { + if doc.trim_trailing_whitespace() { trim_trailing_whitespace(doc, target_view); } if config.trim_final_newlines { trim_final_newlines(doc, target_view); } - if config.insert_final_newline { + if doc.insert_final_newline() { insert_final_newline(doc, target_view); } @@ -2146,7 +2146,6 @@ fn reflow(cx: &mut compositor::Context, args: Args, event: PromptEvent) -> anyho } let scrolloff = cx.editor.config().scrolloff; - let cfg_text_width: usize = cx.editor.config().text_width; let (view, doc) = current!(cx.editor); // Find the text_width by checking the following sources in order: @@ -2157,8 +2156,7 @@ fn reflow(cx: &mut compositor::Context, args: Args, event: 
PromptEvent) -> anyho .first() .map(|num| num.parse::()) .transpose()? - .or_else(|| doc.language_config().and_then(|config| config.text_width)) - .unwrap_or(cfg_text_width); + .unwrap_or_else(|| doc.text_width()); let rope = doc.text(); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 1c58eda21..ee5cb468d 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -31,6 +31,7 @@ use std::sync::{Arc, Weak}; use std::time::SystemTime; use helix_core::{ + editor_config::EditorConfig, encoding, history::{History, State, UndoKind}, indent::{auto_detect_indent_style, IndentStyle}, @@ -50,6 +51,7 @@ use crate::{ const BUF_SIZE: usize = 8192; const DEFAULT_INDENT: IndentStyle = IndentStyle::Tabs; +const DEFAULT_TAB_WIDTH: usize = 4; pub const DEFAULT_LANGUAGE_NAME: &str = "text"; @@ -157,6 +159,7 @@ pub struct Document { /// Current indent style. pub indent_style: IndentStyle, + editor_config: EditorConfig, /// The document's default line ending. pub line_ending: LineEnding, @@ -678,6 +681,7 @@ impl Document { inlay_hints_oudated: false, view_data: Default::default(), indent_style: DEFAULT_INDENT, + editor_config: EditorConfig::default(), line_ending, restore_cursor: false, syntax: None, @@ -712,7 +716,7 @@ impl Document { /// overwritten with the `encoding` parameter. pub fn open( path: &Path, - encoding: Option<&'static Encoding>, + mut encoding: Option<&'static Encoding>, config_loader: Option>>, config: Arc>, ) -> Result { @@ -721,12 +725,21 @@ impl Document { return Err(DocumentOpenError::IrregularFile); } + let editor_config = if config.load().editor_config { + EditorConfig::find(path) + } else { + EditorConfig::default() + }; + encoding = encoding.or(editor_config.encoding); + // Open the file if it exists, otherwise assume it is a new file (and thus empty). let (rope, encoding, has_bom) = if path.exists() { let mut file = std::fs::File::open(path)?; from_reader(&mut file, encoding)? 
} else { - let line_ending: LineEnding = config.load().default_line_ending.into(); + let line_ending = editor_config + .line_ending + .unwrap_or_else(|| config.load().default_line_ending.into()); let encoding = encoding.unwrap_or(encoding::UTF_8); (Rope::from(line_ending.as_str()), encoding, false) }; @@ -739,6 +752,7 @@ impl Document { doc.detect_language(loader); } + doc.editor_config = editor_config; doc.detect_indent_and_line_ending(); Ok(doc) @@ -1090,16 +1104,32 @@ impl Document { /// configured in `languages.toml`, with a fallback to tabs if it isn't specified. Line ending /// is likewise auto-detected, and will remain unchanged if no line endings were detected. pub fn detect_indent_and_line_ending(&mut self) { - self.indent_style = auto_detect_indent_style(&self.text).unwrap_or_else(|| { - self.language_config() - .and_then(|config| config.indent.as_ref()) - .map_or(DEFAULT_INDENT, |config| IndentStyle::from_str(&config.unit)) - }); - if let Some(line_ending) = auto_detect_line_ending(&self.text) { + self.indent_style = if let Some(indent_style) = self.editor_config.indent_style { + indent_style + } else { + auto_detect_indent_style(&self.text).unwrap_or_else(|| { + self.language_config() + .and_then(|config| config.indent.as_ref()) + .map_or(DEFAULT_INDENT, |config| IndentStyle::from_str(&config.unit)) + }) + }; + if let Some(line_ending) = self + .editor_config + .line_ending + .or_else(|| auto_detect_line_ending(&self.text)) + { self.line_ending = line_ending; } } + /// Reloads EditorConfig settings; resets them when the option is off or there is no path. + pub(crate) fn detect_editor_config(&mut self) { + self.editor_config = match self.path.as_ref() { + Some(path) if self.config.load().editor_config => EditorConfig::find(path), + _ => EditorConfig::default(), + }; + } + pub fn pickup_last_saved_time(&mut self) { self.last_saved_time = match self.path() { Some(path) => match path.metadata() { @@ -1819,9 +1849,14 @@ impl Document { /// The width that the tab character is rendered at pub fn tab_width(&self) -> usize { - self.language_config() - .and_then(|config| 
config.indent.as_ref()) - .map_or(4, |config| config.tab_width) // fallback to 4 columns + self.editor_config + .tab_width + .map(|n| n.get() as usize) + .unwrap_or_else(|| { + self.language_config() + .and_then(|config| config.indent.as_ref()) + .map_or(DEFAULT_TAB_WIDTH, |config| config.tab_width) + }) } // The width (in spaces) of a level of indentation. @@ -1829,6 +1864,20 @@ impl Document { self.indent_style.indent_width(self.tab_width()) } + /// Whether the document should have a trailing line ending appended on save. + pub fn insert_final_newline(&self) -> bool { + self.editor_config + .insert_final_newline + .unwrap_or_else(|| self.config.load().insert_final_newline) + } + + /// Whether the document should trim whitespace preceding line endings on save. + pub fn trim_trailing_whitespace(&self) -> bool { + self.editor_config + .trim_trailing_whitespace + .unwrap_or_else(|| self.config.load().trim_trailing_whitespace) + } + pub fn changes(&self) -> &ChangeSet { &self.changes } @@ -2087,12 +2136,17 @@ impl Document { } } + pub fn text_width(&self) -> usize { + self.editor_config + .max_line_length + .map(|n| n.get() as usize) + .or_else(|| self.language_config().and_then(|config| config.text_width)) + .unwrap_or_else(|| self.config.load().text_width) + } + pub fn text_format(&self, mut viewport_width: u16, theme: Option<&Theme>) -> TextFormat { let config = self.config.load(); - let text_width = self - .language_config() - .and_then(|config| config.text_width) - .unwrap_or(config.text_width); + let text_width = self.text_width(); let mut soft_wrap_at_text_width = self .language_config() .and_then(|config| { diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index cdc48a545..e6a585d61 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -366,6 +366,9 @@ pub struct Config { pub end_of_line_diagnostics: DiagnosticFilter, // Set to override the default clipboard provider pub clipboard_provider: ClipboardProvider, + /// Whether to 
read settings from [EditorConfig](https://editorconfig.org) files. Defaults to + /// `true`. + pub editor_config: bool, } #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Eq, PartialOrd, Ord)] @@ -1009,6 +1012,7 @@ impl Default for Config { inline_diagnostics: InlineDiagnosticsConfig::default(), end_of_line_diagnostics: DiagnosticFilter::Disable, clipboard_provider: ClipboardProvider::default(), + editor_config: true, } } } @@ -1456,6 +1460,7 @@ impl Editor { // we have fully unregistered this document from its LS doc.language_servers.clear(); doc.set_path(Some(path)); + doc.detect_editor_config(); self.refresh_doc_language(doc_id) } @@ -1463,6 +1468,7 @@ impl Editor { let loader = self.syn_loader.clone(); let doc = doc_mut!(self, &doc_id); doc.detect_language(loader); + doc.detect_editor_config(); doc.detect_indent_and_line_ending(); self.refresh_language_servers(doc_id); let doc = doc_mut!(self, &doc_id);