Add support for path completion (#2608)

Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
Co-authored-by: Pascal Kuthe <pascalkuthe@pm.me>
This commit is contained in:
Philipp Mildenberger 2024-11-22 04:12:36 +01:00 committed by GitHub
parent f305c7299d
commit dc941d6d24
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 1124 additions and 212 deletions

View file

@ -18,6 +18,8 @@ ropey = { version = "1.6.1", default-features = false }
which = "7.0"
regex-cursor = "0.1.4"
bitflags = "2.6"
once_cell = "1.19"
regex-automata = "0.4.8"
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.59", features = ["Win32_Foundation", "Win32_Security", "Win32_Security_Authorization", "Win32_Storage_FileSystem", "Win32_System_Threading"] }

View file

@ -1,9 +1,12 @@
use std::{
ffi::OsStr,
borrow::Cow,
ffi::{OsStr, OsString},
path::{Path, PathBuf},
sync::RwLock,
};
use once_cell::sync::Lazy;
static CWD: RwLock<Option<PathBuf>> = RwLock::new(None);
// Get the current working directory.
@ -59,6 +62,93 @@ pub fn which<T: AsRef<OsStr>>(
})
}
/// Finds the `}` that closes a brace group which is already open when `src` begins.
///
/// `src` is the text *after* an opening `{`. Nested `{ ... }` pairs inside `src`
/// are skipped, so the returned index is that of the first `}` that is not
/// balanced by a `{` seen earlier in `src`.
///
/// Returns `None` when no such unbalanced `}` exists (including for empty input).
fn find_brace_end(src: &[u8]) -> Option<usize> {
    // Number of `{` seen so far that still wait for their matching `}`.
    let mut depth = 0usize;
    for (pos, &byte) in src.iter().enumerate() {
        match byte {
            b'{' => depth += 1,
            // No nested group is open, so this brace closes the outer one.
            b'}' if depth == 0 => return Some(pos),
            b'}' => depth -= 1,
            _ => {}
        }
    }
    None
}
/// Expands variable references in `src`, resolving each variable name through `resolve`.
///
/// Recognised forms are `$VAR`, `${VAR}`, `${VAR:-default}`, `${VAR:=default}`,
/// `${VAR-default}` and `${VAR=default}`:
///
/// * if `resolve` returns `None`, the default is inserted (empty when the form has none);
/// * if it returns an empty value, the `:-` / `:=` forms insert the default and every
///   other form inserts the empty value;
/// * otherwise the resolved value is inserted.
///
/// The default text is copied verbatim; variable references inside it are not expanded.
/// Nothing is ever assigned, so the `=` forms behave like their `-` counterparts.
///
/// Returns `Cow::Borrowed(src)` when nothing was substituted. If a `${` has no closing
/// `}`, expansion stops there and the rest of the input is copied unchanged.
fn expand_impl(src: &OsStr, mut resolve: impl FnMut(&OsStr) -> Option<OsString>) -> Cow<OsStr> {
    use regex_automata::meta::Regex;

    // The index of each pattern is significant and is checked below:
    // 0 and 1 are the "unset or empty" forms, 5 is the only form without braces.
    static REGEX: Lazy<Regex> = Lazy::new(|| {
        Regex::builder()
            .build_many(&[
                r"\$\{([^\}:]+):-",
                r"\$\{([^\}:]+):=",
                r"\$\{([^\}-]+)-",
                r"\$\{([^\}=]+)=",
                r"\$\{([^\}]+)",
                r"\$(\w+)",
            ])
            .unwrap()
    });

    let bytes = src.as_encoded_bytes();
    let mut res = Vec::with_capacity(bytes.len());
    // End of the last substituted reference; everything before it is already in `res`.
    let mut pos = 0;
    for captures in REGEX.captures_iter(bytes) {
        let mat = captures.get_match().unwrap();
        let pattern_id = mat.pattern().as_usize();
        let mut range = mat.range();
        // The regex only consumes the part up to the default value, so the iterator
        // goes on to search inside a default that was already handled (for example
        // the `$FOO` in `${BAR:-$FOO}`). Such a match starts before `pos`; using it
        // would slice `bytes[pos..range.start]` with start > end and panic.
        if range.start < pos {
            continue;
        }
        let var = &bytes[captures.get_group(1).unwrap().range()];
        let default = if pattern_id != 5 {
            // Braced form: everything up to the matching `}` is the default value.
            let Some(bracket_pos) = find_brace_end(&bytes[range.end..]) else {
                // Unterminated `${`: leave the remainder of the input untouched.
                break;
            };
            let default = &bytes[range.end..range.end + bracket_pos];
            // Extend the replaced range over the default and the closing brace.
            range.end += bracket_pos + 1;
            default
        } else {
            &[]
        };
        // SAFETY: this is a codepoint aligned substring of an osstr (always valid)
        let var = unsafe { OsStr::from_encoded_bytes_unchecked(var) };
        let expansion = resolve(var);
        let expansion = match &expansion {
            Some(val) => {
                // Only `:-` and `:=` treat a set-but-empty variable like an unset one.
                if val.is_empty() && pattern_id < 2 {
                    default
                } else {
                    val.as_encoded_bytes()
                }
            }
            None => default,
        };
        res.extend_from_slice(&bytes[pos..range.start]);
        pos = range.end;
        res.extend_from_slice(expansion);
    }
    if pos == 0 {
        // `pos` only advances on a substitution, so the input can be returned as is.
        src.into()
    } else {
        res.extend_from_slice(&bytes[pos..]);
        // SAFETY: this is a composition of valid osstr (and codepoint aligned slices which are also valid)
        unsafe { OsString::from_encoded_bytes_unchecked(res) }.into()
    }
}
/// Performs substitution of environment variables taken from the process environment.
///
/// Supports the following (POSIX-style) syntax:
///
/// * `$<var>`, `${<var>}`
/// * `${<var>:-<default>}`, `${<var>-<default>}`
/// * `${<var>:=<default>}`, `${<var>=<default>}`
///
/// Variables are looked up with [`std::env::var_os`]; the input is returned
/// borrowed when it contains nothing to substitute.
pub fn expand<S: AsRef<OsStr> + ?Sized>(src: &S) -> Cow<OsStr> {
    let source: &OsStr = src.as_ref();
    expand_impl(source, |name| std::env::var_os(name))
}
#[derive(Debug)]
pub struct ExecutableNotFoundError {
command: String,
@ -75,7 +165,9 @@ impl std::error::Error for ExecutableNotFoundError {}
#[cfg(test)]
mod tests {
use super::{current_working_dir, set_current_working_dir};
use std::ffi::{OsStr, OsString};
use super::{current_working_dir, expand_impl, set_current_working_dir};
#[test]
fn current_dir_is_set() {
@ -88,4 +180,34 @@ mod tests {
let cwd = current_working_dir();
assert_eq!(cwd, new_path);
}
// Asserts that expanding `$lhs` with `expand_impl`, using `$env` as the variable
// resolver, produces exactly `$rhs`.
macro_rules! assert_env_expand {
($env: expr, $lhs: expr, $rhs: expr) => {
assert_eq!(&*expand_impl($lhs.as_ref(), $env), OsStr::new($rhs));
};
}
/// Expansion cases that should behave the same on all platforms.
#[test]
fn test_env_expand() {
    // Fake environment: `FOO` is set, `EMPTY` is set but empty, anything else is unset.
    let env = |var: &OsStr| -> Option<OsString> {
        let value = match var.to_str().unwrap() {
            "FOO" => "foo",
            "EMPTY" => "",
            _ => return None,
        };
        Some(value.into())
    };

    // (input, expected expansion)
    let cases = [
        ("pass_trough", "pass_trough"),
        ("$FOO", "foo"),
        ("bar/$FOO/baz", "bar/foo/baz"),
        ("bar/${FOO}/baz", "bar/foo/baz"),
        ("baz/${BAR:-bar}/foo", "baz/bar/foo"),
        ("baz/${BAR:=bar}/foo", "baz/bar/foo"),
        ("baz/${BAR-bar}/foo", "baz/bar/foo"),
        ("baz/${BAR=bar}/foo", "baz/bar/foo"),
        ("baz/${EMPTY:-bar}/foo", "baz/bar/foo"),
        ("baz/${EMPTY:=bar}/foo", "baz/bar/foo"),
        ("baz/${EMPTY-bar}/foo", "baz//foo"),
        ("baz/${EMPTY=bar}/foo", "baz//foo"),
    ];
    for (input, expected) in cases {
        assert_env_expand!(env, input, expected);
    }
}
}

View file

@ -1,8 +1,12 @@
pub use etcetera::home_dir;
use once_cell::sync::Lazy;
use regex_cursor::{engines::meta::Regex, Input};
use ropey::RopeSlice;
use std::{
borrow::Cow,
ffi::OsString,
ops::Range,
path::{Component, Path, PathBuf, MAIN_SEPARATOR_STR},
};
@ -51,7 +55,7 @@ where
/// Normalize a path without resolving symlinks.
// Strategy: start from the first component and move up. Canonicalize previous path,
// join component, cannonicalize new path, strip prefix and join to the final result.
// join component, canonicalize new path, strip prefix and join to the final result.
pub fn normalize(path: impl AsRef<Path>) -> PathBuf {
let mut components = path.as_ref().components().peekable();
let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
@ -201,6 +205,96 @@ pub fn get_truncated_path(path: impl AsRef<Path>) -> PathBuf {
ret
}
/// Returns a regex alternation matching one unit of a path component: either a
/// single allowed character or an escaped whitespace character.
///
/// With `windows` set, whitespace is escaped by `^` or a backtick; otherwise by a
/// backslash.
fn path_component_regex(windows: bool) -> String {
    // TODO: support backslash path escape on windows (when using git bash for example)
    let escaped_space = match windows {
        true => r"[\^`]\s",
        false => r"[\\]\s",
    };
    // Partially based on what's allowed in a URL, but with some care to avoid
    // false positives (like any kind of brackets or quotes).
    let mut pattern = String::from(r"[\w@.\-+#$%?!,;~&]|");
    pattern.push_str(escaped_space);
    pattern
}
/// Regex source for brace-delimited environment references such as `${HOME}`.
///
/// Between the braces it accepts one or more path-component units (see
/// `path_component_regex`) or any of `/`, `:` and `=`.
fn braced_env_regex(windows: bool) -> String {
    let component = path_component_regex(windows);
    let mut pattern = String::from(r"\$\{(?:");
    pattern.push_str(&component);
    pattern.push_str(r"|[/:=])+\}");
    pattern
}
/// Assembles and compiles the regex used to recognise paths (and URLs) in text.
///
/// * `prefix` / `postfix` are inserted verbatim at the very start and end of the
///   pattern (for example `"^"` / `"$"` to anchor it).
/// * `match_single_file`: when `true`, a path start on its own (a run of first
///   components, `~`, or a prefix) is accepted as well; when `false`, a match
///   must contain at least one separator.
/// * `windows`: selects the separator class (`/` and `\` instead of only `/`),
///   the whitespace escape syntax and the extra drive/UNC-style prefixes.
///
/// Panics if the assembled pattern fails to compile.
fn compile_path_regex(
prefix: &str,
postfix: &str,
match_single_file: bool,
windows: bool,
) -> Regex {
// One unit of the first component: a braced variable such as `${HOME}` or a
// single path-component unit.
let first_component = format!(
"(?:{}|(?:{}))",
braced_env_regex(windows),
path_component_regex(windows)
);
// For all components except the first we allow an equals so that
// `foo=/bar/baz` does not include foo. This is primarily intended for url
// queries (where an equals is never in the first component)
let component = format!("(?:{first_component}|=)");
let sep = if windows { r"[/\\]" } else { "/" };
// A scheme followed by `:/` and a lazily matched optional second `/`.
let url_prefix = r"[\w+\-.]+://??";
let path_prefix = if windows {
// single slash handles most windows prefixes (like `\\server\...`) but
// `\\?\C:\..` (and `C:\`) needs special handling, since we don't allow `:` in
// path components (so that colon separated paths and <path>:<line> work)
//
// The trailing `|` makes `url_prefix` (appended below) the last alternative.
r"\\\\\?\\\w:|\w:|\\|"
} else {
""
};
let path_start = format!("(?:{first_component}+|~|{path_prefix}{url_prefix})");
// Extra alternative that lets a path start match without any separator.
let optional = if match_single_file {
format!("|{path_start}")
} else {
String::new()
};
// Either an optional start followed by one or more `sep component+` groups and an
// optional trailing separator, or a lone separator, or (if enabled) `optional`.
let path_regex = format!(
"{prefix}(?:{path_start}?(?:(?:{sep}{component}+)+{sep}?|{sep}){optional}){postfix}"
);
Regex::new(&path_regex).unwrap()
}
/// Returns the trailing part of `src` that forms a path, if `src` ends with one.
///
/// The regex is anchored at the end of the input only. `match_single_file`
/// selects the regex variant compiled with that flag (see `compile_path_regex`).
pub fn get_path_suffix(src: RopeSlice<'_>, match_single_file: bool) -> Option<RopeSlice<'_>> {
    // Both variants are compiled lazily, each on its first use.
    static WITH_SINGLE_FILE: Lazy<Regex> =
        Lazy::new(|| compile_path_regex("", "$", true, cfg!(windows)));
    static WITHOUT_SINGLE_FILE: Lazy<Regex> =
        Lazy::new(|| compile_path_regex("", "$", false, cfg!(windows)));

    let regex: &Regex = if match_single_file {
        &*WITH_SINGLE_FILE
    } else {
        &*WITHOUT_SINGLE_FILE
    };
    let found = regex.find(Input::new(src))?;
    Some(src.byte_slice(found.range()))
}
/// Returns an iterator over the **byte** ranges of `src` that contain a path.
///
/// The regex is not anchored, so paths anywhere in `src` are reported.
/// `match_single_file` selects the regex variant compiled with that flag
/// (see `compile_path_regex`).
pub fn find_paths(
    src: RopeSlice<'_>,
    match_single_file: bool,
) -> impl Iterator<Item = Range<usize>> + '_ {
    // Both variants are compiled lazily, each on its first use.
    static WITH_SINGLE_FILE: Lazy<Regex> =
        Lazy::new(|| compile_path_regex("", "", true, cfg!(windows)));
    static WITHOUT_SINGLE_FILE: Lazy<Regex> =
        Lazy::new(|| compile_path_regex("", "", false, cfg!(windows)));

    let regex: &Regex = if match_single_file {
        &*WITH_SINGLE_FILE
    } else {
        &*WITHOUT_SINGLE_FILE
    };
    let matches = regex.find_iter(Input::new(src));
    matches.map(|found| found.range())
}
/// Expands a leading `~` and environment variables in `path`, see
/// [`env::expand`](crate::env::expand) and [`expand_tilde`].
///
/// The tilde is expanded first. If the environment pass substitutes nothing, the
/// tilde-expanded path is returned without a further allocation.
pub fn expand<T: AsRef<Path> + ?Sized>(path: &T) -> Cow<'_, Path> {
    let original: &Path = path.as_ref();
    let tilde_expanded = expand_tilde(original);
    if let Cow::Owned(substituted) = crate::env::expand(&*tilde_expanded) {
        return PathBuf::from(substituted).into();
    }
    tilde_expanded
}
#[cfg(test)]
mod tests {
use std::{
@ -208,7 +302,10 @@ mod tests {
path::{Component, Path},
};
use crate::path;
use regex_cursor::Input;
use ropey::RopeSlice;
use crate::path::{self, compile_path_regex};
#[test]
fn expand_tilde() {
@ -228,4 +325,127 @@ mod tests {
assert_ne!(component_count, 0);
}
}
// Asserts that `$regex` matches somewhere in `$haystack` (a string converted to a
// `RopeSlice`). Note that `$haystack` is evaluated twice: once for the search and
// once for the failure message.
macro_rules! assert_match {
($regex: expr, $haystack: expr) => {
let haystack = Input::new(RopeSlice::from($haystack));
assert!(
$regex.is_match(haystack),
"regex should match {}",
$haystack
);
};
}
// Asserts that `$regex` does not match anywhere in `$haystack` (a string converted
// to a `RopeSlice`). Note that `$haystack` is evaluated twice: once for the search
// and once for the failure message.
macro_rules! assert_no_match {
($regex: expr, $haystack: expr) => {
let haystack = Input::new(RopeSlice::from($haystack));
assert!(
!$regex.is_match(haystack),
"regex should not match {}",
$haystack
);
};
}
// Asserts that the matches `$regex` finds in `$haystack`, taken in order as
// substrings of the haystack, are exactly the listed `$matches`.
macro_rules! assert_matches {
($regex: expr, $haystack: expr, [$($matches: expr),*]) => {
let src = $haystack;
let matches: Vec<_> = $regex
.find_iter(Input::new(RopeSlice::from(src)))
// Match ranges are byte ranges, so they can index the source string directly.
.map(|it| &src[it.range()])
.collect();
assert_eq!(matches, vec![$($matches),*]);
};
}
/// Paths that are only recognised with the non-Windows rules.
#[test]
fn path_regex_unix() {
    // due to ambiguity with the `\` path separator we can't support space escapes `\ ` on windows
    let regex = compile_path_regex("^", "$", false, false);
    let escaped_space_paths = ["${FOO}/hello\\ world", "${FOO}/\\ "];
    for path in escaped_space_paths {
        assert_match!(regex, path);
    }
}
/// Paths that are only recognised with the Windows rules.
#[test]
fn path_regex_windows() {
    let regex = compile_path_regex("^", "$", false, true);
    // Whitespace escaped with `^` or a backtick, backslash separators, and
    // drive / UNC-style prefixes.
    let windows_paths = [
        "${FOO}/hello^ world",
        "${FOO}/hello` world",
        "${FOO}/^ ",
        "${FOO}/` ",
        r"foo\bar",
        r"foo\bar",
        r"..\bar",
        r"..\",
        r"C:\",
        r"\\?\C:\foo",
        r"\\server\foo",
    ];
    for path in windows_paths {
        assert_match!(regex, path);
    }
}
/// Paths that must be handled identically with and without the Windows rules.
#[test]
fn path_regex() {
    // Inputs the fully anchored regex (without single-file matching) must accept.
    const ACCEPTED: &[&str] = &[
        "https://github.com/notifications/query=foo",
        "file:///foo/bar",
        "foo/bar",
        "$HOME/foo",
        "${FOO:-bar}/baz",
        "foo/bar_",
        "/home/bar",
        "foo/",
        "./",
        "../",
        "../..",
        "./foo",
        "./foo.rs",
        "/",
        "~/",
        "~/foo",
        "~/foo",
        "~/foo/../baz",
        "${HOME}/foo",
        "$HOME/foo",
        "/$FOO",
        "/${FOO}",
        "/${FOO}/${BAR}",
        "/${FOO}/${BAR}/foo",
        "/${FOO}/${BAR}",
        "${FOO}/hello_$WORLD",
        "${FOO}/hello_${WORLD}",
    ];
    // Inputs accepted once a lone file name counts as a path.
    const ACCEPTED_SINGLE_FILE: &[&str] = &["foo", "foo/", "$FOO", "${BAR}"];

    for windows in [false, true] {
        // Anchored on both ends: a bare word or the empty string is not a path.
        let anchored = compile_path_regex("^", "$", false, windows);
        assert_no_match!(anchored, "foo");
        assert_no_match!(anchored, "");
        for &path in ACCEPTED {
            assert_match!(anchored, path);
        }

        // Unanchored: paths are picked out of surrounding text.
        let unanchored = compile_path_regex("", "", false, windows);
        assert_no_match!(unanchored, "");
        assert_matches!(
            unanchored,
            r#"${FOO}/hello_${WORLD} ${FOO}/hello_${WORLD} foo("./bar", "/home/foo")""#,
            [
                "${FOO}/hello_${WORLD}",
                "${FOO}/hello_${WORLD}",
                "./bar",
                "/home/foo"
            ]
        );
        assert_matches!(
            unanchored,
            r#"--> helix-stdx/src/path.rs:427:13"#,
            ["helix-stdx/src/path.rs"]
        );
        assert_matches!(
            unanchored,
            r#"PATH=/foo/bar:/bar/baz:${foo:-/foo}/bar:${PATH}"#,
            ["/foo/bar", "/bar/baz", "${foo:-/foo}/bar"]
        );

        // Anchored, with single-file matching enabled.
        let single_file = compile_path_regex("^", "$", true, windows);
        assert_no_match!(single_file, "");
        for &path in ACCEPTED_SINGLE_FILE {
            assert_match!(single_file, path);
        }
    }
}
}