Detect filetype from shebang line (#1001)

This commit is contained in:
ath3 2021-11-08 16:19:44 +01:00 committed by GitHub
parent 29e6849413
commit 77dbbc73f9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 65 additions and 2 deletions

View file

@ -14,6 +14,8 @@ use std::{
cell::RefCell,
collections::{HashMap, HashSet},
fmt,
fs::File,
io::Read,
path::Path,
sync::Arc,
};
@ -52,6 +54,7 @@ pub struct LanguageConfiguration {
pub language_id: String,
pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub shebangs: Vec<String>, // interpreter(s) associated with language
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
pub comment_token: Option<String>,
@ -254,6 +257,7 @@ pub struct Loader {
// highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>,
language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
language_config_ids_by_shebang: HashMap<String, usize>,
}
impl Loader {
@ -261,6 +265,7 @@ impl Loader {
let mut loader = Self {
language_configs: Vec::new(),
language_config_ids_by_file_type: HashMap::new(),
language_config_ids_by_shebang: HashMap::new(),
};
for config in config.language {
@ -273,6 +278,11 @@ impl Loader {
.language_config_ids_by_file_type
.insert(file_type.clone(), language_id);
}
for shebang in &config.shebangs {
loader
.language_config_ids_by_shebang
.insert(shebang.clone(), language_id);
}
loader.language_configs.push(Arc::new(config));
}
@ -298,6 +308,20 @@ impl Loader {
// TODO: content_regex handling conflict resolution
}
pub fn language_config_for_shebang(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
// Read the first 128 bytes of the file. If its a shebang line, try to find the language
let file = File::open(path).ok()?;
let mut buf = String::with_capacity(128);
file.take(128).read_to_string(&mut buf).ok()?;
static SHEBANG_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+)?)?([^\s\.\d]+)").unwrap());
let configuration_id = SHEBANG_REGEX
.captures(&buf)
.and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1]));
configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
}
pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
self.language_configs
.iter()