Detect filetype from shebang line (#1001)

pull/993/head^2
ath3 3 years ago committed by GitHub
parent 29e6849413
commit 77dbbc73f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -33,10 +33,11 @@ These are the available keys and descriptions for the file.
| scope | A string like `source.js` that identifies the language. Currently, we strive to match the scope names used by popular TextMate grammars and by the Linguist library. Usually `source.<name>` or `text.<name>` in case of markup languages | | scope | A string like `source.js` that identifies the language. Currently, we strive to match the scope names used by popular TextMate grammars and by the Linguist library. Usually `source.<name>` or `text.<name>` in case of markup languages |
| injection-regex | regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential [language injection][treesitter-language-injection] site. | | injection-regex | regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential [language injection][treesitter-language-injection] site. |
| file-types | The filetypes of the language, for example `["yml", "yaml"]` | | file-types | The filetypes of the language, for example `["yml", "yaml"]` |
| shebangs | The interpreters from the shebang line, for example `["sh", "bash"]` |
| roots | A set of marker files to look for when trying to find the workspace root. For example `Cargo.lock`, `yarn.lock` | | roots | A set of marker files to look for when trying to find the workspace root. For example `Cargo.lock`, `yarn.lock` |
| auto-format | Whether to autoformat this language when saving | | auto-format | Whether to autoformat this language when saving |
| comment-token | The token to use as a comment-token | | comment-token | The token to use as a comment-token |
| indent | The indent to use. Has sub keys `tab-width` and `unit` | | indent | The indent to use. Has sub keys `tab-width` and `unit` |
| config | Language server configuration | | config | Language server configuration |
## Queries ## Queries

@ -450,6 +450,7 @@ where
language: vec![LanguageConfiguration { language: vec![LanguageConfiguration {
scope: "source.rust".to_string(), scope: "source.rust".to_string(),
file_types: vec!["rs".to_string()], file_types: vec!["rs".to_string()],
shebangs: vec![],
language_id: "Rust".to_string(), language_id: "Rust".to_string(),
highlight_config: OnceCell::new(), highlight_config: OnceCell::new(),
config: None, config: None,

@ -14,6 +14,8 @@ use std::{
cell::RefCell, cell::RefCell,
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
fmt, fmt,
fs::File,
io::Read,
path::Path, path::Path,
sync::Arc, sync::Arc,
}; };
@ -52,6 +54,7 @@ pub struct LanguageConfiguration {
pub language_id: String, pub language_id: String,
pub scope: String, // source.rust pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc> pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub shebangs: Vec<String>, // interpreter(s) associated with language
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml> pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
pub comment_token: Option<String>, pub comment_token: Option<String>,
@ -254,6 +257,7 @@ pub struct Loader {
// highlight_names ? // highlight_names ?
language_configs: Vec<Arc<LanguageConfiguration>>, language_configs: Vec<Arc<LanguageConfiguration>>,
language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize> language_config_ids_by_file_type: HashMap<String, usize>, // Vec<usize>
language_config_ids_by_shebang: HashMap<String, usize>,
} }
impl Loader { impl Loader {
@ -261,6 +265,7 @@ impl Loader {
let mut loader = Self { let mut loader = Self {
language_configs: Vec::new(), language_configs: Vec::new(),
language_config_ids_by_file_type: HashMap::new(), language_config_ids_by_file_type: HashMap::new(),
language_config_ids_by_shebang: HashMap::new(),
}; };
for config in config.language { for config in config.language {
@ -273,6 +278,11 @@ impl Loader {
.language_config_ids_by_file_type .language_config_ids_by_file_type
.insert(file_type.clone(), language_id); .insert(file_type.clone(), language_id);
} }
for shebang in &config.shebangs {
loader
.language_config_ids_by_shebang
.insert(shebang.clone(), language_id);
}
loader.language_configs.push(Arc::new(config)); loader.language_configs.push(Arc::new(config));
} }
@ -298,6 +308,20 @@ impl Loader {
// TODO: content_regex handling conflict resolution // TODO: content_regex handling conflict resolution
} }
/// Detect the language of the file at `path` from its shebang line.
///
/// Reads at most the first 128 bytes of the file, extracts the interpreter
/// name from a leading `#!` line and looks it up among the interpreters
/// registered through the `shebangs` key of the language configurations.
///
/// Returns `None` when the file cannot be opened or read, when it does not
/// start with a shebang, or when no language is registered for the
/// interpreter.
pub fn language_config_for_shebang(&self, path: &Path) -> Option<Arc<LanguageConfiguration>> {
    // Read the first 128 bytes of the file. If it's a shebang line, try to find the language
    let file = File::open(path).ok()?;
    let mut bytes = Vec::with_capacity(128);
    file.take(128).read_to_end(&mut bytes).ok()?;
    // Decode lossily instead of requiring valid UTF-8: the 128-byte cut can
    // land in the middle of a multi-byte character, and bytes following the
    // shebang line may not be UTF-8 at all. Neither should prevent a valid
    // shebang at the start of the file from being recognized.
    let head = String::from_utf8_lossy(&bytes);
    // Skips an optional interpreter path (and an optional `env `), then
    // captures the interpreter name up to the first whitespace, dot or digit,
    // so that e.g. `python3` is looked up as `python`.
    static SHEBANG_REGEX: Lazy<Regex> =
        Lazy::new(|| Regex::new(r"^#!\s*(?:\S*[/\\](?:env\s+)?)?([^\s\.\d]+)").unwrap());
    let configuration_id = SHEBANG_REGEX
        .captures(&head)
        .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1]));
    configuration_id.and_then(|&id| self.language_configs.get(id).cloned())
}
pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> { pub fn language_config_for_scope(&self, scope: &str) -> Option<Arc<LanguageConfiguration>> {
self.language_configs self.language_configs
.iter() .iter()

@ -494,7 +494,9 @@ impl Document {
/// Detect the programming language based on the file type. /// Detect the programming language based on the file type.
pub fn detect_language(&mut self, theme: Option<&Theme>, config_loader: &syntax::Loader) { pub fn detect_language(&mut self, theme: Option<&Theme>, config_loader: &syntax::Loader) {
if let Some(path) = &self.path { if let Some(path) = &self.path {
let language_config = config_loader.language_config_for_file_name(path); let language_config = config_loader
.language_config_for_file_name(path)
.or_else(|| config_loader.language_config_for_shebang(path));
self.set_language(theme, language_config); self.set_language(theme, language_config);
} }
} }

@ -3,6 +3,7 @@ name = "rust"
scope = "source.rust" scope = "source.rust"
injection-regex = "rust" injection-regex = "rust"
file-types = ["rs"] file-types = ["rs"]
shebangs = []
roots = [] roots = []
auto-format = true auto-format = true
comment-token = "//" comment-token = "//"
@ -17,6 +18,7 @@ name = "toml"
scope = "source.toml" scope = "source.toml"
injection-regex = "toml" injection-regex = "toml"
file-types = ["toml"] file-types = ["toml"]
shebangs = []
roots = [] roots = []
comment-token = "#" comment-token = "#"
@ -27,6 +29,7 @@ name = "protobuf"
scope = "source.proto" scope = "source.proto"
injection-regex = "protobuf" injection-regex = "protobuf"
file-types = ["proto"] file-types = ["proto"]
shebangs = []
roots = [] roots = []
comment-token = "//" comment-token = "//"
@ -37,6 +40,7 @@ name = "elixir"
scope = "source.elixir" scope = "source.elixir"
injection-regex = "elixir" injection-regex = "elixir"
file-types = ["ex", "exs"] file-types = ["ex", "exs"]
shebangs = []
roots = [] roots = []
comment-token = "#" comment-token = "#"
@ -48,6 +52,7 @@ name = "mint"
scope = "source.mint" scope = "source.mint"
injection-regex = "mint" injection-regex = "mint"
file-types = ["mint"] file-types = ["mint"]
shebangs = []
roots = [] roots = []
comment-token = "//" comment-token = "//"
@ -59,6 +64,7 @@ name = "json"
scope = "source.json" scope = "source.json"
injection-regex = "json" injection-regex = "json"
file-types = ["json"] file-types = ["json"]
shebangs = []
roots = [] roots = []
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -68,6 +74,7 @@ name = "c"
scope = "source.c" scope = "source.c"
injection-regex = "c" injection-regex = "c"
file-types = ["c"] # TODO: ["h"] file-types = ["c"] # TODO: ["h"]
shebangs = []
roots = [] roots = []
comment-token = "//" comment-token = "//"
@ -79,6 +86,7 @@ name = "cpp"
scope = "source.cpp" scope = "source.cpp"
injection-regex = "cpp" injection-regex = "cpp"
file-types = ["cc", "hh", "cpp", "hpp", "h", "ipp", "tpp", "cxx", "hxx", "ixx", "txx", "ino"] file-types = ["cc", "hh", "cpp", "hpp", "h", "ipp", "tpp", "cxx", "hxx", "ixx", "txx", "ino"]
shebangs = []
roots = [] roots = []
comment-token = "//" comment-token = "//"
@ -90,6 +98,7 @@ name = "c-sharp"
scope = "source.csharp" scope = "source.csharp"
injection-regex = "c-?sharp" injection-regex = "c-?sharp"
file-types = ["cs"] file-types = ["cs"]
shebangs = []
roots = [] roots = []
comment-token = "//" comment-token = "//"
@ -100,6 +109,7 @@ name = "go"
scope = "source.go" scope = "source.go"
injection-regex = "go" injection-regex = "go"
file-types = ["go"] file-types = ["go"]
shebangs = []
roots = ["Gopkg.toml", "go.mod"] roots = ["Gopkg.toml", "go.mod"]
auto-format = true auto-format = true
comment-token = "//" comment-token = "//"
@ -113,6 +123,7 @@ name = "javascript"
scope = "source.js" scope = "source.js"
injection-regex = "^(js|javascript)$" injection-regex = "^(js|javascript)$"
file-types = ["js", "mjs"] file-types = ["js", "mjs"]
shebangs = []
roots = [] roots = []
comment-token = "//" comment-token = "//"
# TODO: highlights-jsx, highlights-params # TODO: highlights-jsx, highlights-params
@ -124,6 +135,7 @@ name = "typescript"
scope = "source.ts" scope = "source.ts"
injection-regex = "^(ts|typescript)$" injection-regex = "^(ts|typescript)$"
file-types = ["ts"] file-types = ["ts"]
shebangs = []
roots = [] roots = []
# TODO: highlights-jsx, highlights-params # TODO: highlights-jsx, highlights-params
@ -135,6 +147,7 @@ name = "tsx"
scope = "source.tsx" scope = "source.tsx"
injection-regex = "^(tsx)$" # |typescript injection-regex = "^(tsx)$" # |typescript
file-types = ["tsx"] file-types = ["tsx"]
shebangs = []
roots = [] roots = []
# TODO: highlights-jsx, highlights-params # TODO: highlights-jsx, highlights-params
@ -146,6 +159,7 @@ name = "css"
scope = "source.css" scope = "source.css"
injection-regex = "css" injection-regex = "css"
file-types = ["css"] file-types = ["css"]
shebangs = []
roots = [] roots = []
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -155,6 +169,7 @@ name = "html"
scope = "text.html.basic" scope = "text.html.basic"
injection-regex = "html" injection-regex = "html"
file-types = ["html"] file-types = ["html"]
shebangs = []
roots = [] roots = []
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -164,6 +179,7 @@ name = "python"
scope = "source.python" scope = "source.python"
injection-regex = "python" injection-regex = "python"
file-types = ["py"] file-types = ["py"]
shebangs = ["python"]
roots = [] roots = []
comment-token = "#" comment-token = "#"
@ -176,6 +192,7 @@ name = "nix"
scope = "source.nix" scope = "source.nix"
injection-regex = "nix" injection-regex = "nix"
file-types = ["nix"] file-types = ["nix"]
shebangs = []
roots = [] roots = []
comment-token = "#" comment-token = "#"
@ -187,6 +204,7 @@ name = "ruby"
scope = "source.ruby" scope = "source.ruby"
injection-regex = "ruby" injection-regex = "ruby"
file-types = ["rb"] file-types = ["rb"]
shebangs = ["ruby"]
roots = [] roots = []
comment-token = "#" comment-token = "#"
@ -198,6 +216,7 @@ name = "bash"
scope = "source.bash" scope = "source.bash"
injection-regex = "bash" injection-regex = "bash"
file-types = ["sh", "bash"] file-types = ["sh", "bash"]
shebangs = ["sh", "bash", "dash"]
roots = [] roots = []
comment-token = "#" comment-token = "#"
@ -209,6 +228,7 @@ name = "php"
scope = "source.php" scope = "source.php"
injection-regex = "php" injection-regex = "php"
file-types = ["php"] file-types = ["php"]
shebangs = ["php"]
roots = [] roots = []
indent = { tab-width = 4, unit = " " } indent = { tab-width = 4, unit = " " }
@ -218,6 +238,7 @@ name = "latex"
scope = "source.tex" scope = "source.tex"
injection-regex = "tex" injection-regex = "tex"
file-types = ["tex"] file-types = ["tex"]
shebangs = []
roots = [] roots = []
comment-token = "%" comment-token = "%"
@ -228,6 +249,7 @@ name = "julia"
scope = "source.julia" scope = "source.julia"
injection-regex = "julia" injection-regex = "julia"
file-types = ["jl"] file-types = ["jl"]
shebangs = []
roots = [] roots = []
comment-token = "#" comment-token = "#"
language-server = { command = "julia", args = [ language-server = { command = "julia", args = [
@ -253,6 +275,7 @@ name = "java"
scope = "source.java" scope = "source.java"
injection-regex = "java" injection-regex = "java"
file-types = ["java"] file-types = ["java"]
shebangs = []
roots = [] roots = []
indent = { tab-width = 4, unit = " " } indent = { tab-width = 4, unit = " " }
@ -261,6 +284,7 @@ name = "ledger"
scope = "source.ledger" scope = "source.ledger"
injection-regex = "ledger" injection-regex = "ledger"
file-types = ["ldg", "ledger", "journal"] file-types = ["ldg", "ledger", "journal"]
shebangs = []
roots = [] roots = []
comment-token = ";" comment-token = ";"
indent = { tab-width = 4, unit = " " } indent = { tab-width = 4, unit = " " }
@ -270,6 +294,7 @@ name = "ocaml"
scope = "source.ocaml" scope = "source.ocaml"
injection-regex = "ocaml" injection-regex = "ocaml"
file-types = ["ml"] file-types = ["ml"]
shebangs = []
roots = [] roots = []
comment-token = "(**)" comment-token = "(**)"
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -278,6 +303,7 @@ indent = { tab-width = 2, unit = " " }
name = "ocaml-interface" name = "ocaml-interface"
scope = "source.ocaml.interface" scope = "source.ocaml.interface"
file-types = ["mli"] file-types = ["mli"]
shebangs = []
roots = [] roots = []
comment-token = "(**)" comment-token = "(**)"
indent = { tab-width = 2, unit = " "} indent = { tab-width = 2, unit = " "}
@ -286,6 +312,7 @@ indent = { tab-width = 2, unit = " "}
name = "lua" name = "lua"
scope = "source.lua" scope = "source.lua"
file-types = ["lua"] file-types = ["lua"]
shebangs = []
roots = [] roots = []
comment-token = "--" comment-token = "--"
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -295,6 +322,7 @@ name = "svelte"
scope = "source.svelte" scope = "source.svelte"
injection-regex = "svelte" injection-regex = "svelte"
file-types = ["svelte"] file-types = ["svelte"]
shebangs = []
roots = [] roots = []
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
language-server = { command = "svelteserver", args = ["--stdio"] } language-server = { command = "svelteserver", args = ["--stdio"] }
@ -305,6 +333,7 @@ name = "vue"
scope = "source.vue" scope = "source.vue"
injection-regex = "vue" injection-regex = "vue"
file-types = ["vue"] file-types = ["vue"]
shebangs = []
roots = [] roots = []
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -312,6 +341,7 @@ indent = { tab-width = 2, unit = " " }
name = "yaml" name = "yaml"
scope = "source.yaml" scope = "source.yaml"
file-types = ["yml", "yaml"] file-types = ["yml", "yaml"]
shebangs = []
roots = [] roots = []
comment-token = "#" comment-token = "#"
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -331,6 +361,7 @@ name = "zig"
scope = "source.zig" scope = "source.zig"
injection-regex = "zig" injection-regex = "zig"
file-types = ["zig"] file-types = ["zig"]
shebangs = []
roots = ["build.zig"] roots = ["build.zig"]
auto-format = true auto-format = true
comment-token = "//" comment-token = "//"
@ -343,6 +374,7 @@ name = "prolog"
scope = "source.prolog" scope = "source.prolog"
roots = [] roots = []
file-types = ["pl", "prolog"] file-types = ["pl", "prolog"]
shebangs = ["swipl"]
comment-token = "%" comment-token = "%"
language-server = { command = "swipl", args = [ language-server = { command = "swipl", args = [
@ -354,6 +386,7 @@ language-server = { command = "swipl", args = [
name = "tsq" name = "tsq"
scope = "source.tsq" scope = "source.tsq"
file-types = ["scm"] file-types = ["scm"]
shebangs = []
roots = [] roots = []
comment-token = ";" comment-token = ";"
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -362,6 +395,7 @@ indent = { tab-width = 2, unit = " " }
name = "cmake" name = "cmake"
scope = "source.cmake" scope = "source.cmake"
file-types = ["cmake", "CMakeLists.txt"] file-types = ["cmake", "CMakeLists.txt"]
shebangs = []
roots = [] roots = []
comment-token = "#" comment-token = "#"
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }
@ -371,6 +405,7 @@ language-server = { command = "cmake-language-server" }
name = "perl" name = "perl"
scope = "source.perl" scope = "source.perl"
file-types = ["pl", "pm"] file-types = ["pl", "pm"]
shebangs = ["perl"]
roots = [] roots = []
comment-token = "#" comment-token = "#"
indent = { tab-width = 2, unit = " " } indent = { tab-width = 2, unit = " " }

Loading…
Cancel
Save