Add vim modeline support (filetype, shiftwidth, noexpandtab, fileformat).

Review notes:
- Generic arguments that were missing from this patch were restored from
  context. The return type on the `detect_language_config` context line
  (`Option<Arc<helix_core::syntax::LanguageConfiguration>>`) is an assumption;
  confirm it against helix-view/src/document.rs before applying.
- modeline.rs no longer matches the blob named on its `index` line (docs added,
  `sw=0` ignored, one test case added), so that hash is stale.

diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 1abd90d10..cdfe6cc77 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -15,6 +15,7 @@ pub mod indent;
 pub mod line_ending;
 pub mod macros;
 pub mod match_brackets;
+pub mod modeline;
 pub mod movement;
 pub mod object;
 mod position;
diff --git a/helix-core/src/modeline.rs b/helix-core/src/modeline.rs
new file mode 100644
index 000000000..66b02b60c
--- /dev/null
+++ b/helix-core/src/modeline.rs
@@ -0,0 +1,287 @@
+//! Parsing of vim-style modelines such as `// vim: set ft=rust sw=4:`.
+//!
+//! Only the options acted upon here are extracted: `ft`/`filetype`,
+//! `sw`/`shiftwidth`, `noet`/`noexpandtab` and `ff`/`fileformat`. Every other
+//! option is ignored.
+
+use std::borrow::Cow;
+
+use once_cell::sync::Lazy;
+
+use crate::indent::IndentStyle;
+use crate::regex::Regex;
+use crate::{LineEnding, RopeSlice};
+
+/// Number of lines inspected at each end of the document.
+// 5 is the vim default
+const LINES_TO_CHECK: usize = 5;
+/// Lines longer than this many characters are skipped without being parsed.
+const LENGTH_TO_CHECK: usize = 256;
+
+/// Matches the start of a modeline: an optional leading token followed by
+/// whitespace, one of the markers `vi:`, `vim:`, `Vim:` or `ex:` (the vim forms
+/// may carry a version such as `vim>700:`), and an optional `se `/`set ` keyword.
+static MODELINE_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^(\S*\s+)?(vi|[vV]im[<=>]?\d*|ex):\s*(set?\s+)?").unwrap());
+
+/// Editor settings requested by the modelines of a document.
+///
+/// Every field is optional; `None` means the modelines did not yield a value for
+/// that setting.
+#[derive(Default, Debug, Eq, PartialEq)]
+pub struct Modeline {
+    language: Option<String>,
+    indent_style: Option<IndentStyle>,
+    line_ending: Option<LineEnding>,
+}
+
+impl Modeline {
+    /// Scans the first and last `LINES_TO_CHECK` lines of `text` for modelines and
+    /// merges every recognised option into a single `Modeline`.
+    ///
+    /// Leading lines are visited top to bottom, then trailing lines bottom to top.
+    /// When the two ranges overlap (short documents) the shared lines are visited
+    /// twice.
+    pub fn parse(text: RopeSlice) -> Self {
+        let mut modeline = Self::default();
+
+        for line in text.lines().take(LINES_TO_CHECK).chain(
+            text.lines_at(text.len_lines())
+                .reversed()
+                .take(LINES_TO_CHECK),
+        ) {
+            // can't guarantee no extra copies, since we need to regex and
+            // regexes can't operate over chunks yet, but we can at least
+            // limit how much we potentially need to copy because modelines
+            // are typically quite short.
+            if line.len_chars() > LENGTH_TO_CHECK {
+                continue;
+            }
+            let line = Cow::<str>::from(line);
+            modeline.parse_from_line(&line);
+        }
+
+        modeline
+    }
+
+    /// Language id requested through `ft`/`filetype`, if any.
+    pub fn language(&self) -> Option<&str> {
+        self.language.as_deref()
+    }
+
+    /// Indentation requested through `sw`/`shiftwidth` or `noet`/`noexpandtab`, if any.
+    pub fn indent_style(&self) -> Option<IndentStyle> {
+        self.indent_style
+    }
+
+    /// Line ending requested through `ff`/`fileformat`, if any.
+    pub fn line_ending(&self) -> Option<LineEnding> {
+        self.line_ending
+    }
+
+    /// Applies the options found on a single candidate `line` to `self`.
+    ///
+    /// Lines that do not start with a modeline marker are left alone. Options are
+    /// separated by spaces, tabs or `:`; a `:` directly preceded by a backslash
+    /// belongs to the option instead of ending it.
+    fn parse_from_line(&mut self, line: &str) {
+        // Remembers whether the previous character was a backslash so that `\:`
+        // is not treated as a separator.
+        let mut saw_backslash = false;
+        let split_modeline = move |c| {
+            saw_backslash = match c {
+                ':' if !saw_backslash => return true,
+                '\\' => true,
+                _ => false,
+            };
+            c == ' ' || c == '\t'
+        };
+        if let Some(pos) = MODELINE_REGEX.find(line) {
+            for option in line[pos.end()..].split(split_modeline) {
+                // `split` always yields at least one item, so the fallback to an
+                // empty name is never taken.
+                let mut parts = option.split('=');
+                let name = parts.next().unwrap_or_default();
+                let value = parts.next();
+                match name {
+                    "ft" | "filetype" => {
+                        if let Some(val) = value {
+                            self.language = Some(val.to_string());
+                        }
+                    }
+                    "sw" | "shiftwidth" => {
+                        // `sw=0` tells vim to use 'tabstop' instead, it does not
+                        // describe a zero-width indent, so it is not recorded.
+                        // NOTE(review): no upper bound is enforced here; confirm
+                        // which widths `IndentStyle::Spaces` accepts.
+                        let width = value
+                            .and_then(|val| val.parse::<u8>().ok())
+                            .filter(|&width| width > 0);
+                        if let Some(width) = width {
+                            if self.indent_style != Some(IndentStyle::Tabs) {
+                                self.indent_style = Some(IndentStyle::Spaces(width));
+                            }
+                        }
+                    }
+                    "ff" | "fileformat" => {
+                        // An unrecognised value resets a line ending set earlier.
+                        if let Some(val) = value {
+                            self.line_ending = vim_ff_to_helix_line_ending(val);
+                        }
+                    }
+                    "noet" | "noexpandtab" => {
+                        // Tabs take precedence: a later `sw` never overrides them.
+                        self.indent_style = Some(IndentStyle::Tabs);
+                    }
+                    _ => {}
+                }
+            }
+        }
+    }
+}
+
+/// Maps a vim `fileformat` value onto the matching [`LineEnding`].
+///
+/// `mac` is only recognised when the `unicode-lines` feature is enabled; any
+/// unknown value yields `None`.
+fn vim_ff_to_helix_line_ending(val: &str) -> Option<LineEnding> {
+    match val {
+        "dos" => Some(LineEnding::Crlf),
+        "unix" => Some(LineEnding::LF),
+        #[cfg(feature = "unicode-lines")]
+        "mac" => Some(LineEnding::CR),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_modeline_parsing() {
+        let tests = [
+            (
+                "vi:noai:sw=3 ts=6",
+                Modeline {
+                    indent_style: Some(IndentStyle::Spaces(3)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "vim: tw=77",
+                Modeline {
+                    ..Default::default()
+                },
+            ),
+            (
+                "/* vim: set ai sw=5: */",
+                Modeline {
+                    indent_style: Some(IndentStyle::Spaces(5)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "# vim: set noexpandtab:",
+                Modeline {
+                    indent_style: Some(IndentStyle::Tabs),
+                    ..Default::default()
+                },
+            ),
+            (
+                "// vim: noai:ts=4:sw=4",
+                Modeline {
+                    indent_style: Some(IndentStyle::Spaces(4)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "/* vim: set noai ts=4 sw=4: */",
+                Modeline {
+                    indent_style: Some(IndentStyle::Spaces(4)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "/* vim: set fdm=expr ft=c fde=getline(v\\:lnum)=~'{'?'>1'\\:'1' sw=4: */",
+                Modeline {
+                    language: Some("c".to_string()),
+                    indent_style: Some(IndentStyle::Spaces(4)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "/* vim: set ts=8 sw=4 tw=0 noet : */",
+                Modeline {
+                    indent_style: Some(IndentStyle::Tabs),
+                    ..Default::default()
+                },
+            ),
+            (
+                "vim:ff=unix ts=4 sw=4",
+                Modeline {
+                    indent_style: Some(IndentStyle::Spaces(4)),
+                    line_ending: Some(LineEnding::LF),
+                    ..Default::default()
+                },
+            ),
+            (
+                "vim:tw=78:sw=2:ts=2:ft=help:norl:nowrap:",
+                Modeline {
+                    language: Some("help".to_string()),
+                    indent_style: Some(IndentStyle::Spaces(2)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "# vim: ft=zsh sw=2 ts=2 et",
+                Modeline {
+                    language: Some("zsh".to_string()),
+                    indent_style: Some(IndentStyle::Spaces(2)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "# vim:ft=sh:",
+                Modeline {
+                    language: Some("sh".to_string()),
+                    ..Default::default()
+                },
+            ),
+            (
+                "\" vim:ts=8:sts=4:sw=4:expandtab:ft=vim",
+                Modeline {
+                    language: Some("vim".to_string()),
+                    indent_style: Some(IndentStyle::Spaces(4)),
+                    ..Default::default()
+                },
+            ),
+            (
+                "\" vim: ts=8 noet tw=100 sw=8 sts=0 ft=vim isk+=-",
+                Modeline {
+                    language: Some("vim".to_string()),
+                    indent_style: Some(IndentStyle::Tabs),
+                    ..Default::default()
+                },
+            ),
+            (
+                "; vim:ft=gitconfig:",
+                Modeline {
+                    language: Some("gitconfig".to_string()),
+                    ..Default::default()
+                },
+            ),
+            (
+                "# vim: sw=0 ts=4",
+                Modeline {
+                    ..Default::default()
+                },
+            ),
+        ];
+        for (line, expected) in tests {
+            let mut got = Modeline::default();
+            got.parse_from_line(line);
+            assert_eq!(got, expected);
+        }
+    }
+}
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index d44b4240c..f3cca4517 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -31,6 +31,7 @@ use helix_core::{
     history::{History, State, UndoKind},
     indent::{auto_detect_indent_style, IndentStyle},
     line_ending::auto_detect_line_ending,
+    modeline::Modeline,
     syntax::{self, LanguageConfiguration},
     ChangeSet, Diagnostic, LineEnding, Range, Rope, RopeBuilder, Selection, Syntax, Transaction,
 };
@@ -178,6 +179,8 @@ pub struct Document {
     pub focused_at: std::time::Instant,
 
     pub readonly: bool,
+
+    modeline: Modeline,
 }
 
 /// Inlay hints for a single `(Document, View)` combo.
@@ -637,6 +640,7 @@ impl Document {
         let line_ending = config.load().default_line_ending.into();
         let changes = ChangeSet::new(text.slice(..));
         let old_state = None;
+        let modeline = Modeline::parse(text.slice(..));
 
         Self {
             id: DocumentId::default(),
@@ -668,6 +672,7 @@ impl Document {
             focused_at: std::time::Instant::now(),
             readonly: false,
             jump_labels: HashMap::new(),
+            modeline,
         }
     }
 
@@ -995,21 +1000,35 @@ impl Document {
         &self,
         config_loader: &syntax::Loader,
     ) -> Option<Arc<helix_core::syntax::LanguageConfiguration>> {
-        config_loader
-            .language_config_for_file_name(self.path.as_ref()?)
-            .or_else(|| config_loader.language_config_for_shebang(self.text().slice(..)))
+        self.modeline
+            .language()
+            .and_then(|language| config_loader.language_config_for_language_id(language))
+            .or_else(|| {
+                config_loader
+                    .language_config_for_file_name(self.path.as_ref()?)
+                    .or_else(|| config_loader.language_config_for_shebang(self.text().slice(..)))
+            })
     }
 
     /// Detect the indentation used in the file, or otherwise defaults to the language indentation
     /// configured in `languages.toml`, with a fallback to tabs if it isn't specified. Line ending
     /// is likewise auto-detected, and will remain unchanged if no line endings were detected.
     pub fn detect_indent_and_line_ending(&mut self) {
-        self.indent_style = auto_detect_indent_style(&self.text).unwrap_or_else(|| {
-            self.language_config()
-                .and_then(|config| config.indent.as_ref())
-                .map_or(DEFAULT_INDENT, |config| IndentStyle::from_str(&config.unit))
-        });
-        if let Some(line_ending) = auto_detect_line_ending(&self.text) {
+        self.indent_style = self
+            .modeline
+            .indent_style()
+            .or_else(|| auto_detect_indent_style(&self.text))
+            .unwrap_or_else(|| {
+                self.language_config()
+                    .and_then(|config| config.indent.as_ref())
+                    .map_or(DEFAULT_INDENT, |config| IndentStyle::from_str(&config.unit))
+            });
+
+        if let Some(line_ending) = self
+            .modeline
+            .line_ending()
+            .or_else(|| auto_detect_line_ending(&self.text))
+        {
             self.line_ending = line_ending;
         }
     }