Move the character-classification helpers `char_is_linebreak` and
`char_is_whitespace` out of `Document::detect_indent_style` into a new public
`helix_core::chars` module, import them in helix-view's `document.rs`, and make
`Document::indent_unit` return `&'static str`.

diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
new file mode 100644
index 00000000..243a1374
--- /dev/null
+++ b/helix-core/src/chars.rs
@@ -0,0 +1,41 @@
+/// Determine whether a character is a line break.
+pub fn char_is_linebreak(c: char) -> bool {
+    matches!(
+        c,
+        '\u{000A}' | // LineFeed
+        '\u{000B}' | // VerticalTab
+        '\u{000C}' | // FormFeed
+        '\u{000D}' | // CarriageReturn
+        '\u{0085}' | // NextLine
+        '\u{2028}' | // Line Separator
+        '\u{2029}' // ParagraphSeparator
+    )
+}
+
+/// Determine whether a character qualifies as (non-line-break)
+/// whitespace.
+pub fn char_is_whitespace(c: char) -> bool {
+    // TODO: this is a naive binary categorization of whitespace
+    // characters. For display, word wrapping, etc. we'll need a better
+    // categorization based on e.g. breaking vs non-breaking spaces
+    // and whether they're zero-width or not.
+    match c {
+        //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
+        '\u{0009}' | // Character Tabulation
+        '\u{0020}' | // Space
+        '\u{00A0}' | // No-break Space
+        '\u{180E}' | // Mongolian Vowel Separator
+        '\u{202F}' | // Narrow No-break Space
+        '\u{205F}' | // Medium Mathematical Space
+        '\u{3000}' | // Ideographic Space
+        '\u{FEFF}' // Zero Width No-break Space
+        => true,
+
+        // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
+        // Four-per-em Space, Six-per-em Space, Figure Space,
+        // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
+        c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+
+        _ => false,
+    }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
index 79a22547..b11faeab 100644
--- a/helix-core/src/lib.rs
+++ b/helix-core/src/lib.rs
@@ -1,5 +1,6 @@
 #![allow(unused)]
 pub mod auto_pairs;
+pub mod chars;
 pub mod comment;
 pub mod diagnostic;
 pub mod graphemes;
diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
index d5ab1425..f4e4b7c6 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -5,6 +5,7 @@ use std::path::{Component, Path, PathBuf};
 use std::sync::Arc;
 
 use helix_core::{
+    chars::{char_is_linebreak, char_is_whitespace},
     history::History,
     syntax::{LanguageConfiguration, LOADER},
     ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
@@ -277,55 +278,6 @@ impl Document {
     }
 
     fn detect_indent_style(&mut self) {
-        // Determine whether a character is a line break.
-        //
-        // TODO: this is probably a generally useful utility function. Where
-        // should we put it?
-        fn char_is_linebreak(c: char) -> bool {
-            [
-                '\u{000A}', // LineFeed
-                '\u{000B}', // VerticalTab
-                '\u{000C}', // FormFeed
-                '\u{000D}', // CarriageReturn
-                '\u{0085}', // NextLine
-                '\u{2028}', // Line Separator
-                '\u{2029}', // ParagraphSeparator
-            ]
-            .contains(&c)
-        }
-
-        // Determine whether a character qualifies as (non-line-break)
-        // whitespace.
-        //
-        // TODO: this is probably a generally useful utility function. Where
-        // should we put it?
-        //
-        // TODO: this is a naive binary categorization of whitespace
-        // characters. For display, word wrapping, etc. we'll need a better
-        // categorization based on e.g. breaking vs non-breaking spaces
-        // and whether they're zero-width or not.
-        pub fn char_is_whitespace(c: char) -> bool {
-            match c {
-                //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)
-                '\u{0009}' | // Character Tabulation
-                '\u{0020}' | // Space
-                '\u{00A0}' | // No-break Space
-                '\u{180E}' | // Mongolian Vowel Separator
-                '\u{202F}' | // Narrow No-break Space
-                '\u{205F}' | // Medium Mathematical Space
-                '\u{3000}' | // Ideographic Space
-                '\u{FEFF}' // Zero Width No-break Space
-                => true,
-
-                // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
-                // Four-per-em Space, Six-per-em Space, Figure Space,
-                // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
-                c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
-
-                _ => false,
-            }
-        }
-
         // Build a histogram of the indentation *increases* between
         // subsequent lines, ignoring lines that are all whitespace.
         //
@@ -689,7 +641,7 @@ impl Document {
     ///
     /// TODO: we might not need this function anymore, since the information
     /// is conveniently available in `Document::indent_style` now.
-    pub fn indent_unit(&self) -> &str {
+    pub fn indent_unit(&self) -> &'static str {
         match self.indent_style {
             IndentStyle::Tabs => "\t",
             IndentStyle::Spaces(1) => " ",