forked from Mirrors/helix
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
322 lines
11 KiB
Rust
322 lines
11 KiB
Rust
use crate::{Rope, RopeSlice};
|
|
|
|
#[cfg(target_os = "windows")]
|
|
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf;
|
|
#[cfg(not(target_os = "windows"))]
|
|
pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF;
|
|
|
|
/// Represents one of the valid Unicode line endings.
///
/// `Crlf` and `LF` are always available; the remaining variants only exist
/// when the `unicode-lines` feature is enabled.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum LineEnding {
    /// CarriageReturn followed by LineFeed
    Crlf,
    /// U+000A -- LineFeed
    LF,
    /// U+000B -- VerticalTab
    #[cfg(feature = "unicode-lines")]
    VT,
    /// U+000C -- FormFeed
    #[cfg(feature = "unicode-lines")]
    FF,
    /// U+000D -- CarriageReturn
    #[cfg(feature = "unicode-lines")]
    CR,
    /// U+0085 -- NextLine
    #[cfg(feature = "unicode-lines")]
    Nel,
    /// U+2028 -- Line Separator
    #[cfg(feature = "unicode-lines")]
    LS,
    /// U+2029 -- ParagraphSeparator
    #[cfg(feature = "unicode-lines")]
    PS,
}
|
|
|
|
impl LineEnding {
|
|
#[inline]
|
|
pub const fn len_chars(&self) -> usize {
|
|
match self {
|
|
Self::Crlf => 2,
|
|
_ => 1,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub const fn as_str(&self) -> &'static str {
|
|
match self {
|
|
Self::Crlf => "\u{000D}\u{000A}",
|
|
Self::LF => "\u{000A}",
|
|
#[cfg(feature = "unicode-lines")]
|
|
Self::VT => "\u{000B}",
|
|
#[cfg(feature = "unicode-lines")]
|
|
Self::FF => "\u{000C}",
|
|
#[cfg(feature = "unicode-lines")]
|
|
Self::CR => "\u{000D}",
|
|
#[cfg(feature = "unicode-lines")]
|
|
Self::Nel => "\u{0085}",
|
|
#[cfg(feature = "unicode-lines")]
|
|
Self::LS => "\u{2028}",
|
|
#[cfg(feature = "unicode-lines")]
|
|
Self::PS => "\u{2029}",
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub const fn from_char(ch: char) -> Option<LineEnding> {
|
|
match ch {
|
|
'\u{000A}' => Some(LineEnding::LF),
|
|
#[cfg(feature = "unicode-lines")]
|
|
'\u{000B}' => Some(LineEnding::VT),
|
|
#[cfg(feature = "unicode-lines")]
|
|
'\u{000C}' => Some(LineEnding::FF),
|
|
#[cfg(feature = "unicode-lines")]
|
|
'\u{000D}' => Some(LineEnding::CR),
|
|
#[cfg(feature = "unicode-lines")]
|
|
'\u{0085}' => Some(LineEnding::Nel),
|
|
#[cfg(feature = "unicode-lines")]
|
|
'\u{2028}' => Some(LineEnding::LS),
|
|
#[cfg(feature = "unicode-lines")]
|
|
'\u{2029}' => Some(LineEnding::PS),
|
|
// Not a line ending
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
// Normally we'd want to implement the FromStr trait, but in this case
|
|
// that would force us into a different return type than from_char or
|
|
// or from_rope_slice, which would be weird.
|
|
#[allow(clippy::should_implement_trait)]
|
|
#[inline]
|
|
pub fn from_str(g: &str) -> Option<LineEnding> {
|
|
match g {
|
|
"\u{000D}\u{000A}" => Some(LineEnding::Crlf),
|
|
"\u{000A}" => Some(LineEnding::LF),
|
|
#[cfg(feature = "unicode-lines")]
|
|
"\u{000B}" => Some(LineEnding::VT),
|
|
#[cfg(feature = "unicode-lines")]
|
|
"\u{000C}" => Some(LineEnding::FF),
|
|
#[cfg(feature = "unicode-lines")]
|
|
"\u{000D}" => Some(LineEnding::CR),
|
|
#[cfg(feature = "unicode-lines")]
|
|
"\u{0085}" => Some(LineEnding::Nel),
|
|
#[cfg(feature = "unicode-lines")]
|
|
"\u{2028}" => Some(LineEnding::LS),
|
|
#[cfg(feature = "unicode-lines")]
|
|
"\u{2029}" => Some(LineEnding::PS),
|
|
// Not a line ending
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> {
|
|
if let Some(text) = g.as_str() {
|
|
LineEnding::from_str(text)
|
|
} else {
|
|
// Non-contiguous, so it can't be a line ending.
|
|
// Specifically, Ropey guarantees that CRLF is always
|
|
// contiguous. And the remaining line endings are all
|
|
// single `char`s, and therefore trivially contiguous.
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
pub fn str_is_line_ending(s: &str) -> bool {
|
|
LineEnding::from_str(s).is_some()
|
|
}
|
|
|
|
#[inline]
|
|
pub fn rope_is_line_ending(r: RopeSlice) -> bool {
|
|
r.chunks().all(str_is_line_ending)
|
|
}
|
|
|
|
/// Attempts to detect what line ending the passed document uses.
|
|
pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> {
|
|
// Return first matched line ending. Not all possible line endings
|
|
// are being matched, as they might be special-use only
|
|
for line in doc.lines().take(100) {
|
|
match get_line_ending(&line) {
|
|
None => {}
|
|
#[cfg(feature = "unicode-lines")]
|
|
Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {}
|
|
ending => return ending,
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Returns the passed line's line ending, if any.
|
|
pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> {
|
|
// Last character as str.
|
|
let g1 = line
|
|
.slice(line.len_chars().saturating_sub(1)..)
|
|
.as_str()
|
|
.unwrap();
|
|
|
|
// Last two characters as str, or empty str if they're not contiguous.
|
|
// It's fine to punt on the non-contiguous case, because Ropey guarantees
|
|
// that CRLF is always contiguous.
|
|
let g2 = line
|
|
.slice(line.len_chars().saturating_sub(2)..)
|
|
.as_str()
|
|
.unwrap_or("");
|
|
|
|
// First check the two-character case for CRLF, then check the single-character case.
|
|
LineEnding::from_str(g2).or_else(|| LineEnding::from_str(g1))
|
|
}
|
|
|
|
#[cfg(not(feature = "unicode-lines"))]
|
|
/// Returns the passed line's line ending, if any.
|
|
pub fn get_line_ending_of_str(line: &str) -> Option<LineEnding> {
|
|
if line.ends_with("\u{000D}\u{000A}") {
|
|
Some(LineEnding::Crlf)
|
|
} else if line.ends_with('\u{000A}') {
|
|
Some(LineEnding::LF)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "unicode-lines")]
|
|
/// Returns the passed line's line ending, if any.
|
|
pub fn get_line_ending_of_str(line: &str) -> Option<LineEnding> {
|
|
if line.ends_with("\u{000D}\u{000A}") {
|
|
Some(LineEnding::Crlf)
|
|
} else if line.ends_with('\u{000A}') {
|
|
Some(LineEnding::LF)
|
|
} else if line.ends_with('\u{000B}') {
|
|
Some(LineEnding::VT)
|
|
} else if line.ends_with('\u{000C}') {
|
|
Some(LineEnding::FF)
|
|
} else if line.ends_with('\u{000D}') {
|
|
Some(LineEnding::CR)
|
|
} else if line.ends_with('\u{0085}') {
|
|
Some(LineEnding::Nel)
|
|
} else if line.ends_with('\u{2028}') {
|
|
Some(LineEnding::LS)
|
|
} else if line.ends_with('\u{2029}') {
|
|
Some(LineEnding::PS)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// Returns the char index of the end of the given line, not including its line ending.
|
|
pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize {
|
|
slice.line_to_char(line + 1)
|
|
- get_line_ending(&slice.line(line))
|
|
.map(|le| le.len_chars())
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
/// Fetches line `line_idx` from the passed rope slice, sans any line ending.
|
|
pub fn line_without_line_ending<'a>(slice: &'a RopeSlice, line_idx: usize) -> RopeSlice<'a> {
|
|
let start = slice.line_to_char(line_idx);
|
|
let end = line_end_char_index(slice, line_idx);
|
|
slice.slice(start..end)
|
|
}
|
|
|
|
/// Returns the char index of the end of the given RopeSlice, not including
|
|
/// any final line ending.
|
|
pub fn rope_end_without_line_ending(slice: &RopeSlice) -> usize {
|
|
slice.len_chars() - get_line_ending(slice).map(|le| le.len_chars()).unwrap_or(0)
|
|
}
|
|
|
|
#[cfg(test)]
mod line_ending_tests {
    use super::*;

    /// Text shared by the tests that probe line endings at different offsets.
    const MIXED: &str = "Hello\rworld\nhow\r\nare you?";

    /// Auto-detection picks the first usable line ending of the document.
    #[test]
    fn line_ending_autodetect() {
        let cases: &[(&str, Option<LineEnding>)] = &[
            ("\n", Some(LineEnding::LF)),
            ("\r\n", Some(LineEnding::Crlf)),
            ("hello", None),
            ("", None),
            ("hello\nhelix\r\n", Some(LineEnding::LF)),
            ("a formfeed\u{000C}", None),
            ("\n\u{000A}\n \u{000A}", Some(LineEnding::LF)),
            (
                "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}",
                Some(LineEnding::LF),
            ),
            (
                "a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}",
                Some(LineEnding::Crlf),
            ),
        ];

        for (text, expected) in cases.iter() {
            assert_eq!(auto_detect_line_ending(&Rope::from_str(text)), *expected);
        }
    }

    /// `from_str` only accepts strings that are exactly one line ending.
    #[test]
    fn str_to_line_ending() {
        #[cfg(feature = "unicode-lines")]
        assert_eq!(LineEnding::from_str("\r"), Some(LineEnding::CR));

        let cases = [
            ("\n", Some(LineEnding::LF)),
            ("\r\n", Some(LineEnding::Crlf)),
            ("hello\n", None),
        ];
        for (text, expected) in cases.iter() {
            assert_eq!(LineEnding::from_str(text), *expected);
        }
    }

    /// `from_rope_slice` mirrors `from_str` for rope slices.
    #[test]
    fn rope_slice_to_line_ending() {
        let r = Rope::from_str("hello\r\n");

        #[cfg(feature = "unicode-lines")]
        assert_eq!(
            LineEnding::from_rope_slice(&r.slice(5..6)),
            Some(LineEnding::CR)
        );

        let cases = [
            (6, 7, Some(LineEnding::LF)),
            (5, 7, Some(LineEnding::Crlf)),
        ];
        for &(start, end, expected) in cases.iter() {
            assert_eq!(LineEnding::from_rope_slice(&r.slice(start..end)), expected);
        }

        assert_eq!(LineEnding::from_rope_slice(&r.slice(..)), None);
    }

    /// `get_line_ending` reports the line ending a rope slice stops on.
    #[test]
    fn get_line_ending_rope_slice() {
        let r = Rope::from_str(MIXED);

        #[cfg(feature = "unicode-lines")]
        assert_eq!(get_line_ending(&r.slice(..6)), Some(LineEnding::CR));

        let cases = [(12, Some(LineEnding::LF)), (17, Some(LineEnding::Crlf))];
        for &(end, expected) in cases.iter() {
            assert_eq!(get_line_ending(&r.slice(..end)), expected);
        }

        assert_eq!(get_line_ending(&r.slice(..)), None);
    }

    /// `get_line_ending_of_str` reports the line ending a `&str` stops on.
    #[test]
    fn get_line_ending_str() {
        let text = MIXED;

        #[cfg(feature = "unicode-lines")]
        assert_eq!(get_line_ending_of_str(&text[..6]), Some(LineEnding::CR));

        let cases = [(12, Some(LineEnding::LF)), (17, Some(LineEnding::Crlf))];
        for &(end, expected) in cases.iter() {
            assert_eq!(get_line_ending_of_str(&text[..end]), expected);
        }

        assert_eq!(get_line_ending_of_str(text), None);
    }

    /// `line_end_char_index` excludes the line ending; the expected indices
    /// depend on whether a lone CR counts as a line break.
    #[test]
    fn line_end_char_index_rope_slice() {
        let r = Rope::from_str(MIXED);
        let s = &r.slice(..);

        #[cfg(not(feature = "unicode-lines"))]
        let expected_ends = [11, 15, 25];
        #[cfg(feature = "unicode-lines")]
        let expected_ends = [5, 11, 15];

        for (line, &expected) in expected_ends.iter().enumerate() {
            assert_eq!(line_end_char_index(s, line), expected);
        }
    }
}
|