From 3756c21baefa6182beaa9a3d5ced9d720cf9adcb Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Wed, 16 Jun 2021 17:00:21 +0200 Subject: [PATCH 01/26] rebase on branch line_ending_detection --- helix-core/src/lib.rs | 1 + helix-term/src/ui/editor.rs | 2 +- helix-view/src/document.rs | 77 +++++++++++++++++++++++++++++++++++-- helix-view/src/editor.rs | 4 +- helix-view/src/lib.rs | 1 + 5 files changed, 79 insertions(+), 6 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index b11faeab0..c0c8937a6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -104,6 +104,7 @@ pub use position::{coords_at_pos, pos_at_coords, Position}; pub use selection::{Range, Selection}; pub use smallvec::SmallVec; pub use syntax::Syntax; +pub use graphemes::RopeGraphemes; pub use diagnostic::Diagnostic; pub use state::State; diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 63b3e277c..0a646e93f 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -179,7 +179,7 @@ impl EditorView { // iterate over range char by char for grapheme in RopeGraphemes::new(text) { - if grapheme == "\n" { + if grapheme == "\n" || grapheme == "\r\n" { visual_x = 0; line += 1; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index a1c4b4073..8b735b9d1 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -8,7 +8,7 @@ use helix_core::{ chars::{char_is_linebreak, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, - ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction, + ChangeSet, Diagnostic, History, Rope, RopeSlice, RopeGraphemes, Selection, State, Syntax, Transaction, }; use crate::{DocumentId, ViewId}; @@ -22,12 +22,28 @@ pub enum Mode { Insert, } + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum IndentStyle { Tabs, Spaces(u8), } +/// Represents one of the valid Unicode line endings. +/// Also acts as an index into `LINE_ENDINGS`. 
+#[derive(PartialEq, Copy, Clone, Debug)] +pub enum LineEnding { + None = 0, // No line ending + CRLF = 1, // CarriageReturn followed by LineFeed + LF = 2, // U+000A -- LineFeed + VT = 3, // U+000B -- VerticalTab + FF = 4, // U+000C -- FormFeed + CR = 5, // U+000D -- CarriageReturn + NEL = 6, // U+0085 -- NextLine + LS = 7, // U+2028 -- Line Separator + PS = 8, // U+2029 -- ParagraphSeparator +} + pub struct Document { // rope + selection pub(crate) id: DocumentId, @@ -61,6 +77,7 @@ pub struct Document { diagnostics: Vec, language_server: Option>, + line_ending: LineEnding } use std::fmt; @@ -146,11 +163,61 @@ pub fn canonicalize_path(path: &Path) -> std::io::Result { std::env::current_dir().map(|current_dir| normalize_path(¤t_dir.join(path))) } +pub fn auto_detect_line_ending(doc: &Rope) -> LineEnding { + // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 + + let mut ending = LineEnding::None; + for line in doc.lines().take(1) { // check first line only - unsure how sound this is + // Get the line ending + ending = if line.len_chars() == 1 { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } else if line.len_chars() > 1 { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } else { + LineEnding::None + }; + } + ending +} + +pub fn rope_slice_to_line_ending(g: &RopeSlice) -> LineEnding { + if let Some(text) = g.as_str() { + str_to_line_ending(text) + } else if g == "\u{000D}\u{000A}" { + LineEnding::CRLF + } else { + // Not a line ending + LineEnding::None + } +} + +pub fn str_to_line_ending(g: &str) -> LineEnding { + match g { + "\u{000D}\u{000A}" => LineEnding::CRLF, + "\u{000A}" => LineEnding::LF, + "\u{000B}" => LineEnding::VT, + "\u{000C}" => LineEnding::FF, + "\u{000D}" => LineEnding::CR, + "\u{0085}" => LineEnding::NEL, + "\u{2028}" => LineEnding::LS, 
+ "\u{2029}" => LineEnding::PS, + + // Not a line ending + _ => LineEnding::None, + } +} + use helix_lsp::lsp; use url::Url; impl Document { - pub fn new(text: Rope) -> Self { + pub fn new(text: Rope, line_ending: LineEnding) -> Self { let changes = ChangeSet::new(&text); let old_state = None; @@ -171,6 +238,7 @@ impl Document { history: Cell::new(History::default()), last_saved_revision: 0, language_server: None, + line_ending: line_ending } } @@ -190,7 +258,10 @@ impl Document { doc }; - let mut doc = Self::new(doc); + // search for line endings + let line_ending = auto_detect_line_ending(&doc); + + let mut doc = Self::new(doc, line_ending); // set the path and try detecting the language doc.set_path(&path)?; doc.detect_indent_style(); diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 24f43c0ef..90abd0673 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1,4 +1,4 @@ -use crate::{theme::Theme, tree::Tree, Document, DocumentId, RegisterSelection, View, ViewId}; +use crate::{theme::Theme, tree::Tree, Document, DocumentId, RegisterSelection, View, ViewId, LineEnding}; use tui::layout::Rect; use tui::terminal::CursorKind; @@ -148,7 +148,7 @@ impl Editor { pub fn new_file(&mut self, action: Action) -> DocumentId { use helix_core::Rope; - let doc = Document::new(Rope::from("\n")); + let doc = Document::new(Rope::from("\n"), LineEnding::LF); let id = self.documents.insert(doc); self.documents[id].id = id; self.switch(id, action); diff --git a/helix-view/src/lib.rs b/helix-view/src/lib.rs index 7e2533200..e8c12af7d 100644 --- a/helix-view/src/lib.rs +++ b/helix-view/src/lib.rs @@ -10,6 +10,7 @@ new_key_type! { pub struct DocumentId; } new_key_type! 
{ pub struct ViewId; } pub use document::Document; +pub use document::LineEnding; pub use editor::Editor; pub use register_selection::RegisterSelection; pub use theme::Theme; From 17f69a03e0bd6c7bee7e26237dc47dfa0e0dd7c9 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Fri, 11 Jun 2021 23:57:16 +0200 Subject: [PATCH 02/26] ran cargo clippy and cargo fmt --- helix-core/src/lib.rs | 2 +- helix-view/src/document.rs | 51 +++++++++++++++++++------------------- helix-view/src/editor.rs | 4 ++- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index c0c8937a6..91d2bee08 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -100,11 +100,11 @@ pub use unicode_general_category::get_general_category; #[doc(inline)] pub use {regex, tree_sitter}; +pub use graphemes::RopeGraphemes; pub use position::{coords_at_pos, pos_at_coords, Position}; pub use selection::{Range, Selection}; pub use smallvec::SmallVec; pub use syntax::Syntax; -pub use graphemes::RopeGraphemes; pub use diagnostic::Diagnostic; pub use state::State; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 8b735b9d1..425210e62 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -8,7 +8,8 @@ use helix_core::{ chars::{char_is_linebreak, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, - ChangeSet, Diagnostic, History, Rope, RopeSlice, RopeGraphemes, Selection, State, Syntax, Transaction, + ChangeSet, Diagnostic, History, Rope, RopeGraphemes, RopeSlice, Selection, State, Syntax, + Transaction, }; use crate::{DocumentId, ViewId}; @@ -34,12 +35,12 @@ pub enum IndentStyle { #[derive(PartialEq, Copy, Clone, Debug)] pub enum LineEnding { None = 0, // No line ending - CRLF = 1, // CarriageReturn followed by LineFeed + Crlf = 1, // CarriageReturn followed by LineFeed LF = 2, // U+000A -- LineFeed VT = 3, // U+000B -- VerticalTab FF = 4, // U+000C -- FormFeed CR = 5, // U+000D -- 
CarriageReturn - NEL = 6, // U+0085 -- NextLine + Nel = 6, // U+0085 -- NextLine LS = 7, // U+2028 -- Line Separator PS = 8, // U+2029 -- ParagraphSeparator } @@ -77,7 +78,7 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - line_ending: LineEnding + _line_ending: LineEnding, } use std::fmt; @@ -166,31 +167,29 @@ pub fn canonicalize_path(path: &Path) -> std::io::Result { pub fn auto_detect_line_ending(doc: &Rope) -> LineEnding { // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 - let mut ending = LineEnding::None; - for line in doc.lines().take(1) { // check first line only - unsure how sound this is - // Get the line ending - ending = if line.len_chars() == 1 { - let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) - } else if line.len_chars() > 1 { - let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) - } else { - LineEnding::None - }; + let mut ending = LineEnding::None; + for line in doc.lines().take(1) { // check first line only - unsure how sound this is + ending = match line.len_chars() { + 1 => { let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g)} + n if n > 1 => { let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) } + _ => LineEnding::None + } - ending + } + ending } pub fn rope_slice_to_line_ending(g: &RopeSlice) -> LineEnding { if let Some(text) = g.as_str() { str_to_line_ending(text) } else if g == "\u{000D}\u{000A}" { - LineEnding::CRLF + LineEnding::Crlf } else { // Not a line ending LineEnding::None @@ -199,12 +198,12 @@ pub fn rope_slice_to_line_ending(g: &RopeSlice) -> LineEnding { pub fn str_to_line_ending(g: &str) -> LineEnding { match g { - "\u{000D}\u{000A}" => LineEnding::CRLF, + 
"\u{000D}\u{000A}" => LineEnding::Crlf, "\u{000A}" => LineEnding::LF, "\u{000B}" => LineEnding::VT, "\u{000C}" => LineEnding::FF, "\u{000D}" => LineEnding::CR, - "\u{0085}" => LineEnding::NEL, + "\u{0085}" => LineEnding::Nel, "\u{2028}" => LineEnding::LS, "\u{2029}" => LineEnding::PS, @@ -217,7 +216,7 @@ use helix_lsp::lsp; use url::Url; impl Document { - pub fn new(text: Rope, line_ending: LineEnding) -> Self { + pub fn new(text: Rope, _line_ending: LineEnding) -> Self { let changes = ChangeSet::new(&text); let old_state = None; @@ -238,7 +237,7 @@ impl Document { history: Cell::new(History::default()), last_saved_revision: 0, language_server: None, - line_ending: line_ending + _line_ending } } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 90abd0673..d7205fbd6 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1,4 +1,6 @@ -use crate::{theme::Theme, tree::Tree, Document, DocumentId, RegisterSelection, View, ViewId, LineEnding}; +use crate::{ + theme::Theme, tree::Tree, Document, DocumentId, LineEnding, RegisterSelection, View, ViewId, +}; use tui::layout::Rect; use tui::terminal::CursorKind; From 5eb69183926ab2f781aa08abf587ba338027854b Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Wed, 16 Jun 2021 17:05:14 +0200 Subject: [PATCH 03/26] resolved conflict in rebase --- helix-core/src/lib.rs | 2 + helix-core/src/line_ending.rs | 74 ++++++++++++++++++++++++++++++++++ helix-view/src/document.rs | 75 ++++++++++------------------------- helix-view/src/editor.rs | 10 +++-- helix-view/src/lib.rs | 1 - 5 files changed, 103 insertions(+), 59 deletions(-) create mode 100644 helix-core/src/line_ending.rs diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 91d2bee08..bc25b1bea 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -6,6 +6,7 @@ pub mod diagnostic; pub mod graphemes; pub mod history; pub mod indent; +pub mod line_ending; pub mod macros; pub mod match_brackets; pub mod movement; @@ -109,4 
+110,5 @@ pub use syntax::Syntax; pub use diagnostic::Diagnostic; pub use state::State; +pub use line_ending::{auto_detect_line_ending, default_line_ending, LineEnding}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs new file mode 100644 index 000000000..8e9b2ec04 --- /dev/null +++ b/helix-core/src/line_ending.rs @@ -0,0 +1,74 @@ +use crate::{Rope, RopeGraphemes, RopeSlice}; + +/// Represents one of the valid Unicode line endings. +#[derive(PartialEq, Copy, Clone, Debug)] +pub enum LineEnding { + Crlf, // CarriageReturn followed by LineFeed + LF, // U+000A -- LineFeed + VT, // U+000B -- VerticalTab + FF, // U+000C -- FormFeed + CR, // U+000D -- CarriageReturn + Nel, // U+0085 -- NextLine + LS, // U+2028 -- Line Separator + PS, // U+2029 -- ParagraphSeparator +} + +pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option { + if let Some(text) = g.as_str() { + str_to_line_ending(text) + } else if g == "\u{000D}\u{000A}" { + Some(LineEnding::Crlf) + } else { + // Not a line ending + None + } +} + +pub fn str_to_line_ending(g: &str) -> Option { + match g { + "\u{000D}\u{000A}" => Some(LineEnding::Crlf), + "\u{000A}" => Some(LineEnding::LF), + "\u{000B}" => Some(LineEnding::VT), + "\u{000C}" => Some(LineEnding::FF), + "\u{000D}" => Some(LineEnding::CR), + "\u{0085}" => Some(LineEnding::Nel), + "\u{2028}" => Some(LineEnding::LS), + "\u{2029}" => Some(LineEnding::PS), + + // Not a line ending + _ => None, + } +} + +pub fn auto_detect_line_ending(doc: &Rope) -> Option { + // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 + + let mut ending = None; + for line in doc.lines().take(1) { + // check first line only - unsure how sound this is + ending = match line.len_chars() { + 1 => { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } + n 
if n > 1 => { + let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) + .last() + .unwrap(); + rope_slice_to_line_ending(&g) + } + _ => None, + } + } + ending +} + +pub fn default_line_ending() -> Option { + if cfg!(windows) { + Some(LineEnding::Crlf) + } else { + Some(LineEnding::LF) + } +} diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 425210e62..48e899ba1 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -5,11 +5,14 @@ use std::path::{Component, Path, PathBuf}; use std::sync::Arc; use helix_core::{ +<<<<<<< HEAD chars::{char_is_linebreak, char_is_whitespace}, history::History, +======= + auto_detect_line_ending, default_line_ending, +>>>>>>> 491a8b3 (resolved conflict in rebase) syntax::{LanguageConfiguration, LOADER}, - ChangeSet, Diagnostic, History, Rope, RopeGraphemes, RopeSlice, Selection, State, Syntax, - Transaction, + ChangeSet, Diagnostic, History, LineEnding, Rope, Selection, State, Syntax, Transaction, }; use crate::{DocumentId, ViewId}; @@ -23,6 +26,7 @@ pub enum Mode { Insert, } +<<<<<<< HEAD #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum IndentStyle { @@ -45,6 +49,8 @@ pub enum LineEnding { PS = 8, // U+2029 -- ParagraphSeparator } +======= +>>>>>>> 491a8b3 (resolved conflict in rebase) pub struct Document { // rope + selection pub(crate) id: DocumentId, @@ -78,7 +84,7 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - _line_ending: LineEnding, + line_ending: Option, } use std::fmt; @@ -164,61 +170,14 @@ pub fn canonicalize_path(path: &Path) -> std::io::Result { std::env::current_dir().map(|current_dir| normalize_path(¤t_dir.join(path))) } -pub fn auto_detect_line_ending(doc: &Rope) -> LineEnding { - // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 - - let mut ending = LineEnding::None; - for line in doc.lines().take(1) { // check first line only - unsure how sound this is - ending = 
match line.len_chars() { - 1 => { let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g)} - n if n > 1 => { let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) } - _ => LineEnding::None - - } - } - ending -} - -pub fn rope_slice_to_line_ending(g: &RopeSlice) -> LineEnding { - if let Some(text) = g.as_str() { - str_to_line_ending(text) - } else if g == "\u{000D}\u{000A}" { - LineEnding::Crlf - } else { - // Not a line ending - LineEnding::None - } -} - -pub fn str_to_line_ending(g: &str) -> LineEnding { - match g { - "\u{000D}\u{000A}" => LineEnding::Crlf, - "\u{000A}" => LineEnding::LF, - "\u{000B}" => LineEnding::VT, - "\u{000C}" => LineEnding::FF, - "\u{000D}" => LineEnding::CR, - "\u{0085}" => LineEnding::Nel, - "\u{2028}" => LineEnding::LS, - "\u{2029}" => LineEnding::PS, - - // Not a line ending - _ => LineEnding::None, - } -} - use helix_lsp::lsp; use url::Url; impl Document { - pub fn new(text: Rope, _line_ending: LineEnding) -> Self { + pub fn new(text: Rope) -> Self { let changes = ChangeSet::new(&text); let old_state = None; + let line_ending = default_line_ending(); Self { id: DocumentId::default(), @@ -237,7 +196,7 @@ impl Document { history: Cell::new(History::default()), last_saved_revision: 0, language_server: None, - _line_ending + line_ending, } } @@ -260,10 +219,14 @@ impl Document { // search for line endings let line_ending = auto_detect_line_ending(&doc); - let mut doc = Self::new(doc, line_ending); + let mut doc = Self::new(doc); // set the path and try detecting the language doc.set_path(&path)?; +<<<<<<< HEAD doc.detect_indent_style(); +======= + doc.set_line_ending(line_ending); +>>>>>>> 491a8b3 (resolved conflict in rebase) Ok(doc) } @@ -522,6 +485,10 @@ impl Document { self.selections.insert(view_id, selection); } + pub fn set_line_ending(&mut self, line_ending: Option) { + self.line_ending = line_ending; + 
} + fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool { let old_doc = self.text().clone(); diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index d7205fbd6..c7e1fea7d 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1,6 +1,4 @@ -use crate::{ - theme::Theme, tree::Tree, Document, DocumentId, LineEnding, RegisterSelection, View, ViewId, -}; +use crate::{theme::Theme, tree::Tree, Document, DocumentId, RegisterSelection, View, ViewId}; use tui::layout::Rect; use tui::terminal::CursorKind; @@ -10,9 +8,13 @@ use slotmap::SlotMap; use anyhow::Error; +<<<<<<< HEAD pub use helix_core::diagnostic::Severity; pub use helix_core::register::Registers; use helix_core::Position; +======= +pub use helix_core::{diagnostic::Severity, LineEnding}; +>>>>>>> 491a8b3 (resolved conflict in rebase) #[derive(Debug)] pub struct Editor { @@ -150,7 +152,7 @@ impl Editor { pub fn new_file(&mut self, action: Action) -> DocumentId { use helix_core::Rope; - let doc = Document::new(Rope::from("\n"), LineEnding::LF); + let doc = Document::new(Rope::from("\n")); let id = self.documents.insert(doc); self.documents[id].id = id; self.switch(id, action); diff --git a/helix-view/src/lib.rs b/helix-view/src/lib.rs index e8c12af7d..7e2533200 100644 --- a/helix-view/src/lib.rs +++ b/helix-view/src/lib.rs @@ -10,7 +10,6 @@ new_key_type! { pub struct DocumentId; } new_key_type! 
{ pub struct ViewId; } pub use document::Document; -pub use document::LineEnding; pub use editor::Editor; pub use register_selection::RegisterSelection; pub use theme::Theme; From 9c419fe05cd51c96df29ac02e3dc5c73cae4ef97 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Wed, 16 Jun 2021 17:08:46 +0200 Subject: [PATCH 04/26] added more changes from pr review for line_ending_detection --- helix-core/src/lib.rs | 2 +- helix-core/src/line_ending.rs | 19 +++++-------------- helix-view/src/document.rs | 24 ++++++------------------ helix-view/src/editor.rs | 4 ---- 4 files changed, 12 insertions(+), 37 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index bc25b1bea..758e614ec 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -110,5 +110,5 @@ pub use syntax::Syntax; pub use diagnostic::Diagnostic; pub use state::State; -pub use line_ending::{auto_detect_line_ending, default_line_ending, LineEnding}; +pub use line_ending::{auto_detect_line_ending, DEFAULT_LINE_ENDING, LineEnding}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 8e9b2ec04..809dffc01 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -1,16 +1,14 @@ use crate::{Rope, RopeGraphemes, RopeSlice}; /// Represents one of the valid Unicode line endings. 
+/// VT, FF and PS are excluded here, as we don't expect them to show up as a default line break #[derive(PartialEq, Copy, Clone, Debug)] pub enum LineEnding { Crlf, // CarriageReturn followed by LineFeed LF, // U+000A -- LineFeed - VT, // U+000B -- VerticalTab - FF, // U+000C -- FormFeed CR, // U+000D -- CarriageReturn Nel, // U+0085 -- NextLine LS, // U+2028 -- Line Separator - PS, // U+2029 -- ParagraphSeparator } pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option { @@ -28,13 +26,9 @@ pub fn str_to_line_ending(g: &str) -> Option { match g { "\u{000D}\u{000A}" => Some(LineEnding::Crlf), "\u{000A}" => Some(LineEnding::LF), - "\u{000B}" => Some(LineEnding::VT), - "\u{000C}" => Some(LineEnding::FF), "\u{000D}" => Some(LineEnding::CR), "\u{0085}" => Some(LineEnding::Nel), "\u{2028}" => Some(LineEnding::LS), - "\u{2029}" => Some(LineEnding::PS), - // Not a line ending _ => None, } @@ -65,10 +59,7 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { ending } -pub fn default_line_ending() -> Option { - if cfg!(windows) { - Some(LineEnding::Crlf) - } else { - Some(LineEnding::LF) - } -} +#[cfg(target_os = "windows")] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; +#[cfg(not(target_os = "windows"))] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Lf; \ No newline at end of file diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 48e899ba1..704094a65 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -5,14 +5,10 @@ use std::path::{Component, Path, PathBuf}; use std::sync::Arc; use helix_core::{ -<<<<<<< HEAD chars::{char_is_linebreak, char_is_whitespace}, - history::History, -======= - auto_detect_line_ending, default_line_ending, ->>>>>>> 491a8b3 (resolved conflict in rebase) + auto_detect_line_ending, DEFAULT_LINE_ENDING, history::History, syntax::{LanguageConfiguration, LOADER}, - ChangeSet, Diagnostic, History, LineEnding, Rope, Selection, State, Syntax, Transaction, + ChangeSet, 
Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, }; use crate::{DocumentId, ViewId}; @@ -26,8 +22,6 @@ pub enum Mode { Insert, } -<<<<<<< HEAD - #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum IndentStyle { Tabs, @@ -49,8 +43,6 @@ pub enum LineEnding { PS = 8, // U+2029 -- ParagraphSeparator } -======= ->>>>>>> 491a8b3 (resolved conflict in rebase) pub struct Document { // rope + selection pub(crate) id: DocumentId, @@ -84,7 +76,7 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - line_ending: Option, + line_ending: LineEnding, } use std::fmt; @@ -177,7 +169,6 @@ impl Document { pub fn new(text: Rope) -> Self { let changes = ChangeSet::new(&text); let old_state = None; - let line_ending = default_line_ending(); Self { id: DocumentId::default(), @@ -196,7 +187,7 @@ impl Document { history: Cell::new(History::default()), last_saved_revision: 0, language_server: None, - line_ending, + line_ending: DEFAULT_LINE_ENDING, } } @@ -217,16 +208,13 @@ impl Document { }; // search for line endings - let line_ending = auto_detect_line_ending(&doc); + let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING); let mut doc = Self::new(doc); // set the path and try detecting the language doc.set_path(&path)?; -<<<<<<< HEAD doc.detect_indent_style(); -======= doc.set_line_ending(line_ending); ->>>>>>> 491a8b3 (resolved conflict in rebase) Ok(doc) } @@ -485,7 +473,7 @@ impl Document { self.selections.insert(view_id, selection); } - pub fn set_line_ending(&mut self, line_ending: Option) { + pub fn set_line_ending(&mut self, line_ending: LineEnding) { self.line_ending = line_ending; } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index c7e1fea7d..24f43c0ef 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -8,13 +8,9 @@ use slotmap::SlotMap; use anyhow::Error; -<<<<<<< HEAD pub use helix_core::diagnostic::Severity; pub use helix_core::register::Registers; use 
helix_core::Position; -======= -pub use helix_core::{diagnostic::Severity, LineEnding}; ->>>>>>> 491a8b3 (resolved conflict in rebase) #[derive(Debug)] pub struct Editor { From e4849f41beb2d35a4833a8b7de717b4f38b3f270 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sun, 13 Jun 2021 12:27:27 +0200 Subject: [PATCH 05/26] fix typo --- helix-core/src/line_ending.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 809dffc01..bcfecaf6c 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -62,4 +62,4 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { #[cfg(target_os = "windows")] pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; #[cfg(not(target_os = "windows"))] -pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Lf; \ No newline at end of file +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; \ No newline at end of file From a9a718c3cad3af7b9fa38cd1aaa6ceb6c7126130 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sun, 13 Jun 2021 21:38:31 +0200 Subject: [PATCH 06/26] added some tests and a line_ending helper function in document.rs --- helix-core/src/lib.rs | 2 +- helix-core/src/line_ending.rs | 64 ++++++++++++++++++++++++++++++++--- helix-view/src/document.rs | 16 ++++++++- 3 files changed, 76 insertions(+), 6 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 758e614ec..351240692 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -110,5 +110,5 @@ pub use syntax::Syntax; pub use diagnostic::Diagnostic; pub use state::State; -pub use line_ending::{auto_detect_line_ending, DEFAULT_LINE_ENDING, LineEnding}; +pub use line_ending::{auto_detect_line_ending, LineEnding, DEFAULT_LINE_ENDING}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index bcfecaf6c..47420f9e7 100644 --- 
a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -1,7 +1,6 @@ use crate::{Rope, RopeGraphemes, RopeSlice}; /// Represents one of the valid Unicode line endings. -/// VT, FF and PS are excluded here, as we don't expect them to show up as a default line break #[derive(PartialEq, Copy, Clone, Debug)] pub enum LineEnding { Crlf, // CarriageReturn followed by LineFeed @@ -9,6 +8,9 @@ pub enum LineEnding { CR, // U+000D -- CarriageReturn Nel, // U+0085 -- NextLine LS, // U+2028 -- Line Separator + VT, // U+000B -- VerticalTab + FF, // U+000C -- FormFeed + PS, // U+2029 -- ParagraphSeparator } pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option { @@ -38,8 +40,8 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 let mut ending = None; - for line in doc.lines().take(1) { - // check first line only - unsure how sound this is + // return first matched line ending. 
Not all possible line endings are being matched, as they might be special-use only + for line in doc.lines().take(100) { ending = match line.len_chars() { 1 => { let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) @@ -54,6 +56,9 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { rope_slice_to_line_ending(&g) } _ => None, + }; + if ending.is_some() { + return ending; } } ending @@ -62,4 +67,55 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { #[cfg(target_os = "windows")] pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; #[cfg(not(target_os = "windows"))] -pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; \ No newline at end of file +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; + +#[cfg(test)] +mod line_ending_tests { + use super::*; + + #[test] + fn test_autodetect() { + assert_eq!( + auto_detect_line_ending(&Rope::from_str("\n")), + Some(LineEnding::LF) + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("\r\n")), + Some(LineEnding::Crlf) + ); + assert_eq!(auto_detect_line_ending(&Rope::from_str("hello")), None); + assert_eq!(auto_detect_line_ending(&Rope::from_str("")), None); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("hello\nhelix\r\n")), + Some(LineEnding::LF) + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C}")), + None + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str("\n\u{000A}\n \u{000A}")), + Some(LineEnding::LF) + ); + assert_eq!( + auto_detect_line_ending(&Rope::from_str( + "a formfeed\u{000C} with a\u{000C} linefeed\u{000A}" + )), + Some(LineEnding::LF) + ); + assert_eq!(auto_detect_line_ending(&Rope::from_str("a formfeed\u{000C} with a\u{000C} carriage return linefeed\u{000D}\u{000A} and a linefeed\u{000A}")), Some(LineEnding::Crlf)); + } + + #[test] + fn test_rope_slice_to_line_ending() { + let r = Rope::from_str("\r\n"); + assert_eq!( + rope_slice_to_line_ending(&r.slice(1..2)), + Some(LineEnding::LF) + ); + assert_eq!( + 
rope_slice_to_line_ending(&r.slice(0..2)), + Some(LineEnding::Crlf) + ); + } +} diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 704094a65..bd5f80122 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -6,9 +6,9 @@ use std::sync::Arc; use helix_core::{ chars::{char_is_linebreak, char_is_whitespace}, - auto_detect_line_ending, DEFAULT_LINE_ENDING, history::History, syntax::{LanguageConfiguration, LOADER}, ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, + DEFAULT_LINE_ENDING, }; use crate::{DocumentId, ViewId}; @@ -740,6 +740,20 @@ impl Document { pub fn set_diagnostics(&mut self, diagnostics: Vec) { self.diagnostics = diagnostics; } + + pub fn line_ending(&self) -> &str { + match self.line_ending { + LineEnding::Crlf => "\u{000D}\u{000A}", + LineEnding::LF => "\u{000A}", + LineEnding::Nel => "\u{0085}", + LineEnding::LS => "\u{2028}", + LineEnding::CR => "\u{000D}", + _ => panic!( + "Unexpected line ending: {:?}, expected Crlf, LF, CR, Nel, or LS.", + self.line_ending + ), + } + } } #[cfg(test)] From a4f5a0134e308ffad318ab6920fe1ed0264a93cb Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Mon, 14 Jun 2021 15:09:54 +0200 Subject: [PATCH 07/26] trying out line ending helper functions in commands.rs --- helix-core/src/lib.rs | 4 +++- helix-core/src/line_ending.rs | 8 ++++++- helix-term/src/commands.rs | 44 +++++++++++++++++++++++++++-------- helix-term/src/ui/editor.rs | 4 ++-- helix-view/src/document.rs | 2 +- 5 files changed, 47 insertions(+), 15 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 351240692..55365500c 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -110,5 +110,7 @@ pub use syntax::Syntax; pub use diagnostic::Diagnostic; pub use state::State; -pub use line_ending::{auto_detect_line_ending, LineEnding, DEFAULT_LINE_ENDING}; +pub use line_ending::{ + auto_detect_line_ending, rope_slice_to_line_ending, LineEnding, 
DEFAULT_LINE_ENDING, +}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 47420f9e7..4f5708ecf 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -31,6 +31,9 @@ pub fn str_to_line_ending(g: &str) -> Option { "\u{000D}" => Some(LineEnding::CR), "\u{0085}" => Some(LineEnding::Nel), "\u{2028}" => Some(LineEnding::LS), + "\u{000B}" => Some(LineEnding::VT), + "\u{000C}" => Some(LineEnding::FF), + "\u{2029}" => Some(LineEnding::PS), // Not a line ending _ => None, } @@ -58,7 +61,10 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { _ => None, }; if ending.is_some() { - return ending; + match ending { + Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} + _ => return ending, + } } } ending diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index c80716d45..144e1f3ca 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -4,9 +4,14 @@ use helix_core::{ movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, +<<<<<<< HEAD register::{self, Register, Registers}, search, selection, Change, ChangeSet, Position, Range, Rope, RopeSlice, Selection, SmallVec, Tendril, Transaction, +======= + register, search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, + Selection, SmallVec, Tendril, Transaction, +>>>>>>> 856fd95 (trying out line ending helper functions in commands.rs) }; use helix_view::{ @@ -184,8 +189,14 @@ pub fn move_line_end(cx: &mut Context) { let line = text.char_to_line(range.head); // Line end is pos at the start of next line - 1 - // subtract another 1 because the line ends with \n - let pos = text.line_to_char(line + 1).saturating_sub(2); + // subtract 3 if the line ending is \r\n, otherwise subtract 2 as I assume all others are just 1 char length + let pos = + text.line_to_char(line + 1) + .saturating_sub(if 
doc.line_ending == LineEnding::Crlf { + 3 + } else { + 2 + }); Range::new(pos, pos) }); @@ -337,7 +348,7 @@ where KeyEvent { code: KeyCode::Enter, .. - } => '\n', + } => '\n', // TODO: we should be calling doc.line_ending() here KeyEvent { code: KeyCode::Char(ch), .. @@ -465,7 +476,7 @@ pub fn replace(cx: &mut Context) { KeyEvent { code: KeyCode::Enter, .. - } => Some('\n'), + } => Some('\n'), // TODO: we should be calling doc.line_ending() here _ => None, }; @@ -606,8 +617,14 @@ pub fn extend_line_end(cx: &mut Context) { let line = text.char_to_line(range.head); // Line end is pos at the start of next line - 1 - // subtract another 1 because the line ends with \n - let pos = text.line_to_char(line + 1).saturating_sub(2); + // subtract 3 if the line ending is \r\n, otherwise subtract 2 as I assume all others are just 1 char length + let pos = + text.line_to_char(line + 1) + .saturating_sub(if doc.line_ending == LineEnding::Crlf { + 3 + } else { + 2 + }); Range::new(range.anchor, pos) }); @@ -896,7 +913,7 @@ pub fn append_mode(cx: &mut Context) { if selection.iter().any(|range| range.head == end) { let transaction = Transaction::change( doc.text(), - std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]), + std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]), // TODO: change \n to doc.line_ending() ); doc.apply(&transaction, view.id); } @@ -1523,7 +1540,7 @@ fn open(cx: &mut Context, open: Open) { ); let indent = doc.indent_unit().repeat(indent_level); let mut text = String::with_capacity(1 + indent.len()); - text.push('\n'); + text.push_str(doc.line_ending()); text.push_str(&indent); let text = text.repeat(count); @@ -2131,7 +2148,7 @@ pub mod insert { ); let indent = doc.indent_unit().repeat(indent_level); let mut text = String::with_capacity(1 + indent.len()); - text.push('\n'); + text.push_str(doc.line_ending()); text.push_str(&indent); let head = pos + offs + text.chars().count(); @@ -2152,7 +2169,7 @@ pub mod insert { if 
helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) { // another newline, indent the end bracket one level less let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1)); - text.push('\n'); + text.push_str(doc.line_ending()); text.push_str(&indent); } @@ -2268,8 +2285,15 @@ fn paste_impl( .unwrap(), ); +<<<<<<< HEAD // if any of values ends \n it's linewise paste let linewise = values.iter().any(|value| value.ends_with('\n')); +======= + // if any of values ends \n it's linewise paste + let linewise = values + .iter() + .any(|value| value.ends_with(doc.line_ending())); +>>>>>>> 856fd95 (trying out line ending helper functions in commands.rs) let mut values = values.iter().cloned().map(Tendril::from).chain(repeat); diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 0a646e93f..d6010e6cb 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -7,7 +7,7 @@ use crate::{ }; use helix_core::{ - coords_at_pos, + coords_at_pos, rope_slice_to_line_ending, syntax::{self, HighlightEvent}, Position, Range, }; @@ -179,7 +179,7 @@ impl EditorView { // iterate over range char by char for grapheme in RopeGraphemes::new(text) { - if grapheme == "\n" || grapheme == "\r\n" { + if rope_slice_to_line_ending(&grapheme).is_some() { visual_x = 0; line += 1; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index bd5f80122..5f61f05a6 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -76,7 +76,7 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - line_ending: LineEnding, + pub line_ending: LineEnding, } use std::fmt; From 7cf0fa05a4ca6bf62d38836f3aa99a1ac585a261 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Wed, 16 Jun 2021 16:57:47 +0200 Subject: [PATCH 08/26] doc.line_ending() now returns &'static str --- helix-term/src/commands.rs | 4 ++-- helix-view/src/document.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/helix-term/src/commands.rs 
b/helix-term/src/commands.rs index 144e1f3ca..31e80345e 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -348,7 +348,7 @@ where KeyEvent { code: KeyCode::Enter, .. - } => '\n', // TODO: we should be calling doc.line_ending() here + } => '\n', KeyEvent { code: KeyCode::Char(ch), .. @@ -476,7 +476,7 @@ pub fn replace(cx: &mut Context) { KeyEvent { code: KeyCode::Enter, .. - } => Some('\n'), // TODO: we should be calling doc.line_ending() here + } => Some('\n'), _ => None, }; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 5f61f05a6..c15a42abc 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -741,7 +741,7 @@ impl Document { self.diagnostics = diagnostics; } - pub fn line_ending(&self) -> &str { + pub fn line_ending(&self) -> &'static str { match self.line_ending { LineEnding::Crlf => "\u{000D}\u{000A}", LineEnding::LF => "\u{000A}", From 9c3eadb2e4fd297abcc8ceb02b3088ab3b9b1ceb Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Wed, 16 Jun 2021 17:22:55 +0200 Subject: [PATCH 09/26] fixed some problems from rebasing --- helix-term/src/commands.rs | 22 ++++++---------------- helix-view/src/document.rs | 17 ++--------------- 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 31e80345e..644c2e231 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -4,14 +4,9 @@ use helix_core::{ movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, -<<<<<<< HEAD register::{self, Register, Registers}, - search, selection, Change, ChangeSet, Position, Range, Rope, RopeSlice, Selection, SmallVec, - Tendril, Transaction, -======= - register, search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, - Selection, SmallVec, Tendril, Transaction, ->>>>>>> 856fd95 (trying out line ending helper functions in commands.rs) + search, selection, Change, ChangeSet, LineEnding, 
Position, Range, Rope, RopeSlice, Selection, + SmallVec, Tendril, Transaction, }; use helix_view::{ @@ -348,7 +343,7 @@ where KeyEvent { code: KeyCode::Enter, .. - } => '\n', + } => '\n', KeyEvent { code: KeyCode::Char(ch), .. @@ -2285,15 +2280,10 @@ fn paste_impl( .unwrap(), ); -<<<<<<< HEAD // if any of values ends \n it's linewise paste - let linewise = values.iter().any(|value| value.ends_with('\n')); -======= - // if any of values ends \n it's linewise paste - let linewise = values - .iter() - .any(|value| value.ends_with(doc.line_ending())); ->>>>>>> 856fd95 (trying out line ending helper functions in commands.rs) + let linewise = values + .iter() + .any(|value| value.ends_with(doc.line_ending())); let mut values = values.iter().cloned().map(Tendril::from).chain(repeat); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index c15a42abc..bf26d8a00 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -5,7 +5,9 @@ use std::path::{Component, Path, PathBuf}; use std::sync::Arc; use helix_core::{ + auto_detect_line_ending, chars::{char_is_linebreak, char_is_whitespace}, + history::History, syntax::{LanguageConfiguration, LOADER}, ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, DEFAULT_LINE_ENDING, @@ -28,21 +30,6 @@ pub enum IndentStyle { Spaces(u8), } -/// Represents one of the valid Unicode line endings. -/// Also acts as an index into `LINE_ENDINGS`. 
-#[derive(PartialEq, Copy, Clone, Debug)] -pub enum LineEnding { - None = 0, // No line ending - Crlf = 1, // CarriageReturn followed by LineFeed - LF = 2, // U+000A -- LineFeed - VT = 3, // U+000B -- VerticalTab - FF = 4, // U+000C -- FormFeed - CR = 5, // U+000D -- CarriageReturn - Nel = 6, // U+0085 -- NextLine - LS = 7, // U+2028 -- Line Separator - PS = 8, // U+2029 -- ParagraphSeparator -} - pub struct Document { // rope + selection pub(crate) id: DocumentId, From 8bccd6df3054143baf128157d8dcecb10a911956 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Thu, 17 Jun 2021 13:49:50 +0200 Subject: [PATCH 10/26] applied changes from pr review --- helix-core/src/line_ending.rs | 23 +++++++++++++++++++++++ helix-term/src/commands.rs | 34 +++++++++++++--------------------- helix-view/src/document.rs | 16 +++------------- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 4f5708ecf..f9d67b573 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -13,6 +13,29 @@ pub enum LineEnding { PS, // U+2029 -- ParagraphSeparator } +impl LineEnding { + pub fn len(&self) -> usize { + match self { + Self::Crlf => 2, + _ => 1, + } + } + + pub fn as_str(&self) -> &str { + match self { + Self::Crlf => "\u{000D}\u{000A}", + Self::LF => "\u{000A}", + Self::Nel => "\u{0085}", + Self::LS => "\u{2028}", + Self::CR => "\u{000D}", + _ => panic!( + "Unexpected line ending: {:?}, expected Crlf, LF, CR, Nel, or LS.", + self + ), + } + } +} + pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option { if let Some(text) = g.as_str() { str_to_line_ending(text) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 644c2e231..62faadf9b 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -184,14 +184,10 @@ pub fn move_line_end(cx: &mut Context) { let line = text.char_to_line(range.head); // Line end is pos at the start of next line - 1 - // subtract 
3 if the line ending is \r\n, otherwise subtract 2 as I assume all others are just 1 char length - let pos = - text.line_to_char(line + 1) - .saturating_sub(if doc.line_ending == LineEnding::Crlf { - 3 - } else { - 2 - }); + // subtract another 1 because the line ends with \n + let pos = text + .line_to_char(line + 1) + .saturating_sub(doc.line_ending().len() + 1); Range::new(pos, pos) }); @@ -612,14 +608,10 @@ pub fn extend_line_end(cx: &mut Context) { let line = text.char_to_line(range.head); // Line end is pos at the start of next line - 1 - // subtract 3 if the line ending is \r\n, otherwise subtract 2 as I assume all others are just 1 char length - let pos = - text.line_to_char(line + 1) - .saturating_sub(if doc.line_ending == LineEnding::Crlf { - 3 - } else { - 2 - }); + // subtract another 1 because the line ends with \n + let pos = text + .line_to_char(line + 1) + .saturating_sub(doc.line_ending().len() + 1); Range::new(range.anchor, pos) }); @@ -908,7 +900,7 @@ pub fn append_mode(cx: &mut Context) { if selection.iter().any(|range| range.head == end) { let transaction = Transaction::change( doc.text(), - std::array::IntoIter::new([(end, end, Some(Tendril::from_char('\n')))]), // TODO: change \n to doc.line_ending() + std::array::IntoIter::new([(end, end, Some(doc.line_ending().as_str().into()))]), ); doc.apply(&transaction, view.id); } @@ -1535,7 +1527,7 @@ fn open(cx: &mut Context, open: Open) { ); let indent = doc.indent_unit().repeat(indent_level); let mut text = String::with_capacity(1 + indent.len()); - text.push_str(doc.line_ending()); + text.push_str(doc.line_ending().as_str()); text.push_str(&indent); let text = text.repeat(count); @@ -2143,7 +2135,7 @@ pub mod insert { ); let indent = doc.indent_unit().repeat(indent_level); let mut text = String::with_capacity(1 + indent.len()); - text.push_str(doc.line_ending()); + text.push_str(doc.line_ending().as_str()); text.push_str(&indent); let head = pos + offs + text.chars().count(); @@ -2164,7 +2156,7 @@ 
pub mod insert { if helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) { // another newline, indent the end bracket one level less let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1)); - text.push_str(doc.line_ending()); + text.push_str(doc.line_ending().as_str()); text.push_str(&indent); } @@ -2283,7 +2275,7 @@ fn paste_impl( // if any of values ends \n it's linewise paste let linewise = values .iter() - .any(|value| value.ends_with(doc.line_ending())); + .any(|value| value.ends_with(doc.line_ending().as_str())); let mut values = values.iter().cloned().map(Tendril::from).chain(repeat); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index bf26d8a00..fe06d09d8 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -63,7 +63,7 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - pub line_ending: LineEnding, + line_ending: LineEnding, } use std::fmt; @@ -728,18 +728,8 @@ impl Document { self.diagnostics = diagnostics; } - pub fn line_ending(&self) -> &'static str { - match self.line_ending { - LineEnding::Crlf => "\u{000D}\u{000A}", - LineEnding::LF => "\u{000A}", - LineEnding::Nel => "\u{0085}", - LineEnding::LS => "\u{2028}", - LineEnding::CR => "\u{000D}", - _ => panic!( - "Unexpected line ending: {:?}, expected Crlf, LF, CR, Nel, or LS.", - self.line_ending - ), - } + pub fn line_ending(&self) -> LineEnding { + self.line_ending } } From ecb884db98fbe6ed70743d1080ce7f78e121ba50 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sat, 19 Jun 2021 14:03:14 +0200 Subject: [PATCH 11/26] added get_line_ending from pr comment --- helix-core/src/lib.rs | 2 +- helix-core/src/line_ending.rs | 23 ++++++++++++++++++----- helix-term/src/commands.rs | 12 +++++------- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 55365500c..3f6bea5a9 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -111,6 +111,6 @@ pub use 
diagnostic::Diagnostic; pub use state::State; pub use line_ending::{ - auto_detect_line_ending, rope_slice_to_line_ending, LineEnding, DEFAULT_LINE_ENDING, + auto_detect_line_ending, rope_slice_to_line_ending, LineEnding, DEFAULT_LINE_ENDING, get_line_ending }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index f9d67b573..423f4b929 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -14,7 +14,7 @@ pub enum LineEnding { } impl LineEnding { - pub fn len(&self) -> usize { + pub fn len_chars(&self) -> usize { match self { Self::Crlf => 2, _ => 1, @@ -28,10 +28,9 @@ impl LineEnding { Self::Nel => "\u{0085}", Self::LS => "\u{2028}", Self::CR => "\u{000D}", - _ => panic!( - "Unexpected line ending: {:?}, expected Crlf, LF, CR, Nel, or LS.", - self - ), + Self::VT => "\u{000B}", + Self::FF => "\u{000C}", + Self::PS => "\u{2029}", } } } @@ -93,6 +92,20 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { ending } +/// Returns the passed line's line ending, if any. +pub fn get_line_ending(line: &RopeSlice) -> Option { + // Last character as str. + let g1 = line.slice(line.len_chars().saturating_sub(1)..).as_str().unwrap(); + + // Last two characters as str, or empty str if they're not contiguous. + // It's fine to punt on the non-contiguous case, because Ropey guarantees + // that CRLF is always contiguous. + let g2 = line.slice(line.len_chars().saturating_sub(2)..).as_str().unwrap_or(""); + + // First check the two-character case for CRLF, then check the single-character case. 
+ str_to_line_ending(g2).or_else(|| str_to_line_ending(g1)) +} + #[cfg(target_os = "windows")] pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; #[cfg(not(target_os = "windows"))] diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 62faadf9b..d894a646d 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -5,7 +5,7 @@ use helix_core::{ object, pos_at_coords, regex::{self, Regex}, register::{self, Register, Registers}, - search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, + search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, get_line_ending, SmallVec, Tendril, Transaction, }; @@ -183,11 +183,10 @@ pub fn move_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - // Line end is pos at the start of next line - 1 - // subtract another 1 because the line ends with \n let pos = text .line_to_char(line + 1) - .saturating_sub(doc.line_ending().len() + 1); + .saturating_sub(get_line_ending(&text.line(line)).map(|le| le.len_chars()).unwrap_or(0)); + Range::new(pos, pos) }); @@ -607,11 +606,10 @@ pub fn extend_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - // Line end is pos at the start of next line - 1 - // subtract another 1 because the line ends with \n let pos = text .line_to_char(line + 1) - .saturating_sub(doc.line_ending().len() + 1); + .saturating_sub(get_line_ending(&text.line(line)).map(|le| le.len_chars()).unwrap_or(0)); + Range::new(range.anchor, pos) }); From 97323dc2f90f81afc82bd929d111abda540bebe5 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sat, 19 Jun 2021 14:05:11 +0200 Subject: [PATCH 12/26] ran cargo fmt --- helix-core/src/lib.rs | 3 ++- helix-core/src/line_ending.rs | 10 ++++++++-- helix-term/src/commands.rs | 22 +++++++++++++--------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/helix-core/src/lib.rs 
b/helix-core/src/lib.rs index 3f6bea5a9..c02d41f88 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -111,6 +111,7 @@ pub use diagnostic::Diagnostic; pub use state::State; pub use line_ending::{ - auto_detect_line_ending, rope_slice_to_line_ending, LineEnding, DEFAULT_LINE_ENDING, get_line_ending + auto_detect_line_ending, get_line_ending, rope_slice_to_line_ending, LineEnding, + DEFAULT_LINE_ENDING, }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 423f4b929..2cc5b5d84 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -95,12 +95,18 @@ pub fn auto_detect_line_ending(doc: &Rope) -> Option { /// Returns the passed line's line ending, if any. pub fn get_line_ending(line: &RopeSlice) -> Option { // Last character as str. - let g1 = line.slice(line.len_chars().saturating_sub(1)..).as_str().unwrap(); + let g1 = line + .slice(line.len_chars().saturating_sub(1)..) + .as_str() + .unwrap(); // Last two characters as str, or empty str if they're not contiguous. // It's fine to punt on the non-contiguous case, because Ropey guarantees // that CRLF is always contiguous. - let g2 = line.slice(line.len_chars().saturating_sub(2)..).as_str().unwrap_or(""); + let g2 = line + .slice(line.len_chars().saturating_sub(2)..) + .as_str() + .unwrap_or(""); // First check the two-character case for CRLF, then check the single-character case. 
str_to_line_ending(g2).or_else(|| str_to_line_ending(g1)) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index d894a646d..463869f6e 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,11 +1,11 @@ use helix_core::{ - comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent, - match_brackets, + comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, + indent, match_brackets, movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, register::{self, Register, Registers}, - search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, get_line_ending, + search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, SmallVec, Tendril, Transaction, }; @@ -183,9 +183,11 @@ pub fn move_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = text - .line_to_char(line + 1) - .saturating_sub(get_line_ending(&text.line(line)).map(|le| le.len_chars()).unwrap_or(0)); + let pos = text.line_to_char(line + 1).saturating_sub( + get_line_ending(&text.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ); Range::new(pos, pos) }); @@ -606,9 +608,11 @@ pub fn extend_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = text - .line_to_char(line + 1) - .saturating_sub(get_line_ending(&text.line(line)).map(|le| le.len_chars()).unwrap_or(0)); + let pos = text.line_to_char(line + 1).saturating_sub( + get_line_ending(&text.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ); Range::new(range.anchor, pos) }); From 1e80fbb6020c75bdc5d7e3edc9513b4ed52c4e6b Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sat, 19 Jun 2021 14:58:49 +0200 Subject: [PATCH 13/26] fix merge issue --- helix-term/src/commands.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 07d2999b1..af2e8eeef 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1668,16 +1668,16 @@ fn open(cx: &mut Context, open: Open) { let mut ranges = SmallVec::with_capacity(selection.len()); let mut offs = 0; - let line = match open { - // adjust position to the end of the line (next line - 1) - Open::Below => line + 1, - // adjust position to the end of the previous line (current line - 1) - Open::Above => line, - }; - let mut transaction = Transaction::change_by_selection(contents, selection, |range| { let line = text.char_to_line(range.head); + let line = match open { + // adjust position to the end of the line (next line - 1) + Open::Below => line + 1, + // adjust position to the end of the previous line (current line - 1) + Open::Above => line, + }; + // insert newlines after this index for both Above and Below variants let linend_index = doc.text().line_to_char(line).saturating_sub(1); From 701eb0dd6800e75116f36e503787dd0f50df709e Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sun, 20 Jun 2021 01:24:36 +0200 Subject: [PATCH 14/26] changed some hardcoded newlines, removed a else if in line_ending.rs --- helix-core/src/line_ending.rs | 5 ++--- helix-core/src/movement.rs | 6 ++++-- helix-term/src/commands.rs | 20 +++++++++++++++----- helix-view/src/document.rs | 2 +- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 2cc5b5d84..33f8d0783 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -38,9 +38,8 @@ impl LineEnding { pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option { if let Some(text) = g.as_str() { str_to_line_ending(text) - } else if g == "\u{000D}\u{000A}" { - Some(LineEnding::Crlf) - } else { + } + else { // Not a line ending None } diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 8b1e802f4..cf7ea8548 100644 --- 
a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -5,7 +5,7 @@ use ropey::iter::Chars; use crate::{ coords_at_pos, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - pos_at_coords, Position, Range, RopeSlice, + pos_at_coords, Position, Range, RopeSlice, get_line_ending }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -38,7 +38,9 @@ pub fn move_horizontally( } Direction::Forward => { // Line end is pos at the start of next line - 1 - let end = slice.line_to_char(line + 1).saturating_sub(1); + let end = slice.line_to_char(line + 1).saturating_sub(get_line_ending(&slice.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0)); nth_next_grapheme_boundary(slice, pos, count).min(end) } }; diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index af2e8eeef..cee0e752f 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -6,7 +6,7 @@ use helix_core::{ regex::{self, Regex}, register::{self, Register, Registers}, search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, - SmallVec, Tendril, Transaction, + SmallVec, Tendril, Transaction, DEFAULT_LINE_ENDING, }; use helix_view::{ @@ -627,7 +627,7 @@ fn replace(cx: &mut Context) { KeyEvent { code: KeyCode::Enter, .. 
- } => Some('\n'), + } => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING _ => None, }; @@ -982,7 +982,13 @@ fn delete_selection_impl(reg: &mut Register, doc: &mut Document, view_id: ViewId // then delete let transaction = Transaction::change_by_selection(doc.text(), doc.selection(view_id), |range| { - let max_to = doc.text().len_chars().saturating_sub(1); + let alltext = doc.text(); + let line = alltext.char_to_line(range.head); + let max_to = doc.text().len_chars().saturating_sub( + get_line_ending(&alltext.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ); let to = std::cmp::min(max_to, range.to() + 1); (range.from(), to, None) }); @@ -1644,8 +1650,12 @@ fn append_to_line(cx: &mut Context) { let selection = doc.selection(view.id).transform(|range| { let text = doc.text(); let line = text.char_to_line(range.head); - // we can't use line_to_char(line + 1) - 2 because the last line might not contain \n - let pos = (text.line_to_char(line) + text.line(line).len_chars()).saturating_sub(1); + // we can't use line_to_char(line + 1) - 2 because the last line might not contain a newline + let pos = (text.line_to_char(line) + text.line(line).len_chars()).saturating_sub( + get_line_ending(&text.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ); Range::new(pos, pos) }); doc.set_selection(view.id, selection); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 49d270e4d..44d505830 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -255,7 +255,7 @@ impl Document { use std::{fs::File, io::BufReader}; let doc = if !path.exists() { - Rope::from("\n") + Rope::from(DEFAULT_LINE_ENDING.as_str()) } else { let file = File::open(&path).context(format!("unable to open {:?}", path))?; let mut doc = Rope::from_reader(BufReader::new(file))?; From 8634e04a31f8f761b3d0505528295d31d63c7918 Mon Sep 17 00:00:00 2001 From: Jan Hrastnik Date: Sun, 20 Jun 2021 02:22:10 +0200 Subject: [PATCH 15/26] added the line_end 
helper function --- helix-core/src/lib.rs | 2 +- helix-core/src/line_ending.rs | 6 ++++++ helix-core/src/movement.rs | 6 ++---- helix-term/src/commands.rs | 21 ++++----------------- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 9ac506a63..d99bb66d1 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -114,6 +114,6 @@ pub use state::State; pub use line_ending::{ auto_detect_line_ending, get_line_ending, rope_slice_to_line_ending, LineEnding, - DEFAULT_LINE_ENDING, + DEFAULT_LINE_ENDING, line_end }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 33f8d0783..f6118493d 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -111,6 +111,12 @@ pub fn get_line_ending(line: &RopeSlice) -> Option { str_to_line_ending(g2).or_else(|| str_to_line_ending(g1)) } +pub fn line_end(slice: &RopeSlice, line: usize) -> usize { + slice.line_to_char(line + 1).saturating_sub(get_line_ending(&slice.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0)) +} + #[cfg(target_os = "windows")] pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; #[cfg(not(target_os = "windows"))] diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index cf7ea8548..a3cd9b96f 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -5,7 +5,7 @@ use ropey::iter::Chars; use crate::{ coords_at_pos, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - pos_at_coords, Position, Range, RopeSlice, get_line_ending + pos_at_coords, Position, Range, RopeSlice, get_line_ending, line_end }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -38,9 +38,7 @@ pub fn move_horizontally( } Direction::Forward => { // Line end is pos at the start of next line - 1 - let end = slice.line_to_char(line + 1).saturating_sub(get_line_ending(&slice.line(line)) - .map(|le| 
le.len_chars()) - .unwrap_or(0)); + let end = line_end(&slice, line); nth_next_grapheme_boundary(slice, pos, count).min(end) } }; diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index cee0e752f..8124c17af 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,6 +1,6 @@ use helix_core::{ comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, - indent, match_brackets, + indent, line_end, match_brackets, movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, @@ -342,11 +342,7 @@ fn move_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = text.line_to_char(line + 1).saturating_sub( - get_line_ending(&text.line(line)) - .map(|le| le.len_chars()) - .unwrap_or(0), - ); + let pos = line_end(&text.slice(..), line); Range::new(pos, pos) }); @@ -767,11 +763,7 @@ fn extend_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = text.line_to_char(line + 1).saturating_sub( - get_line_ending(&text.line(line)) - .map(|le| le.len_chars()) - .unwrap_or(0), - ); + let pos = line_end(&text.slice(..), line); Range::new(range.anchor, pos) }); @@ -1650,12 +1642,7 @@ fn append_to_line(cx: &mut Context) { let selection = doc.selection(view.id).transform(|range| { let text = doc.text(); let line = text.char_to_line(range.head); - // we can't use line_to_char(line + 1) - 2 because the last line might not contain a newline - let pos = (text.line_to_char(line) + text.line(line).len_chars()).saturating_sub( - get_line_ending(&text.line(line)) - .map(|le| le.len_chars()) - .unwrap_or(0), - ); + let pos = line_end(&text.slice(..), line); Range::new(pos, pos) }); doc.set_selection(view.id, selection); From 5d22e3c4e574eb24260966de7f20f582e6184e24 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sun, 20 Jun 2021 00:40:41 -0700 Subject: [PATCH 16/26] Misc fixes and clean up of line ending 
detect code. --- helix-core/src/lib.rs | 3 +- helix-core/src/line_ending.rs | 99 +++++++++++++++-------------------- helix-core/src/movement.rs | 4 +- helix-term/src/ui/editor.rs | 6 +-- helix-view/src/document.rs | 2 +- 5 files changed, 50 insertions(+), 64 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index d99bb66d1..e00e56be6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -113,7 +113,6 @@ pub use diagnostic::Diagnostic; pub use state::State; pub use line_ending::{ - auto_detect_line_ending, get_line_ending, rope_slice_to_line_ending, LineEnding, - DEFAULT_LINE_ENDING, line_end + auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING, }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index f6118493d..45e20c888 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -5,11 +5,11 @@ use crate::{Rope, RopeGraphemes, RopeSlice}; pub enum LineEnding { Crlf, // CarriageReturn followed by LineFeed LF, // U+000A -- LineFeed + VT, // U+000B -- VerticalTab + FF, // U+000C -- FormFeed CR, // U+000D -- CarriageReturn Nel, // U+0085 -- NextLine LS, // U+2028 -- Line Separator - VT, // U+000B -- VerticalTab - FF, // U+000C -- FormFeed PS, // U+2029 -- ParagraphSeparator } @@ -21,74 +21,58 @@ impl LineEnding { } } - pub fn as_str(&self) -> &str { + pub fn as_str(&self) -> &'static str { match self { Self::Crlf => "\u{000D}\u{000A}", Self::LF => "\u{000A}", - Self::Nel => "\u{0085}", - Self::LS => "\u{2028}", - Self::CR => "\u{000D}", Self::VT => "\u{000B}", Self::FF => "\u{000C}", + Self::CR => "\u{000D}", + Self::Nel => "\u{0085}", + Self::LS => "\u{2028}", Self::PS => "\u{2029}", } } -} -pub fn rope_slice_to_line_ending(g: &RopeSlice) -> Option { - if let Some(text) = g.as_str() { - str_to_line_ending(text) - } - else { - // Not a line ending - None + pub fn from_str(g: &str) -> 
Option { + match g { + "\u{000D}\u{000A}" => Some(LineEnding::Crlf), + "\u{000A}" => Some(LineEnding::LF), + "\u{000B}" => Some(LineEnding::VT), + "\u{000C}" => Some(LineEnding::FF), + "\u{000D}" => Some(LineEnding::CR), + "\u{0085}" => Some(LineEnding::Nel), + "\u{2028}" => Some(LineEnding::LS), + "\u{2029}" => Some(LineEnding::PS), + // Not a line ending + _ => None, + } } -} -pub fn str_to_line_ending(g: &str) -> Option { - match g { - "\u{000D}\u{000A}" => Some(LineEnding::Crlf), - "\u{000A}" => Some(LineEnding::LF), - "\u{000D}" => Some(LineEnding::CR), - "\u{0085}" => Some(LineEnding::Nel), - "\u{2028}" => Some(LineEnding::LS), - "\u{000B}" => Some(LineEnding::VT), - "\u{000C}" => Some(LineEnding::FF), - "\u{2029}" => Some(LineEnding::PS), - // Not a line ending - _ => None, + pub fn from_rope_slice(g: &RopeSlice) -> Option { + if let Some(text) = g.as_str() { + LineEnding::from_str(text) + } else { + // Non-contiguous, so it can't be a line ending. + // Specifically, Ropey guarantees that CRLF is always + // contiguous. And the remaining line endings are all + // single `char`s, and therefore trivially contiguous. + None + } } } +/// Attempts to detect what line ending the passed document uses. pub fn auto_detect_line_ending(doc: &Rope) -> Option { - // based on https://github.com/cessen/led/blob/27572c8838a1c664ee378a19358604063881cc1d/src/editor/mod.rs#L88-L162 - - let mut ending = None; - // return first matched line ending. Not all possible line endings are being matched, as they might be special-use only + // Return first matched line ending. 
Not all possible line endings + // are being matched, as they might be special-use only for line in doc.lines().take(100) { - ending = match line.len_chars() { - 1 => { - let g = RopeGraphemes::new(line.slice((line.len_chars() - 1)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) - } - n if n > 1 => { - let g = RopeGraphemes::new(line.slice((line.len_chars() - 2)..)) - .last() - .unwrap(); - rope_slice_to_line_ending(&g) - } - _ => None, - }; - if ending.is_some() { - match ending { - Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} - _ => return ending, - } + match get_line_ending(&line) { + None | Some(LineEnding::VT) | Some(LineEnding::FF) | Some(LineEnding::PS) => {} + ending => return ending, } } - ending + None } /// Returns the passed line's line ending, if any. @@ -108,13 +92,16 @@ pub fn get_line_ending(line: &RopeSlice) -> Option { .unwrap_or(""); // First check the two-character case for CRLF, then check the single-character case. - str_to_line_ending(g2).or_else(|| str_to_line_ending(g1)) + LineEnding::from_str(g2).or_else(|| LineEnding::from_str(g1)) } +/// Returns the char index of the end of the given line, not including its line ending. 
pub fn line_end(slice: &RopeSlice, line: usize) -> usize { - slice.line_to_char(line + 1).saturating_sub(get_line_ending(&slice.line(line)) - .map(|le| le.len_chars()) - .unwrap_or(0)) + slice.line_to_char(line + 1).saturating_sub( + get_line_ending(&slice.line(line)) + .map(|le| le.len_chars()) + .unwrap_or(0), + ) } #[cfg(target_os = "windows")] diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index a3cd9b96f..7f47e6625 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -3,9 +3,9 @@ use std::iter::{self, from_fn, Peekable, SkipWhile}; use ropey::iter::Chars; use crate::{ - coords_at_pos, + coords_at_pos, get_line_ending, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - pos_at_coords, Position, Range, RopeSlice, get_line_ending, line_end + line_end, pos_at_coords, Position, Range, RopeSlice, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 42bb3ba8a..da8f0f531 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -7,9 +7,9 @@ use crate::{ }; use helix_core::{ - coords_at_pos, rope_slice_to_line_ending, + coords_at_pos, syntax::{self, HighlightEvent}, - Position, Range, + LineEnding, Position, Range, }; use helix_view::input::{KeyCode, KeyEvent, KeyModifiers}; use helix_view::{document::Mode, Document, Editor, Theme, View}; @@ -177,7 +177,7 @@ impl EditorView { // iterate over range char by char for grapheme in RopeGraphemes::new(text) { - if rope_slice_to_line_ending(&grapheme).is_some() { + if LineEnding::from_rope_slice(&grapheme).is_some() { visual_x = 0; line += 1; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 44d505830..80be1ed25 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -255,7 +255,7 @@ impl Document { use std::{fs::File, io::BufReader}; let doc = if !path.exists() { - Rope::from(DEFAULT_LINE_ENDING.as_str()) + 
Rope::from(DEFAULT_LINE_ENDING.as_str()) } else { let file = File::open(&path).context(format!("unable to open {:?}", path))?; let mut doc = Rope::from_reader(BufReader::new(file))?; From 4efd6713c5b30b33c497a1f85b77a7b0a7fd17e0 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sun, 20 Jun 2021 15:09:10 -0700 Subject: [PATCH 17/26] Work on moving code over to LineEnding instead of assuming '\n'. Also some general cleanup and some minor fixes along the way. --- Cargo.lock | 1 + helix-core/src/auto_pairs.rs | 2 +- helix-core/src/chars.rs | 122 ++++++++++++++++++++--- helix-core/src/lib.rs | 2 +- helix-core/src/line_ending.rs | 47 ++++++--- helix-core/src/movement.rs | 167 ++++++++------------------------ helix-core/src/position.rs | 6 +- helix-core/src/syntax.rs | 9 +- helix-lsp/src/client.rs | 7 +- helix-term/src/commands.rs | 12 ++- helix-term/src/ui/markdown.rs | 2 + helix-tui/Cargo.toml | 1 + helix-tui/src/text.rs | 3 +- helix-tui/src/widgets/reflow.rs | 9 +- helix-view/src/document.rs | 24 ++--- helix-view/src/editor.rs | 4 +- 16 files changed, 228 insertions(+), 190 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24c277e12..a1de71385 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -331,6 +331,7 @@ dependencies = [ "bitflags", "cassowary", "crossterm", + "helix-core", "serde", "unicode-segmentation", "unicode-width", diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs index 74e25ac90..746f201a4 100644 --- a/helix-core/src/auto_pairs.rs +++ b/helix-core/src/auto_pairs.rs @@ -12,7 +12,7 @@ pub const PAIRS: &[(char, char)] = &[ ('`', '`'), ]; -const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline +const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines // insert hook: // Fn(doc, selection, char) => Option diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs index 243a13743..24133dd33 100644 --- a/helix-core/src/chars.rs +++ b/helix-core/src/chars.rs 
@@ -1,25 +1,44 @@ -/// Determine whether a character is a line break. -pub fn char_is_linebreak(c: char) -> bool { - matches!( - c, - '\u{000A}' | // LineFeed - '\u{000B}' | // VerticalTab - '\u{000C}' | // FormFeed - '\u{000D}' | // CarriageReturn - '\u{0085}' | // NextLine - '\u{2028}' | // Line Separator - '\u{2029}' // ParagraphSeparator - ) +use crate::LineEnding; + +#[derive(Debug, Eq, PartialEq)] +pub enum CharCategory { + Whitespace, + Eol, + Word, + Punctuation, + Unknown, +} + +#[inline] +pub fn categorize_char(ch: char) -> CharCategory { + if char_is_line_ending(ch) { + CharCategory::Eol + } else if ch.is_whitespace() { + CharCategory::Whitespace + } else if char_is_word(ch) { + CharCategory::Word + } else if char_is_punctuation(ch) { + CharCategory::Punctuation + } else { + CharCategory::Unknown + } +} + +/// Determine whether a character is a line ending. +#[inline] +pub fn char_is_line_ending(ch: char) -> bool { + LineEnding::from_char(ch).is_some() } /// Determine whether a character qualifies as (non-line-break) /// whitespace. -pub fn char_is_whitespace(c: char) -> bool { +#[inline] +pub fn char_is_whitespace(ch: char) -> bool { // TODO: this is a naive binary categorization of whitespace // characters. For display, word wrapping, etc. we'll need a better // categorization based on e.g. breaking vs non-breaking spaces // and whether they're zero-width or not. - match c { + match ch { //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace) '\u{0009}' | // Character Tabulation '\u{0020}' | // Space @@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool { // En Quad, Em Quad, En Space, Em Space, Three-per-em Space, // Four-per-em Space, Six-per-em Space, Figure Space, // Punctuation Space, Thin Space, Hair Space, Zero Width Space. 
- c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true, + ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true, _ => false, } } + +#[inline] +pub fn char_is_punctuation(ch: char) -> bool { + use unicode_general_category::{get_general_category, GeneralCategory}; + + matches!( + get_general_category(ch), + GeneralCategory::OtherPunctuation + | GeneralCategory::OpenPunctuation + | GeneralCategory::ClosePunctuation + | GeneralCategory::InitialPunctuation + | GeneralCategory::FinalPunctuation + | GeneralCategory::ConnectorPunctuation + | GeneralCategory::DashPunctuation + | GeneralCategory::MathSymbol + | GeneralCategory::CurrencySymbol + | GeneralCategory::ModifierSymbol + ) +} + +#[inline] +pub fn char_is_word(ch: char) -> bool { + ch.is_alphanumeric() || ch == '_' +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_categorize() { + const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; + const WORD_TEST_CASE: &'static str = + "_hello_world_あいうえおー12345678901234567890"; + const PUNCTUATION_TEST_CASE: &'static str = + "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~"; + const WHITESPACE_TEST_CASE: &'static str = "      "; + + for ch in EOL_TEST_CASE.chars() { + assert_eq!(CharCategory::Eol, categorize_char(ch)); + } + + for ch in WHITESPACE_TEST_CASE.chars() { + assert_eq!( + CharCategory::Whitespace, + categorize_char(ch), + "Testing '{}', but got `{:?}` instead of `Category::Whitespace`", + ch, + categorize_char(ch) + ); + } + + for ch in WORD_TEST_CASE.chars() { + assert_eq!( + CharCategory::Word, + categorize_char(ch), + "Testing '{}', but got `{:?}` instead of `Category::Word`", + ch, + categorize_char(ch) + ); + } + + for ch in PUNCTUATION_TEST_CASE.chars() { + assert_eq!( + CharCategory::Punctuation, + categorize_char(ch), + "Testing '{}', but got `{:?}` instead of `Category::Punctuation`", + ch, + categorize_char(ch) + ); + } + } +} diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 
e00e56be6..183b9f0a6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -113,6 +113,6 @@ pub use diagnostic::Diagnostic; pub use state::State; pub use line_ending::{ - auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING, + auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING, }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 45e20c888..c4636c632 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -1,5 +1,10 @@ use crate::{Rope, RopeGraphemes, RopeSlice}; +#[cfg(target_os = "windows")] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; +#[cfg(not(target_os = "windows"))] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; + /// Represents one of the valid Unicode line endings. #[derive(PartialEq, Copy, Clone, Debug)] pub enum LineEnding { @@ -14,6 +19,7 @@ pub enum LineEnding { } impl LineEnding { + #[inline] pub fn len_chars(&self) -> usize { match self { Self::Crlf => 2, @@ -21,6 +27,7 @@ impl LineEnding { } } + #[inline] pub fn as_str(&self) -> &'static str { match self { Self::Crlf => "\u{000D}\u{000A}", @@ -34,6 +41,22 @@ impl LineEnding { } } + #[inline] + pub fn from_char(ch: char) -> Option { + match ch { + '\u{000A}' => Some(LineEnding::LF), + '\u{000B}' => Some(LineEnding::VT), + '\u{000C}' => Some(LineEnding::FF), + '\u{000D}' => Some(LineEnding::CR), + '\u{0085}' => Some(LineEnding::Nel), + '\u{2028}' => Some(LineEnding::LS), + '\u{2029}' => Some(LineEnding::PS), + // Not a line ending + _ => None, + } + } + + #[inline] pub fn from_str(g: &str) -> Option { match g { "\u{000D}\u{000A}" => Some(LineEnding::Crlf), @@ -49,6 +72,7 @@ impl LineEnding { } } + #[inline] pub fn from_rope_slice(g: &RopeSlice) -> Option { if let Some(text) = g.as_str() { LineEnding::from_str(text) @@ -62,6 +86,11 @@ impl LineEnding { } } 
+#[inline] +pub fn str_is_line_ending(s: &str) -> bool { + LineEnding::from_str(s).is_some() +} + /// Attempts to detect what line ending the passed document uses. pub fn auto_detect_line_ending(doc: &Rope) -> Option { // Return first matched line ending. Not all possible line endings @@ -96,19 +125,13 @@ pub fn get_line_ending(line: &RopeSlice) -> Option { } /// Returns the char index of the end of the given line, not including its line ending. -pub fn line_end(slice: &RopeSlice, line: usize) -> usize { - slice.line_to_char(line + 1).saturating_sub( - get_line_ending(&slice.line(line)) +pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize { + slice.line_to_char(line + 1) + - get_line_ending(&slice.line(line)) .map(|le| le.len_chars()) - .unwrap_or(0), - ) + .unwrap_or(0) } -#[cfg(target_os = "windows")] -pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; -#[cfg(not(target_os = "windows"))] -pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; - #[cfg(test)] mod line_ending_tests { use super::*; @@ -150,11 +173,11 @@ mod line_ending_tests { fn test_rope_slice_to_line_ending() { let r = Rope::from_str("\r\n"); assert_eq!( - rope_slice_to_line_ending(&r.slice(1..2)), + LineEnding::from_rope_slice(&r.slice(1..2)), Some(LineEnding::LF) ); assert_eq!( - rope_slice_to_line_ending(&r.slice(0..2)), + LineEnding::from_rope_slice(&r.slice(0..2)), Some(LineEnding::Crlf) ); } diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 7f47e6625..d0023e9f8 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -3,9 +3,13 @@ use std::iter::{self, from_fn, Peekable, SkipWhile}; use ropey::iter::Chars; use crate::{ + chars::{ + categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace, + char_is_word, CharCategory, + }, coords_at_pos, get_line_ending, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - line_end, pos_at_coords, Position, Range, RopeSlice, + 
line_end_char_index, pos_at_coords, Position, Range, RopeSlice, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -37,9 +41,8 @@ pub fn move_horizontally( nth_prev_grapheme_boundary(slice, pos, count).max(start) } Direction::Forward => { - // Line end is pos at the start of next line - 1 - let end = line_end(&slice, line); - nth_next_grapheme_boundary(slice, pos, count).min(end) + let end_char_idx = line_end_char_index(&slice, line); + nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx) } }; let anchor = match behaviour { @@ -68,8 +71,11 @@ pub fn move_vertically( ), }; - // convert to 0-indexed, subtract another 1 because len_chars() counts \n - let new_line_len = slice.line(new_line).len_chars().saturating_sub(2); + // Length of the line sans line-ending. + let new_line_len = { + let line = slice.line(new_line); + line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0) + }; let new_col = std::cmp::min(horiz as usize, new_line_len); @@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio } // ---- util ------------ -#[inline] -pub(crate) fn is_word(ch: char) -> bool { - ch.is_alphanumeric() || ch == '_' -} - -#[inline] -pub(crate) fn is_end_of_line(ch: char) -> bool { - ch == '\n' -} - -#[inline] -// Whitespace, but not end of line -pub(crate) fn is_strict_whitespace(ch: char) -> bool { - ch.is_whitespace() && !is_end_of_line(ch) -} - -#[inline] -pub(crate) fn is_punctuation(ch: char) -> bool { - use unicode_general_category::{get_general_category, GeneralCategory}; - - matches!( - get_general_category(ch), - GeneralCategory::OtherPunctuation - | GeneralCategory::OpenPunctuation - | GeneralCategory::ClosePunctuation - | GeneralCategory::InitialPunctuation - | GeneralCategory::FinalPunctuation - | GeneralCategory::ConnectorPunctuation - | GeneralCategory::DashPunctuation - | GeneralCategory::MathSymbol - | GeneralCategory::CurrencySymbol - | GeneralCategory::ModifierSymbol - ) -} - 
-#[derive(Debug, Eq, PartialEq)] -pub enum Category { - Whitespace, - Eol, - Word, - Punctuation, - Unknown, -} - -#[inline] -pub(crate) fn categorize(ch: char) -> Category { - if is_end_of_line(ch) { - Category::Eol - } else if ch.is_whitespace() { - Category::Whitespace - } else if is_word(ch) { - Category::Word - } else if is_punctuation(ch) { - Category::Punctuation - } else { - Category::Unknown - } -} #[inline] /// Returns first index that doesn't satisfy a given predicate when @@ -235,7 +183,8 @@ impl CharHelpers for Chars<'_> { let mut phase = WordMotionPhase::Start; let mut head = origin.head; let mut anchor: Option = None; - let is_boundary = |a: char, b: Option| categorize(a) != categorize(b.unwrap_or(a)); + let is_boundary = + |a: char, b: Option| categorize_char(a) != categorize_char(b.unwrap_or(a)); while let Some(peek) = characters.peek().copied() { phase = match phase { WordMotionPhase::Start => { @@ -244,7 +193,8 @@ impl CharHelpers for Chars<'_> { break; // We're at the end, so there's nothing to do. 
} // Anchor may remain here if the head wasn't at a boundary - if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) { + if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek) + { anchor = Some(head); } // First character is always skipped by the head @@ -252,7 +202,7 @@ impl CharHelpers for Chars<'_> { WordMotionPhase::SkipNewlines } WordMotionPhase::SkipNewlines => { - if is_end_of_line(peek) { + if char_is_line_ending(peek) { characters.next(); if characters.peek().is_some() { advance(&mut head); @@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char> match target { WordMotionTarget::NextWordStart => { - ((categorize(peek) != categorize(*next_peek)) - && (is_end_of_line(*next_peek) || !next_peek.is_whitespace())) + ((categorize_char(peek) != categorize_char(*next_peek)) + && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace())) } WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => { - ((categorize(peek) != categorize(*next_peek)) - && (!peek.is_whitespace() || is_end_of_line(*next_peek))) + ((categorize_char(peek) != categorize_char(*next_peek)) + && (!peek.is_whitespace() || char_is_line_ending(*next_peek))) } } } @@ -330,7 +280,7 @@ mod test { slice, move_vertically(slice, range, Direction::Forward, 1, Movement::Move).head ), - (1, 2).into() + (1, 3).into() ); } @@ -343,12 +293,12 @@ mod test { let mut range = Range::point(position); let moves_and_expected_coordinates = [ - ((Direction::Forward, 1usize), (0, 1)), - ((Direction::Forward, 2usize), (0, 3)), - ((Direction::Forward, 0usize), (0, 3)), - ((Direction::Forward, 999usize), (0, 31)), - ((Direction::Forward, 999usize), (0, 31)), - ((Direction::Backward, 999usize), (0, 0)), + ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line + ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line + ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a 
simple alphabetic line + ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line| + ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line| + ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line ]; for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) { @@ -366,15 +316,15 @@ mod test { let mut range = Range::point(position); let moves_and_expected_coordinates = IntoIter::new([ - ((Direction::Forward, 1usize), (0, 1)), // M_ltiline - ((Direction::Forward, 2usize), (0, 3)), // Mul_iline - ((Direction::Backward, 6usize), (0, 0)), // _ultiline - ((Direction::Backward, 999usize), (0, 0)), // _ultiline - ((Direction::Forward, 3usize), (0, 3)), // Mul_iline - ((Direction::Forward, 0usize), (0, 3)), // Mul_iline - ((Direction::Backward, 0usize), (0, 3)), // Mul_iline - ((Direction::Forward, 999usize), (0, 9)), // Multilin_ - ((Direction::Forward, 999usize), (0, 9)), // Multilin_ + ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n + ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n + ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n + ((Direction::Backward, 999usize), (0, 0)), // |Multiline\n + ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n + ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n + ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n + ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n + ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n ]); for ((direction, amount), coordinates) in moves_and_expected_coordinates { @@ -446,7 +396,7 @@ mod test { // First descent preserves column as the target line is wider ((Axis::V, Direction::Forward, 1usize), (1, 8)), // Second descent clamps column as the target line is shorter - ((Axis::V, Direction::Forward, 1usize), (2, 4)), + ((Axis::V, Direction::Forward, 1usize), (2, 5)), // Third descent restores the original column ((Axis::V, Direction::Forward, 1usize), (3, 
8)), // Behaviour is preserved even through long jumps @@ -760,45 +710,4 @@ mod test { } } } - - #[test] - fn test_categorize() { - const WORD_TEST_CASE: &'static str = - "_hello_world_あいうえおー12345678901234567890"; - const PUNCTUATION_TEST_CASE: &'static str = - "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~"; - const WHITESPACE_TEST_CASE: &'static str = "      "; - - assert_eq!(Category::Eol, categorize('\n')); - - for ch in WHITESPACE_TEST_CASE.chars() { - assert_eq!( - Category::Whitespace, - categorize(ch), - "Testing '{}', but got `{:?}` instead of `Category::Whitespace`", - ch, - categorize(ch) - ); - } - - for ch in WORD_TEST_CASE.chars() { - assert_eq!( - Category::Word, - categorize(ch), - "Testing '{}', but got `{:?}` instead of `Category::Word`", - ch, - categorize(ch) - ); - } - - for ch in PUNCTUATION_TEST_CASE.chars() { - assert_eq!( - Category::Punctuation, - categorize(ch), - "Testing '{}', but got `{:?}` instead of `Category::Punctuation`", - ch, - categorize(ch) - ); - } - } } diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index 3d85ff2f4..392eee9cc 100644 --- a/helix-core/src/position.rs +++ b/helix-core/src/position.rs @@ -1,4 +1,5 @@ use crate::{ + chars::char_is_line_ending, graphemes::{nth_next_grapheme_boundary, RopeGraphemes}, Rope, RopeSlice, }; @@ -23,8 +24,9 @@ impl Position { pub fn traverse(self, text: &crate::Tendril) -> Self { let Self { mut row, mut col } = self; // TODO: there should be a better way here - for ch in text.chars() { - if ch == '\n' { + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { row += 1; col = 0; } else { diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index ae058eb18..92e52d73b 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,4 +1,4 @@ -use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction}; +use 
crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction}; pub use helix_syntax::{get_language, get_language_name, Lang}; use std::{ @@ -579,9 +579,10 @@ impl LanguageLayer { mut column, } = point; - // TODO: there should be a better way here - for ch in text.bytes() { - if ch == b'\n' { + // TODO: there should be a better way here. + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { row += 1; column = 0; } else { diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs index 101d2f9b1..7f136fe84 100644 --- a/helix-lsp/src/client.rs +++ b/helix-lsp/src/client.rs @@ -3,7 +3,7 @@ use crate::{ Call, Error, OffsetEncoding, Result, }; -use helix_core::{find_root, ChangeSet, Rope}; +use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope}; use jsonrpc_core as jsonrpc; use lsp_types as lsp; use serde_json::Value; @@ -337,8 +337,9 @@ impl Client { mut character, } = pos; - for ch in text.chars() { - if ch == '\n' { + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { line += 1; character = 0; } else { diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 8124c17af..b006504b1 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,6 +1,6 @@ use helix_core::{ comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, - indent, line_end, match_brackets, + indent, line_end_char_index, match_brackets, movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, @@ -342,7 +342,7 @@ fn move_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = line_end(&text.slice(..), line); + let pos = line_end_char_index(&text.slice(..), line); Range::new(pos, pos) }); @@ -490,6 +490,8 @@ 
where let count = cx.count(); // need to wait for next key + // TODO: should this be done by grapheme rather than char? For example, + // we can't properly handle the line-ending case here in terms of char. cx.on_next_key(move |cx, event| { let ch = match event { KeyEvent { @@ -623,7 +625,7 @@ fn replace(cx: &mut Context) { KeyEvent { code: KeyCode::Enter, .. - } => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING + } => Some('\n'), // TODO: use the document's default line ending. _ => None, }; @@ -763,7 +765,7 @@ fn extend_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = line_end(&text.slice(..), line); + let pos = line_end_char_index(&text.slice(..), line); Range::new(range.anchor, pos) }); @@ -1642,7 +1644,7 @@ fn append_to_line(cx: &mut Context) { let selection = doc.selection(view.id).transform(|range| { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = line_end(&text.slice(..), line); + let pos = line_end_char_index(&text.slice(..), line); Range::new(pos, pos) }); doc.set_selection(view.id, selection); diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs index be113747d..3ce3a5b80 100644 --- a/helix-term/src/ui/markdown.rs +++ b/helix-term/src/ui/markdown.rs @@ -110,6 +110,8 @@ fn parse<'a>(contents: &'a str, theme: Option<&Theme>) -> tui::text::Text<'a> { // TODO: replace tabs with indentation let mut slice = &text[start..end]; + // TODO: do we need to handle all unicode line endings + // here, or is just '\n' okay? 
while let Some(end) = slice.find('\n') { // emit span up to newline let text = &slice[..end]; diff --git a/helix-tui/Cargo.toml b/helix-tui/Cargo.toml index 89fa755d3..30e2374dd 100644 --- a/helix-tui/Cargo.toml +++ b/helix-tui/Cargo.toml @@ -22,3 +22,4 @@ unicode-segmentation = "1.2" unicode-width = "0.1" crossterm = { version = "0.20", optional = true } serde = { version = "1", "optional" = true, features = ["derive"]} +helix-core = { version = "0.2", path = "../helix-core" } diff --git a/helix-tui/src/text.rs b/helix-tui/src/text.rs index c671e918e..b23bfd81d 100644 --- a/helix-tui/src/text.rs +++ b/helix-tui/src/text.rs @@ -47,6 +47,7 @@ //! ]); //! ``` use crate::style::Style; +use helix_core::line_ending::str_is_line_ending; use std::borrow::Cow; use unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthStr; @@ -177,7 +178,7 @@ impl<'a> Span<'a> { symbol: g, style: base_style.patch(self.style), }) - .filter(|s| s.symbol != "\n") + .filter(|s| !str_is_line_ending(s.symbol)) } } diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs index 94ff7330d..ae561a4f9 100644 --- a/helix-tui/src/widgets/reflow.rs +++ b/helix-tui/src/widgets/reflow.rs @@ -1,4 +1,5 @@ use crate::text::StyledGrapheme; +use helix_core::line_ending::str_is_line_ending; use unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthStr; @@ -62,13 +63,13 @@ impl<'a, 'b> LineComposer<'a> for WordWrapper<'a, 'b> { // Ignore characters wider that the total max width. if symbol.width() as u16 > self.max_line_width // Skip leading whitespace when trim is enabled. - || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0 + || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0 { continue; } // Break on newline and discard it. 
- if symbol == "\n" { + if str_is_line_ending(symbol) { if prev_whitespace { current_line_width = width_to_last_word_end; self.current_line.truncate(symbols_to_last_word_end); @@ -170,7 +171,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> { } // Break on newline and discard it. - if symbol == "\n" { + if str_is_line_ending(symbol) { break; } @@ -199,7 +200,7 @@ impl<'a, 'b> LineComposer<'a> for LineTruncator<'a, 'b> { if skip_rest { for StyledGrapheme { symbol, .. } in &mut self.symbols { - if symbol == "\n" { + if str_is_line_ending(symbol) { break; } } diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 80be1ed25..3e38c24d6 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use helix_core::{ auto_detect_line_ending, - chars::{char_is_linebreak, char_is_whitespace}, + chars::{char_is_line_ending, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, @@ -81,6 +81,9 @@ pub struct Document { /// Current indent style. pub indent_style: IndentStyle, + /// The document's default line ending. + pub line_ending: LineEnding, + syntax: Option, // /// Corresponding language scope name. Usually `source.`. 
pub(crate) language: Option>, @@ -99,7 +102,6 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - line_ending: LineEnding, } use std::fmt; @@ -254,21 +256,21 @@ impl Document { pub fn load(path: PathBuf) -> Result { use std::{fs::File, io::BufReader}; - let doc = if !path.exists() { + let mut doc = if !path.exists() { Rope::from(DEFAULT_LINE_ENDING.as_str()) } else { let file = File::open(&path).context(format!("unable to open {:?}", path))?; - let mut doc = Rope::from_reader(BufReader::new(file))?; - // add missing newline at the end of file - if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' { - doc.insert_char(doc.len_chars(), '\n'); - } - doc + Rope::from_reader(BufReader::new(file))? }; // search for line endings let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING); + // add missing newline at the end of file + if doc.len_bytes() == 0 || char_is_line_ending(doc.char(doc.len_chars() - 1)) { + doc.insert(doc.len_chars(), line_ending.as_str()); + } + let mut doc = Self::new(doc); // set the path and try detecting the language doc.set_path(&path)?; @@ -379,7 +381,7 @@ impl Document { Some(' ') => false, // Ignore blank lines. - Some(c) if char_is_linebreak(c) => continue, + Some(c) if char_is_line_ending(c) => continue, _ => { prev_line_is_tabs = false; @@ -403,7 +405,7 @@ impl Document { c if char_is_whitespace(c) => count_is_done = true, // Ignore blank lines. 
- c if char_is_linebreak(c) => continue 'outer, + c if char_is_line_ending(c) => continue 'outer, _ => break, } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index db8ae87ab..fb2eb36d6 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -12,7 +12,7 @@ use anyhow::Error; pub use helix_core::diagnostic::Severity; pub use helix_core::register::Registers; -use helix_core::Position; +use helix_core::{Position, DEFAULT_LINE_ENDING}; #[derive(Debug)] pub struct Editor { @@ -150,7 +150,7 @@ impl Editor { pub fn new_file(&mut self, action: Action) -> DocumentId { use helix_core::Rope; - let doc = Document::new(Rope::from("\n")); + let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str())); let id = self.documents.insert(doc); self.documents[id].id = id; self.switch(id, action); From 3d3149e0d55f54440fa28a446ef3facdb386e5c1 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sun, 20 Jun 2021 16:13:59 -0700 Subject: [PATCH 18/26] Silence clippy warning. --- helix-core/src/line_ending.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index c4636c632..3055c96ef 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -56,6 +56,10 @@ impl LineEnding { } } + // Normally we'd want to implement the FromStr trait, but in this case + // that would force us into a different return type than from_char or + // or from_rope_slice, which would be weird. + #[allow(clippy::should_implement_trait)] #[inline] pub fn from_str(g: &str) -> Option { match g { From 714002048cc9601bf0981435c6d3ad43d1c765e8 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 09:52:21 -0700 Subject: [PATCH 19/26] Don't need getters/setters for line_ending property. It's plain-old-data. If we want to do fancier things later, it's easy to switch back. 
--- helix-term/src/commands.rs | 8 ++++---- helix-view/src/document.rs | 10 +--------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 28c4fe3ab..70441fcfa 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1009,7 +1009,7 @@ fn append_mode(cx: &mut Context) { if selection.iter().any(|range| range.head == end) { let transaction = Transaction::change( doc.text(), - std::array::IntoIter::new([(end, end, Some(doc.line_ending().as_str().into()))]), + std::array::IntoIter::new([(end, end, Some(doc.line_ending.as_str().into()))]), ); doc.apply(&transaction, view.id); } @@ -2349,7 +2349,7 @@ pub mod insert { ); let indent = doc.indent_unit().repeat(indent_level); let mut text = String::with_capacity(1 + indent.len()); - text.push_str(doc.line_ending().as_str()); + text.push_str(doc.line_ending.as_str()); text.push_str(&indent); let head = pos + offs + text.chars().count(); @@ -2370,7 +2370,7 @@ pub mod insert { if helix_core::auto_pairs::PAIRS.contains(&(prev, curr)) { // another newline, indent the end bracket one level less let indent = doc.indent_unit().repeat(indent_level.saturating_sub(1)); - text.push_str(doc.line_ending().as_str()); + text.push_str(doc.line_ending.as_str()); text.push_str(&indent); } @@ -2537,7 +2537,7 @@ fn paste_impl( // if any of values ends \n it's linewise paste let linewise = values .iter() - .any(|value| value.ends_with(doc.line_ending().as_str())); + .any(|value| value.ends_with(doc.line_ending.as_str())); let mut values = values.iter().cloned().map(Tendril::from).chain(repeat); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 9326fb79c..945271ead 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -268,7 +268,7 @@ impl Document { // set the path and try detecting the language doc.set_path(&path)?; doc.detect_indent_style(); - doc.set_line_ending(line_ending); + doc.line_ending = 
line_ending; if let Some(loader) = config_loader { doc.detect_language(theme, loader); @@ -531,10 +531,6 @@ impl Document { self.selections.insert(view_id, selection); } - pub fn set_line_ending(&mut self, line_ending: LineEnding) { - self.line_ending = line_ending; - } - fn _apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool { let old_doc = self.text().clone(); @@ -805,10 +801,6 @@ impl Document { pub fn set_diagnostics(&mut self, diagnostics: Vec) { self.diagnostics = diagnostics; } - - pub fn line_ending(&self) -> LineEnding { - self.line_ending - } } #[cfg(test)] From 07e28802f6b61a17e839d05b2a031575f323b9c9 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 10:29:29 -0700 Subject: [PATCH 20/26] Add function to get the line ending of a str slice. This is needed in some places. --- helix-core/src/lib.rs | 4 +--- helix-core/src/line_ending.rs | 23 +++++++++++++++++++++++ helix-core/src/movement.rs | 5 +++-- helix-term/src/commands.rs | 9 +++++---- helix-view/src/document.rs | 2 +- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 692946880..f697bc7fd 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -116,7 +116,5 @@ pub use syntax::Syntax; pub use diagnostic::Diagnostic; pub use state::State; -pub use line_ending::{ - auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING, -}; +pub use line_ending::{LineEnding, DEFAULT_LINE_ENDING}; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 3055c96ef..19de22317 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -128,6 +128,29 @@ pub fn get_line_ending(line: &RopeSlice) -> Option { LineEnding::from_str(g2).or_else(|| LineEnding::from_str(g1)) } +/// Returns the passed line's line ending, if any. 
+pub fn get_line_ending_of_str(line: &str) -> Option { + if line.ends_with("\u{000D}\u{000A}") { + Some(LineEnding::Crlf) + } else if line.ends_with("\u{000A}") { + Some(LineEnding::LF) + } else if line.ends_with("\u{000B}") { + Some(LineEnding::VT) + } else if line.ends_with("\u{000C}") { + Some(LineEnding::FF) + } else if line.ends_with("\u{000D}") { + Some(LineEnding::CR) + } else if line.ends_with("\u{0085}") { + Some(LineEnding::Nel) + } else if line.ends_with("\u{2028}") { + Some(LineEnding::LS) + } else if line.ends_with("\u{2029}") { + Some(LineEnding::PS) + } else { + None + } +} + /// Returns the char index of the end of the given line, not including its line ending. pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize { slice.line_to_char(line + 1) diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index d0023e9f8..bfceb4ef5 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -7,9 +7,10 @@ use crate::{ categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace, char_is_word, CharCategory, }, - coords_at_pos, get_line_ending, + coords_at_pos, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - line_end_char_index, pos_at_coords, Position, Range, RopeSlice, + line_ending::{get_line_ending, line_end_char_index}, + pos_at_coords, Position, Range, RopeSlice, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 70441fcfa..3d2a90288 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,6 +1,7 @@ use helix_core::{ - comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, - indent, line_end_char_index, match_brackets, + comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent, + line_ending::{get_line_ending, get_line_ending_of_str, line_end_char_index}, + match_brackets, movement::{self, Direction}, object, 
pos_at_coords, regex::{self, Regex}, @@ -2534,10 +2535,10 @@ fn paste_impl( .unwrap(), ); - // if any of values ends \n it's linewise paste + // if any of values ends with a line ending, it's linewise paste let linewise = values .iter() - .any(|value| value.ends_with(doc.line_ending.as_str())); + .any(|value| get_line_ending_of_str(value).is_some()); let mut values = values.iter().cloned().map(Tendril::from).chain(repeat); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 945271ead..bd45db5a1 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -7,9 +7,9 @@ use std::str::FromStr; use std::sync::Arc; use helix_core::{ - auto_detect_line_ending, chars::{char_is_line_ending, char_is_whitespace}, history::History, + line_ending::auto_detect_line_ending, syntax::{self, LanguageConfiguration}, ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, DEFAULT_LINE_ENDING, From 23d618853553afbac78c8d2a4ad048195f3484b8 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 11:08:05 -0700 Subject: [PATCH 21/26] Update `replace` command to use document line ending setting. 
--- helix-term/src/commands.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 3d2a90288..7f60f2f92 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,13 +1,15 @@ use helix_core::{ comment, coords_at_pos, find_first_non_whitespace_char, find_root, graphemes, indent, - line_ending::{get_line_ending, get_line_ending_of_str, line_end_char_index}, + line_ending::{ + get_line_ending, get_line_ending_of_str, line_end_char_index, str_is_line_ending, + }, match_brackets, movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, register::{self, Register, Registers}, - search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeSlice, Selection, - SmallVec, Tendril, Transaction, DEFAULT_LINE_ENDING, + search, selection, Change, ChangeSet, LineEnding, Position, Range, Rope, RopeGraphemes, + RopeSlice, Selection, SmallVec, Tendril, Transaction, DEFAULT_LINE_ENDING, }; use helix_view::{ @@ -577,32 +579,37 @@ fn extend_first_nonwhitespace(cx: &mut Context) { } fn replace(cx: &mut Context) { + let mut buf = [0u8; 4]; // To hold utf8 encoded char. + // need to wait for next key cx.on_next_key(move |cx, event| { + let (view, doc) = current!(cx.editor); let ch = match event { KeyEvent { code: KeyCode::Char(ch), .. - } => Some(ch), + } => Some(&ch.encode_utf8(&mut buf[..])[..]), KeyEvent { code: KeyCode::Enter, .. - } => Some('\n'), // TODO: use the document's default line ending. 
+ } => Some(doc.line_ending.as_str()), _ => None, }; if let Some(ch) = ch { - let (view, doc) = current!(cx.editor); - let transaction = Transaction::change_by_selection(doc.text(), doc.selection(view.id), |range| { let max_to = doc.text().len_chars().saturating_sub(1); let to = std::cmp::min(max_to, range.to() + 1); - let text: String = doc - .text() - .slice(range.from()..to) - .chars() - .map(|c| if c == '\n' { '\n' } else { ch }) + let text: String = RopeGraphemes::new(doc.text().slice(range.from()..to)) + .map(|g| { + let cow: Cow<str> = g.into(); + if str_is_line_ending(&cow) { + cow + } else { + ch.into() + } + }) .collect(); (range.from(), to, Some(text.into())) From e436c30ed7df303a32455c4cf9e0574ab87c0683 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 11:22:07 -0700 Subject: [PATCH 22/26] Make split_selection_on_newline command handle all line endings. --- helix-term/src/commands.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 7f60f2f92..dc7f8913b 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -791,7 +791,8 @@ fn split_selection_on_newline(cx: &mut Context) { let text = doc.text().slice(..); // only compile the regex once #[allow(clippy::trivial_regex)] - static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n").unwrap()); + static REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(r"\r\n|[\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}]").unwrap()); let selection = selection::split_on_matches(text, doc.selection(view.id), &REGEX); doc.set_selection(view.id, selection); } From d33355650fd53c05b4e3d4e0f421eaf013b5ea1e Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 11:59:03 -0700 Subject: [PATCH 23/26] Convert remaining commands to use the document's line ending setting.
--- helix-term/src/commands.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index dc7f8913b..6ccbaaff2 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -455,13 +455,27 @@ where // need to wait for next key // TODO: should this be done by grapheme rather than char? For example, - // we can't properly handle the line-ending case here in terms of char. + // we can't properly handle the line-ending CRLF case here in terms of char. cx.on_next_key(move |cx, event| { let ch = match event { KeyEvent { code: KeyCode::Enter, .. - } => '\n', + } => + // TODO: this isn't quite correct when CRLF is involved. + // This hack will work in most cases, since documents don't + // usually mix line endings. But we should fix it eventually + // anyway. + { + current!(cx.editor) + .1 + .line_ending + .as_str() + .chars() + .next() + .unwrap() + } + KeyEvent { code: KeyCode::Char(ch), .. 
@@ -1289,7 +1303,8 @@ mod cmd { } fn yank_joined_to_clipboard(editor: &mut Editor, args: &[&str], _: PromptEvent) { - let separator = args.first().copied().unwrap_or("\n"); + let (_, doc) = current!(editor); + let separator = args.first().copied().unwrap_or(doc.line_ending.as_str()); yank_joined_to_clipboard_impl(editor, separator); } @@ -1745,7 +1760,7 @@ fn open(cx: &mut Context, open: Open) { let indent = doc.indent_unit().repeat(indent_level); let indent_len = indent.len(); let mut text = String::with_capacity(1 + indent_len); - text.push('\n'); + text.push_str(doc.line_ending.as_str()); text.push_str(&indent); let text = text.repeat(count); @@ -2502,7 +2517,8 @@ fn yank_joined_to_clipboard_impl(editor: &mut Editor, separator: &str) { } fn yank_joined_to_clipboard(cx: &mut Context) { - yank_joined_to_clipboard_impl(&mut cx.editor, "\n"); + let line_ending = current!(cx.editor).1.line_ending; + yank_joined_to_clipboard_impl(&mut cx.editor, line_ending.as_str()); } fn yank_main_selection_to_clipboard_impl(editor: &mut Editor) { From 7c4fa18764ed6a6c30125ed6bfc9b025216e9668 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 12:02:44 -0700 Subject: [PATCH 24/26] Fix clippy warnings. 
--- helix-core/src/line_ending.rs | 14 +++++++------- helix-term/src/commands.rs | 5 ++++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 19de22317..dfc745519 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -132,19 +132,19 @@ pub fn get_line_ending(line: &RopeSlice) -> Option { pub fn get_line_ending_of_str(line: &str) -> Option { if line.ends_with("\u{000D}\u{000A}") { Some(LineEnding::Crlf) - } else if line.ends_with("\u{000A}") { + } else if line.ends_with('\u{000A}') { Some(LineEnding::LF) - } else if line.ends_with("\u{000B}") { + } else if line.ends_with('\u{000B}') { Some(LineEnding::VT) - } else if line.ends_with("\u{000C}") { + } else if line.ends_with('\u{000C}') { Some(LineEnding::FF) - } else if line.ends_with("\u{000D}") { + } else if line.ends_with('\u{000D}') { Some(LineEnding::CR) - } else if line.ends_with("\u{0085}") { + } else if line.ends_with('\u{0085}') { Some(LineEnding::Nel) - } else if line.ends_with("\u{2028}") { + } else if line.ends_with('\u{2028}') { Some(LineEnding::LS) - } else if line.ends_with("\u{2029}") { + } else if line.ends_with('\u{2029}') { Some(LineEnding::PS) } else { None diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 6ccbaaff2..5cb30da6b 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1304,7 +1304,10 @@ mod cmd { fn yank_joined_to_clipboard(editor: &mut Editor, args: &[&str], _: PromptEvent) { let (_, doc) = current!(editor); - let separator = args.first().copied().unwrap_or(doc.line_ending.as_str()); + let separator = args + .first() + .copied() + .unwrap_or_else(|| doc.line_ending.as_str()); yank_joined_to_clipboard_impl(editor, separator); } From a18d50b77759d6305b99ff59baa1ccaf9d644a88 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 12:36:01 -0700 Subject: [PATCH 25/26] Add command to set the document's default line ending. 
--- helix-term/src/commands.rs | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 5cb30da6b..63a4d9011 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1160,6 +1160,45 @@ mod cmd { } } + /// Sets or reports the current document's line ending setting. + fn set_line_ending(editor: &mut Editor, args: &[&str], event: PromptEvent) { + use LineEnding::*; + + // If no argument, report current line ending setting. + if args.is_empty() { + let line_ending = current!(editor).1.line_ending; + editor.set_status(match line_ending { + Crlf => "crlf".into(), + LF => "line feed".into(), + FF => "form feed".into(), + CR => "carriage return".into(), + Nel => "next line".into(), + + // These should never be a document's default line ending. + VT | LS | PS => "error".into(), + }); + return; + } + + // Attempt to parse argument as a line ending. + let line_ending = match args.get(0) { + // We check for CR first because it shares a common prefix with CRLF. + Some(arg) if "cr".starts_with(&arg.to_lowercase()) => Some(CR), + Some(arg) if "crlf".starts_with(&arg.to_lowercase()) => Some(Crlf), + Some(arg) if "lf".starts_with(&arg.to_lowercase()) => Some(LF), + Some(arg) if "ff".starts_with(&arg.to_lowercase()) => Some(FF), + Some(arg) if "nel".starts_with(&arg.to_lowercase()) => Some(Nel), + _ => None, + }; + + if let Some(le) = line_ending { + doc_mut!(editor).line_ending = le; + } else { + // Invalid argument. + editor.set_error(format!("invalid line ending '{}'", args[0],)); + } + } + fn earlier(editor: &mut Editor, args: &[&str], event: PromptEvent) { let uk = match args.join(" ").parse::() { Ok(uk) => uk, @@ -1392,6 +1431,13 @@ mod cmd { fun: set_indent_style, completer: None, }, + TypableCommand { + name: "line-ending", + alias: None, + doc: "Set the document's default line ending. 
Options: crlf, lf, cr, ff, nel.", + fun: set_line_ending, + completer: None, + }, TypableCommand { name: "earlier", alias: Some("ear"), From f2954fa153ccb6b147d8d38020341a2f1b0b6df2 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Mon, 21 Jun 2021 12:56:42 -0700 Subject: [PATCH 26/26] Flesh out the line ending utility unit tests. --- helix-core/src/line_ending.rs | 51 +++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index dfc745519..fa33204c0 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -164,7 +164,7 @@ mod line_ending_tests { use super::*; #[test] - fn test_autodetect() { + fn line_ending_autodetect() { assert_eq!( auto_detect_line_ending(&Rope::from_str("\n")), Some(LineEnding::LF) @@ -197,15 +197,56 @@ mod line_ending_tests { } #[test] - fn test_rope_slice_to_line_ending() { - let r = Rope::from_str("\r\n"); + fn str_to_line_ending() { + assert_eq!(LineEnding::from_str("\r"), Some(LineEnding::CR)); + assert_eq!(LineEnding::from_str("\n"), Some(LineEnding::LF)); + assert_eq!(LineEnding::from_str("\r\n"), Some(LineEnding::Crlf)); + assert_eq!(LineEnding::from_str("hello\n"), None); + } + + #[test] + fn rope_slice_to_line_ending() { + let r = Rope::from_str("hello\r\n"); assert_eq!( - LineEnding::from_rope_slice(&r.slice(1..2)), + LineEnding::from_rope_slice(&r.slice(5..6)), + Some(LineEnding::CR) + ); + assert_eq!( + LineEnding::from_rope_slice(&r.slice(6..7)), Some(LineEnding::LF) ); assert_eq!( - LineEnding::from_rope_slice(&r.slice(0..2)), + LineEnding::from_rope_slice(&r.slice(5..7)), Some(LineEnding::Crlf) ); + assert_eq!(LineEnding::from_rope_slice(&r.slice(..)), None); + } + + #[test] + fn get_line_ending_rope_slice() { + let r = Rope::from_str("Hello\rworld\nhow\r\nare you?"); + assert_eq!(get_line_ending(&r.slice(..6)), Some(LineEnding::CR)); + assert_eq!(get_line_ending(&r.slice(..12)), 
Some(LineEnding::LF)); + assert_eq!(get_line_ending(&r.slice(..17)), Some(LineEnding::Crlf)); + assert_eq!(get_line_ending(&r.slice(..)), None); + } + + #[test] + fn get_line_ending_str() { + let text = "Hello\rworld\nhow\r\nare you?"; + assert_eq!(get_line_ending_of_str(&text[..6]), Some(LineEnding::CR)); + assert_eq!(get_line_ending_of_str(&text[..12]), Some(LineEnding::LF)); + assert_eq!(get_line_ending_of_str(&text[..17]), Some(LineEnding::Crlf)); + assert_eq!(get_line_ending_of_str(&text[..]), None); + } + + #[test] + fn line_end_char_index_rope_slice() { + let r = Rope::from_str("Hello\rworld\nhow\r\nare you?"); + let s = &r.slice(..); + assert_eq!(line_end_char_index(s, 0), 5); + assert_eq!(line_end_char_index(s, 1), 11); + assert_eq!(line_end_char_index(s, 2), 15); + assert_eq!(line_end_char_index(s, 3), 25); } }