use anyhow::{anyhow, Context, Error};
use serde::de::{self, Deserialize, Deserializer};
use std::cell::Cell;
use std::collections::HashMap;
use std::fmt::Display;
use std::future::Future;
use std::path::{Component, Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;

use helix_core::{
    chars::{char_is_line_ending, char_is_whitespace},
    history::History,
    line_ending::auto_detect_line_ending,
    syntax::{self, LanguageConfiguration},
    ChangeSet, Diagnostic, LineEnding, Rope, RopeBuilder, Selection, State, Syntax, Transaction,
    DEFAULT_LINE_ENDING,
};
use helix_lsp::util::LspFormatting;

use crate::{DocumentId, Theme, ViewId};
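
/// Size in bytes of the intermediate buffers used while decoding from and
/// encoding to a file.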
const BUF_SIZE: usize = 8192;

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Mode {
    Normal,
    Select,
    Insert,
}

impl Display for Mode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Mode::Normal => f.write_str("normal"),
            Mode::Select => f.write_str("select"),
            Mode::Insert => f.write_str("insert"),
        }
    }
}

impl FromStr for Mode {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "normal" => Ok(Mode::Normal),
            "select" => Ok(Mode::Select),
            "insert" => Ok(Mode::Insert),
            _ => Err(anyhow!("Invalid mode '{}'", s)),
        }
    }
}

// toml deserializer doesn't seem to recognize string as enum
impl<'de> Deserialize<'de> for Mode {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let s = String::deserialize(deserializer)?;
        s.parse().map_err(de::Error::custom)
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    Tabs,
    Spaces(u8),
}

pub struct Document {
    pub(crate) id: DocumentId,
    text: Rope,
    pub(crate) selections: HashMap<ViewId, Selection>,

    path: Option<PathBuf>,
    encoding: &'static encoding_rs::Encoding,

    /// Current editing mode.
    pub mode: Mode,
    pub restore_cursor: bool,

    /// Current indent style.
    pub indent_style: IndentStyle,

    /// The document's default line ending.
    pub line_ending: LineEnding,

    syntax: Option<Syntax>,
    // /// Corresponding language scope name. Usually `source.<lang>`.
    pub(crate) language: Option<Arc<LanguageConfiguration>>,

    /// Pending changes since last history commit.
    changes: ChangeSet,
    /// State at last commit. Used for calculating reverts.
    old_state: Option<State>,
    /// Undo tree.
    // The history is kept in a `Cell` so that it can be taken out, queried or
    // modified, and then put back, since it is kept separate from the edits.
    // We could split the parts out manually, but that would be more troublesome.
    history: Cell<History>,

    last_saved_revision: usize,

    version: i32, // should be usize?

    diagnostics: Vec<Diagnostic>,
    language_server: Option<Arc<helix_lsp::Client>>,
}

use std::fmt;
impl fmt::Debug for Document {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Document")
            .field("id", &self.id)
            .field("text", &self.text)
            .field("selections", &self.selections)
            .field("path", &self.path)
            .field("encoding", &self.encoding)
            .field("mode", &self.mode)
            .field("restore_cursor", &self.restore_cursor)
            .field("syntax", &self.syntax)
            .field("language", &self.language)
            .field("changes", &self.changes)
            .field("old_state", &self.old_state)
            // .field("history", &self.history)
            .field("last_saved_revision", &self.last_saved_revision)
            .field("version", &self.version)
            .field("diagnostics", &self.diagnostics)
            // .field("language_server", &self.language_server)
            .finish()
    }
}

// The documentation and implementation of this function should be up-to-date with
// its sibling function, `to_writer()`.
//
/// Decodes a stream of bytes into UTF-8, returning a `Rope` and the
/// encoding it was decoded as. The optional `encoding` parameter can
/// be used to override encoding auto-detection.
pub fn from_reader<R: std::io::Read + ?Sized>(
    reader: &mut R,
    encoding: Option<&'static encoding_rs::Encoding>,
) -> Result<(Rope, &'static encoding_rs::Encoding), Error> {
    // These two buffers are 8192 bytes in size each and are used as
    // intermediaries during the decoding process. Text read into `buf`
    // from `reader` is decoded into `buf_out` as UTF-8. Once either
    // `buf_out` is full or the end of the reader is reached, the
    // contents are appended to `builder`.
    let mut buf = [0u8; BUF_SIZE];
    let mut buf_out = [0u8; BUF_SIZE];
    let mut builder = RopeBuilder::new();

    // By default, the encoding of the text is auto-detected via the
    // `chardetng` crate which requires sample data from the reader.
    // As a manual override to this auto-detection is possible, the
    // same data is read into `buf` to ensure symmetry in the upcoming
    // loop.
    let (encoding, mut decoder, mut slice, mut is_empty) = {
        let read = reader.read(&mut buf)?;
        let is_empty = read == 0;
        let encoding = encoding.unwrap_or_else(|| {
            let mut encoding_detector = chardetng::EncodingDetector::new();
            encoding_detector.feed(&buf, is_empty);
            encoding_detector.guess(None, true)
        });
        let decoder = encoding.new_decoder();

        // Only the bytes that were actually read may be fed to the decoder,
        // so trim `buf` down to `read` bytes.
        let slice = &buf[..read];
        (encoding, decoder, slice, is_empty)
    };

    // `RopeBuilder::append()` expects a `&str`, so this is the "real"
    // output buffer. When decoding, the number of bytes in the output
    // buffer will often exceed the number of bytes in the input buffer.
    // The `result` returned by `decode_to_str()` will state whether or
    // not that happened. The contents of `buf_str` are appended to
    // `builder` and the buffer is reused for the next iteration of the
    // decoding loop.
    //
    // As it is possible to read less than the buffer's maximum from `read()`
    // even when the end of the reader has yet to be reached, the end of
    // the reader is determined only when a `read()` call returns `0`.
    //
    // SAFETY: `buf_out` is a zero-initialized array, thus it will always
    // contain valid UTF-8.
    let buf_str = unsafe { std::str::from_utf8_unchecked_mut(&mut buf_out[..]) };
    let mut total_written = 0usize;

    loop {
        let mut total_read = 0usize;

        // An inner loop is necessary as it is possible that the input buffer
        // may not be completely decoded on the first `decode_to_str()` call
        // which would happen in cases where the output buffer is filled to
        // capacity.
        loop {
            let (result, read, written, ..) = decoder.decode_to_str(
                &slice[total_read..],
                &mut buf_str[total_written..],
                is_empty,
            );

            // These variables act as the read and write cursors of `buf` and `buf_str` respectively.
            // They are necessary in case the output buffer fills before the entire input is decoded.
            // Otherwise, the loop would endlessly iterate over the same `buf` and the data inside
            // the output buffer would be overwritten.
            total_read += read;
            total_written += written;
            match result {
                encoding_rs::CoderResult::InputEmpty => {
                    debug_assert_eq!(slice.len(), total_read);
                    break;
                }
                encoding_rs::CoderResult::OutputFull => {
                    debug_assert!(slice.len() > total_read);
                    builder.append(&buf_str[..total_written]);
                    total_written = 0;
                }
            }
        }

        // Once the end of the stream is reached, the output buffer is
        // flushed and the loop terminates.
        if is_empty {
            debug_assert_eq!(reader.read(&mut buf)?, 0);
            builder.append(&buf_str[..total_written]);
            break;
        }

        // Once the previous input has been processed and decoded, the next set of
        // data is fetched from the reader. The end of the reader is determined to
        // be when exactly `0` bytes were read from the reader, as per the invariants
        // of the `Read` trait.
        let read = reader.read(&mut buf)?;
        slice = &buf[..read];
        is_empty = read == 0;
    }
    let rope = builder.finish();
    Ok((rope, encoding))
}

// The documentation and implementation of this function should be up-to-date with
// its sibling function, `from_reader()`.
//
/// Encodes the text inside `rope` into the given `encoding` and writes the
/// encoded output into `writer`. As a `Rope` can only contain valid UTF-8,
/// replacement characters may appear in the encoded text.
pub async fn to_writer<'a, W: tokio::io::AsyncWriteExt + Unpin + ?Sized>(
    writer: &'a mut W,
    encoding: &'static encoding_rs::Encoding,
    rope: &'a Rope,
) -> Result<(), Error> {
    // Text inside a `Rope` is stored as non-contiguous blocks of data called
    // chunks. The absolute size of each chunk is unknown, thus it is impossible
    // to predict the end of the chunk iterator ahead of time. Instead, it is
    // determined by filtering the iterator to remove all empty chunks and then
    // appending an empty chunk to it. This is valuable for detecting when all
    // chunks in the `Rope` have been iterated over in the subsequent loop.
    let iter = rope
        .chunks()
        .filter(|c| !c.is_empty())
        .chain(std::iter::once(""));
    let mut buf = [0u8; BUF_SIZE];
    let mut encoder = encoding.new_encoder();
    let mut total_written = 0usize;

    for chunk in iter {
        let is_empty = chunk.is_empty();
        let mut total_read = 0usize;

        // An inner loop is necessary as it is possible that the input buffer
        // may not be completely encoded on the first `encode_from_utf8()` call
        // which would happen in cases where the output buffer is filled to
        // capacity.
        loop {
            let (result, read, written, ..) =
                encoder.encode_from_utf8(&chunk[total_read..], &mut buf[total_written..], is_empty);

            // These variables act as the read and write cursors of `chunk` and `buf` respectively.
            // They are necessary in case the output buffer fills before the entire input chunk is
            // encoded. Otherwise, the loop would endlessly iterate over the same `chunk` and the
            // data inside the output buffer would be overwritten.
            total_read += read;
            total_written += written;
            match result {
                encoding_rs::CoderResult::InputEmpty => {
                    debug_assert_eq!(chunk.len(), total_read);
                    debug_assert!(buf.len() >= total_written);
                    break;
                }
                encoding_rs::CoderResult::OutputFull => {
                    debug_assert!(chunk.len() > total_read);
                    writer.write_all(&buf[..total_written]).await?;
                    total_written = 0;
                }
            }
        }

        // Once the end of the iterator is reached, the output buffer is
        // flushed and the outer loop terminates.
        if is_empty {
            writer.write_all(&buf[..total_written]).await?;
            writer.flush().await?;
            break;
        }
    }
    Ok(())
}

/// Inserts the final line ending into `rope` if it's missing. [Why?](https://stackoverflow.com/questions/729692/why-should-text-files-end-with-a-newline)
pub fn with_line_ending(rope: &mut Rope) -> LineEnding {
    // search for line endings
    let line_ending = auto_detect_line_ending(rope).unwrap_or(DEFAULT_LINE_ENDING);

    // add missing newline at the end of file
    if rope.len_bytes() == 0 || !char_is_line_ending(rope.char(rope.len_chars() - 1)) {
        rope.insert(rope.len_chars(), line_ending.as_str());
    }

    line_ending
}
/// Like std::mem::replace() except it allows the replacement value to be mapped from the
/// original value.
fn take_with < T , F > ( mut_ref : & mut T , closure : F )
where
F : FnOnce ( T ) -> T ,
{
use std ::{ panic , ptr } ;
unsafe {
let old_t = ptr ::read ( mut_ref ) ;
let new_t = panic ::catch_unwind ( panic ::AssertUnwindSafe ( | | closure ( old_t ) ) )
. unwrap_or_else ( | _ | ::std ::process ::abort ( ) ) ;
ptr ::write ( mut_ref , new_t ) ;
}
}

/// Expands tilde `~` into the user's home directory if available, otherwise returns the path
/// unchanged. The tilde is only expanded when it is the first component of the path and is
/// followed only by a slash.
pub fn expand_tilde(path: &Path) -> PathBuf {
    let mut components = path.components().peekable();
    if let Some(Component::Normal(c)) = components.peek() {
        if c == &"~" {
            if let Ok(home) = helix_core::home_dir() {
                // it's ok to unwrap, the path starts with `~`
                return home.join(path.strip_prefix("~").unwrap());
            }
        }
    }

    path.to_path_buf()
}

/// Replaces the user's home directory in `path` with tilde `~` if the home directory
/// is available, otherwise returns the path unchanged.
pub fn fold_home_dir(path: &Path) -> PathBuf {
    if let Ok(home) = helix_core::home_dir() {
        if path.starts_with(&home) {
            // it's ok to unwrap, the path starts with home dir
            return PathBuf::from("~").join(path.strip_prefix(&home).unwrap());
        }
    }

    path.to_path_buf()
}

/// Normalize a path, removing things like `.` and `..`.
///
/// CAUTION: This does not resolve symlinks (unlike
/// [`std::fs::canonicalize`]). This may cause incorrect or surprising
/// behavior at times. This should be used carefully. Unfortunately,
/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often
/// fail, or on Windows returns annoying device paths. This is a problem Cargo
/// needs to improve on.
/// Copied from cargo: <https://github.com/rust-lang/cargo/blob/070e459c2d8b79c5b2ac5218064e7603329c92ae/crates/cargo-util/src/paths.rs#L81>
pub fn normalize_path(path: &Path) -> PathBuf {
    let path = expand_tilde(path);
    let mut components = path.components().peekable();
    let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
        components.next();
        PathBuf::from(c.as_os_str())
    } else {
        PathBuf::new()
    };

    for component in components {
        match component {
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => {
                ret.push(component.as_os_str());
            }
            Component::CurDir => {}
            Component::ParentDir => {
                ret.pop();
            }
            Component::Normal(c) => {
                ret.push(c);
            }
        }
    }
    ret
}

/// Returns the canonical, absolute form of a path with all intermediate components normalized.
///
/// This function is used instead of `std::fs::canonicalize` because we don't want to verify
/// here if the path exists, just normalize its components.
pub fn canonicalize_path(path: &Path) -> std::io::Result<PathBuf> {
    let path = if path.is_relative() {
        std::env::current_dir().map(|current_dir| current_dir.join(path))?
    } else {
        path.to_path_buf()
    };

    Ok(normalize_path(&path))
}

use helix_lsp::lsp;
use url::Url;

impl Document {
    pub fn from(text: Rope, encoding: Option<&'static encoding_rs::Encoding>) -> Self {
        let encoding = encoding.unwrap_or(encoding_rs::UTF_8);
        let changes = ChangeSet::new(&text);
        let old_state = None;

        Self {
            id: DocumentId::default(),
            path: None,
            encoding,
            text,
            selections: HashMap::default(),
            indent_style: IndentStyle::Spaces(4),
            mode: Mode::Normal,
            restore_cursor: false,
            syntax: None,
            language: None,
            changes,
            old_state,
            diagnostics: Vec::new(),
            version: 0,
            history: Cell::new(History::default()),
            last_saved_revision: 0,
            language_server: None,
            line_ending: DEFAULT_LINE_ENDING,
        }
    }

    // TODO: async fn?
    /// Create a new document from `path`. Encoding is auto-detected, but it can be manually
    /// overridden with the `encoding` parameter.
    pub fn open(
        path: PathBuf,
        encoding: Option<&'static encoding_rs::Encoding>,
        theme: Option<&Theme>,
        config_loader: Option<&syntax::Loader>,
    ) -> Result<Self, Error> {
        if !path.exists() {
            return Ok(Self::default());
        }

        let mut file = std::fs::File::open(&path).context(format!("unable to open {:?}", path))?;
        let (mut rope, encoding) = from_reader(&mut file, encoding)?;
        let line_ending = with_line_ending(&mut rope);

        let mut doc = Self::from(rope, Some(encoding));

        // set the path and try detecting the language
        doc.set_path(&path)?;
        if let Some(loader) = config_loader {
            doc.detect_language(theme, loader);
        }

        // Detect indentation style and set line ending.
        doc.detect_indent_style();
        doc.line_ending = line_ending;

        Ok(doc)
    }

    /// The same as [`format`], but only returns formatting changes if auto-formatting
    /// is configured.
    pub fn auto_format(&self) -> Option<impl Future<Output = LspFormatting> + 'static> {
        if self.language_config().map(|c| c.auto_format) == Some(true) {
            self.format()
        } else {
            None
        }
    }

    /// If supported, returns the changes that should be applied to this document in order
    /// to format it nicely.
    pub fn format(&self) -> Option<impl Future<Output = LspFormatting> + 'static> {
        if let Some(language_server) = self.language_server.clone() {
            let text = self.text.clone();
            let id = self.identifier();
            let fut = async move {
                let edits = language_server
                    .text_document_formatting(id, lsp::FormattingOptions::default(), None)
                    .await
                    .unwrap_or_else(|e| {
                        log::warn!("LSP formatting failed: {}", e);
                        Default::default()
                    });
                LspFormatting {
                    doc: text,
                    edits,
                    offset_encoding: language_server.offset_encoding(),
                }
            };
            Some(fut)
        } else {
            None
        }
    }
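
    /// Saves the document to the file at its `path()` without applying any
    /// formatting changes beforehand.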
    pub fn save(&mut self) -> impl Future<Output = Result<(), anyhow::Error>> {
        self.save_impl::<futures_util::future::Ready<_>>(None)
    }
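
    /// Saves the document like [`Self::save`], but first applies the changes
    /// produced by the `formatting` future (if any) to the text being written.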
    pub fn format_and_save(
        &mut self,
        formatting: Option<impl Future<Output = LspFormatting>>,
    ) -> impl Future<Output = anyhow::Result<()>> {
        self.save_impl(formatting)
    }

    // TODO: do we need some way of ensuring two save operations on the same doc can't run at once?
    // or is that handled by the OS/async layer
    /// The `Document`'s text is encoded according to its encoding and written to the file located
    /// at its `path()`.
    ///
    /// If `formatting` is present, it supplies some changes that we apply to the text before saving.
    fn save_impl<F: Future<Output = LspFormatting>>(
        &mut self,
        formatting: Option<F>,
    ) -> impl Future<Output = Result<(), anyhow::Error>> {
        // we clone and move text + path into the future so that we asynchronously save the current
        // state without blocking any further edits.
        let mut text = self.text().clone();
        let path = self.path.clone().expect("Can't save with no path set!"); // TODO: handle no path
        let identifier = self.identifier();

        // TODO: mark changes up to now as saved
        let language_server = self.language_server.clone();
        self.reset_modified();

        let encoding = self.encoding;

        // We encode the file according to the `Document`'s encoding.
        async move {
            use tokio::fs::File;
            if let Some(parent) = path.parent() {
                // TODO: display a prompt asking the user if the directories should be created
                if !parent.exists() {
                    return Err(Error::msg(
                        "can't save file, parent directory does not exist",
                    ));
                }
            }
            if let Some(fmt) = formatting {
                let success = Transaction::from(fmt.await).changes().apply(&mut text);
                if !success {
                    // This shouldn't happen, because the transaction changes were generated
                    // from the same text we're saving.
                    log::error!("failed to apply format changes before saving");
                }
            }

            let mut file = File::create(path).await?;
            to_writer(&mut file, encoding, &text).await?;

            if let Some(language_server) = language_server {
                language_server
                    .text_document_did_save(identifier, &text)
                    .await?;
            }

            Ok(())
        }
    }
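
    /// Detects the document's language from its file name using `config_loader`
    /// and applies it via [`Self::set_language`].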
    pub fn detect_language(&mut self, theme: Option<&Theme>, config_loader: &syntax::Loader) {
        if let Some(path) = &self.path {
            let language_config = config_loader.language_config_for_file_name(path);
            self.set_language(theme, language_config);
        }
    }

    /// Reload the document from its path.
    pub fn reload(&mut self, view_id: ViewId) -> Result<(), Error> {
        let encoding = &self.encoding;
        let path = self.path().filter(|path| path.exists());

        // If there is no path or the path no longer exists.
        if path.is_none() {
            return Err(anyhow!("can't find file to reload from"));
        }

        let mut file = std::fs::File::open(path.unwrap())?;
        let (mut rope, ..) = from_reader(&mut file, Some(encoding))?;
        let line_ending = with_line_ending(&mut rope);

        let transaction = helix_core::diff::compare_ropes(self.text(), &rope);
        self.apply(&transaction, view_id);
        self.append_changes_to_history(view_id);

        // Detect indentation style and set line ending.
        self.detect_indent_style();
        self.line_ending = line_ending;

        Ok(())
    }

    /// Sets the [`Document`]'s encoding to the encoding corresponding to `label`.
    pub fn set_encoding(&mut self, label: &str) -> Result<(), Error> {
        match encoding_rs::Encoding::for_label(label.as_bytes()) {
            Some(encoding) => self.encoding = encoding,
            None => return Err(anyhow::anyhow!("unknown encoding")),
        }
        Ok(())
    }

    /// Returns the [`Document`]'s current encoding.
    pub fn encoding(&self) -> &'static encoding_rs::Encoding {
        self.encoding
    }
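
    /// Heuristically detects the indentation style (tabs vs. a number of spaces)
    /// from the document's text and stores the result in `indent_style`.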
    fn detect_indent_style(&mut self) {
        // Build a histogram of the indentation *increases* between
        // subsequent lines, ignoring lines that are all whitespace.
        //
        // Index 0 is for tabs, the rest are 1-8 spaces.
        let histogram: [usize; 9] = {
            let mut histogram = [0; 9];
            let mut prev_line_is_tabs = false;
            let mut prev_line_leading_count = 0usize;

            // Loop through the lines, checking for and recording indentation
            // increases as we go.
            'outer: for line in self.text.lines().take(1000) {
                let mut c_iter = line.chars();

                // Is first character a tab or space?
                let is_tabs = match c_iter.next() {
                    Some('\t') => true,
                    Some(' ') => false,

                    // Ignore blank lines.
                    Some(c) if char_is_line_ending(c) => continue,

                    _ => {
                        prev_line_is_tabs = false;
                        prev_line_leading_count = 0;
                        continue;
                    }
                };

                // Count the line's total leading tab/space characters.
                let mut leading_count = 1;
                let mut count_is_done = false;
                for c in c_iter {
                    match c {
                        '\t' if is_tabs && !count_is_done => leading_count += 1,
                        ' ' if !is_tabs && !count_is_done => leading_count += 1,

                        // We stop counting if we hit whitespace that doesn't
                        // qualify as indent or doesn't match the leading
                        // whitespace, but we don't exit the loop yet because
                        // we still want to determine if the line is blank.
                        c if char_is_whitespace(c) => count_is_done = true,

                        // Ignore blank lines.
                        c if char_is_line_ending(c) => continue 'outer,

                        _ => break,
                    }

                    // Bound the worst-case execution time for weird text files.
                    if leading_count > 256 {
                        continue 'outer;
                    }
                }

                // If there was an increase in indentation over the previous
                // line, update the histogram with that increase.
                if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
                    && prev_line_leading_count < leading_count
                {
                    if is_tabs {
                        histogram[0] += 1;
                    } else {
                        let amount = leading_count - prev_line_leading_count;
                        if amount <= 8 {
                            histogram[amount] += 1;
                        }
                    }
                }

                // Store this line's leading whitespace info for use with
                // the next line.
                prev_line_is_tabs = is_tabs;
                prev_line_leading_count = leading_count;
            }

            // Give more weight to tabs, because their presence is a very
            // strong indicator.
            histogram[0] *= 2;

            histogram
        };

        // Find the most frequent indent, its frequency, and the frequency of
        // the next-most frequent indent.
        let indent = histogram
            .iter()
            .enumerate()
            .max_by_key(|kv| kv.1)
            .unwrap()
            .0;
        let indent_freq = histogram[indent];
        let indent_freq_2 = *histogram
            .iter()
            .enumerate()
            .filter(|kv| kv.0 != indent)
            .map(|kv| kv.1)
            .max()
            .unwrap();

        // Use the auto-detected result if we're confident enough in its
        // accuracy, based on some heuristics. Otherwise fall back to
        // the language-based setting.
        if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
            // Use the auto-detected setting.
            self.indent_style = match indent {
                0 => IndentStyle::Tabs,
                _ => IndentStyle::Spaces(indent as u8),
            };
        } else {
            // Fall back to language-based setting.
            let indent = self
                .language
                .as_ref()
                .and_then(|config| config.indent.as_ref())
                .map_or("  ", |config| config.unit.as_str()); // fallback to 2 spaces

            self.indent_style = if indent.starts_with(' ') {
                IndentStyle::Spaces(indent.len() as u8)
            } else {
                IndentStyle::Tabs
            };
        }
    }
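
    /// Canonicalizes `path` and stores it as the document's on-disk location.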
    pub fn set_path(&mut self, path: &Path) -> Result<(), std::io::Error> {
        let path = canonicalize_path(path)?;

        // if parent doesn't exist we still want to open the document
        // and error out when document is saved
        self.path = Some(path);

        Ok(())
    }
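
    /// Sets or clears the document's language configuration, rebuilding the
    /// tree-sitter `Syntax` used for highlighting accordingly.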
    pub fn set_language(
        &mut self,
        theme: Option<&Theme>,
        language_config: Option<Arc<helix_core::syntax::LanguageConfiguration>>,
    ) {
        if let Some(language_config) = language_config {
            let scopes = theme.map(|theme| theme.scopes()).unwrap_or(&[]);
            if let Some(highlight_config) = language_config.highlight_config(scopes) {
                let syntax = Syntax::new(&self.text, highlight_config);
                self.syntax = Some(syntax);
                // TODO: config.configure(scopes) is now delayed, is that ok?
            }

            self.language = Some(language_config);
        } else {
            self.syntax = None;
            self.language = None;
        };
    }
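
    /// Sets the document's language by looking up the language configuration
    /// for the given scope name (usually `source.<lang>`) in the loader.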
    pub fn set_language2(
        &mut self,
        scope: &str,
        theme: Option<&Theme>,
        config_loader: Arc<syntax::Loader>,
    ) {
        let language_config = config_loader.language_config_for_scope(scope);

        self.set_language(theme, language_config);
    }
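
    /// Sets or clears the language server client used by this document.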
    pub fn set_language_server(&mut self, language_server: Option<Arc<helix_lsp::Client>>) {
        self.language_server = language_server;
    }
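
    /// Stores `selection` as the current selection of the given view.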
    pub fn set_selection(&mut self, view_id: ViewId, selection: Selection) {
        // TODO: use a transaction?
        self.selections.insert(view_id, selection);
    }
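
    /// Applies the transaction to the document's text, updating the view's
    /// selection, the version, the syntax tree and the language server as
    /// needed. Unlike [`Self::apply`], this does not record the changes for
    /// the undo history.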
    fn apply_impl(&mut self, transaction: &Transaction, view_id: ViewId) -> bool {
        let old_doc = self.text().clone();

        let success = transaction.changes().apply(&mut self.text);

        if success {
            // update the selection: either take the selection specified in the transaction, or map the
            // current selection through changes.
            let selection = transaction
                .selection()
                .cloned()
                .unwrap_or_else(|| self.selection(view_id).clone().map(transaction.changes()));
            self.set_selection(view_id, selection);
        }

        if !transaction.changes().is_empty() {
            self.version += 1;

            // update tree-sitter syntax tree
            if let Some(syntax) = &mut self.syntax {
                // TODO: no unwrap
                syntax
                    .update(&old_doc, &self.text, transaction.changes())
                    .unwrap();
            }

            // map state.diagnostics over changes::map_pos too
            // NOTE: seems to do nothing since the language server resends diagnostics on each edit
            // for diagnostic in &mut self.diagnostics {
            //     use helix_core::Assoc;
            //     let changes = transaction.changes();
            //     diagnostic.range.start = changes.map_pos(diagnostic.range.start, Assoc::After);
            //     diagnostic.range.end = changes.map_pos(diagnostic.range.end, Assoc::After);
            //     diagnostic.line = self.text.char_to_line(diagnostic.range.start);
            // }

            // emit lsp notification
            if let Some(language_server) = &self.language_server {
                let notify = language_server.text_document_did_change(
                    self.versioned_identifier(),
                    &old_doc,
                    self.text(),
                    transaction.changes(),
                );

                if let Some(notify) = notify {
                    tokio::spawn(notify);
                } //.expect("failed to emit textDocument/didChange");
            }
        }
        success
    }
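
    /// Applies the transaction to the document and composes it into the
    /// pending changes, so that it can later be committed to the undo
    /// history with [`Self::append_changes_to_history`].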
    pub fn apply(&mut self, transaction: &Transaction, view_id: ViewId) -> bool {
        // store the state just before any changes are made. This allows us to undo to the
        // state just before a transaction was applied.
        if self.changes.is_empty() && !transaction.changes().is_empty() {
            self.old_state = Some(State {
                doc: self.text.clone(),
                selection: self.selection(view_id).clone(),
            });
        }

        let success = self.apply_impl(transaction, view_id);

        if !transaction.changes().is_empty() {
            // Compose this transaction with the previous one
            take_with(&mut self.changes, |changes| {
                changes.compose(transaction.changes().clone())
            });
        }
        success
    }
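
    /// Undoes the last revision committed to the history, if any.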
    pub fn undo(&mut self, view_id: ViewId) {
        let mut history = self.history.take();
        let success = if let Some(transaction) = history.undo() {
            self.apply_impl(transaction, view_id)
        } else {
            false
        };
        self.history.set(history);

        if success {
            // reset changeset to fix len
            self.changes = ChangeSet::new(self.text());
        }
    }
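
    /// Redoes the last undone revision, if any.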
    pub fn redo(&mut self, view_id: ViewId) {
        let mut history = self.history.take();
        let success = if let Some(transaction) = history.redo() {
            self.apply_impl(transaction, view_id)
        } else {
            false
        };
        self.history.set(history);

        if success {
            // reset changeset to fix len
            self.changes = ChangeSet::new(self.text());
        }
    }
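
    /// Steps back through the edit history according to `uk`, applying each
    /// intermediate transaction.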
    pub fn earlier(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) {
        let txns = self.history.get_mut().earlier(uk);
        for txn in txns {
            self.apply_impl(&txn, view_id);
        }
    }
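
    /// Steps forward through the edit history according to `uk`, applying each
    /// intermediate transaction.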
    pub fn later(&mut self, view_id: ViewId, uk: helix_core::history::UndoKind) {
        let txns = self.history.get_mut().later(uk);
        for txn in txns {
            self.apply_impl(&txn, view_id);
        }
    }

    /// Commit pending changes to history
    pub fn append_changes_to_history(&mut self, view_id: ViewId) {
        if self.changes.is_empty() {
            return;
        }

        let new_changeset = ChangeSet::new(self.text());
        let changes = std::mem::replace(&mut self.changes, new_changeset);
        // Instead of doing this messy merge we could always commit, and based on transaction
        // annotations either add a new layer or compose into the previous one.
        let transaction =
            Transaction::from(changes).with_selection(self.selection(view_id).clone());

        // HAXX: we need to reconstruct the state as it was before the changes..
        let old_state = self.old_state.take().expect("no old_state available");

        let mut history = self.history.take();
        history.commit_revision(&transaction, &old_state);
        self.history.set(history);
    }

    pub fn id(&self) -> DocumentId {
        self.id
    }
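
    /// Whether the document has unsaved modifications: uncommitted pending
    /// changes, or a history revision newer than the last saved one.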
    pub fn is_modified(&self) -> bool {
        let history = self.history.take();
        let current_revision = history.current_revision();
        self.history.set(history);
        current_revision != self.last_saved_revision || !self.changes.is_empty()
    }
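
    /// Marks the current history revision as saved, so that `is_modified`
    /// reports the document as clean until it changes again.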
    pub fn reset_modified(&mut self) {
        let history = self.history.take();
        let current_revision = history.current_revision();
        self.history.set(history);
        self.last_saved_revision = current_revision;
    }

    pub fn mode(&self) -> Mode {
        self.mode
    }

    /// Corresponding language scope name. Usually `source.<lang>`.
    pub fn language(&self) -> Option<&str> {
        self.language
            .as_ref()
            .map(|language| language.scope.as_str())
    }

    pub fn language_config(&self) -> Option<&LanguageConfiguration> {
        self.language.as_deref()
    }

    /// Current document version, incremented at each change.
    pub fn version(&self) -> i32 {
        self.version
    }

    #[inline]
    pub fn language_server(&self) -> Option<&helix_lsp::Client> {
        self.language_server.as_deref()
    }

    #[inline]
    /// Tree-sitter AST tree
    pub fn syntax(&self) -> Option<&Syntax> {
        self.syntax.as_ref()
    }

    /// Tab size in columns.
    pub fn tab_width(&self) -> usize {
        self.language
            .as_ref()
            .and_then(|config| config.indent.as_ref())
            .map_or(4, |config| config.tab_width) // fallback to 4 columns
    }

    /// Returns a string containing a single level of indentation.
    ///
    /// TODO: we might not need this function anymore, since the information
    /// is conveniently available in `Document::indent_style` now.
    pub fn indent_unit(&self) -> &'static str {
        match self.indent_style {
            IndentStyle::Tabs => "\t",
            IndentStyle::Spaces(1) => " ",
            IndentStyle::Spaces(2) => "  ",
            IndentStyle::Spaces(3) => "   ",
            IndentStyle::Spaces(4) => "    ",
            IndentStyle::Spaces(5) => "     ",
            IndentStyle::Spaces(6) => "      ",
            IndentStyle::Spaces(7) => "       ",
            IndentStyle::Spaces(8) => "        ",

            // Unsupported indentation style. This should never happen,
            // but just in case fall back to two spaces.
            _ => "  ",
        }
    }

    #[inline]
    /// File path on disk.
    pub fn path(&self) -> Option<&PathBuf> {
        self.path.as_ref()
    }
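
    /// The document's path converted to a `file://` URL, if a path is set.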
    pub fn url(&self) -> Option<Url> {
        self.path().map(|path| Url::from_file_path(path).unwrap())
    }

    #[inline]
    pub fn text(&self) -> &Rope {
        &self.text
    }

    #[inline]
    pub fn selection(&self, view_id: ViewId) -> &Selection {
        &self.selections[&view_id]
    }
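
    /// The document's path made relative to the current working directory where
    /// possible, with the home directory folded into `~`.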
    pub fn relative_path(&self) -> Option<PathBuf> {
        let cwdir = std::env::current_dir().expect("couldn't determine current directory");

        self.path.as_ref().map(|path| {
            let mut path = path.as_path();
            if path.is_absolute() {
                path = path.strip_prefix(cwdir).unwrap_or(path)
            };
            fold_home_dir(path)
        })
    }

    // pub fn slice<R>(&self, range: R) -> RopeSlice where R: RangeBounds {
    //     self.state.doc.slice
    // }

    // transact(Fn) ?

    // -- LSP methods

    #[inline]
    pub fn identifier(&self) -> lsp::TextDocumentIdentifier {
        lsp::TextDocumentIdentifier::new(self.url().unwrap())
    }

    pub fn versioned_identifier(&self) -> lsp::VersionedTextDocumentIdentifier {
        lsp::VersionedTextDocumentIdentifier::new(self.url().unwrap(), self.version)
    }

    #[inline]
    pub fn diagnostics(&self) -> &[Diagnostic] {
        &self.diagnostics
    }

    pub fn set_diagnostics(&mut self, diagnostics: Vec<Diagnostic>) {
        self.diagnostics = diagnostics;
    }
}

impl Default for Document {
    fn default() -> Self {
        let text = Rope::from(DEFAULT_LINE_ENDING.as_str());
        Self::from(text, None)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn changeset_to_changes() {
        use helix_lsp::{lsp, Client, OffsetEncoding};
        let text = Rope::from("hello");
        let mut doc = Document::from(text, None);
        let view = ViewId::default();
        doc.set_selection(view, Selection::single(5, 5));

        // insert
        let transaction = Transaction::insert(doc.text(), doc.selection(view), " world".into());
        let old_doc = doc.text().clone();
        doc.apply(&transaction, view);
        let changes = Client::changeset_to_changes(
            &old_doc,
            doc.text(),
            transaction.changes(),
            OffsetEncoding::Utf8,
        );

        assert_eq!(
            changes,
            &[lsp::TextDocumentContentChangeEvent {
                range: Some(lsp::Range::new(
                    lsp::Position::new(0, 5),
                    lsp::Position::new(0, 5)
                )),
                text: " world".into(),
                range_length: None,
            }]
        );

        // delete
        let transaction = transaction.invert(&old_doc);
        let old_doc = doc.text().clone();
        doc.apply(&transaction, view);
        let changes = Client::changeset_to_changes(
            &old_doc,
            doc.text(),
            transaction.changes(),
            OffsetEncoding::Utf8,
        );

        // line: 0-based.
        // col: 0-based, gaps between chars.
        // 0 1 2 3 4 5 6 7 8 9 0 1
        // |h|e|l|l|o| |w|o|r|l|d|
        //           -------------
        // (0, 5)-(0, 11)
        assert_eq!(
            changes,
            &[lsp::TextDocumentContentChangeEvent {
                range: Some(lsp::Range::new(
                    lsp::Position::new(0, 5),
                    lsp::Position::new(0, 11)
                )),
                text: "".into(),
                range_length: None,
            }]
        );

        // replace
        // also tests that changes are layered, positions depend on previous changes.
        doc.set_selection(view, Selection::single(0, 5));
        let transaction = Transaction::change(
            doc.text(),
            vec![(0, 2, Some("aei".into())), (3, 5, Some("ou".into()))].into_iter(),
        );
        // aeilou
        let old_doc = doc.text().clone();
        doc.apply(&transaction, view);
        let changes = Client::changeset_to_changes(
            &old_doc,
            doc.text(),
            transaction.changes(),
            OffsetEncoding::Utf8,
        );

        assert_eq!(
            changes,
            &[
                // 0 1 2 3 4 5
                // |h|e|l|l|o|
                // ----
                //
                // aeillo
                lsp::TextDocumentContentChangeEvent {
                    range: Some(lsp::Range::new(
                        lsp::Position::new(0, 0),
                        lsp::Position::new(0, 2)
                    )),
                    text: "aei".into(),
                    range_length: None,
                },
                // 0 1 2 3 4 5 6
                // |a|e|i|l|l|o|
                //         -----
                //
                // aeilou
                lsp::TextDocumentContentChangeEvent {
                    range: Some(lsp::Range::new(
                        lsp::Position::new(0, 4),
                        lsp::Position::new(0, 6)
                    )),
                    text: "ou".into(),
                    range_length: None,
                }
            ]
        );
    }

    #[test]
    fn test_line_ending() {
        if cfg!(windows) {
            assert_eq!(Document::default().text().to_string(), "\r\n");
        } else {
            assert_eq!(Document::default().text().to_string(), "\n");
        }
    }
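
    // Added check (not part of the original suite): `Mode`'s `Display` and
    // `FromStr` implementations should round-trip, since the TOML
    // deserializer above is built on top of `FromStr`.
    #[test]
    fn mode_display_fromstr_roundtrip() {
        for &mode in &[Mode::Normal, Mode::Select, Mode::Insert] {
            let parsed: Mode = mode.to_string().parse().unwrap();
            assert_eq!(parsed, mode);
        }
        assert!("block".parse::<Mode>().is_err());
    }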

    macro_rules! test_decode {
        ($label:expr, $label_override:expr) => {
            let encoding = encoding_rs::Encoding::for_label($label_override.as_bytes()).unwrap();
            let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/encoding");
            let path = base_path.join(format!("{}_in.txt", $label));
            let ref_path = base_path.join(format!("{}_in_ref.txt", $label));
            assert!(path.exists());
            assert!(ref_path.exists());

            let mut file = std::fs::File::open(path).unwrap();
            let text = from_reader(&mut file, Some(encoding))
                .unwrap()
                .0
                .to_string();
            let expectation = std::fs::read_to_string(ref_path).unwrap();
            assert_eq!(text[..], expectation[..]);
        };
    }

    macro_rules! test_encode {
        ($label:expr, $label_override:expr) => {
            let encoding = encoding_rs::Encoding::for_label($label_override.as_bytes()).unwrap();
            let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/encoding");
            let path = base_path.join(format!("{}_out.txt", $label));
            let ref_path = base_path.join(format!("{}_out_ref.txt", $label));
            assert!(path.exists());
            assert!(ref_path.exists());

            let text = Rope::from_str(&std::fs::read_to_string(path).unwrap());
            let mut buf: Vec<u8> = Vec::new();
            helix_lsp::block_on(to_writer(&mut buf, encoding, &text)).unwrap();
            let expectation = std::fs::read(ref_path).unwrap();
            assert_eq!(buf, expectation);
        };
    }

    macro_rules! test_decode_fn {
        ($name:ident, $label:expr, $label_override:expr) => {
            #[test]
            fn $name() {
                test_decode!($label, $label_override);
            }
        };
        ($name:ident, $label:expr) => {
            #[test]
            fn $name() {
                test_decode!($label, $label);
            }
        };
    }

    macro_rules! test_encode_fn {
        ($name:ident, $label:expr, $label_override:expr) => {
            #[test]
            fn $name() {
                test_encode!($label, $label_override);
            }
        };
        ($name:ident, $label:expr) => {
            #[test]
            fn $name() {
                test_encode!($label, $label);
            }
        };
    }

    test_decode_fn!(test_big5_decode, "big5");
    test_encode_fn!(test_big5_encode, "big5");
    test_decode_fn!(test_euc_kr_decode, "euc_kr", "EUC-KR");
    test_encode_fn!(test_euc_kr_encode, "euc_kr", "EUC-KR");
    test_decode_fn!(test_gb18030_decode, "gb18030");
    test_encode_fn!(test_gb18030_encode, "gb18030");
    test_decode_fn!(test_iso_2022_jp_decode, "iso_2022_jp", "ISO-2022-JP");
    test_encode_fn!(test_iso_2022_jp_encode, "iso_2022_jp", "ISO-2022-JP");
    test_decode_fn!(test_jis0208_decode, "jis0208", "EUC-JP");
    test_encode_fn!(test_jis0208_encode, "jis0208", "EUC-JP");
    test_decode_fn!(test_jis0212_decode, "jis0212", "EUC-JP");
    test_decode_fn!(test_shift_jis_decode, "shift_jis");
    test_encode_fn!(test_shift_jis_encode, "shift_jis");
}