@ -57,6 +57,7 @@ pub enum OffsetEncoding {
pub mod util {
use super ::* ;
use helix_core ::line_ending ::{ line_end_byte_index , line_end_char_index } ;
use helix_core ::{ diagnostic ::NumberOrString , Range , Rope , Selection , Tendril , Transaction } ;
/// Converts a diagnostic in the document to [`lsp::Diagnostic`].
@ -117,7 +118,7 @@ pub mod util {
/// Converts [`lsp::Position`] to a position in the document.
///
/// Returns `None` if position exceeds document length or an operation overflows.
/// Returns `None` if position .line is out of bounds or an overflow occurs
pub fn lsp_pos_to_pos (
doc : & Rope ,
pos : lsp ::Position ,
@ -128,22 +129,58 @@ pub mod util {
return None ;
}
match offset_encoding {
// We need to be careful here to fully comply with the LSP spec.
// Two relevant quotes from the spec:
//
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
// > If the character value is greater than the line length it defaults back
// > to the line length.
//
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocuments
// > To ensure that both client and server split the string into the same
// > line representation the protocol specifies the following end-of-line sequences:
// > ‘\n’, ‘\r\n’ and ‘\r’. Positions are line end character agnostic.
// > So you can not specify a position that denotes \r|\n or \n| where | represents the character offset.
//
// This means that while the line must be in bounds the `character`
// must be capped to the end of the line.
// Note that the end of the line here is **before** the line terminator
// so we must use `line_end_char_index` instead of `doc.line_to_char(pos_line + 1)`
//
// FIXME: Helix does not fully comply with the LSP spec for line terminators.
// The LSP standard requires that line terminators are ['\n', '\r\n', '\r'].
// Without the unicode-linebreak feature, the `\r` terminator is not handled by helix.
// With the unicode-linebreak feature, helix recognizes multiple extra line break chars
// which means that positions will be decoded/encoded incorrectly in their presence
let line = match offset_encoding {
OffsetEncoding ::Utf8 = > {
let line = doc . line_to_char ( pos_line ) ;
let pos = line . checked_add ( pos . character as usize ) ? ;
if pos < = doc . len_chars ( ) {
Some ( pos )
} else {
None
}
let line_start = doc . line_to_byte ( pos_line ) ;
let line_end = line_end_byte_index ( & doc . slice ( .. ) , pos_line ) ;
line_start .. line_end
}
OffsetEncoding ::Utf16 = > {
let line = doc . line_to_char ( pos_line ) ;
let line_start = doc . char_to_utf16_cu ( line ) ;
let pos = line_start . checked_add ( pos . character as usize ) ? ;
doc . try_utf16_cu_to_char ( pos ) . ok ( )
// TODO directly translate line index to char-idx
// ropey can do this just as easily as utf-8 byte translation
// but the functions are just missing.
// Translate to char first and then utf-16 as a workaround
let line_start = doc . line_to_char ( pos_line ) ;
let line_end = line_end_char_index ( & doc . slice ( .. ) , pos_line ) ;
doc . char_to_utf16_cu ( line_start ) .. doc . char_to_utf16_cu ( line_end )
}
} ;
// The LSP spec demands that the offset is capped to the end of the line
let pos = line
. start
. checked_add ( pos . character as usize )
. unwrap_or ( line . end )
. min ( line . end ) ;
// TODO prefer UTF32/char indices to avoid this step
match offset_encoding {
OffsetEncoding ::Utf8 = > doc . try_byte_to_char ( pos ) . ok ( ) ,
OffsetEncoding ::Utf16 = > doc . try_utf16_cu_to_char ( pos ) . ok ( ) ,
}
}
@ -158,8 +195,8 @@ pub mod util {
match offset_encoding {
OffsetEncoding ::Utf8 = > {
let line = doc . char_to_line ( pos ) ;
let line_start = doc . line_to_ char ( line ) ;
let col = pos - line_start ;
let line_start = doc . line_to_ byte ( line ) ;
let col = doc. char_to_byte ( pos) - line_start ;
lsp ::Position ::new ( line as u32 , col as u32 )
}
@ -606,16 +643,55 @@ mod tests {
}
test_case ! ( "" , ( 0 , 0 ) = > Some ( 0 ) ) ;
test_case ! ( "" , ( 0 , 1 ) = > None ) ;
test_case ! ( "" , ( 0 , 1 ) = > Some( 0 ) ) ;
test_case ! ( "" , ( 1 , 0 ) = > None ) ;
test_case ! ( "\n\n" , ( 0 , 0 ) = > Some ( 0 ) ) ;
test_case ! ( "\n\n" , ( 1 , 0 ) = > Some ( 1 ) ) ;
test_case ! ( "\n\n" , ( 1 , 1 ) = > Some ( 2 ) ) ;
test_case ! ( "\n\n" , ( 1 , 1 ) = > Some ( 1 ) ) ;
test_case ! ( "\n\n" , ( 2 , 0 ) = > Some ( 2 ) ) ;
test_case ! ( "\n\n" , ( 3 , 0 ) = > None ) ;
test_case ! ( "test\n\n\n\ncase" , ( 4 , 3 ) = > Some ( 11 ) ) ;
test_case ! ( "test\n\n\n\ncase" , ( 4 , 4 ) = > Some ( 12 ) ) ;
test_case ! ( "test\n\n\n\ncase" , ( 4 , 5 ) = > None ) ;
test_case ! ( "test\n\n\n\ncase" , ( 4 , 5 ) = > Some( 12 ) ) ;
test_case ! ( "" , ( u32 ::MAX , u32 ::MAX ) = > None ) ;
}
#[test]
fn emoji_format_gh_4791() {
    use lsp_types::{Position, Range, TextEdit};

    // Regression test: applying edits to text containing multi-codepoint
    // emoji must not panic or mis-translate positions (GitHub issue #4791).
    // Small builder so each edit reads as (start, end, replacement).
    let edit = |(start_line, start_char), (end_line, end_char), text: &str| TextEdit {
        range: Range {
            start: Position {
                line: start_line,
                character: start_char,
            },
            end: Position {
                line: end_line,
                character: end_char,
            },
        },
        new_text: text.to_string(),
    };

    let edits = vec![
        edit((0, 1), (1, 0), "\n "),
        edit((1, 7), (2, 0), "\n "),
    ];

    let mut source = Rope::from_str("[\n\"🇺🇸\",\n\"🎄\",\n]");
    let transaction = generate_transaction_from_edits(&source, edits, OffsetEncoding::Utf8);
    // The transaction must apply cleanly despite the emoji content.
    assert!(transaction.apply(&mut source));
}
}