Negotiate LSP Position Encoding (#5894)

So far LSP always required that `PositionEncoding.characters` is an
UTF-16 offset. Now that LSP 3.17 is available in `lsp-types` request
the server to send char offsets (UTF-32) or byte offsets (UTF-8)
instead. For compatability with old servers, UTF-16 remains as the
fallback as required by the standard.
pull/5/head
Pascal Kuthe 2 years ago committed by GitHub
parent 6929a12f29
commit 93c7afc4ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

4
Cargo.lock generated

@ -1481,9 +1481,9 @@ dependencies = [
[[package]]
name = "lsp-types"
version = "0.93.2"
version = "0.94.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be6e9c7e2d18f651974370d7aff703f9513e0df6e464fd795660edc77e6ca51"
checksum = "0b63735a13a1f9cd4f4835223d828ed9c2e35c8c5e61837774399f558b6a1237"
dependencies = [
"bitflags",
"serde",

@ -19,7 +19,7 @@ anyhow = "1.0"
futures-executor = "0.3"
futures-util = { version = "0.3", features = ["std", "async-await"], default-features = false }
log = "0.4"
lsp-types = { version = "0.93", features = ["proposed"] }
lsp-types = { version = "0.94" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"

@ -6,6 +6,7 @@ use crate::{
use helix_core::{find_root, ChangeSet, Rope};
use helix_loader::{self, VERSION_AND_GIT_HASH};
use lsp::PositionEncodingKind;
use lsp_types as lsp;
use serde::Deserialize;
use serde_json::Value;
@ -32,7 +33,6 @@ pub struct Client {
server_tx: UnboundedSender<Payload>,
request_counter: AtomicU64,
pub(crate) capabilities: OnceCell<lsp::ServerCapabilities>,
offset_encoding: OffsetEncoding,
config: Option<Value>,
root_path: std::path::PathBuf,
root_uri: Option<lsp::Url>,
@ -104,7 +104,6 @@ impl Client {
server_tx,
request_counter: AtomicU64::new(0),
capabilities: OnceCell::new(),
offset_encoding: OffsetEncoding::Utf16,
config,
req_timeout,
@ -147,7 +146,19 @@ impl Client {
}
pub fn offset_encoding(&self) -> OffsetEncoding {
self.offset_encoding
self.capabilities()
.position_encoding
.as_ref()
.and_then(|encoding| match encoding.as_str() {
"utf-8" => Some(OffsetEncoding::Utf8),
"utf-16" => Some(OffsetEncoding::Utf16),
"utf-32" => Some(OffsetEncoding::Utf32),
encoding => {
log::error!("Server provided invalid position encording {encoding}, defaulting to utf-16");
None
},
})
.unwrap_or_default()
}
pub fn config(&self) -> Option<&Value> {
@ -377,6 +388,14 @@ impl Client {
work_done_progress: Some(true),
..Default::default()
}),
general: Some(lsp::GeneralClientCapabilities {
position_encodings: Some(vec![
PositionEncodingKind::UTF32,
PositionEncodingKind::UTF8,
PositionEncodingKind::UTF16,
]),
..Default::default()
}),
..Default::default()
},
trace: None,
@ -577,7 +596,7 @@ impl Client {
}]
}
lsp::TextDocumentSyncKind::INCREMENTAL => {
Self::changeset_to_changes(old_text, new_text, changes, self.offset_encoding)
Self::changeset_to_changes(old_text, new_text, changes, self.offset_encoding())
}
lsp::TextDocumentSyncKind::NONE => return None,
kind => unimplemented!("{:?}", kind),
@ -1027,7 +1046,7 @@ impl Client {
partial_result_params: lsp::PartialResultParams::default(),
};
Some(self.call::<lsp::request::WorkspaceSymbol>(params))
Some(self.call::<lsp::request::WorkspaceSymbolRequest>(params))
}
pub fn code_actions(

@ -20,7 +20,6 @@ use std::{
},
};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio_stream::wrappers::UnboundedReceiverStream;
@ -45,13 +44,14 @@ pub enum Error {
Other(#[from] anyhow::Error),
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, Default)]
pub enum OffsetEncoding {
/// UTF-8 code units aka bytes
#[serde(rename = "utf-8")]
Utf8,
/// UTF-32 code units aka chars
Utf32,
/// UTF-16 code units
#[serde(rename = "utf-16")]
#[default]
Utf16,
}
@ -168,6 +168,11 @@ pub mod util {
let line_end = line_end_char_index(&doc.slice(..), pos_line);
doc.char_to_utf16_cu(line_start)..doc.char_to_utf16_cu(line_end)
}
OffsetEncoding::Utf32 => {
let line_start = doc.line_to_char(pos_line);
let line_end = line_end_char_index(&doc.slice(..), pos_line);
line_start..line_end
}
};
// The LSP spec demands that the offset is capped to the end of the line
@ -177,10 +182,10 @@ pub mod util {
.unwrap_or(line.end)
.min(line.end);
// TODO prefer UTF32/char indices to avoid this step
match offset_encoding {
OffsetEncoding::Utf8 => doc.try_byte_to_char(pos).ok(),
OffsetEncoding::Utf16 => doc.try_utf16_cu_to_char(pos).ok(),
OffsetEncoding::Utf32 => Some(pos),
}
}
@ -205,6 +210,13 @@ pub mod util {
let line_start = doc.char_to_utf16_cu(doc.line_to_char(line));
let col = doc.char_to_utf16_cu(pos) - line_start;
lsp::Position::new(line as u32, col as u32)
}
OffsetEncoding::Utf32 => {
let line = doc.char_to_line(pos);
let line_start = doc.line_to_char(line);
let col = pos - line_start;
lsp::Position::new(line as u32, col as u32)
}
}

@ -1,7 +1,10 @@
use futures_util::FutureExt;
use helix_lsp::{
block_on,
lsp::{self, CodeAction, CodeActionOrCommand, DiagnosticSeverity, NumberOrString},
lsp::{
self, CodeAction, CodeActionOrCommand, CodeActionTriggerKind, DiagnosticSeverity,
NumberOrString,
},
util::{diagnostic_to_lsp_diagnostic, lsp_pos_to_pos, lsp_range_to_range, range_to_lsp_range},
OffsetEncoding,
};
@ -561,6 +564,7 @@ pub fn code_action(cx: &mut Context) {
.map(|diag| diagnostic_to_lsp_diagnostic(doc.text(), diag, offset_encoding))
.collect(),
only: None,
trigger_kind: Some(CodeActionTriggerKind::INVOKED),
},
) {
Some(future) => future,

Loading…
Cancel
Save