From 3f88a3f4e6f75bf04246a8015652931e640e0821 Mon Sep 17 00:00:00 2001 From: woojiq <122799969+woojiq@users.noreply.github.com> Date: Sun, 14 Jan 2024 16:46:32 +0200 Subject: [PATCH] Change path normalization strategy to not resolve symlinks (#9330) --- Cargo.lock | 1 + helix-core/Cargo.toml | 1 + helix-core/src/path.rs | 71 ++++++++++++++-------- helix-core/tests/path.rs | 124 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+), 26 deletions(-) create mode 100644 helix-core/tests/path.rs diff --git a/Cargo.lock b/Cargo.lock index a7e06b6ef..3643b9532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1074,6 +1074,7 @@ dependencies = [ "slotmap", "smallvec", "smartstring", + "tempfile", "textwrap", "toml", "tree-sitter", diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index d7fff6c6f..be41fa010 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -55,3 +55,4 @@ parking_lot = "0.12" [dev-dependencies] quickcheck = { version = "1", default-features = false } indoc = "2.0.4" +tempfile = "3.7.0" diff --git a/helix-core/src/path.rs b/helix-core/src/path.rs index ede37e044..0cf6f812f 100644 --- a/helix-core/src/path.rs +++ b/helix-core/src/path.rs @@ -30,31 +30,10 @@ pub fn expand_tilde(path: &Path) -> PathBuf { path.to_path_buf() } -/// Normalize a path, removing things like `.` and `..`. -/// -/// CAUTION: This does not resolve symlinks (unlike -/// [`std::fs::canonicalize`]). This may cause incorrect or surprising -/// behavior at times. This should be used carefully. Unfortunately, -/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often -/// fail, or on Windows returns annoying device paths. This is a problem Cargo -/// needs to improve on. -/// Copied from cargo: +/// Normalize a path without resolving symlinks. +// Strategy: start from the first component and move up. Cannonicalize previous path, +// join component, cannonicalize new path, strip prefix and join to the final result. pub fn get_normalized_path(path: &Path) -> PathBuf { - // normalization strategy is to canonicalize first ancestor path that exists (i.e., canonicalize as much as possible), - // then run handrolled normalization on the non-existent remainder - let (base, path) = path - .ancestors() - .find_map(|base| { - let canonicalized_base = dunce::canonicalize(base).ok()?; - let remainder = path.strip_prefix(base).ok()?.into(); - Some((canonicalized_base, remainder)) - }) - .unwrap_or_else(|| (PathBuf::new(), PathBuf::from(path))); - - if path.as_os_str().is_empty() { - return base; - } - let mut components = path.components().peekable(); let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() { components.next(); @@ -70,20 +49,60 @@ pub fn get_normalized_path(path: &Path) -> PathBuf { ret.push(component.as_os_str()); } Component::CurDir => {} + #[cfg(not(windows))] Component::ParentDir => { ret.pop(); } + #[cfg(windows)] + Component::ParentDir => { + if let Some(head) = ret.components().next_back() { + match head { + Component::Prefix(_) | Component::RootDir => {} + Component::CurDir => unreachable!(), + // If we left previous component as ".." it means we met a symlink before and we can't pop path. + Component::ParentDir => { + ret.push(".."); + } + Component::Normal(_) => { + if ret.is_symlink() { + ret.push(".."); + } else { + ret.pop(); + } + } + } + } + } + #[cfg(not(windows))] Component::Normal(c) => { ret.push(c); } + #[cfg(windows)] + Component::Normal(c) => 'normal: { + use std::fs::canonicalize; + + let new_path = ret.join(c); + if new_path.is_symlink() { + ret = new_path; + break 'normal; + } + let (can_new, can_old) = (canonicalize(&new_path), canonicalize(&ret)); + match (can_new, can_old) { + (Ok(can_new), Ok(can_old)) => { + let striped = can_new.strip_prefix(can_old); + ret.push(striped.unwrap_or_else(|_| c.as_ref())); + } + _ => ret.push(c), + } + } } } - base.join(ret) + dunce::simplified(&ret).to_path_buf() } /// Returns the canonical, absolute form of a path with all intermediate components normalized. /// -/// This function is used instead of `std::fs::canonicalize` because we don't want to verify +/// This function is used instead of [`std::fs::canonicalize`] because we don't want to verify /// here if the path exists, just normalize it's components. pub fn get_canonicalized_path(path: &Path) -> PathBuf { let path = expand_tilde(path); diff --git a/helix-core/tests/path.rs b/helix-core/tests/path.rs new file mode 100644 index 000000000..cbda5e1ab --- /dev/null +++ b/helix-core/tests/path.rs @@ -0,0 +1,124 @@ +#![cfg(windows)] + +use std::{ + env::set_current_dir, + error::Error, + path::{Component, Path, PathBuf}, +}; + +use helix_core::path::get_normalized_path; +use tempfile::Builder; + +// Paths on Windows are almost always case-insensitive. +// Normalization should return the original path. +// E.g. mkdir `CaSe`, normalize(`case`) = `CaSe`. +#[test] +fn test_case_folding_windows() -> Result<(), Box> { + // tmp/root/case + let tmp_prefix = std::env::temp_dir(); + set_current_dir(&tmp_prefix)?; + + let root = Builder::new().prefix("root-").tempdir()?; + let case = Builder::new().prefix("CaSe-").tempdir_in(&root)?; + + let root_without_prefix = root.path().strip_prefix(&tmp_prefix)?; + + let lowercase_case = format!( + "case-{}", + case.path() + .file_name() + .unwrap() + .to_string_lossy() + .split_at(5) + .1 + ); + let test_path = root_without_prefix.join(lowercase_case); + assert_eq!( + get_normalized_path(&test_path), + case.path().strip_prefix(&tmp_prefix)? + ); + + Ok(()) +} + +#[test] +fn test_normalize_path() -> Result<(), Box> { + /* + tmp/root/ + ├── link -> dir1/orig_file + ├── dir1/ + │ └── orig_file + └── dir2/ + └── dir_link -> ../dir1/ + */ + + let tmp_prefix = std::env::temp_dir(); + set_current_dir(&tmp_prefix)?; + + // Create a tree structure as shown above + let root = Builder::new().prefix("root-").tempdir()?; + let dir1 = Builder::new().prefix("dir1-").tempdir_in(&root)?; + let orig_file = Builder::new().prefix("orig_file-").tempfile_in(&dir1)?; + let dir2 = Builder::new().prefix("dir2-").tempdir_in(&root)?; + + // Create path and delete existing file + let dir_link = Builder::new() + .prefix("dir_link-") + .tempfile_in(&dir2)? + .path() + .to_owned(); + let link = Builder::new() + .prefix("link-") + .tempfile_in(&root)? + .path() + .to_owned(); + + use std::os::windows; + windows::fs::symlink_dir(&dir1, &dir_link)?; + windows::fs::symlink_file(&orig_file, &link)?; + + // root/link + let path = link.strip_prefix(&tmp_prefix)?; + assert_eq!( + get_normalized_path(path), + path, + "input {:?} and symlink last component shouldn't be resolved", + path + ); + + // root/dir2/dir_link/orig_file/../.. + let path = dir_link + .strip_prefix(&tmp_prefix) + .unwrap() + .join(orig_file.path().file_name().unwrap()) + .join(Component::ParentDir) + .join(Component::ParentDir); + let expected = dir_link + .strip_prefix(&tmp_prefix) + .unwrap() + .join(Component::ParentDir); + assert_eq!( + get_normalized_path(&path), + expected, + "input {:?} and \"..\" should not erase the simlink that goes ahead", + &path + ); + + // root/link/.././../dir2/../ + let path = link + .strip_prefix(&tmp_prefix) + .unwrap() + .join(Component::ParentDir) + .join(Component::CurDir) + .join(Component::ParentDir) + .join(dir2.path().file_name().unwrap()) + .join(Component::ParentDir); + let expected = link + .strip_prefix(&tmp_prefix) + .unwrap() + .join(Component::ParentDir) + .join(Component::ParentDir); + assert_eq!(get_normalized_path(&path), expected, "input {:?}", &path); + + Ok(()) +}