From da355a3231174ac019b43a31958b73e818e6463f Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Mon, 28 Nov 2022 03:20:54 +0100 Subject: [PATCH] Significantly improve performance of `:reload` (#4457) * bump ropey to 1.5.1-alpha * significantly improve performance of :reload --- Cargo.lock | 259 ++++++++++++++++++++++++---------- helix-core/Cargo.toml | 2 +- helix-core/src/diff.rs | 248 ++++++++++++++++++++++++++------ helix-core/src/transaction.rs | 6 +- 4 files changed, 389 insertions(+), 126 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1d0ee4df9..05a0396fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,9 +27,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.18" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] @@ -80,9 +80,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.11.0" +version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "bytecount" @@ -92,9 +92,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytes" -version = "1.2.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db" +checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" [[package]] name = "cassowary" @@ -148,6 +148,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + [[package]] name = "content_inspector" version = "0.2.4" @@ -165,12 +175,11 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "crossbeam-utils" -version = "0.8.11" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -199,6 +208,50 @@ dependencies = [ "winapi", ] +[[package]] +name = "cxx" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -345,9 +398,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", @@ -436,6 +489,7 @@ dependencies = [ "etcetera", "hashbrown 0.13.1", "helix-loader", + "imara-diff", "log", "once_cell", "quickcheck", @@ -443,7 +497,6 @@ dependencies = [ "ropey", "serde", "serde_json", - "similar", "slotmap", "smallvec", "smartstring", @@ -594,18 +647,28 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.47" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c495f162af0bf17656d0014a0eded5f3cd2f365fdd204548c2869db89359dc7" +checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" dependencies = [ "android_system_properties", "core-foundation-sys", + "iana-time-zone-haiku", "js-sys", - "once_cell", "wasm-bindgen", "winapi", ] +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + [[package]] name = "idna" version = "0.3.0" @@ -634,6 +697,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "imara-diff" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" +dependencies = [ + "ahash 0.8.2", + "hashbrown 0.12.3", +] + [[package]] name = "indoc" version = "1.0.7" @@ -651,15 +724,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] name = "js-sys" -version = "0.3.59" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" dependencies = [ "wasm-bindgen", ] @@ -672,9 +745,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.132" +version = "0.2.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" +checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" [[package]] name = "libloading" @@ -686,11 +759,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "link-cplusplus" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" +dependencies = [ + "cc", +] + [[package]] name = "lock_api" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" dependencies = [ "autocfg", "scopeguard", @@ -726,18 +808,18 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memmap2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95af15f345b17af2efc8ead6080fb8bc376f8cec1b35277b935637595fe77498" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" dependencies = [ "libc", ] [[package]] name = "mio" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", @@ -766,9 +848,9 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.13.1" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" dependencies = [ "hermit-abi", "libc", @@ -792,9 +874,9 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0" dependencies = [ "cfg-if", "libc", @@ -823,9 +905,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ "unicode-ident", ] @@ -870,9 +952,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] @@ -916,9 +998,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "remove_dir_all" @@ -960,20 +1042,26 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scratch" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" + [[package]] name = "serde" -version = "1.0.147" +version = "1.0.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "e53f64bb4ba0191d6d0676e1b141ca55047d83b74f5607e6d8eb88126c52c2dc" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c" dependencies = [ "proc-macro2", "quote", @@ -982,9 +1070,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.88" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7" +checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db" dependencies = [ "itoa", "ryu", @@ -1044,12 +1132,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "similar" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" - [[package]] name = "slab" version = "0.4.7" @@ -1121,9 +1203,9 @@ checksum = "9d9199fa80c817e074620be84374a520062ebac833f358d74b37060ce4a0f2c0" [[package]] name = "syn" -version = "1.0.99" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +checksum = "4ae548ec36cf198c0ef7710d3c230987c2d6d7bd98ad6edc0274462724c585ce" dependencies = [ "proc-macro2", "quote", @@ -1144,6 +1226,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + [[package]] name = "termini" version = "0.1.4" @@ -1301,9 +1392,9 @@ checksum = "2281c8c1d221438e373249e065ca4989c4c36952c211ff21a0ee91c44a3869e7" [[package]] name = "unicode-ident" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" [[package]] name = "unicode-linebreak" @@ -1317,9 +1408,9 @@ dependencies = [ [[package]] name = "unicode-normalization" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" dependencies = [ "tinyvec", ] @@ -1373,9 +1464,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.82" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1383,9 +1474,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.82" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" dependencies = [ "bumpalo", "log", @@ -1398,9 +1489,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.82" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602" +checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1408,9 +1499,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.82" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", @@ -1421,9 +1512,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.82" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" [[package]] name = "which" @@ -1469,46 +1560,60 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ + "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", + "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + [[package]] name = "windows_aarch64_msvc" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" [[package]] name = "windows_i686_gnu" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" [[package]] name = "windows_i686_msvc" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" [[package]] name = "xtask" diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 09665b916..31b6546f0 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -38,7 +38,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" toml = "0.5" -similar = "2.2" +imara-diff = "0.1.0" encoding_rs = "0.8" diff --git a/helix-core/src/diff.rs b/helix-core/src/diff.rs index 6960c679c..c754da30f 100644 --- a/helix-core/src/diff.rs +++ b/helix-core/src/diff.rs @@ -1,58 +1,195 @@ -use crate::{Rope, Transaction}; +use std::ops::Range; +use std::time::Instant; -/// Compares `old` and `new` to generate a [`Transaction`] describing -/// the steps required to get from `old` to `new`. -pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction { - // `similar` only works on contiguous data, so a `Rope` has - // to be temporarily converted into a `String`. - let old_converted = old.to_string(); - let new_converted = new.to_string(); - - // A timeout is set so after 1 seconds, the algorithm will start - // approximating. This is especially important for big `Rope`s or - // `Rope`s that are extremely dissimilar to each other. - let mut config = similar::TextDiff::configure(); - config.timeout(std::time::Duration::from_secs(1)); - - let diff = config.diff_chars(&old_converted, &new_converted); - - // The current position of the change needs to be tracked to - // construct the `Change`s. - let mut pos = 0; - Transaction::change( - old, - diff.ops() +use imara_diff::intern::InternedInput; +use imara_diff::Algorithm; +use ropey::RopeSlice; + +use crate::{ChangeSet, Rope, Tendril, Transaction}; + +/// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk +struct CharChangeSetBuilder<'a> { + res: &'a mut ChangeSet, + hunk: &'a InternedInput, + pos: u32, +} + +impl imara_diff::Sink for CharChangeSetBuilder<'_> { + type Out = (); + fn process_change(&mut self, before: Range, after: Range) { + self.res.retain((before.start - self.pos) as usize); + self.res.delete(before.len()); + self.pos = before.end; + + let res = self.hunk.after[after.start as usize..after.end as usize] + .iter() + .map(|&token| self.hunk.interner[token]) + .collect(); + + self.res.insert(res); + } + + fn finish(self) -> Self::Out { + self.res.retain(self.hunk.before.len() - self.pos as usize); + } +} + +struct LineChangeSetBuilder<'a> { + res: ChangeSet, + after: RopeSlice<'a>, + file: &'a InternedInput>, + current_hunk: InternedInput, + pos: u32, +} + +impl imara_diff::Sink for LineChangeSetBuilder<'_> { + type Out = ChangeSet; + + fn process_change(&mut self, before: Range, after: Range) { + let len = self.file.before[self.pos as usize..before.start as usize] .iter() - .map(|op| op.as_tag_tuple()) - .filter_map(|(tag, old_range, new_range)| { - // `old_pos..pos` is equivalent to `start..end` for where - // the change should be applied. - let old_pos = pos; - pos += old_range.end - old_range.start; - - match tag { - // Semantically, inserts and replacements are the same thing. - similar::DiffTag::Insert | similar::DiffTag::Replace => { - // This is the text from the `new` rope that should be - // inserted into `old`. - let text: &str = { - let start = new.char_to_byte(new_range.start); - let end = new.char_to_byte(new_range.end); - &new_converted[start..end] - }; - Some((old_pos, pos, Some(text.into()))) + .map(|&it| self.file.interner[it].len_chars()) + .sum(); + self.res.retain(len); + self.pos = before.end; + + // do not perform diffs on large hunks + let len_before = before.end - before.start; + let len_after = after.end - after.start; + + // Pure insertions/removals do not require a character diff. + // Very large changes are ignored because their character diff is expensive to compute + // TODO adjust heuristic to detect large changes? + if len_before == 0 + || len_after == 0 + || len_after > 5 * len_before + || 5 * len_after < len_before && len_before > 10 + || len_before + len_after > 200 + { + let remove = self.file.before[before.start as usize..before.end as usize] + .iter() + .map(|&it| self.file.interner[it].len_chars()) + .sum(); + self.res.delete(remove); + let mut fragment = Tendril::new(); + if len_after > 500 { + // copying a rope line by line is slower then copying the entire + // rope. Use to_string for very large changes instead.. + if self.file.after.len() == after.end as usize { + if after.start == 0 { + fragment = self.after.to_string().into(); + } else { + let start = self.after.line_to_char(after.start as usize); + fragment = self.after.slice(start..).to_string().into(); } - similar::DiffTag::Delete => Some((old_pos, pos, None)), - similar::DiffTag::Equal => None, + } else if after.start == 0 { + let end = self.after.line_to_char(after.end as usize); + fragment = self.after.slice(..end).to_string().into(); + } else { + let start = self.after.line_to_char(after.start as usize); + let end = self.after.line_to_char(after.end as usize); + fragment = self.after.slice(start..end).to_string().into(); } - }), - ) + } else { + for &line in &self.file.after[after.start as usize..after.end as usize] { + for chunk in self.file.interner[line].chunks() { + fragment.push_str(chunk) + } + } + }; + self.res.insert(fragment); + } else { + // for reasonably small hunks, generating a ChangeSet from char diff can save memory + // TODO use a tokenizer (word diff?) for improved performance + let hunk_before = self.file.before[before.start as usize..before.end as usize] + .iter() + .flat_map(|&it| self.file.interner[it].chars()); + let hunk_after = self.file.after[after.start as usize..after.end as usize] + .iter() + .flat_map(|&it| self.file.interner[it].chars()); + self.current_hunk.update_before(hunk_before); + self.current_hunk.update_after(hunk_after); + + // the histogram heuristic does not work as well + // for characters because the same characters often reoccur + // use myer diff instead + imara_diff::diff( + Algorithm::Myers, + &self.current_hunk, + CharChangeSetBuilder { + res: &mut self.res, + hunk: &self.current_hunk, + pos: 0, + }, + ); + + self.current_hunk.clear(); + } + } + + fn finish(mut self) -> Self::Out { + let len = self.file.before[self.pos as usize..] + .iter() + .map(|&it| self.file.interner[it].len_chars()) + .sum(); + + self.res.retain(len); + self.res + } +} + +struct RopeLines<'a>(RopeSlice<'a>); + +impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> { + type Token = RopeSlice<'a>; + // TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25) + type Tokenizer = ropey::iter::Lines<'a>; + + fn tokenize(&self) -> Self::Tokenizer { + self.0.lines() + } + + fn estimate_tokens(&self) -> u32 { + // we can provide a perfect estimate which is very nice for performance + self.0.len_lines() as u32 + } +} + +/// Compares `old` and `new` to generate a [`Transaction`] describing +/// the steps required to get from `old` to `new`. +pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction { + let start = Instant::now(); + let res = ChangeSet::with_capacity(32); + let after = after.slice(..); + let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after)); + let builder = LineChangeSetBuilder { + res, + file: &file, + after, + pos: 0, + current_hunk: InternedInput::default(), + }; + + let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into(); + + log::debug!( + "rope diff took {}s", + Instant::now().duration_since(start).as_secs_f64() + ); + res } #[cfg(test)] mod tests { use super::*; + fn test_identity(a: &str, b: &str) { + let mut old = Rope::from(a); + let new = Rope::from(b); + compare_ropes(&old, &new).apply(&mut old); + assert_eq!(old, new); + } + quickcheck::quickcheck! { fn test_compare_ropes(a: String, b: String) -> bool { let mut old = Rope::from(a); @@ -61,4 +198,25 @@ mod tests { old == new } } + + #[test] + fn equal_files() { + test_identity("foo", "foo"); + } + + #[test] + fn trailing_newline() { + test_identity("foo\n", "foo"); + test_identity("foo", "foo\n"); + } + + #[test] + fn new_file() { + test_identity("", "foo"); + } + + #[test] + fn deleted_file() { + test_identity("foo", ""); + } } diff --git a/helix-core/src/transaction.rs b/helix-core/src/transaction.rs index 3fb394138..482fd6d97 100644 --- a/helix-core/src/transaction.rs +++ b/helix-core/src/transaction.rs @@ -56,7 +56,7 @@ impl ChangeSet { } // Changeset builder operations: delete/insert/retain - fn delete(&mut self, n: usize) { + pub(crate) fn delete(&mut self, n: usize) { use Operation::*; if n == 0 { return; @@ -71,7 +71,7 @@ impl ChangeSet { } } - fn insert(&mut self, fragment: Tendril) { + pub(crate) fn insert(&mut self, fragment: Tendril) { use Operation::*; if fragment.is_empty() { @@ -93,7 +93,7 @@ impl ChangeSet { self.changes.push(new_last); } - fn retain(&mut self, n: usize) { + pub(crate) fn retain(&mut self, n: usize) { use Operation::*; if n == 0 { return;