diff options
author | Rory Dudley | 2024-12-16 17:07:35 -0700 |
---|---|---|
committer | Rory Dudley | 2024-12-16 17:52:34 -0700 |
commit | 960b0123ee202aef0cfcd294d56af1713cb91de1 (patch) | |
tree | b1f6a432de1d55c3fb64bfee08a22d2209bba5fb | |
parent | ac5152886fedc9b65fbb874c67711cd5227d2e00 (diff) | |
download | dwarvish-960b0123ee202aef0cfcd294d56af1713cb91de1.tar.gz |
Update left + right arrow keys, backspace with width fix
This patch updates the codepaths of the arrow keys and backspace keys to
calculate the width correctly when moving around in the buffer. See
commit ac5152886fed ("Workaround for faulty unicode_width values") for
more details, or you can also read the large comment at the top of the
new width! macro (which was added to reduce some redundant code, since
we need to properly compute character width in 4 different places).
Signed-off-by: Rory Dudley <rory@netc.lu>
-rw-r--r-- | src/buffer.rs | 113 |
1 files changed, 62 insertions, 51 deletions
diff --git a/src/buffer.rs b/src/buffer.rs index 5771668..a9d925b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -77,6 +77,50 @@ macro_rules! char { }; } +/// This bit of code is annoying, but it appears that the unicode_width library is (incorrectly) +/// computing some character's width to be 0. So, we need to loop through each character from +/// the last path, and if unicode_width says its width is 0, add 1 to it. +/// +/// The string I have been testing with is: "01\ エレクトリック・パブリック.flac". +/// There are some unicode characters with a width of zero +/// (https://unicode-explorer.com/articles/space-characters), however, I have analyzed the raw +/// byte sequence of all the characters in the string above, and I do not believe that any of +/// them are zero width chars. +/// +/// In theory, this workaround may mess up completion for strings that DO have real zero width +/// characters. However, as those characters seem to be mostly for typesetting, I am not going +/// to worry about it, unless I run into it myself, or someone complains to me about it. +/// +/// Here is a simple ruby script that might help anyone looking to do their own analysis: +/// #!/usr/bin/env ruby +/// # frozen_string_literal: true +/// +/// str = '01\ エレクトリック・パブリック.flac' +/// puts "len: #{str.length}" +/// +/// str.bytes do |b| +/// # puts b +/// puts b if b == 191 +/// end +/// +/// puts +/// # oth = "hi\u200chi" +/// oth = "hi\ufeffhi" +/// puts oth +/// puts oth.bytes do |b| +/// puts b +/// end +macro_rules! width { + ($c:expr) => {{ + let w = UnicodeWidthChar::width($c).unwrap_or(1); + if w == 0 { + 1 + } else { + w + } + }}; +} + /// Retrieve the next character from STDIN /// /// A UTF-8 compatible function used to get the next character from STDIN. A UTF-8 character may be @@ -165,47 +209,10 @@ fn comp( *bpos -= *len; } - // This bit of code is annoying, but it appears that the unicode_width library is (incorrectly) - // computing some character's width to be 0. So, we need to loop through each character from - // the last path, and if unicode_width says its width is 0, add 1 to it. - // - // The string I have been testing with is: "01\ エレクトリック・パブリック.flac". - // There are some unicode characters with a width of zero - // (https://unicode-explorer.com/articles/space-characters), however, I have analyzed the raw - // byte sequence of all the characters in the string above, and I do not believe that any of - // them are zero width chars. - // - // In theory, this workaround may mess up completion for strings that DO have real zero width - // characters. However, as those characters seem to be mostly for typesetting, I am not going - // to worry about it, unless I run into it myself, or someone complains to me about it. - // - // Here is a simple ruby script that might help anyone looking to do their own analysis: - // #!/usr/bin/env ruby - // # frozen_string_literal: true - // - // str = '01\ エレクトリック・パブリック.flac' - // puts "len: #{str.length}" - // - // str.bytes do |b| - // # puts b - // puts b if b == 191 - // end - // - // puts - // # oth = "hi\u200chi" - // oth = "hi\ufeffhi" - // puts oth - // puts oth.bytes do |b| - // puts b - // end let ori_path = buffer[*bpos..].into_iter().collect::<Vec<_>>(); let mut width = 0; for c in ori_path.clone() { - let mut w = UnicodeWidthChar::width(*c).unwrap_or(1); - if w == 0 { - w += 1; - } - width += w; + width += width!(*c); } // Remove the last autocomplete value from the buffer @@ -451,27 +458,33 @@ pub fn getline( } *pos.lock().unwrap() -= 1; - let trunc = &buffer.lock().unwrap()[*pos.lock().unwrap()..] - .iter() - .collect::<String>(); - let trunc_width = UnicodeWidthStr::width(trunc.as_str()); - let c = buffer.lock().unwrap().remove(*pos.lock().unwrap()); - let width = UnicodeWidthChar::width(c).unwrap_or(1); + let mut buffer = buffer.lock().unwrap(); + let trunc = &buffer[*pos.lock().unwrap()..].iter().collect::<Vec<_>>(); - if *pos.lock().unwrap() == buffer.lock().unwrap().len() { + let mut trunc_width = 0; + for c in trunc { + trunc_width += width!(**c); + } + + let c = buffer.remove(*pos.lock().unwrap()); + let mut width = UnicodeWidthChar::width(c).unwrap_or(1); + if width == 0 { + width += 1; + } + + if *pos.lock().unwrap() == buffer.len() { (0..width).for_each(|_| print!("\u{8}")); print!(" \u{8}"); } else { (0..trunc_width).for_each(|_| print!(" ")); (0..trunc_width + width).for_each(|_| print!("\u{8}")); - buffer.lock().unwrap()[*pos.lock().unwrap()..] + buffer[*pos.lock().unwrap()..] .iter() .for_each(|c| print!("{}", c)); (0..trunc_width - width).for_each(|_| print!("\u{8}")); } // Update directory for autocomplete - let buffer = buffer.lock().unwrap(); let comp_path = match buffer.last() { Some(c) if *c == ' ' => String::new(), None => String::new(), @@ -529,8 +542,7 @@ pub fn getline( continue; } - let width = UnicodeWidthChar::width(buffer.lock().unwrap()[*pos.lock().unwrap()]) - .unwrap_or(1); + let width = width!(buffer.lock().unwrap()[*pos.lock().unwrap()]); for _ in 0..width { print!("\x1b[1C"); } @@ -543,8 +555,7 @@ pub fn getline( } *pos.lock().unwrap() -= 1; - let width = UnicodeWidthChar::width(buffer.lock().unwrap()[*pos.lock().unwrap()]) - .unwrap_or(1); + let width = width!(buffer.lock().unwrap()[*pos.lock().unwrap()]); for _ in 0..width { print!("\u{8}"); } |