From 960b0123ee202aef0cfcd294d56af1713cb91de1 Mon Sep 17 00:00:00 2001 From: Rory Dudley Date: Mon, 16 Dec 2024 17:07:35 -0700 Subject: Update left + right arrow keys, backspace with width fix This patch updates the codepaths of the arrow keys and backspace keys to calculate the width correctly when moving around in the buffer. See commit ac5152886fed ("Workaround for faulty unicode_width values") for more details, or you can also read the large comment at the top of the new width! macro (which was added to reduce some redundant code, since we need to properly compute character width in 4 different places). Signed-off-by: Rory Dudley --- src/buffer.rs | 113 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/src/buffer.rs b/src/buffer.rs index 5771668..a9d925b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -77,6 +77,50 @@ macro_rules! char { }; } +/// This bit of code is annoying, but it appears that the unicode_width library is (incorrectly) +/// computing some character's width to be 0. So, we need to loop through each character from +/// the last path, and if unicode_width says its width is 0, add 1 to it. +/// +/// The string I have been testing with is: "01\ エレクトリック・パブリック.flac". +/// There are some unicode characters with a width of zero +/// (https://unicode-explorer.com/articles/space-characters), however, I have analyzed the raw +/// byte sequence of all the characters in the string above, and I do not believe that any of +/// them are zero width chars. +/// +/// In theory, this workaround may mess up completion for strings that DO have real zero width +/// characters. However, as those characters seem to be mostly for typesetting, I am not going +/// to worry about it, unless I run into it myself, or someone complains to me about it. +/// +/// Here is a simple ruby script that might help anyone looking to do their own analysis: +/// #!/usr/bin/env ruby +/// # frozen_string_literal: true +/// +/// str = '01\ エレクトリック・パブリック.flac' +/// puts "len: #{str.length}" +/// +/// str.bytes do |b| +/// # puts b +/// puts b if b == 191 +/// end +/// +/// puts +/// # oth = "hi\u200chi" +/// oth = "hi\ufeffhi" +/// puts oth +/// puts oth.bytes do |b| +/// puts b +/// end +macro_rules! width { + ($c:expr) => {{ + let w = UnicodeWidthChar::width($c).unwrap_or(1); + if w == 0 { + 1 + } else { + w + } + }}; +} + /// Retrieve the next character from STDIN /// /// A UTF-8 compatible function used to get the next character from STDIN. A UTF-8 character may be @@ -165,47 +209,10 @@ fn comp( *bpos -= *len; } - // This bit of code is annoying, but it appears that the unicode_width library is (incorrectly) - // computing some character's width to be 0. So, we need to loop through each character from - // the last path, and if unicode_width says its width is 0, add 1 to it. - // - // The string I have been testing with is: "01\ エレクトリック・パブリック.flac". - // There are some unicode characters with a width of zero - // (https://unicode-explorer.com/articles/space-characters), however, I have analyzed the raw - // byte sequence of all the characters in the string above, and I do not believe that any of - // them are zero width chars. - // - // In theory, this workaround may mess up completion for strings that DO have real zero width - // characters. However, as those characters seem to be mostly for typesetting, I am not going - // to worry about it, unless I run into it myself, or someone complains to me about it. - // - // Here is a simple ruby script that might help anyone looking to do their own analysis: - // #!/usr/bin/env ruby - // # frozen_string_literal: true - // - // str = '01\ エレクトリック・パブリック.flac' - // puts "len: #{str.length}" - // - // str.bytes do |b| - // # puts b - // puts b if b == 191 - // end - // - // puts - // # oth = "hi\u200chi" - // oth = "hi\ufeffhi" - // puts oth - // puts oth.bytes do |b| - // puts b - // end let ori_path = buffer[*bpos..].into_iter().collect::>(); let mut width = 0; for c in ori_path.clone() { - let mut w = UnicodeWidthChar::width(*c).unwrap_or(1); - if w == 0 { - w += 1; - } - width += w; + width += width!(*c); } // Remove the last autocomplete value from the buffer @@ -451,27 +458,33 @@ pub fn getline( } *pos.lock().unwrap() -= 1; - let trunc = &buffer.lock().unwrap()[*pos.lock().unwrap()..] - .iter() - .collect::(); - let trunc_width = UnicodeWidthStr::width(trunc.as_str()); - let c = buffer.lock().unwrap().remove(*pos.lock().unwrap()); - let width = UnicodeWidthChar::width(c).unwrap_or(1); + let mut buffer = buffer.lock().unwrap(); + let trunc = &buffer[*pos.lock().unwrap()..].iter().collect::>(); - if *pos.lock().unwrap() == buffer.lock().unwrap().len() { + let mut trunc_width = 0; + for c in trunc { + trunc_width += width!(**c); + } + + let c = buffer.remove(*pos.lock().unwrap()); + let mut width = UnicodeWidthChar::width(c).unwrap_or(1); + if width == 0 { + width += 1; + } + + if *pos.lock().unwrap() == buffer.len() { (0..width).for_each(|_| print!("\u{8}")); print!(" \u{8}"); } else { (0..trunc_width).for_each(|_| print!(" ")); (0..trunc_width + width).for_each(|_| print!("\u{8}")); - buffer.lock().unwrap()[*pos.lock().unwrap()..] + buffer[*pos.lock().unwrap()..] .iter() .for_each(|c| print!("{}", c)); (0..trunc_width - width).for_each(|_| print!("\u{8}")); } // Update directory for autocomplete - let buffer = buffer.lock().unwrap(); let comp_path = match buffer.last() { Some(c) if *c == ' ' => String::new(), None => String::new(), @@ -529,8 +542,7 @@ pub fn getline( continue; } - let width = UnicodeWidthChar::width(buffer.lock().unwrap()[*pos.lock().unwrap()]) - .unwrap_or(1); + let width = width!(buffer.lock().unwrap()[*pos.lock().unwrap()]); for _ in 0..width { print!("\x1b[1C"); } @@ -543,8 +555,7 @@ pub fn getline( } *pos.lock().unwrap() -= 1; - let width = UnicodeWidthChar::width(buffer.lock().unwrap()[*pos.lock().unwrap()]) - .unwrap_or(1); + let width = width!(buffer.lock().unwrap()[*pos.lock().unwrap()]); for _ in 0..width { print!("\u{8}"); } -- cgit v1.2.3