Fix unicode encoding of hyperlinks

The problem is that I based the code on the implementation in ripgrep.
But while ripgrep writes directly to the stream, I am using a Formatter,
which means I have to write characters, not raw bytes.

Thus we need to percent-encode all non-ASCII bytes (or we could switch
to writing bytes directly, but that would be more complicated, and I
think percent-encoding is safer anyway).
This commit is contained in:
Thayne McCombs 2024-06-09 22:57:00 -06:00
parent bd649e2fd7
commit 609f1adf90

View File

@ -26,17 +26,16 @@ impl fmt::Display for PathUrl {
} }
fn encode(f: &mut Formatter, byte: u8) -> fmt::Result { fn encode(f: &mut Formatter, byte: u8) -> fmt::Result {
// NOTE:
// Most terminals can handle non-ASCII Unicode characters in a file URL fine. But on some OSes (notably
// Windows), the encoded bytes of the path may not be valid UTF-8. Since we don't know if a
// byte >= 128 is part of a valid UTF-8 encoding or not, we just percent-encode any non-ASCII
// byte.
// Percent-encoding these bytes is probably safer anyway.
match byte { match byte {
b'0'..=b'9' b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'/' | b':' | b'-' | b'.' | b'_' | b'~' => {
| b'A'..=b'Z' f.write_char(byte.into())
| b'a'..=b'z' }
| b'/'
| b':'
| b'-'
| b'.'
| b'_'
| b'~'
| 128.. => f.write_char(byte.into()),
#[cfg(windows)] #[cfg(windows)]
b'\\' => f.write_char('/'), b'\\' => f.write_char('/'),
_ => { _ => {
@ -61,3 +60,21 @@ fn host() -> &'static str {
const fn host() -> &'static str { const fn host() -> &'static str {
"" ""
} }
#[cfg(test)]
mod test {
    use super::*;

    /// Renders a path containing ASCII punctuation (`$`, `*`), a control
    /// character (ESC) and multi-byte UTF-8 characters as a URL, and checks
    /// that each of those bytes appears as an uppercase `%XX` escape while
    /// the `/` separators are left untouched.
    #[test]
    fn test_unicode_encoding() {
        let path = PathBuf::from("/$*\x1bßé/∫😃");
        let url = PathUrl::new(&path).unwrap();

        // Render first, then build the expectation, so the evaluation order
        // matches a direct `assert_eq!(url.to_string(), format!(..))`.
        let actual = url.to_string();
        let expected = format!(
            "file://{}/%24%2A%1B%C3%9F%C3%A9/%E2%88%AB%F0%9F%98%83",
            host()
        );

        assert_eq!(actual, expected);
    }
}