From 3d87b25b190e0990e0e75a2ab8f994d6c277d263 Mon Sep 17 00:00:00 2001 From: einfachIrgendwer0815 <85333734+einfachIrgendwer0815@users.noreply.github.com> Date: Fri, 20 Oct 2023 19:41:48 +0200 Subject: [PATCH] Add unicode/caret replacements for all control characters (#2712) --- src/preprocessor.rs | 46 +++++++--------- tests/examples/control_characters.txt | Bin 0 -> 33 bytes tests/integration_tests.rs | 19 +++++++ .../highlighted/Plaintext/plaintext.txt | 52 +++++++++--------- 4 files changed, 66 insertions(+), 51 deletions(-) create mode 100644 tests/examples/control_characters.txt diff --git a/src/preprocessor.rs b/src/preprocessor.rs index ab19322b..3328f3b6 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -91,31 +91,27 @@ pub fn replace_nonprintable( }); line_idx = 0; } - // carriage return - '\x0D' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^M", - NonprintableNotation::Unicode => "␍", - }), - // null - '\x00' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^@", - NonprintableNotation::Unicode => "␀", - }), - // bell - '\x07' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^G", - NonprintableNotation::Unicode => "␇", - }), - // backspace - '\x08' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^H", - NonprintableNotation::Unicode => "␈", - }), - // escape - '\x1B' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^[", - NonprintableNotation::Unicode => "␛", - }), + // ASCII control characters + '\x00'..='\x1F' => { + let c = u32::from(chr); + + match nonprintable_notation { + NonprintableNotation::Caret => { + let caret_character = char::from_u32(0x40 + c).unwrap(); + write!(output, "^{caret_character}").ok(); + } + + NonprintableNotation::Unicode => { + let replacement_symbol = char::from_u32(0x2400 + c).unwrap(); + output.push(replacement_symbol) + } + } + } + // delete + '\x7F' => match nonprintable_notation { + NonprintableNotation::Caret => output.push_str("^?"), + NonprintableNotation::Unicode => output.push('\u{2421}'), + }, // printable ASCII c if c.is_ascii_alphanumeric() || c.is_ascii_punctuation() diff --git a/tests/examples/control_characters.txt b/tests/examples/control_characters.txt new file mode 100644 index 0000000000000000000000000000000000000000..378445f79cc4d60091049839f3377eacb64af61c GIT binary patch literal 33 ocmZQzWMXDvWn<^yMC+6cQE@6%&_`l#-T_m6NXr02kl_Z~y=R literal 0 HcmV?d00001 diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 8d2d3624..be70fdca 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1728,6 +1728,25 @@ fn show_all_with_caret_notation() { .assert() .stdout("hello·world^J\n├──┤^M^@^G^H^[") .stderr(""); + + bat() + .arg("--show-all") + .arg("--nonprintable-notation=caret") + .arg("control_characters.txt") + .assert() + .stdout("^@^A^B^C^D^E^F^G^H├─┤^J\n^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\\^]^^^_^?") + .stderr(""); +} + +#[test] +fn show_all_with_unicode() { + bat() + .arg("--show-all") + .arg("--nonprintable-notation=unicode") + .arg("control_characters.txt") + .assert() + .stdout("␀␁␂␃␄␅␆␇␈├─┤␊\n␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␡") + .stderr(""); } #[test] diff --git a/tests/syntax-tests/highlighted/Plaintext/plaintext.txt b/tests/syntax-tests/highlighted/Plaintext/plaintext.txt index 1f27a79f..6eb229d2 100644 --- a/tests/syntax-tests/highlighted/Plaintext/plaintext.txt +++ b/tests/syntax-tests/highlighted/Plaintext/plaintext.txt @@ -1,36 +1,36 @@ ␀␊ -\u{1}␊ -\u{2}␊ -\u{3}␊ -\u{4}␊ -\u{5}␊ -\u{6}␊ +␁␊ +␂␊ +␃␊ +␄␊ +␅␊ +␆␊ ␇␊ ␈␊ ├──┤␊ ␊ ␊ -\u{b}␊ -\u{c}␊ +␋␊ +␌␊ ␊ -\u{e}␊ -\u{f}␊ -\u{10}␊ -\u{11}␊ -\u{12}␊ -\u{13}␊ -\u{14}␊ -\u{15}␊ -\u{16}␊ -\u{17}␊ -\u{18}␊ -\u{19}␊ -\u{1a}␊ +␎␊ +␏␊ +␐␊ +␑␊ +␒␊ +␓␊ +␔␊ +␕␊ +␖␊ +␗␊ +␘␊ +␙␊ +␚␊ ␛␊ -\u{1c}␊ -\u{1d}␊ -\u{1e}␊ -\u{1f}␊ +␜␊ +␝␊ +␞␊ +␟␊ ·␊ !␊ "␊ @@ -126,7 +126,7 @@ |␊ }␊ ~␊ -\u{7f}␊ +␡␊ \u{80}␊ \u{81}␊ \u{82}␊