diff --git a/src/preprocessor.rs b/src/preprocessor.rs index ab19322b..3328f3b6 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -91,31 +91,27 @@ pub fn replace_nonprintable( }); line_idx = 0; } - // carriage return - '\x0D' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^M", - NonprintableNotation::Unicode => "␍", - }), - // null - '\x00' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^@", - NonprintableNotation::Unicode => "␀", - }), - // bell - '\x07' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^G", - NonprintableNotation::Unicode => "␇", - }), - // backspace - '\x08' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^H", - NonprintableNotation::Unicode => "␈", - }), - // escape - '\x1B' => output.push_str(match nonprintable_notation { - NonprintableNotation::Caret => "^[", - NonprintableNotation::Unicode => "␛", - }), + // ASCII control characters + '\x00'..='\x1F' => { + let c = u32::from(chr); + + match nonprintable_notation { + NonprintableNotation::Caret => { + let caret_character = char::from_u32(0x40 + c).unwrap(); + write!(output, "^{caret_character}").ok(); + } + + NonprintableNotation::Unicode => { + let replacement_symbol = char::from_u32(0x2400 + c).unwrap(); + output.push(replacement_symbol) + } + } + } + // delete + '\x7F' => match nonprintable_notation { + NonprintableNotation::Caret => output.push_str("^?"), + NonprintableNotation::Unicode => output.push('\u{2421}'), + }, // printable ASCII c if c.is_ascii_alphanumeric() || c.is_ascii_punctuation() diff --git a/tests/examples/control_characters.txt b/tests/examples/control_characters.txt new file mode 100644 index 00000000..378445f7 Binary files /dev/null and b/tests/examples/control_characters.txt differ diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 8d2d3624..be70fdca 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1728,6 +1728,25 @@ fn show_all_with_caret_notation() { .assert() .stdout("hello·world^J\n├──┤^M^@^G^H^[") .stderr(""); + + bat() + .arg("--show-all") + .arg("--nonprintable-notation=caret") + .arg("control_characters.txt") + .assert() + .stdout("^@^A^B^C^D^E^F^G^H├─┤^J\n^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\\^]^^^_^?") + .stderr(""); +} + +#[test] +fn show_all_with_unicode() { + bat() + .arg("--show-all") + .arg("--nonprintable-notation=unicode") + .arg("control_characters.txt") + .assert() + .stdout("␀␁␂␃␄␅␆␇␈├─┤␊\n␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␡") + .stderr(""); } #[test] diff --git a/tests/syntax-tests/highlighted/Plaintext/plaintext.txt b/tests/syntax-tests/highlighted/Plaintext/plaintext.txt index 1f27a79f..6eb229d2 100644 --- a/tests/syntax-tests/highlighted/Plaintext/plaintext.txt +++ b/tests/syntax-tests/highlighted/Plaintext/plaintext.txt @@ -1,36 +1,36 @@ ␀␊ -\u{1}␊ -\u{2}␊ -\u{3}␊ -\u{4}␊ -\u{5}␊ -\u{6}␊ +␁␊ +␂␊ +␃␊ +␄␊ +␅␊ +␆␊ ␇␊ ␈␊ ├──┤␊ ␊ ␊ -\u{b}␊ -\u{c}␊ +␋␊ +␌␊ ␊ -\u{e}␊ -\u{f}␊ -\u{10}␊ -\u{11}␊ -\u{12}␊ -\u{13}␊ -\u{14}␊ -\u{15}␊ -\u{16}␊ -\u{17}␊ -\u{18}␊ -\u{19}␊ -\u{1a}␊ +␎␊ +␏␊ +␐␊ +␑␊ +␒␊ +␓␊ +␔␊ +␕␊ +␖␊ +␗␊ +␘␊ +␙␊ +␚␊ ␛␊ -\u{1c}␊ -\u{1d}␊ -\u{1e}␊ -\u{1f}␊ +␜␊ +␝␊ +␞␊ +␟␊ ·␊ !␊ "␊ @@ -126,7 +126,7 @@ |␊ }␊ ~␊ -\u{7f}␊ +␡␊ \u{80}␊ \u{81}␊ \u{82}␊