From 8f99a78cf1e77e16d7114466defc9cf302ceaa5a Mon Sep 17 00:00:00 2001 From: einfachIrgendwer0815 <85333734+einfachIrgendwer0815@users.noreply.github.com> Date: Tue, 14 Mar 2023 22:21:30 +0100 Subject: [PATCH] Print non-printable characters using caret notation (#2443) When the new flag is set, non-printable characters are printed using caret notation. --- CHANGELOG.md | 1 + doc/long-help.txt | 7 ++++ doc/short-help.txt | 74 +++++++++++++++++++++++------------- src/bin/bat/app.rs | 11 +++++- src/bin/bat/clap_app.rs | 16 ++++++++ src/config.rs | 4 ++ src/lib.rs | 2 + src/nonprintable_notation.rs | 12 ++++++ src/preprocessor.rs | 38 ++++++++++++++---- src/printer.rs | 12 +++++- tests/integration_tests.rs | 11 ++++++ 11 files changed, 151 insertions(+), 37 deletions(-) create mode 100644 src/nonprintable_notation.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 4216e943..013f00e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Implemented `-S` and `--chop-long-lines` flags as aliases for `--wrap=never`. See #2309 (@johnmatthiggins) - Breaking change: Environment variables can now override config file settings (but command-line arguments still have the highest precedence), see #1152, #1281, and #2381 (@aaronkollasch) +- Implemented `--nonprintable-notation=caret` to support showing non-printable characters using caret notation. See #2429 (@einfachIrgendwer0815) ## Bugfixes diff --git a/doc/long-help.txt b/doc/long-help.txt index 733eb7bd..a1162e97 100644 --- a/doc/long-help.txt +++ b/doc/long-help.txt @@ -13,6 +13,13 @@ Options: Show non-printable characters like space, tab or newline. This option can also be used to print binary files. Use '--tabs' to control the width of the tab-placeholders. + --nonprintable-notation + Set notation for non-printable characters. + + Possible values: + * unicode (␇, ␊, ␀, ..) + * caret (^G, ^J, ^@, ..) + -p, --plain... Only show plain style, no decorations. This is an alias for '--style=plain'. When '-p' is used twice ('-pp'), it also disables automatic paging (alias for '--style=plain diff --git a/doc/short-help.txt b/doc/short-help.txt index d4a90603..18f08bbe 100644 --- a/doc/short-help.txt +++ b/doc/short-help.txt @@ -7,30 +7,50 @@ Arguments: [FILE]... File(s) to print / concatenate. Use '-' for standard input. Options: - -A, --show-all Show non-printable characters (space, tab, newline, ..). - -p, --plain... Show plain style (alias for '--style=plain'). - -l, --language Set the language for syntax highlighting. - -H, --highlight-line Highlight lines N through M. - --file-name Specify the name to display for a file. - -d, --diff Only show lines that have been added/removed/modified. - --tabs Set the tab width to T spaces. - --wrap Specify the text-wrapping mode (*auto*, never, character). - -S, --chop-long-lines Truncate all lines longer than screen width. Alias for - '--wrap=never'. - -n, --number Show line numbers (alias for '--style=numbers'). - --color When to use colors (*auto*, never, always). - --italic-text Use italics in output (always, *never*) - --decorations When to show the decorations (*auto*, never, always). - --paging Specify when to use the pager, or use `-P` to disable (*auto*, - never, always). - -m, --map-syntax Use the specified syntax for files matching the glob pattern - ('*.cpp:C++'). - --theme Set the color theme for syntax highlighting. - --list-themes Display all supported highlighting themes. - --style Comma-separated list of style elements to display (*default*, - auto, full, plain, changes, header, header-filename, - header-filesize, grid, rule, numbers, snip). - -r, --line-range Only print the lines from N to M. - -L, --list-languages Display all supported languages. - -h, --help Print help information (use `--help` for more detail) - -V, --version Print version information + -A, --show-all + Show non-printable characters (space, tab, newline, ..). + --nonprintable-notation + Set notation for non-printable characters. + -p, --plain... + Show plain style (alias for '--style=plain'). + -l, --language + Set the language for syntax highlighting. + -H, --highlight-line + Highlight lines N through M. + --file-name + Specify the name to display for a file. + -d, --diff + Only show lines that have been added/removed/modified. + --tabs + Set the tab width to T spaces. + --wrap + Specify the text-wrapping mode (*auto*, never, character). + -S, --chop-long-lines + Truncate all lines longer than screen width. Alias for '--wrap=never'. + -n, --number + Show line numbers (alias for '--style=numbers'). + --color + When to use colors (*auto*, never, always). + --italic-text + Use italics in output (always, *never*) + --decorations + When to show the decorations (*auto*, never, always). + --paging + Specify when to use the pager, or use `-P` to disable (*auto*, never, always). + -m, --map-syntax + Use the specified syntax for files matching the glob pattern ('*.cpp:C++'). + --theme + Set the color theme for syntax highlighting. + --list-themes + Display all supported highlighting themes. + --style + Comma-separated list of style elements to display (*default*, auto, full, plain, changes, + header, header-filename, header-filesize, grid, rule, numbers, snip). + -r, --line-range + Only print the lines from N to M. + -L, --list-languages + Display all supported languages. + -h, --help + Print help information (use `--help` for more detail) + -V, --version + Print version information diff --git a/src/bin/bat/app.rs b/src/bin/bat/app.rs index 58389beb..fb408d9b 100644 --- a/src/bin/bat/app.rs +++ b/src/bin/bat/app.rs @@ -21,7 +21,7 @@ use bat::{ input::Input, line_range::{HighlightedLineRanges, LineRange, LineRanges}, style::{StyleComponent, StyleComponents}, - MappingTarget, PagingMode, SyntaxMapping, WrappingMode, + MappingTarget, NonprintableNotation, PagingMode, SyntaxMapping, WrappingMode, }; fn is_truecolor_terminal() -> bool { @@ -173,6 +173,15 @@ impl App { } }), show_nonprintable: self.matches.get_flag("show-all"), + nonprintable_notation: match self + .matches + .get_one::("nonprintable-notation") + .map(|s| s.as_str()) + { + Some("unicode") => NonprintableNotation::Unicode, + Some("caret") => NonprintableNotation::Caret, + _ => unreachable!("other values for --nonprintable-notation are not allowed"), + }, wrapping_mode: if self.interactive_output || maybe_term_width.is_some() { if !self.matches.get_flag("chop-long-lines") { match self.matches.get_one::("wrap").map(|s| s.as_str()) { diff --git a/src/bin/bat/clap_app.rs b/src/bin/bat/clap_app.rs index 027701cb..c4803073 100644 --- a/src/bin/bat/clap_app.rs +++ b/src/bin/bat/clap_app.rs @@ -59,6 +59,22 @@ pub fn build_app(interactive_output: bool) -> Command { Use '--tabs' to control the width of the tab-placeholders.", ), ) + .arg( + Arg::new("nonprintable-notation") + .long("nonprintable-notation") + .action(ArgAction::Set) + .default_value("unicode") + .value_parser(["unicode", "caret"]) + .value_name("notation") + .hide_default_value(true) + .help("Set notation for non-printable characters.") + .long_help( + "Set notation for non-printable characters.\n\n\ + Possible values:\n \ + * unicode (␇, ␊, ␀, ..)\n \ + * caret (^G, ^J, ^@, ..)", + ), + ) .arg( Arg::new("plain") .overrides_with("plain") diff --git a/src/config.rs b/src/config.rs index 76eb3990..eaefb7d6 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,5 @@ use crate::line_range::{HighlightedLineRanges, LineRanges}; +use crate::nonprintable_notation::NonprintableNotation; #[cfg(feature = "paging")] use crate::paging::PagingMode; use crate::style::StyleComponents; @@ -39,6 +40,9 @@ pub struct Config<'a> { /// Whether or not to show/replace non-printable characters like space, tab and newline. pub show_nonprintable: bool, + /// The configured notation for non-printable characters + pub nonprintable_notation: NonprintableNotation, + /// The character width of the terminal pub term_width: usize, diff --git a/src/lib.rs b/src/lib.rs index 0ca67bb9..4f56f85b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ pub mod error; pub mod input; mod less; pub mod line_range; +pub(crate) mod nonprintable_notation; mod output; #[cfg(feature = "paging")] mod pager; @@ -49,6 +50,7 @@ mod terminal; mod vscreen; pub(crate) mod wrapping; +pub use nonprintable_notation::NonprintableNotation; pub use pretty_printer::{Input, PrettyPrinter, Syntax}; pub use syntax_mapping::{MappingTarget, SyntaxMapping}; pub use wrapping::WrappingMode; diff --git a/src/nonprintable_notation.rs b/src/nonprintable_notation.rs new file mode 100644 index 00000000..ff09aca6 --- /dev/null +++ b/src/nonprintable_notation.rs @@ -0,0 +1,12 @@ +/// How to print non-printable characters with +/// [crate::config::Config::show_nonprintable] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum NonprintableNotation { + /// Use caret notation (^G, ^J, ^@, ..) + Caret, + + /// Use unicode notation (␇, ␊, ␀, ..) + #[default] + Unicode, +} diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 74590355..8ee83c62 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -2,6 +2,8 @@ use std::fmt::Write; use console::AnsiCodeIterator; +use crate::nonprintable_notation::NonprintableNotation; + /// Expand tabs like an ANSI-enabled expand(1). pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String { let mut buffer = String::with_capacity(line.len() * 2); @@ -49,7 +51,11 @@ fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> { decoded.map(|(seq, n)| (seq.chars().next().unwrap(), n)) } -pub fn replace_nonprintable(input: &[u8], tab_width: usize) -> String { +pub fn replace_nonprintable( + input: &[u8], + tab_width: usize, + nonprintable_notation: NonprintableNotation, +) -> String { let mut output = String::new(); let tab_width = if tab_width == 0 { 4 } else { tab_width }; @@ -79,19 +85,37 @@ pub fn replace_nonprintable(input: &[u8], tab_width: usize) -> String { } // line feed '\x0A' => { - output.push_str("␊\x0A"); + output.push_str(match nonprintable_notation { + NonprintableNotation::Caret => "^J\x0A", + NonprintableNotation::Unicode => "␊\x0A", + }); line_idx = 0; } // carriage return - '\x0D' => output.push('␍'), + '\x0D' => output.push_str(match nonprintable_notation { + NonprintableNotation::Caret => "^M", + NonprintableNotation::Unicode => "␍", + }), // null - '\x00' => output.push('␀'), + '\x00' => output.push_str(match nonprintable_notation { + NonprintableNotation::Caret => "^@", + NonprintableNotation::Unicode => "␀", + }), // bell - '\x07' => output.push('␇'), + '\x07' => output.push_str(match nonprintable_notation { + NonprintableNotation::Caret => "^G", + NonprintableNotation::Unicode => "␇", + }), // backspace - '\x08' => output.push('␈'), + '\x08' => output.push_str(match nonprintable_notation { + NonprintableNotation::Caret => "^H", + NonprintableNotation::Unicode => "␈", + }), // escape - '\x1B' => output.push('␛'), + '\x1B' => output.push_str(match nonprintable_notation { + NonprintableNotation::Caret => "^[", + NonprintableNotation::Unicode => "␛", + }), // printable ASCII c if c.is_ascii_alphanumeric() || c.is_ascii_punctuation() diff --git a/src/printer.rs b/src/printer.rs index 27c92b91..51a243e6 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -93,7 +93,11 @@ impl<'a> Printer for SimplePrinter<'a> { ) -> Result<()> { if !out_of_range { if self.config.show_nonprintable { - let line = replace_nonprintable(line_buffer, self.config.tab_width); + let line = replace_nonprintable( + line_buffer, + self.config.tab_width, + self.config.nonprintable_notation, + ); write!(handle, "{}", line)?; } else { handle.write_all(line_buffer)? @@ -422,7 +426,11 @@ impl<'a> Printer for InteractivePrinter<'a> { line_buffer: &[u8], ) -> Result<()> { let line = if self.config.show_nonprintable { - replace_nonprintable(line_buffer, self.config.tab_width) + replace_nonprintable( + line_buffer, + self.config.tab_width, + self.config.nonprintable_notation, + ) } else { let line = match self.content_type { Some(ContentType::BINARY) | None => { diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index ee1ad035..61948280 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1623,6 +1623,17 @@ fn show_all_extends_tab_markers_to_next_tabstop_width_8() { ); } +#[test] +fn show_all_with_caret_notation() { + bat() + .arg("--show-all") + .arg("--nonprintable-notation=caret") + .arg("nonprintable.txt") + .assert() + .stdout("hello·world^J\n├──┤^M^@^G^H^[") + .stderr(""); +} + #[test] fn no_paging_arg() { bat()