From 70ff93d2386433723acbbe76711d299dfe9dca99 Mon Sep 17 00:00:00 2001 From: "Ethan P." Date: Mon, 10 Jun 2024 21:05:20 -0700 Subject: [PATCH 1/5] Add `--strip-ansi` option --- doc/long-help.txt | 4 +++ src/bin/bat/app.rs | 10 ++++++ src/bin/bat/clap_app.rs | 12 +++++++ src/config.rs | 4 +++ src/lib.rs | 1 + src/preprocessor.rs | 31 ++++++++++++++++ src/printer.rs | 20 ++++++++++- tests/integration_tests.rs | 74 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 155 insertions(+), 1 deletion(-) diff --git a/doc/long-help.txt b/doc/long-help.txt index a6ffe962..93f56968 100644 --- a/doc/long-help.txt +++ b/doc/long-help.txt @@ -122,6 +122,10 @@ Options: --squeeze-limit Set the maximum number of consecutive empty lines to be printed. + --strip-ansi + Specify when to strip ANSI escape sequences from the input. Possible values: always, + *never*. + --style Configure which elements (line numbers, file headers, grid borders, Git modifications, ..) to display in addition to the file contents. 
The argument is a comma-separated list of diff --git a/src/bin/bat/app.rs b/src/bin/bat/app.rs index 6fc85321..62ffdd6d 100644 --- a/src/bin/bat/app.rs +++ b/src/bin/bat/app.rs @@ -7,6 +7,7 @@ use crate::{ clap_app, config::{get_args_from_config_file, get_args_from_env_opts_var, get_args_from_env_vars}, }; +use bat::StripAnsiMode; use clap::ArgMatches; use console::Term; @@ -242,6 +243,15 @@ impl App { 4 }, ), + strip_ansi: match self + .matches + .get_one::<String>("strip-ansi") + .map(|s| s.as_str()) + { + Some("never") => StripAnsiMode::Never, + Some("always") => StripAnsiMode::Always, + _ => unreachable!("other values for --strip-ansi are not allowed"), + }, theme: self .matches .get_one::<String>("theme") diff --git a/src/bin/bat/clap_app.rs b/src/bin/bat/clap_app.rs index b82762b6..32c7c077 100644 --- a/src/bin/bat/clap_app.rs +++ b/src/bin/bat/clap_app.rs @@ -402,6 +402,18 @@ pub fn build_app(interactive_output: bool) -> Command { .long_help("Set the maximum number of consecutive empty lines to be printed.") .hide_short_help(true) ) + .arg( + Arg::new("strip-ansi") + .long("strip-ansi") + .overrides_with("strip-ansi") + .value_name("when") + .value_parser(["always", "never"]) + .default_value("never") + .hide_default_value(true) + .help("Strip colors from the input (always, *never*)") + .long_help("Specify when to strip ANSI escape sequences from the input. 
Possible values: always, *never*.") + .hide_short_help(true) + ) .arg( Arg::new("style") .long("style") diff --git a/src/config.rs b/src/config.rs index 0298bb2a..eb7df8ee 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,6 +5,7 @@ use crate::paging::PagingMode; use crate::style::StyleComponents; use crate::syntax_mapping::SyntaxMapping; use crate::wrapping::WrappingMode; +use crate::StripAnsiMode; #[derive(Debug, Clone)] pub enum VisibleLines { @@ -100,6 +101,9 @@ pub struct Config<'a> { /// The maximum number of consecutive empty lines to display pub squeeze_lines: Option, + + /// When to strip ANSI escape sequences from the input + pub strip_ansi: StripAnsiMode, } #[cfg(all(feature = "minimal-application", feature = "paging"))] diff --git a/src/lib.rs b/src/lib.rs index 0296ad32..23c4a800 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,7 @@ mod vscreen; pub(crate) mod wrapping; pub use nonprintable_notation::NonprintableNotation; +pub use preprocessor::StripAnsiMode; pub use pretty_printer::{Input, PrettyPrinter, Syntax}; pub use syntax_mapping::{MappingTarget, SyntaxMapping}; pub use wrapping::WrappingMode; diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 02d1b289..707946f9 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -136,6 +136,26 @@ pub fn replace_nonprintable( output } +/// Strips ANSI escape sequences from the input. +pub fn strip_ansi(line: &str) -> String { + let mut buffer = String::with_capacity(line.len()); + + for seq in EscapeSequenceOffsetsIterator::new(line) { + if let EscapeSequenceOffsets::Text { .. 
} = seq { + buffer.push_str(&line[seq.index_of_start()..seq.index_past_end()]); + } + } + + buffer +} + +#[derive(Debug, PartialEq, Clone, Copy, Default)] +pub enum StripAnsiMode { + #[default] + Never, + Always, +} + #[test] fn test_try_parse_utf8_char() { assert_eq!(try_parse_utf8_char(&[0x20]), Some((' ', 1))); @@ -179,3 +199,14 @@ fn test_try_parse_utf8_char() { assert_eq!(try_parse_utf8_char(&[0xef, 0x20]), None); assert_eq!(try_parse_utf8_char(&[0xf0, 0xf0]), None); } + +#[test] +fn test_strip_ansi() { + // The sequence detection is covered by the tests in the vscreen module. + assert_eq!(strip_ansi("no ansi"), "no ansi"); + assert_eq!(strip_ansi("\x1B[33mone"), "one"); + assert_eq!( + strip_ansi("\x1B]1\x07multiple\x1B[J sequences"), + "multiple sequences" + ); +} diff --git a/src/printer.rs b/src/printer.rs index 282f0fe1..d76e6e0a 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -29,11 +29,13 @@ use crate::diff::LineChanges; use crate::error::*; use crate::input::OpenedInput; use crate::line_range::RangeCheckResult; +use crate::preprocessor::strip_ansi; use crate::preprocessor::{expand_tabs, replace_nonprintable}; use crate::style::StyleComponent; use crate::terminal::{as_terminal_escaped, to_ansi_color}; use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator}; use crate::wrapping::WrappingMode; +use crate::StripAnsiMode; const ANSI_UNDERLINE_ENABLE: EscapeSequence = EscapeSequence::CSI { raw_sequence: "\x1B[4m", @@ -207,6 +209,7 @@ pub(crate) struct InteractivePrinter<'a> { highlighter_from_set: Option>, background_color_highlight: Option, consecutive_empty_lines: usize, + strip_ansi: bool, } impl<'a> InteractivePrinter<'a> { @@ -281,6 +284,13 @@ impl<'a> InteractivePrinter<'a> { Some(HighlighterFromSet::new(syntax_in_set, theme)) }; + // Determine when to strip ANSI sequences + let strip_ansi = match config.strip_ansi { + _ if config.show_nonprintable => false, + StripAnsiMode::Always => true, + _ => false, + }; + Ok(InteractivePrinter 
{ panel_width, colors, @@ -293,6 +303,7 @@ impl<'a> InteractivePrinter<'a> { highlighter_from_set, background_color_highlight, consecutive_empty_lines: 0, + strip_ansi, }) } @@ -573,7 +584,7 @@ impl<'a> Printer for InteractivePrinter<'a> { ) .into() } else { - match self.content_type { + let mut line = match self.content_type { Some(ContentType::BINARY) | None => { return Ok(()); } @@ -590,7 +601,14 @@ impl<'a> Printer for InteractivePrinter<'a> { line } } + }; + + // If ANSI escape sequences are supposed to be stripped, do it before syntax highlighting. + if self.strip_ansi { + line = strip_ansi(&line).into() } + + line }; let regions = self.highlight_regions_for_line(&line)?; diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 0285ac26..bc86cb9b 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -2666,3 +2666,77 @@ fn highlighting_independant_from_map_syntax_case() { .stdout(expected) .stderr(""); } + +#[test] +fn strip_ansi_always_strips_ansi() { + bat() + .arg("--style=plain") + .arg("--decorations=always") + .arg("--color=never") + .arg("--strip-ansi=always") + .write_stdin("\x1B[33mYellow\x1B[m") + .assert() + .success() + .stdout("Yellow"); +} + +#[test] +fn strip_ansi_never_does_not_strip_ansi() { + let output = String::from_utf8( + bat() + .arg("--style=plain") + .arg("--decorations=always") + .arg("--color=never") + .arg("--strip-ansi=never") + .write_stdin("\x1B[33mYellow\x1B[m") + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .expect("valid utf8"); + + assert!(output.contains("\x1B[33mYellow")) +} + +#[test] +fn strip_ansi_does_not_affect_simple_printer() { + let output = String::from_utf8( + bat() + .arg("--style=plain") + .arg("--decorations=never") + .arg("--color=never") + .arg("--strip-ansi=always") + .write_stdin("\x1B[33mYellow\x1B[m") + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .expect("valid utf8"); + + assert!(output.contains("\x1B[33mYellow")) +} + 
+#[test] +fn strip_ansi_does_not_strip_when_show_nonprintable() { + let output = String::from_utf8( + bat() + .arg("--style=plain") + .arg("--decorations=never") + .arg("--color=always") + .arg("--strip-ansi=always") + .arg("--show-nonprintable") + .write_stdin("\x1B[33mY") + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .expect("valid utf8"); + + assert!(output.contains("␛")) +} From 9e8176b1c69235b91c14aaa56c5862ed7dff3a9b Mon Sep 17 00:00:00 2001 From: "Ethan P." Date: Mon, 10 Jun 2024 21:17:28 -0700 Subject: [PATCH 2/5] Add `--strip-ansi=auto` option When using `auto`, escape sequences will be stripped unless printing plain text. --- doc/long-help.txt | 5 +-- src/bin/bat/app.rs | 1 + src/bin/bat/clap_app.rs | 8 +++-- src/preprocessor.rs | 1 + src/printer.rs | 40 +++++++++++++++------- tests/integration_tests.rs | 70 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 107 insertions(+), 18 deletions(-) diff --git a/doc/long-help.txt b/doc/long-help.txt index 93f56968..d9cdce39 100644 --- a/doc/long-help.txt +++ b/doc/long-help.txt @@ -123,8 +123,9 @@ Options: Set the maximum number of consecutive empty lines to be printed. --strip-ansi - Specify when to strip ANSI escape sequences from the input. Possible values: always, - *never*. + Specify when to strip ANSI escape sequences from the input. The automatic mode will remove + escape sequences unless the syntax highlighting language is plain text. Possible values: + auto, always, *never*. --style Configure which elements (line numbers, file headers, grid borders, Git modifications, ..) 
diff --git a/src/bin/bat/app.rs b/src/bin/bat/app.rs index 62ffdd6d..0d2600b2 100644 --- a/src/bin/bat/app.rs +++ b/src/bin/bat/app.rs @@ -250,6 +250,7 @@ impl App { { Some("never") => StripAnsiMode::Never, Some("always") => StripAnsiMode::Always, + Some("auto") => StripAnsiMode::Auto, _ => unreachable!("other values for --strip-ansi are not allowed"), }, theme: self diff --git a/src/bin/bat/clap_app.rs b/src/bin/bat/clap_app.rs index 32c7c077..e70b1a5b 100644 --- a/src/bin/bat/clap_app.rs +++ b/src/bin/bat/clap_app.rs @@ -407,11 +407,13 @@ pub fn build_app(interactive_output: bool) -> Command { .long("strip-ansi") .overrides_with("strip-ansi") .value_name("when") - .value_parser(["always", "never"]) + .value_parser(["auto", "always", "never"]) .default_value("never") .hide_default_value(true) - .help("Strip colors from the input (always, *never*)") - .long_help("Specify when to strip ANSI escape sequences from the input. Possible values: always, *never*.") + .help("Strip colors from the input (auto, always, *never*)") + .long_help("Specify when to strip ANSI escape sequences from the input. \ + The automatic mode will remove escape sequences unless the syntax highlighting \ + language is plain text. 
Possible values: auto, always, *never*.") .hide_short_help(true) ) .arg( diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 707946f9..dc2aa66e 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -154,6 +154,7 @@ pub enum StripAnsiMode { #[default] Never, Always, + Auto, } #[test] diff --git a/src/printer.rs b/src/printer.rs index d76e6e0a..e9bea3fd 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -268,26 +268,40 @@ impl<'a> InteractivePrinter<'a> { .content_type .map_or(false, |c| c.is_binary() && !config.show_nonprintable); - let highlighter_from_set = if is_printing_binary || !config.colored_output { - None - } else { - // Determine the type of syntax for highlighting - let syntax_in_set = - match assets.get_syntax(config.language, input, &config.syntax_mapping) { - Ok(syntax_in_set) => syntax_in_set, - Err(Error::UndetectedSyntax(_)) => assets - .find_syntax_by_name("Plain Text")? - .expect("A plain text syntax is available"), - Err(e) => return Err(e), - }; + let needs_to_match_syntax = !is_printing_binary + && (config.colored_output || config.strip_ansi == StripAnsiMode::Auto); - Some(HighlighterFromSet::new(syntax_in_set, theme)) + let (is_plain_text, highlighter_from_set) = if needs_to_match_syntax { + // Determine the type of syntax for highlighting + const PLAIN_TEXT_SYNTAX: &str = "Plain Text"; + match assets.get_syntax(config.language, input, &config.syntax_mapping) { + Ok(syntax_in_set) => ( + syntax_in_set.syntax.name == PLAIN_TEXT_SYNTAX, + Some(HighlighterFromSet::new(syntax_in_set, theme)), + ), + + Err(Error::UndetectedSyntax(_)) => ( + true, + Some( + assets + .find_syntax_by_name(PLAIN_TEXT_SYNTAX)? 
+ .map(|s| HighlighterFromSet::new(s, theme)) + .expect("A plain text syntax is available"), + ), + ), + + Err(e) => return Err(e), + } + } else { + (false, None) }; // Determine when to strip ANSI sequences let strip_ansi = match config.strip_ansi { _ if config.show_nonprintable => false, StripAnsiMode::Always => true, + StripAnsiMode::Auto if is_plain_text => false, // Plain text may already contain escape sequences. + StripAnsiMode::Auto => true, _ => false, }; diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index bc86cb9b..d6009361 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -2740,3 +2740,73 @@ fn strip_ansi_does_not_strip_when_show_nonprintable() { assert!(output.contains("␛")) } + +#[test] +fn strip_ansi_auto_strips_ansi_when_detected_syntax_by_filename() { + bat() + .arg("--style=plain") + .arg("--decorations=always") + .arg("--color=never") + .arg("--strip-ansi=auto") + .arg("--file-name=test.rs") + .write_stdin("fn \x1B[33mYellow\x1B[m() -> () {}") + .assert() + .success() + .stdout("fn Yellow() -> () {}"); +} + +#[test] +fn strip_ansi_auto_strips_ansi_when_provided_syntax_by_option() { + bat() + .arg("--style=plain") + .arg("--decorations=always") + .arg("--color=never") + .arg("--strip-ansi=auto") + .arg("--language=rust") + .write_stdin("fn \x1B[33mYellow\x1B[m() -> () {}") + .assert() + .success() + .stdout("fn Yellow() -> () {}"); +} + +#[test] +fn strip_ansi_auto_does_not_strip_when_plain_text_by_filename() { + let output = String::from_utf8( + bat() + .arg("--style=plain") + .arg("--decorations=always") + .arg("--color=never") + .arg("--strip-ansi=auto") + .arg("--file-name=ansi.txt") + .write_stdin("\x1B[33mYellow\x1B[m") + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .expect("valid utf8"); + + assert!(output.contains("\x1B[33mYellow")) +} + +#[test] +fn strip_ansi_auto_does_not_strip_ansi_when_plain_text_by_option() { + let output = String::from_utf8( + bat() + 
.arg("--style=plain") + .arg("--decorations=always") + .arg("--color=never") + .arg("--strip-ansi=auto") + .arg("--language=txt") + .write_stdin("\x1B[33mYellow\x1B[m") + .assert() + .success() + .get_output() + .stdout + .clone(), + ) + .expect("valid utf8"); + + assert!(output.contains("\x1B[33mYellow")) +} From 3f22311ec8812e83bc9c26e22501acf3b92337bf Mon Sep 17 00:00:00 2001 From: "Ethan P." Date: Sat, 15 Jun 2024 16:27:44 -0700 Subject: [PATCH 3/5] Add ANSI stripping to bat-as-a-library --- src/pretty_printer.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/pretty_printer.rs b/src/pretty_printer.rs index c6203aa9..eb123ea3 100644 --- a/src/pretty_printer.rs +++ b/src/pretty_printer.rs @@ -11,7 +11,7 @@ use crate::{ input, line_range::{HighlightedLineRanges, LineRange, LineRanges}, style::StyleComponent, - SyntaxMapping, WrappingMode, + StripAnsiMode, SyntaxMapping, WrappingMode, }; #[cfg(feature = "paging")] @@ -182,6 +182,15 @@ impl<'a> PrettyPrinter<'a> { self } + /// Whether to remove ANSI escape sequences from the input (default: never) + /// + /// If `Auto` is used, escape sequences will only be removed when the input + /// is not plain text. + pub fn strip_ansi(&mut self, mode: StripAnsiMode) -> &mut Self { + self.config.strip_ansi = mode; + self + } + /// Text wrapping mode (default: do not wrap) pub fn wrapping_mode(&mut self, mode: WrappingMode) -> &mut Self { self.config.wrapping_mode = mode; From 3f8d62e7d659b4c86da9d3e74d2fc65388804e90 Mon Sep 17 00:00:00 2001 From: "Ethan P." 
Date: Sat, 15 Jun 2024 16:36:07 -0700 Subject: [PATCH 4/5] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7aca166..7e361ddb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - `bat --squeeze-limit` to set the maximum number of empty consecutive when using `--squeeze-blank`, see #1441 (@eth-p) and #2665 (@einfachIrgendwer0815) - `PrettyPrinter::squeeze_empty_lines` to support line squeezing for bat as a library, see #1441 (@eth-p) and #2665 (@einfachIrgendwer0815) - Syntax highlighting for JavaScript files that start with `#!/usr/bin/env bun` #2913 (@sharunkumar) +- `bat --strip-ansi={never,always,auto}` to remove ANSI escape sequences from bat's input, see #2999 (@eth-p) ## Bugfixes From 90dfa7f18d72c7236199f0309941a9c59f061fe2 Mon Sep 17 00:00:00 2001 From: "Ethan P." Date: Sat, 15 Jun 2024 17:12:02 -0700 Subject: [PATCH 5/5] Update README.md to mention `--strip-ansi` --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 57baf2b0..0abd9bbd 100644 --- a/README.md +++ b/README.md @@ -759,9 +759,14 @@ bat() { If an input file contains color codes or other ANSI escape sequences or control characters, `bat` will have problems performing syntax highlighting and text wrapping, and thus the output can become garbled. -When displaying such files it is recommended to disable both syntax highlighting and wrapping by + +If your version of `bat` supports the `--strip-ansi=auto` option, it can be used to remove such sequences +before syntax highlighting. Alternatively, you may disable both syntax highlighting and wrapping by passing the `--color=never --wrap=never` options to `bat`. +> [!NOTE] +> The `auto` option of `--strip-ansi` avoids removing escape sequences when the syntax is plain text. + ### Terminals & colors `bat` handles terminals *with* and *without* truecolor support. However, the colors in most syntax