From 08386daa3ae9a149ac27261579745cad11993b06 Mon Sep 17 00:00:00 2001 From: dag-h Date: Tue, 6 Sep 2022 19:08:38 +0200 Subject: [PATCH] Strip BOM from output in interactive mode (#1938) * Strip BOM from output in interactive mode * Strip BOM when not loop_through, add regression tests * Update CHANGELOG.md * Only strip BOM from beginning of first line * Fix integration test on macOS that relied on color scheme * Fix integration test on Windows that relied on detected terminal width * Fix syntax test that was failing due to a previously wrong (now fixed) highlighting Co-authored-by: David Peter Co-authored-by: Martin Nordholts --- CHANGELOG.md | 1 + src/printer.rs | 11 +++- tests/examples/test_BOM.txt | 1 + tests/integration_tests.rs | 56 ++++++++++++++++++- .../highlighted/PowerShell/test.ps1 | 2 +- 5 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 tests/examples/test_BOM.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b5c3c1d..2ae47448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Prevent fork nightmare with `PAGER=batcat`. See #2235 (@johnmatthiggins) - Make `--no-paging`/`-P` override `--paging=...` if passed as a later arg, see #2201 (@themkat) - `--map-syntax` and `--ignored-suffix` now works together, see #2093 (@czzrr) +- Strips byte order mark from output when in non-loop-through mode. See #1922 (@dag-h) ## Other diff --git a/src/printer.rs b/src/printer.rs index 3f7f1e09..4f962df9 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -419,7 +419,7 @@ impl<'a> Printer for InteractivePrinter<'a> { let line = if self.config.show_nonprintable { replace_nonprintable(line_buffer, self.config.tab_width) } else { - match self.content_type { + let line = match self.content_type { Some(ContentType::BINARY) | None => { return Ok(()); } @@ -430,6 +430,15 @@ impl<'a> Printer for InteractivePrinter<'a> { .decode(line_buffer, DecoderTrap::Replace) .map_err(|_| "Invalid UTF-16BE")?, _ => String::from_utf8_lossy(line_buffer).to_string(), + }; + // Remove byte order mark from the first line if it exists + if line_number == 1 { + match line.strip_prefix('\u{feff}') { + Some(stripped) => stripped.to_string(), + None => line, + } + } else { + line } }; diff --git a/tests/examples/test_BOM.txt b/tests/examples/test_BOM.txt new file mode 100644 index 00000000..c638ce61 --- /dev/null +++ b/tests/examples/test_BOM.txt @@ -0,0 +1 @@ +hello world diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 6a626327..02e60a75 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -758,14 +758,66 @@ fn config_read_arguments_from_file() { #[test] fn utf16() { - // The output will be converted to UTF-8 with a leading UTF-8 BOM + // The output will be converted to UTF-8 with the leading UTF-16 + // BOM removed. This behavior is wanted in interactive mode as + // some terminals seem to display the BOM character as a space, + // and it also breaks syntax highlighting. bat() .arg("--plain") .arg("--decorations=always") .arg("test_UTF-16LE.txt") .assert() .success() - .stdout(std::str::from_utf8(b"\xEF\xBB\xBFhello world\n").unwrap()); + .stdout("hello world\n"); +} + +// Regression test for https://github.com/sharkdp/bat/issues/1922 +#[test] +fn bom_not_stripped_in_loop_through_mode() { + bat() + .arg("--plain") + .arg("--decorations=never") + .arg("--color=never") + .arg("test_BOM.txt") + .assert() + .success() + .stdout("\u{feff}hello world\n"); +} + +// Regression test for https://github.com/sharkdp/bat/issues/1922 +#[test] +fn bom_stripped_when_colored_output() { + bat() + .arg("--color=always") + .arg("--decorations=never") + .arg("test_BOM.txt") + .assert() + .success() + .stdout( + predicate::str::is_match("\u{1b}\\[38;5;[0-9]{3}mhello world\u{1b}\\[0m\n").unwrap(), + ); +} + +// Regression test for https://github.com/sharkdp/bat/issues/1922 +#[test] +fn bom_stripped_when_no_color_and_not_loop_through() { + bat() + .arg("--color=never") + .arg("--decorations=always") + .arg("--style=numbers,grid,header") + .arg("--terminal-width=80") + .arg("test_BOM.txt") + .assert() + .success() + .stdout( + "\ +─────┬────────────────────────────────────────────────────────────────────────── + │ File: test_BOM.txt +─────┼────────────────────────────────────────────────────────────────────────── + 1 │ hello world +─────┴────────────────────────────────────────────────────────────────────────── +", + ); } #[test] diff --git a/tests/syntax-tests/highlighted/PowerShell/test.ps1 b/tests/syntax-tests/highlighted/PowerShell/test.ps1 index aae615d5..97e8e738 100644 --- a/tests/syntax-tests/highlighted/PowerShell/test.ps1 +++ b/tests/syntax-tests/highlighted/PowerShell/test.ps1 @@ -1,4 +1,4 @@ -# PowerShell script for testing syntax highlighting +# PowerShell script for testing syntax highlighting function Get-FutureTime {  param (