diff --git a/CHANGELOG.md b/CHANGELOG.md index e3399922..b41d8d10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,9 @@ ## Bugfixes +- Fix long file name wrapping in header, see #2835 (@FilipRazek) - Fix `NO_COLOR` support, see #2767 (@acuteenvy) +- Fix handling of inputs with OSC ANSI escape sequences, see #2541 and #2544 (@eth-p) ## Other @@ -17,18 +19,27 @@ - Minor benchmark script improvements #2768 (@cyqsimon) - Update Arch Linux package URL in README files #2779 (@brunobell) - Update and improve `zsh` completion, see #2772 (@okapia) +- More extensible syntax mapping mechanism #2755 (@cyqsimon) - Use proper Architecture for Debian packages built for musl, see #2811 (@Enselic) - Pull in fix for unsafe-libyaml security advisory, see #2812 (@dtolnay) - Update git-version dependency to use Syn v2, see #2816 (@dtolnay) +- Update git2 dependency to v0.18.2, see #2852 (@eth-p) ## Syntaxes - `cmd-help`: scope subcommands followed by other terms, and other misc improvements, see #2819 (@victor-gp) +- Upgrade JQ syntax, see #2820 (@dependabot[bot]) ## Themes ## `bat` as a library +- Changes to `syntax_mapping::SyntaxMapping` #2755 (@cyqsimon) + - `SyntaxMapping::get_syntax_for` is now correctly public + - [BREAKING] `SyntaxMapping::{empty,builtin}` are removed; use `SyntaxMapping::new` instead + - [BREAKING] `SyntaxMapping::mappings` is replaced by `SyntaxMapping::{builtin,custom,all}_mappings` +- Make `Controller::run_with_error_handler`'s error handler `FnMut`, see #2831 (@rhysd) +- Improve compile time by 20%, see #2815 (@dtolnay) # v0.24.0 diff --git a/Cargo.lock b/Cargo.lock index 19ee5928..ff674b9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -129,6 +129,8 @@ dependencies = [ "globset", "grep-cli", "home", + "indexmap 2.2.2", + "itertools", "nix", "nu-ansi-term", "once_cell", @@ -140,12 +142,15 @@ dependencies = [ "run_script", "semver", "serde", + "serde_derive", + "serde_with", "serde_yaml", "serial_test", "shell-words", "syntect", "tempfile", "thiserror", + "toml", "unicode-width", "wait-timeout", "walkdir", @@ -224,11 +229,12 @@ checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" [[package]] name = "cc" -version = "1.0.73" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", + "libc", ] [[package]] @@ -267,13 +273,14 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" [[package]] name = "clircle" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e87cbed5354f17bd8ca8821a097fb62599787fe8f611743fad7ee156a0a600" +checksum = "ec0b92245ea62a7a751db4b0e4a583f8978e508077ef6de24fcc0d0dc5311a8d" dependencies = [ "cfg-if", "libc", "serde", + "serde_derive", "winapi", ] @@ -314,6 +321,41 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "darling" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + 
"syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "dashmap" version = "5.4.0" @@ -540,7 +582,7 @@ dependencies = [ "bstr", "log", "regex-automata", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] @@ -578,6 +620,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.3.0" @@ -600,12 +648,13 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.2" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" +checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" dependencies = [ "equivalent", "hashbrown 0.14.1", + "serde", ] [[package]] @@ -646,9 +695,9 @@ checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" [[package]] name = "libgit2-sys" -version = "0.16.1+1.7.1" +version = "0.16.2+1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" dependencies = [ "cc", "libc", @@ -980,7 +1029,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] @@ -991,15 +1040,9 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - [[package]] name = "regex-syntax" version = "0.8.2" @@ -1066,9 +1109,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "semver" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" [[package]] name = "serde" @@ -1101,13 +1144,45 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_with" +version = "3.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d167997bd841ec232f5b2b8e0e26606df2e7caa4c31b95ea9ca52b200bd270" +dependencies = [ + "serde", + "serde_derive", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "3.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "865f9743393e638991566a8b7a479043c2c8da94a33e0a31f18214c9cae0a64d" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_yaml" version = "0.9.29" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "a15e0ef66bf939a7c890a0bf6d5a733c70202225f9888a89ed5c62298b019129" dependencies = [ - "indexmap 2.0.2", + "indexmap 2.2.2", "itoa", "ryu", "serde", @@ -1180,9 +1255,9 @@ dependencies = [ [[package]] name = "syntect" -version = "5.1.0" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02b4b303bf8d08bfeb0445cba5068a3d306b6baece1d5582171a9bf49188f91" +checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1" dependencies = [ "bincode", "bitflags 1.3.2", @@ -1192,8 +1267,9 @@ dependencies = [ "once_cell", "onig", "plist", - "regex-syntax 0.7.5", + "regex-syntax", "serde", + "serde_derive", "serde_json", "thiserror", "walkdir", @@ -1294,6 +1370,41 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +[[package]] +name = "toml" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6a4b9e8023eb94392d3dca65d717c53abc5dad49c07cb65bb8fcd87115fa325" +dependencies = [ + "indexmap 2.2.2", + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.2.2", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "unicode-bidi" version = "0.3.8" @@ -1613,6 +1724,15 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +[[package]] +name = "winnow" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "176b6138793677221d420fd2f0aeeced263f197688b36484660da767bca2fa32" +dependencies = [ + "memchr", +] + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 53bc2da4..05a2acb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,11 +53,12 @@ content_inspector = "0.2.4" shell-words = { version = "1.1.0", optional = true } unicode-width = "0.1.11" globset = "0.4" -serde = { version = "1.0", features = ["derive"] } +serde = "1.0" +serde_derive = "1.0" serde_yaml = "0.9.28" semver = "1.0" path_abs = { version = "0.5", default-features = false } -clircle = "0.4" +clircle = "0.5" bugreport = { version = "0.5.0", optional = true } etcetera = { version = "0.8.0", optional = true } grep-cli = { version = "0.1.10", optional = true } @@ -74,7 +75,7 @@ optional = true default-features = false [dependencies.syntect] -version = "5.1.0" +version = "5.2.0" default-features = false features = ["parsing"] @@ -94,12 +95,22 @@ serial_test = { version = "2.0.0", default-features = false } predicates = "3.0.4" wait-timeout = "0.2.0" tempfile = "3.8.1" +serde = { version = "1.0", features = ["derive"] } [target.'cfg(unix)'.dev-dependencies] nix = { version = "0.26.4", default-features = false, features = ["term"] } [build-dependencies] anyhow = "1.0.78" +indexmap = { version = "2.2.2", features = ["serde"] } +itertools = "0.11.0" +once_cell = "1.18" +regex = "1.10.2" +serde = "1.0" +serde_derive = "1.0" +serde_with = { version 
= "3.6.1", default-features = false, features = ["macros"] } +toml = { version = "0.8.9", features = ["preserve_order"] } +walkdir = "2.4" [build-dependencies.clap] version = "4.4.12" diff --git a/README.md b/README.md index 352ae64d..57baf2b0 100644 --- a/README.md +++ b/README.md @@ -602,7 +602,8 @@ set, `less` is used by default. If you want to use a different pager, you can ei `PAGER` variable or set the `BAT_PAGER` environment variable to override what is specified in `PAGER`. -**Note**: If `PAGER` is `more` or `most`, `bat` will silently use `less` instead to ensure support for colors. +>[!NOTE] +> If `PAGER` is `more` or `most`, `bat` will silently use `less` instead to ensure support for colors. If you want to pass command-line arguments to the pager, you can also set them via the `PAGER`/`BAT_PAGER` variables: @@ -613,20 +614,37 @@ export BAT_PAGER="less -RF" Instead of using environment variables, you can also use `bat`s [configuration file](https://github.com/sharkdp/bat#configuration-file) to configure the pager (`--pager` option). -**Note**: By default, if the pager is set to `less` (and no command-line options are specified), -`bat` will pass the following command line options to the pager: `-R`/`--RAW-CONTROL-CHARS`, -`-F`/`--quit-if-one-screen` and `-X`/`--no-init`. The last option (`-X`) is only used for `less` -versions older than 530. -The `-R` option is needed to interpret ANSI colors correctly. The second option (`-F`) instructs -less to exit immediately if the output size is smaller than the vertical size of the terminal. -This is convenient for small files because you do not have to press `q` to quit the pager. The -third option (`-X`) is needed to fix a bug with the `--quit-if-one-screen` feature in old versions -of `less`. Unfortunately, it also breaks mouse-wheel support in `less`. +### Using `less` as a pager -If you want to enable mouse-wheel scrolling on older versions of `less`, you can pass just `-R` (as -in the example above, this will disable the quit-if-one-screen feature). For less 530 or newer, -it should work out of the box. +When using `less` as a pager, `bat` will automatically pass extra options along to `less` +to improve the experience. Specifically, `-R`/`--RAW-CONTROL-CHARS`, `-F`/`--quit-if-one-screen`, +and under certain conditions, `-X`/`--no-init` and/or `-S`/`--chop-long-lines`. + +>[!IMPORTANT] +> These options will not be added if: +> - The pager is not named `less`. +> - The `--pager` argument contains any command-line arguments (e.g. `--pager="less -R"`). +> - The `BAT_PAGER` environment variable contains any command-line arguments (e.g. `export BAT_PAGER="less -R"`) +> +> The `--quit-if-one-screen` option will not be added when: +> - The `--paging=always` argument is used. +> - The `BAT_PAGING` environment is set to `always`. + +The `-R` option is needed to interpret ANSI colors correctly. + +The `-F` option instructs `less` to exit immediately if the output size is smaller than +the vertical size of the terminal. This is convenient for small files because you do not +have to press `q` to quit the pager. + +The `-X` option is needed to fix a bug with the `--quit-if-one-screen` feature in versions +of `less` older than version 530. Unfortunately, it also breaks mouse-wheel support in `less`. +If you want to enable mouse-wheel scrolling on older versions of `less` and do not mind losing +the quit-if-one-screen feature, you can set the pager (via `--pager` or `BAT_PAGER`) to `less -R`. 
+For `less` 530 or newer, it should work out of the box. + +The `-S` option is added when `bat`'s `-S`/`--chop-long-lines` option is used. This tells `less` +to truncate any lines larger than the terminal width. ### Indentation diff --git a/assets/syntaxes/02_Extra/SublimeJQ b/assets/syntaxes/02_Extra/SublimeJQ index 68705828..b7e53e5d 160000 --- a/assets/syntaxes/02_Extra/SublimeJQ +++ b/assets/syntaxes/02_Extra/SublimeJQ @@ -1 +1 @@ -Subproject commit 687058289c1a888e0895378432d66b41609a84d8 +Subproject commit b7e53e5d86814f04a48d2e441bcf5f9fdf07e9c1 diff --git a/assets/syntaxes/02_Extra/cmd-help b/assets/syntaxes/02_Extra/cmd-help index b150d845..209559b7 160000 --- a/assets/syntaxes/02_Extra/cmd-help +++ b/assets/syntaxes/02_Extra/cmd-help @@ -1 +1 @@ -Subproject commit b150d84534dd060afdcaf3f58977faeaf5917e56 +Subproject commit 209559b72f7e8848c988828088231b3a4d8b6838 diff --git a/assets/themes/zenburn b/assets/themes/zenburn index e627f1cb..86d4ee7a 160000 --- a/assets/themes/zenburn +++ b/assets/themes/zenburn @@ -1 +1 @@ -Subproject commit e627f1cb223c1171ab0a6a48d166c87aeae2a1d5 +Subproject commit 86d4ee7a1f884851a1d21d66249687f527fced32 diff --git a/build/main.rs b/build/main.rs index 416d90d5..8966ee52 100644 --- a/build/main.rs +++ b/build/main.rs @@ -1,5 +1,6 @@ #[cfg(feature = "application")] mod application; +mod syntax_mapping; mod util; fn main() -> anyhow::Result<()> { @@ -7,6 +8,8 @@ fn main() -> anyhow::Result<()> { // see: https://doc.rust-lang.org/cargo/reference/build-scripts.html#rerun-if-changed println!("cargo:rerun-if-changed=build/"); + syntax_mapping::build_static_mappings()?; + #[cfg(feature = "application")] application::gen_man_and_comp()?; diff --git a/build/syntax_mapping.rs b/build/syntax_mapping.rs new file mode 100644 index 00000000..959caea8 --- /dev/null +++ b/build/syntax_mapping.rs @@ -0,0 +1,292 @@ +use std::{ + convert::Infallible, + env, fs, + path::{Path, PathBuf}, + str::FromStr, +}; + +use anyhow::{anyhow, bail}; +use indexmap::IndexMap; +use itertools::Itertools; +use once_cell::sync::Lazy; +use regex::Regex; +use serde_derive::Deserialize; +use serde_with::DeserializeFromStr; +use walkdir::WalkDir; + +/// Known mapping targets. +/// +/// Corresponds to `syntax_mapping::MappingTarget`. +#[allow(clippy::enum_variant_names)] +#[derive(Clone, Debug, Eq, PartialEq, Hash, DeserializeFromStr)] +pub enum MappingTarget { + MapTo(String), + MapToUnknown, + MapExtensionToUnknown, +} +impl FromStr for MappingTarget { + type Err = Infallible; + fn from_str(s: &str) -> Result { + match s { + "MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown), + "MappingTarget::MapExtensionToUnknown" => Ok(Self::MapExtensionToUnknown), + syntax => Ok(Self::MapTo(syntax.into())), + } + } +} +impl MappingTarget { + fn codegen(&self) -> String { + match self { + Self::MapTo(syntax) => format!(r###"MappingTarget::MapTo(r#"{syntax}"#)"###), + Self::MapToUnknown => "MappingTarget::MapToUnknown".into(), + Self::MapExtensionToUnknown => "MappingTarget::MapExtensionToUnknown".into(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)] +/// A single matcher. +/// +/// Codegen converts this into a `Lazy>`. +struct Matcher(Vec); +/// Parse a matcher. +/// +/// Note that this implementation is rather strict: it will greedily interpret +/// every valid environment variable replacement as such, then immediately +/// hard-error if it finds a '$', '{', or '}' anywhere in the remaining text +/// segments. 
+/// +/// The reason for this strictness is I currently cannot think of a valid reason +/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern. +/// Therefore any such occurrences are likely human errors. +/// +/// If we later discover some edge cases, it's okay to make it more permissive. +impl FromStr for Matcher { + type Err = anyhow::Error; + fn from_str(s: &str) -> Result { + use MatcherSegment as Seg; + static VAR_REGEX: Lazy = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap()); + + let mut segments = vec![]; + let mut text_start = 0; + for capture in VAR_REGEX.captures_iter(s) { + let match_0 = capture.get(0).unwrap(); + + // text before this var + let text_end = match_0.start(); + segments.push(Seg::Text(s[text_start..text_end].into())); + text_start = match_0.end(); + + // this var + segments.push(Seg::Env(capture.get(1).unwrap().as_str().into())); + } + // possible trailing text + segments.push(Seg::Text(s[text_start..].into())); + + // cleanup empty text segments + let non_empty_segments = segments + .into_iter() + .filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true)) + .collect_vec(); + + // sanity check + if non_empty_segments + .windows(2) + .any(|segs| segs[0].is_text() && segs[1].is_text()) + { + unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}"); + } + + // guard empty case + if non_empty_segments.is_empty() { + bail!(r#"Parsed an empty matcher: "{s}""#); + } + + // guard variable syntax leftover fragments + if non_empty_segments + .iter() + .filter_map(Seg::text) + .any(|t| t.contains(['$', '{', '}'])) + { + bail!(r#"Invalid matcher: "{s}""#); + } + + Ok(Self(non_empty_segments)) + } +} +impl Matcher { + fn codegen(&self) -> String { + match self.0.len() { + 0 => unreachable!("0-length matcher should never be created"), + // if-let guard would be ideal here + // see: https://github.com/rust-lang/rust/issues/51114 + 1 if self.0[0].is_text() => { + let s = self.0[0].text().unwrap(); + format!(r###"Lazy::new(|| Some(build_matcher_fixed(r#"{s}"#)))"###) + } + // parser logic ensures that this case can only happen when there are dynamic segments + _ => { + let segs = self.0.iter().map(MatcherSegment::codegen).join(", "); + format!(r###"Lazy::new(|| build_matcher_dynamic(&[{segs}]))"###) + } + } + } +} + +/// A segment in a matcher. +/// +/// Corresponds to `syntax_mapping::MatcherSegment`. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum MatcherSegment { + Text(String), + Env(String), +} +#[allow(dead_code)] +impl MatcherSegment { + fn is_text(&self) -> bool { + matches!(self, Self::Text(_)) + } + fn is_env(&self) -> bool { + matches!(self, Self::Env(_)) + } + fn text(&self) -> Option<&str> { + match self { + Self::Text(t) => Some(t), + Self::Env(_) => None, + } + } + fn env(&self) -> Option<&str> { + match self { + Self::Text(_) => None, + Self::Env(t) => Some(t), + } + } + fn codegen(&self) -> String { + match self { + Self::Text(s) => format!(r###"MatcherSegment::Text(r#"{s}"#)"###), + Self::Env(s) => format!(r###"MatcherSegment::Env(r#"{s}"#)"###), + } + } +} + +/// A struct that models a single .toml file in /src/syntax_mapping/builtins/. 
+#[derive(Clone, Debug, Deserialize)] +struct MappingDefModel { + mappings: IndexMap>, +} +impl MappingDefModel { + fn into_mapping_list(self) -> MappingList { + let list = self + .mappings + .into_iter() + .flat_map(|(target, matchers)| { + matchers + .into_iter() + .map(|matcher| (matcher, target.clone())) + .collect::>() + }) + .collect(); + MappingList(list) + } +} + +#[derive(Clone, Debug)] +struct MappingList(Vec<(Matcher, MappingTarget)>); +impl MappingList { + fn codegen(&self) -> String { + let array_items: Vec<_> = self + .0 + .iter() + .map(|(matcher, target)| { + format!("({m}, {t})", m = matcher.codegen(), t = target.codegen()) + }) + .collect(); + let len = array_items.len(); + + format!( + "/// Generated by build script from /src/syntax_mapping/builtins/.\n\ + pub(crate) static BUILTIN_MAPPINGS: [(Lazy>, MappingTarget); {len}] = [\n{items}\n];", + items = array_items.join(",\n") + ) + } +} + +/// Get the list of paths to all mapping definition files that should be +/// included for the current target platform. +fn get_def_paths() -> anyhow::Result> { + let source_subdirs = [ + "common", + #[cfg(target_family = "unix")] + "unix-family", + #[cfg(any( + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "macos" + ))] + "bsd-family", + #[cfg(target_os = "linux")] + "linux", + #[cfg(target_os = "macos")] + "macos", + #[cfg(target_os = "windows")] + "windows", + ]; + + let mut toml_paths = vec![]; + for subdir in source_subdirs { + let wd = WalkDir::new(Path::new("src/syntax_mapping/builtins").join(subdir)); + let paths = wd + .into_iter() + .filter_map_ok(|entry| { + let path = entry.path(); + (path.is_file() && path.extension().map(|ext| ext == "toml").unwrap_or(false)) + .then(|| path.to_owned()) + }) + .collect::, _>>()?; + toml_paths.extend(paths); + } + + toml_paths.sort_by_key(|path| { + path.file_name() + .expect("file name should not terminate in ..") + .to_owned() + }); + + Ok(toml_paths) +} + +fn read_all_mappings() -> anyhow::Result { + let mut all_mappings = vec![]; + + for path in get_def_paths()? { + let toml_string = fs::read_to_string(path)?; + let mappings = toml::from_str::(&toml_string)?.into_mapping_list(); + all_mappings.extend(mappings.0); + } + + let duplicates = all_mappings + .iter() + .duplicates_by(|(matcher, _)| matcher) + .collect_vec(); + if !duplicates.is_empty() { + bail!("Rules with duplicate matchers found: {duplicates:?}"); + } + + Ok(MappingList(all_mappings)) +} + +/// Build the static syntax mappings defined in /src/syntax_mapping/builtins/ +/// into a .rs source file, which is to be inserted with `include!`. +pub fn build_static_mappings() -> anyhow::Result<()> { + println!("cargo:rerun-if-changed=src/syntax_mapping/builtins/"); + + let mappings = read_all_mappings()?; + + let codegen_path = Path::new(&env::var_os("OUT_DIR").ok_or(anyhow!("OUT_DIR is unset"))?) 
+ .join("codegen_static_syntax_mappings.rs"); + + fs::write(codegen_path, mappings.codegen())?; + + Ok(()) +} diff --git a/src/assets.rs b/src/assets.rs index 7ce1a8f6..0129f76b 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -441,7 +441,7 @@ mod tests { fn new() -> Self { SyntaxDetectionTest { assets: HighlightingAssets::from_binary(), - syntax_mapping: SyntaxMapping::builtin(), + syntax_mapping: SyntaxMapping::new(), temp_dir: TempDir::new().expect("creation of temporary directory"), } } diff --git a/src/assets/assets_metadata.rs b/src/assets/assets_metadata.rs index 700c4c3b..cfc7a9e0 100644 --- a/src/assets/assets_metadata.rs +++ b/src/assets/assets_metadata.rs @@ -3,7 +3,7 @@ use std::path::Path; use std::time::SystemTime; use semver::Version; -use serde::{Deserialize, Serialize}; +use serde_derive::{Deserialize, Serialize}; use crate::error::*; diff --git a/src/assets/lazy_theme_set.rs b/src/assets/lazy_theme_set.rs index bf749154..fcc3eb46 100644 --- a/src/assets/lazy_theme_set.rs +++ b/src/assets/lazy_theme_set.rs @@ -3,8 +3,7 @@ use super::*; use std::collections::BTreeMap; use std::convert::TryFrom; -use serde::Deserialize; -use serde::Serialize; +use serde_derive::{Deserialize, Serialize}; use once_cell::unsync::OnceCell; diff --git a/src/bin/bat/app.rs b/src/bin/bat/app.rs index 8ec3caa5..8843d53b 100644 --- a/src/bin/bat/app.rs +++ b/src/bin/bat/app.rs @@ -121,7 +121,7 @@ impl App { _ => unreachable!("other values for --paging are not allowed"), }; - let mut syntax_mapping = SyntaxMapping::builtin(); + let mut syntax_mapping = SyntaxMapping::new(); if let Some(values) = self.matches.get_many::("ignored-suffix") { for suffix in values { @@ -130,7 +130,9 @@ impl App { } if let Some(values) = self.matches.get_many::("map-syntax") { - for from_to in values { + // later args take precedence over earlier ones, hence `.rev()` + // see: https://github.com/sharkdp/bat/pull/2755#discussion_r1456416875 + for from_to in values.rev() { let parts: Vec<_> = from_to.split(':').collect(); if parts.len() != 2 { diff --git a/src/bin/bat/main.rs b/src/bin/bat/main.rs index afc0d59b..d877bb9b 100644 --- a/src/bin/bat/main.rs +++ b/src/bin/bat/main.rs @@ -78,9 +78,11 @@ fn run_cache_subcommand( Ok(()) } -fn get_syntax_mapping_to_paths<'a>( - mappings: &[(GlobMatcher, MappingTarget<'a>)], -) -> HashMap<&'a str, Vec> { +fn get_syntax_mapping_to_paths<'r, 't, I>(mappings: I) -> HashMap<&'t str, Vec> +where + I: IntoIterator)>, + 't: 'r, // target text outlives rule +{ let mut map = HashMap::new(); for mapping in mappings { if let (matcher, MappingTarget::MapTo(s)) = mapping { @@ -123,7 +125,7 @@ pub fn get_languages(config: &Config, cache_dir: &Path) -> Result { languages.sort_by_key(|lang| lang.name.to_uppercase()); - let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.mappings()); + let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.all_mappings()); for lang in &mut languages { if let Some(additional_paths) = configured_languages.get(lang.name.as_str()) { diff --git a/src/controller.rs b/src/controller.rs index f378cbc6..ffc5dd5b 100644 --- a/src/controller.rs +++ b/src/controller.rs @@ -47,7 +47,7 @@ impl<'b> Controller<'b> { &self, inputs: Vec, output_buffer: Option<&mut dyn std::fmt::Write>, - handle_error: impl Fn(&Error, &mut dyn Write), + mut handle_error: impl FnMut(&Error, &mut dyn Write), ) -> Result { let mut output_type; diff --git a/src/printer.rs b/src/printer.rs index cb80edc4..f413fdc3 100644 --- a/src/printer.rs +++ 
b/src/printer.rs @@ -7,8 +7,6 @@ use nu_ansi_term::Style; use bytesize::ByteSize; -use console::AnsiCodeIterator; - use syntect::easy::HighlightLines; use syntect::highlighting::Color; use syntect::highlighting::Theme; @@ -33,9 +31,23 @@ use crate::line_range::RangeCheckResult; use crate::preprocessor::{expand_tabs, replace_nonprintable}; use crate::style::StyleComponent; use crate::terminal::{as_terminal_escaped, to_ansi_color}; -use crate::vscreen::AnsiStyle; +use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator}; use crate::wrapping::WrappingMode; +const ANSI_UNDERLINE_ENABLE: EscapeSequence = EscapeSequence::CSI { + raw_sequence: "\x1B[4m", + parameters: "4", + intermediates: "", + final_byte: "m", +}; + +const ANSI_UNDERLINE_DISABLE: EscapeSequence = EscapeSequence::CSI { + raw_sequence: "\x1B[24m", + parameters: "24", + intermediates: "", + final_byte: "m", +}; + pub enum OutputHandle<'a> { IoWrite(&'a mut dyn io::Write), FmtWrite(&'a mut dyn fmt::Write), @@ -287,6 +299,14 @@ impl<'a> InteractivePrinter<'a> { } } + fn get_header_component_indent_length(&self) -> usize { + if self.config.style_components.grid() && self.panel_width > 0 { + self.panel_width + 2 + } else { + self.panel_width + } + } + fn print_header_component_indent(&mut self, handle: &mut OutputHandle) -> Result<()> { if self.config.style_components.grid() { write!( @@ -302,6 +322,30 @@ impl<'a> InteractivePrinter<'a> { } } + fn print_header_component_with_indent( + &mut self, + handle: &mut OutputHandle, + content: &str, + ) -> Result<()> { + self.print_header_component_indent(handle)?; + writeln!(handle, "{}", content) + } + + fn print_header_multiline_component( + &mut self, + handle: &mut OutputHandle, + content: &str, + ) -> Result<()> { + let mut content = content; + let content_width = self.config.term_width - self.get_header_component_indent_length(); + while content.len() > content_width { + let (content_line, remaining) = content.split_at(content_width); + self.print_header_component_with_indent(handle, content_line)?; + content = remaining; + } + self.print_header_component_with_indent(handle, content) + } + fn preprocess(&self, text: &str, cursor: &mut usize) -> String { if self.config.tab_width > 0 { return expand_tabs(text, self.config.tab_width, cursor); @@ -377,31 +421,32 @@ impl<'a> Printer for InteractivePrinter<'a> { } } - header_components.iter().try_for_each(|component| { - self.print_header_component_indent(handle)?; - - match component { - StyleComponent::HeaderFilename => writeln!( - handle, - "{}{}{}", - description - .kind() - .map(|kind| format!("{}: ", kind)) - .unwrap_or_else(|| "".into()), - self.colors.header_value.paint(description.title()), - mode - ), - + header_components + .iter() + .try_for_each(|component| match component { + StyleComponent::HeaderFilename => { + let header_filename = format!( + "{}{}{}", + description + .kind() + .map(|kind| format!("{}: ", kind)) + .unwrap_or_else(|| "".into()), + self.colors.header_value.paint(description.title()), + mode + ); + self.print_header_multiline_component(handle, &header_filename) + } StyleComponent::HeaderFilesize => { let bsize = metadata .size .map(|s| format!("{}", ByteSize(s))) .unwrap_or_else(|| "-".into()); - writeln!(handle, "Size: {}", self.colors.header_value.paint(bsize)) + let header_filesize = + format!("Size: {}", self.colors.header_value.paint(bsize)); + self.print_header_multiline_component(handle, &header_filesize) } _ => Ok(()), - } - })?; + })?; if self.config.style_components.grid() { if 
self.content_type.map_or(false, |c| c.is_text()) || self.config.show_nonprintable { @@ -521,7 +566,7 @@ impl<'a> Printer for InteractivePrinter<'a> { self.config.highlighted_lines.0.check(line_number) == RangeCheckResult::InRange; if highlight_this_line && self.config.theme == "ansi" { - self.ansi_style.update("^[4m"); + self.ansi_style.update(ANSI_UNDERLINE_ENABLE); } let background_color = self @@ -548,23 +593,17 @@ impl<'a> Printer for InteractivePrinter<'a> { let italics = self.config.use_italic_text; for &(style, region) in ®ions { - let ansi_iterator = AnsiCodeIterator::new(region); + let ansi_iterator = EscapeSequenceIterator::new(region); for chunk in ansi_iterator { match chunk { - // ANSI escape passthrough. - (ansi, true) => { - self.ansi_style.update(ansi); - write!(handle, "{}", ansi)?; - } - // Regular text. - (text, false) => { - let text = &*self.preprocess(text, &mut cursor_total); + EscapeSequence::Text(text) => { + let text = self.preprocess(text, &mut cursor_total); let text_trimmed = text.trim_end_matches(|c| c == '\r' || c == '\n'); write!( handle, - "{}", + "{}{}", as_terminal_escaped( style, &format!("{}{}", self.ansi_style, text_trimmed), @@ -572,9 +611,11 @@ impl<'a> Printer for InteractivePrinter<'a> { colored_output, italics, background_color - ) + ), + self.ansi_style.to_reset_sequence(), )?; + // Pad the rest of the line. if text.len() != text_trimmed.len() { if let Some(background_color) = background_color { let ansi_style = Style { @@ -592,6 +633,12 @@ impl<'a> Printer for InteractivePrinter<'a> { write!(handle, "{}", &text[text_trimmed.len()..])?; } } + + // ANSI escape passthrough. + _ => { + write!(handle, "{}", chunk.raw())?; + self.ansi_style.update(chunk); + } } } } @@ -601,17 +648,11 @@ impl<'a> Printer for InteractivePrinter<'a> { } } else { for &(style, region) in ®ions { - let ansi_iterator = AnsiCodeIterator::new(region); + let ansi_iterator = EscapeSequenceIterator::new(region); for chunk in ansi_iterator { match chunk { - // ANSI escape passthrough. - (ansi, true) => { - self.ansi_style.update(ansi); - write!(handle, "{}", ansi)?; - } - // Regular text. - (text, false) => { + EscapeSequence::Text(text) => { let text = self.preprocess( text.trim_end_matches(|c| c == '\r' || c == '\n'), &mut cursor_total, @@ -654,7 +695,7 @@ impl<'a> Printer for InteractivePrinter<'a> { // It wraps. write!( handle, - "{}\n{}", + "{}{}\n{}", as_terminal_escaped( style, &format!("{}{}", self.ansi_style, line_buf), @@ -663,6 +704,7 @@ impl<'a> Printer for InteractivePrinter<'a> { self.config.use_italic_text, background_color ), + self.ansi_style.to_reset_sequence(), panel_wrap.clone().unwrap() )?; @@ -691,6 +733,12 @@ impl<'a> Printer for InteractivePrinter<'a> { ) )?; } + + // ANSI escape passthrough. 
+ _ => { + write!(handle, "{}", chunk.raw())?; + self.ansi_style.update(chunk); + } } } } @@ -711,8 +759,8 @@ impl<'a> Printer for InteractivePrinter<'a> { } if highlight_this_line && self.config.theme == "ansi" { - self.ansi_style.update("^[24m"); - write!(handle, "\x1B[24m")?; + write!(handle, "{}", ANSI_UNDERLINE_DISABLE.raw())?; + self.ansi_style.update(ANSI_UNDERLINE_DISABLE); } Ok(()) diff --git a/src/syntax_mapping.rs b/src/syntax_mapping.rs index c8c361ab..0dac0c02 100644 --- a/src/syntax_mapping.rs +++ b/src/syntax_mapping.rs @@ -1,12 +1,23 @@ use std::path::Path; -use crate::error::Result; -use ignored_suffixes::IgnoredSuffixes; - use globset::{Candidate, GlobBuilder, GlobMatcher}; +use crate::error::Result; +use builtin::BUILTIN_MAPPINGS; +use ignored_suffixes::IgnoredSuffixes; + +mod builtin; pub mod ignored_suffixes; +fn make_glob_matcher(from: &str) -> Result { + let matcher = GlobBuilder::new(from) + .case_insensitive(true) + .literal_separator(true) + .build()? + .compile_matcher(); + Ok(matcher) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum MappingTarget<'a> { @@ -29,204 +40,72 @@ pub enum MappingTarget<'a> { #[derive(Debug, Clone, Default)] pub struct SyntaxMapping<'a> { - mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, + /// User-defined mappings at run time. + /// + /// Rules in front have precedence. + custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, pub(crate) ignored_suffixes: IgnoredSuffixes<'a>, } impl<'a> SyntaxMapping<'a> { - pub fn empty() -> SyntaxMapping<'a> { + pub fn new() -> SyntaxMapping<'a> { Default::default() } - pub fn builtin() -> SyntaxMapping<'a> { - let mut mapping = Self::empty(); - mapping.insert("*.h", MappingTarget::MapTo("C++")).unwrap(); - mapping - .insert(".clang-format", MappingTarget::MapTo("YAML")) - .unwrap(); - mapping.insert("*.fs", MappingTarget::MapTo("F#")).unwrap(); - mapping - .insert("build", MappingTarget::MapToUnknown) - .unwrap(); - mapping - .insert("**/.ssh/config", MappingTarget::MapTo("SSH Config")) - .unwrap(); - mapping - .insert( - "**/bat/config", - MappingTarget::MapTo("Bourne Again Shell (bash)"), - ) - .unwrap(); - mapping - .insert( - "/etc/profile", - MappingTarget::MapTo("Bourne Again Shell (bash)"), - ) - .unwrap(); - mapping - .insert( - "os-release", - MappingTarget::MapTo("Bourne Again Shell (bash)"), - ) - .unwrap(); - mapping - .insert("*.pac", MappingTarget::MapTo("JavaScript (Babel)")) - .unwrap(); - mapping - .insert("fish_history", MappingTarget::MapTo("YAML")) - .unwrap(); - - for glob in ["*.jsonl", "*.sarif"] { - mapping.insert(glob, MappingTarget::MapTo("JSON")).unwrap(); - } - - // See #2151, https://nmap.org/book/nse-language.html - mapping - .insert("*.nse", MappingTarget::MapTo("Lua")) - .unwrap(); - - // See #1008 - mapping - .insert("rails", MappingTarget::MapToUnknown) - .unwrap(); - - mapping - .insert("Containerfile", MappingTarget::MapTo("Dockerfile")) - .unwrap(); - - mapping - .insert("*.ksh", MappingTarget::MapTo("Bourne Again Shell (bash)")) - .unwrap(); - - // Nginx and Apache syntax files both want to style all ".conf" files - // see #1131 and #1137 - mapping - .insert("*.conf", MappingTarget::MapExtensionToUnknown) - .unwrap(); - - for glob in &[ - "/etc/nginx/**/*.conf", - "/etc/nginx/sites-*/**/*", - "nginx.conf", - "mime.types", - ] { - mapping.insert(glob, MappingTarget::MapTo("nginx")).unwrap(); - } - - for glob in &[ - "/etc/apache2/**/*.conf", - "/etc/apache2/sites-*/**/*", - "httpd.conf", - ] { - mapping - .insert(glob, 
MappingTarget::MapTo("Apache Conf")) - .unwrap(); - } - - for glob in &[ - "**/systemd/**/*.conf", - "**/systemd/**/*.example", - "*.automount", - "*.device", - "*.dnssd", - "*.link", - "*.mount", - "*.netdev", - "*.network", - "*.nspawn", - "*.path", - "*.service", - "*.scope", - "*.slice", - "*.socket", - "*.swap", - "*.target", - "*.timer", - ] { - mapping.insert(glob, MappingTarget::MapTo("INI")).unwrap(); - } - - // unix mail spool - for glob in &["/var/spool/mail/*", "/var/mail/*"] { - mapping.insert(glob, MappingTarget::MapTo("Email")).unwrap() - } - - // pacman hooks - mapping - .insert("*.hook", MappingTarget::MapTo("INI")) - .unwrap(); - - mapping - .insert("*.ron", MappingTarget::MapTo("Rust")) - .unwrap(); - - // Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/` - // See e.g. https://git-scm.com/docs/git-config#FILES - match ( - std::env::var_os("XDG_CONFIG_HOME").filter(|val| !val.is_empty()), - std::env::var_os("HOME") - .filter(|val| !val.is_empty()) - .map(|home| Path::new(&home).join(".config")), - ) { - (Some(xdg_config_home), Some(default_config_home)) - if xdg_config_home == default_config_home => { - insert_git_config_global(&mut mapping, &xdg_config_home) - } - (Some(xdg_config_home), Some(default_config_home)) /* else guard */ => { - insert_git_config_global(&mut mapping, &xdg_config_home); - insert_git_config_global(&mut mapping, &default_config_home) - } - (Some(config_home), None) => insert_git_config_global(&mut mapping, &config_home), - (None, Some(config_home)) => insert_git_config_global(&mut mapping, &config_home), - (None, None) => (), - }; - - fn insert_git_config_global(mapping: &mut SyntaxMapping, config_home: impl AsRef) { - let git_config_path = config_home.as_ref().join("git"); - - mapping - .insert( - &git_config_path.join("config").to_string_lossy(), - MappingTarget::MapTo("Git Config"), - ) - .ok(); - - mapping - .insert( - &git_config_path.join("ignore").to_string_lossy(), - MappingTarget::MapTo("Git Ignore"), - ) - .ok(); - - mapping - .insert( - &git_config_path.join("attributes").to_string_lossy(), - MappingTarget::MapTo("Git Attributes"), - ) - .ok(); - } - - mapping - } - pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { - let glob = GlobBuilder::new(from) - .case_insensitive(true) - .literal_separator(true) - .build()?; - self.mappings.push((glob.compile_matcher(), to)); + let matcher = make_glob_matcher(from)?; + self.custom_mappings.push((matcher, to)); Ok(()) } - pub fn mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] { - &self.mappings + /// Returns an iterator over all mappings. User-defined mappings are listed + /// before builtin mappings; mappings in front have higher precedence. + /// + /// Builtin mappings' `GlobMatcher`s are lazily compiled. + /// + /// Note that this function only returns mappings that are valid under the + /// current environment. For details see [`Self::builtin_mappings`]. 
+ pub fn all_mappings(&self) -> impl Iterator)> { + self.custom_mappings() + .iter() + .map(|(matcher, target)| (matcher, target)) // as_ref + .chain( + // we need a map with a closure to "do" the lifetime variance + // see: https://discord.com/channels/273534239310479360/1120124565591425034/1170543402870382653 + // also, clippy false positive: + // see: https://github.com/rust-lang/rust-clippy/issues/9280 + #[allow(clippy::map_identity)] + self.builtin_mappings().map(|rule| rule), + ) } - pub(crate) fn get_syntax_for(&self, path: impl AsRef) -> Option> { + /// Returns an iterator over all valid builtin mappings. Mappings in front + /// have higher precedence. + /// + /// The `GlabMatcher`s are lazily compiled. + /// + /// Mappings that are invalid under the current environment (i.e. rule + /// requires environment variable(s) that is unset, or the joined string + /// after variable(s) replacement is not a valid glob expression) are + /// ignored. + pub fn builtin_mappings( + &self, + ) -> impl Iterator)> { + BUILTIN_MAPPINGS + .iter() + .filter_map(|(matcher, target)| matcher.as_ref().map(|glob| (glob, target))) + } + + /// Returns all user-defined mappings. + pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] { + &self.custom_mappings + } + + pub fn get_syntax_for(&self, path: impl AsRef) -> Option> { // Try matching on the file name as-is. let candidate = Candidate::new(&path); let candidate_filename = path.as_ref().file_name().map(Candidate::new); - for (ref glob, ref syntax) in self.mappings.iter().rev() { + for (glob, syntax) in self.all_mappings() { if glob.is_match_candidate(&candidate) || candidate_filename .as_ref() @@ -252,9 +131,46 @@ impl<'a> SyntaxMapping<'a> { #[cfg(test)] mod tests { use super::*; + #[test] - fn basic() { - let mut map = SyntaxMapping::empty(); + fn builtin_mappings_work() { + let map = SyntaxMapping::new(); + + assert_eq!( + map.get_syntax_for("/path/to/build"), + Some(MappingTarget::MapToUnknown) + ); + } + + #[test] + fn all_fixed_builtin_mappings_can_compile() { + let map = SyntaxMapping::new(); + + // collect call evaluates all lazy closures + // fixed builtin mappings will panic if they fail to compile + let _mappings = map.builtin_mappings().collect::>(); + } + + #[test] + fn builtin_mappings_matcher_only_compile_once() { + let map = SyntaxMapping::new(); + + let two_iterations: Vec<_> = (0..2) + .map(|_| { + // addresses of every matcher + map.builtin_mappings() + .map(|(matcher, _)| matcher as *const _ as usize) + .collect::>() + }) + .collect(); + + // if the matchers are only compiled once, their address should remain the same + assert_eq!(two_iterations[0], two_iterations[1]); + } + + #[test] + fn custom_mappings_work() { + let mut map = SyntaxMapping::new(); map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML")) .ok(); map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore")) @@ -273,52 +189,32 @@ mod tests { } #[test] - fn user_can_override_builtin_mappings() { - let mut map = SyntaxMapping::builtin(); + fn custom_mappings_override_builtin() { + let mut map = SyntaxMapping::new(); assert_eq!( - map.get_syntax_for("/etc/profile"), - Some(MappingTarget::MapTo("Bourne Again Shell (bash)")) + map.get_syntax_for("/path/to/httpd.conf"), + Some(MappingTarget::MapTo("Apache Conf")) ); - map.insert("/etc/profile", MappingTarget::MapTo("My Syntax")) + map.insert("httpd.conf", MappingTarget::MapTo("My Syntax")) .ok(); assert_eq!( - map.get_syntax_for("/etc/profile"), + map.get_syntax_for("/path/to/httpd.conf"), 
Some(MappingTarget::MapTo("My Syntax")) ); } #[test] - fn builtin_mappings() { - let map = SyntaxMapping::builtin(); + fn custom_mappings_precedence() { + let mut map = SyntaxMapping::new(); + map.insert("/path/to/foo", MappingTarget::MapTo("alpha")) + .ok(); + map.insert("/path/to/foo", MappingTarget::MapTo("bravo")) + .ok(); assert_eq!( - map.get_syntax_for("/path/to/build"), - Some(MappingTarget::MapToUnknown) + map.get_syntax_for("/path/to/foo"), + Some(MappingTarget::MapTo("alpha")) ); } - - #[test] - /// verifies that SyntaxMapping::builtin() doesn't repeat `Glob`-based keys - fn no_duplicate_builtin_keys() { - let mappings = SyntaxMapping::builtin().mappings; - for i in 0..mappings.len() { - let tail = mappings[i + 1..].into_iter(); - let (dupl, _): (Vec<_>, Vec<_>) = - tail.partition(|item| item.0.glob() == mappings[i].0.glob()); - - // emit repeats on failure - assert_eq!( - dupl.len(), - 0, - "Glob pattern `{}` mapped to multiple: {:?}", - mappings[i].0.glob().glob(), - { - let (_, mut dupl_targets): (Vec, Vec) = - dupl.into_iter().cloned().unzip(); - dupl_targets.push(mappings[i].1) - }, - ) - } - } } diff --git a/src/syntax_mapping/builtin.rs b/src/syntax_mapping/builtin.rs new file mode 100644 index 00000000..1822be57 --- /dev/null +++ b/src/syntax_mapping/builtin.rs @@ -0,0 +1,91 @@ +use std::env; + +use globset::GlobMatcher; +use once_cell::sync::Lazy; + +use crate::syntax_mapping::{make_glob_matcher, MappingTarget}; + +// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the +// build script (/build/syntax_mapping.rs). +include!(concat!( + env!("OUT_DIR"), + "/codegen_static_syntax_mappings.rs" +)); + +// The defined matcher strings are analysed at compile time and converted into +// lazily-compiled `GlobMatcher`s. This is so that the string searches are moved +// from run time to compile time, thus improving startup performance. +// +// To any future maintainer (including possibly myself) wondering why there is +// not a `BuiltinMatcher` enum that looks like this: +// +// ``` +// enum BuiltinMatcher { +// Fixed(&'static str), +// Dynamic(Lazy>), +// } +// ``` +// +// Because there was. I tried it and threw it out. +// +// Naively looking at the problem from a distance, this may seem like a good +// design (strongly typed etc. etc.). It would also save on compiled size by +// extracting out common behaviour into functions. But while actually +// implementing the lazy matcher compilation logic, I realised that it's most +// convenient for `BUILTIN_MAPPINGS` to have the following type: +// +// `[(Lazy>, MappingTarget); N]` +// +// The benefit for this is that operations like listing all builtin mappings +// would be effectively memoised. The caller would not have to compile another +// `GlobMatcher` for rules that they have previously visited. +// +// Unfortunately, this means we are going to have to store a distinct closure +// for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer +// of indirection. +// +// In the current implementation, the closure within each generated rule simply +// calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on +// whether the defined matcher contains dynamic segments or not. + +/// Compile a fixed glob string into a glob matcher. +/// +/// A failure to compile is a fatal error. +/// +/// Used internally by `Lazy>`'s lazy evaluation closure. 
+fn build_matcher_fixed(from: &str) -> GlobMatcher {
+    make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
+}
+
+/// Join a list of matcher segments to create a glob string, replacing all
+/// environment variables, then compile to a glob matcher.
+///
+/// Returns `None` if any replacement fails, or if the joined glob string fails
+/// to compile.
+///
+/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
+fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
+    // join segments
+    let mut buf = String::new();
+    for seg in segs {
+        match seg {
+            MatcherSegment::Text(s) => buf.push_str(s),
+            MatcherSegment::Env(var) => {
+                let replaced = env::var(var).ok()?;
+                buf.push_str(&replaced);
+            }
+        }
+    }
+    // compile glob matcher
+    let matcher = make_glob_matcher(&buf).ok()?;
+    Some(matcher)
+}
+
+/// A segment of a dynamic builtin matcher.
+///
+/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
+#[derive(Clone, Debug)]
+enum MatcherSegment {
+    Text(&'static str),
+    Env(&'static str),
+}
diff --git a/src/syntax_mapping/builtins/README.md b/src/syntax_mapping/builtins/README.md
new file mode 100644
index 00000000..29cf43ee
--- /dev/null
+++ b/src/syntax_mapping/builtins/README.md
@@ -0,0 +1,116 @@
+# `/src/syntax_mapping/builtins`
+
+The files in this directory define path/name-based syntax mappings, which amend
+and take precedence over the extension/content-based syntax mappings provided by
+[syntect](https://github.com/trishume/syntect).
+
+## File organisation
+
+Each TOML file should describe the syntax mappings of a single application, or
+otherwise a set of logically-related rules.
+
+What defines "a single application" here is deliberately vague, since the
+file-splitting is purely for maintainability reasons. (Technically, we could
+just as well use a single TOML file.) So just use common sense.
+
+TOML files should reside in the corresponding subdirectory of the platform(s)
+that they intend to target. At compile time, the build script will go through
+each subdirectory that is applicable to the compilation target, collect the
+syntax mappings defined by all TOML files, and embed them into the binary.
+
+## File syntax
+
+Each TOML file should contain a single section named `mappings`, with each of
+its keys being a language identifier (first column of `bat -L`; also referred to
+as "target").
+
+The value of each key should be an array of strings, with each item being a glob
+matcher. We will call each of these items a "rule".
+
+For example, if `foo-application` uses both TOML and YAML configuration files,
+we could write something like this:
+
+```toml
+# 30-foo-application.toml
+[mappings]
+"TOML" = [
+    # rules for TOML syntax go here
+    "/usr/share/foo-application/toml-config/*.conf",
+    "/etc/foo-application/toml-config/*.conf",
+]
+"YAML" = [
+    # rules for YAML syntax go here
+    # ...
+]
+```
+
+### Dynamic environment variable replacement
+
+In addition to the standard glob matcher syntax, rules also support dynamic
+replacement of environment variables at runtime. This allows us to concisely
+handle things like [XDG](https://specifications.freedesktop.org/basedir-spec/latest/).
+
+All environment variables intended to be replaced at runtime must be enclosed in
+`${}`, for example `"/foo/*/${YOUR_ENV}-suffix/*.log"`. Note that this is the
+**only** admissible syntax; other variable substitution syntaxes are not
+supported and will either cause a compile time error, or be treated as plain
+text.
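To make the replacement mechanism concrete, the following is a minimal, self-contained Rust sketch of how a rule containing a `${}` placeholder can be expanded and then compiled with the `globset` crate (the same crate `make_glob_matcher` uses in this patch). The `expand_and_compile` helper and the hard-coded lookup values are purely illustrative; the real logic lives in `build/syntax_mapping.rs` (build-time parsing) and `src/syntax_mapping/builtin.rs` (runtime expansion).

```rust
use globset::{GlobBuilder, GlobMatcher};

/// Expand `${VAR}` placeholders in a rule, then compile the result into a glob
/// matcher configured like `make_glob_matcher` in this patch (case-insensitive,
/// `/` treated as a literal separator). Returns `None` if a variable is missing
/// or the expanded glob is invalid, i.e. the rule is ignored.
fn expand_and_compile(
    rule: &str,
    lookup: impl Fn(&str) -> Option<String>, // e.g. |v| std::env::var(v).ok() at runtime
) -> Option<GlobMatcher> {
    // Simplified expansion: only handles well-formed `${VAR}` placeholders.
    let mut glob_str = String::new();
    let mut rest = rule;
    while let Some(start) = rest.find("${") {
        let end = start + rest[start..].find('}')?;
        glob_str.push_str(&rest[..start]); // literal text before the placeholder
        glob_str.push_str(&lookup(&rest[start + 2..end])?); // value looked up at runtime
        rest = &rest[end + 1..];
    }
    glob_str.push_str(rest);

    GlobBuilder::new(&glob_str)
        .case_insensitive(true)
        .literal_separator(true)
        .build()
        .ok()
        .map(|glob| glob.compile_matcher())
}

fn main() {
    // Stand-in for the real environment, so the example is deterministic.
    let env = |var: &str| (var == "XDG_CONFIG_HOME").then(|| "/home/user/.config".to_string());

    let rule = "${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf";
    let matcher = expand_and_compile(rule, env).expect("variable is set and glob is valid");
    assert!(matcher.is_match("/home/user/.config/foo-application/toml-config/a.conf"));

    // A rule whose variable is unset is skipped entirely.
    assert!(expand_and_compile("${UNSET_VARIABLE}/*.log", |_| None).is_none());
}
```

As in the real implementation, a rule whose variable cannot be resolved yields `None` and is simply skipped.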
+
+For example, if `foo-application` also supports per-user configuration files, we
+could write something like this:
+
+```toml
+# 30-foo-application.toml
+[mappings]
+"TOML" = [
+    # rules for TOML syntax go here
+    "/usr/share/foo-application/toml-config/*.conf",
+    "/etc/foo-application/toml-config/*.conf",
+    "${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf",
+    "${HOME}/.config/foo-application/toml-config/*.conf",
+]
+"YAML" = [
+    # rules for YAML syntax go here
+    # ...
+]
+```
+
+If any environment variable replacement in a rule fails (for example when a
+variable is unset), or if the glob string after replacements is invalid, the
+entire rule will be ignored.
+
+### Explicitly mapping to unknown
+
+Sometimes it may be necessary to "unset" a particular syntect mapping - perhaps
+a syntax's matching rules are "too greedy" and are claiming files that they should
+not. In this case, there are two special identifiers:
+`MappingTarget::MapToUnknown` and `MappingTarget::MapExtensionToUnknown`
+(corresponding to the two variants of the `syntax_mapping::MappingTarget` enum).
+
+An example of this would be `*.conf` files in general. So we may write something
+like this:
+
+```toml
+# 99-unset-ambiguous-extensions.toml
+[mappings]
+"MappingTarget::MapExtensionToUnknown" = [
+    "*.conf",
+]
+```
+
+## Ordering
+
+At compile time, all TOML files applicable to the target are processed in
+lexicographical filename order. So `00-foo.toml` takes precedence over
+`10-bar.toml`, which takes precedence over `20-baz.toml`, and so on. Note that
+**only** the filenames of the TOML files are taken into account; the
+subdirectories they are placed in have no influence on ordering.
+
+This behaviour can occasionally be useful for creating high/low priority rules,
+such as in the aforementioned example of explicitly mapping `*.conf` files to
+unknown. Generally this should not be much of a concern though, since rules
+should be written as specifically as possible for each application.
+
+Rules within each TOML file are processed (and therefore matched) in the order
+in which they are defined. At runtime, the syntax selection algorithm will
+short-circuit and return the target of the first matching rule.
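To illustrate the precedence behaviour described above, here is a minimal Rust sketch of a first-match-wins lookup over an ordered list of compiled globs. It is a simplification of `SyntaxMapping::get_syntax_for` (which additionally distinguishes the `MappingTarget` variants and puts user-defined rules ahead of builtin ones); the rule list and target labels below are made up for illustration.

```rust
use std::path::Path;

use globset::{GlobBuilder, GlobMatcher};

fn glob(pattern: &str) -> GlobMatcher {
    GlobBuilder::new(pattern)
        .case_insensitive(true)
        .literal_separator(true)
        .build()
        .expect("valid glob")
        .compile_matcher()
}

/// First-match-wins lookup: every glob is tried against the full path and the
/// bare file name, in precedence order; the first hit decides the target.
fn syntax_for<'a>(rules: &'a [(GlobMatcher, &'a str)], path: &str) -> Option<&'a str> {
    let file_name = Path::new(path).file_name();
    rules
        .iter()
        .find(|(matcher, _)| {
            matcher.is_match(path) || file_name.map_or(false, |name| matcher.is_match(name))
        })
        .map(|(_, target)| *target)
}

fn main() {
    // Hypothetical rule list, already flattened into precedence order: the
    // earlier, more specific rule wins over the later, catch-all one.
    let rules = [
        (glob("/etc/foo-application/toml-config/*.conf"), "TOML"),
        (glob("*.conf"), "unknown (MapExtensionToUnknown)"),
    ];

    assert_eq!(
        syntax_for(&rules, "/etc/foo-application/toml-config/a.conf"),
        Some("TOML")
    );
    assert_eq!(
        syntax_for(&rules, "/etc/bar.conf"),
        Some("unknown (MapExtensionToUnknown)")
    );
}
```

Because the catch-all `*.conf` rule sits last, it only applies when no earlier, more specific rule has matched, which is why the `99-unset-ambiguous-*` files deliberately sort after the `50-*` files.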
diff --git a/src/syntax_mapping/builtins/bsd-family/.gitkeep b/src/syntax_mapping/builtins/bsd-family/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/bsd-family/50-os-release.toml b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml new file mode 100644 index 00000000..91b003d7 --- /dev/null +++ b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml @@ -0,0 +1,2 @@ +[mappings] +"Bourne Again Shell (bash)" = ["/etc/os-release", "/var/run/os-release"] diff --git a/src/syntax_mapping/builtins/common/.gitkeep b/src/syntax_mapping/builtins/common/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/common/50-apache.toml b/src/syntax_mapping/builtins/common/50-apache.toml new file mode 100644 index 00000000..0e557aff --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-apache.toml @@ -0,0 +1,2 @@ +[mappings] +"Apache Conf" = ["httpd.conf"] diff --git a/src/syntax_mapping/builtins/common/50-bat.toml b/src/syntax_mapping/builtins/common/50-bat.toml new file mode 100644 index 00000000..e70b6b09 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-bat.toml @@ -0,0 +1,2 @@ +[mappings] +"Bourne Again Shell (bash)" = ["**/bat/config"] diff --git a/src/syntax_mapping/builtins/common/50-container.toml b/src/syntax_mapping/builtins/common/50-container.toml new file mode 100644 index 00000000..ad48c29b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-container.toml @@ -0,0 +1,2 @@ +[mappings] +"Dockerfile" = ["Containerfile"] diff --git a/src/syntax_mapping/builtins/common/50-cpp.toml b/src/syntax_mapping/builtins/common/50-cpp.toml new file mode 100644 index 00000000..99d8a32b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-cpp.toml @@ -0,0 +1,6 @@ +[mappings] +"C++" = [ + # probably better than the default Objective C mapping #877 + "*.h", +] +"YAML" = [".clang-format"] diff --git a/src/syntax_mapping/builtins/common/50-f-sharp.toml b/src/syntax_mapping/builtins/common/50-f-sharp.toml new file mode 100644 index 00000000..a39e7ebd --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-f-sharp.toml @@ -0,0 +1,2 @@ +[mappings] +"F#" = ["*.fs"] diff --git a/src/syntax_mapping/builtins/common/50-git.toml b/src/syntax_mapping/builtins/common/50-git.toml new file mode 100644 index 00000000..44a49a25 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-git.toml @@ -0,0 +1,10 @@ +# Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/` +# See e.g. 
https://git-scm.com/docs/git-config#FILES + +[mappings] +"Git Config" = ["${XDG_CONFIG_HOME}/git/config", "${HOME}/.config/git/config"] +"Git Ignore" = ["${XDG_CONFIG_HOME}/git/ignore", "${HOME}/.config/git/ignore"] +"Git Attributes" = [ + "${XDG_CONFIG_HOME}/git/attributes", + "${HOME}/.config/git/attributes", +] diff --git a/src/syntax_mapping/builtins/common/50-jsonl.toml b/src/syntax_mapping/builtins/common/50-jsonl.toml new file mode 100644 index 00000000..4b70a4d0 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-jsonl.toml @@ -0,0 +1,3 @@ +# JSON Lines is a simple variation of JSON #2535 +[mappings] +"JSON" = ["*.jsonl"] diff --git a/src/syntax_mapping/builtins/common/50-nginx.toml b/src/syntax_mapping/builtins/common/50-nginx.toml new file mode 100644 index 00000000..305418bb --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-nginx.toml @@ -0,0 +1,2 @@ +[mappings] +"nginx" = ["nginx.conf", "mime.types"] diff --git a/src/syntax_mapping/builtins/common/50-nmap.toml b/src/syntax_mapping/builtins/common/50-nmap.toml new file mode 100644 index 00000000..f79a5e97 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-nmap.toml @@ -0,0 +1,3 @@ +[mappings] +# See #2151, https://nmap.org/book/nse-language.html +"Lua" = ["*.nse"] diff --git a/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml new file mode 100644 index 00000000..70e51c92 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml @@ -0,0 +1,3 @@ +# 1515 +[mappings] +"JavaScript (Babel)" = ["*.pac"] diff --git a/src/syntax_mapping/builtins/common/50-ron.toml b/src/syntax_mapping/builtins/common/50-ron.toml new file mode 100644 index 00000000..bc04221b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-ron.toml @@ -0,0 +1,3 @@ +# Rusty Object Notation #2427 +[mappings] +"Rust" = ["*.ron"] diff --git a/src/syntax_mapping/builtins/common/50-sarif.toml b/src/syntax_mapping/builtins/common/50-sarif.toml new file mode 100644 index 00000000..2542b9cd --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-sarif.toml @@ -0,0 +1,3 @@ +# SARIF is a format for reporting static analysis results #2695 +[mappings] +"JSON" = ["*.sarif"] diff --git a/src/syntax_mapping/builtins/common/50-ssh.toml b/src/syntax_mapping/builtins/common/50-ssh.toml new file mode 100644 index 00000000..6ec24050 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-ssh.toml @@ -0,0 +1,2 @@ +[mappings] +"SSH Config" = ["**/.ssh/config"] diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml new file mode 100644 index 00000000..d87537d7 --- /dev/null +++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml @@ -0,0 +1,5 @@ +[mappings] +"MappingTarget::MapExtensionToUnknown" = [ + # common extension used for all kinds of formats + "*.conf", +] diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml new file mode 100644 index 00000000..21941ebc --- /dev/null +++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml @@ -0,0 +1,7 @@ +[mappings] +"MappingTarget::MapToUnknown" = [ + # "NAnt Build File" should only match *.build files, not files named "build" + "build", + # "bin/rails" scripts in a Ruby project misidentified as HTML (Rails) #1008 + "rails", +] diff --git a/src/syntax_mapping/builtins/linux/.gitkeep 
b/src/syntax_mapping/builtins/linux/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/linux/50-os-release.toml b/src/syntax_mapping/builtins/linux/50-os-release.toml new file mode 100644 index 00000000..791599aa --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-os-release.toml @@ -0,0 +1,7 @@ +[mappings] +"Bourne Again Shell (bash)" = [ + "/etc/os-release", + "/usr/lib/os-release", + "/etc/initrd-release", + "/usr/lib/extension-release.d/extension-release.*", +] diff --git a/src/syntax_mapping/builtins/linux/50-pacman.toml b/src/syntax_mapping/builtins/linux/50-pacman.toml new file mode 100644 index 00000000..655118c5 --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-pacman.toml @@ -0,0 +1,3 @@ +[mappings] +# pacman hooks +"INI" = ["/usr/share/libalpm/hooks/*.hook", "/etc/pacman.d/hooks/*.hook"] diff --git a/src/syntax_mapping/builtins/linux/50-systemd.toml b/src/syntax_mapping/builtins/linux/50-systemd.toml new file mode 100644 index 00000000..6f91b0be --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-systemd.toml @@ -0,0 +1,21 @@ +[mappings] +"INI" = [ + "**/systemd/**/*.conf", + "**/systemd/**/*.example", + "*.automount", + "*.device", + "*.dnssd", + "*.link", + "*.mount", + "*.netdev", + "*.network", + "*.nspawn", + "*.path", + "*.service", + "*.scope", + "*.slice", + "*.socket", + "*.swap", + "*.target", + "*.timer", +] diff --git a/src/syntax_mapping/builtins/macos/.gitkeep b/src/syntax_mapping/builtins/macos/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/unix-family/.gitkeep b/src/syntax_mapping/builtins/unix-family/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/unix-family/50-apache.toml b/src/syntax_mapping/builtins/unix-family/50-apache.toml new file mode 100644 index 00000000..dfb920f3 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-apache.toml @@ -0,0 +1,2 @@ +[mappings] +"Apache Conf" = ["/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml new file mode 100644 index 00000000..f2a9e224 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml @@ -0,0 +1,2 @@ +[mappings] +"YAML" = ["fish_history"] diff --git a/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml new file mode 100644 index 00000000..6c788d1d --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml @@ -0,0 +1,3 @@ +# KornShell is backward-compatible with the Bourne shell #2633 +[mappings] +"Bourne Again Shell (bash)" = ["*.ksh"] diff --git a/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml new file mode 100644 index 00000000..c798358c --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml @@ -0,0 +1,2 @@ +[mappings] +"Email" = ["/var/spool/mail/*", "/var/mail/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-nginx.toml b/src/syntax_mapping/builtins/unix-family/50-nginx.toml new file mode 100644 index 00000000..580b65d8 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-nginx.toml @@ -0,0 +1,2 @@ +[mappings] +"nginx" = ["/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-shell.toml 
b/src/syntax_mapping/builtins/unix-family/50-shell.toml new file mode 100644 index 00000000..d015ca81 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-shell.toml @@ -0,0 +1,5 @@ +[mappings] +"Bourne Again Shell (bash)" = [ + # used by lots of shells + "/etc/profile", +] diff --git a/src/syntax_mapping/builtins/windows/.gitkeep b/src/syntax_mapping/builtins/windows/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/vscreen.rs b/src/vscreen.rs index ea5d4da6..c902d42b 100644 --- a/src/vscreen.rs +++ b/src/vscreen.rs @@ -1,4 +1,8 @@ -use std::fmt::{Display, Formatter}; +use std::{ + fmt::{Display, Formatter}, + iter::Peekable, + str::CharIndices, +}; // Wrapper to avoid unnecessary branching when input doesn't have ANSI escape sequences. pub struct AnsiStyle { @@ -10,7 +14,7 @@ impl AnsiStyle { AnsiStyle { attributes: None } } - pub fn update(&mut self, sequence: &str) -> bool { + pub fn update(&mut self, sequence: EscapeSequence) -> bool { match &mut self.attributes { Some(a) => a.update(sequence), None => { @@ -19,6 +23,13 @@ impl AnsiStyle { } } } + + pub fn to_reset_sequence(&mut self) -> String { + match &mut self.attributes { + Some(a) => a.to_reset_sequence(), + None => String::new(), + } + } } impl Display for AnsiStyle { @@ -31,6 +42,8 @@ impl Display for AnsiStyle { } struct Attributes { + has_sgr_sequences: bool, + foreground: String, background: String, underlined: String, @@ -61,11 +74,20 @@ struct Attributes { /// ON: ^[9m /// OFF: ^[29m strike: String, + + /// The hyperlink sequence. + /// FORMAT: \x1B]8;{ID};{URL}\e\\ + /// + /// `\e\\` may be replaced with BEL `\x07`. + /// Setting both {ID} and {URL} to an empty string represents no hyperlink. + hyperlink: String, } impl Attributes { pub fn new() -> Self { Attributes { + has_sgr_sequences: false, + foreground: "".to_owned(), background: "".to_owned(), underlined: "".to_owned(), @@ -76,34 +98,56 @@ impl Attributes { underline: "".to_owned(), italic: "".to_owned(), strike: "".to_owned(), + hyperlink: "".to_owned(), } } /// Update the attributes with an escape sequence. /// Returns `false` if the sequence is unsupported. - pub fn update(&mut self, sequence: &str) -> bool { - let mut chars = sequence.char_indices().skip(1); - - if let Some((_, t)) = chars.next() { - match t { - '(' => self.update_with_charset('(', chars.map(|(_, c)| c)), - ')' => self.update_with_charset(')', chars.map(|(_, c)| c)), - '[' => { - if let Some((i, last)) = chars.last() { - // SAFETY: Always starts with ^[ and ends with m. - self.update_with_csi(last, &sequence[2..i]) - } else { - false + pub fn update(&mut self, sequence: EscapeSequence) -> bool { + use EscapeSequence::*; + match sequence { + Text(_) => return false, + Unknown(_) => { /* defer to update_with_unsupported */ } + OSC { + raw_sequence, + command, + .. + } => { + if command.starts_with("8;") { + return self.update_with_hyperlink(raw_sequence); + } + /* defer to update_with_unsupported */ + } + CSI { + final_byte, + parameters, + .. + } => { + match final_byte { + "m" => return self.update_with_sgr(parameters), + _ => { + // NOTE(eth-p): We might want to ignore these, since they involve cursor or buffer manipulation. + /* defer to update_with_unsupported */ } } - _ => self.update_with_unsupported(sequence), } - } else { - false + NF { nf_sequence, .. 
} => { + let mut iter = nf_sequence.chars(); + match iter.next() { + Some('(') => return self.update_with_charset('(', iter), + Some(')') => return self.update_with_charset(')', iter), + _ => { /* defer to update_with_unsupported */ } + } + } + } + + self.update_with_unsupported(sequence.raw()) } fn sgr_reset(&mut self) { + self.has_sgr_sequences = false; + self.foreground.clear(); self.background.clear(); self.underlined.clear(); @@ -121,6 +165,7 @@ .map(|p| p.parse::<u16>()) .map(|p| p.unwrap_or(0)); // Treat errors as 0. + self.has_sgr_sequences = true; while let Some(p) = iter.next() { match p { 0 => self.sgr_reset(), @@ -149,19 +194,23 @@ true } - fn update_with_csi(&mut self, finalizer: char, sequence: &str) -> bool { - if finalizer == 'm' { - self.update_with_sgr(sequence) - } else { - false - } - } - fn update_with_unsupported(&mut self, sequence: &str) -> bool { self.unknown_buffer.push_str(sequence); false } + fn update_with_hyperlink(&mut self, sequence: &str) -> bool { + if sequence == "8;;" { + // Empty hyperlink ID and HREF -> end of hyperlink. + self.hyperlink.clear(); + } else { + self.hyperlink.clear(); + self.hyperlink.push_str(sequence); + } + + true + } + fn update_with_charset(&mut self, kind: char, set: impl Iterator<Item = char>) -> bool { self.charset = format!("\x1B{}{}", kind, set.take(1).collect::<String>()); true @@ -179,13 +228,35 @@ _ => format!("\x1B[{}m", color), } } + + /// Gets an ANSI escape sequence to reset all the known attributes. + pub fn to_reset_sequence(&self) -> String { + let mut buf = String::with_capacity(17); + + // TODO: Enable me in a later pull request. + // if self.has_sgr_sequences { + // buf.push_str("\x1B[m"); + // } + + if !self.hyperlink.is_empty() { + buf.push_str("\x1B]8;;\x1B\\"); // Disable hyperlink. + } + + // TODO: Enable me in a later pull request. + // if !self.charset.is_empty() { + // // https://espterm.github.io/docs/VT100%20escape%20codes.html + // buf.push_str("\x1B(B\x1B)B"); // setusg0 and setusg1 + // } + + buf + } } impl Display for Attributes { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, - "{}{}{}{}{}{}{}{}{}", + "{}{}{}{}{}{}{}{}{}{}", self.foreground, self.background, self.underlined, @@ -195,6 +266,7 @@ self.underline, self.italic, self.strike, + self.hyperlink, ) } } @@ -210,3 +282,612 @@ fn join( .collect::<Vec<String>>() .join(delimiter) } + +/// A range of indices for a raw ANSI escape sequence. +#[derive(Debug, PartialEq)] +enum EscapeSequenceOffsets { + Text { + start: usize, + end: usize, + }, + Unknown { + start: usize, + end: usize, + }, + NF { + // https://en.wikipedia.org/wiki/ANSI_escape_code#nF_Escape_sequences + start_sequence: usize, + start: usize, + end: usize, + }, + OSC { + // https://en.wikipedia.org/wiki/ANSI_escape_code#OSC_(Operating_System_Command)_sequences + start_sequence: usize, + start_command: usize, + start_terminator: usize, + end: usize, + }, + CSI { + // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences + start_sequence: usize, + start_parameters: usize, + start_intermediates: usize, + start_final_byte: usize, + end: usize, + }, +} + +/// An iterator over the offsets of ANSI/VT escape sequences within a string.
+/// +/// ## Example +/// +/// ```ignore +/// let iter = EscapeSequenceOffsetsIterator::new("\x1B[33mThis is yellow text.\x1B[m"); +/// ``` +struct EscapeSequenceOffsetsIterator<'a> { + text: &'a str, + chars: Peekable<CharIndices<'a>>, +} + +impl<'a> EscapeSequenceOffsetsIterator<'a> { + pub fn new(text: &'a str) -> EscapeSequenceOffsetsIterator<'a> { + return EscapeSequenceOffsetsIterator { + text, + chars: text.char_indices().peekable(), + }; + } + + /// Takes values from the iterator while the predicate returns true. + /// If the predicate returns false, that value is left. + fn chars_take_while(&mut self, pred: impl Fn(char) -> bool) -> Option<(usize, usize)> { + if self.chars.peek().is_none() { + return None; + } + + let start = self.chars.peek().unwrap().0; + let mut end: usize = start; + while let Some((i, c)) = self.chars.peek() { + if !pred(*c) { + break; + } + + end = *i + c.len_utf8(); + self.chars.next(); + } + + Some((start, end)) + } + + fn next_text(&mut self) -> Option<EscapeSequenceOffsets> { + match self.chars_take_while(|c| c != '\x1B') { + None => None, + Some((start, end)) => Some(EscapeSequenceOffsets::Text { start, end }), + } + } + + fn next_sequence(&mut self) -> Option<EscapeSequenceOffsets> { + let (start_sequence, c) = self.chars.next().expect("to not be finished"); + match self.chars.peek() { + None => Some(EscapeSequenceOffsets::Unknown { + start: start_sequence, + end: start_sequence + c.len_utf8(), + }), + + Some((_, ']')) => self.next_osc(start_sequence), + Some((_, '[')) => self.next_csi(start_sequence), + Some((i, c)) => match c { + '\x20'..='\x2F' => self.next_nf(start_sequence), + c => Some(EscapeSequenceOffsets::Unknown { + start: start_sequence, + end: i + c.len_utf8(), + }), + }, + } + } + + fn next_osc(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> { + let (osc_open_index, osc_open_char) = self.chars.next().expect("to not be finished"); + debug_assert_eq!(osc_open_char, ']'); + + let mut start_terminator: usize; + let mut end_sequence: usize; + + loop { + match self.chars_take_while(|c| !matches!(c, '\x07' | '\x1B')) { + None => { + start_terminator = self.text.len(); + end_sequence = start_terminator; + break; + } + + Some((_, end)) => { + start_terminator = end; + end_sequence = end; + } + } + + match self.chars.next() { + Some((ti, '\x07')) => { + end_sequence = ti + '\x07'.len_utf8(); + break; + } + + Some((ti, '\x1B')) => { + match self.chars.next() { + Some((i, '\\')) => { + end_sequence = i + '\\'.len_utf8(); + break; + } + + None => { + end_sequence = ti + '\x1B'.len_utf8(); + break; + } + + _ => { + // Repeat, since `\\`(anything) isn't a valid ST. + } + } + } + + None => { + // Prematurely ends. + break; + } + + Some((_, tc)) => { + panic!("this should not be reached: char {:?}", tc) + } + } + } + + Some(EscapeSequenceOffsets::OSC { + start_sequence, + start_command: osc_open_index + osc_open_char.len_utf8(), + start_terminator: start_terminator, + end: end_sequence, + }) + } + + fn next_csi(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> { + let (csi_open_index, csi_open_char) = self.chars.next().expect("to not be finished"); + debug_assert_eq!(csi_open_char, '['); + + let start_parameters: usize = csi_open_index + csi_open_char.len_utf8(); + + // Keep iterating while within the range of `0x30-0x3F`. + let mut start_intermediates: usize = start_parameters; + if let Some((_, end)) = self.chars_take_while(|c| matches!(c, '\x30'..='\x3F')) { + start_intermediates = end; + } + + // Keep iterating while within the range of `0x20-0x2F`.
+ let mut start_final_byte: usize = start_intermediates; + if let Some((_, end)) = self.chars_take_while(|c| matches!(c, '\x20'..='\x2F')) { + start_final_byte = end; + } + + // Take the last char. + let end_of_sequence = match self.chars.next() { + None => start_final_byte, + Some((i, c)) => i + c.len_utf8(), + }; + + Some(EscapeSequenceOffsets::CSI { + start_sequence, + start_parameters, + start_intermediates, + start_final_byte, + end: end_of_sequence, + }) + } + + fn next_nf(&mut self, start_sequence: usize) -> Option<EscapeSequenceOffsets> { + let (nf_open_index, nf_open_char) = self.chars.next().expect("to not be finished"); + debug_assert!(matches!(nf_open_char, '\x20'..='\x2F')); + + let start: usize = nf_open_index; + let mut end: usize = start; + + // Keep iterating while within the range of `0x20-0x2F`. + match self.chars_take_while(|c| matches!(c, '\x20'..='\x2F')) { + Some((_, i)) => end = i, + None => { + return Some(EscapeSequenceOffsets::NF { + start_sequence, + start, + end, + }) + } + } + + // Get the final byte. + match self.chars.next() { + Some((i, c)) => end = i + c.len_utf8(), + None => {} + } + + Some(EscapeSequenceOffsets::NF { + start_sequence, + start, + end, + }) + } +} + +impl<'a> Iterator for EscapeSequenceOffsetsIterator<'a> { + type Item = EscapeSequenceOffsets; + fn next(&mut self) -> Option<Self::Item> { + match self.chars.peek() { + Some((_, '\x1B')) => self.next_sequence(), + Some((_, _)) => self.next_text(), + None => None, + } + } +} + +/// An iterator over ANSI/VT escape sequences within a string. +/// +/// ## Example +/// +/// ```ignore +/// let iter = EscapeSequenceIterator::new("\x1B[33mThis is yellow text.\x1B[m"); +/// ``` +pub struct EscapeSequenceIterator<'a> { + text: &'a str, + offset_iter: EscapeSequenceOffsetsIterator<'a>, +} + +impl<'a> EscapeSequenceIterator<'a> { + pub fn new(text: &'a str) -> EscapeSequenceIterator<'a> { + return EscapeSequenceIterator { + text, + offset_iter: EscapeSequenceOffsetsIterator::new(text), + }; + } +} + +impl<'a> Iterator for EscapeSequenceIterator<'a> { + type Item = EscapeSequence<'a>; + fn next(&mut self) -> Option<Self::Item> { + use EscapeSequenceOffsets::*; + self.offset_iter.next().map(|offsets| match offsets { + Unknown { start, end } => EscapeSequence::Unknown(&self.text[start..end]), + Text { start, end } => EscapeSequence::Text(&self.text[start..end]), + NF { + start_sequence, + start, + end, + } => EscapeSequence::NF { + raw_sequence: &self.text[start_sequence..end], + nf_sequence: &self.text[start..end], + }, + OSC { + start_sequence, + start_command, + start_terminator, + end, + } => EscapeSequence::OSC { + raw_sequence: &self.text[start_sequence..end], + command: &self.text[start_command..start_terminator], + terminator: &self.text[start_terminator..end], + }, + CSI { + start_sequence, + start_parameters, + start_intermediates, + start_final_byte, + end, + } => EscapeSequence::CSI { + raw_sequence: &self.text[start_sequence..end], + parameters: &self.text[start_parameters..start_intermediates], + intermediates: &self.text[start_intermediates..start_final_byte], + final_byte: &self.text[start_final_byte..end], + }, + }) + } +} + +/// A parsed ANSI/VT100 escape sequence.
+#[derive(Debug, PartialEq)] +pub enum EscapeSequence<'a> { + Text(&'a str), + Unknown(&'a str), + NF { + raw_sequence: &'a str, + nf_sequence: &'a str, + }, + OSC { + raw_sequence: &'a str, + command: &'a str, + terminator: &'a str, + }, + CSI { + raw_sequence: &'a str, + parameters: &'a str, + intermediates: &'a str, + final_byte: &'a str, + }, +} + +impl<'a> EscapeSequence<'a> { + pub fn raw(&self) -> &'a str { + use EscapeSequence::*; + match *self { + Text(raw) => raw, + Unknown(raw) => raw, + NF { raw_sequence, .. } => raw_sequence, + OSC { raw_sequence, .. } => raw_sequence, + CSI { raw_sequence, .. } => raw_sequence, + } + } +} + +#[cfg(test)] +mod tests { + use crate::vscreen::{ + EscapeSequence, EscapeSequenceIterator, EscapeSequenceOffsets, + EscapeSequenceOffsetsIterator, + }; + + #[test] + fn test_escape_sequence_offsets_iterator_parses_text() { + let mut iter = EscapeSequenceOffsetsIterator::new("text"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::Text { start: 0, end: 4 }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_text_stops_at_esc() { + let mut iter = EscapeSequenceOffsetsIterator::new("text\x1B[ming"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::Text { start: 0, end: 4 }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_osc_with_bel() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]abc\x07"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::OSC { + start_sequence: 0, + start_command: 2, + start_terminator: 5, + end: 6, + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_osc_with_st() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]abc\x1B\\"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::OSC { + start_sequence: 0, + start_command: 2, + start_terminator: 5, + end: 7, + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_osc_thats_broken() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B]ab"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::OSC { + start_sequence: 0, + start_command: 2, + start_terminator: 4, + end: 4, + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_csi() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[m"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 2, + start_final_byte: 2, + end: 3 + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_csi_with_parameters() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1;34m"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 6, + start_final_byte: 6, + end: 7 + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_csi_with_intermediates() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[$m"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 2, + start_final_byte: 3, + end: 4 + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_csi_with_parameters_and_intermediates() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1$m"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 3, + start_final_byte: 4, + end: 5 + }) + ); + } + + #[test] + fn 
test_escape_sequence_offsets_iterator_parses_csi_thats_broken() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B["); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 2, + start_final_byte: 2, + end: 2 + }) + ); + + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 3, + start_final_byte: 3, + end: 3 + }) + ); + + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B[1$"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 0, + start_parameters: 2, + start_intermediates: 3, + start_final_byte: 4, + end: 4 + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_nf() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B($0"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::NF { + start_sequence: 0, + start: 1, + end: 4 + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_parses_nf_thats_broken() { + let mut iter = EscapeSequenceOffsetsIterator::new("\x1B("); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::NF { + start_sequence: 0, + start: 1, + end: 1 + }) + ); + } + + #[test] + fn test_escape_sequence_offsets_iterator_iterates() { + let mut iter = EscapeSequenceOffsetsIterator::new("text\x1B[33m\x1B]OSC\x07\x1B(0"); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::Text { start: 0, end: 4 }) + ); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::CSI { + start_sequence: 4, + start_parameters: 6, + start_intermediates: 8, + start_final_byte: 8, + end: 9 + }) + ); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::OSC { + start_sequence: 9, + start_command: 11, + start_terminator: 14, + end: 15 + }) + ); + assert_eq!( + iter.next(), + Some(EscapeSequenceOffsets::NF { + start_sequence: 15, + start: 16, + end: 18 + }) + ); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_escape_sequence_iterator_iterates() { + let mut iter = EscapeSequenceIterator::new("text\x1B[33m\x1B]OSC\x07\x1B]OSC\x1B\\\x1B(0"); + assert_eq!(iter.next(), Some(EscapeSequence::Text("text"))); + assert_eq!( + iter.next(), + Some(EscapeSequence::CSI { + raw_sequence: "\x1B[33m", + parameters: "33", + intermediates: "", + final_byte: "m", + }) + ); + assert_eq!( + iter.next(), + Some(EscapeSequence::OSC { + raw_sequence: "\x1B]OSC\x07", + command: "OSC", + terminator: "\x07", + }) + ); + assert_eq!( + iter.next(), + Some(EscapeSequence::OSC { + raw_sequence: "\x1B]OSC\x1B\\", + command: "OSC", + terminator: "\x1B\\", + }) + ); + assert_eq!( + iter.next(), + Some(EscapeSequence::NF { + raw_sequence: "\x1B(0", + nf_sequence: "(0", + }) + ); + assert_eq!(iter.next(), None); + } +} diff --git a/tests/benchmarks/run-benchmarks.sh b/tests/benchmarks/run-benchmarks.sh index 0f43bc6b..2809ccbb 100755 --- a/tests/benchmarks/run-benchmarks.sh +++ b/tests/benchmarks/run-benchmarks.sh @@ -9,6 +9,13 @@ if ! command -v hyperfine > /dev/null 2>&1; then exit 1 fi +# Check that jq is installed. +if ! command -v jq > /dev/null 2>&1; then + echo "'jq' does not seem to be installed." + echo "You can get it here: https://jqlang.github.io/jq/download/" + exit 1 +fi + # Check that python3 is installed. if ! command -v python3 > /dev/null 2>&1; then echo "'python3' does not seem to be installed." 
@@ -95,10 +102,20 @@ hyperfine \ cat "$RESULT_DIR/startup-time.md" >> "$REPORT" +heading "Startup time without syntax highlighting" +hyperfine \ + "$(printf "%q" "$BAT") --no-config startup-time-src/small-CpuInfo-file.cpuinfo" \ + --command-name "bat … small-CpuInfo-file.cpuinfo" \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-without-syntax-highlighting.md" \ + --export-json "$RESULT_DIR/startup-time-without-syntax-highlighting.json" +cat "$RESULT_DIR/startup-time-without-syntax-highlighting.md" >> "$REPORT" + heading "Startup time with syntax highlighting" hyperfine \ "$(printf "%q" "$BAT") --no-config --color=always startup-time-src/small-CpuInfo-file.cpuinfo" \ - --command-name "bat … small-CpuInfo-file.cpuinfo" \ + --command-name "bat … --color=always small-CpuInfo-file.cpuinfo" \ --warmup "$WARMUP_COUNT" \ --runs "$RUN_COUNT" \ --export-markdown "$RESULT_DIR/startup-time-with-syntax-highlighting.md" \ @@ -117,6 +134,40 @@ hyperfine \ cat "$RESULT_DIR/startup-time-with-syntax-with-dependencies.md" >> "$REPORT" +heading "Startup time with indeterminant syntax" +hyperfine \ + "$(printf "%q" "$BAT") --no-config --color=always startup-time-src/mystery-file" \ + --shell none \ + --command-name 'bat … mystery-file' \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" \ + --export-json "$RESULT_DIR/startup-time-with-indeterminant-syntax.json" +cat "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" >> "$REPORT" + +heading "Startup time with manually set syntax" +hyperfine \ + "$(printf "%q" "$BAT") --no-config --color=always --language=Dockerfile startup-time-src/mystery-file" \ + --shell none \ + --command-name 'bat … --language=Dockerfile mystery-file' \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-with-manually-set-syntax.md" \ + --export-json "$RESULT_DIR/startup-time-with-manually-set-syntax.json" +cat "$RESULT_DIR/startup-time-with-manually-set-syntax.md" >> "$REPORT" + +heading "Startup time with mapped syntax" +hyperfine \ + "$(printf "%q" "$BAT") --no-config --color=always startup-time-src/Containerfile" \ + --shell none \ + --command-name 'bat … Containerfile' \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-with-mapped-syntax.md" \ + --export-json "$RESULT_DIR/startup-time-with-mapped-syntax.json" +cat "$RESULT_DIR/startup-time-with-mapped-syntax.md" >> "$REPORT" + + heading "Plain-text speed" hyperfine \ "$(printf "%q" "$BAT") --no-config --language=txt --style=plain highlighting-speed-src/numpy_test_multiarray.py" \ diff --git a/tests/benchmarks/startup-time-src/Containerfile b/tests/benchmarks/startup-time-src/Containerfile new file mode 100644 index 00000000..a93ce851 --- /dev/null +++ b/tests/benchmarks/startup-time-src/Containerfile @@ -0,0 +1,3 @@ +FROM docker.io/alpine:latest +COPY foo /root/bar +RUN sleep 60 diff --git a/tests/benchmarks/startup-time-src/mystery-file b/tests/benchmarks/startup-time-src/mystery-file new file mode 100644 index 00000000..a93ce851 --- /dev/null +++ b/tests/benchmarks/startup-time-src/mystery-file @@ -0,0 +1,3 @@ +FROM docker.io/alpine:latest +COPY foo /root/bar +RUN sleep 60 diff --git a/tests/examples/regression_tests/issue_2541.txt b/tests/examples/regression_tests/issue_2541.txt new file mode 100644 index 00000000..1059b94e --- /dev/null +++ b/tests/examples/regression_tests/issue_2541.txt @@ -0,0 +1 @@ 
+]8;;http://example.com\This is a link]8;;\n \ No newline at end of file diff --git a/tests/examples/this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt b/tests/examples/this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt new file mode 100644 index 00000000..4dac98dc --- /dev/null +++ b/tests/examples/this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt @@ -0,0 +1 @@ +The header is not broken \ No newline at end of file diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index f1a22d9d..3612654b 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1175,6 +1175,20 @@ fn bom_stripped_when_no_color_and_not_loop_through() { ); } +// Regression test for https://github.com/sharkdp/bat/issues/2541 +#[test] +fn no_broken_osc_emit_with_line_wrapping() { + bat() + .arg("--color=always") + .arg("--decorations=never") + .arg("--wrap=character") + .arg("--terminal-width=40") + .arg("regression_tests/issue_2541.txt") + .assert() + .success() + .stdout(predicate::function(|s: &str| s.lines().count() == 1)); +} + #[test] fn can_print_file_named_cache() { bat_with_config() @@ -1393,6 +1407,61 @@ fn header_full_binary() { .stderr(""); } +#[test] +#[cfg(not(feature = "git"))] +fn header_narrow_terminal() { + bat() + .arg("--terminal-width=30") + .arg("--decorations=always") + .arg("this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt") + .assert() + .success() + .stdout( + "\ +─────┬──────────────────────── + │ File: this-file-path-is + │ -really-long-and-would- + │ have-broken-the-layout- + │ of-the-header.txt +─────┼──────────────────────── + 1 │ The header is not broke + │ n +─────┴──────────────────────── +", + ) + .stderr(""); +} + +#[test] +fn header_very_narrow_terminal() { + bat() + .arg("--terminal-width=10") + .arg("--decorations=always") + .arg("this-file-path-is-really-long-and-would-have-broken-the-layout-of-the-header.txt") + .assert() + .success() + .stdout( + "\ +────────── +File: this +-file-path +-is-really +-long-and- +would-have +-broken-th +e-layout-o +f-the-head +er.txt +────────── +The header + is not br +oken +────────── +", + ) + .stderr(""); +} + #[test] #[cfg(feature = "git")] // Expected output assumes git is enabled fn header_default() { @@ -1876,6 +1945,62 @@ fn ansi_passthrough_emit() { } } +// Ensure that a simple ANSI sequence passthrough is emitted properly on wrapped lines. +// This also helps ensure that escape sequences are counted as part of the visible characters when wrapping. +#[test] +fn ansi_sgr_emitted_when_wrapped() { + bat() + .arg("--paging=never") + .arg("--color=never") + .arg("--terminal-width=20") + .arg("--wrap=character") + .arg("--decorations=always") + .arg("--style=plain") + .write_stdin("\x1B[33mColor...............Also color.\n") + .assert() + .success() + .stdout("\x1B[33m\x1B[33mColor...............\n\x1B[33mAlso color.\n") + // FIXME: ~~~~~~~~ should not be emitted twice. + .stderr(""); +} + +// Ensure that a simple ANSI sequence passthrough is emitted properly on wrapped lines. +// This also helps ensure that escape sequences are counted as part of the visible characters when wrapping. 
+#[test] +fn ansi_hyperlink_emitted_when_wrapped() { + bat() + .arg("--paging=never") + .arg("--color=never") + .arg("--terminal-width=20") + .arg("--wrap=character") + .arg("--decorations=always") + .arg("--style=plain") + .write_stdin("\x1B]8;;http://example.com/\x1B\\Hyperlinks..........Wrap across lines.\n") + .assert() + .success() + .stdout("\x1B]8;;http://example.com/\x1B\\\x1B]8;;http://example.com/\x1B\\Hyperlinks..........\x1B]8;;\x1B\\\n\x1B]8;;http://example.com/\x1B\\Wrap across lines.\n") + // FIXME: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should not be emitted twice. + .stderr(""); +} + +// Ensure that multiple ANSI sequence SGR attributes are combined when emitted on wrapped lines. +#[test] +fn ansi_sgr_joins_attributes_when_wrapped() { + bat() + .arg("--paging=never") + .arg("--color=never") + .arg("--terminal-width=20") + .arg("--wrap=character") + .arg("--decorations=always") + .arg("--style=plain") + .write_stdin("\x1B[33mColor. \x1B[1mBold.........Also bold and color.\n") + .assert() + .success() + .stdout("\x1B[33m\x1B[33mColor. \x1B[1m\x1B[33m\x1B[1mBold.........\n\x1B[33m\x1B[1mAlso bold and color.\n") + // FIXME: ~~~~~~~~ ~~~~~~~~~~~~~~~ should not be emitted twice. + .stderr(""); +} + #[test] fn ignored_suffix_arg() { bat() diff --git a/tests/syntax-tests/highlighted/JQ/sample.jq b/tests/syntax-tests/highlighted/JQ/sample.jq index ba9c853f..057e9edd 100644 --- a/tests/syntax-tests/highlighted/JQ/sample.jq +++ b/tests/syntax-tests/highlighted/JQ/sample.jq @@ -1,31 +1,31 @@ import "../imported-file" ; # With Comments ! -def weird($a; $b; $c): - [ $a, $b, $c ] | transpose | reduce .[][] as $item ( - []; - . + $item.property - ) +def weird($a; $b; $c): + [ $a, $b, $c ] | transpose | reduce .[][] as $item ( + []; + . + $item.property + ) ; -. | weird (.a; .b; .c) | +. | weird (.a; .b; .c) | ( -if (. | contains("never") ) then +if (. | contains("never") ) then  "Why yes" else  12.23 end -) as $never | +) as $never | {  hello,  why: "because", - hello: ( weird | ascii_upcase ), - format_eg: ( . | @json "My json string \( . | this | part | just | white | ascii_upcase | transpose)" ), - never: $never, + hello: ( weird | ascii_upcase ), + format_eg: ( . | @json "My json string \( . | this | part | just | white | ascii_upcase | transpose)" ), + never: $never,  "literal_key": literal_value,  "this": 12.1e12,  "part": "almost" @@ -38,8 +38,8 @@  similar: "but not quite"  }  } - ], -} | ( + ], +} | (    # And with very basic brace matching   @@ -47,13 +47,13 @@  ]     # Other invalid ends - ( [ } ] ) + ( [ } ] )  # A "valid" sequence - ( [ { key: () , other_key:( [ [] [[]] ] ), gaga } ] ) + ( [ { key: () , other_key:( [ [] [[]] ] ), gaga } ] )  # A "invalid" sequence - ( [ { key: () , other_key:( [ [] [[] ] ), gaga } ] ) + ( [ { key: () , other_key:( [ [] [[] ] ), gaga } ] )  "A string\n whith escaped characters \" because we can" )
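Note on the new `src/vscreen.rs` API exercised by the wrapping tests in this patch: `EscapeSequenceIterator` splits a line into text and escape-sequence tokens, `AnsiStyle::update` tracks the active attributes (including OSC 8 hyperlinks), and `AnsiStyle::to_reset_sequence` closes an open hyperlink at a wrap point before the attributes are replayed on the next chunk. The sketch below is illustrative only and is not part of the patch: `wrap_preserving_ansi` is a hypothetical helper that assumes access to bat's internal `vscreen` module, and bat's actual printer logic differs. Only `EscapeSequenceIterator`, `EscapeSequence::raw`, `AnsiStyle::new`, `AnsiStyle::update`, and `AnsiStyle::to_reset_sequence` come from the code added above.

```rust
// Hypothetical helper, assuming crate-internal access to bat's vscreen module.
use crate::vscreen::{AnsiStyle, EscapeSequence, EscapeSequenceIterator};

/// Wraps `line` at `width` visible columns, closing any open hyperlink before
/// each break and replaying the active attributes on the next chunk.
fn wrap_preserving_ansi(line: &str, width: usize) -> Vec<String> {
    let mut style = AnsiStyle::new();
    let mut chunks = Vec::new();
    let mut current = String::new();
    let mut visible = 0;

    for seq in EscapeSequenceIterator::new(line) {
        match seq {
            // Plain text occupies visible columns and may be split.
            EscapeSequence::Text(text) => {
                for ch in text.chars() {
                    if visible == width {
                        // Close e.g. an open \x1B]8;; hyperlink before breaking...
                        current.push_str(&style.to_reset_sequence());
                        chunks.push(std::mem::take(&mut current));
                        // ...then restore the active attributes on the new chunk.
                        current.push_str(&style.to_string());
                        visible = 0;
                    }
                    current.push(ch);
                    visible += 1;
                }
            }
            // Escape sequences occupy no columns; emit them and track their effect.
            other => {
                current.push_str(other.raw());
                style.update(other);
            }
        }
    }

    if !current.is_empty() {
        chunks.push(current);
    }
    chunks
}
```

This mirrors the shape asserted by `ansi_hyperlink_emitted_when_wrapped` above: each wrapped segment ends with a hyperlink reset and each continuation starts with the still-active sequences.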