diff --git a/CHANGELOG.md b/CHANGELOG.md index 492adaf9..bb027c55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - Minor benchmark script improvements #2768 (@cyqsimon) - Update Arch Linux package URL in README files #2779 (@brunobell) - Update and improve `zsh` completion, see #2772 (@okapia) +- More extensible syntax mapping mechanism #2755 (@cyqsimon) - Use proper Architecture for Debian packages built for musl, see #2811 (@Enselic) - Pull in fix for unsafe-libyaml security advisory, see #2812 (@dtolnay) - Update git-version dependency to use Syn v2, see #2816 (@dtolnay) @@ -28,6 +29,10 @@ ## `bat` as a library +- Changes to `syntax_mapping::SyntaxMapping` #2755 (@cyqsimon) + - `SyntaxMapping::get_syntax_for` is now correctly public + - [BREAKING] `SyntaxMapping::{empty,builtin}` are removed; use `SyntaxMapping::new` instead + - [BREAKING] `SyntaxMapping::mappings` is replaced by `SyntaxMapping::{builtin,custom,all}_mappings` - Make `Controller::run_with_error_handler`'s error handler `FnMut`, see #2831 (@rhysd) # v0.24.0 diff --git a/Cargo.lock b/Cargo.lock index 19ee5928..4aaf7b32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -129,6 +129,8 @@ dependencies = [ "globset", "grep-cli", "home", + "indexmap 2.1.0", + "itertools", "nix", "nu-ansi-term", "once_cell", @@ -140,12 +142,14 @@ dependencies = [ "run_script", "semver", "serde", + "serde_with", "serde_yaml", "serial_test", "shell-words", "syntect", "tempfile", "thiserror", + "toml", "unicode-width", "wait-timeout", "walkdir", @@ -224,11 +228,12 @@ checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" [[package]] name = "cc" -version = "1.0.73" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", + "libc", ] [[package]] @@ -314,6 +319,41 @@ dependencies = [ 
"cfg-if", ] +[[package]] +name = "darling" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "dashmap" version = "5.4.0" @@ -578,6 +618,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.3.0" @@ -600,12 +646,13 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", "hashbrown 0.14.1", + "serde", ] [[package]] @@ -1101,13 +1148,44 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_with" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "64cd236ccc1b7a29e7e2739f27c0b2dd199804abc4290e32f59f3b68d6405c23" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93634eb5f75a2323b16de4748022ac4297f9e76b6dced2be287a099f41b5e788" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_yaml" version = "0.9.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a15e0ef66bf939a7c890a0bf6d5a733c70202225f9888a89ed5c62298b019129" dependencies = [ - "indexmap 2.0.2", + "indexmap 2.1.0", "itoa", "ryu", "serde", @@ -1294,6 +1372,41 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +[[package]] +name = "toml" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff9e3abce27ee2c9a37f9ad37238c1bdd4e789c84ba37df76aa4d528f5072cc" +dependencies = [ + "indexmap 2.1.0", + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.20.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" +dependencies = [ + "indexmap 2.1.0", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "unicode-bidi" version = "0.3.8" @@ -1613,6 +1726,15 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +[[package]] +name = "winnow" +version = "0.5.18" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "176b6138793677221d420fd2f0aeeced263f197688b36484660da767bca2fa32" +dependencies = [ + "memchr", +] + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 53bc2da4..3b7f10e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,14 @@ nix = { version = "0.26.4", default-features = false, features = ["term"] } [build-dependencies] anyhow = "1.0.78" +indexmap = { version = "2.1.0", features = ["serde"] } +itertools = "0.11.0" +once_cell = "1.18" +regex = "1.10.2" +serde = { version = "1.0", features = ["derive"] } +serde_with = { version = "3.4.0", default-features = false, features = ["macros"] } +toml = { version = "0.8.6", features = ["preserve_order"] } +walkdir = "2.4" [build-dependencies.clap] version = "4.4.12" diff --git a/build/main.rs b/build/main.rs index 416d90d5..8966ee52 100644 --- a/build/main.rs +++ b/build/main.rs @@ -1,5 +1,6 @@ #[cfg(feature = "application")] mod application; +mod syntax_mapping; mod util; fn main() -> anyhow::Result<()> { @@ -7,6 +8,8 @@ fn main() -> anyhow::Result<()> { // see: https://doc.rust-lang.org/cargo/reference/build-scripts.html#rerun-if-changed println!("cargo:rerun-if-changed=build/"); + syntax_mapping::build_static_mappings()?; + #[cfg(feature = "application")] application::gen_man_and_comp()?; diff --git a/build/syntax_mapping.rs b/build/syntax_mapping.rs new file mode 100644 index 00000000..c29b9225 --- /dev/null +++ b/build/syntax_mapping.rs @@ -0,0 +1,292 @@ +use std::{ + convert::Infallible, + env, fs, + path::{Path, PathBuf}, + str::FromStr, +}; + +use anyhow::{anyhow, bail}; +use indexmap::IndexMap; +use itertools::Itertools; +use once_cell::sync::Lazy; +use regex::Regex; +use serde::Deserialize; +use serde_with::DeserializeFromStr; +use walkdir::WalkDir; + +/// Known mapping targets. +/// +/// Corresponds to `syntax_mapping::MappingTarget`. 
+#[allow(clippy::enum_variant_names)] +#[derive(Clone, Debug, Eq, PartialEq, Hash, DeserializeFromStr)] +pub enum MappingTarget { + MapTo(String), + MapToUnknown, + MapExtensionToUnknown, +} +impl FromStr for MappingTarget { + type Err = Infallible; + fn from_str(s: &str) -> Result { + match s { + "MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown), + "MappingTarget::MapExtensionToUnknown" => Ok(Self::MapExtensionToUnknown), + syntax => Ok(Self::MapTo(syntax.into())), + } + } +} +impl MappingTarget { + fn codegen(&self) -> String { + match self { + Self::MapTo(syntax) => format!(r###"MappingTarget::MapTo(r#"{syntax}"#)"###), + Self::MapToUnknown => "MappingTarget::MapToUnknown".into(), + Self::MapExtensionToUnknown => "MappingTarget::MapExtensionToUnknown".into(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)] +/// A single matcher. +/// +/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`. +struct Matcher(Vec); +/// Parse a matcher. +/// +/// Note that this implementation is rather strict: it will greedily interpret +/// every valid environment variable replacement as such, then immediately +/// hard-error if it finds a '$', '{', or '}' anywhere in the remaining text +/// segments. +/// +/// The reason for this strictness is I currently cannot think of a valid reason +/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern. +/// Therefore any such occurrences are likely human errors. +/// +/// If we later discover some edge cases, it's okay to make it more permissive.
+impl FromStr for Matcher { + type Err = anyhow::Error; + fn from_str(s: &str) -> Result { + use MatcherSegment as Seg; + static VAR_REGEX: Lazy = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap()); + + let mut segments = vec![]; + let mut text_start = 0; + for capture in VAR_REGEX.captures_iter(s) { + let match_0 = capture.get(0).unwrap(); + + // text before this var + let text_end = match_0.start(); + segments.push(Seg::Text(s[text_start..text_end].into())); + text_start = match_0.end(); + + // this var + segments.push(Seg::Env(capture.get(1).unwrap().as_str().into())); + } + // possible trailing text + segments.push(Seg::Text(s[text_start..].into())); + + // cleanup empty text segments + let non_empty_segments = segments + .into_iter() + .filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true)) + .collect_vec(); + + // sanity check + if non_empty_segments + .windows(2) + .any(|segs| segs[0].is_text() && segs[1].is_text()) + { + unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}"); + } + + // guard empty case + if non_empty_segments.is_empty() { + bail!(r#"Parsed an empty matcher: "{s}""#); + } + + // guard variable syntax leftover fragments + if non_empty_segments + .iter() + .filter_map(Seg::text) + .any(|t| t.contains(['$', '{', '}'])) + { + bail!(r#"Invalid matcher: "{s}""#); + } + + Ok(Self(non_empty_segments)) + } +} +impl Matcher { + fn codegen(&self) -> String { + match self.0.len() { + 0 => unreachable!("0-length matcher should never be created"), + // if-let guard would be ideal here + // see: https://github.com/rust-lang/rust/issues/51114 + 1 if self.0[0].is_text() => { + let s = self.0[0].text().unwrap(); + format!(r###"Lazy::new(|| Some(build_matcher_fixed(r#"{s}"#)))"###) + } + // parser logic ensures that this case can only happen when there are dynamic segments + _ => { + let segs = self.0.iter().map(MatcherSegment::codegen).join(", "); + format!(r###"Lazy::new(|| build_matcher_dynamic(&[{segs}]))"###) + } + } + 
} +} + +/// A segment in a matcher. +/// +/// Corresponds to `syntax_mapping::MatcherSegment`. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum MatcherSegment { + Text(String), + Env(String), +} +#[allow(dead_code)] +impl MatcherSegment { + fn is_text(&self) -> bool { + matches!(self, Self::Text(_)) + } + fn is_env(&self) -> bool { + matches!(self, Self::Env(_)) + } + fn text(&self) -> Option<&str> { + match self { + Self::Text(t) => Some(t), + Self::Env(_) => None, + } + } + fn env(&self) -> Option<&str> { + match self { + Self::Text(_) => None, + Self::Env(t) => Some(t), + } + } + fn codegen(&self) -> String { + match self { + Self::Text(s) => format!(r###"MatcherSegment::Text(r#"{s}"#)"###), + Self::Env(s) => format!(r###"MatcherSegment::Env(r#"{s}"#)"###), + } + } +} + +/// A struct that models a single .toml file in /src/syntax_mapping/builtins/. +#[derive(Clone, Debug, Deserialize)] +struct MappingDefModel { + mappings: IndexMap>, +} +impl MappingDefModel { + fn into_mapping_list(self) -> MappingList { + let list = self + .mappings + .into_iter() + .flat_map(|(target, matchers)| { + matchers + .into_iter() + .map(|matcher| (matcher, target.clone())) + .collect::>() + }) + .collect(); + MappingList(list) + } +} + +#[derive(Clone, Debug)] +struct MappingList(Vec<(Matcher, MappingTarget)>); +impl MappingList { + fn codegen(&self) -> String { + let array_items: Vec<_> = self + .0 + .iter() + .map(|(matcher, target)| { + format!("({m}, {t})", m = matcher.codegen(), t = target.codegen()) + }) + .collect(); + let len = array_items.len(); + + format!( + "/// Generated by build script from /src/syntax_mapping/builtins/.\n\ + pub(crate) static BUILTIN_MAPPINGS: [(Lazy>, MappingTarget); {len}] = [\n{items}\n];", + items = array_items.join(",\n") + ) + } +} + +/// Get the list of paths to all mapping definition files that should be +/// included for the current target platform. 
+fn get_def_paths() -> anyhow::Result> { + let source_subdirs = [ + "common", + #[cfg(target_family = "unix")] + "unix-family", + #[cfg(any( + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "macos" + ))] + "bsd-family", + #[cfg(target_os = "linux")] + "linux", + #[cfg(target_os = "macos")] + "macos", + #[cfg(target_os = "windows")] + "windows", + ]; + + let mut toml_paths = vec![]; + for subdir in source_subdirs { + let wd = WalkDir::new(Path::new("src/syntax_mapping/builtins").join(subdir)); + let paths = wd + .into_iter() + .filter_map_ok(|entry| { + let path = entry.path(); + (path.is_file() && path.extension().map(|ext| ext == "toml").unwrap_or(false)) + .then(|| path.to_owned()) + }) + .collect::, _>>()?; + toml_paths.extend(paths); + } + + toml_paths.sort_by_key(|path| { + path.file_name() + .expect("file name should not terminate in ..") + .to_owned() + }); + + Ok(toml_paths) +} + +fn read_all_mappings() -> anyhow::Result { + let mut all_mappings = vec![]; + + for path in get_def_paths()? { + let toml_string = fs::read_to_string(path)?; + let mappings = toml::from_str::(&toml_string)?.into_mapping_list(); + all_mappings.extend(mappings.0); + } + + let duplicates = all_mappings + .iter() + .duplicates_by(|(matcher, _)| matcher) + .collect_vec(); + if !duplicates.is_empty() { + bail!("Rules with duplicate matchers found: {duplicates:?}"); + } + + Ok(MappingList(all_mappings)) +} + +/// Build the static syntax mappings defined in /src/syntax_mapping/builtins/ +/// into a .rs source file, which is to be inserted with `include!`. +pub fn build_static_mappings() -> anyhow::Result<()> { + println!("cargo:rerun-if-changed=src/syntax_mapping/builtins/"); + + let mappings = read_all_mappings()?; + + let codegen_path = Path::new(&env::var_os("OUT_DIR").ok_or(anyhow!("OUT_DIR is unset"))?) 
+ .join("codegen_static_syntax_mappings.rs"); + + fs::write(codegen_path, mappings.codegen())?; + + Ok(()) +} diff --git a/src/assets.rs b/src/assets.rs index 7ce1a8f6..0129f76b 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -441,7 +441,7 @@ mod tests { fn new() -> Self { SyntaxDetectionTest { assets: HighlightingAssets::from_binary(), - syntax_mapping: SyntaxMapping::builtin(), + syntax_mapping: SyntaxMapping::new(), temp_dir: TempDir::new().expect("creation of temporary directory"), } } diff --git a/src/bin/bat/app.rs b/src/bin/bat/app.rs index 09430623..a2c09770 100644 --- a/src/bin/bat/app.rs +++ b/src/bin/bat/app.rs @@ -121,7 +121,7 @@ impl App { _ => unreachable!("other values for --paging are not allowed"), }; - let mut syntax_mapping = SyntaxMapping::builtin(); + let mut syntax_mapping = SyntaxMapping::new(); if let Some(values) = self.matches.get_many::("ignored-suffix") { for suffix in values { @@ -130,7 +130,9 @@ impl App { } if let Some(values) = self.matches.get_many::("map-syntax") { - for from_to in values { + // later args take precedence over earlier ones, hence `.rev()` + // see: https://github.com/sharkdp/bat/pull/2755#discussion_r1456416875 + for from_to in values.rev() { let parts: Vec<_> = from_to.split(':').collect(); if parts.len() != 2 { diff --git a/src/bin/bat/main.rs b/src/bin/bat/main.rs index 43e9d288..f48abdc1 100644 --- a/src/bin/bat/main.rs +++ b/src/bin/bat/main.rs @@ -78,9 +78,11 @@ fn run_cache_subcommand( Ok(()) } -fn get_syntax_mapping_to_paths<'a>( - mappings: &[(GlobMatcher, MappingTarget<'a>)], -) -> HashMap<&'a str, Vec> { +fn get_syntax_mapping_to_paths<'r, 't, I>(mappings: I) -> HashMap<&'t str, Vec> +where + I: IntoIterator)>, + 't: 'r, // target text outlives rule +{ let mut map = HashMap::new(); for mapping in mappings { if let (matcher, MappingTarget::MapTo(s)) = mapping { @@ -123,7 +125,7 @@ pub fn get_languages(config: &Config, cache_dir: &Path) -> Result { languages.sort_by_key(|lang| lang.name.to_uppercase()); 
- let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.mappings()); + let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.all_mappings()); for lang in &mut languages { if let Some(additional_paths) = configured_languages.get(lang.name.as_str()) { diff --git a/src/syntax_mapping.rs b/src/syntax_mapping.rs index c8c361ab..0dac0c02 100644 --- a/src/syntax_mapping.rs +++ b/src/syntax_mapping.rs @@ -1,12 +1,23 @@ use std::path::Path; -use crate::error::Result; -use ignored_suffixes::IgnoredSuffixes; - use globset::{Candidate, GlobBuilder, GlobMatcher}; +use crate::error::Result; +use builtin::BUILTIN_MAPPINGS; +use ignored_suffixes::IgnoredSuffixes; + +mod builtin; pub mod ignored_suffixes; +fn make_glob_matcher(from: &str) -> Result { + let matcher = GlobBuilder::new(from) + .case_insensitive(true) + .literal_separator(true) + .build()? + .compile_matcher(); + Ok(matcher) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum MappingTarget<'a> { @@ -29,204 +40,72 @@ pub enum MappingTarget<'a> { #[derive(Debug, Clone, Default)] pub struct SyntaxMapping<'a> { - mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, + /// User-defined mappings at run time. + /// + /// Rules in front have precedence. 
+ custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, pub(crate) ignored_suffixes: IgnoredSuffixes<'a>, } impl<'a> SyntaxMapping<'a> { - pub fn empty() -> SyntaxMapping<'a> { + pub fn new() -> SyntaxMapping<'a> { Default::default() } - pub fn builtin() -> SyntaxMapping<'a> { - let mut mapping = Self::empty(); - mapping.insert("*.h", MappingTarget::MapTo("C++")).unwrap(); - mapping - .insert(".clang-format", MappingTarget::MapTo("YAML")) - .unwrap(); - mapping.insert("*.fs", MappingTarget::MapTo("F#")).unwrap(); - mapping - .insert("build", MappingTarget::MapToUnknown) - .unwrap(); - mapping - .insert("**/.ssh/config", MappingTarget::MapTo("SSH Config")) - .unwrap(); - mapping - .insert( - "**/bat/config", - MappingTarget::MapTo("Bourne Again Shell (bash)"), - ) - .unwrap(); - mapping - .insert( - "/etc/profile", - MappingTarget::MapTo("Bourne Again Shell (bash)"), - ) - .unwrap(); - mapping - .insert( - "os-release", - MappingTarget::MapTo("Bourne Again Shell (bash)"), - ) - .unwrap(); - mapping - .insert("*.pac", MappingTarget::MapTo("JavaScript (Babel)")) - .unwrap(); - mapping - .insert("fish_history", MappingTarget::MapTo("YAML")) - .unwrap(); - - for glob in ["*.jsonl", "*.sarif"] { - mapping.insert(glob, MappingTarget::MapTo("JSON")).unwrap(); - } - - // See #2151, https://nmap.org/book/nse-language.html - mapping - .insert("*.nse", MappingTarget::MapTo("Lua")) - .unwrap(); - - // See #1008 - mapping - .insert("rails", MappingTarget::MapToUnknown) - .unwrap(); - - mapping - .insert("Containerfile", MappingTarget::MapTo("Dockerfile")) - .unwrap(); - - mapping - .insert("*.ksh", MappingTarget::MapTo("Bourne Again Shell (bash)")) - .unwrap(); - - // Nginx and Apache syntax files both want to style all ".conf" files - // see #1131 and #1137 - mapping - .insert("*.conf", MappingTarget::MapExtensionToUnknown) - .unwrap(); - - for glob in &[ - "/etc/nginx/**/*.conf", - "/etc/nginx/sites-*/**/*", - "nginx.conf", - "mime.types", - ] { - mapping.insert(glob, 
MappingTarget::MapTo("nginx")).unwrap(); - } - - for glob in &[ - "/etc/apache2/**/*.conf", - "/etc/apache2/sites-*/**/*", - "httpd.conf", - ] { - mapping - .insert(glob, MappingTarget::MapTo("Apache Conf")) - .unwrap(); - } - - for glob in &[ - "**/systemd/**/*.conf", - "**/systemd/**/*.example", - "*.automount", - "*.device", - "*.dnssd", - "*.link", - "*.mount", - "*.netdev", - "*.network", - "*.nspawn", - "*.path", - "*.service", - "*.scope", - "*.slice", - "*.socket", - "*.swap", - "*.target", - "*.timer", - ] { - mapping.insert(glob, MappingTarget::MapTo("INI")).unwrap(); - } - - // unix mail spool - for glob in &["/var/spool/mail/*", "/var/mail/*"] { - mapping.insert(glob, MappingTarget::MapTo("Email")).unwrap() - } - - // pacman hooks - mapping - .insert("*.hook", MappingTarget::MapTo("INI")) - .unwrap(); - - mapping - .insert("*.ron", MappingTarget::MapTo("Rust")) - .unwrap(); - - // Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/` - // See e.g. 
https://git-scm.com/docs/git-config#FILES - match ( - std::env::var_os("XDG_CONFIG_HOME").filter(|val| !val.is_empty()), - std::env::var_os("HOME") - .filter(|val| !val.is_empty()) - .map(|home| Path::new(&home).join(".config")), - ) { - (Some(xdg_config_home), Some(default_config_home)) - if xdg_config_home == default_config_home => { - insert_git_config_global(&mut mapping, &xdg_config_home) - } - (Some(xdg_config_home), Some(default_config_home)) /* else guard */ => { - insert_git_config_global(&mut mapping, &xdg_config_home); - insert_git_config_global(&mut mapping, &default_config_home) - } - (Some(config_home), None) => insert_git_config_global(&mut mapping, &config_home), - (None, Some(config_home)) => insert_git_config_global(&mut mapping, &config_home), - (None, None) => (), - }; - - fn insert_git_config_global(mapping: &mut SyntaxMapping, config_home: impl AsRef) { - let git_config_path = config_home.as_ref().join("git"); - - mapping - .insert( - &git_config_path.join("config").to_string_lossy(), - MappingTarget::MapTo("Git Config"), - ) - .ok(); - - mapping - .insert( - &git_config_path.join("ignore").to_string_lossy(), - MappingTarget::MapTo("Git Ignore"), - ) - .ok(); - - mapping - .insert( - &git_config_path.join("attributes").to_string_lossy(), - MappingTarget::MapTo("Git Attributes"), - ) - .ok(); - } - - mapping - } - pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { - let glob = GlobBuilder::new(from) - .case_insensitive(true) - .literal_separator(true) - .build()?; - self.mappings.push((glob.compile_matcher(), to)); + let matcher = make_glob_matcher(from)?; + self.custom_mappings.push((matcher, to)); Ok(()) } - pub fn mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] { - &self.mappings + /// Returns an iterator over all mappings. User-defined mappings are listed + /// before builtin mappings; mappings in front have higher precedence. + /// + /// Builtin mappings' `GlobMatcher`s are lazily compiled. 
+ /// + /// Note that this function only returns mappings that are valid under the + /// current environment. For details see [`Self::builtin_mappings`]. + pub fn all_mappings(&self) -> impl Iterator)> { + self.custom_mappings() + .iter() + .map(|(matcher, target)| (matcher, target)) // as_ref + .chain( + // we need a map with a closure to "do" the lifetime variance + // see: https://discord.com/channels/273534239310479360/1120124565591425034/1170543402870382653 + // also, clippy false positive: + // see: https://github.com/rust-lang/rust-clippy/issues/9280 + #[allow(clippy::map_identity)] + self.builtin_mappings().map(|rule| rule), + ) } - pub(crate) fn get_syntax_for(&self, path: impl AsRef) -> Option> { + /// Returns an iterator over all valid builtin mappings. Mappings in front + /// have higher precedence. + /// + /// The `GlobMatcher`s are lazily compiled. + /// + /// Mappings that are invalid under the current environment (i.e. the rule + /// requires environment variable(s) that are unset, or the joined string + /// after variable(s) replacement is not a valid glob expression) are + /// ignored. + pub fn builtin_mappings( + &self, + ) -> impl Iterator)> { + BUILTIN_MAPPINGS + .iter() + .filter_map(|(matcher, target)| matcher.as_ref().map(|glob| (glob, target))) + } + + /// Returns all user-defined mappings. + pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] { + &self.custom_mappings + } + + pub fn get_syntax_for(&self, path: impl AsRef) -> Option> { // Try matching on the file name as-is.
let candidate = Candidate::new(&path); let candidate_filename = path.as_ref().file_name().map(Candidate::new); - for (ref glob, ref syntax) in self.mappings.iter().rev() { + for (glob, syntax) in self.all_mappings() { if glob.is_match_candidate(&candidate) || candidate_filename .as_ref() @@ -252,9 +131,46 @@ impl<'a> SyntaxMapping<'a> { #[cfg(test)] mod tests { use super::*; + #[test] - fn basic() { - let mut map = SyntaxMapping::empty(); + fn builtin_mappings_work() { + let map = SyntaxMapping::new(); + + assert_eq!( + map.get_syntax_for("/path/to/build"), + Some(MappingTarget::MapToUnknown) + ); + } + + #[test] + fn all_fixed_builtin_mappings_can_compile() { + let map = SyntaxMapping::new(); + + // collect call evaluates all lazy closures + // fixed builtin mappings will panic if they fail to compile + let _mappings = map.builtin_mappings().collect::>(); + } + + #[test] + fn builtin_mappings_matcher_only_compile_once() { + let map = SyntaxMapping::new(); + + let two_iterations: Vec<_> = (0..2) + .map(|_| { + // addresses of every matcher + map.builtin_mappings() + .map(|(matcher, _)| matcher as *const _ as usize) + .collect::>() + }) + .collect(); + + // if the matchers are only compiled once, their address should remain the same + assert_eq!(two_iterations[0], two_iterations[1]); + } + + #[test] + fn custom_mappings_work() { + let mut map = SyntaxMapping::new(); map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML")) .ok(); map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore")) @@ -273,52 +189,32 @@ mod tests { } #[test] - fn user_can_override_builtin_mappings() { - let mut map = SyntaxMapping::builtin(); + fn custom_mappings_override_builtin() { + let mut map = SyntaxMapping::new(); assert_eq!( - map.get_syntax_for("/etc/profile"), - Some(MappingTarget::MapTo("Bourne Again Shell (bash)")) + map.get_syntax_for("/path/to/httpd.conf"), + Some(MappingTarget::MapTo("Apache Conf")) ); - map.insert("/etc/profile", MappingTarget::MapTo("My Syntax")) 
+ map.insert("httpd.conf", MappingTarget::MapTo("My Syntax")) .ok(); assert_eq!( - map.get_syntax_for("/etc/profile"), + map.get_syntax_for("/path/to/httpd.conf"), Some(MappingTarget::MapTo("My Syntax")) ); } #[test] - fn builtin_mappings() { - let map = SyntaxMapping::builtin(); + fn custom_mappings_precedence() { + let mut map = SyntaxMapping::new(); + map.insert("/path/to/foo", MappingTarget::MapTo("alpha")) + .ok(); + map.insert("/path/to/foo", MappingTarget::MapTo("bravo")) + .ok(); assert_eq!( - map.get_syntax_for("/path/to/build"), - Some(MappingTarget::MapToUnknown) + map.get_syntax_for("/path/to/foo"), + Some(MappingTarget::MapTo("alpha")) ); } - - #[test] - /// verifies that SyntaxMapping::builtin() doesn't repeat `Glob`-based keys - fn no_duplicate_builtin_keys() { - let mappings = SyntaxMapping::builtin().mappings; - for i in 0..mappings.len() { - let tail = mappings[i + 1..].into_iter(); - let (dupl, _): (Vec<_>, Vec<_>) = - tail.partition(|item| item.0.glob() == mappings[i].0.glob()); - - // emit repeats on failure - assert_eq!( - dupl.len(), - 0, - "Glob pattern `{}` mapped to multiple: {:?}", - mappings[i].0.glob().glob(), - { - let (_, mut dupl_targets): (Vec, Vec) = - dupl.into_iter().cloned().unzip(); - dupl_targets.push(mappings[i].1) - }, - ) - } - } } diff --git a/src/syntax_mapping/builtin.rs b/src/syntax_mapping/builtin.rs new file mode 100644 index 00000000..1822be57 --- /dev/null +++ b/src/syntax_mapping/builtin.rs @@ -0,0 +1,91 @@ +use std::env; + +use globset::GlobMatcher; +use once_cell::sync::Lazy; + +use crate::syntax_mapping::{make_glob_matcher, MappingTarget}; + +// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the +// build script (/build/syntax_mapping.rs). +include!(concat!( + env!("OUT_DIR"), + "/codegen_static_syntax_mappings.rs" +)); + +// The defined matcher strings are analysed at compile time and converted into +// lazily-compiled `GlobMatcher`s. 
This is so that the string searches are moved +// from run time to compile time, thus improving startup performance. +// +// To any future maintainer (including possibly myself) wondering why there is +// not a `BuiltinMatcher` enum that looks like this: +// +// ``` +// enum BuiltinMatcher { +// Fixed(&'static str), +// Dynamic(Lazy<Option<GlobMatcher>>), +// } +// ``` +// +// Because there was. I tried it and threw it out. +// +// Naively looking at the problem from a distance, this may seem like a good +// design (strongly typed etc. etc.). It would also save on compiled size by +// extracting out common behaviour into functions. But while actually +// implementing the lazy matcher compilation logic, I realised that it's most +// convenient for `BUILTIN_MAPPINGS` to have the following type: +// +// `[(Lazy<Option<GlobMatcher>>, MappingTarget); N]` +// +// The benefit of this is that operations like listing all builtin mappings +// would be effectively memoised. The caller would not have to compile another +// `GlobMatcher` for rules that they have previously visited. +// +// Unfortunately, this means we are going to have to store a distinct closure +// for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer +// of indirection. +// +// In the current implementation, the closure within each generated rule simply +// calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on +// whether the defined matcher contains dynamic segments or not. + +/// Compile a fixed glob string into a glob matcher. +/// +/// A failure to compile is a fatal error. +/// +/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. +fn build_matcher_fixed(from: &str) -> GlobMatcher { + make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile") +} + +/// Join a list of matcher segments to create a glob string, replacing all +/// environment variables, then compile to a glob matcher.
+/// +/// Returns `None` if any replacement fails, or if the joined glob string fails +/// to compile. +/// +/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. +fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option { + // join segments + let mut buf = String::new(); + for seg in segs { + match seg { + MatcherSegment::Text(s) => buf.push_str(s), + MatcherSegment::Env(var) => { + let replaced = env::var(var).ok()?; + buf.push_str(&replaced); + } + } + } + // compile glob matcher + let matcher = make_glob_matcher(&buf).ok()?; + Some(matcher) +} + +/// A segment of a dynamic builtin matcher. +/// +/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. +#[derive(Clone, Debug)] +enum MatcherSegment { + Text(&'static str), + Env(&'static str), +} diff --git a/src/syntax_mapping/builtins/README.md b/src/syntax_mapping/builtins/README.md new file mode 100644 index 00000000..29cf43ee --- /dev/null +++ b/src/syntax_mapping/builtins/README.md @@ -0,0 +1,116 @@ +# `/src/syntax_mapping/builtins` + +The files in this directory define path/name-based syntax mappings, which amend +and take precedence over the extension/content-based syntax mappings provided by +[syntect](https://github.com/trishume/syntect). + +## File organisation + +Each TOML file should describe the syntax mappings of a single application, or +otherwise a set of logically-related rules. + +What defines "a single application" here is deliberately vague, since the +file-splitting is purely for maintainability reasons. (Technically, we could +just as well use a single TOML file.) So just use common sense. + +TOML files should reside in the corresponding subdirectory of the platform(s) +that they intend to target. At compile time, the build script will go through +each subdirectory that is applicable to the compilation target, collect the +syntax mappings defined by all TOML files, and embed them into the binary.
+ +## File syntax + +Each TOML file should contain a single section named `mappings`, with each of +its keys being a language identifier (first column of `bat -L`; also referred to +as "target"). + +The value of each key should be an array of strings, with each item being a glob +matcher. We will call each of these items a "rule". + +For example, if `foo-application` uses both TOML and YAML configuration files, +we could write something like this: + +```toml +# 30-foo-application.toml +[mappings] +"TOML" = [ + # rules for TOML syntax go here + "/usr/share/foo-application/toml-config/*.conf", + "/etc/foo-application/toml-config/*.conf", +] +"YAML" = [ + # rules for YAML syntax go here + # ... +] +``` + +### Dynamic environment variable replacement + +In addition to the standard glob matcher syntax, rules also support dynamic +replacement of environment variables at runtime. This allows us to concisely +handle things like [XDG](https://specifications.freedesktop.org/basedir-spec/latest/). + +All environment variables intended to be replaced at runtime must be enclosed in +`${}`, for example `"/foo/*/${YOUR_ENV}-suffix/*.log"`. Note that this is the +**only** admissible syntax; other variable substitution syntaxes are not +supported and will either cause a compile time error, or be treated as plain +text. + +For example, if `foo-application` also supports per-user configuration files, we +could write something like this: + +```toml +# 30-foo-application.toml +[mappings] +"TOML" = [ + # rules for TOML syntax go here + "/usr/share/foo-application/toml-config/*.conf", + "/etc/foo-application/toml-config/*.conf", + "${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf", + "${HOME}/.config/foo-application/toml-config/*.conf", +] +"YAML" = [ + # rules for YAML syntax go here + # ... 
+] +``` + +If any environment variable replacement in a rule fails (for example when a +variable is unset), or if the glob string after replacements is invalid, the +entire rule will be ignored. + +### Explicitly mapping to unknown + +Sometimes it may be necessary to "unset" a particular syntect mapping - perhaps +a syntax's matching rules are "too greedy", and are claiming files that they should +not. In this case, there are two special identifiers: +`MappingTarget::MapToUnknown` and `MappingTarget::MapExtensionToUnknown` +(corresponding to the two variants of the `syntax_mapping::MappingTarget` enum). + +An example of this would be `*.conf` files in general. So we may write something +like this: + +```toml +# 99-unset-ambiguous-extensions.toml +[mappings] +"MappingTarget::MapExtensionToUnknown" = [ + "*.conf", +] +``` + +## Ordering + +At compile time, all TOML files applicable to the target are processed in +lexicographical filename order. So `00-foo.toml` takes precedence over +`10-bar.toml`, which takes precedence over `20-baz.toml`, and so on. Note that +**only** the filenames of the TOML files are taken into account; the +subdirectories they are placed in have no influence on ordering. + +This behaviour can be occasionally useful for creating high/low priority rules, +such as in the aforementioned example of explicitly mapping `*.conf` files to +unknown. Generally this should not be much of a concern though, since rules +should be written as specifically as possible for each application. + +Rules within each TOML file are processed (and therefore matched) in the order +in which they are defined. At runtime, the syntax selection algorithm will +short-circuit and return the target of the first matching rule. 
diff --git a/src/syntax_mapping/builtins/bsd-family/.gitkeep b/src/syntax_mapping/builtins/bsd-family/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/bsd-family/50-os-release.toml b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml new file mode 100644 index 00000000..91b003d7 --- /dev/null +++ b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml @@ -0,0 +1,2 @@ +[mappings] +"Bourne Again Shell (bash)" = ["/etc/os-release", "/var/run/os-release"] diff --git a/src/syntax_mapping/builtins/common/.gitkeep b/src/syntax_mapping/builtins/common/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/common/50-apache.toml b/src/syntax_mapping/builtins/common/50-apache.toml new file mode 100644 index 00000000..0e557aff --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-apache.toml @@ -0,0 +1,2 @@ +[mappings] +"Apache Conf" = ["httpd.conf"] diff --git a/src/syntax_mapping/builtins/common/50-bat.toml b/src/syntax_mapping/builtins/common/50-bat.toml new file mode 100644 index 00000000..e70b6b09 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-bat.toml @@ -0,0 +1,2 @@ +[mappings] +"Bourne Again Shell (bash)" = ["**/bat/config"] diff --git a/src/syntax_mapping/builtins/common/50-container.toml b/src/syntax_mapping/builtins/common/50-container.toml new file mode 100644 index 00000000..ad48c29b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-container.toml @@ -0,0 +1,2 @@ +[mappings] +"Dockerfile" = ["Containerfile"] diff --git a/src/syntax_mapping/builtins/common/50-cpp.toml b/src/syntax_mapping/builtins/common/50-cpp.toml new file mode 100644 index 00000000..99d8a32b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-cpp.toml @@ -0,0 +1,6 @@ +[mappings] +"C++" = [ + # probably better than the default Objective C mapping #877 + "*.h", +] +"YAML" = [".clang-format"] diff --git a/src/syntax_mapping/builtins/common/50-f-sharp.toml 
b/src/syntax_mapping/builtins/common/50-f-sharp.toml new file mode 100644 index 00000000..a39e7ebd --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-f-sharp.toml @@ -0,0 +1,2 @@ +[mappings] +"F#" = ["*.fs"] diff --git a/src/syntax_mapping/builtins/common/50-git.toml b/src/syntax_mapping/builtins/common/50-git.toml new file mode 100644 index 00000000..44a49a25 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-git.toml @@ -0,0 +1,10 @@ +# Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/` +# See e.g. https://git-scm.com/docs/git-config#FILES + +[mappings] +"Git Config" = ["${XDG_CONFIG_HOME}/git/config", "${HOME}/.config/git/config"] +"Git Ignore" = ["${XDG_CONFIG_HOME}/git/ignore", "${HOME}/.config/git/ignore"] +"Git Attributes" = [ + "${XDG_CONFIG_HOME}/git/attributes", + "${HOME}/.config/git/attributes", +] diff --git a/src/syntax_mapping/builtins/common/50-jsonl.toml b/src/syntax_mapping/builtins/common/50-jsonl.toml new file mode 100644 index 00000000..4b70a4d0 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-jsonl.toml @@ -0,0 +1,3 @@ +# JSON Lines is a simple variation of JSON #2535 +[mappings] +"JSON" = ["*.jsonl"] diff --git a/src/syntax_mapping/builtins/common/50-nginx.toml b/src/syntax_mapping/builtins/common/50-nginx.toml new file mode 100644 index 00000000..305418bb --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-nginx.toml @@ -0,0 +1,2 @@ +[mappings] +"nginx" = ["nginx.conf", "mime.types"] diff --git a/src/syntax_mapping/builtins/common/50-nmap.toml b/src/syntax_mapping/builtins/common/50-nmap.toml new file mode 100644 index 00000000..f79a5e97 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-nmap.toml @@ -0,0 +1,3 @@ +[mappings] +# See #2151, https://nmap.org/book/nse-language.html +"Lua" = ["*.nse"] diff --git a/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml new file mode 100644 index 00000000..70e51c92 
--- /dev/null +++ b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml @@ -0,0 +1,3 @@ +# 1515 +[mappings] +"JavaScript (Babel)" = ["*.pac"] diff --git a/src/syntax_mapping/builtins/common/50-ron.toml b/src/syntax_mapping/builtins/common/50-ron.toml new file mode 100644 index 00000000..bc04221b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-ron.toml @@ -0,0 +1,3 @@ +# Rusty Object Notation #2427 +[mappings] +"Rust" = ["*.ron"] diff --git a/src/syntax_mapping/builtins/common/50-sarif.toml b/src/syntax_mapping/builtins/common/50-sarif.toml new file mode 100644 index 00000000..2542b9cd --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-sarif.toml @@ -0,0 +1,3 @@ +# SARIF is a format for reporting static analysis results #2695 +[mappings] +"JSON" = ["*.sarif"] diff --git a/src/syntax_mapping/builtins/common/50-ssh.toml b/src/syntax_mapping/builtins/common/50-ssh.toml new file mode 100644 index 00000000..6ec24050 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-ssh.toml @@ -0,0 +1,2 @@ +[mappings] +"SSH Config" = ["**/.ssh/config"] diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml new file mode 100644 index 00000000..d87537d7 --- /dev/null +++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml @@ -0,0 +1,5 @@ +[mappings] +"MappingTarget::MapExtensionToUnknown" = [ + # common extension used for all kinds of formats + "*.conf", +] diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml new file mode 100644 index 00000000..21941ebc --- /dev/null +++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml @@ -0,0 +1,7 @@ +[mappings] +"MappingTarget::MapToUnknown" = [ + # "NAnt Build File" should only match *.build files, not files named "build" + "build", + # "bin/rails" scripts in a Ruby project 
misidentified as HTML (Rails) #1008 + "rails", +] diff --git a/src/syntax_mapping/builtins/linux/.gitkeep b/src/syntax_mapping/builtins/linux/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/linux/50-os-release.toml b/src/syntax_mapping/builtins/linux/50-os-release.toml new file mode 100644 index 00000000..791599aa --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-os-release.toml @@ -0,0 +1,7 @@ +[mappings] +"Bourne Again Shell (bash)" = [ + "/etc/os-release", + "/usr/lib/os-release", + "/etc/initrd-release", + "/usr/lib/extension-release.d/extension-release.*", +] diff --git a/src/syntax_mapping/builtins/linux/50-pacman.toml b/src/syntax_mapping/builtins/linux/50-pacman.toml new file mode 100644 index 00000000..655118c5 --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-pacman.toml @@ -0,0 +1,3 @@ +[mappings] +# pacman hooks +"INI" = ["/usr/share/libalpm/hooks/*.hook", "/etc/pacman.d/hooks/*.hook"] diff --git a/src/syntax_mapping/builtins/linux/50-systemd.toml b/src/syntax_mapping/builtins/linux/50-systemd.toml new file mode 100644 index 00000000..6f91b0be --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-systemd.toml @@ -0,0 +1,21 @@ +[mappings] +"INI" = [ + "**/systemd/**/*.conf", + "**/systemd/**/*.example", + "*.automount", + "*.device", + "*.dnssd", + "*.link", + "*.mount", + "*.netdev", + "*.network", + "*.nspawn", + "*.path", + "*.service", + "*.scope", + "*.slice", + "*.socket", + "*.swap", + "*.target", + "*.timer", +] diff --git a/src/syntax_mapping/builtins/macos/.gitkeep b/src/syntax_mapping/builtins/macos/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/unix-family/.gitkeep b/src/syntax_mapping/builtins/unix-family/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/syntax_mapping/builtins/unix-family/50-apache.toml b/src/syntax_mapping/builtins/unix-family/50-apache.toml new file mode 100644 index 00000000..dfb920f3 
--- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-apache.toml @@ -0,0 +1,2 @@ +[mappings] +"Apache Conf" = ["/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml new file mode 100644 index 00000000..f2a9e224 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml @@ -0,0 +1,2 @@ +[mappings] +"YAML" = ["fish_history"] diff --git a/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml new file mode 100644 index 00000000..6c788d1d --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml @@ -0,0 +1,3 @@ +# KornShell is backward-compatible with the Bourne shell #2633 +[mappings] +"Bourne Again Shell (bash)" = ["*.ksh"] diff --git a/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml new file mode 100644 index 00000000..c798358c --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml @@ -0,0 +1,2 @@ +[mappings] +"Email" = ["/var/spool/mail/*", "/var/mail/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-nginx.toml b/src/syntax_mapping/builtins/unix-family/50-nginx.toml new file mode 100644 index 00000000..580b65d8 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-nginx.toml @@ -0,0 +1,2 @@ +[mappings] +"nginx" = ["/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-shell.toml b/src/syntax_mapping/builtins/unix-family/50-shell.toml new file mode 100644 index 00000000..d015ca81 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-shell.toml @@ -0,0 +1,5 @@ +[mappings] +"Bourne Again Shell (bash)" = [ + # used by lots of shells + "/etc/profile", +] diff --git a/src/syntax_mapping/builtins/windows/.gitkeep 
b/src/syntax_mapping/builtins/windows/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/benchmarks/run-benchmarks.sh b/tests/benchmarks/run-benchmarks.sh index 0f43bc6b..2809ccbb 100755 --- a/tests/benchmarks/run-benchmarks.sh +++ b/tests/benchmarks/run-benchmarks.sh @@ -9,6 +9,13 @@ if ! command -v hyperfine > /dev/null 2>&1; then exit 1 fi +# Check that jq is installed. +if ! command -v jq > /dev/null 2>&1; then + echo "'jq' does not seem to be installed." + echo "You can get it here: https://jqlang.github.io/jq/download/" + exit 1 +fi + # Check that python3 is installed. if ! command -v python3 > /dev/null 2>&1; then echo "'python3' does not seem to be installed." @@ -95,10 +102,20 @@ hyperfine \ cat "$RESULT_DIR/startup-time.md" >> "$REPORT" +heading "Startup time without syntax highlighting" +hyperfine \ + "$(printf "%q" "$BAT") --no-config startup-time-src/small-CpuInfo-file.cpuinfo" \ + --command-name "bat … small-CpuInfo-file.cpuinfo" \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-without-syntax-highlighting.md" \ + --export-json "$RESULT_DIR/startup-time-without-syntax-highlighting.json" +cat "$RESULT_DIR/startup-time-without-syntax-highlighting.md" >> "$REPORT" + heading "Startup time with syntax highlighting" hyperfine \ "$(printf "%q" "$BAT") --no-config --color=always startup-time-src/small-CpuInfo-file.cpuinfo" \ - --command-name "bat … small-CpuInfo-file.cpuinfo" \ + --command-name "bat … --color=always small-CpuInfo-file.cpuinfo" \ --warmup "$WARMUP_COUNT" \ --runs "$RUN_COUNT" \ --export-markdown "$RESULT_DIR/startup-time-with-syntax-highlighting.md" \ @@ -117,6 +134,40 @@ hyperfine \ cat "$RESULT_DIR/startup-time-with-syntax-with-dependencies.md" >> "$REPORT" +heading "Startup time with indeterminant syntax" +hyperfine \ + "$(printf "%q" "$BAT") --no-config --color=always startup-time-src/mystery-file" \ + --shell none \ + --command-name 'bat … mystery-file' \ + --warmup 
"$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" \ + --export-json "$RESULT_DIR/startup-time-with-indeterminant-syntax.json" +cat "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" >> "$REPORT" + +heading "Startup time with manually set syntax" +hyperfine \ + "$(printf "%q" "$BAT") --no-config --color=always --language=Dockerfile startup-time-src/mystery-file" \ + --shell none \ + --command-name 'bat … --language=Dockerfile mystery-file' \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-with-manually-set-syntax.md" \ + --export-json "$RESULT_DIR/startup-time-with-manually-set-syntax.json" +cat "$RESULT_DIR/startup-time-with-manually-set-syntax.md" >> "$REPORT" + +heading "Startup time with mapped syntax" +hyperfine \ + "$(printf "%q" "$BAT") --no-config --color=always startup-time-src/Containerfile" \ + --shell none \ + --command-name 'bat … Containerfile' \ + --warmup "$WARMUP_COUNT" \ + --runs "$RUN_COUNT" \ + --export-markdown "$RESULT_DIR/startup-time-with-mapped-syntax.md" \ + --export-json "$RESULT_DIR/startup-time-with-mapped-syntax.json" +cat "$RESULT_DIR/startup-time-with-mapped-syntax.md" >> "$REPORT" + + heading "Plain-text speed" hyperfine \ "$(printf "%q" "$BAT") --no-config --language=txt --style=plain highlighting-speed-src/numpy_test_multiarray.py" \ diff --git a/tests/benchmarks/startup-time-src/Containerfile b/tests/benchmarks/startup-time-src/Containerfile new file mode 100644 index 00000000..a93ce851 --- /dev/null +++ b/tests/benchmarks/startup-time-src/Containerfile @@ -0,0 +1,3 @@ +FROM docker.io/alpine:latest +COPY foo /root/bar +RUN sleep 60 diff --git a/tests/benchmarks/startup-time-src/mystery-file b/tests/benchmarks/startup-time-src/mystery-file new file mode 100644 index 00000000..a93ce851 --- /dev/null +++ b/tests/benchmarks/startup-time-src/mystery-file @@ -0,0 +1,3 @@ +FROM docker.io/alpine:latest +COPY foo 
/root/bar +RUN sleep 60