Migrate `SyntaxMapping` impl to new system

Most existing builtins are removed but not yet ported, so the test
`user_can_override_builtin_mappings` should fail. It should pass once the old
rules have been ported.
This commit is contained in:
cyqsimon 2023-11-05 01:41:39 +08:00
parent 1c7c9a6b6d
commit cfd622d6e1
No known key found for this signature in database
GPG Key ID: 1D8CE2F297390D65
5 changed files with 48 additions and 208 deletions

View File

@ -16,6 +16,8 @@
- Update Arch Linux package URL in README files #2779 (@brunobell)
- Update and improve `zsh` completion, see #2772 (@okapia)
- More extensible syntax mapping mechanism #2755 (@cyqsimon)
- [BREAKING] Precedence order of user-defined syntax mappings (using `-m/--map-syntax`) has been reversed: mappings given earlier now take precedence over later ones
- `-m '*.foo:alpha' -m '*.foo:bravo'` now maps `bar.foo` to `alpha` (previously `bravo`)
## Syntaxes
@ -23,6 +25,10 @@
## `bat` as a library
- Changes to `syntax_mapping::SyntaxMapping` #2755 (@cyqsimon)
- `SyntaxMapping::get_syntax_for` is now correctly public
- [BREAKING] `SyntaxMapping::{empty,builtin}` are removed; use `SyntaxMapping::new` instead
- [BREAKING] `SyntaxMapping::mappings` is replaced by `SyntaxMapping::{builtin,custom,all}_mappings`
# v0.24.0

View File

@ -441,7 +441,7 @@ mod tests {
// Builds the test fixture: binary-embedded highlighting assets, a syntax mapping and a fresh
// temporary directory (panics if the directory cannot be created).
// NOTE(review): this is a diff rendering without +/- markers — the `SyntaxMapping::builtin()`
// line appears to be the pre-commit version and the `SyntaxMapping::new()` line its replacement
// (the changelog in this commit states that `builtin` was removed in favour of `new`).
fn new() -> Self {
SyntaxDetectionTest {
assets: HighlightingAssets::from_binary(),
syntax_mapping: SyntaxMapping::builtin(),
syntax_mapping: SyntaxMapping::new(),
temp_dir: TempDir::new().expect("creation of temporary directory"),
}
}

View File

@ -121,7 +121,7 @@ impl App {
_ => unreachable!("other values for --paging are not allowed"),
};
let mut syntax_mapping = SyntaxMapping::builtin();
let mut syntax_mapping = SyntaxMapping::new();
if let Some(values) = self.matches.get_many::<String>("ignored-suffix") {
for suffix in values {

View File

@ -79,7 +79,7 @@ fn run_cache_subcommand(
}
fn get_syntax_mapping_to_paths<'a>(
mappings: &[(GlobMatcher, MappingTarget<'a>)],
mappings: &[(&GlobMatcher, &MappingTarget<'a>)],
) -> HashMap<&'a str, Vec<String>> {
let mut map = HashMap::new();
for mapping in mappings {
@ -123,7 +123,7 @@ pub fn get_languages(config: &Config, cache_dir: &Path) -> Result<String> {
languages.sort_by_key(|lang| lang.name.to_uppercase());
let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.mappings());
let configured_languages = get_syntax_mapping_to_paths(&config.syntax_mapping.all_mappings());
for lang in &mut languages {
if let Some(additional_paths) = configured_languages.get(lang.name.as_str()) {

View File

@ -3,6 +3,7 @@ use std::path::Path;
use globset::{Candidate, GlobBuilder, GlobMatcher};
use crate::error::Result;
use builtin::BUILTIN_MAPPINGS;
use ignored_suffixes::IgnoredSuffixes;
mod builtin;
@ -39,201 +40,58 @@ pub enum MappingTarget<'a> {
/// Holds glob-to-syntax mappings together with an `IgnoredSuffixes` value.
#[derive(Debug, Clone, Default)]
pub struct SyntaxMapping<'a> {
// NOTE(review): diff rendering without +/- markers — `mappings` appears to be the pre-commit
// field, superseded by `custom_mappings` below.
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
/// User-defined mappings at run time.
custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
pub(crate) ignored_suffixes: IgnoredSuffixes<'a>,
}
impl<'a> SyntaxMapping<'a> {
/// Creates a mapping through the derived `Default` impl, i.e. with no user-defined mappings.
// NOTE(review): diff rendering without +/- markers — `empty` appears to be the pre-commit name
// and `new` the post-commit name of this constructor; both share the body below.
pub fn empty() -> SyntaxMapping<'a> {
pub fn new() -> SyntaxMapping<'a> {
Default::default()
}
/// Builds a `SyntaxMapping` pre-populated with the hard-coded builtin rules, starting from
/// `Self::empty()` and calling `insert` once per glob.
///
/// Static globs are inserted with `.unwrap()`, so a pattern that fails to compile panics.
// NOTE(review): this page is a diff rendering without +/- markers. Per the commit message
// ("Most existing builtins are removed but not yet ported") this function appears to be
// removed by the commit.
// NOTE(review): the pre-commit lookup iterates `self.mappings` in reverse, so rules inserted
// later here presumably take precedence over earlier ones — statement order is significant.
pub fn builtin() -> SyntaxMapping<'a> {
let mut mapping = Self::empty();
mapping.insert("*.h", MappingTarget::MapTo("C++")).unwrap();
mapping
.insert(".clang-format", MappingTarget::MapTo("YAML"))
.unwrap();
mapping.insert("*.fs", MappingTarget::MapTo("F#")).unwrap();
mapping
.insert("build", MappingTarget::MapToUnknown)
.unwrap();
mapping
.insert("**/.ssh/config", MappingTarget::MapTo("SSH Config"))
.unwrap();
mapping
.insert(
"**/bat/config",
MappingTarget::MapTo("Bourne Again Shell (bash)"),
)
.unwrap();
mapping
.insert(
"/etc/profile",
MappingTarget::MapTo("Bourne Again Shell (bash)"),
)
.unwrap();
mapping
.insert(
"os-release",
MappingTarget::MapTo("Bourne Again Shell (bash)"),
)
.unwrap();
mapping
.insert("*.pac", MappingTarget::MapTo("JavaScript (Babel)"))
.unwrap();
mapping
.insert("fish_history", MappingTarget::MapTo("YAML"))
.unwrap();
for glob in ["*.jsonl", "*.sarif"] {
mapping.insert(glob, MappingTarget::MapTo("JSON")).unwrap();
}
// See #2151, https://nmap.org/book/nse-language.html
mapping
.insert("*.nse", MappingTarget::MapTo("Lua"))
.unwrap();
// See #1008
mapping
.insert("rails", MappingTarget::MapToUnknown)
.unwrap();
mapping
.insert("Containerfile", MappingTarget::MapTo("Dockerfile"))
.unwrap();
mapping
.insert("*.ksh", MappingTarget::MapTo("Bourne Again Shell (bash)"))
.unwrap();
// Nginx and Apache syntax files both want to style all ".conf" files
// see #1131 and #1137
mapping
.insert("*.conf", MappingTarget::MapExtensionToUnknown)
.unwrap();
for glob in &[
"/etc/nginx/**/*.conf",
"/etc/nginx/sites-*/**/*",
"nginx.conf",
"mime.types",
] {
mapping.insert(glob, MappingTarget::MapTo("nginx")).unwrap();
}
for glob in &[
"/etc/apache2/**/*.conf",
"/etc/apache2/sites-*/**/*",
"httpd.conf",
] {
mapping
.insert(glob, MappingTarget::MapTo("Apache Conf"))
.unwrap();
}
for glob in &[
"**/systemd/**/*.conf",
"**/systemd/**/*.example",
"*.automount",
"*.device",
"*.dnssd",
"*.link",
"*.mount",
"*.netdev",
"*.network",
"*.nspawn",
"*.path",
"*.service",
"*.scope",
"*.slice",
"*.socket",
"*.swap",
"*.target",
"*.timer",
] {
mapping.insert(glob, MappingTarget::MapTo("INI")).unwrap();
}
// unix mail spool
for glob in &["/var/spool/mail/*", "/var/mail/*"] {
mapping.insert(glob, MappingTarget::MapTo("Email")).unwrap()
}
// pacman hooks
mapping
.insert("*.hook", MappingTarget::MapTo("INI"))
.unwrap();
mapping
.insert("*.ron", MappingTarget::MapTo("Rust"))
.unwrap();
// Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/`
// See e.g. https://git-scm.com/docs/git-config#FILES
// Unset or empty variables are skipped. When both directories are available and equal, the
// rules are registered once; when they differ, they are registered for both directories.
match (
std::env::var_os("XDG_CONFIG_HOME").filter(|val| !val.is_empty()),
std::env::var_os("HOME")
.filter(|val| !val.is_empty())
.map(|home| Path::new(&home).join(".config")),
) {
(Some(xdg_config_home), Some(default_config_home))
if xdg_config_home == default_config_home => {
insert_git_config_global(&mut mapping, &xdg_config_home)
}
(Some(xdg_config_home), Some(default_config_home)) /* else guard */ => {
insert_git_config_global(&mut mapping, &xdg_config_home);
insert_git_config_global(&mut mapping, &default_config_home)
}
(Some(config_home), None) => insert_git_config_global(&mut mapping, &config_home),
(None, Some(config_home)) => insert_git_config_global(&mut mapping, &config_home),
(None, None) => (),
};
// Maps `<config_home>/git/{config,ignore,attributes}` to the matching Git syntaxes.
// Unlike the static globs above, insert errors are discarded with `.ok()` instead of
// panicking (presumably because these paths come from the environment).
fn insert_git_config_global(mapping: &mut SyntaxMapping, config_home: impl AsRef<Path>) {
let git_config_path = config_home.as_ref().join("git");
mapping
.insert(
&git_config_path.join("config").to_string_lossy(),
MappingTarget::MapTo("Git Config"),
)
.ok();
mapping
.insert(
&git_config_path.join("ignore").to_string_lossy(),
MappingTarget::MapTo("Git Ignore"),
)
.ok();
mapping
.insert(
&git_config_path.join("attributes").to_string_lossy(),
MappingTarget::MapTo("Git Attributes"),
)
.ok();
}
mapping
}
/// Compiles `from` into a glob matcher and appends the `(matcher, to)` pair to the stored
/// mappings.
///
/// # Errors
///
/// Returns the error from `make_glob_matcher` if `from` cannot be compiled.
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let matcher = make_glob_matcher(from)?;
// NOTE(review): diff rendering without +/- markers — the push onto `self.mappings` appears
// to be the pre-commit line, the push onto `self.custom_mappings` its replacement.
self.mappings.push((matcher, to));
self.custom_mappings.push((matcher, to));
Ok(())
}
// NOTE(review): diff rendering without +/- markers — the next two lines appear to be the
// signature and body of the pre-commit `mappings` accessor; everything from the doc comment
// onwards is its replacement, `all_mappings`. Both share the final closing brace.
pub fn mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
&self.mappings
/// Returns all mappings. User-defined mappings are listed before builtin
/// mappings; mappings in front have higher precedence.
///
/// Note that this function ignores builtin mappings that are invalid under
/// the current environment (i.e. their rules require an environment
/// variable that is unset).
pub fn all_mappings(&self) -> Vec<(&GlobMatcher, &MappingTarget<'a>)> {
self.custom_mappings()
.iter()
// Turn each `&(matcher, target)` into a `(&matcher, &target)` pair so the items have the
// same shape as the builtin entries chained below.
.map(|(matcher, target)| (matcher, target)) // as_ref
.chain(self.builtin_mappings())
// A fresh `Vec` is built on every call.
.collect()
}
/// Collects the builtin mappings that are usable in the current environment,
/// ordered from highest to lowest precedence.
///
/// Entries of `BUILTIN_MAPPINGS` whose matcher is absent (their rule depends
/// on an environment variable that is unset) are skipped.
pub fn builtin_mappings(&self) -> Vec<(&'static GlobMatcher, &'static MappingTarget<'static>)> {
    let mut valid = Vec::new();
    for (matcher, target) in BUILTIN_MAPPINGS.iter() {
        if let Some(glob) = matcher.as_ref() {
            valid.push((glob, target));
        }
    }
    valid
}
/// Borrows the user-defined mappings as a slice, in the order they are stored.
pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
    self.custom_mappings.as_slice()
}
pub fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
// Try matching on the file name as-is.
let candidate = Candidate::new(&path);
let candidate_filename = path.as_ref().file_name().map(Candidate::new);
for (ref glob, ref syntax) in self.mappings.iter().rev() {
for (glob, syntax) in self.all_mappings().into_iter() {
if glob.is_match_candidate(&candidate)
|| candidate_filename
.as_ref()
@ -261,7 +119,7 @@ mod tests {
use super::*;
#[test]
fn basic() {
let mut map = SyntaxMapping::empty();
let mut map = SyntaxMapping::new();
map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
.ok();
map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
@ -281,7 +139,7 @@ mod tests {
#[test]
fn user_can_override_builtin_mappings() {
let mut map = SyntaxMapping::builtin();
let mut map = SyntaxMapping::new();
assert_eq!(
map.get_syntax_for("/etc/profile"),
@ -297,35 +155,11 @@ mod tests {
#[test]
// Checks that the mapping resolves `/path/to/build` to `MappingTarget::MapToUnknown`.
// NOTE(review): diff rendering without +/- markers — `SyntaxMapping::builtin()` appears to be
// the pre-commit line and `SyntaxMapping::new()` its replacement.
fn builtin_mappings() {
let map = SyntaxMapping::builtin();
let map = SyntaxMapping::new();
assert_eq!(
map.get_syntax_for("/path/to/build"),
Some(MappingTarget::MapToUnknown)
);
}
#[test]
/// Verifies that `SyntaxMapping::builtin()` doesn't repeat `Glob`-based keys.
///
/// Each mapping is compared against every mapping after it; on failure the
/// message lists all targets the repeated glob pattern is mapped to.
fn no_duplicate_builtin_keys() {
    let mappings = SyntaxMapping::builtin().mappings;
    for i in 0..mappings.len() {
        // Only look at later entries so each pair is compared once.
        let tail = mappings[i + 1..].iter();
        let (dupl, _): (Vec<_>, Vec<_>) =
            tail.partition(|item| item.0.glob() == mappings[i].0.glob());

        // emit repeats on failure
        assert_eq!(
            dupl.len(),
            0,
            "Glob pattern `{}` mapped to multiple: {:?}",
            mappings[i].0.glob().glob(),
            {
                let mut dupl_targets: Vec<MappingTarget> =
                    dupl.iter().map(|item| item.1).collect();
                dupl_targets.push(mappings[i].1);
                // Yield the vector itself: `Vec::push` returns `()`, which would
                // otherwise be what gets formatted into the failure message.
                dupl_targets
            },
        )
    }
}
}