From 7fe4fdf33df3aa958b3263a939a8b24b9d06f534 Mon Sep 17 00:00:00 2001 From: cbolgiano Date: Mon, 25 Oct 2021 11:59:12 -0400 Subject: [PATCH] Introduce MapExtensionToUnknown MappingTarget (#1889) Co-authored-by: Martin Nordholts --- CHANGELOG.md | 3 ++ src/assets.rs | 98 ++++++++++++++++++++++++++++++++++--------- src/bin/bat/main.rs | 1 + src/syntax_mapping.rs | 20 ++++++--- 4 files changed, 97 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e2fc2df..92fad76d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Python syntax highlighting no longer suffers from abysmal performance in specific scenarios. See #1688 (@keith-hall) - First line not shown in diff context. See #1891 (@divagant-martian) +- Do not ignore syntaxes that handle file names with a `*.conf` extension. See #1703 (@cbolgiano) ## Performance @@ -41,6 +42,8 @@ - Deprecate `HighlightingAssets::syntaxes()` and `HighlightingAssets::syntax_for_file_name()`. Use `HighlightingAssets::get_syntaxes()` and `HighlightingAssets::get_syntax_for_path()` instead. They return a `Result` which is needed for upcoming lazy-loading work to improve startup performance. They also return which `SyntaxSet` the returned `SyntaxReference` belongs to. See #1747, #1755, #1776, #1862 (@Enselic) - Remove `HighlightingAssets::from_files` and `HighlightingAssets::save_to_cache`. Instead of calling the former and then the latter you now make a single call to `bat::assets::build`. See #1802 (@Enselic) - Replace the `error::Error(error::ErrorKind, _)` struct and enum with an `error::Error` enum. `Error(ErrorKind::UnknownSyntax, _)` becomes `Error::UnknownSyntax`, etc. Also remove the `error::ResultExt` trait. These changes stem from replacing `error-chain` with `thiserror`. See #1820 (@Enselic) +- Add new `MappingTarget` enum variant `MapExtensionToUnknown`. Refer to its docummentation for more information. Clients are adviced to treat `MapExtensionToUnknown` the same as `MapToUnknown` in exhaustive matches. See #1703 (@cbolgiano) + diff --git a/src/assets.rs b/src/assets.rs index 5b35d803..e299d02b 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -153,9 +153,12 @@ impl HighlightingAssets { } /// Detect the syntax based on, in order: - /// 1. Syntax mappings (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`) + /// 1. Syntax mappings with [MappingTarget::MapTo] and [MappingTarget::MapToUnknown] + /// (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`) /// 2. The file name (e.g. `Dockerfile`) - /// 3. The file name extension (e.g. `.rs`) + /// 3. Syntax mappings with [MappingTarget::MapExtensionToUnknown] + /// (e.g. `*.conf`) + /// 4. The file name extension (e.g. `.rs`) /// /// When detecting syntax based on syntax mappings, the full path is taken /// into account. When detecting syntax based on file name, no regard is @@ -165,9 +168,9 @@ impl HighlightingAssets { /// /// Returns [Error::UndetectedSyntax] if it was not possible detect syntax /// based on path/file name/extension (or if the path was mapped to - /// [MappingTarget::MapToUnknown]). In this case it is appropriate to fall - /// back to other methods to detect syntax. Such as using the contents of - /// the first line of the file. + /// [MappingTarget::MapToUnknown] or [MappingTarget::MapExtensionToUnknown]). + /// In this case it is appropriate to fall back to other methods to detect + /// syntax. Such as using the contents of the first line of the file. /// /// Returns [Error::UnknownSyntax] if a syntax mapping exist, but the mapped /// syntax does not exist. @@ -177,20 +180,31 @@ impl HighlightingAssets { mapping: &SyntaxMapping, ) -> Result { let path = path.as_ref(); - match mapping.get_syntax_for(path) { - Some(MappingTarget::MapToUnknown) => { + + let syntax_match = mapping.get_syntax_for(path); + + if let Some(MappingTarget::MapToUnknown) = syntax_match { + return Err(Error::UndetectedSyntax(path.to_string_lossy().into())); + } + + if let Some(MappingTarget::MapTo(syntax_name)) = syntax_match { + return self + .find_syntax_by_name(syntax_name)? + .ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())); + } + + let file_name = path.file_name().unwrap_or_default(); + + match (self.get_syntax_for_file_name(file_name)?, syntax_match) { + (Some(syntax), _) => Ok(syntax), + + (_, Some(MappingTarget::MapExtensionToUnknown)) => { Err(Error::UndetectedSyntax(path.to_string_lossy().into())) } - Some(MappingTarget::MapTo(syntax_name)) => self - .find_syntax_by_name(syntax_name)? - .ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())), - - None => { - let file_name = path.file_name().unwrap_or_default(); - self.get_extension_syntax(file_name)? - .ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())) - } + _ => self + .get_syntax_for_file_extension(file_name)? + .ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())), } } @@ -263,14 +277,24 @@ impl HighlightingAssets { .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })) } - fn get_extension_syntax(&self, file_name: &OsStr) -> Result> { + fn get_syntax_for_file_name(&self, file_name: &OsStr) -> Result> { let mut syntax = self.find_syntax_by_extension(Some(file_name))?; if syntax.is_none() { - syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?; + syntax = try_with_stripped_suffix(file_name, |stripped_file_name| { + self.get_syntax_for_file_name(stripped_file_name) // Note: recursion + })?; } + Ok(syntax) + } + + fn get_syntax_for_file_extension( + &self, + file_name: &OsStr, + ) -> Result> { + let mut syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?; if syntax.is_none() { syntax = try_with_stripped_suffix(file_name, |stripped_file_name| { - self.get_extension_syntax(stripped_file_name) // Note: recursion + self.get_syntax_for_file_extension(stripped_file_name) // Note: recursion })?; } Ok(syntax) @@ -530,6 +554,42 @@ mod tests { assert_eq!(test.syntax_for_file("test.h"), "C"); } + #[test] + fn syntax_detection_with_extension_mapping_to_unknown() { + let mut test = SyntaxDetectionTest::new(); + + // Normally, a CMakeLists.txt file shall use the CMake syntax, even if it is + // a bash script in disguise + assert_eq!( + test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"), + "CMake" + ); + + // Other .txt files shall use the Plain Text syntax + assert_eq!( + test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"), + "Plain Text" + ); + + // If we setup MapExtensionToUnknown on *.txt, the match on the full + // file name of "CMakeLists.txt" shall have higher prio, and CMake shall + // still be used for it + test.syntax_mapping + .insert("*.txt", MappingTarget::MapExtensionToUnknown) + .ok(); + assert_eq!( + test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"), + "CMake" + ); + + // However, for *other* files with a .txt extension, first-line fallback + // shall now be used + assert_eq!( + test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"), + "Bourne Again Shell (bash)" + ); + } + #[test] fn syntax_detection_is_case_sensitive() { let mut test = SyntaxDetectionTest::new(); diff --git a/src/bin/bat/main.rs b/src/bin/bat/main.rs index 53e246d1..d4ce33d9 100644 --- a/src/bin/bat/main.rs +++ b/src/bin/bat/main.rs @@ -72,6 +72,7 @@ fn get_syntax_mapping_to_paths<'a>( for mapping in mappings { match mapping { (_, MappingTarget::MapToUnknown) => {} + (_, MappingTarget::MapExtensionToUnknown) => {} (matcher, MappingTarget::MapTo(s)) => { let globs = map.entry(*s).or_insert_with(Vec::new); globs.push(matcher.glob().glob().into()); diff --git a/src/syntax_mapping.rs b/src/syntax_mapping.rs index 36b7bc3e..8b105df2 100644 --- a/src/syntax_mapping.rs +++ b/src/syntax_mapping.rs @@ -6,8 +6,21 @@ use globset::{Candidate, GlobBuilder, GlobMatcher}; #[derive(Debug, Clone, Copy, PartialEq)] pub enum MappingTarget<'a> { + /// For mapping a path to a specific syntax. MapTo(&'a str), + + /// For mapping a path (typically an extension-less file name) to an unknown + /// syntax. This typically means later using the contents of the first line + /// of the file to determine what syntax to use. MapToUnknown, + + /// For mapping a file extension (e.g. `*.conf`) to an unknown syntax. This + /// typically means later using the contents of the first line of the file + /// to determine what syntax to use. However, if a syntax handles a file + /// name that happens to have the given file extension (e.g. `resolv.conf`), + /// then that association will have higher precedence, and the mapping will + /// be ignored. + MapExtensionToUnknown, } #[derive(Debug, Clone, Default)] @@ -54,12 +67,7 @@ impl<'a> SyntaxMapping<'a> { // Nginx and Apache syntax files both want to style all ".conf" files // see #1131 and #1137 mapping - .insert("*.conf", MappingTarget::MapToUnknown) - .unwrap(); - - // Re-insert a mapping for resolv.conf, see #1510 - mapping - .insert("resolv.conf", MappingTarget::MapTo("resolv")) + .insert("*.conf", MappingTarget::MapExtensionToUnknown) .unwrap(); for glob in &[