Introduce MapExtensionToUnknown MappingTarget (#1889)

Co-authored-by: Martin Nordholts <enselic@gmail.com>
This commit is contained in:
cbolgiano 2021-10-25 11:59:12 -04:00 committed by GitHub
parent 0b63ad5e8b
commit 7fe4fdf33d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 97 additions and 25 deletions

View File

@ -9,6 +9,7 @@
- Python syntax highlighting no longer suffers from abysmal performance in specific scenarios. See #1688 (@keith-hall)
- First line not shown in diff context. See #1891 (@divagant-martian)
- Do not ignore syntaxes that handle file names with a `*.conf` extension. See #1703 (@cbolgiano)
## Performance
@ -41,6 +42,8 @@
- Deprecate `HighlightingAssets::syntaxes()` and `HighlightingAssets::syntax_for_file_name()`. Use `HighlightingAssets::get_syntaxes()` and `HighlightingAssets::get_syntax_for_path()` instead. They return a `Result` which is needed for upcoming lazy-loading work to improve startup performance. They also return which `SyntaxSet` the returned `SyntaxReference` belongs to. See #1747, #1755, #1776, #1862 (@Enselic)
- Remove `HighlightingAssets::from_files` and `HighlightingAssets::save_to_cache`. Instead of calling the former and then the latter you now make a single call to `bat::assets::build`. See #1802 (@Enselic)
- Replace the `error::Error(error::ErrorKind, _)` struct and enum with an `error::Error` enum. `Error(ErrorKind::UnknownSyntax, _)` becomes `Error::UnknownSyntax`, etc. Also remove the `error::ResultExt` trait. These changes stem from replacing `error-chain` with `thiserror`. See #1820 (@Enselic)
- Add new `MappingTarget` enum variant `MapExtensionToUnknown`. Refer to its docummentation for more information. Clients are adviced to treat `MapExtensionToUnknown` the same as `MapToUnknown` in exhaustive matches. See #1703 (@cbolgiano)

View File

@ -153,9 +153,12 @@ impl HighlightingAssets {
}
/// Detect the syntax based on, in order:
/// 1. Syntax mappings (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`)
/// 1. Syntax mappings with [MappingTarget::MapTo] and [MappingTarget::MapToUnknown]
/// (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`)
/// 2. The file name (e.g. `Dockerfile`)
/// 3. The file name extension (e.g. `.rs`)
/// 3. Syntax mappings with [MappingTarget::MapExtensionToUnknown]
/// (e.g. `*.conf`)
/// 4. The file name extension (e.g. `.rs`)
///
/// When detecting syntax based on syntax mappings, the full path is taken
/// into account. When detecting syntax based on file name, no regard is
@ -165,9 +168,9 @@ impl HighlightingAssets {
///
/// Returns [Error::UndetectedSyntax] if it was not possible detect syntax
/// based on path/file name/extension (or if the path was mapped to
/// [MappingTarget::MapToUnknown]). In this case it is appropriate to fall
/// back to other methods to detect syntax. Such as using the contents of
/// the first line of the file.
/// [MappingTarget::MapToUnknown] or [MappingTarget::MapExtensionToUnknown]).
/// In this case it is appropriate to fall back to other methods to detect
/// syntax. Such as using the contents of the first line of the file.
///
/// Returns [Error::UnknownSyntax] if a syntax mapping exist, but the mapped
/// syntax does not exist.
@ -177,20 +180,31 @@ impl HighlightingAssets {
mapping: &SyntaxMapping,
) -> Result<SyntaxReferenceInSet> {
let path = path.as_ref();
match mapping.get_syntax_for(path) {
Some(MappingTarget::MapToUnknown) => {
let syntax_match = mapping.get_syntax_for(path);
if let Some(MappingTarget::MapToUnknown) = syntax_match {
return Err(Error::UndetectedSyntax(path.to_string_lossy().into()));
}
if let Some(MappingTarget::MapTo(syntax_name)) = syntax_match {
return self
.find_syntax_by_name(syntax_name)?
.ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned()));
}
let file_name = path.file_name().unwrap_or_default();
match (self.get_syntax_for_file_name(file_name)?, syntax_match) {
(Some(syntax), _) => Ok(syntax),
(_, Some(MappingTarget::MapExtensionToUnknown)) => {
Err(Error::UndetectedSyntax(path.to_string_lossy().into()))
}
Some(MappingTarget::MapTo(syntax_name)) => self
.find_syntax_by_name(syntax_name)?
.ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())),
None => {
let file_name = path.file_name().unwrap_or_default();
self.get_extension_syntax(file_name)?
.ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into()))
}
_ => self
.get_syntax_for_file_extension(file_name)?
.ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())),
}
}
@ -263,14 +277,24 @@ impl HighlightingAssets {
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
}
fn get_extension_syntax(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> {
fn get_syntax_for_file_name(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> {
let mut syntax = self.find_syntax_by_extension(Some(file_name))?;
if syntax.is_none() {
syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?;
syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
self.get_syntax_for_file_name(stripped_file_name) // Note: recursion
})?;
}
Ok(syntax)
}
fn get_syntax_for_file_extension(
&self,
file_name: &OsStr,
) -> Result<Option<SyntaxReferenceInSet>> {
let mut syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?;
if syntax.is_none() {
syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
self.get_extension_syntax(stripped_file_name) // Note: recursion
self.get_syntax_for_file_extension(stripped_file_name) // Note: recursion
})?;
}
Ok(syntax)
@ -530,6 +554,42 @@ mod tests {
assert_eq!(test.syntax_for_file("test.h"), "C");
}
#[test]
fn syntax_detection_with_extension_mapping_to_unknown() {
let mut test = SyntaxDetectionTest::new();
// Normally, a CMakeLists.txt file shall use the CMake syntax, even if it is
// a bash script in disguise
assert_eq!(
test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"),
"CMake"
);
// Other .txt files shall use the Plain Text syntax
assert_eq!(
test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"),
"Plain Text"
);
// If we setup MapExtensionToUnknown on *.txt, the match on the full
// file name of "CMakeLists.txt" shall have higher prio, and CMake shall
// still be used for it
test.syntax_mapping
.insert("*.txt", MappingTarget::MapExtensionToUnknown)
.ok();
assert_eq!(
test.syntax_for_file_with_content("CMakeLists.txt", "#!/bin/bash"),
"CMake"
);
// However, for *other* files with a .txt extension, first-line fallback
// shall now be used
assert_eq!(
test.syntax_for_file_with_content("some-other.txt", "#!/bin/bash"),
"Bourne Again Shell (bash)"
);
}
#[test]
fn syntax_detection_is_case_sensitive() {
let mut test = SyntaxDetectionTest::new();

View File

@ -72,6 +72,7 @@ fn get_syntax_mapping_to_paths<'a>(
for mapping in mappings {
match mapping {
(_, MappingTarget::MapToUnknown) => {}
(_, MappingTarget::MapExtensionToUnknown) => {}
(matcher, MappingTarget::MapTo(s)) => {
let globs = map.entry(*s).or_insert_with(Vec::new);
globs.push(matcher.glob().glob().into());

View File

@ -6,8 +6,21 @@ use globset::{Candidate, GlobBuilder, GlobMatcher};
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum MappingTarget<'a> {
/// For mapping a path to a specific syntax.
MapTo(&'a str),
/// For mapping a path (typically an extension-less file name) to an unknown
/// syntax. This typically means later using the contents of the first line
/// of the file to determine what syntax to use.
MapToUnknown,
/// For mapping a file extension (e.g. `*.conf`) to an unknown syntax. This
/// typically means later using the contents of the first line of the file
/// to determine what syntax to use. However, if a syntax handles a file
/// name that happens to have the given file extension (e.g. `resolv.conf`),
/// then that association will have higher precedence, and the mapping will
/// be ignored.
MapExtensionToUnknown,
}
#[derive(Debug, Clone, Default)]
@ -54,12 +67,7 @@ impl<'a> SyntaxMapping<'a> {
// Nginx and Apache syntax files both want to style all ".conf" files
// see #1131 and #1137
mapping
.insert("*.conf", MappingTarget::MapToUnknown)
.unwrap();
// Re-insert a mapping for resolv.conf, see #1510
mapping
.insert("resolv.conf", MappingTarget::MapTo("resolv"))
.insert("*.conf", MappingTarget::MapExtensionToUnknown)
.unwrap();
for glob in &[