From bd8a13dbc920b8739ef0f5e3fa84d90b3a24608b Mon Sep 17 00:00:00 2001 From: sharkdp Date: Sun, 22 Mar 2020 09:55:13 +0100 Subject: [PATCH] Initial implementation of glob-based syntax mapping --- Cargo.lock | 23 ++++++++++++ Cargo.toml | 1 + src/assets.rs | 56 +++++++++++++++++++--------- src/bin/bat/app.rs | 8 ++-- src/config.rs | 4 +- src/errors.rs | 1 + src/syntax_mapping.rs | 86 ++++++++++++++++++++++++++++++++++--------- 7 files changed, 137 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e52b40d..b9c6e8fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -127,6 +127,7 @@ dependencies = [ "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.12.2 (registry+https://github.com/rust-lang/crates.io-index)", "git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "liquid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)", "shell-words 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -201,6 +202,14 @@ dependencies = [ "byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "bstr" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "byte-tools" version = "0.3.1" @@ -542,6 +551,18 @@ name = "glob" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "globset" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)", + "bstr 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "hermit-abi" version = "0.1.8" @@ -1453,6 +1474,7 @@ dependencies = [ "checksum blake2b_simd 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" "checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" "checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +"checksum bstr 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)" = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41" "checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" @@ -1495,6 +1517,7 @@ dependencies = [ "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7da16ceafe24cedd9ba02c4463a2b506b6493baf4317c79c5acb553134a3c15" "checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +"checksum globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "7ad1da430bd7281dde2576f44c84cc3f0f7b475e7202cd503042dff01a8c8120" "checksum hermit-abi 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8" "checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" "checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" diff --git a/Cargo.toml b/Cargo.toml index b664138c..7439ad71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ content_inspector = "0.2.4" encoding = "0.2" shell-words = "0.1.0" unicode-width = "0.1.7" +globset = "0.4" [dependencies.git2] version = "0.13" diff --git a/src/assets.rs b/src/assets.rs index 4196b6af..73491aac 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -9,7 +9,7 @@ use syntect::parsing::{SyntaxReference, SyntaxSet, SyntaxSetBuilder}; use crate::errors::*; use crate::inputfile::{InputFile, InputFileReader}; -use crate::syntax_mapping::SyntaxMapping; +use crate::syntax_mapping::{MappingTarget, SyntaxMapping}; #[derive(Debug)] pub struct HighlightingAssets { @@ -184,25 +184,28 @@ impl HighlightingAssets { (Some(language), _) => self.syntax_set.find_syntax_by_token(language), (None, InputFile::Ordinary(filename)) => { let path = Path::new(filename); + let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); let extension = path.extension().and_then(|x| x.to_str()).unwrap_or(""); - let file_name = mapping.replace(file_name); - let extension = mapping.replace(extension); - let ext_syntax = self .syntax_set .find_syntax_by_extension(&file_name) .or_else(|| self.syntax_set.find_syntax_by_extension(&extension)); - let line_syntax = if ext_syntax.is_none() { - String::from_utf8(reader.first_line.clone()) - .ok() - .and_then(|l| self.syntax_set.find_syntax_by_first_line(&l)) - } else { - None - }; + let line_syntax = String::from_utf8(reader.first_line.clone()) + .ok() + .and_then(|l| self.syntax_set.find_syntax_by_first_line(&l)); - ext_syntax.or(line_syntax) + dbg!(path); + match dbg!(mapping.get_syntax_for(path)) { + Some(MappingTarget::MapTo(syntax_name)) => { + // TODO: we should probably return an error here if this syntax can not be + // found. Currently, we just fall back to 'plain'. + self.syntax_set.find_syntax_by_name(syntax_name) + } + Some(MappingTarget::MapToUnknown) => line_syntax, + None => ext_syntax.or(line_syntax), + } } (None, InputFile::StdIn) => String::from_utf8(reader.first_line.clone()) .ok() @@ -225,19 +228,19 @@ mod tests { use crate::assets::HighlightingAssets; use crate::inputfile::InputFile; - use crate::syntax_mapping::SyntaxMapping; + use crate::syntax_mapping::{MappingTarget, SyntaxMapping}; - struct SyntaxDetectionTest { + struct SyntaxDetectionTest<'a> { assets: HighlightingAssets, - pub syntax_mapping: SyntaxMapping, + pub syntax_mapping: SyntaxMapping<'a>, temp_dir: TempDir, } - impl SyntaxDetectionTest { + impl<'a> SyntaxDetectionTest<'a> { fn new() -> Self { SyntaxDetectionTest { assets: HighlightingAssets::from_binary(), - syntax_mapping: SyntaxMapping::new(), + syntax_mapping: SyntaxMapping::builtin(), temp_dir: TempDir::new("bat_syntax_detection_tests") .expect("creation of temporary directory"), } @@ -294,6 +297,10 @@ mod tests { test.syntax_name_with_content("my_script", "#!/bin/bash"), "Bourne Again Shell (bash)" ); + assert_eq!( + test.syntax_name_with_content("build", "#!/bin/bash"), + "Bourne Again Shell (bash)" + ); assert_eq!(test.syntax_name_with_content("my_script", " { pub theme: String, /// File extension/name mappings - pub syntax_mapping: SyntaxMapping, + pub syntax_mapping: SyntaxMapping<'a>, /// Command to start the pager pub pager: Option<&'a str>, diff --git a/src/errors.rs b/src/errors.rs index 607661cf..a0f7e5eb 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -6,6 +6,7 @@ error_chain! { Io(::std::io::Error); SyntectError(::syntect::LoadingError); ParseIntError(::std::num::ParseIntError); + GlobParsingError(::globset::Error); } } diff --git a/src/syntax_mapping.rs b/src/syntax_mapping.rs index 267feb63..84c89d32 100644 --- a/src/syntax_mapping.rs +++ b/src/syntax_mapping.rs @@ -1,35 +1,85 @@ -use std::borrow::Cow; -use std::collections::HashMap; +use std::path::Path; + +use crate::errors::Result; + +use globset::{Candidate, GlobBuilder, GlobMatcher}; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum MappingTarget<'a> { + MapTo(&'a str), + MapToUnknown, +} #[derive(Debug, Clone, Default)] -pub struct SyntaxMapping(HashMap); +pub struct SyntaxMapping<'a> { + mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, +} -impl SyntaxMapping { - pub fn new() -> SyntaxMapping { +impl<'a> SyntaxMapping<'a> { + pub fn empty() -> SyntaxMapping<'a> { Default::default() } - pub fn insert(&mut self, from: impl Into, to: impl Into) -> Option { - self.0.insert(from.into(), to.into()) + pub fn builtin() -> SyntaxMapping<'a> { + let mut mapping = Self::empty(); + mapping + .insert("build", MappingTarget::MapToUnknown) + .unwrap(); + + mapping } - pub(crate) fn replace<'a>(&self, input: impl Into>) -> Cow<'a, str> { - let input = input.into(); - match self.0.get(input.as_ref()) { - Some(s) => Cow::from(s.clone()), - None => input, + pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { + let glob = GlobBuilder::new(from) + .case_insensitive(false) + .literal_separator(true) + .build()?; + self.mappings.push((glob.compile_matcher(), to)); + Ok(()) + } + + pub(crate) fn get_syntax_for(&self, path: impl AsRef) -> Option> { + let candidate = Candidate::new(path.as_ref()); + let canddidate_filename = path.as_ref().file_name().map(Candidate::new); + for (ref glob, ref syntax) in &self.mappings { + if glob.is_match_candidate(&candidate) + || canddidate_filename + .as_ref() + .map_or(false, |filename| glob.is_match_candidate(filename)) + { + return Some(*syntax); + } } + None } } #[test] fn basic() { - let mut map = SyntaxMapping::new(); - map.insert("Cargo.lock", "toml"); - map.insert(".ignore", ".gitignore"); + let mut map = SyntaxMapping::empty(); + map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML")) + .ok(); + map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore")) + .ok(); - assert_eq!("toml", map.replace("Cargo.lock")); - assert_eq!("other.lock", map.replace("other.lock")); + assert_eq!( + map.get_syntax_for("/path/to/Cargo.lock"), + Some(MappingTarget::MapTo("TOML")) + ); + assert_eq!(map.get_syntax_for("/path/to/other.lock"), None); - assert_eq!(".gitignore", map.replace(".ignore")); + assert_eq!( + map.get_syntax_for("/path/to/.ignore"), + Some(MappingTarget::MapTo("Git Ignore")) + ); +} + +#[test] +fn builtin_mappings() { + let map = SyntaxMapping::builtin(); + + assert_eq!( + map.get_syntax_for("/path/to/build"), + Some(MappingTarget::MapToUnknown) + ); }