Initial implementation of glob-based syntax mapping

This commit is contained in:
sharkdp 2020-03-22 09:55:13 +01:00 committed by David Peter
parent ba29e07636
commit bd8a13dbc9
7 changed files with 137 additions and 42 deletions

23
Cargo.lock generated
View File

@ -127,6 +127,7 @@ dependencies = [
"encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
"error-chain 0.12.2 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.12.2 (registry+https://github.com/rust-lang/crates.io-index)",
"git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", "git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"liquid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)", "liquid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)",
"shell-words 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "shell-words 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -201,6 +202,14 @@ dependencies = [
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "bstr"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "byte-tools" name = "byte-tools"
version = "0.3.1" version = "0.3.1"
@ -542,6 +551,18 @@ name = "glob"
version = "0.3.0" version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "globset"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)",
"bstr 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.8" version = "0.1.8"
@ -1453,6 +1474,7 @@ dependencies = [
"checksum blake2b_simd 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" "checksum blake2b_simd 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a"
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" "checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
"checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" "checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
"checksum bstr 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)" = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41"
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" "checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
@ -1495,6 +1517,7 @@ dependencies = [
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7da16ceafe24cedd9ba02c4463a2b506b6493baf4317c79c5acb553134a3c15" "checksum git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7da16ceafe24cedd9ba02c4463a2b506b6493baf4317c79c5acb553134a3c15"
"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" "checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
"checksum globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "7ad1da430bd7281dde2576f44c84cc3f0f7b475e7202cd503042dff01a8c8120"
"checksum hermit-abi 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8" "checksum hermit-abi 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8"
"checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" "checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f"
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" "checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"

View File

@ -27,6 +27,7 @@ content_inspector = "0.2.4"
encoding = "0.2" encoding = "0.2"
shell-words = "0.1.0" shell-words = "0.1.0"
unicode-width = "0.1.7" unicode-width = "0.1.7"
globset = "0.4"
[dependencies.git2] [dependencies.git2]
version = "0.13" version = "0.13"

View File

@ -9,7 +9,7 @@ use syntect::parsing::{SyntaxReference, SyntaxSet, SyntaxSetBuilder};
use crate::errors::*; use crate::errors::*;
use crate::inputfile::{InputFile, InputFileReader}; use crate::inputfile::{InputFile, InputFileReader};
use crate::syntax_mapping::SyntaxMapping; use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
#[derive(Debug)] #[derive(Debug)]
pub struct HighlightingAssets { pub struct HighlightingAssets {
@ -184,25 +184,28 @@ impl HighlightingAssets {
(Some(language), _) => self.syntax_set.find_syntax_by_token(language), (Some(language), _) => self.syntax_set.find_syntax_by_token(language),
(None, InputFile::Ordinary(filename)) => { (None, InputFile::Ordinary(filename)) => {
let path = Path::new(filename); let path = Path::new(filename);
let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
let extension = path.extension().and_then(|x| x.to_str()).unwrap_or(""); let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
let file_name = mapping.replace(file_name);
let extension = mapping.replace(extension);
let ext_syntax = self let ext_syntax = self
.syntax_set .syntax_set
.find_syntax_by_extension(&file_name) .find_syntax_by_extension(&file_name)
.or_else(|| self.syntax_set.find_syntax_by_extension(&extension)); .or_else(|| self.syntax_set.find_syntax_by_extension(&extension));
let line_syntax = if ext_syntax.is_none() { let line_syntax = String::from_utf8(reader.first_line.clone())
String::from_utf8(reader.first_line.clone()) .ok()
.ok() .and_then(|l| self.syntax_set.find_syntax_by_first_line(&l));
.and_then(|l| self.syntax_set.find_syntax_by_first_line(&l))
} else {
None
};
ext_syntax.or(line_syntax) dbg!(path);
// An explicit syntax mapping for this path takes precedence over the
// extension-based lookup. (No `dbg!` here: it would print to stderr on
// every lookup.)
match mapping.get_syntax_for(path) {
    Some(MappingTarget::MapTo(syntax_name)) => {
        // TODO: we should probably return an error here if this syntax cannot be
        // found. Currently, we just fall back to 'plain'.
        self.syntax_set.find_syntax_by_name(syntax_name)
    }
    // Mapped to "unknown": skip the file-name/extension result and rely on
    // first-line detection only.
    Some(MappingTarget::MapToUnknown) => line_syntax,
    // No mapping matched: prefer the file-name/extension result, then the
    // first-line result.
    None => ext_syntax.or(line_syntax),
}
} }
(None, InputFile::StdIn) => String::from_utf8(reader.first_line.clone()) (None, InputFile::StdIn) => String::from_utf8(reader.first_line.clone())
.ok() .ok()
@ -225,19 +228,19 @@ mod tests {
use crate::assets::HighlightingAssets; use crate::assets::HighlightingAssets;
use crate::inputfile::InputFile; use crate::inputfile::InputFile;
use crate::syntax_mapping::SyntaxMapping; use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
struct SyntaxDetectionTest { struct SyntaxDetectionTest<'a> {
assets: HighlightingAssets, assets: HighlightingAssets,
pub syntax_mapping: SyntaxMapping, pub syntax_mapping: SyntaxMapping<'a>,
temp_dir: TempDir, temp_dir: TempDir,
} }
impl SyntaxDetectionTest { impl<'a> SyntaxDetectionTest<'a> {
fn new() -> Self { fn new() -> Self {
SyntaxDetectionTest { SyntaxDetectionTest {
assets: HighlightingAssets::from_binary(), assets: HighlightingAssets::from_binary(),
syntax_mapping: SyntaxMapping::new(), syntax_mapping: SyntaxMapping::builtin(),
temp_dir: TempDir::new("bat_syntax_detection_tests") temp_dir: TempDir::new("bat_syntax_detection_tests")
.expect("creation of temporary directory"), .expect("creation of temporary directory"),
} }
@ -294,6 +297,10 @@ mod tests {
test.syntax_name_with_content("my_script", "#!/bin/bash"), test.syntax_name_with_content("my_script", "#!/bin/bash"),
"Bourne Again Shell (bash)" "Bourne Again Shell (bash)"
); );
assert_eq!(
test.syntax_name_with_content("build", "#!/bin/bash"),
"Bourne Again Shell (bash)"
);
assert_eq!(test.syntax_name_with_content("my_script", "<?php"), "PHP"); assert_eq!(test.syntax_name_with_content("my_script", "<?php"), "PHP");
} }
@ -302,7 +309,20 @@ mod tests {
let mut test = SyntaxDetectionTest::new(); let mut test = SyntaxDetectionTest::new();
assert_ne!(test.syntax_name("test.h"), "C++"); assert_ne!(test.syntax_name("test.h"), "C++");
test.syntax_mapping.insert("h", "cpp"); test.syntax_mapping
.insert("*.h", MappingTarget::MapTo("C++"))
.ok();
assert_eq!(test.syntax_name("test.h"), "C++"); assert_eq!(test.syntax_name("test.h"), "C++");
} }
/// Checks that an upper-case extension is only detected as Markdown once a
/// matching (case-sensitive) glob mapping has been registered.
#[test]
fn syntax_detection_is_case_sensitive() {
    let mut test = SyntaxDetectionTest::new();

    // Before any mapping is added, "README.MD" must not resolve to Markdown.
    assert_ne!(test.syntax_name("README.MD"), "Markdown");

    // Fail right here if the glob is rejected, instead of discarding the
    // error and tripping over the assertion below.
    test.syntax_mapping
        .insert("*.MD", MappingTarget::MapTo("Markdown"))
        .expect("'*.MD' is a valid glob pattern");

    assert_eq!(test.syntax_name("README.MD"), "Markdown");
}
} }

View File

@ -18,8 +18,8 @@ use ansi_term;
use bat::{ use bat::{
config::{ config::{
Config, HighlightedLineRanges, InputFile, LineRange, LineRanges, OutputWrap, PagingMode, Config, HighlightedLineRanges, InputFile, LineRange, LineRanges, MappingTarget, OutputWrap,
StyleComponent, StyleComponents, SyntaxMapping, PagingMode, StyleComponent, StyleComponents, SyntaxMapping,
}, },
errors::*, errors::*,
HighlightingAssets, HighlightingAssets,
@ -104,7 +104,7 @@ impl App {
} }
}; };
let mut syntax_mapping = SyntaxMapping::new(); let mut syntax_mapping = SyntaxMapping::builtin();
if let Some(values) = self.matches.values_of("map-syntax") { if let Some(values) = self.matches.values_of("map-syntax") {
for from_to in values { for from_to in values {
@ -114,7 +114,7 @@ impl App {
return Err("Invalid syntax mapping. The format of the -m/--map-syntax option is 'from:to'.".into()); return Err("Invalid syntax mapping. The format of the -m/--map-syntax option is 'from:to'.".into());
} }
syntax_mapping.insert(parts[0], parts[1]); syntax_mapping.insert(parts[0], MappingTarget::MapTo(parts[1]))?;
} }
} }

View File

@ -1,7 +1,7 @@
pub use crate::inputfile::InputFile; pub use crate::inputfile::InputFile;
pub use crate::line_range::{HighlightedLineRanges, LineRange, LineRanges}; pub use crate::line_range::{HighlightedLineRanges, LineRange, LineRanges};
pub use crate::style::{StyleComponent, StyleComponents}; pub use crate::style::{StyleComponent, StyleComponents};
pub use crate::syntax_mapping::SyntaxMapping; pub use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
pub use crate::wrap::OutputWrap; pub use crate::wrap::OutputWrap;
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
@ -60,7 +60,7 @@ pub struct Config<'a> {
pub theme: String, pub theme: String,
/// File extension/name mappings /// File extension/name mappings
pub syntax_mapping: SyntaxMapping, pub syntax_mapping: SyntaxMapping<'a>,
/// Command to start the pager /// Command to start the pager
pub pager: Option<&'a str>, pub pager: Option<&'a str>,

View File

@ -6,6 +6,7 @@ error_chain! {
Io(::std::io::Error); Io(::std::io::Error);
SyntectError(::syntect::LoadingError); SyntectError(::syntect::LoadingError);
ParseIntError(::std::num::ParseIntError); ParseIntError(::std::num::ParseIntError);
GlobParsingError(::globset::Error);
} }
} }

View File

@ -1,35 +1,85 @@
use std::borrow::Cow; use std::path::Path;
use std::collections::HashMap;
use crate::errors::Result;
use globset::{Candidate, GlobBuilder, GlobMatcher};
/// Target of a syntax mapping: what a path matching a glob pattern maps to.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum MappingTarget<'a> {
/// Map matching paths to the syntax with the given name (the name is looked
/// up via `find_syntax_by_name` during syntax detection).
MapTo(&'a str),
/// Treat matching paths as having no known syntax based on their name;
/// syntax detection then only uses the result of first-line detection.
MapToUnknown,
}
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct SyntaxMapping(HashMap<String, String>); pub struct SyntaxMapping<'a> {
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
}
impl SyntaxMapping { impl<'a> SyntaxMapping<'a> {
pub fn new() -> SyntaxMapping { pub fn empty() -> SyntaxMapping<'a> {
Default::default() Default::default()
} }
pub fn insert(&mut self, from: impl Into<String>, to: impl Into<String>) -> Option<String> { pub fn builtin() -> SyntaxMapping<'a> {
self.0.insert(from.into(), to.into()) let mut mapping = Self::empty();
mapping
.insert("build", MappingTarget::MapToUnknown)
.unwrap();
mapping
} }
pub(crate) fn replace<'a>(&self, input: impl Into<Cow<'a, str>>) -> Cow<'a, str> { pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let input = input.into(); let glob = GlobBuilder::new(from)
match self.0.get(input.as_ref()) { .case_insensitive(false)
Some(s) => Cow::from(s.clone()), .literal_separator(true)
None => input, .build()?;
self.mappings.push((glob.compile_matcher(), to));
Ok(())
}
pub(crate) fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
let candidate = Candidate::new(path.as_ref());
let canddidate_filename = path.as_ref().file_name().map(Candidate::new);
for (ref glob, ref syntax) in &self.mappings {
if glob.is_match_candidate(&candidate)
|| canddidate_filename
.as_ref()
.map_or(false, |filename| glob.is_match_candidate(filename))
{
return Some(*syntax);
}
} }
None
} }
} }
#[test] #[test]
fn basic() { fn basic() {
let mut map = SyntaxMapping::new(); let mut map = SyntaxMapping::empty();
map.insert("Cargo.lock", "toml"); map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
map.insert(".ignore", ".gitignore"); .ok();
map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
.ok();
assert_eq!("toml", map.replace("Cargo.lock")); assert_eq!(
assert_eq!("other.lock", map.replace("other.lock")); map.get_syntax_for("/path/to/Cargo.lock"),
Some(MappingTarget::MapTo("TOML"))
);
assert_eq!(map.get_syntax_for("/path/to/other.lock"), None);
assert_eq!(".gitignore", map.replace(".ignore")); assert_eq!(
map.get_syntax_for("/path/to/.ignore"),
Some(MappingTarget::MapTo("Git Ignore"))
);
}
#[test]
fn builtin_mappings() {
let map = SyntaxMapping::builtin();
assert_eq!(
map.get_syntax_for("/path/to/build"),
Some(MappingTarget::MapToUnknown)
);
} }