Initial implementation of glob-based syntax mapping

This commit is contained in:
sharkdp 2020-03-22 09:55:13 +01:00 committed by David Peter
parent ba29e07636
commit bd8a13dbc9
7 changed files with 137 additions and 42 deletions

23
Cargo.lock generated
View File

@ -127,6 +127,7 @@ dependencies = [
"encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
"error-chain 0.12.2 (registry+https://github.com/rust-lang/crates.io-index)",
"git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"liquid 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)",
"shell-words 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -201,6 +202,14 @@ dependencies = [
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bstr"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "byte-tools"
version = "0.3.1"
@ -542,6 +551,18 @@ name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "globset"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)",
"bstr 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "hermit-abi"
version = "0.1.8"
@ -1453,6 +1474,7 @@ dependencies = [
"checksum blake2b_simd 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a"
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
"checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
"checksum bstr 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)" = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41"
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
@ -1495,6 +1517,7 @@ dependencies = [
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum git2 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7da16ceafe24cedd9ba02c4463a2b506b6493baf4317c79c5acb553134a3c15"
"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
"checksum globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "7ad1da430bd7281dde2576f44c84cc3f0f7b475e7202cd503042dff01a8c8120"
"checksum hermit-abi 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8"
"checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f"
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"

View File

@ -27,6 +27,7 @@ content_inspector = "0.2.4"
encoding = "0.2"
shell-words = "0.1.0"
unicode-width = "0.1.7"
globset = "0.4"
[dependencies.git2]
version = "0.13"

View File

@ -9,7 +9,7 @@ use syntect::parsing::{SyntaxReference, SyntaxSet, SyntaxSetBuilder};
use crate::errors::*;
use crate::inputfile::{InputFile, InputFileReader};
use crate::syntax_mapping::SyntaxMapping;
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
#[derive(Debug)]
pub struct HighlightingAssets {
@ -184,25 +184,28 @@ impl HighlightingAssets {
(Some(language), _) => self.syntax_set.find_syntax_by_token(language),
(None, InputFile::Ordinary(filename)) => {
let path = Path::new(filename);
let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
let file_name = mapping.replace(file_name);
let extension = mapping.replace(extension);
let ext_syntax = self
.syntax_set
.find_syntax_by_extension(&file_name)
.or_else(|| self.syntax_set.find_syntax_by_extension(&extension));
let line_syntax = if ext_syntax.is_none() {
String::from_utf8(reader.first_line.clone())
.ok()
.and_then(|l| self.syntax_set.find_syntax_by_first_line(&l))
} else {
None
};
let line_syntax = String::from_utf8(reader.first_line.clone())
.ok()
.and_then(|l| self.syntax_set.find_syntax_by_first_line(&l));
ext_syntax.or(line_syntax)
// Explicit glob mappings are consulted before extension-based detection:
// a `MapTo` rule selects the syntax by name, a `MapToUnknown` rule
// disables name/extension matching and leaves only first-line detection.
match mapping.get_syntax_for(path) {
    Some(MappingTarget::MapTo(syntax_name)) => {
        // TODO: we should probably return an error here if this syntax can not be
        // found. Currently, we just fall back to 'plain'.
        self.syntax_set.find_syntax_by_name(syntax_name)
    }
    Some(MappingTarget::MapToUnknown) => line_syntax,
    None => ext_syntax.or(line_syntax),
}
}
(None, InputFile::StdIn) => String::from_utf8(reader.first_line.clone())
.ok()
@ -225,19 +228,19 @@ mod tests {
use crate::assets::HighlightingAssets;
use crate::inputfile::InputFile;
use crate::syntax_mapping::SyntaxMapping;
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
struct SyntaxDetectionTest {
struct SyntaxDetectionTest<'a> {
assets: HighlightingAssets,
pub syntax_mapping: SyntaxMapping,
pub syntax_mapping: SyntaxMapping<'a>,
temp_dir: TempDir,
}
impl SyntaxDetectionTest {
impl<'a> SyntaxDetectionTest<'a> {
fn new() -> Self {
SyntaxDetectionTest {
assets: HighlightingAssets::from_binary(),
syntax_mapping: SyntaxMapping::new(),
syntax_mapping: SyntaxMapping::builtin(),
temp_dir: TempDir::new("bat_syntax_detection_tests")
.expect("creation of temporary directory"),
}
@ -294,6 +297,10 @@ mod tests {
test.syntax_name_with_content("my_script", "#!/bin/bash"),
"Bourne Again Shell (bash)"
);
assert_eq!(
test.syntax_name_with_content("build", "#!/bin/bash"),
"Bourne Again Shell (bash)"
);
assert_eq!(test.syntax_name_with_content("my_script", "<?php"), "PHP");
}
@ -302,7 +309,20 @@ mod tests {
let mut test = SyntaxDetectionTest::new();
assert_ne!(test.syntax_name("test.h"), "C++");
test.syntax_mapping.insert("h", "cpp");
test.syntax_mapping
.insert("*.h", MappingTarget::MapTo("C++"))
.ok();
assert_eq!(test.syntax_name("test.h"), "C++");
}
#[test]
fn syntax_detection_is_case_sensitive() {
    let mut test = SyntaxDetectionTest::new();

    // Without an explicit rule, the upper-case extension must not be
    // recognised as Markdown.
    assert_ne!(test.syntax_name("README.MD"), "Markdown");

    // Fail loudly at the real cause if the glob cannot be registered,
    // instead of silently dropping the error and failing the assertion below.
    test.syntax_mapping
        .insert("*.MD", MappingTarget::MapTo("Markdown"))
        .expect("'*.MD' is a valid glob pattern");

    assert_eq!(test.syntax_name("README.MD"), "Markdown");
}
}

View File

@ -18,8 +18,8 @@ use ansi_term;
use bat::{
config::{
Config, HighlightedLineRanges, InputFile, LineRange, LineRanges, OutputWrap, PagingMode,
StyleComponent, StyleComponents, SyntaxMapping,
Config, HighlightedLineRanges, InputFile, LineRange, LineRanges, MappingTarget, OutputWrap,
PagingMode, StyleComponent, StyleComponents, SyntaxMapping,
},
errors::*,
HighlightingAssets,
@ -104,7 +104,7 @@ impl App {
}
};
let mut syntax_mapping = SyntaxMapping::new();
let mut syntax_mapping = SyntaxMapping::builtin();
if let Some(values) = self.matches.values_of("map-syntax") {
for from_to in values {
@ -114,7 +114,7 @@ impl App {
return Err("Invalid syntax mapping. The format of the -m/--map-syntax option is 'from:to'.".into());
}
syntax_mapping.insert(parts[0], parts[1]);
syntax_mapping.insert(parts[0], MappingTarget::MapTo(parts[1]))?;
}
}

View File

@ -1,7 +1,7 @@
pub use crate::inputfile::InputFile;
pub use crate::line_range::{HighlightedLineRanges, LineRange, LineRanges};
pub use crate::style::{StyleComponent, StyleComponents};
pub use crate::syntax_mapping::SyntaxMapping;
pub use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
pub use crate::wrap::OutputWrap;
#[derive(Debug, Clone, Copy, PartialEq)]
@ -60,7 +60,7 @@ pub struct Config<'a> {
pub theme: String,
/// File extension/name mappings
pub syntax_mapping: SyntaxMapping,
pub syntax_mapping: SyntaxMapping<'a>,
/// Command to start the pager
pub pager: Option<&'a str>,

View File

@ -6,6 +6,7 @@ error_chain! {
Io(::std::io::Error);
SyntectError(::syntect::LoadingError);
ParseIntError(::std::num::ParseIntError);
GlobParsingError(::globset::Error);
}
}

View File

@ -1,35 +1,85 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::Path;
use crate::errors::Result;
use globset::{Candidate, GlobBuilder, GlobMatcher};
/// Result of looking up a path in a `SyntaxMapping`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum MappingTarget<'a> {
/// Use the syntax with the given name. The name is borrowed for `'a` and is
/// resolved by the consumer through a by-name syntax lookup.
MapTo(&'a str),
/// The path matched a rule, but no syntax should be derived from its name;
/// the consumer falls back to first-line based detection only.
MapToUnknown,
}
#[derive(Debug, Clone, Default)]
pub struct SyntaxMapping(HashMap<String, String>);
pub struct SyntaxMapping<'a> {
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
}
impl SyntaxMapping {
pub fn new() -> SyntaxMapping {
impl<'a> SyntaxMapping<'a> {
pub fn empty() -> SyntaxMapping<'a> {
Default::default()
}
pub fn insert(&mut self, from: impl Into<String>, to: impl Into<String>) -> Option<String> {
self.0.insert(from.into(), to.into())
/// Creates a mapping pre-populated with the builtin rules.
///
/// At present the only builtin rule maps files named `build` to
/// `MappingTarget::MapToUnknown`.
///
/// # Panics
///
/// Panics if a builtin pattern fails to parse as a glob.
pub fn builtin() -> SyntaxMapping<'a> {
    let mut builtin_rules = Self::empty();
    builtin_rules
        .insert("build", MappingTarget::MapToUnknown)
        .unwrap();
    builtin_rules
}
pub(crate) fn replace<'a>(&self, input: impl Into<Cow<'a, str>>) -> Cow<'a, str> {
let input = input.into();
match self.0.get(input.as_ref()) {
Some(s) => Cow::from(s.clone()),
None => input,
/// Registers a rule that maps paths matching the glob `from` to `to`.
///
/// Patterns are compiled case-sensitively and with literal path separators
/// enabled. Rules are appended in call order.
///
/// # Errors
///
/// Returns an error if `from` is not a valid glob pattern.
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
    let mut builder = GlobBuilder::new(from);
    builder.case_insensitive(false).literal_separator(true);

    let matcher = builder.build()?.compile_matcher();
    self.mappings.push((matcher, to));

    Ok(())
}
/// Looks up the mapping target for `path`.
///
/// Each rule is tested against the full path and, if the path has a final
/// component, against that file name on its own, so a pattern such as
/// `build` or `*.h` matches regardless of the directory.
///
/// Rules are scanned from the most recently inserted to the oldest, so a
/// rule added later (e.g. a user-supplied mapping) overrides an earlier one
/// (e.g. a builtin) when both match.
///
/// Returns `None` if no rule matches.
pub(crate) fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
    let full_path = path.as_ref();
    let candidate = Candidate::new(full_path);
    let candidate_filename = full_path.file_name().map(Candidate::new);

    self.mappings
        .iter()
        .rev()
        .find(|(glob, _)| {
            glob.is_match_candidate(&candidate)
                || candidate_filename
                    .as_ref()
                    .map_or(false, |filename| glob.is_match_candidate(filename))
        })
        .map(|(_, target)| *target)
}
}
#[test]
fn basic() {
let mut map = SyntaxMapping::new();
map.insert("Cargo.lock", "toml");
map.insert(".ignore", ".gitignore");
let mut map = SyntaxMapping::empty();
map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
.ok();
map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
.ok();
assert_eq!("toml", map.replace("Cargo.lock"));
assert_eq!("other.lock", map.replace("other.lock"));
assert_eq!(
map.get_syntax_for("/path/to/Cargo.lock"),
Some(MappingTarget::MapTo("TOML"))
);
assert_eq!(map.get_syntax_for("/path/to/other.lock"), None);
assert_eq!(".gitignore", map.replace(".ignore"));
assert_eq!(
map.get_syntax_for("/path/to/.ignore"),
Some(MappingTarget::MapTo("Git Ignore"))
);
}
#[test]
fn builtin_mappings() {
    // The builtin table must classify a file named `build` as "unknown",
    // independent of the directory it lives in.
    let builtin = SyntaxMapping::builtin();
    let target = builtin.get_syntax_for("/path/to/build");

    assert_eq!(target, Some(MappingTarget::MapToUnknown));
}