use std::ffi::OsStr; use std::fs; use std::path::Path; use once_cell::unsync::OnceCell; use syntect::highlighting::Theme; use syntect::parsing::{SyntaxReference, SyntaxSet}; use path_abs::PathAbs; use crate::error::*; use crate::input::{InputReader, OpenedInput}; use crate::syntax_mapping::ignored_suffixes::IgnoredSuffixes; use crate::syntax_mapping::MappingTarget; use crate::{bat_warning, SyntaxMapping}; use lazy_theme_set::LazyThemeSet; use serialized_syntax_set::*; #[cfg(feature = "build-assets")] pub use crate::assets::build_assets::*; pub(crate) mod assets_metadata; #[cfg(feature = "build-assets")] mod build_assets; mod lazy_theme_set; mod serialized_syntax_set; #[derive(Debug)] pub struct HighlightingAssets { syntax_set_cell: OnceCell, serialized_syntax_set: SerializedSyntaxSet, theme_set: LazyThemeSet, fallback_theme: Option<&'static str>, } #[derive(Debug)] pub struct SyntaxReferenceInSet<'a> { pub syntax: &'a SyntaxReference, pub syntax_set: &'a SyntaxSet, } /// Lazy-loaded syntaxes are already compressed, and we don't want to compress /// already compressed data. pub(crate) const COMPRESS_SYNTAXES: bool = false; /// We don't want to compress our [LazyThemeSet] since the lazy-loaded themes /// within it are already compressed, and compressing another time just makes /// performance suffer pub(crate) const COMPRESS_THEMES: bool = false; /// Compress for size of ~40 kB instead of ~200 kB without much difference in /// performance due to lazy-loading pub(crate) const COMPRESS_LAZY_THEMES: bool = true; /// Compress for size of ~10 kB instead of ~120 kB pub(crate) const COMPRESS_ACKNOWLEDGEMENTS: bool = true; impl HighlightingAssets { fn new(serialized_syntax_set: SerializedSyntaxSet, theme_set: LazyThemeSet) -> Self { HighlightingAssets { syntax_set_cell: OnceCell::new(), serialized_syntax_set, theme_set, fallback_theme: None, } } /// The default theme. 
/// /// ### Windows and Linux /// /// Windows and most Linux distributions has a dark terminal theme by /// default. On these platforms, this function always returns a theme that /// looks good on a dark background. /// /// ### macOS /// /// On macOS the default terminal background is light, but it is common that /// Dark Mode is active, which makes the terminal background dark. On this /// platform, the default theme depends on /// ```bash /// defaults read -globalDomain AppleInterfaceStyle /// ``` /// To avoid the overhead of the check on macOS, simply specify a theme /// explicitly via `--theme`, `BAT_THEME`, or `~/.config/bat`. /// /// See and /// for more context. pub fn default_theme() -> &'static str { #[cfg(not(target_os = "macos"))] { Self::default_dark_theme() } #[cfg(target_os = "macos")] { if macos_dark_mode_active() { Self::default_dark_theme() } else { Self::default_light_theme() } } } /** * The default theme that looks good on a dark background. */ fn default_dark_theme() -> &'static str { "Monokai Extended" } /** * The default theme that looks good on a light background. */ #[cfg(target_os = "macos")] fn default_light_theme() -> &'static str { "Monokai Extended Light" } pub fn from_cache(cache_path: &Path) -> Result { Ok(HighlightingAssets::new( SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")), asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?, )) } pub fn from_binary() -> Self { HighlightingAssets::new( SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()), get_integrated_themeset(), ) } pub fn set_fallback_theme(&mut self, theme: &'static str) { self.fallback_theme = Some(theme); } /// Return the collection of syntect syntax definitions. 
///
    /// The set is deserialized on first use and cached; later calls return the
    /// cached value.
    ///
    /// # Errors
    ///
    /// Returns an error if deserializing the syntax set fails.
    pub fn get_syntax_set(&self) -> Result<&SyntaxSet> {
        self.syntax_set_cell
            .get_or_try_init(|| self.serialized_syntax_set.deserialize())
    }

    /// Use [Self::get_syntaxes] instead
    ///
    /// # Panics
    ///
    /// Panics if the syntax set cannot be loaded.
    #[deprecated]
    pub fn syntaxes(&self) -> &[SyntaxReference] {
        self.get_syntax_set()
            .expect(".syntaxes() is deprecated, use .get_syntaxes() instead")
            .syntaxes()
    }

    /// Return all syntax definitions of the syntax set.
    ///
    /// # Errors
    ///
    /// Returns an error if the syntax set cannot be loaded.
    pub fn get_syntaxes(&self) -> Result<&[SyntaxReference]> {
        Ok(self.get_syntax_set()?.syntaxes())
    }

    /// Borrow the underlying theme set.
    fn get_theme_set(&self) -> &LazyThemeSet {
        &self.theme_set
    }

    /// Iterate over the themes of the underlying theme set, as yielded by
    /// `LazyThemeSet::themes`.
    pub fn themes(&self) -> impl Iterator<Item = &str> {
        self.get_theme_set().themes()
    }

    /// Use [Self::get_syntax_for_path] instead
    #[deprecated]
    pub fn syntax_for_file_name(
        &self,
        file_name: impl AsRef<Path>,
        mapping: &SyntaxMapping,
    ) -> Option<&SyntaxReference> {
        // Every error, not only "undetected", is flattened to `None` here.
        self.get_syntax_for_path(file_name, mapping)
            .ok()
            .map(|syntax_in_set| syntax_in_set.syntax)
    }

    /// Detect the syntax based on, in order:
    ///  1. Syntax mappings with [MappingTarget::MapTo] and [MappingTarget::MapToUnknown]
    ///     (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`)
    ///  2. The file name (e.g. `Dockerfile`)
    ///  3. Syntax mappings with [MappingTarget::MapExtensionToUnknown]
    ///     (e.g. `*.conf`)
    ///  4. The file name extension (e.g. `.rs`)
    ///
    /// When detecting syntax based on syntax mappings, the full path is taken
    /// into account. When detecting syntax based on file name, no regard is
    /// taken to the path of the file. Only the file name itself matters. When
    /// detecting syntax based on file name extension, only the file name
    /// extension itself matters.
    ///
    /// Returns [Error::UndetectedSyntax] if it was not possible to detect syntax
    /// based on path/file name/extension (or if the path was mapped to
    /// [MappingTarget::MapToUnknown] or [MappingTarget::MapExtensionToUnknown]).
    /// In this case it is appropriate to fall back to other methods to detect
    /// syntax. Such as using the contents of the first line of the file.
    ///
    /// Returns [Error::UnknownSyntax] if a syntax mapping exists, but the mapped
    /// syntax does not exist.
    pub fn get_syntax_for_path(
        &self,
        path: impl AsRef<Path>,
        mapping: &SyntaxMapping,
    ) -> Result<SyntaxReferenceInSet> {
        let path = path.as_ref();

        let syntax_match = mapping.get_syntax_for(path);

        // Step 1: explicit mappings win over everything else.
        if let Some(MappingTarget::MapToUnknown) = syntax_match {
            return Err(Error::UndetectedSyntax(path.to_string_lossy().into()));
        }

        if let Some(MappingTarget::MapTo(syntax_name)) = syntax_match {
            return self
                .find_syntax_by_name(syntax_name)?
                .ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned()));
        }

        // A path without a final component yields an empty file name.
        let file_name = path.file_name().unwrap_or_default();

        match (
            self.get_syntax_for_file_name(file_name, &mapping.ignored_suffixes)?,
            syntax_match,
        ) {
            // Step 2: the whole file name identified a syntax.
            (Some(syntax), _) => Ok(syntax),

            // Step 3: the extension is explicitly mapped to "unknown".
            (_, Some(MappingTarget::MapExtensionToUnknown)) => {
                Err(Error::UndetectedSyntax(path.to_string_lossy().into()))
            }

            // Step 4: fall back to the file name extension.
            _ => self
                .get_syntax_for_file_extension(file_name, &mapping.ignored_suffixes)?
                .ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())),
        }
    }

    /// Look up a syntect theme by name.
pub fn get_theme(&self, theme: &str) -> &Theme { match self.get_theme_set().get(theme) { Some(theme) => theme, None => { if theme == "ansi-light" || theme == "ansi-dark" { bat_warning!("Theme '{}' is deprecated, using 'ansi' instead.", theme); return self.get_theme("ansi"); } if !theme.is_empty() { bat_warning!("Unknown theme '{}', using default.", theme) } self.get_theme_set() .get(self.fallback_theme.unwrap_or_else(Self::default_theme)) .expect("something is very wrong if the default theme is missing") } } } pub(crate) fn get_syntax( &self, language: Option<&str>, input: &mut OpenedInput, mapping: &SyntaxMapping, ) -> Result { if let Some(language) = language { let syntax_set = self.get_syntax_set()?; return syntax_set .find_syntax_by_token(language) .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }) .ok_or_else(|| Error::UnknownSyntax(language.to_owned())); } let path = input.path(); let path_syntax = if let Some(path) = path { self.get_syntax_for_path( PathAbs::new(path).map_or_else(|_| path.to_owned(), |p| p.as_path().to_path_buf()), mapping, ) } else { Err(Error::UndetectedSyntax("[unknown]".into())) }; match path_syntax { // If a path wasn't provided, or if path based syntax detection // above failed, we fall back to first-line syntax detection. Err(Error::UndetectedSyntax(path)) => self .get_first_line_syntax(&mut input.reader)? 
.ok_or(Error::UndetectedSyntax(path)), _ => path_syntax, } } pub(crate) fn find_syntax_by_name( &self, syntax_name: &str, ) -> Result> { let syntax_set = self.get_syntax_set()?; Ok(syntax_set .find_syntax_by_name(syntax_name) .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })) } fn find_syntax_by_extension(&self, e: Option<&OsStr>) -> Result> { let syntax_set = self.get_syntax_set()?; let extension = e.and_then(|x| x.to_str()).unwrap_or_default(); Ok(syntax_set .find_syntax_by_extension(extension) .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })) } fn get_syntax_for_file_name( &self, file_name: &OsStr, ignored_suffixes: &IgnoredSuffixes, ) -> Result> { let mut syntax = self.find_syntax_by_extension(Some(file_name))?; if syntax.is_none() { syntax = ignored_suffixes.try_with_stripped_suffix(file_name, |stripped_file_name| { // Note: recursion self.get_syntax_for_file_name(stripped_file_name, ignored_suffixes) })?; } Ok(syntax) } fn get_syntax_for_file_extension( &self, file_name: &OsStr, ignored_suffixes: &IgnoredSuffixes, ) -> Result> { let mut syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?; if syntax.is_none() { syntax = ignored_suffixes.try_with_stripped_suffix(file_name, |stripped_file_name| { // Note: recursion self.get_syntax_for_file_extension(stripped_file_name, ignored_suffixes) })?; } Ok(syntax) } fn get_first_line_syntax( &self, reader: &mut InputReader, ) -> Result> { let syntax_set = self.get_syntax_set()?; Ok(String::from_utf8(reader.first_line.clone()) .ok() .and_then(|l| syntax_set.find_syntax_by_first_line(&l)) .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })) } } pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] { include_bytes!("../assets/syntaxes.bin") } pub(crate) fn get_integrated_themeset() -> LazyThemeSet { from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES) } pub fn get_acknowledgements() -> String { from_binary( 
include_bytes!("../assets/acknowledgements.bin"), COMPRESS_ACKNOWLEDGEMENTS, ) } pub(crate) fn from_binary(v: &[u8], compressed: bool) -> T { asset_from_contents(v, "n/a", compressed) .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!") } fn asset_from_contents( contents: &[u8], description: &str, compressed: bool, ) -> Result { if compressed { bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents)) } else { bincode::deserialize_from(contents) } .map_err(|_| format!("Could not parse {description}").into()) } fn asset_from_cache( path: &Path, description: &str, compressed: bool, ) -> Result { let contents = fs::read(path).map_err(|_| { format!( "Could not load cached {} '{}'", description, path.to_string_lossy() ) })?; asset_from_contents(&contents[..], description, compressed) .map_err(|_| format!("Could not parse cached {description}").into()) } #[cfg(target_os = "macos")] fn macos_dark_mode_active() -> bool { const PREFERENCES_FILE: &str = "Library/Preferences/.GlobalPreferences.plist"; const STYLE_KEY: &str = "AppleInterfaceStyle"; let preferences_file = home::home_dir() .map(|home| home.join(PREFERENCES_FILE)) .expect("Could not get home directory"); match plist::Value::from_file(preferences_file).map(|file| file.into_dictionary()) { Ok(Some(preferences)) => match preferences.get(STYLE_KEY).and_then(|val| val.as_string()) { Some(value) => value == "Dark", // If the key does not exist, then light theme is currently in use. None => false, }, // Unreachable, in theory. All macOS users have a home directory and preferences file setup. 
Ok(None) | Err(_) => true, } } #[cfg(test)] mod tests { use super::*; use std::ffi::OsStr; use std::fs::File; use std::io::{BufReader, Write}; use tempfile::TempDir; use crate::input::Input; struct SyntaxDetectionTest<'a> { assets: HighlightingAssets, pub syntax_mapping: SyntaxMapping<'a>, pub temp_dir: TempDir, } impl<'a> SyntaxDetectionTest<'a> { fn new() -> Self { SyntaxDetectionTest { assets: HighlightingAssets::from_binary(), syntax_mapping: SyntaxMapping::new(), temp_dir: TempDir::new().expect("creation of temporary directory"), } } fn get_syntax_name( &self, language: Option<&str>, input: &mut OpenedInput, mapping: &SyntaxMapping, ) -> String { self.assets .get_syntax(language, input, mapping) .map(|syntax_in_set| syntax_in_set.syntax.name.clone()) .unwrap_or_else(|_| "!no syntax!".to_owned()) } fn syntax_for_real_file_with_content_os( &self, file_name: &OsStr, first_line: &str, ) -> String { let file_path = self.temp_dir.path().join(file_name); { let mut temp_file = File::create(&file_path).unwrap(); writeln!(temp_file, "{first_line}").unwrap(); } let input = Input::ordinary_file(&file_path); let dummy_stdin: &[u8] = &[]; let mut opened_input = input.open(dummy_stdin, None).unwrap(); self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping) } fn syntax_for_file_with_content_os(&self, file_name: &OsStr, first_line: &str) -> String { let file_path = self.temp_dir.path().join(file_name); let input = Input::from_reader(Box::new(BufReader::new(first_line.as_bytes()))) .with_name(Some(&file_path)); let dummy_stdin: &[u8] = &[]; let mut opened_input = input.open(dummy_stdin, None).unwrap(); self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping) } #[cfg(unix)] fn syntax_for_file_os(&self, file_name: &OsStr) -> String { self.syntax_for_file_with_content_os(file_name, "") } fn syntax_for_file_with_content(&self, file_name: &str, first_line: &str) -> String { self.syntax_for_file_with_content_os(OsStr::new(file_name), first_line) } fn 
syntax_for_file(&self, file_name: &str) -> String { self.syntax_for_file_with_content(file_name, "") } fn syntax_for_stdin_with_content(&self, file_name: &str, content: &[u8]) -> String { let input = Input::stdin().with_name(Some(file_name)); let mut opened_input = input.open(content, None).unwrap(); self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping) } fn syntax_is_same_for_inputkinds(&self, file_name: &str, content: &str) -> bool { let as_file = self.syntax_for_real_file_with_content_os(file_name.as_ref(), content); let as_reader = self.syntax_for_file_with_content_os(file_name.as_ref(), content); let consistent = as_file == as_reader; // TODO: Compare StdIn somehow? if !consistent { eprintln!( "Inconsistent syntax detection:\nFor File: {as_file}\nFor Reader: {as_reader}" ) } consistent } } #[test] fn syntax_detection_basic() { let test = SyntaxDetectionTest::new(); assert_eq!(test.syntax_for_file("test.rs"), "Rust"); assert_eq!(test.syntax_for_file("test.cpp"), "C++"); assert_eq!(test.syntax_for_file("test.build"), "NAnt Build File"); assert_eq!( test.syntax_for_file("PKGBUILD"), "Bourne Again Shell (bash)" ); assert_eq!(test.syntax_for_file(".bashrc"), "Bourne Again Shell (bash)"); assert_eq!(test.syntax_for_file("Makefile"), "Makefile"); } #[cfg(unix)] #[test] fn syntax_detection_invalid_utf8() { use std::os::unix::ffi::OsStrExt; let test = SyntaxDetectionTest::new(); assert_eq!( test.syntax_for_file_os(OsStr::from_bytes(b"invalid_\xFEutf8_filename.rs")), "Rust" ); } #[test] fn syntax_detection_same_for_inputkinds() { let mut test = SyntaxDetectionTest::new(); test.syntax_mapping .insert("*.myext", MappingTarget::MapTo("C")) .ok(); test.syntax_mapping .insert("MY_FILE", MappingTarget::MapTo("Markdown")) .ok(); assert!(test.syntax_is_same_for_inputkinds("Test.md", "")); assert!(test.syntax_is_same_for_inputkinds("Test.txt", "#!/bin/bash")); assert!(test.syntax_is_same_for_inputkinds(".bashrc", "")); 
assert!(test.syntax_is_same_for_inputkinds("test.h", "")); assert!(test.syntax_is_same_for_inputkinds("test.js", "#!/bin/bash")); assert!(test.syntax_is_same_for_inputkinds("test.myext", "")); assert!(test.syntax_is_same_for_inputkinds("MY_FILE", "")); assert!(test.syntax_is_same_for_inputkinds("MY_FILE", "