From 87978e7755d614bbfc6e6939df9ebbcd04925cac Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Tue, 7 Sep 2021 17:21:48 +0200 Subject: [PATCH] Make asset compression optional at compile time (#1825) This will be needed to later support zero-copy deserialization of independent syntax sets, but is interesting and useful on its own. Instead of deferring serialization and deserialization to syntect, we implement it ourselves in the same way, but make compression optional. --- Cargo.lock | 2 ++ Cargo.toml | 4 +++- src/assets.rs | 44 ++++++++++++++++++++++++++++++++++++------- src/build_assets.rs | 46 +++++++++++++++++++++++++++++++++++++++------ 4 files changed, 82 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34183f59..b1648bb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,7 @@ dependencies = [ "ansi_term 0.12.1", "assert_cmd", "atty", + "bincode", "bugreport", "clap", "clircle", @@ -96,6 +97,7 @@ dependencies = [ "content_inspector", "dirs-next", "encoding", + "flate2", "git2", "globset", "grep-cli", diff --git a/Cargo.toml b/Cargo.toml index 213218df..a07eed6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,9 @@ regex-fancy = ["syntect/regex-fancy"] # Use the rust-only "fancy-regex" engine atty = { version = "0.2.14", optional = true } ansi_term = "^0.12.1" ansi_colours = "^1.0" +bincode = "1.0" console = "0.14.1" +flate2 = "1.0" lazy_static = { version = "1.4", optional = true } lazycell = "1.0" thiserror = "1.0" @@ -72,7 +74,7 @@ default-features = false [dependencies.syntect] version = "4.6.0" default-features = false -features = ["parsing", "dump-load"] +features = ["parsing"] [dependencies.clap] version = "2.33" diff --git a/src/assets.rs b/src/assets.rs index 94adf534..26de12e7 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -4,7 +4,6 @@ use std::path::{Path, PathBuf}; use lazycell::LazyCell; -use syntect::dumps::{from_binary, from_reader}; use syntect::highlighting::{Theme, ThemeSet}; use syntect::parsing::{SyntaxReference, SyntaxSet}; @@ -29,6 +28,12 @@ pub struct SyntaxReferenceInSet<'a> { pub syntax_set: &'a SyntaxSet, } +// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time +pub(crate) const COMPRESS_SYNTAXES: bool = true; + +// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time +pub(crate) const COMPRESS_THEMES: bool = true; + const IGNORED_SUFFIXES: [&str; 13] = [ // Editor etc backups "~", @@ -66,7 +71,7 @@ impl HighlightingAssets { pub fn from_cache(cache_path: &Path) -> Result { Ok(HighlightingAssets::new( SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")), - asset_from_cache(&cache_path.join("themes.bin"), "theme set")?, + asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?, )) } @@ -299,8 +304,10 @@ enum SerializedSyntaxSet { impl SerializedSyntaxSet { fn deserialize(&self) -> Result { match self { - SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data)), - SerializedSyntaxSet::FromFile(ref path) => asset_from_cache(path, "syntax set"), + SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)), + SerializedSyntaxSet::FromFile(ref path) => { + asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES) + } } } } @@ -310,10 +317,32 @@ pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] { } pub(crate) fn get_integrated_themeset() -> ThemeSet { - from_binary(include_bytes!("../assets/themes.bin")) + from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES) } -fn asset_from_cache(path: &Path, description: &str) -> Result { +pub(crate) fn from_binary(v: &[u8], compressed: bool) -> T { + asset_from_contents(v, "n/a", compressed) + .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!") +} + +fn asset_from_contents( + contents: &[u8], + description: &str, + compressed: bool, +) -> Result { + if compressed { + bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents)) + } else { + bincode::deserialize_from(contents) + } + .map_err(|_| format!("Could not parse {}", description).into()) +} + +fn asset_from_cache( + path: &Path, + description: &str, + compressed: bool, +) -> Result { let contents = fs::read(path).map_err(|_| { format!( "Could not load cached {} '{}'", @@ -321,7 +350,8 @@ fn asset_from_cache(path: &Path, description: &s path.to_string_lossy() ) })?; - from_reader(&contents[..]).map_err(|_| format!("Could not parse cached {}", description).into()) + asset_from_contents(&contents[..], description, compressed) + .map_err(|_| format!("Could not parse cached {}", description).into()) } #[cfg(test)] diff --git a/src/build_assets.rs b/src/build_assets.rs index 1eb532e2..43687d5e 100644 --- a/src/build_assets.rs +++ b/src/build_assets.rs @@ -1,12 +1,12 @@ use std::collections::HashMap; use std::path::Path; -use syntect::dumps::from_binary; use syntect::highlighting::ThemeSet; use syntect::parsing::syntax_definition::{ ContextReference, MatchOperation, MatchPattern, Pattern, SyntaxDefinition, }; use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder}; +use crate::assets::*; use crate::error::*; type SyntaxName = String; @@ -86,7 +86,7 @@ fn build_syntax_set_builder( builder.add_plain_text_syntax(); builder } else { - from_binary::(crate::assets::get_serialized_integrated_syntaxset()) + from_binary::(get_serialized_integrated_syntaxset(), COMPRESS_SYNTAXES) .into_builder() }; @@ -120,8 +120,18 @@ fn write_assets( current_version: &str, ) -> Result<()> { let _ = std::fs::create_dir_all(target_dir); - asset_to_cache(theme_set, &target_dir.join("themes.bin"), "theme set")?; - asset_to_cache(syntax_set, &target_dir.join("syntaxes.bin"), "syntax set")?; + asset_to_cache( + theme_set, + &target_dir.join("themes.bin"), + "theme set", + COMPRESS_THEMES, + )?; + asset_to_cache( + syntax_set, + &target_dir.join("syntaxes.bin"), + "syntax set", + COMPRESS_SYNTAXES, + )?; print!( "Writing metadata to folder {} ... ", @@ -294,9 +304,33 @@ impl SyntaxSetDependencyBuilder { } } -fn asset_to_cache(asset: &T, path: &Path, description: &str) -> Result<()> { +fn asset_to_contents( + asset: &T, + description: &str, + compressed: bool, +) -> Result> { + let mut contents = vec![]; + if compressed { + bincode::serialize_into( + flate2::write::ZlibEncoder::new(&mut contents, flate2::Compression::best()), + asset, + ) + } else { + bincode::serialize_into(&mut contents, asset) + } + .map_err(|_| format!("Could not serialize {}", description))?; + Ok(contents) +} + +fn asset_to_cache( + asset: &T, + path: &Path, + description: &str, + compressed: bool, +) -> Result<()> { print!("Writing {} to {} ... ", description, path.to_string_lossy()); - syntect::dumps::dump_to_file(asset, &path).map_err(|_| { + let contents = asset_to_contents(asset, description, compressed)?; + std::fs::write(path, &contents[..]).map_err(|_| { format!( "Could not save {} to {}", description,