Make asset compression optional at compile time (#1825)

This will be needed to later support zero-copy deserialization of independent
syntax sets, but is interesting and useful on its own.

Instead of delegating serialization and deserialization to syntect, we implement
them ourselves in the same way, but make compression optional.
This commit is contained in:
Martin Nordholts 2021-09-07 17:21:48 +02:00 committed by GitHub
parent d935ea1cda
commit 87978e7755
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 82 additions and 14 deletions

2
Cargo.lock generated
View File

@ -89,6 +89,7 @@ dependencies = [
"ansi_term 0.12.1",
"assert_cmd",
"atty",
"bincode",
"bugreport",
"clap",
"clircle",
@ -96,6 +97,7 @@ dependencies = [
"content_inspector",
"dirs-next",
"encoding",
"flate2",
"git2",
"globset",
"grep-cli",

View File

@ -45,7 +45,9 @@ regex-fancy = ["syntect/regex-fancy"] # Use the rust-only "fancy-regex" engine
atty = { version = "0.2.14", optional = true }
ansi_term = "^0.12.1"
ansi_colours = "^1.0"
bincode = "1.0"
console = "0.14.1"
flate2 = "1.0"
lazy_static = { version = "1.4", optional = true }
lazycell = "1.0"
thiserror = "1.0"
@ -72,7 +74,7 @@ default-features = false
[dependencies.syntect]
version = "4.6.0"
default-features = false
features = ["parsing", "dump-load"]
features = ["parsing"]
[dependencies.clap]
version = "2.33"

View File

@ -4,7 +4,6 @@ use std::path::{Path, PathBuf};
use lazycell::LazyCell;
use syntect::dumps::{from_binary, from_reader};
use syntect::highlighting::{Theme, ThemeSet};
use syntect::parsing::{SyntaxReference, SyntaxSet};
@ -29,6 +28,12 @@ pub struct SyntaxReferenceInSet<'a> {
pub syntax_set: &'a SyntaxSet,
}
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
// The same flag is passed when writing and when reading the syntax set, so both sides stay in sync.
pub(crate) const COMPRESS_SYNTAXES: bool = true;
// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time
// The same flag is passed when writing and when reading the theme set, so both sides stay in sync.
pub(crate) const COMPRESS_THEMES: bool = true;
const IGNORED_SUFFIXES: [&str; 13] = [
// Editor etc backups
"~",
@ -66,7 +71,7 @@ impl HighlightingAssets {
pub fn from_cache(cache_path: &Path) -> Result<Self> {
Ok(HighlightingAssets::new(
SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
asset_from_cache(&cache_path.join("themes.bin"), "theme set")?,
asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
))
}
@ -299,8 +304,10 @@ enum SerializedSyntaxSet {
impl SerializedSyntaxSet {
fn deserialize(&self) -> Result<SyntaxSet> {
match self {
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data)),
SerializedSyntaxSet::FromFile(ref path) => asset_from_cache(path, "syntax set"),
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
SerializedSyntaxSet::FromFile(ref path) => {
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
}
}
}
}
@ -310,10 +317,32 @@ pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
}
pub(crate) fn get_integrated_themeset() -> ThemeSet {
from_binary(include_bytes!("../assets/themes.bin"))
from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
}
fn asset_from_cache<T: serde::de::DeserializeOwned>(path: &Path, description: &str) -> Result<T> {
/// Deserializes an asset of type `T` from bytes that are integrated in the binary.
///
/// `compressed` must describe how `v` was actually serialized.
///
/// # Panics
///
/// Panics if `v` cannot be deserialized as `T`, e.g. because `compressed`
/// does not match the way the data was written.
pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
    let parsed = asset_from_contents(v, "n/a", compressed);
    parsed
        .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
}
/// Deserializes an asset of type `T` from `contents` using bincode.
///
/// When `compressed` is `true`, the bytes are read through a zlib decoder;
/// otherwise they are handed to bincode unchanged. `description` is only
/// used to build the error message.
///
/// # Errors
///
/// Returns a "Could not parse ..." error if deserialization fails.
fn asset_from_contents<T: serde::de::DeserializeOwned>(
    contents: &[u8],
    description: &str,
    compressed: bool,
) -> Result<T> {
    let parsed = if compressed {
        let decoder = flate2::read::ZlibDecoder::new(contents);
        bincode::deserialize_from(decoder)
    } else {
        bincode::deserialize_from(contents)
    };

    match parsed {
        Ok(asset) => Ok(asset),
        Err(_) => Err(format!("Could not parse {}", description).into()),
    }
}
fn asset_from_cache<T: serde::de::DeserializeOwned>(
path: &Path,
description: &str,
compressed: bool,
) -> Result<T> {
let contents = fs::read(path).map_err(|_| {
format!(
"Could not load cached {} '{}'",
@ -321,7 +350,8 @@ fn asset_from_cache<T: serde::de::DeserializeOwned>(path: &Path, description: &s
path.to_string_lossy()
)
})?;
from_reader(&contents[..]).map_err(|_| format!("Could not parse cached {}", description).into())
asset_from_contents(&contents[..], description, compressed)
.map_err(|_| format!("Could not parse cached {}", description).into())
}
#[cfg(test)]

View File

@ -1,12 +1,12 @@
use std::collections::HashMap;
use std::path::Path;
use syntect::dumps::from_binary;
use syntect::highlighting::ThemeSet;
use syntect::parsing::syntax_definition::{
ContextReference, MatchOperation, MatchPattern, Pattern, SyntaxDefinition,
};
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
use crate::assets::*;
use crate::error::*;
type SyntaxName = String;
@ -86,7 +86,7 @@ fn build_syntax_set_builder(
builder.add_plain_text_syntax();
builder
} else {
from_binary::<SyntaxSet>(crate::assets::get_serialized_integrated_syntaxset())
from_binary::<SyntaxSet>(get_serialized_integrated_syntaxset(), COMPRESS_SYNTAXES)
.into_builder()
};
@ -120,8 +120,18 @@ fn write_assets(
current_version: &str,
) -> Result<()> {
let _ = std::fs::create_dir_all(target_dir);
asset_to_cache(theme_set, &target_dir.join("themes.bin"), "theme set")?;
asset_to_cache(syntax_set, &target_dir.join("syntaxes.bin"), "syntax set")?;
asset_to_cache(
theme_set,
&target_dir.join("themes.bin"),
"theme set",
COMPRESS_THEMES,
)?;
asset_to_cache(
syntax_set,
&target_dir.join("syntaxes.bin"),
"syntax set",
COMPRESS_SYNTAXES,
)?;
print!(
"Writing metadata to folder {} ... ",
@ -294,9 +304,33 @@ impl SyntaxSetDependencyBuilder {
}
}
fn asset_to_cache<T: serde::Serialize>(asset: &T, path: &Path, description: &str) -> Result<()> {
/// Serializes `asset` with bincode into a freshly allocated byte vector.
///
/// When `compressed` is `true`, the bincode output is written through a zlib
/// encoder at the best compression level; otherwise the raw bincode bytes are
/// returned. `description` is only used to build the error message.
///
/// # Errors
///
/// Returns a "Could not serialize ..." error if bincode serialization fails
/// or if the zlib stream cannot be finalized.
fn asset_to_contents<T: serde::Serialize>(
    asset: &T,
    description: &str,
    compressed: bool,
) -> Result<Vec<u8>> {
    let serialize_error = || format!("Could not serialize {}", description);

    let mut contents = vec![];
    if compressed {
        let mut encoder =
            flate2::write::ZlibEncoder::new(&mut contents, flate2::Compression::best());
        bincode::serialize_into(&mut encoder, asset).map_err(|_| serialize_error())?;
        // Finalize the stream explicitly instead of relying on the encoder
        // being dropped, so that a failure while writing the trailing data
        // is reported rather than silently ignored.
        encoder.finish().map_err(|_| serialize_error())?;
    } else {
        bincode::serialize_into(&mut contents, asset).map_err(|_| serialize_error())?;
    }
    Ok(contents)
}
fn asset_to_cache<T: serde::Serialize>(
asset: &T,
path: &Path,
description: &str,
compressed: bool,
) -> Result<()> {
print!("Writing {} to {} ... ", description, path.to_string_lossy());
syntect::dumps::dump_to_file(asset, &path).map_err(|_| {
let contents = asset_to_contents(asset, description, compressed)?;
std::fs::write(path, &contents[..]).map_err(|_| {
format!(
"Could not save {} to {}",
description,