diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index 453467d7..6e3cf793 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -40,10 +40,13 @@ termcolor = "1.4.0"
 tracing = "0.1.40"
 which = "5.0.0"
 
-[dev-dependencies]
-tracing-test = "0.1"
-uuid = { workspace = true, features = [ "v4", "fast-rng" ] }
-rand = { workspace = true }
+[dependencies.blake3]
+version = "1.3.3"
+features = ["rayon"]
+
+[dependencies.command-group]
+version = "2.1.0"
+features = ["with-tokio"]
 
 [dependencies.clap]
 version = "4.4.7"
@@ -123,6 +126,11 @@ embed-resource = "2.4.0"
 version = "1.0.2"
 path = "../bosion"
 
+[dev-dependencies]
+tracing-test = "0.1"
+uuid = { workspace = true, features = [ "v4", "fast-rng" ] }
+rand = { workspace = true }
+
 [features]
 default = ["pid1"]
 
diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs
index 6a162954..8f5bc70f 100644
--- a/crates/cli/src/args.rs
+++ b/crates/cli/src/args.rs
@@ -808,7 +808,7 @@ pub struct Args {
 	/// If the file is smaller than n bytes, the whole file is returned. There is no filter to
 	/// read the whole file at once to encourage limiting the amount of data read and processed.
 	///
-	/// - 'string | hash', and 'path | hashfile' return the hash of the string or file at path.
+	/// - 'string | hash', and 'path | file_hash' return the hash of the string or file at path.
 	/// No guarantee is made about the algorithm used: treat it as an opaque value.
 	///
 	/// - 'any | kv_store(key)', 'kv_fetch(key)', and 'kv_clear' provide a simple key-value store.
@@ -826,7 +826,9 @@ pub struct Args {
 	/// the path to a file containing a jaq program.
 	///
 	/// Jaq programs are run in order, after all other filters, and short-circuit: if a filter (jaq
-	/// or not) rejects an event, execution stops there, and no other filters are run.
+	/// or not) rejects an event, execution stops there, and no other filters are run. Additionally,
+	/// they stop after outputting the first value, so you'll want to use 'any' or 'all' when
+	/// iterating, otherwise only the first item will be processed, which can be quite confusing!
 	///
 	/// Examples:
 	///
diff --git a/crates/cli/src/filterer/proglib.rs b/crates/cli/src/filterer/proglib.rs
index cc86347e..16c1f9a9 100644
--- a/crates/cli/src/filterer/proglib.rs
+++ b/crates/cli/src/filterer/proglib.rs
@@ -270,6 +270,77 @@ pub fn load_watchexec_defs(defs: &mut Definitions) -> miette::Result<()> {
 		}),
 	);
 
+	// `string | hash`: hex-encoded BLAKE3 digest of the input string.
+	trace!("jaq: add hash filter");
+	defs.insert_custom(
+		"hash",
+		CustomFilter::new(0, {
+			move |_, (_, val)| {
+				let string = match &val {
+					Val::Str(v) => v.to_string(),
+					// format! so the offending value is interpolated into the message.
+					_ => return_err!(custom_err(format!("expected string but got {val:?}"))),
+				};
+
+				Box::new(once(Ok(Val::Str(
+					blake3::hash(string.as_bytes()).to_hex().to_string().into(),
+				))))
+			}
+		}),
+	);
+
+	// `path | file_hash`: hex-encoded BLAKE3 digest of the contents of the file at `path`, or
+	// null (with an error logged) if the file cannot be opened or fully read.
+	trace!("jaq: add file_hash filter");
+	defs.insert_custom(
+		"file_hash",
+		CustomFilter::new(0, {
+			move |_, (_, val)| {
+				let path = match &val {
+					Val::Str(v) => v.to_string(),
+					// format! so the offending value is interpolated into the message.
+					_ => return_err!(custom_err(format!("expected string but got {val:?}"))),
+				};
+
+				Box::new(once(Ok(match File::open(&path) {
+					Ok(mut file) => {
+						const BUFFER_SIZE: usize = 1024 * 1024;
+						let mut hasher = blake3::Hasher::new();
+						// Allocated once and reused: only buf[..bytes] is hashed, so stale
+						// bytes left over from a previous chunk never reach the hasher.
+						let mut buf = vec![0; BUFFER_SIZE];
+						let outcome = loop {
+							match file.read(&mut buf) {
+								Ok(0) => break Ok(()),
+								Ok(bytes) => {
+									debug!("jaq: read {bytes} bytes from {path:?}");
+									hasher.update(&buf[..bytes]);
+								}
+								// Interrupted reads are transient: retry.
+								Err(err) if err.kind() == std::io::ErrorKind::Interrupted => {}
+								Err(err) => break Err(err),
+							}
+						};
+
+						match outcome {
+							Ok(()) => Val::Str(hasher.finalize().to_hex().to_string().into()),
+							// A digest of a partially read file would be wrong, so report
+							// the failure the same way as a failed open.
+							Err(err) => {
+								error!("jaq: failed to read file {path:?}: {err:?}");
+								Val::Null
+							}
+						}
+					}
+					Err(err) => {
+						error!("jaq: failed to open file {path:?}: {err:?}");
+						Val::Null
+					}
+				})))
+			}
+		}),
+	);
+
 	Ok(())
 }
 