diff --git a/Cargo.lock b/Cargo.lock index 1356698..abcddac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,19 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -26,6 +39,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -111,6 +130,18 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "async-broadcast" version = "0.7.0" @@ -389,6 +420,15 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -401,6 +441,20 @@ version = "2.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "blake3" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "rayon", +] + [[package]] name = "block" version = "0.1.6" @@ -546,6 +600,15 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.3", +] + [[package]] name = "clap" version = "4.5.4" @@ -641,6 +704,18 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "command-group" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5080df6b0f0ecb76cab30808f00d937ba725cebe266a3da8cd89dff92f2a9916" +dependencies = [ + "async-trait", + "nix 0.26.4", + "tokio", + "winapi", +] + [[package]] name = "concurrent-queue" version = "2.4.0" @@ -687,6 +762,12 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -771,6 +852,19 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.3", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "deranged" version = "0.3.11" @@ -869,6 +963,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "either" version = "1.11.0" @@ -1710,6 +1810,10 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "hdrhistogram" @@ -1742,6 +1846,12 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hifijson" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18ae468bcb4dfecf0e4949ee28abbc99076b6a0077f51ddbc94dbfff8e6a870c" + [[package]] name = "home" version = "0.5.9" @@ -1975,6 +2085,68 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jaq-core" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03d6a5713b8f33675abfac79d1db0022a3f28764b2a6b96a185c199ad8dab86d" +dependencies = [ + "aho-corasick", + "base64", + "hifijson", + "jaq-interpret", + "libm", + "log", + "regex", + "time", + "urlencoding", +] + +[[package]] +name = "jaq-interpret" +version = "1.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f569e38e5fc677db8dfda89ee0b4c25b3f53e811b16434fd14bdc5b43fc362ac" +dependencies = [ + "ahash", + "dyn-clone", + "hifijson", + "indexmap 2.2.6", + "jaq-syn", + "once_cell", + "serde_json", +] + +[[package]] +name = "jaq-parse" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef6f8beb9f9922546419e774e24199e8a968f54c63a5a2323c8f3ef3321ace14" +dependencies = [ + "chumsky", + "jaq-syn", +] + +[[package]] +name = "jaq-std" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d7871c59297cbfdd18f6f1bbbafaad24e97fd555ee1e2a1be7a40a5a20f551a" +dependencies = [ + "bincode", + "jaq-parse", + "jaq-syn", +] + +[[package]] +name = "jaq-syn" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4d60101fb791b20c982731d848ed6e7d25363656497647c2093b68bd88398d6" +dependencies = [ + "serde", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -2207,6 +2379,17 @@ dependencies = [ "smallvec", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nix" version = "0.27.1" @@ -2758,6 +2941,26 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "realpath-ext" 
version = "0.1.3" @@ -3666,6 +3869,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8parse" version = "0.2.1" @@ -3822,7 +4031,9 @@ dependencies = [ name = "watchexec-cli" version = "1.25.1" dependencies = [ + "ahash", "argfile", + "blake3", "bosion", "chrono", "clap", @@ -3830,17 +4041,26 @@ dependencies = [ "clap_complete_nushell", "clap_mangen", "clearscreen", + "command-group", "console-subscriber", + "dashmap", "dirs 5.0.1", "embed-resource", "eyra", "futures", "humantime", "ignore-files", + "indexmap 2.2.6", "is-terminal", + "jaq-core", + "jaq-interpret", + "jaq-parse", + "jaq-std", + "jaq-syn", "miette", "mimalloc", "notify-rust", + "once_cell", "pid1", "project-origins", "rand", @@ -3878,7 +4098,6 @@ version = "3.0.0" dependencies = [ "ignore", "ignore-files", - "project-origins", "tokio", "tracing", "tracing-subscriber", @@ -4312,6 +4531,26 @@ dependencies = [ "zvariant", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "zvariant" version = "4.0.2" diff --git a/completions/bash b/completions/bash index ef4500c..d4447c3 100644 --- a/completions/bash +++ b/completions/bash @@ -19,7 +19,7 @@ _watchexec() { case "${cmd}" in watchexec) - opts="-w -c -o -W -r -s -k -d -p -n -E -1 -N -q -e -f -i -v -h -V --watch --clear --on-busy-update --watch-when-idle --restart --signal 
--kill --stop-signal --stop-timeout --map-signal --debounce --stdin-quit --no-vcs-ignore --no-project-ignore --no-global-ignore --no-default-ignore --no-discover-ignore --ignore-nothing --postpone --delay-run --poll --shell --no-shell-long --no-environment --emit-events-to --only-emit-events --env --no-process-group --notify --color --timings --quiet --bell --project-origin --workdir --exts --filter --filter-file --ignore --ignore-file --fs-events --no-meta --print-events --verbose --log-file --manual --completions --help --version [COMMAND]..." + opts="-w -c -o -W -r -s -k -d -p -n -E -1 -N -q -e -f -j -i -v -h -V --watch --clear --on-busy-update --watch-when-idle --restart --signal --kill --stop-signal --stop-timeout --map-signal --debounce --stdin-quit --no-vcs-ignore --no-project-ignore --no-global-ignore --no-default-ignore --no-discover-ignore --ignore-nothing --postpone --delay-run --poll --shell --no-shell-long --no-environment --emit-events-to --only-emit-events --env --no-process-group --notify --color --timings --quiet --bell --project-origin --workdir --exts --filter --filter-file --filter-prog --ignore --ignore-file --fs-events --no-meta --print-events --verbose --log-file --manual --completions --help --version [COMMAND]..." 
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 @@ -150,6 +150,14 @@ _watchexec() { fi return 0 ;; + --filter-prog) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; + -j) + COMPREPLY=($(compgen -f "${cur}")) + return 0 + ;; --ignore) COMPREPLY=($(compgen -f "${cur}")) return 0 diff --git a/completions/elvish b/completions/elvish index 0ff0326..b948a0a 100644 --- a/completions/elvish +++ b/completions/elvish @@ -45,6 +45,8 @@ set edit:completion:arg-completer[watchexec] = {|@words| cand -f 'Filename patterns to filter to' cand --filter 'Filename patterns to filter to' cand --filter-file 'Files to load filters from' + cand -j '[experimental] Filter programs' + cand --filter-prog '[experimental] Filter programs' cand -i 'Filename patterns to filter out' cand --ignore 'Filename patterns to filter out' cand --ignore-file 'Files to load ignores from' diff --git a/completions/fish b/completions/fish index 3f459a9..7bcbe09 100644 --- a/completions/fish +++ b/completions/fish @@ -17,6 +17,7 @@ complete -c watchexec -l workdir -d 'Set the working directory' -r -f -a "(__fis complete -c watchexec -s e -l exts -d 'Filename extensions to filter to' -r complete -c watchexec -s f -l filter -d 'Filename patterns to filter to' -r complete -c watchexec -l filter-file -d 'Files to load filters from' -r -F +complete -c watchexec -s j -l filter-prog -d '[experimental] Filter programs' -r complete -c watchexec -s i -l ignore -d 'Filename patterns to filter out' -r complete -c watchexec -l ignore-file -d 'Files to load ignores from' -r -F complete -c watchexec -l fs-events -d 'Filesystem events to filter to' -r -f -a "{access '',create '',remove '',rename '',modify '',metadata ''}" diff --git a/completions/nu b/completions/nu index 0334348..6bf12e9 100644 --- a/completions/nu +++ b/completions/nu @@ -67,6 +67,7 @@ module completions { --exts(-e): string # Filename extensions to filter to --filter(-f): string # Filename 
patterns to filter to --filter-file: string # Files to load filters from + --filter-prog(-j): string # [experimental] Filter programs --ignore(-i): string # Filename patterns to filter out --ignore-file: string # Files to load ignores from --fs-events: string@"nu-complete watchexec filter_fs_events" # Filesystem events to filter to diff --git a/completions/powershell b/completions/powershell index ff441d8..8673633 100644 --- a/completions/powershell +++ b/completions/powershell @@ -48,6 +48,8 @@ Register-ArgumentCompleter -Native -CommandName 'watchexec' -ScriptBlock { [CompletionResult]::new('-f', 'f', [CompletionResultType]::ParameterName, 'Filename patterns to filter to') [CompletionResult]::new('--filter', 'filter', [CompletionResultType]::ParameterName, 'Filename patterns to filter to') [CompletionResult]::new('--filter-file', 'filter-file', [CompletionResultType]::ParameterName, 'Files to load filters from') + [CompletionResult]::new('-j', 'j', [CompletionResultType]::ParameterName, '[experimental] Filter programs') + [CompletionResult]::new('--filter-prog', 'filter-prog', [CompletionResultType]::ParameterName, '[experimental] Filter programs') [CompletionResult]::new('-i', 'i', [CompletionResultType]::ParameterName, 'Filename patterns to filter out') [CompletionResult]::new('--ignore', 'ignore', [CompletionResultType]::ParameterName, 'Filename patterns to filter out') [CompletionResult]::new('--ignore-file', 'ignore-file', [CompletionResultType]::ParameterName, 'Files to load ignores from') diff --git a/completions/zsh b/completions/zsh index cda3fb2..a73496f 100644 --- a/completions/zsh +++ b/completions/zsh @@ -42,6 +42,8 @@ _watchexec() { '*-f+[Filename patterns to filter to]:PATTERN: ' \ '*--filter=[Filename patterns to filter to]:PATTERN: ' \ '*--filter-file=[Files to load filters from]:PATH:_files' \ +'*-j+[\[experimental\] Filter programs]:EXPRESSION: ' \ +'*--filter-prog=[\[experimental\] Filter programs]:EXPRESSION: ' \ '*-i+[Filename patterns to 
filter out]:PATTERN: ' \ '*--ignore=[Filename patterns to filter out]:PATTERN: ' \ '*--ignore-file=[Files to load ignores from]:PATH:_files' \ diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index af7139f..bc26728 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -20,27 +20,39 @@ name = "watchexec" path = "src/main.rs" [dependencies] +ahash = "0.8.6" # needs to be in sync with jaq's argfile = "0.2.0" chrono = "0.4.31" clap_complete = "4.4.4" clap_complete_nushell = "4.4.2" clap_mangen = "0.2.15" clearscreen = "3.0.0" +dashmap = "5.4.0" dirs = "5.0.0" futures = "0.3.29" humantime = "2.1.0" +indexmap = "2.2.6" # needs to be in sync with jaq's is-terminal = "0.4.4" +jaq-core = "1.2.1" +jaq-interpret = "1.2.1" +jaq-parse = "1.0.2" +jaq-std = "1.2.1" +jaq-syn = "1.1.0" notify-rust = "4.9.0" +once_cell = "1.17.1" serde_json = "1.0.107" tempfile = "3.8.1" termcolor = "1.4.0" tracing = "0.1.40" which = "6.0.1" -[dev-dependencies] -tracing-test = "0.2.4" -uuid = { workspace = true, features = [ "v4", "fast-rng" ] } -rand = { workspace = true } +[dependencies.blake3] +version = "1.3.3" +features = ["rayon"] + +[dependencies.command-group] +version = "2.1.0" +features = ["with-tokio"] [dependencies.clap] version = "4.4.7" @@ -120,6 +132,11 @@ embed-resource = "2.4.0" version = "1.0.2" path = "../bosion" +[dev-dependencies] +tracing-test = "0.2.4" +uuid = { workspace = true, features = [ "v4", "fast-rng" ] } +rand = { workspace = true } + [features] default = ["pid1"] diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index a11b971..76e2f9e 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -9,9 +9,13 @@ use clap::{ builder::TypedValueParser, error::ErrorKind, Arg, ArgAction, Command, CommandFactory, Parser, ValueEnum, ValueHint, }; +use miette::{IntoDiagnostic, Result}; +use tokio::{fs::File, io::AsyncReadExt}; use watchexec::paths::PATH_SEPARATOR; use watchexec_signals::Signal; +use 
crate::filterer::parse::parse_filter_program; + const OPTSET_FILTERING: &str = "Filtering"; const OPTSET_COMMAND: &str = "Command"; const OPTSET_DEBUGGING: &str = "Debugging"; @@ -791,6 +795,77 @@ pub struct Args { )] pub filter_files: Vec, + /// [experimental] Filter programs. + /// + /// /!\ This option is EXPERIMENTAL and may change and/or vanish without notice. + /// + /// Provide your own custom filter programs in jaq (similar to jq) syntax. Programs are given + /// an event in the same format as described in '--emit-events-to' and must return a boolean. + /// + /// In addition to the jaq stdlib, watchexec adds some custom filter definitions: + /// + /// - 'path | file_meta' returns file metadata or null if the file does not exist. + /// + /// - 'path | file_size' returns the size of the file at path, or null if it does not exist. + /// + /// - 'path | file_read(bytes)' returns a string with the first n bytes of the file at path. + /// If the file is smaller than n bytes, the whole file is returned. There is no filter to + /// read the whole file at once to encourage limiting the amount of data read and processed. + /// + /// - 'string | hash', and 'path | file_hash' return the hash of the string or file at path. + /// No guarantee is made about the algorithm used: treat it as an opaque value. + /// + /// - 'any | kv_store(key)', 'kv_fetch(key)', and 'kv_clear' provide a simple key-value store. + /// Data is kept in memory only, there is no persistence. Consistency is not guaranteed. + /// + /// - 'any | printout', 'any | printerr', and 'any | log(level)' will print or log any given + /// value to stdout, stderr, or the log (levels = error, warn, info, debug, trace), and + /// pass the value through (so '[1] | log("debug") | .[]' will produce a '1' and log '[1]'). + /// + /// All filtering done with such programs, and especially those using kv or filesystem access, + /// is much slower than the other filtering methods. 
If filtering is too slow, events will back + /// up and stall watchexec. Take care when designing your filters. + /// + /// If the argument to this option starts with an '@', the rest of the argument is taken to be + /// the path to a file containing a jaq program. + /// + /// Jaq programs are run in order, after all other filters, and short-circuit: if a filter (jaq + /// or not) rejects an event, execution stops there, and no other filters are run. Additionally, + /// they stop after outputting the first value, so you'll want to use 'any' or 'all' when + /// iterating, otherwise only the first item will be processed, which can be quite confusing! + /// + /// Find user-contributed programs or submit your own useful ones at + /// . + /// + /// ## Examples: + /// + /// Regexp ignore filter on paths: + /// + /// 'all(.tags[] | select(.kind == "path"); .absolute | test("[.]test[.]js$")) | not' + /// + /// Pass any event that creates a file: + /// + /// 'any(.tags[] | select(.kind == "fs"); .simple == "create")' + /// + /// Pass events that touch executable files: + /// + /// 'any(.tags[] | select(.kind == "path" && .filetype == "file"); .absolute | metadata | .executable)' + /// + /// Ignore files that start with shebangs: + /// + /// 'any(.tags[] | select(.kind == "path" && .filetype == "file"); .absolute | read(2) == "#!") | not' + #[arg( + long = "filter-prog", + short = 'j', + help_heading = OPTSET_FILTERING, + value_name = "EXPRESSION", + )] + pub filter_programs: Vec, + + #[doc(hidden)] + #[clap(skip)] + pub filter_programs_parsed: Vec, + /// Filename patterns to filter out /// /// Provide a glob-like filter pattern, and events for files matching the pattern will be @@ -1086,8 +1161,8 @@ fn expand_args_up_to_doubledash() -> Result, std::io::Error> { } #[inline] -pub fn get_args() -> Args { - use tracing::{debug, warn}; +pub async fn get_args() -> Result { + use tracing::{debug, trace, warn}; if std::env::var("RUST_LOG").is_ok() { warn!("⚠ RUST_LOG environment 
variable set, logging options have no effect"); @@ -1157,6 +1232,24 @@ pub fn get_args() -> Args { .exit(); } + for (n, prog) in args.filter_programs.iter_mut().enumerate() { + if let Some(progpath) = prog.strip_prefix('@') { + trace!(?n, path=?progpath, "reading filter program from file"); + let mut progfile = File::open(&progpath).await.into_diagnostic()?; + let mut buf = + String::with_capacity(progfile.metadata().await.into_diagnostic()?.len() as _); + let bytes_read = progfile.read_to_string(&mut buf).await.into_diagnostic()?; + debug!(?n, path=?progpath, %bytes_read, "read filter program from file"); + *prog = buf; + } + } + + args.filter_programs_parsed = std::mem::take(&mut args.filter_programs) + .into_iter() + .enumerate() + .map(parse_filter_program) + .collect::>()?; + debug!(?args, "got arguments"); - args + Ok(args) } diff --git a/crates/cli/src/filterer.rs b/crates/cli/src/filterer.rs index 0bb3f1a..6d9ec5c 100644 --- a/crates/cli/src/filterer.rs +++ b/crates/cli/src/filterer.rs @@ -1,4 +1,175 @@ -mod common; -mod globset; +use std::{ + ffi::OsString, + path::{Path, PathBuf, MAIN_SEPARATOR}, + sync::Arc, +}; -pub use globset::globset; +use miette::{IntoDiagnostic, Result}; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tracing::{info, trace, trace_span}; +use watchexec::{error::RuntimeError, filter::Filterer}; +use watchexec_events::{ + filekind::{FileEventKind, ModifyKind}, + Event, Priority, Tag, +}; +use watchexec_filterer_globset::GlobsetFilterer; + +use crate::args::{Args, FsEvent}; + +mod dirs; +pub(crate) mod parse; +mod proglib; +mod progs; +mod syncval; + +/// A custom filterer that combines the library's Globset filterer and a switch for --no-meta +#[derive(Debug)] +pub struct WatchexecFilterer { + inner: GlobsetFilterer, + fs_events: Vec, + progs: Option, +} + +impl Filterer for WatchexecFilterer { + #[tracing::instrument(level = "trace", skip(self))] + fn check_event(&self, event: &Event, priority: Priority) -> Result { + for tag in 
&event.tags { + if let Tag::FileEventKind(fek) = tag { + let normalised = match fek { + FileEventKind::Access(_) => FsEvent::Access, + FileEventKind::Modify(ModifyKind::Name(_)) => FsEvent::Rename, + FileEventKind::Modify(ModifyKind::Metadata(_)) => FsEvent::Metadata, + FileEventKind::Modify(_) => FsEvent::Modify, + FileEventKind::Create(_) => FsEvent::Create, + FileEventKind::Remove(_) => FsEvent::Remove, + _ => continue, + }; + + trace!(allowed=?self.fs_events, this=?normalised, "check against fs event filter"); + if !self.fs_events.contains(&normalised) { + return Ok(false); + } + } + } + + trace!("check against original event"); + if !self.inner.check_event(event, priority)? { + return Ok(false); + } + + if let Some(progs) = &self.progs { + trace!("check against program filters"); + if !progs.check(event)? { + return Ok(false); + } + } + + Ok(true) + } +} + +impl WatchexecFilterer { + /// Create a new filterer from the given arguments + pub async fn new(args: &Args) -> Result> { + let (project_origin, workdir) = dirs::dirs(args).await?; + + let ignore_files = if args.no_discover_ignore { + Vec::new() + } else { + let vcs_types = dirs::vcs_types(&project_origin).await; + dirs::ignores(args, &vcs_types, &project_origin).await? 
+ }; + + let mut ignores = Vec::new(); + + if !args.no_default_ignore { + ignores.extend([ + (format!("**{MAIN_SEPARATOR}.DS_Store"), None), + (String::from("watchexec.*.log"), None), + (String::from("*.py[co]"), None), + (String::from("#*#"), None), + (String::from(".#*"), None), + (String::from(".*.kate-swp"), None), + (String::from(".*.sw?"), None), + (String::from(".*.sw?x"), None), + (format!("**{MAIN_SEPARATOR}.bzr{MAIN_SEPARATOR}**"), None), + (format!("**{MAIN_SEPARATOR}_darcs{MAIN_SEPARATOR}**"), None), + ( + format!("**{MAIN_SEPARATOR}.fossil-settings{MAIN_SEPARATOR}**"), + None, + ), + (format!("**{MAIN_SEPARATOR}.git{MAIN_SEPARATOR}**"), None), + (format!("**{MAIN_SEPARATOR}.hg{MAIN_SEPARATOR}**"), None), + (format!("**{MAIN_SEPARATOR}.pijul{MAIN_SEPARATOR}**"), None), + (format!("**{MAIN_SEPARATOR}.svn{MAIN_SEPARATOR}**"), None), + ]); + } + + let mut filters = args + .filter_patterns + .iter() + .map(|f| (f.to_owned(), Some(workdir.clone()))) + .collect::>(); + + for filter_file in &args.filter_files { + filters.extend(read_filter_file(filter_file).await?); + } + + ignores.extend( + args.ignore_patterns + .iter() + .map(|f| (f.to_owned(), Some(workdir.clone()))), + ); + + let exts = args + .filter_extensions + .iter() + .map(|e| OsString::from(e.strip_prefix('.').unwrap_or(e))); + + info!("initialising Globset filterer"); + Ok(Arc::new(Self { + inner: GlobsetFilterer::new(project_origin, filters, ignores, ignore_files, exts) + .await + .into_diagnostic()?, + fs_events: args.filter_fs_events.clone(), + progs: if args.filter_programs_parsed.is_empty() { + None + } else { + Some(progs::FilterProgs::new(args)?) 
+ }, + })) + } +} + +async fn read_filter_file(path: &Path) -> Result)>> { + let _span = trace_span!("loading filter file", ?path).entered(); + + let file = tokio::fs::File::open(path).await.into_diagnostic()?; + + let metadata_len = file + .metadata() + .await + .map(|m| usize::try_from(m.len())) + .unwrap_or(Ok(0)) + .into_diagnostic()?; + let filter_capacity = if metadata_len == 0 { + 0 + } else { + metadata_len / 20 + }; + let mut filters = Vec::with_capacity(filter_capacity); + + let reader = BufReader::new(file); + let mut lines = reader.lines(); + while let Some(line) = lines.next_line().await.into_diagnostic()? { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + trace!(?line, "adding filter line"); + filters.push((line.to_owned(), Some(path.to_owned()))); + } + + Ok(filters) +} diff --git a/crates/cli/src/filterer/common.rs b/crates/cli/src/filterer/dirs.rs similarity index 100% rename from crates/cli/src/filterer/common.rs rename to crates/cli/src/filterer/dirs.rs diff --git a/crates/cli/src/filterer/globset.rs b/crates/cli/src/filterer/globset.rs deleted file mode 100644 index d235d58..0000000 --- a/crates/cli/src/filterer/globset.rs +++ /dev/null @@ -1,151 +0,0 @@ -use std::{ - ffi::OsString, - path::{Path, PathBuf, MAIN_SEPARATOR}, - sync::Arc, -}; - -use miette::{IntoDiagnostic, Result}; -use tokio::io::{AsyncBufReadExt, BufReader}; -use tracing::{info, trace, trace_span}; -use watchexec::{error::RuntimeError, filter::Filterer}; -use watchexec_events::{ - filekind::{FileEventKind, ModifyKind}, - Event, Priority, Tag, -}; -use watchexec_filterer_globset::GlobsetFilterer; - -use crate::args::{Args, FsEvent}; - -pub async fn globset(args: &Args) -> Result> { - let (project_origin, workdir) = super::common::dirs(args).await?; - - let ignore_files = if args.no_discover_ignore { - Vec::new() - } else { - let vcs_types = super::common::vcs_types(&project_origin).await; - super::common::ignores(args, &vcs_types, 
&project_origin).await? - }; - - let mut ignores = Vec::new(); - - if !args.no_default_ignore { - ignores.extend([ - (format!("**{MAIN_SEPARATOR}.DS_Store"), None), - (String::from("watchexec.*.log"), None), - (String::from("*.py[co]"), None), - (String::from("#*#"), None), - (String::from(".#*"), None), - (String::from(".*.kate-swp"), None), - (String::from(".*.sw?"), None), - (String::from(".*.sw?x"), None), - (format!("**{MAIN_SEPARATOR}.bzr{MAIN_SEPARATOR}**"), None), - (format!("**{MAIN_SEPARATOR}_darcs{MAIN_SEPARATOR}**"), None), - ( - format!("**{MAIN_SEPARATOR}.fossil-settings{MAIN_SEPARATOR}**"), - None, - ), - (format!("**{MAIN_SEPARATOR}.git{MAIN_SEPARATOR}**"), None), - (format!("**{MAIN_SEPARATOR}.hg{MAIN_SEPARATOR}**"), None), - (format!("**{MAIN_SEPARATOR}.pijul{MAIN_SEPARATOR}**"), None), - (format!("**{MAIN_SEPARATOR}.svn{MAIN_SEPARATOR}**"), None), - ]); - } - - let mut filters = args - .filter_patterns - .iter() - .map(|f| (f.to_owned(), Some(workdir.clone()))) - .collect::>(); - - for filter_file in &args.filter_files { - filters.extend(read_filter_file(filter_file).await?); - } - - ignores.extend( - args.ignore_patterns - .iter() - .map(|f| (f.to_owned(), Some(workdir.clone()))), - ); - - let exts = args - .filter_extensions - .iter() - .map(|e| OsString::from(e.strip_prefix('.').unwrap_or(e))); - - info!("initialising Globset filterer"); - Ok(Arc::new(WatchexecFilterer { - inner: GlobsetFilterer::new(project_origin, filters, ignores, ignore_files, exts) - .await - .into_diagnostic()?, - fs_events: args.filter_fs_events.clone(), - })) -} - -async fn read_filter_file(path: &Path) -> Result)>> { - let _span = trace_span!("loading filter file", ?path).entered(); - - let file = tokio::fs::File::open(path).await.into_diagnostic()?; - - let metadata_len = file - .metadata() - .await - .map(|m| usize::try_from(m.len())) - .unwrap_or(Ok(0)) - .into_diagnostic()?; - let filter_capacity = if metadata_len == 0 { - 0 - } else { - metadata_len / 20 - }; - 
let mut filters = Vec::with_capacity(filter_capacity); - - let reader = BufReader::new(file); - let mut lines = reader.lines(); - while let Some(line) = lines.next_line().await.into_diagnostic()? { - let line = line.trim(); - if line.is_empty() || line.starts_with('#') { - continue; - } - - trace!(?line, "adding filter line"); - filters.push((line.to_owned(), Some(path.to_owned()))); - } - - Ok(filters) -} - -/// A custom filterer that combines the library's Globset filterer and a switch for --no-meta -#[derive(Debug)] -pub struct WatchexecFilterer { - inner: GlobsetFilterer, - fs_events: Vec, -} - -impl Filterer for WatchexecFilterer { - fn check_event(&self, event: &Event, priority: Priority) -> Result { - for tag in &event.tags { - if let Tag::FileEventKind(fek) = tag { - let normalised = match fek { - FileEventKind::Access(_) => FsEvent::Access, - FileEventKind::Modify(ModifyKind::Name(_)) => FsEvent::Rename, - FileEventKind::Modify(ModifyKind::Metadata(_)) => FsEvent::Metadata, - FileEventKind::Modify(_) => FsEvent::Modify, - FileEventKind::Create(_) => FsEvent::Create, - FileEventKind::Remove(_) => FsEvent::Remove, - _ => continue, - }; - - if !self.fs_events.contains(&normalised) { - return Ok(false); - } - } - } - - trace!("check against original event"); - if !self.inner.check_event(event, priority)? 
{ - return Ok(false); - } - - Ok(true) - } -} diff --git a/crates/cli/src/filterer/parse.rs b/crates/cli/src/filterer/parse.rs new file mode 100644 index 0000000..31c67c2 --- /dev/null +++ b/crates/cli/src/filterer/parse.rs @@ -0,0 +1,17 @@ +use miette::{miette, Result}; + +pub fn parse_filter_program((n, prog): (usize, String)) -> Result { + let parser = jaq_parse::main(); + let (main, errs) = jaq_parse::parse(&prog, parser); + + if !errs.is_empty() { + let errs = errs + .into_iter() + .map(|err| err.to_string()) + .collect::>() + .join("\n"); + return Err(miette!("failed to load filter program #{}: {:?}", n, errs)); + } + + main.ok_or_else(|| miette!("failed to load filter program #{} (no reason given)", n)) +} diff --git a/crates/cli/src/filterer/proglib.rs b/crates/cli/src/filterer/proglib.rs new file mode 100644 index 0000000..9c71de9 --- /dev/null +++ b/crates/cli/src/filterer/proglib.rs @@ -0,0 +1,27 @@ +use jaq_interpret::ParseCtx; +use miette::Result; +use tracing::debug; + +mod file; +mod hash; +mod kv; +mod macros; +mod output; + +pub fn jaq_lib() -> Result { + let mut jaq = ParseCtx::new(Vec::new()); + + debug!("loading jaq core library"); + jaq.insert_natives(jaq_core::core()); + + debug!("loading jaq std library"); + jaq.insert_defs(jaq_std::std()); + + debug!("loading jaq watchexec library"); + file::load(&mut jaq); + hash::load(&mut jaq); + kv::load(&mut jaq); + output::load(&mut jaq); + + Ok(jaq) +} diff --git a/crates/cli/src/filterer/proglib/file.rs b/crates/cli/src/filterer/proglib/file.rs new file mode 100644 index 0000000..fe94315 --- /dev/null +++ b/crates/cli/src/filterer/proglib/file.rs @@ -0,0 +1,173 @@ +use std::{ + fs::{metadata, File, FileType, Metadata}, + io::{BufReader, Read}, + iter::once, + time::{SystemTime, UNIX_EPOCH}, +}; + +use jaq_interpret::{ParseCtx, Error, Native, Val}; +use serde_json::{json, Value}; +use tracing::{debug, error, trace}; + +use super::macros::*; + +pub fn load(jaq: &mut ParseCtx) { + trace!("jaq: add 
file_read filter"); + jaq.insert_native( + "file_read".into(), + 1, + Native::new({ + move |args, (ctx, val)| { + let path = match &val { + Val::Str(v) => v.to_string(), + _ => return_err!(Err(Error::str("expected string (path) but got {val:?}"))), + }; + + let bytes = match int_arg!(args, 0, ctx, &val) { + Ok(v) => v, + Err(e) => return_err!(Err(e)), + }; + + Box::new(once(Ok(match File::open(&path) { + Ok(file) => { + let buf_reader = BufReader::new(file); + let mut limited = buf_reader.take(bytes); + let mut buffer = String::with_capacity(bytes as _); + match limited.read_to_string(&mut buffer) { + Ok(read) => { + debug!("jaq: read {read} bytes from {path:?}"); + Val::Str(buffer.into()) + } + Err(err) => { + error!("jaq: failed to read from {path:?}: {err:?}"); + Val::Null + } + } + } + Err(err) => { + error!("jaq: failed to open file {path:?}: {err:?}"); + Val::Null + } + }))) + } + }), + ); + + trace!("jaq: add file_meta filter"); + jaq.insert_native( + "file_meta".into(), + 0, + Native::new({ + move |_, (_, val)| { + let path = match &val { + Val::Str(v) => v.to_string(), + _ => return_err!(Err(Error::str("expected string (path) but got {val:?}"))), + }; + + Box::new(once(Ok(match metadata(&path) { + Ok(meta) => Val::from(json_meta(meta)), + Err(err) => { + error!("jaq: failed to open {path:?}: {err:?}"); + Val::Null + } + }))) + } + }), + ); + + trace!("jaq: add file_size filter"); + jaq.insert_native( + "file_size".into(), + 0, + Native::new({ + move |_, (_, val)| { + let path = match &val { + Val::Str(v) => v.to_string(), + _ => return_err!(Err(Error::str("expected string (path) but got {val:?}"))), + }; + + Box::new(once(Ok(match metadata(&path) { + Ok(meta) => Val::Int(meta.len() as _), + Err(err) => { + error!("jaq: failed to open {path:?}: {err:?}"); + Val::Null + } + }))) + } + }), + ); +} + +fn json_meta(meta: Metadata) -> Value { + let perms = meta.permissions(); + let mut val = json!({ + "type": filetype_str(meta.file_type()), + "size": meta.len(), 
+ "modified": fs_time(meta.modified()), + "accessed": fs_time(meta.accessed()), + "created": fs_time(meta.created()), + "dir": meta.is_dir(), + "file": meta.is_file(), + "symlink": meta.is_symlink(), + "readonly": perms.readonly(), + }); + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let map = val.as_object_mut().unwrap(); + map.insert( + "mode".to_string(), + Value::String(format!("{:o}", perms.mode())), + ); + map.insert("mode_byte".to_string(), Value::from(perms.mode())); + map.insert( + "executable".to_string(), + Value::Bool(perms.mode() & 0o111 != 0), + ); + } + + val +} + +fn filetype_str(filetype: FileType) -> &'static str { + #[cfg(unix)] + { + use std::os::unix::fs::FileTypeExt; + if filetype.is_char_device() { + return "char"; + } else if filetype.is_block_device() { + return "block"; + } else if filetype.is_fifo() { + return "fifo"; + } else if filetype.is_socket() { + return "socket"; + } + } + + #[cfg(windows)] + { + use std::os::windows::fs::FileTypeExt; + if filetype.is_symlink_dir() { + return "symdir"; + } else if filetype.is_symlink_file() { + return "symfile"; + } + } + + if filetype.is_dir() { + "dir" + } else if filetype.is_file() { + "file" + } else if filetype.is_symlink() { + "symlink" + } else { + "unknown" + } +} + +fn fs_time(time: std::io::Result) -> Option { + time.ok() + .and_then(|time| time.duration_since(UNIX_EPOCH).ok()) + .map(|dur| dur.as_secs()) +} diff --git a/crates/cli/src/filterer/proglib/hash.rs b/crates/cli/src/filterer/proglib/hash.rs new file mode 100644 index 0000000..c74aa50 --- /dev/null +++ b/crates/cli/src/filterer/proglib/hash.rs @@ -0,0 +1,62 @@ +use std::{fs::File, io::Read, iter::once}; + +use jaq_interpret::{Error, Native, ParseCtx, Val}; +use tracing::{debug, error, trace}; + +use super::macros::*; + +pub fn load(jaq: &mut ParseCtx) { + trace!("jaq: add hash filter"); + jaq.insert_native( + "hash".into(), + 0, + Native::new({ + move |_, (_, val)| { + let string = match &val { + Val::Str(v) => 
v.to_string(), + _ => return_err!(Err(Error::str("expected string but got {val:?}"))), + }; + + Box::new(once(Ok(Val::Str( + blake3::hash(string.as_bytes()).to_hex().to_string().into(), + )))) + } + }), + ); + + trace!("jaq: add file_hash filter"); + jaq.insert_native( + "file_hash".into(), + 0, + Native::new({ + move |_, (_, val)| { + let path = match &val { + Val::Str(v) => v.to_string(), + _ => return_err!(Err(Error::str("expected string but got {val:?}"))), + }; + + Box::new(once(Ok(match File::open(&path) { + Ok(mut file) => { + const BUFFER_SIZE: usize = 1024 * 1024; + let mut hasher = blake3::Hasher::new(); + let mut buf = vec![0; BUFFER_SIZE]; + while let Ok(bytes) = file.read(&mut buf) { + debug!("jaq: read {bytes} bytes from {path:?}"); + if bytes == 0 { + break; + } + hasher.update(&buf[..bytes]); + buf = vec![0; BUFFER_SIZE]; + } + + Val::Str(hasher.finalize().to_hex().to_string().into()) + } + Err(err) => { + error!("jaq: failed to open file {path:?}: {err:?}"); + Val::Null + } + }))) + } + }), + ); +} diff --git a/crates/cli/src/filterer/proglib/kv.rs b/crates/cli/src/filterer/proglib/kv.rs new file mode 100644 index 0000000..96d38c3 --- /dev/null +++ b/crates/cli/src/filterer/proglib/kv.rs @@ -0,0 +1,69 @@ +use std::{iter::once, sync::Arc}; + +use dashmap::DashMap; +use jaq_interpret::{Error, Native, ParseCtx, Val}; +use once_cell::sync::OnceCell; +use tracing::trace; + +use crate::filterer::syncval::SyncVal; + +use super::macros::*; + +type KvStore = Arc>; +fn kv_store() -> KvStore { + static KV_STORE: OnceCell = OnceCell::new(); + KV_STORE.get_or_init(|| KvStore::default()).clone() +} + +pub fn load(jaq: &mut ParseCtx) { + trace!("jaq: add kv_clear filter"); + jaq.insert_native( + "kv_clear".into(), + 0, + Native::new({ + move |_, (_, val)| { + let kv = kv_store(); + kv.clear(); + Box::new(once(Ok(val))) + } + }), + ); + + trace!("jaq: add kv_store filter"); + jaq.insert_native( + "kv_store".into(), + 1, + Native::new({ + move |args, (ctx, val)| { 
/// Evaluates argument `$n` of a native filter and converts its first output
/// to the integer type expected at the call site.
///
/// Expands to a `Result<T, Error>`:
/// - `Ok(n)` when the argument yields an integer that fits in `T`;
/// - `Err(..)` when the integer does not fit in `T` (e.g. a negative value
///   where an unsigned count is expected), when the argument yields a
///   non-integer, when evaluating it fails, or when it yields nothing.
///
/// The call site must have `Val` and `Error` from `jaq_interpret` in scope.
macro_rules! int_arg {
    ($args:expr, $n:expr, $ctx:expr, $val:expr) => {
        match ::jaq_interpret::FilterT::run($args.get($n), ($ctx.clone(), $val.clone())).next() {
            // Checked conversion: an `as` cast would silently wrap negative
            // or oversized values into the target type.
            Some(Ok(Val::Int(v))) => ::core::convert::TryFrom::try_from(v)
                .map_err(|_| Error::str(format!("integer argument out of range: {v}"))),
            Some(Ok(val)) => Err(Error::str(format!("expected int but got {val:?}"))),
            Some(Err(e)) => Err(e),
            None => Err(Error::str("value expected but none found")),
        }
    };
}
pub(crate) use int_arg;
jaq.insert_native( + "printerr".into(), + 0, + Native::with_update( + |_, (_, val)| { + eprintln!("{}", val); + Box::new(once(Ok(val))) + }, + |_, (_, val), _| { + eprintln!("{}", val); + Box::new(once(Ok(val))) + }, + ), + ); +} diff --git a/crates/cli/src/filterer/progs.rs b/crates/cli/src/filterer/progs.rs new file mode 100644 index 0000000..73fa813 --- /dev/null +++ b/crates/cli/src/filterer/progs.rs @@ -0,0 +1,143 @@ +use std::{iter::empty, marker::PhantomData}; + +use jaq_interpret::{Ctx, FilterT, RcIter, Val}; +use miette::miette; +use tokio::{ + sync::{mpsc, oneshot}, + task::{block_in_place, spawn_blocking}, +}; +use tracing::{error, trace, warn}; +use watchexec::error::RuntimeError; +use watchexec_events::Event; + +use crate::args::Args; + +const BUFFER: usize = 128; + +#[derive(Debug)] +pub struct FilterProgs { + channel: Requester, +} + +#[derive(Debug, Clone)] +pub struct Requester { + sender: mpsc::Sender<(S, oneshot::Sender)>, + _receiver: PhantomData, +} + +impl Requester +where + S: Send + Sync, + R: Send + Sync, +{ + pub fn new(capacity: usize) -> (Self, mpsc::Receiver<(S, oneshot::Sender)>) { + let (sender, receiver) = mpsc::channel(capacity); + ( + Self { + sender, + _receiver: PhantomData, + }, + receiver, + ) + } + + pub fn call(&self, value: S) -> Result { + // FIXME: this should really be async with a timeout, but that needs filtering in general + // to be async, which should be done at some point + block_in_place(|| { + let (sender, receiver) = oneshot::channel(); + self.sender.blocking_send((value, sender)).map_err(|err| { + RuntimeError::External(miette!("filter progs internal channel: {}", err).into()) + })?; + receiver + .blocking_recv() + .map_err(|err| RuntimeError::External(Box::new(err))) + }) + } +} + +impl FilterProgs { + pub fn check(&self, event: &Event) -> Result { + self.channel.call(event.clone()) + } + + pub fn new(args: &Args) -> miette::Result { + let progs = args.filter_programs_parsed.clone(); + eprintln!( + 
"EXPERIMENTAL: filter programs are unstable and may change/vanish without notice" + ); + + let (requester, mut receiver) = Requester::::new(BUFFER); + let task = + spawn_blocking(move || { + 'chan: while let Some((event, sender)) = receiver.blocking_recv() { + let val = serde_json::to_value(&event) + .map_err(|err| miette!("failed to serialize event: {}", err)) + .map(Val::from)?; + + for (n, prog) in progs.iter().enumerate() { + trace!(?n, "trying filter program"); + let mut jaq = super::proglib::jaq_lib()?; + let filter = jaq.compile(prog.clone()); + if !jaq.errs.is_empty() { + for (error, span) in jaq.errs { + error!(%error, "failed to compile filter program #{n}@{}:{}", span.start, span.end); + } + continue; + } + + let inputs = RcIter::new(empty()); + let mut results = filter.run((Ctx::new([], &inputs), val.clone())); + if let Some(res) = results.next() { + match res { + Ok(Val::Bool(false)) => { + trace!( + ?n, + verdict = false, + "filter program finished; fail so stopping there" + ); + sender + .send(false) + .unwrap_or_else(|_| warn!("failed to send filter result")); + continue 'chan; + } + Ok(Val::Bool(true)) => { + trace!( + ?n, + verdict = true, + "filter program finished; pass so trying next" + ); + continue; + } + Ok(val) => { + error!(?n, ?val, "filter program returned non-boolean, ignoring and trying next"); + continue; + } + Err(err) => { + error!(?n, error=%err, "filter program failed, so trying next"); + continue; + } + } + } + } + + trace!("all filters failed, sending pass as default"); + sender + .send(true) + .unwrap_or_else(|_| warn!("failed to send filter result")); + } + + Ok(()) as miette::Result<()> + }); + + tokio::spawn(async { + match task.await { + Ok(Ok(())) => {} + Ok(Err(err)) => error!("filter progs task failed: {}", err), + Err(err) => error!("filter progs task panicked: {}", err), + } + }); + + Ok(Self { channel: requester }) + } +} diff --git a/crates/cli/src/filterer/syncval.rs b/crates/cli/src/filterer/syncval.rs new file 
mode 100644 index 0000000..7836891 --- /dev/null +++ b/crates/cli/src/filterer/syncval.rs @@ -0,0 +1,71 @@ +/// Jaq's [Val](jaq_interpret::Val) uses Rc, but we want to use in Sync contexts. UGH! +use std::{rc::Rc, sync::Arc}; + +use indexmap::IndexMap; +use jaq_interpret::Val; + +#[derive(Clone, Debug)] +pub enum SyncVal { + Null, + Bool(bool), + Int(isize), + Float(f64), + Num(Arc), + Str(Arc), + Arr(Arc<[SyncVal]>), + Obj(Arc, SyncVal>>), +} + +impl From<&Val> for SyncVal { + fn from(val: &Val) -> Self { + match val { + Val::Null => Self::Null, + Val::Bool(b) => Self::Bool(*b), + Val::Int(i) => Self::Int(*i), + Val::Float(f) => Self::Float(*f), + Val::Num(s) => Self::Num(s.to_string().into()), + Val::Str(s) => Self::Str(s.to_string().into()), + Val::Arr(a) => Self::Arr({ + let mut arr = Vec::with_capacity(a.len()); + for v in a.iter() { + arr.push(v.into()); + } + arr.into() + }), + Val::Obj(m) => Self::Obj(Arc::new({ + let mut map = IndexMap::new(); + for (k, v) in m.iter() { + map.insert(k.to_string().into(), v.into()); + } + map + })), + } + } +} + +impl From<&SyncVal> for Val { + fn from(val: &SyncVal) -> Self { + match val { + SyncVal::Null => Self::Null, + SyncVal::Bool(b) => Self::Bool(*b), + SyncVal::Int(i) => Self::Int(*i), + SyncVal::Float(f) => Self::Float(*f), + SyncVal::Num(s) => Self::Num(s.to_string().into()), + SyncVal::Str(s) => Self::Str(s.to_string().into()), + SyncVal::Arr(a) => Self::Arr({ + let mut arr = Vec::with_capacity(a.len()); + for v in a.iter() { + arr.push(v.into()); + } + arr.into() + }), + SyncVal::Obj(m) => Self::Obj(Rc::new({ + let mut map: IndexMap<_, _, ahash::RandomState> = Default::default(); + for (k, v) in m.iter() { + map.insert(k.to_string().into(), v.into()); + } + map + })), + } + } +} diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index f328e6f..42bda7e 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -14,6 +14,8 @@ use tracing::{debug, info, warn}; use watchexec::Watchexec; use 
watchexec_events::{Event, Priority}; +use crate::filterer::WatchexecFilterer; + pub mod args; mod config; mod emits; @@ -44,7 +46,7 @@ async fn init() -> Result { } } - let args = args::get_args(); + let args = args::get_args().await?; let verbosity = args.verbose.unwrap_or(0); if log_on { @@ -101,7 +103,7 @@ async fn run_watchexec(args: Args) -> Result<()> { let state = state::State::new()?; let config = config::make_config(&args, &state)?; - config.filterer(filterer::globset(&args).await?); + config.filterer(WatchexecFilterer::new(&args).await?); info!("initialising Watchexec runtime"); let wx = Watchexec::with_config(config)?; diff --git a/crates/filterer/globset/Cargo.toml b/crates/filterer/globset/Cargo.toml index d0f920b..b5be354 100644 --- a/crates/filterer/globset/Cargo.toml +++ b/crates/filterer/globset/Cargo.toml @@ -38,10 +38,6 @@ path = "../ignore" [dev-dependencies] tracing-subscriber = "0.3.6" -[dev-dependencies.project-origins] -version = "1.3.0" -path = "../../project-origins" - [dev-dependencies.tokio] version = "1.33.0" features = [ diff --git a/crates/filterer/globset/tests/helpers/mod.rs b/crates/filterer/globset/tests/helpers/mod.rs index 23f9210..bfd43d7 100644 --- a/crates/filterer/globset/tests/helpers/mod.rs +++ b/crates/filterer/globset/tests/helpers/mod.rs @@ -3,8 +3,6 @@ use std::{ path::{Path, PathBuf}, }; -use ignore_files::IgnoreFile; -use project_origins::ProjectType; use watchexec::{error::RuntimeError, filter::Filterer}; use watchexec_events::{Event, FileType, Priority, Tag}; use watchexec_filterer_globset::GlobsetFilterer; @@ -12,7 +10,6 @@ use watchexec_filterer_ignore::IgnoreFilterer; pub mod globset { pub use super::globset_filt as filt; - pub use super::Applies; pub use super::PathHarness; pub use watchexec_events::Priority; } @@ -119,21 +116,3 @@ pub async fn globset_filt( .await .expect("making filterer") } - -pub trait Applies { - fn applies_in(self, origin: &str) -> Self; - fn applies_to(self, project_type: ProjectType) -> 
Self; -} - -impl Applies for IgnoreFile { - fn applies_in(mut self, origin: &str) -> Self { - let origin = std::fs::canonicalize(".").unwrap().join(origin); - self.applies_in = Some(origin); - self - } - - fn applies_to(mut self, project_type: ProjectType) -> Self { - self.applies_to = Some(project_type); - self - } -} diff --git a/crates/filterer/ignore/tests/helpers/mod.rs b/crates/filterer/ignore/tests/helpers/mod.rs index 7bd5e37..d5df2b0 100644 --- a/crates/filterer/ignore/tests/helpers/mod.rs +++ b/crates/filterer/ignore/tests/helpers/mod.rs @@ -14,7 +14,6 @@ pub mod ignore { pub use super::ignore_filt as filt; pub use super::Applies; pub use super::PathHarness; - pub use watchexec_events::Priority; } pub trait PathHarness: Filterer { diff --git a/crates/lib/Cargo.toml b/crates/lib/Cargo.toml index c8993a6..2395c71 100644 --- a/crates/lib/Cargo.toml +++ b/crates/lib/Cargo.toml @@ -2,7 +2,7 @@ name = "watchexec" version = "3.0.1" -authors = ["Matt Green ", "Félix Saparelli "] +authors = ["Félix Saparelli ", "Matt Green "] license = "Apache-2.0" description = "Library to execute commands in response to file modifications" keywords = ["watcher", "filesystem", "watchexec"] diff --git a/crates/supervisor/src/job/job.rs b/crates/supervisor/src/job/job.rs index 52ffa4e..08acdda 100644 --- a/crates/supervisor/src/job/job.rs +++ b/crates/supervisor/src/job/job.rs @@ -1,3 +1,5 @@ +#![allow(clippy::must_use_candidate)] // Ticket-returning methods are supposed to be used without awaiting + use std::{future::Future, sync::Arc, time::Duration}; use process_wrap::tokio::TokioCommandWrap; diff --git a/doc/watchexec.1 b/doc/watchexec.1 index 16676ef..4a204e4 100644 --- a/doc/watchexec.1 +++ b/doc/watchexec.1 @@ -4,7 +4,7 @@ .SH NAME watchexec \- Execute commands when watched files change .SH SYNOPSIS -\fBwatchexec\fR [\fB\-w\fR|\fB\-\-watch\fR] [\fB\-c\fR|\fB\-\-clear\fR] [\fB\-o\fR|\fB\-\-on\-busy\-update\fR] [\fB\-r\fR|\fB\-\-restart\fR] [\fB\-s\fR|\fB\-\-signal\fR] 
[\fB\-\-stop\-signal\fR] [\fB\-\-stop\-timeout\fR] [\fB\-\-map\-signal\fR] [\fB\-d\fR|\fB\-\-debounce\fR] [\fB\-\-stdin\-quit\fR] [\fB\-\-no\-vcs\-ignore\fR] [\fB\-\-no\-project\-ignore\fR] [\fB\-\-no\-global\-ignore\fR] [\fB\-\-no\-default\-ignore\fR] [\fB\-\-no\-discover\-ignore\fR] [\fB\-\-ignore\-nothing\fR] [\fB\-p\fR|\fB\-\-postpone\fR] [\fB\-\-delay\-run\fR] [\fB\-\-poll\fR] [\fB\-\-shell\fR] [\fB\-n \fR] [\fB\-\-no\-environment\fR] [\fB\-\-emit\-events\-to\fR] [\fB\-\-only\-emit\-events\fR] [\fB\-E\fR|\fB\-\-env\fR] [\fB\-\-no\-process\-group\fR] [\fB\-N\fR|\fB\-\-notify\fR] [\fB\-\-color\fR] [\fB\-\-timings\fR] [\fB\-q\fR|\fB\-\-quiet\fR] [\fB\-\-bell\fR] [\fB\-\-project\-origin\fR] [\fB\-\-workdir\fR] [\fB\-e\fR|\fB\-\-exts\fR] [\fB\-f\fR|\fB\-\-filter\fR] [\fB\-\-filter\-file\fR] [\fB\-i\fR|\fB\-\-ignore\fR] [\fB\-\-ignore\-file\fR] [\fB\-\-fs\-events\fR] [\fB\-\-no\-meta\fR] [\fB\-\-print\-events\fR] [\fB\-v\fR|\fB\-\-verbose\fR]... [\fB\-\-log\-file\fR] [\fB\-\-manual\fR] [\fB\-\-completions\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fICOMMAND\fR] +\fBwatchexec\fR [\fB\-w\fR|\fB\-\-watch\fR] [\fB\-c\fR|\fB\-\-clear\fR] [\fB\-o\fR|\fB\-\-on\-busy\-update\fR] [\fB\-r\fR|\fB\-\-restart\fR] [\fB\-s\fR|\fB\-\-signal\fR] [\fB\-\-stop\-signal\fR] [\fB\-\-stop\-timeout\fR] [\fB\-\-map\-signal\fR] [\fB\-d\fR|\fB\-\-debounce\fR] [\fB\-\-stdin\-quit\fR] [\fB\-\-no\-vcs\-ignore\fR] [\fB\-\-no\-project\-ignore\fR] [\fB\-\-no\-global\-ignore\fR] [\fB\-\-no\-default\-ignore\fR] [\fB\-\-no\-discover\-ignore\fR] [\fB\-\-ignore\-nothing\fR] [\fB\-p\fR|\fB\-\-postpone\fR] [\fB\-\-delay\-run\fR] [\fB\-\-poll\fR] [\fB\-\-shell\fR] [\fB\-n \fR] [\fB\-\-no\-environment\fR] [\fB\-\-emit\-events\-to\fR] [\fB\-\-only\-emit\-events\fR] [\fB\-E\fR|\fB\-\-env\fR] [\fB\-\-no\-process\-group\fR] [\fB\-N\fR|\fB\-\-notify\fR] [\fB\-\-color\fR] [\fB\-\-timings\fR] [\fB\-q\fR|\fB\-\-quiet\fR] [\fB\-\-bell\fR] [\fB\-\-project\-origin\fR] [\fB\-\-workdir\fR] 
[\fB\-e\fR|\fB\-\-exts\fR] [\fB\-f\fR|\fB\-\-filter\fR] [\fB\-\-filter\-file\fR] [\fB\-j\fR|\fB\-\-filter\-prog\fR] [\fB\-i\fR|\fB\-\-ignore\fR] [\fB\-\-ignore\-file\fR] [\fB\-\-fs\-events\fR] [\fB\-\-no\-meta\fR] [\fB\-\-print\-events\fR] [\fB\-v\fR|\fB\-\-verbose\fR]... [\fB\-\-log\-file\fR] [\fB\-\-manual\fR] [\fB\-\-completions\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fICOMMAND\fR] .SH DESCRIPTION Execute commands when watched files change. .PP @@ -432,6 +432,53 @@ Provide a path to a file containing filters, one per line. Empty lines and lines This can also be used via the $WATCHEXEC_FILTER_FILES environment variable. .TP +\fB\-j\fR, \fB\-\-filter\-prog\fR=\fIEXPRESSION\fR +[experimental] Filter programs. + +/!\\ This option is EXPERIMENTAL and may change and/or vanish without notice. + +Provide your own custom filter programs in jaq (similar to jq) syntax. Programs are given an event in the same format as described in \*(Aq\-\-emit\-events\-to\*(Aq and must return a boolean. + +In addition to the jaq stdlib, watchexec adds some custom filter definitions: + +\- \*(Aqpath | file_meta\*(Aq returns file metadata or null if the file does not exist. + +\- \*(Aqpath | file_size\*(Aq returns the size of the file at path, or null if it does not exist. + +\- \*(Aqpath | file_read(bytes)\*(Aq returns a string with the first n bytes of the file at path. If the file is smaller than n bytes, the whole file is returned. There is no filter to read the whole file at once to encourage limiting the amount of data read and processed. + +\- \*(Aqstring | hash\*(Aq, and \*(Aqpath | file_hash\*(Aq return the hash of the string or file at path. No guarantee is made about the algorithm used: treat it as an opaque value. + +\- \*(Aqany | kv_store(key)\*(Aq, \*(Aqkv_fetch(key)\*(Aq, and \*(Aqkv_clear\*(Aq provide a simple key\-value store. Data is kept in memory only, there is no persistence. Consistency is not guaranteed. 
+ +\- \*(Aqany | printout\*(Aq, \*(Aqany | printerr\*(Aq, and \*(Aqany | log(level)\*(Aq will print or log any given value to stdout, stderr, or the log (levels = error, warn, info, debug, trace), and pass the value through (so \*(Aq[1] | log("debug") | .[]\*(Aq will produce a \*(Aq1\*(Aq and log \*(Aq[1]\*(Aq). + +All filtering done with such programs, and especially those using kv or filesystem access, is much slower than the other filtering methods. If filtering is too slow, events will back up and stall watchexec. Take care when designing your filters. + +If the argument to this option starts with an \*(Aq@\*(Aq, the rest of the argument is taken to be the path to a file containing a jaq program. + +Jaq programs are run in order, after all other filters, and short\-circuit: if a filter (jaq or not) rejects an event, execution stops there, and no other filters are run. Additionally, they stop after outputting the first value, so you\*(Aqll want to use \*(Aqany\*(Aq or \*(Aqall\*(Aq when iterating, otherwise only the first item will be processed, which can be quite confusing! + +Find user\-contributed programs or submit your own useful ones at . 
+ +## Examples: + +Regexp ignore filter on paths: + +\*(Aqall(.tags[] | select(.kind == "path"); .absolute | test("[.]test[.]js$")) | not\*(Aq + +Pass any event that creates a file: + +\*(Aqany(.tags[] | select(.kind == "fs"); .simple == "create")\*(Aq + +Pass events that touch executable files: + +\*(Aqany(.tags[] | select(.kind == "path" and .filetype == "file"); .absolute | file_meta | .executable)\*(Aq + +Ignore files that start with shebangs: + +\*(Aqany(.tags[] | select(.kind == "path" and .filetype == "file"); .absolute | file_read(2) == "#!") | not\*(Aq +.TP \fB\-i\fR, \fB\-\-ignore\fR=\fIPATTERN\fR Filename patterns to filter out diff --git a/doc/watchexec.1.md b/doc/watchexec.1.md index 1e24589..b8b1853 100644 --- a/doc/watchexec.1.md +++ b/doc/watchexec.1.md @@ -18,8 +18,9 @@ watchexec - Execute commands when watched files change \[**-N**\|**\--notify**\] \[**\--color**\] \[**\--timings**\] \[**-q**\|**\--quiet**\] \[**\--bell**\] \[**\--project-origin**\] \[**\--workdir**\] \[**-e**\|**\--exts**\] \[**-f**\|**\--filter**\] -\[**\--filter-file**\] \[**-i**\|**\--ignore**\] \[**\--ignore-file**\] -\[**\--fs-events**\] \[**\--no-meta**\] \[**\--print-events**\] +\[**\--filter-file**\] \[**-j**\|**\--filter-prog**\] +\[**-i**\|**\--ignore**\] \[**\--ignore-file**\] \[**\--fs-events**\] +\[**\--no-meta**\] \[**\--print-events**\] \[**-v**\|**\--verbose**\]\... \[**\--log-file**\] \[**\--manual**\] \[**\--completions**\] \[**-h**\|**\--help**\] \[**-V**\|**\--version**\] \[*COMMAND*\] @@ -612,6 +613,83 @@ the \--filter option. This can also be used via the \$WATCHEXEC_FILTER_FILES environment variable. +**-j**, **\--filter-prog**=*EXPRESSION* + +: \[experimental\] Filter programs. + +/!\\ This option is EXPERIMENTAL and may change and/or vanish without +notice. + +Provide your own custom filter programs in jaq (similar to jq) syntax. +Programs are given an event in the same format as described in +\--emit-events-to and must return a boolean. 
+ +In addition to the jaq stdlib, watchexec adds some custom filter +definitions: + +\- path \| file_meta returns file metadata or null if the file does not +exist. + +\- path \| file_size returns the size of the file at path, or null if it +does not exist. + +\- path \| file_read(bytes) returns a string with the first n bytes of +the file at path. If the file is smaller than n bytes, the whole file is +returned. There is no filter to read the whole file at once to encourage +limiting the amount of data read and processed. + +\- string \| hash, and path \| file_hash return the hash of the string +or file at path. No guarantee is made about the algorithm used: treat it +as an opaque value. + +\- any \| kv_store(key), kv_fetch(key), and kv_clear provide a simple +key-value store. Data is kept in memory only, there is no persistence. +Consistency is not guaranteed. + +\- any \| printout, any \| printerr, and any \| log(level) will print or +log any given value to stdout, stderr, or the log (levels = error, warn, +info, debug, trace), and pass the value through (so \[1\] \| +log(\"debug\") \| .\[\] will produce a 1 and log \[1\]). + +All filtering done with such programs, and especially those using kv or +filesystem access, is much slower than the other filtering methods. If +filtering is too slow, events will back up and stall watchexec. Take +care when designing your filters. + +If the argument to this option starts with an @, the rest of the +argument is taken to be the path to a file containing a jaq program. + +Jaq programs are run in order, after all other filters, and +short-circuit: if a filter (jaq or not) rejects an event, execution +stops there, and no other filters are run. Additionally, they stop after +outputting the first value, so youll want to use any or all when +iterating, otherwise only the first item will be processed, which can be +quite confusing! + +Find user-contributed programs or submit your own useful ones at +\. 
+ +\## Examples: + +Regexp ignore filter on paths: + +all(.tags\[\] \| select(.kind == \"path\"); .absolute \| +test(\"\[.\]test\[.\]js\$\")) \| not + +Pass any event that creates a file: + +any(.tags\[\] \| select(.kind == \"fs\"); .simple == \"create\") + +Pass events that touch executable files: + +any(.tags\[\] \| select(.kind == \"path\" and .filetype == \"file\"); +.absolute \| file_meta \| .executable) + +Ignore files that start with shebangs: + +any(.tags\[\] \| select(.kind == \"path\" and .filetype == \"file\"); +.absolute \| file_read(2) == \"#!\") \| not + **-i**, **\--ignore**=*PATTERN* : Filename patterns to filter out