From d9b69c84057bd271cf4c9d0fc104e9921120ccc4 Mon Sep 17 00:00:00 2001 From: Thayne McCombs Date: Wed, 3 May 2023 23:33:44 -0600 Subject: [PATCH] feat: Add a way to escape { and } in exec templates. fixes #1303 --- Cargo.lock | 1 + Cargo.toml | 1 + doc/fd.1 | 5 ++++ src/cli.rs | 6 ++-- src/exec/mod.rs | 71 +++++++++++++++++++---------------------------- src/exec/token.rs | 69 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a9f7468..370d020 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -291,6 +291,7 @@ checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" name = "fd-find" version = "8.7.1" dependencies = [ + "aho-corasick 1.0.1", "anyhow", "argmax", "chrono", diff --git a/Cargo.toml b/Cargo.toml index ce0aebe..3f55f62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ path = "src/main.rs" version_check = "0.9" [dependencies] +aho-corasick = "1.0" nu-ansi-term = "0.49" argmax = "0.3.1" ignore = "0.4.20" diff --git a/doc/fd.1 b/doc/fd.1 index 7a28377..0a596ce 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -380,10 +380,15 @@ parent directory path without file extension .IP {/.} basename without file extension +.IP {{ +literal '{' (an escape sequence; "}}" likewise yields a literal '}') .RE If no placeholder is present, an implicit "{}" at the end is assumed. +If you need to include the literal text of one of the placeholders, you can use "{{" and "}}" to +escape "{" and "}". For example "{{}}" expands to "{}", and "{{/}}" expands to "{/}". 
+ Examples: - find all *.zip files and unzip them: diff --git a/src/cli.rs b/src/cli.rs index af9bcce..e828d1f 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -807,7 +807,8 @@ impl clap::Args for Exec { '{/}': basename\n \ '{//}': parent directory\n \ '{.}': path without file extension\n \ - '{/.}': basename without file extension\n\n\ + '{/.}': basename without file extension\n \ + '{{}': literal '{' (for escaping)\n\n\ If no placeholder is present, an implicit \"{}\" at the end is assumed.\n\n\ Examples:\n\n \ - find all *.zip files and unzip them:\n\n \ @@ -837,7 +838,8 @@ impl clap::Args for Exec { '{/}': basename\n \ '{//}': parent directory\n \ '{.}': path without file extension\n \ - '{/.}': basename without file extension\n\n\ + '{/.}': basename without file extension\n \ + '{{}': literal '{' (for escaping)\n\n\ If no placeholder is present, an implicit \"{}\" at the end is assumed.\n\n\ Examples:\n\n \ - Find all test_*.py files and open them in your favorite editor:\n\n \ diff --git a/src/exec/mod.rs b/src/exec/mod.rs index 891d764..d95f5d9 100644 --- a/src/exec/mod.rs +++ b/src/exec/mod.rs @@ -9,18 +9,17 @@ use std::io; use std::iter; use std::path::{Component, Path, PathBuf, Prefix}; use std::process::Stdio; -use std::sync::{Mutex, OnceLock}; +use std::sync::Mutex; use anyhow::{bail, Result}; use argmax::Command; -use regex::Regex; use crate::exit_codes::{merge_exitcodes, ExitCode}; use self::command::{execute_commands, handle_cmd_error}; use self::input::{basename, dirname, remove_extension}; pub use self::job::{batch, job}; -use self::token::Token; +use self::token::{tokenize, Token}; /// Execution mode of the command #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -230,52 +229,15 @@ impl CommandTemplate { I: IntoIterator, S: AsRef, { - static PLACEHOLDER_PATTERN: OnceLock = OnceLock::new(); - let mut args = Vec::new(); let mut has_placeholder = false; for arg in input { let arg = arg.as_ref(); - let mut tokens = Vec::new(); - let mut start = 0; - - let 
pattern = - PLACEHOLDER_PATTERN.get_or_init(|| Regex::new(r"\{(/?\.?|//)\}").unwrap()); - - for placeholder in pattern.find_iter(arg) { - // Leading text before the placeholder. - if placeholder.start() > start { - tokens.push(Token::Text(arg[start..placeholder.start()].to_owned())); - } - - start = placeholder.end(); - - match placeholder.as_str() { - "{}" => tokens.push(Token::Placeholder), - "{.}" => tokens.push(Token::NoExt), - "{/}" => tokens.push(Token::Basename), - "{//}" => tokens.push(Token::Parent), - "{/.}" => tokens.push(Token::BasenameNoExt), - _ => unreachable!("Unhandled placeholder"), - } - - has_placeholder = true; - } - - // Without a placeholder, the argument is just fixed text. - if tokens.is_empty() { - args.push(ArgumentTemplate::Text(arg.to_owned())); - continue; - } - - if start < arg.len() { - // Trailing text after last placeholder. - tokens.push(Token::Text(arg[start..].to_owned())); - } - - args.push(ArgumentTemplate::Tokens(tokens)); + let tmpl = tokenize(arg); + has_placeholder |= tmpl.has_tokens(); + args.push(tmpl); } // We need to check that we have at least one argument, because if not @@ -421,6 +383,14 @@ impl ArgumentTemplate { mod tests { use super::*; + fn generate_str(template: &CommandTemplate, input: &str) -> Vec { + template + .args + .iter() + .map(|arg| arg.generate(input, None).into_string().unwrap()) + .collect() + } + #[test] fn tokens_with_placeholder() { assert_eq!( @@ -502,6 +472,21 @@ mod tests { ); } + #[test] + fn tokens_with_literal_braces() { + let template = CommandTemplate::new(vec!["{{}}", "{{", "{.}}"]).unwrap(); + assert_eq!( + generate_str(&template, "foo"), + vec!["{}", "{", "{.}", "foo"] + ); + } + + #[test] + fn tokens_with_literal_braces_and_placeholder() { + let template = CommandTemplate::new(vec!["{{{},end}"]).unwrap(); + assert_eq!(generate_str(&template, "foo"), vec!["{foo,end}"]); + } + #[test] fn tokens_multiple() { assert_eq!( diff --git a/src/exec/token.rs b/src/exec/token.rs index 
a295745..c4bc4af 100644
--- a/src/exec/token.rs
+++ b/src/exec/token.rs
@@ -1,4 +1,8 @@
+use aho_corasick::AhoCorasick;
 use std::fmt::{self, Display, Formatter};
+use std::sync::OnceLock;
+
+use super::ArgumentTemplate;
 
 /// Designates what should be written to a buffer
 ///
@@ -27,3 +31,83 @@ impl Display for Token {
         Ok(())
     }
 }
+
+/// Matcher for the brace escapes and placeholders understood by [`tokenize`],
+/// built on first use; `tokenize` relies on pattern ids following list order.
+static PLACEHOLDERS: OnceLock<AhoCorasick> = OnceLock::new();
+
+/// Splits one exec-template argument into an [`ArgumentTemplate`].
+///
+/// * `{{` and `}}` are escapes that contribute a single literal `{` / `}`.
+/// * `{}`, `{/}`, `{//}`, `{.}` and `{/.}` become placeholder tokens.
+/// * A placeholder directly followed by `}` is kept as literal text and that
+///   extra `}` is dropped, so `{.}}` yields the text `{.}`.
+///
+/// Returns `ArgumentTemplate::Text` if no placeholder token was produced,
+/// otherwise `ArgumentTemplate::Tokens`.
+pub(super) fn tokenize(input: &str) -> ArgumentTemplate {
+    // NOTE: we assume that { and } have the same length
+    const BRACE_LEN: usize = '{'.len_utf8();
+    let mut tokens = Vec::new();
+    let mut remaining = input;
+    let mut buf = String::new();
+    let placeholders = PLACEHOLDERS.get_or_init(|| {
+        AhoCorasick::new(&["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap()
+    });
+    while let Some(m) = placeholders.find(remaining) {
+        match m.pattern().as_u32() {
+            0 | 1 => {
+                // we found an escaped {{ or }}, so add
+                // everything up to the first char to the buffer
+                // then skip the second one.
+                buf += &remaining[..m.start() + BRACE_LEN];
+                remaining = &remaining[m.end()..];
+            }
+            id if !remaining[m.end()..].starts_with('}') => {
+                buf += &remaining[..m.start()];
+                if !buf.is_empty() {
+                    tokens.push(Token::Text(std::mem::take(&mut buf)));
+                }
+                tokens.push(token_from_pattern_id(id));
+                remaining = &remaining[m.end()..];
+            }
+            _ => {
+                // We got a normal pattern, but the final "}"
+                // is escaped, so add up to that to the buffer, then
+                // skip the final }
+                buf += &remaining[..m.end()];
+                remaining = &remaining[m.end() + BRACE_LEN..];
+            }
+        }
+    }
+    // Add the rest of the string to the buffer, and add the final buffer to the tokens
+    if !remaining.is_empty() {
+        buf += remaining;
+    }
+    if tokens.is_empty() {
+        // No placeholders were found, so just return the text
+        return ArgumentTemplate::Text(buf);
+    }
+    // Add final text segment
+    if !buf.is_empty() {
+        tokens.push(Token::Text(buf));
+    }
+    debug_assert!(!tokens.is_empty());
+    ArgumentTemplate::Tokens(tokens)
+}
+
+/// Maps a pattern id reported by the `PLACEHOLDERS` matcher to its [`Token`].
+///
+/// Ids 0 and 1 (the brace escapes) are consumed by [`tokenize`] and must not
+/// be passed here; any id outside `2..=6` hits `unreachable!()`.
+fn token_from_pattern_id(id: u32) -> Token {
+    use Token::*;
+    match id {
+        2 => Placeholder,
+        3 => Basename,
+        4 => Parent,
+        5 => NoExt,
+        6 => BasenameNoExt,
+        _ => unreachable!(),
+    }
+}