Merge pull request #1139 from Uthar/master

add support for matching multiple patterns
This commit is contained in:
David Peter 2022-11-21 21:19:08 +01:00 committed by GitHub
commit c9d3968475
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 258 additions and 22 deletions

View File

@ -2,6 +2,8 @@
## Features
- New `--and <pattern>` option to add additional patterns that must also be matched. See #315
and #1139 (@Uthar)
- Added `--changed-after` as alias for `--changed-within`, to have a name consistent with `--changed-before`.

View File

@ -144,6 +144,18 @@ pub struct Opts {
)]
pub fixed_strings: bool,
/// Additional search patterns that need to be matched
#[arg(
long = "and",
value_name = "pattern",
long_help = "Add additional required search patterns, all of which must be matched. Multiple \
additional patterns can be specified. The patterns are regular expressions, \
unless '--glob' or '--fixed-strings' is used.",
hide_short_help = true,
allow_hyphen_values = true
)]
pub exprs: Option<Vec<String>>,
/// Show absolute instead of relative paths
#[arg(
long,

View File

@ -21,7 +21,7 @@ use atty::Stream;
use clap::{CommandFactory, Parser};
use globset::GlobBuilder;
use lscolors::LsColors;
use regex::bytes::{RegexBuilder, RegexSetBuilder};
use regex::bytes::{Regex, RegexBuilder, RegexSetBuilder};
use crate::cli::{ColorWhen, Opts};
use crate::config::Config;
@ -81,12 +81,28 @@ fn run() -> Result<ExitCode> {
}
ensure_search_pattern_is_not_a_path(&opts)?;
let pattern_regex = build_pattern_regex(&opts)?;
let pattern = &opts.pattern;
let exprs = &opts.exprs;
let empty = Vec::new();
let config = construct_config(opts, &pattern_regex)?;
ensure_use_hidden_option_for_leading_dot_pattern(&config, &pattern_regex)?;
let re = build_regex(pattern_regex, &config)?;
walk::scan(&search_paths, Arc::new(re), Arc::new(config))
let pattern_regexps = exprs
.as_ref()
.unwrap_or(&empty)
.iter()
.chain([pattern])
.map(|pat| build_pattern_regex(pat, &opts))
.collect::<Result<Vec<String>>>()?;
let config = construct_config(opts, &pattern_regexps)?;
ensure_use_hidden_option_for_leading_dot_pattern(&config, &pattern_regexps)?;
let regexps = pattern_regexps
.into_iter()
.map(|pat| build_regex(pat, &config))
.collect::<Result<Vec<Regex>>>()?;
walk::scan(&search_paths, Arc::new(regexps), Arc::new(config))
}
#[cfg(feature = "completions")]
@ -145,8 +161,7 @@ fn ensure_search_pattern_is_not_a_path(opts: &Opts) -> Result<()> {
}
}
fn build_pattern_regex(opts: &Opts) -> Result<String> {
let pattern = &opts.pattern;
fn build_pattern_regex(pattern: &str, opts: &Opts) -> Result<String> {
Ok(if opts.glob && !pattern.is_empty() {
let glob = GlobBuilder::new(pattern).literal_separator(true).build()?;
glob.regex().to_owned()
@ -172,11 +187,14 @@ fn check_path_separator_length(path_separator: Option<&str>) -> Result<()> {
}
}
fn construct_config(mut opts: Opts, pattern_regex: &str) -> Result<Config> {
fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result<Config> {
// The search will be case-sensitive if the command line flag is set or
// if the pattern has an uppercase character (smart case).
let case_sensitive =
!opts.ignore_case && (opts.case_sensitive || pattern_has_uppercase_char(pattern_regex));
// if any of the patterns has an uppercase character (smart case).
let case_sensitive = !opts.ignore_case
&& (opts.case_sensitive
|| pattern_regexps
.iter()
.any(|pat| pattern_has_uppercase_char(pat)));
let path_separator = opts
.path_separator
@ -415,14 +433,18 @@ fn extract_time_constraints(opts: &Opts) -> Result<Vec<TimeFilter>> {
fn ensure_use_hidden_option_for_leading_dot_pattern(
config: &Config,
pattern_regex: &str,
pattern_regexps: &[String],
) -> Result<()> {
if cfg!(unix) && config.ignore_hidden && pattern_matches_strings_with_leading_dot(pattern_regex)
if cfg!(unix)
&& config.ignore_hidden
&& pattern_regexps
.iter()
.any(|pat| pattern_matches_strings_with_leading_dot(pat))
{
Err(anyhow!(
"The pattern seems to only match files with a leading dot, but hidden files are \
"The pattern(s) seems to only match files with a leading dot, but hidden files are \
filtered by default. Consider adding -H/--hidden to search hidden files as well \
or adjust your search pattern."
or adjust your search pattern(s)."
))
} else {
Ok(())

View File

@ -47,12 +47,12 @@ pub const MAX_BUFFER_LENGTH: usize = 1000;
/// Default duration until output buffering switches to streaming.
pub const DEFAULT_MAX_BUFFER_TIME: Duration = Duration::from_millis(100);
/// Recursively scan the given search path for files / pathnames matching the pattern.
/// Recursively scan the given search path for files / pathnames matching the patterns.
///
/// If the `--exec` argument was supplied, this will create a thread pool for executing
/// jobs in parallel from a given command line and the discovered paths. Otherwise, each
/// path will simply be written to standard output.
pub fn scan(paths: &[PathBuf], pattern: Arc<Regex>, config: Arc<Config>) -> Result<ExitCode> {
pub fn scan(paths: &[PathBuf], patterns: Arc<Vec<Regex>>, config: Arc<Config>) -> Result<ExitCode> {
let first_path = &paths[0];
// Channel capacity was chosen empirically to perform similarly to an unbounded channel
@ -153,7 +153,7 @@ pub fn scan(paths: &[PathBuf], pattern: Arc<Regex>, config: Arc<Config>) -> Resu
let receiver_thread = spawn_receiver(&config, &quit_flag, &interrupt_flag, rx);
// Spawn the sender threads.
spawn_senders(&config, &quit_flag, pattern, parallel_walker, tx);
spawn_senders(&config, &quit_flag, patterns, parallel_walker, tx);
// Wait for the receiver thread to print out all results.
let exit_code = receiver_thread.join().unwrap();
@ -383,13 +383,13 @@ fn spawn_receiver(
fn spawn_senders(
config: &Arc<Config>,
quit_flag: &Arc<AtomicBool>,
pattern: Arc<Regex>,
patterns: Arc<Vec<Regex>>,
parallel_walker: ignore::WalkParallel,
tx: Sender<WorkerResult>,
) {
parallel_walker.run(|| {
let config = Arc::clone(config);
let pattern = Arc::clone(&pattern);
let patterns = Arc::clone(&patterns);
let tx_thread = tx.clone();
let quit_flag = Arc::clone(quit_flag);
@ -459,7 +459,10 @@ fn spawn_senders(
}
};
if !pattern.is_match(&filesystem::osstr_to_bytes(search_str.as_ref())) {
if !patterns
.iter()
.all(|pat| pat.is_match(&filesystem::osstr_to_bytes(search_str.as_ref())))
{
return ignore::WalkState::Continue;
}

View File

@ -76,6 +76,203 @@ fn test_simple() {
);
}
/// Fixture files handed to `TestEnv::new` by the `--and` tests in this file.
///
/// The entries are kept in their original order.
static AND_EXTRA_FILES: &[&str] = &[
    // `foo`-style names at several directory depths, in lower and mixed case.
    "a.foo", "one/b.foo", "one/two/c.foo", "one/two/C.Foo2",
    // Names containing a dash, used by the dash-pattern test.
    "one/two/three/baz-quux", "one/two/three/Baz-Quux2",
    "one/two/three/d.foo",
    // NOTE(review): presumably covered by ignore files / hidden-file filtering set up
    // by the test environment -- confirm against `TestEnv`.
    "fdignored.foo", "gitignored.foo", ".hidden.foo",
    // Two-letter combinations with image extensions, used by the extension test.
    "A-B.jpg", "A-C.png", "B-A.png", "B-C.png", "C-A.jpg", "C-B.png",
    // A name containing a space.
    "e1 e2",
];
/// Basic `--and` behaviour: a result is listed only if the main pattern and
/// every additional `--and` pattern match.
#[test]
fn test_and_basic() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    // Each case pairs the command-line arguments with the expected output.
    // The cases run in the order listed.
    let cases: [(&[&str], &str); 5] = [
        (
            &["foo", "--and", "c"],
            "one/two/C.Foo2
one/two/c.foo
one/two/three/directory_foo/",
        ),
        (
            &["f", "--and", "[ad]", "--and", "[_]"],
            "one/two/three/directory_foo/",
        ),
        (
            &["f", "--and", "[ad]", "--and", "[.]"],
            "a.foo
one/two/three/d.foo",
        ),
        // Patterns containing uppercase characters: only the mixed-case file is expected.
        (&["Foo", "--and", "C"], "one/two/C.Foo2"),
        // One additional pattern that matches nothing yields no output at all.
        (&["foo", "--and", "asdasdasdsadasd"], ""),
    ];

    for &(args, expected) in cases.iter() {
        env.assert_output(args, expected);
    }
}
/// An empty `--and` pattern next to non-empty ones: the expected output is the
/// single file matched by the other patterns.
#[test]
fn test_and_empty_pattern() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    let args = ["Foo", "--and", "2", "--and", ""];
    let expected = "one/two/C.Foo2";

    env.assert_output(&args, expected);
}
/// The pattern `[` is placed at every position in turn (middle, first and last
/// `--and` pattern, then as the main pattern); each invocation must fail.
#[test]
fn test_and_bad_pattern() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    let failing_invocations: [[&str; 7]; 4] = [
        ["Foo", "--and", "2", "--and", "[", "--and", "C"],
        ["Foo", "--and", "[", "--and", "2", "--and", "C"],
        ["Foo", "--and", "2", "--and", "C", "--and", "["],
        ["[", "--and", "2", "--and", "C", "--and", "Foo"],
    ];

    for args in failing_invocations.iter() {
        env.assert_failure(args);
    }
}
/// A `--and` value that is just `-` must be taken as a pattern, not as an option.
#[test]
fn test_and_pattern_starts_with_dash() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    // (arguments, expected output), checked in the order listed.
    let cases: [(&[&str], &str); 3] = [
        (
            &["baz", "--and", "quux"],
            "one/two/three/Baz-Quux2
one/two/three/baz-quux",
        ),
        (
            &["baz", "--and", "-"],
            "one/two/three/Baz-Quux2
one/two/three/baz-quux",
        ),
        (
            &["Quu", "--and", "x", "--and", "-"],
            "one/two/three/Baz-Quux2",
        ),
    ];

    for &(args, expected) in cases.iter() {
        env.assert_output(args, expected);
    }
}
/// `--and` combined with repeated `--extension` filters. Both argument orders
/// below are expected to produce the same output.
#[test]
fn test_and_plus_extension() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    // `--and` given before both `--extension` options.
    let and_first = [
        "A",
        "--and",
        "B",
        "--extension",
        "jpg",
        "--extension",
        "png",
    ];
    env.assert_output(
        &and_first,
        "A-B.jpg
B-A.png",
    );

    // `--and` given between the two `--extension` options.
    let and_in_between = [
        "A",
        "--extension",
        "jpg",
        "--and",
        "B",
        "--extension",
        "png",
    ];
    env.assert_output(
        &and_in_between,
        "A-B.jpg
B-A.png",
    );
}
/// `--and` combined with `--type`: the same two patterns are run once with
/// `--type d` and once with `--type f`.
#[test]
fn test_and_plus_type() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    // `--type d`: only the directory entry is expected.
    let only_dirs = ["c", "--type", "d", "--and", "foo"];
    env.assert_output(&only_dirs, "one/two/three/directory_foo/");

    // `--type f`: only the two files are expected.
    let only_files = ["c", "--type", "f", "--and", "foo"];
    env.assert_output(
        &only_files,
        "one/two/C.Foo2
one/two/c.foo",
    );
}
/// `--and` combined with `--glob`: both `*foo` and `c*` are passed as patterns
/// and a single file is expected.
#[test]
fn test_and_plus_glob() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    let args = ["*foo", "--glob", "--and", "c*"];
    let expected = "one/two/c.foo";

    env.assert_output(&args, expected);
}
/// `--and` combined with `--fixed-strings`.
#[test]
fn test_and_plus_fixed_strings() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    // (arguments, expected output), checked in the order listed.
    let cases: [(&[&str], &str); 3] = [
        (
            &["foo", "--fixed-strings", "--and", "c", "--and", "."],
            "one/two/c.foo
one/two/C.Foo2",
        ),
        // With `[c]` as one of the patterns, no output is expected.
        (
            &["foo", "--fixed-strings", "--and", "[c]", "--and", "."],
            "",
        ),
        (
            &["Foo", "--fixed-strings", "--and", "C", "--and", "."],
            "one/two/C.Foo2",
        ),
    ];

    for &(args, expected) in cases.iter() {
        env.assert_output(args, expected);
    }
}
/// `--and` combined with `--ignore-case`: although the patterns contain
/// uppercase characters, the lowercase file is expected as well.
#[test]
fn test_and_plus_ignore_case() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    let args = ["Foo", "--ignore-case", "--and", "C", "--and", "[.]"];

    env.assert_output(
        &args,
        "one/two/C.Foo2
one/two/c.foo",
    );
}
/// `--and` combined with `--case-sensitive`: with all-lowercase patterns only
/// the lowercase file is expected.
#[test]
fn test_and_plus_case_sensitive() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);

    let args = ["foo", "--case-sensitive", "--and", "c", "--and", "[.]"];
    let expected = "one/two/c.foo";

    env.assert_output(&args, expected);
}
/// `--and` combined with `--full-path`: both invocations are expected to list
/// the same directory.
#[test]
fn test_and_plus_full_path() {
    let env = TestEnv::new(DEFAULT_DIRS, AND_EXTRA_FILES);
    let expected = "one/two/three/directory_foo/";

    let with_foo = ["three", "--full-path", "--and", "foo", "--and", "dir"];
    env.assert_output(&with_foo, expected);

    let with_two = ["three", "--full-path", "--and", "two", "--and", "dir"];
    env.assert_output(&with_two, expected);
}
/// Test each pattern type with an empty pattern.
#[test]
fn test_empty_pattern() {