Add a flag to anchor patterns in the input

Closes #1476.
This commit is contained in:
Gabriel de Perthuis 2024-01-18 14:32:32 +01:00
parent ff3fc81db4
commit 5cdf9763e4
3 changed files with 90 additions and 9 deletions

View File

@ -168,8 +168,8 @@ pub struct Opts {
pub regex: bool,
/// Treat the pattern as a literal string instead of a regular expression. Note
/// that this also performs substring comparison. If you want to match on an
/// exact filename, consider using '--glob'.
/// that the pattern would still match on a substring of the input. If you want
/// to match on an exact filename, consider adding '--anchor=input' as well.
#[arg(
long,
short = 'F',
@ -246,6 +246,20 @@ pub struct Opts {
)]
pub full_path: bool,
/// By default, the search pattern for --regex and --fixed-strings can match any part of the input.
/// (See the --full-path option for what constitutes input)
///
/// This flag allows anchoring the pattern.
///
/// Conflicts with the --glob flag: globs always match the entire input
#[arg(
long,
help = "Where to anchor the pattern",
conflicts_with("glob"),
long_help
)]
pub anchor: Option<Anchor>,
/// Separate search results by the null character (instead of newlines).
/// Useful for piping results to 'xargs'.
#[arg(
@ -680,6 +694,17 @@ impl Opts {
self.rg_alias_hidden_ignore > 0
}
/// Returns the anchoring that applies to the search pattern.
///
/// Glob patterns always match the entire input, so `Anchor::Input` is
/// reported whenever glob mode is active. In every other mode the value
/// given via `--anchor` (if any) is handed back unchanged.
pub fn anchor(&self) -> Option<Anchor> {
    if !self.glob {
        return self.anchor;
    }
    // globset offers no way to apply a different anchor, and `--anchor`
    // conflicts with `--glob`, so no user-supplied value is consulted here.
    Some(Anchor::Input)
}
pub fn max_depth(&self) -> Option<usize> {
self.max_depth.or(self.exact_depth)
}
@ -725,6 +750,14 @@ fn default_num_threads() -> NonZeroUsize {
.min(limit)
}
// Where the search pattern is anchored within the input (value of `--anchor`).
//
// NOTE: plain `//` comments are used on purpose. With clap's `ValueEnum`
// derive, `///` doc comments on variants become help text for the possible
// values, which would change the program's `--help` output.
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
pub enum Anchor {
// The pattern must match at the beginning of the input.
InputStart,
// The pattern must match at the end of the input.
InputEnd,
// The pattern must match the input from beginning to end.
Input,
// The pattern must start and end at word boundaries.
Word,
}
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
pub enum FileType {
#[value(alias = "f")]

View File

@ -162,15 +162,36 @@ fn ensure_search_pattern_is_not_a_path(opts: &Opts) -> Result<()> {
}
}
/// Wraps the regex source `re` with the anchors requested by `anchors`.
///
/// The pattern is enclosed in a non-capturing group before the anchors are
/// attached, so that they bind to the whole pattern rather than to its first
/// or last alternative only. Without the group, `a|b` anchored to the input
/// would become `^a|b$`, which the regex engine reads as `(^a)|(b$)`. The
/// group also confines inline flags written inside the pattern (for example
/// a leading `(?m)`) so they cannot alter the meaning of the added `^`/`$`.
///
/// Returns `re` unchanged when `anchors` is `None`.
fn apply_anchors(re: String, anchors: Option<cli::Anchor>) -> String {
    use cli::Anchor;

    let (prefix, suffix) = match anchors {
        None => return re,
        Some(Anchor::InputStart) => ("^", ""),
        Some(Anchor::InputEnd) => ("", "$"),
        Some(Anchor::Input) => ("^", "$"),
        // Start-of-word / end-of-word assertions, see
        // https://docs.rs/regex/latest/regex/#empty-matches
        Some(Anchor::Word) => (r"\<", r"\>"),
    };

    format!("{}(?:{}){}", prefix, re, suffix)
}
/// Turns the user-supplied search `pattern` into regex source text.
///
/// - In glob mode (`opts.glob`) a non-empty pattern is compiled with
///   `GlobBuilder` (with `literal_separator(true)`) and the glob's regex is
///   returned; a glob build error is propagated to the caller via `?`.
/// - With `opts.fixed_strings` the pattern is escaped with `regex::escape` so
///   it is matched literally.
/// - Otherwise the pattern is used verbatim.
///
/// Non-glob patterns are passed through `apply_anchors` together with
/// `opts.anchor()`.
fn build_pattern_regex(pattern: &str, opts: &Opts) -> Result<String> {
Ok(if opts.glob && !pattern.is_empty() {
let glob = GlobBuilder::new(pattern).literal_separator(true).build()?;
glob.regex().to_owned()
} else if opts.fixed_strings {
// Treat pattern as literal string if '--fixed-strings' is used
regex::escape(pattern)
Ok(if opts.glob {
if !pattern.is_empty() {
let glob = GlobBuilder::new(pattern).literal_separator(true).build()?;
glob.regex().to_owned()
} else {
"".to_owned()
}
} else {
String::from(pattern)
apply_anchors(
if opts.fixed_strings {
// Treat pattern as literal string if '--fixed-strings' is used
regex::escape(pattern)
} else {
String::from(pattern)
},
opts.anchor(),
)
})
}

View File

@ -604,6 +604,33 @@ fn test_full_path() {
);
}
/// Anchoring (--anchor)
///
/// Exercises each `--anchor` value (`input`, `input-start`, `input-end`,
/// `word`) against the default test tree and checks which entries are listed.
#[test]
fn test_anchors() {
let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES);
// `input`: the pattern has to cover the whole input, so a bare "foo" finds
// nothing while "b.foo" finds exactly that file. The expected output suggests
// the input here is the file name rather than the full path.
te.assert_output(&["--anchor=input", "foo"], "");
te.assert_output(&["--anchor=input", "b.foo"], "one/b.foo");
// `input-start`: the pattern has to match at the beginning of the input.
te.assert_output(&["--anchor=input-start", "foo"], "");
te.assert_output(&["--anchor=input-start", "b."], "one/b.foo");
// `input-end`: every entry whose name ends in "oo" is expected, including
// the directory.
te.assert_output(
&["--anchor=input-end", "oo"],
"a.foo
one/b.foo
one/two/c.foo
one/two/three/d.foo
one/two/three/directory_foo/",
);
// `word`: "oo" never forms a whole word, so nothing is listed.
te.assert_output(&["--anchor=word", "oo"], "");
// `word`: "foo" is expected for the `*.foo` files but not for `directory_foo`
// (presumably because `_` counts as a word character, so there is no word
// boundary before "foo" there).
te.assert_output(
&["--anchor=word", "foo"],
"a.foo
one/b.foo
one/two/c.foo
one/two/three/d.foo",
);
}
/// Hidden files (--hidden)
#[test]
fn test_hidden() {