From e5760f09d55172c6e8164f63292d117fc44ec254 Mon Sep 17 00:00:00 2001 From: cyqsimon <28627918+cyqsimon@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:09:08 +0800 Subject: [PATCH 1/3] Initial implementation --- contrib/completion/_fd | 1 + src/cli.rs | 19 +++++++++-- src/config.rs | 20 ++++++++++++ src/main.rs | 74 +++++++++++++++++++++++++++++++++++++++--- src/walk.rs | 37 ++++++++++++++------- 5 files changed, 133 insertions(+), 18 deletions(-) diff --git a/contrib/completion/_fd b/contrib/completion/_fd index 53c0a7c..58a8a12 100644 --- a/contrib/completion/_fd +++ b/contrib/completion/_fd @@ -110,6 +110,7 @@ _fd() { '*'{-t+,--type=}"[filter search by type]:type:(($fd_types))" '*'{-e+,--extension=}'[filter search by file extension]:extension' '*'{-E+,--exclude=}'[exclude files/directories that match the given glob pattern]:glob pattern' + '*--exclude-absolute=[exclude files/directories whose absolute path match the given glob pattern]:glob pattern' '*'{-S+,--size=}'[limit search by file size]:size limit:->size' '(-o --owner)'{-o+,--owner=}'[filter by owning user and/or group]:owner and/or group:->owner' diff --git a/src/cli.rs b/src/cli.rs index bb00a1c..bd36d92 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -46,7 +46,7 @@ pub struct Opts { no_hidden: (), /// Show search results from files and directories that would otherwise be - /// ignored by '.gitignore', '.ignore', '.fdignore', or the global ignore file. + /// ignored by '.gitignore', '.ignore', '.fdignore', or the global ignore files. /// The flag can be overridden with --ignore. #[arg( long, @@ -106,7 +106,7 @@ pub struct Opts { )] pub no_ignore_parent: bool, - /// Do not respect the global ignore file + /// Do not respect the global ignore files #[arg(long, hide = true)] pub no_global_ignore_file: bool, @@ -300,6 +300,21 @@ pub struct Opts { )] pub exclude: Vec, + /// Exclude files/directories whose absolute path match the given glob pattern. + /// This filter is applied on top of all other ignore logic. Multiple exclude patterns + /// can be specified. + /// + /// Note that using this filter causes fd to perform an extra canonicalization + /// for every path traversed, which incurs a non-trivial performance penalty. + /// Use at your own discretion. + #[arg( + long, + value_name = "pattern", + help = "Exclude entries whose absolute path match the given glob pattern", + long_help + )] + pub exclude_absolute: Vec, + /// Do not traverse into directories that match the search criteria. If /// you want to exclude specific directories, use the '--exclude=…' option. #[arg(long, hide_short_help = true, conflicts_with_all(&["size", "exact_depth"]), diff --git a/src/config.rs b/src/config.rs index 75b4c2b..34a7dd2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,6 @@ use std::{path::PathBuf, sync::Arc, time::Duration}; +use globset::GlobMatcher; use lscolors::LsColors; use regex::bytes::RegexSet; @@ -95,6 +96,9 @@ pub struct Config { /// A list of glob patterns that should be excluded from the search. pub exclude_patterns: Vec, + /// A list of glob matchers that should exclude matched entries by their absolute paths. + pub exclude_absolute_matchers: Vec, + /// A list of custom ignore files. pub ignore_files: Vec, @@ -130,3 +134,19 @@ impl Config { self.command.is_none() } } + +/// Get the platform-specific config directory for fd. +pub fn get_fd_config_dir() -> Option { + #[cfg(target_os = "macos")] + let mut dir = std::env::var_os("XDG_CONFIG_HOME") + .map(PathBuf::from) + .filter(|p| p.is_absolute()) + .or_else(|| dirs_next::home_dir().map(|d| d.join(".config")))?; + + #[cfg(not(target_os = "macos"))] + let mut dir = dirs_next::config_dir()?; + + dir.push("fd"); + + Some(dir) +} diff --git a/src/main.rs b/src/main.rs index 567058c..985edf7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,12 +19,13 @@ use std::time; use anyhow::{anyhow, bail, Context, Result}; use atty::Stream; use clap::{CommandFactory, Parser}; -use globset::GlobBuilder; +use globset::{Glob, GlobBuilder, GlobMatcher}; use lscolors::LsColors; use regex::bytes::{Regex, RegexBuilder, RegexSetBuilder}; use crate::cli::{ColorWhen, Opts}; -use crate::config::Config; +use crate::config::{get_fd_config_dir, Config}; +use crate::error::print_error; use crate::exec::CommandSet; use crate::exit_codes::ExitCode; use crate::filetypes::FileTypes; @@ -233,6 +234,29 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result matchers.append(&mut v), + Err(err) => print_error(format!("Cannot read global ignore-absolute file. {err}.")), + } + } + + dbg!(&matchers); + matchers + }; + Ok(Config { case_sensitive, search_full_path: opts.full_path, @@ -241,9 +265,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result Result Result Result> { + let file_content = match get_fd_config_dir() + .map(|p| p.join("ignore-absolute")) + .filter(|p| p.is_file()) + { + Some(path) => std::fs::read_to_string(path)?, + // not an error if the file doesn't exist + None => return Ok(vec![]), + }; + + let matchers = file_content + .lines() + // trim trailing spaces, unless escaped with backslash (`\`) + .map(|raw| { + let naive_trimmed = raw.trim_end_matches(' '); + if raw.len() == naive_trimmed.len() { + raw + } else if naive_trimmed.ends_with('\\') { + &raw[..naive_trimmed.len() + 1] + } else { + naive_trimmed + } + }) + // skip empty lines and comments + .filter(|line| !line.is_empty() && !line.starts_with('#')) + // build matchers + .filter_map(|line| match Glob::new(line) { + Ok(glob) => Some(glob.compile_matcher()), + // invalid globs from config file are warnings + Err(err) => { + print_error(format!( + "Malformed pattern in global ignore-absolute file. {err}." + )); + None + } + }) + .collect(); + + Ok(matchers) +} diff --git a/src/walk.rs b/src/walk.rs index 2b03fa9..95cca28 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -14,6 +14,7 @@ use ignore::overrides::OverrideBuilder; use ignore::{self, WalkBuilder}; use regex::bytes::Regex; +use crate::config::get_fd_config_dir; use crate::config::Config; use crate::dir_entry::DirEntry; use crate::error::print_error; @@ -89,17 +90,8 @@ pub fn scan(paths: &[PathBuf], patterns: Arc>, config: Arc) - } if config.read_global_ignore { - #[cfg(target_os = "macos")] - let config_dir_op = std::env::var_os("XDG_CONFIG_HOME") - .map(PathBuf::from) - .filter(|p| p.is_absolute()) - .or_else(|| dirs_next::home_dir().map(|d| d.join(".config"))); - - #[cfg(not(target_os = "macos"))] - let config_dir_op = dirs_next::config_dir(); - - if let Some(global_ignore_file) = config_dir_op - .map(|p| p.join("fd").join("ignore")) + if let Some(global_ignore_file) = get_fd_config_dir() + .map(|p| p.join("ignore")) .filter(|p| p.is_file()) { let result = walker.add_ignore(global_ignore_file); @@ -534,6 +526,29 @@ fn spawn_senders( } } + // Exclude by absolute path + // `ignore` crate does not intend to support this, so it's implemented here independently + // see https://github.com/BurntSushi/ripgrep/issues/2366 + // This is done last because canonicalisation has non-trivial cost + if !config.exclude_absolute_matchers.is_empty() { + match entry_path.canonicalize() { + Ok(path) => { + if config + .exclude_absolute_matchers + .iter() + .any(|glob| glob.is_match(&path)) + { + // emulate Git's behavior of skipping any matched directory entirely + // see https://git-scm.com/docs/gitignore#_pattern_format + return ignore::WalkState::Skip; + } + } + Err(err) => { + print_error(format!("Cannot canonicalize {entry_path:?}. {err}.")); + } + } + } + if config.is_printing() { if let Some(ls_colors) = &config.ls_colors { // Compute colors in parallel From 7d49fb9e2932e744ae78fb564cecfcfb802af17f Mon Sep 17 00:00:00 2001 From: cyqsimon <28627918+cyqsimon@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:29:46 +0800 Subject: [PATCH 2/3] Cleanup `dbg!` test logging --- src/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 985edf7..e8c2b55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -253,7 +253,6 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result Date: Thu, 2 Mar 2023 17:01:11 +0800 Subject: [PATCH 3/3] Fix incorrect usage of `WalkState::Skip` with explanations --- src/walk.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/walk.rs b/src/walk.rs index 95cca28..ad8c68e 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -538,9 +538,16 @@ fn spawn_senders( .iter() .any(|glob| glob.is_match(&path)) { - // emulate Git's behavior of skipping any matched directory entirely - // see https://git-scm.com/docs/gitignore#_pattern_format - return ignore::WalkState::Skip; + // Ideally we want to return `WalkState::Skip` to emulate gitignore's + // behavior of skipping any matched directory entirely + // Unfortunately this will make the search behaviour inconsistent + // because this filter happens outside of the directory walker + // + // E.g. Given directory structure `/foo/bar/` and CWD `/`: + // - `fd --exclude-absolute '/foo'` will return nothing + // - `fd --exclude-absolute '/foo' bar` will return '/foo/bar' + // Obviously this makes no sense + return ignore::WalkState::Continue; } } Err(err) => {