fd/src/walk.rs

506 lines
18 KiB
Rust
Raw Normal View History

use std::borrow::Cow;
use std::ffi::OsStr;
use std::fs::{FileType, Metadata};
2019-01-01 22:52:08 +01:00
use std::io;
use std::path::{Path, PathBuf};
2018-04-13 22:46:17 +02:00
use std::process;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{channel, Receiver, Sender};
2018-04-13 22:46:17 +02:00
use std::sync::{Arc, Mutex};
2017-10-10 08:01:17 +02:00
use std::thread;
use std::time;
2020-04-03 21:18:54 +02:00
use anyhow::{anyhow, Result};
use ignore::overrides::OverrideBuilder;
2018-04-13 22:46:17 +02:00
use ignore::{self, WalkBuilder};
use regex::bytes::Regex;
2017-10-10 08:01:17 +02:00
2020-04-03 21:18:54 +02:00
use crate::error::print_error;
use crate::exec;
use crate::exit_codes::{merge_exitcodes, ExitCode};
use crate::filesystem;
use crate::options::Options;
use crate::output;
2017-10-10 08:01:17 +02:00
/// The receiver thread can either be buffering results or directly streaming to the console.
///
/// The receiver starts out in `Buffering` mode and switches to `Streaming` once the buffer
/// has grown past `MAX_BUFFER_LENGTH` entries or the maximum buffering time has elapsed.
enum ReceiverMode {
/// Receiver is still buffering in order to sort the results, if the search finishes fast
/// enough.
Buffering,
/// Receiver is directly printing results to the output.
Streaming,
}
2018-09-30 15:01:23 +02:00
/// The Worker threads can result in a valid entry having PathBuf or an error.
pub enum WorkerResult {
Entry(PathBuf),
2018-09-30 22:56:32 +02:00
Error(ignore::Error),
2018-09-30 15:01:23 +02:00
}
2020-04-03 11:51:50 +02:00
/// Maximum size of the output buffer before flushing results to the console.
///
/// The size is counted in buffered entries (not bytes): the receiver flushes the buffer and
/// switches to streaming as soon as it holds *more than* this many entries.
pub const MAX_BUFFER_LENGTH: usize = 1000;
2017-10-14 18:04:11 +02:00
/// Recursively scan the given search path for files / pathnames matching the pattern.
///
/// If the `--exec` argument was supplied, this will create a thread pool for executing
/// jobs in parallel from a given command line and the discovered paths. Otherwise, each
/// path will simply be written to standard output.
2020-04-03 21:18:54 +02:00
pub fn scan(path_vec: &[PathBuf], pattern: Arc<Regex>, config: Arc<Options>) -> Result<ExitCode> {
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let mut path_iter = path_vec.iter();
2018-01-01 12:16:43 +01:00
let first_path_buf = path_iter
.next()
.expect("Error: Path vector can not be empty");
2017-10-10 08:01:17 +02:00
let (tx, rx) = channel();
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let mut override_builder = OverrideBuilder::new(first_path_buf.as_path());
2017-10-26 21:13:56 +02:00
for pattern in &config.exclude_patterns {
2020-04-03 21:18:54 +02:00
override_builder
.add(pattern)
.map_err(|e| anyhow!("Malformed exclude pattern: {}", e))?;
}
2020-04-03 21:18:54 +02:00
let overrides = override_builder
.build()
.map_err(|_| anyhow!("Mismatch in exclude patterns"))?;
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let mut walker = WalkBuilder::new(first_path_buf.as_path());
walker
2017-10-12 08:01:51 +02:00
.hidden(config.ignore_hidden)
2018-10-27 18:11:50 +02:00
.ignore(config.read_fdignore)
2018-02-21 21:41:52 +01:00
.parents(config.read_fdignore || config.read_vcsignore)
.git_ignore(config.read_vcsignore)
.git_global(config.read_vcsignore)
.git_exclude(config.read_vcsignore)
.overrides(overrides)
2017-10-12 08:01:51 +02:00
.follow_links(config.follow_links)
// No need to check for supported platforms, option is unavailable on unsupported ones
.same_file_system(config.one_file_system)
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
.max_depth(config.max_depth);
2017-10-10 08:01:17 +02:00
2018-02-21 21:41:52 +01:00
if config.read_fdignore {
walker.add_custom_ignore_filename(".fdignore");
}
2020-04-25 21:32:17 +02:00
if config.read_global_ignore {
#[cfg(target_os = "macos")]
2020-05-19 14:01:00 +02:00
let config_dir_op = std::env::var_os("XDG_CONFIG_HOME")
2020-04-25 21:32:17 +02:00
.map(PathBuf::from)
.filter(|p| p.is_absolute())
2020-10-09 18:11:15 +02:00
.or_else(|| dirs_next::home_dir().map(|d| d.join(".config")));
2020-04-25 21:32:17 +02:00
#[cfg(not(target_os = "macos"))]
2020-10-09 18:11:15 +02:00
let config_dir_op = dirs_next::config_dir();
2020-04-25 21:32:17 +02:00
if let Some(global_ignore_file) = config_dir_op
.map(|p| p.join("fd").join("ignore"))
.filter(|p| p.is_file())
{
let result = walker.add_ignore(global_ignore_file);
match result {
Some(ignore::Error::Partial(_)) => (),
Some(err) => {
print_error(format!(
"Malformed pattern in global ignore file. {}.",
err.to_string()
));
}
None => (),
}
}
}
2018-03-26 00:15:01 +02:00
for ignore_file in &config.ignore_files {
let result = walker.add_ignore(ignore_file);
match result {
Some(ignore::Error::Partial(_)) => (),
Some(err) => {
2020-04-03 21:18:54 +02:00
print_error(format!(
"Malformed pattern in custom ignore file. {}.",
err.to_string()
));
2018-03-26 00:15:01 +02:00
}
None => (),
2018-03-26 00:15:01 +02:00
}
}
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
for path_entry in path_iter {
walker.add(path_entry.as_path());
}
2019-01-26 01:16:53 +01:00
let parallel_walker = walker.threads(config.threads).build_parallel();
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let wants_to_quit = Arc::new(AtomicBool::new(false));
if config.ls_colors.is_some() && config.command.is_none() {
let wq = Arc::clone(&wants_to_quit);
2018-01-01 12:16:43 +01:00
ctrlc::set_handler(move || {
if wq.load(Ordering::Relaxed) {
// Ctrl-C has been pressed twice, exit NOW
process::exit(ExitCode::KilledBySigint.into());
} else {
wq.store(true, Ordering::Relaxed);
}
2018-09-27 23:01:38 +02:00
})
.unwrap();
}
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
2017-10-10 08:01:17 +02:00
// Spawn the thread that receives all results through the channel.
let receiver_thread = spawn_receiver(&config, &wants_to_quit, rx);
// Spawn the sender threads.
spawn_senders(&config, &wants_to_quit, pattern, parallel_walker, tx);
// Wait for the receiver thread to print out all results.
let exit_code = receiver_thread.join().unwrap();
if wants_to_quit.load(Ordering::Relaxed) {
2020-04-03 21:34:59 +02:00
Ok(ExitCode::KilledBySigint)
} else {
Ok(exit_code)
}
}
fn spawn_receiver(
2020-04-03 11:39:32 +02:00
config: &Arc<Options>,
wants_to_quit: &Arc<AtomicBool>,
rx: Receiver<WorkerResult>,
) -> thread::JoinHandle<ExitCode> {
let config = Arc::clone(config);
let wants_to_quit = Arc::clone(wants_to_quit);
let show_filesystem_errors = config.show_filesystem_errors;
let threads = config.threads;
thread::spawn(move || {
2017-10-14 18:04:11 +02:00
// This will be set to `Some` if the `--exec` argument was supplied.
if let Some(ref cmd) = config.command {
if cmd.in_batch_mode() {
exec::batch(rx, cmd, show_filesystem_errors)
} else {
let shared_rx = Arc::new(Mutex::new(rx));
let out_perm = Arc::new(Mutex::new(()));
// Each spawned job will store it's thread handle in here.
let mut handles = Vec::with_capacity(threads);
for _ in 0..threads {
let rx = Arc::clone(&shared_rx);
let cmd = Arc::clone(cmd);
let out_perm = Arc::clone(&out_perm);
// Spawn a job thread that will listen for and execute inputs.
let handle =
thread::spawn(move || exec::job(rx, cmd, out_perm, show_filesystem_errors));
// Push the handle of the spawned thread into the vector for later joining.
handles.push(handle);
}
2017-10-10 08:01:17 +02:00
// Wait for all threads to exit before exiting the program.
let mut results: Vec<ExitCode> = Vec::new();
for h in handles {
results.push(h.join().unwrap());
}
2020-04-03 10:48:27 +02:00
merge_exitcodes(&results)
2017-10-14 20:04:04 +02:00
}
2017-10-14 18:04:11 +02:00
} else {
let start = time::Instant::now();
let mut buffer = vec![];
// Start in buffering mode
let mut mode = ReceiverMode::Buffering;
// Maximum time to wait before we start streaming to the console.
let max_buffer_time = config
2018-01-01 12:16:43 +01:00
.max_buffer_time
.unwrap_or_else(|| time::Duration::from_millis(100));
2017-10-10 08:01:17 +02:00
2019-01-01 22:52:08 +01:00
let stdout = io::stdout();
let mut stdout = stdout.lock();
let mut num_results = 0;
2018-09-30 15:01:23 +02:00
for worker_result in rx {
2020-02-28 17:46:49 +01:00
match worker_result {
WorkerResult::Entry(value) => {
2018-09-30 15:01:23 +02:00
match mode {
ReceiverMode::Buffering => {
buffer.push(value);
// Have we reached the maximum buffer size or maximum buffering time?
if buffer.len() > MAX_BUFFER_LENGTH
|| time::Instant::now() - start > max_buffer_time
{
// Flush the buffer
for v in &buffer {
2019-01-01 22:52:08 +01:00
output::print_entry(
&mut stdout,
v,
&config,
&wants_to_quit,
2019-01-01 22:52:08 +01:00
);
2018-09-30 15:01:23 +02:00
}
buffer.clear();
// Start streaming
mode = ReceiverMode::Streaming;
}
}
ReceiverMode::Streaming => {
output::print_entry(&mut stdout, &value, &config, &wants_to_quit);
2017-10-14 18:04:11 +02:00
}
}
num_results += 1;
2017-10-14 18:04:11 +02:00
}
2020-02-28 17:46:49 +01:00
WorkerResult::Error(err) => {
2018-10-22 14:20:08 +02:00
if show_filesystem_errors {
2020-04-03 21:18:54 +02:00
print_error(err.to_string());
2018-10-22 14:20:08 +02:00
}
2017-10-10 08:01:17 +02:00
}
}
if let Some(max_results) = config.max_results {
if num_results >= max_results {
break;
}
}
2017-10-10 08:01:17 +02:00
}
2017-10-14 18:04:11 +02:00
// If we have finished fast enough (faster than max_buffer_time), we haven't streamed
// anything to the console, yet. In this case, sort the results and print them:
if !buffer.is_empty() {
buffer.sort();
for value in buffer {
output::print_entry(&mut stdout, &value, &config, &wants_to_quit);
2017-10-14 18:04:11 +02:00
}
2017-10-10 08:01:17 +02:00
}
ExitCode::Success
2017-10-10 08:01:17 +02:00
}
})
}
2017-10-10 08:01:17 +02:00
/// A file system entry that is checked against the search criteria.
pub enum DirEntry {
/// An entry that was returned successfully by the directory walker.
Normal(ignore::DirEntry),
/// Path of a symlink for which the walker reported a "not found" I/O error, i.e. a
/// symlink whose target could not be found.
BrokenSymlink(PathBuf),
}
impl DirEntry {
    /// Path of this entry, regardless of which variant it is.
    pub fn path(&self) -> &Path {
        match self {
            DirEntry::Normal(entry) => entry.path(),
            DirEntry::BrokenSymlink(link) => link.as_path(),
        }
    }

    /// File type of this entry, if it can be determined.
    ///
    /// For a broken symlink the type is read from the metadata of the link itself
    /// (`symlink_metadata`); `None` is returned if that lookup fails.
    pub fn file_type(&self) -> Option<FileType> {
        match self {
            DirEntry::Normal(entry) => entry.file_type(),
            DirEntry::BrokenSymlink(link) => {
                link.symlink_metadata().ok().map(|meta| meta.file_type())
            }
        }
    }

    /// Metadata of a normal entry, or `None` if it cannot be read or the entry is a
    /// broken symlink.
    pub fn metadata(&self) -> Option<Metadata> {
        if let DirEntry::Normal(entry) = self {
            entry.metadata().ok()
        } else {
            None
        }
    }

    /// Depth reported by the walker for a normal entry; `None` for a broken symlink.
    pub fn depth(&self) -> Option<usize> {
        if let DirEntry::Normal(entry) = self {
            Some(entry.depth())
        } else {
            None
        }
    }
}
fn spawn_senders(
2020-04-03 11:39:32 +02:00
config: &Arc<Options>,
wants_to_quit: &Arc<AtomicBool>,
pattern: Arc<Regex>,
parallel_walker: ignore::WalkParallel,
tx: Sender<WorkerResult>,
) {
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
parallel_walker.run(|| {
let config = Arc::clone(config);
2017-10-10 08:01:17 +02:00
let pattern = Arc::clone(&pattern);
let tx_thread = tx.clone();
let wants_to_quit = Arc::clone(wants_to_quit);
2017-10-10 08:01:17 +02:00
Box::new(move |entry_o| {
if wants_to_quit.load(Ordering::Relaxed) {
return ignore::WalkState::Quit;
}
2017-10-10 08:01:17 +02:00
let entry = match entry_o {
2020-02-28 18:42:35 +01:00
Ok(ref e) if e.depth() == 0 => {
// Skip the root directory entry.
return ignore::WalkState::Continue;
}
Ok(e) => DirEntry::Normal(e),
Err(ignore::Error::WithPath {
path,
err: inner_err,
}) => match inner_err.as_ref() {
2020-02-28 18:26:09 +01:00
ignore::Error::Io(io_error)
if io_error.kind() == io::ErrorKind::NotFound
&& path
.symlink_metadata()
2020-02-28 18:42:35 +01:00
.ok()
2020-02-28 18:26:09 +01:00
.map_or(false, |m| m.file_type().is_symlink()) =>
{
2020-04-03 10:08:47 +02:00
DirEntry::BrokenSymlink(path)
}
_ => {
tx_thread
.send(WorkerResult::Error(ignore::Error::WithPath {
path,
err: inner_err,
}))
.unwrap();
return ignore::WalkState::Continue;
}
},
2018-09-30 15:01:23 +02:00
Err(err) => {
tx_thread.send(WorkerResult::Error(err)).unwrap();
return ignore::WalkState::Continue;
}
2017-10-10 08:01:17 +02:00
};
if let Some(min_depth) = config.min_depth {
if entry.depth().map_or(true, |d| d < min_depth) {
return ignore::WalkState::Continue;
}
}
Check the pattern before anything else, since it doesn't require metadata This should partially address #432 by decreasing the number of stat() calls: $ strace -c -f ./fd-before '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 15.71 8.831948 7 1192279 46059 stat $ strace -c -f ./fd-after '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 7.92 1.972474 10 183907 46046 stat Though it's not as few as possible: $ strace -c -f find /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 19.01 0.946500 5 161649 newfstatat $ strace -c -f bfs /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 13.73 0.406565 5 69005 statx Performance is much better when metadata is required: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1 -S +1k" Benchmark #1: ./fd-before '\.h$' /usr -j1 -S +1k Time (mean ± σ): 4.623 s ± 0.154 s [User: 1.465 s, System: 3.354 s] Range (min … max): 4.327 s … 4.815 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 -S +1k Time (mean ± σ): 2.650 s ± 0.058 s [User: 1.258 s, System: 1.592 s] Range (min … max): 2.568 s … 2.723 s 10 runs Summary './fd-after '\.h$' /usr -j1 -S +1k' ran 1.74 ± 0.07 times faster than './fd-before '\.h$' /usr -j1 -S +1k' While remaining the same when it's not: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1" Benchmark #1: ./fd-before '\.h$' /usr -j1 Time (mean ± σ): 2.382 s ± 0.038 s [User: 1.221 s, System: 1.286 s] Range (min … max): 2.325 s … 2.433 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 Time (mean ± σ): 2.362 s ± 0.034 s [User: 1.193 s, System: 1.294 s] Range (min … max): 2.307 s … 2.422 s 10 runs Summary './fd-after '\.h$' /usr 
-j1' ran 1.01 ± 0.02 times faster than './fd-before '\.h$' /usr -j1'
2019-04-26 03:17:42 +02:00
// Check the name first, since it doesn't require metadata
let entry_path = entry.path();
let search_str: Cow<OsStr> = if config.search_full_path {
2020-04-03 21:18:54 +02:00
let path_abs_buf = filesystem::path_absolute_form(entry_path)
.expect("Retrieving absolute path succeeds");
Cow::Owned(path_abs_buf.as_os_str().to_os_string())
Check the pattern before anything else, since it doesn't require metadata This should partially address #432 by decreasing the number of stat() calls: $ strace -c -f ./fd-before '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 15.71 8.831948 7 1192279 46059 stat $ strace -c -f ./fd-after '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 7.92 1.972474 10 183907 46046 stat Though it's not as few as possible: $ strace -c -f find /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 19.01 0.946500 5 161649 newfstatat $ strace -c -f bfs /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 13.73 0.406565 5 69005 statx Performance is much better when metadata is required: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1 -S +1k" Benchmark #1: ./fd-before '\.h$' /usr -j1 -S +1k Time (mean ± σ): 4.623 s ± 0.154 s [User: 1.465 s, System: 3.354 s] Range (min … max): 4.327 s … 4.815 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 -S +1k Time (mean ± σ): 2.650 s ± 0.058 s [User: 1.258 s, System: 1.592 s] Range (min … max): 2.568 s … 2.723 s 10 runs Summary './fd-after '\.h$' /usr -j1 -S +1k' ran 1.74 ± 0.07 times faster than './fd-before '\.h$' /usr -j1 -S +1k' While remaining the same when it's not: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1" Benchmark #1: ./fd-before '\.h$' /usr -j1 Time (mean ± σ): 2.382 s ± 0.038 s [User: 1.221 s, System: 1.286 s] Range (min … max): 2.325 s … 2.433 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 Time (mean ± σ): 2.362 s ± 0.034 s [User: 1.193 s, System: 1.294 s] Range (min … max): 2.307 s … 2.422 s 10 runs Summary './fd-after '\.h$' /usr 
-j1' ran 1.01 ± 0.02 times faster than './fd-before '\.h$' /usr -j1'
2019-04-26 03:17:42 +02:00
} else {
match entry_path.file_name() {
Some(filename) => Cow::Borrowed(filename),
None => unreachable!(
"Encountered file system entry without a file name. This should only \
happen for paths like 'foo/bar/..' or '/' which are not supposed to \
appear in a file system traversal."
),
}
Check the pattern before anything else, since it doesn't require metadata This should partially address #432 by decreasing the number of stat() calls: $ strace -c -f ./fd-before '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 15.71 8.831948 7 1192279 46059 stat $ strace -c -f ./fd-after '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 7.92 1.972474 10 183907 46046 stat Though it's not as few as possible: $ strace -c -f find /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 19.01 0.946500 5 161649 newfstatat $ strace -c -f bfs /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 13.73 0.406565 5 69005 statx Performance is much better when metadata is required: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1 -S +1k" Benchmark #1: ./fd-before '\.h$' /usr -j1 -S +1k Time (mean ± σ): 4.623 s ± 0.154 s [User: 1.465 s, System: 3.354 s] Range (min … max): 4.327 s … 4.815 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 -S +1k Time (mean ± σ): 2.650 s ± 0.058 s [User: 1.258 s, System: 1.592 s] Range (min … max): 2.568 s … 2.723 s 10 runs Summary './fd-after '\.h$' /usr -j1 -S +1k' ran 1.74 ± 0.07 times faster than './fd-before '\.h$' /usr -j1 -S +1k' While remaining the same when it's not: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1" Benchmark #1: ./fd-before '\.h$' /usr -j1 Time (mean ± σ): 2.382 s ± 0.038 s [User: 1.221 s, System: 1.286 s] Range (min … max): 2.325 s … 2.433 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 Time (mean ± σ): 2.362 s ± 0.034 s [User: 1.193 s, System: 1.294 s] Range (min … max): 2.307 s … 2.422 s 10 runs Summary './fd-after '\.h$' /usr 
-j1' ran 1.01 ± 0.02 times faster than './fd-before '\.h$' /usr -j1'
2019-04-26 03:17:42 +02:00
};
2020-04-03 12:04:47 +02:00
if !pattern.is_match(&filesystem::osstr_to_bytes(search_str.as_ref())) {
return ignore::WalkState::Continue;
Check the pattern before anything else, since it doesn't require metadata This should partially address #432 by decreasing the number of stat() calls: $ strace -c -f ./fd-before '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 15.71 8.831948 7 1192279 46059 stat $ strace -c -f ./fd-after '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 7.92 1.972474 10 183907 46046 stat Though it's not as few as possible: $ strace -c -f find /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 19.01 0.946500 5 161649 newfstatat $ strace -c -f bfs /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 13.73 0.406565 5 69005 statx Performance is much better when metadata is required: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1 -S +1k" Benchmark #1: ./fd-before '\.h$' /usr -j1 -S +1k Time (mean ± σ): 4.623 s ± 0.154 s [User: 1.465 s, System: 3.354 s] Range (min … max): 4.327 s … 4.815 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 -S +1k Time (mean ± σ): 2.650 s ± 0.058 s [User: 1.258 s, System: 1.592 s] Range (min … max): 2.568 s … 2.723 s 10 runs Summary './fd-after '\.h$' /usr -j1 -S +1k' ran 1.74 ± 0.07 times faster than './fd-before '\.h$' /usr -j1 -S +1k' While remaining the same when it's not: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1" Benchmark #1: ./fd-before '\.h$' /usr -j1 Time (mean ± σ): 2.382 s ± 0.038 s [User: 1.221 s, System: 1.286 s] Range (min … max): 2.325 s … 2.433 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 Time (mean ± σ): 2.362 s ± 0.034 s [User: 1.193 s, System: 1.294 s] Range (min … max): 2.307 s … 2.422 s 10 runs Summary './fd-after '\.h$' /usr 
-j1' ran 1.01 ± 0.02 times faster than './fd-before '\.h$' /usr -j1'
2019-04-26 03:17:42 +02:00
}
// Filter out unwanted extensions.
if let Some(ref exts_regex) = config.extensions {
if let Some(path_str) = entry_path.file_name() {
2020-04-03 12:04:47 +02:00
if !exts_regex.is_match(&filesystem::osstr_to_bytes(path_str)) {
Check the pattern before anything else, since it doesn't require metadata This should partially address #432 by decreasing the number of stat() calls: $ strace -c -f ./fd-before '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 15.71 8.831948 7 1192279 46059 stat $ strace -c -f ./fd-after '\.h$' /usr -j1 -S +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 7.92 1.972474 10 183907 46046 stat Though it's not as few as possible: $ strace -c -f find /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 19.01 0.946500 5 161649 newfstatat $ strace -c -f bfs /usr -iname '*.h' -size +1k >/dev/null % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 13.73 0.406565 5 69005 statx Performance is much better when metadata is required: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1 -S +1k" Benchmark #1: ./fd-before '\.h$' /usr -j1 -S +1k Time (mean ± σ): 4.623 s ± 0.154 s [User: 1.465 s, System: 3.354 s] Range (min … max): 4.327 s … 4.815 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 -S +1k Time (mean ± σ): 2.650 s ± 0.058 s [User: 1.258 s, System: 1.592 s] Range (min … max): 2.568 s … 2.723 s 10 runs Summary './fd-after '\.h$' /usr -j1 -S +1k' ran 1.74 ± 0.07 times faster than './fd-before '\.h$' /usr -j1 -S +1k' While remaining the same when it's not: $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1" Benchmark #1: ./fd-before '\.h$' /usr -j1 Time (mean ± σ): 2.382 s ± 0.038 s [User: 1.221 s, System: 1.286 s] Range (min … max): 2.325 s … 2.433 s 10 runs Benchmark #2: ./fd-after '\.h$' /usr -j1 Time (mean ± σ): 2.362 s ± 0.034 s [User: 1.193 s, System: 1.294 s] Range (min … max): 2.307 s … 2.422 s 10 runs Summary './fd-after '\.h$' /usr 
-j1' ran 1.01 ± 0.02 times faster than './fd-before '\.h$' /usr -j1'
2019-04-26 03:17:42 +02:00
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
}
2017-10-10 08:01:17 +02:00
// Filter out unwanted file types.
if let Some(ref file_types) = config.file_types {
if let Some(ref entry_type) = entry.file_type() {
if (!file_types.files && entry_type.is_file())
|| (!file_types.directories && entry_type.is_dir())
|| (!file_types.symlinks && entry_type.is_symlink())
|| (!file_types.sockets && filesystem::is_socket(entry_type))
|| (!file_types.pipes && filesystem::is_pipe(entry_type))
2018-10-19 22:05:15 +02:00
|| (file_types.executables_only
&& !entry
.metadata()
2020-04-03 12:04:47 +02:00
.map(|m| filesystem::is_executable(&m))
2018-10-19 22:05:15 +02:00
.unwrap_or(false))
2020-04-03 12:04:47 +02:00
|| (file_types.empty_only && !filesystem::is_empty(&entry))
|| !(entry_type.is_file()
|| entry_type.is_dir()
|| entry_type.is_symlink()
|| filesystem::is_socket(entry_type)
|| filesystem::is_pipe(entry_type))
{
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
2017-10-10 08:01:17 +02:00
}
#[cfg(unix)]
{
if let Some(ref owner_constraint) = config.owner_constraint {
if let Ok(ref metadata) = entry_path.metadata() {
if !owner_constraint.matches(&metadata) {
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
}
}
// Filter out unwanted sizes if it is a file and we have been given size constraints.
2019-01-26 02:13:16 +01:00
if !config.size_constraints.is_empty() {
if entry_path.is_file() {
if let Ok(metadata) = entry_path.metadata() {
let file_size = metadata.len();
if config
.size_constraints
.iter()
.any(|sc| !sc.is_within(file_size))
{
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
}
// Filter out unwanted modification times
if !config.time_constraints.is_empty() {
let mut matched = false;
if let Ok(metadata) = entry_path.metadata() {
if let Ok(modified) = metadata.modified() {
matched = config
.time_constraints
.iter()
.all(|tf| tf.applies_to(&modified));
}
}
if !matched {
return ignore::WalkState::Continue;
}
}
2020-01-01 12:05:50 +01:00
let send_result = tx_thread.send(WorkerResult::Entry(entry_path.to_owned()));
2020-04-03 10:08:47 +02:00
if send_result.is_err() {
return ignore::WalkState::Quit;
}
2017-10-10 08:01:17 +02:00
2020-10-25 08:16:01 +01:00
// Apply pruning.
if config.prune {
return ignore::WalkState::Skip;
}
2017-10-10 08:01:17 +02:00
ignore::WalkState::Continue
})
});
}