fd/src/walk.rs

282 lines
10 KiB
Rust
Raw Normal View History

2017-10-21 10:16:03 +02:00
// Copyright (c) 2017 fd developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0>
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.
extern crate ctrlc;
use exec;
2017-10-10 08:01:17 +02:00
use fshelper;
2018-03-25 19:48:09 +02:00
use internal::{error, FdOptions, EXITCODE_SIGINT, MAX_BUFFER_LENGTH};
2017-10-10 08:01:17 +02:00
use output;
2018-03-26 00:15:01 +02:00
use std::error::Error;
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
use std::path::PathBuf;
2018-04-13 22:46:17 +02:00
use std::process;
use std::sync::atomic::{AtomicBool, Ordering};
2017-10-10 08:01:17 +02:00
use std::sync::mpsc::channel;
2018-04-13 22:46:17 +02:00
use std::sync::{Arc, Mutex};
2017-10-10 08:01:17 +02:00
use std::thread;
use std::time;
use ignore::overrides::OverrideBuilder;
2018-04-13 22:46:17 +02:00
use ignore::{self, WalkBuilder};
2017-10-14 18:04:11 +02:00
use regex::Regex;
2017-10-10 08:01:17 +02:00
/// State of the thread that receives search results: it either collects them in a
/// buffer or prints each one as soon as it arrives.
enum ReceiverMode {
    /// Results are still being collected so that they can be sorted before printing,
    /// provided the search finishes quickly enough.
    Buffering,

    /// Results are written to the output immediately, in arrival order.
    Streaming,
}
2017-10-14 18:04:11 +02:00
/// Recursively scan the given search path for files / pathnames matching the pattern.
///
/// If the `--exec` argument was supplied, this will create a thread pool for executing
/// jobs in parallel from a given command line and the discovered paths. Otherwise, each
/// path will simply be written to standard output.
2017-12-10 06:40:13 +01:00
pub fn scan(path_vec: &[PathBuf], pattern: Arc<Regex>, config: Arc<FdOptions>) {
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let mut path_iter = path_vec.iter();
2018-01-01 12:16:43 +01:00
let first_path_buf = path_iter
.next()
.expect("Error: Path vector can not be empty");
2017-10-10 08:01:17 +02:00
let (tx, rx) = channel();
2017-10-14 18:04:11 +02:00
let threads = config.threads;
2017-10-10 08:01:17 +02:00
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let mut override_builder = OverrideBuilder::new(first_path_buf.as_path());
2017-10-26 21:13:56 +02:00
for pattern in &config.exclude_patterns {
let res = override_builder.add(pattern);
if res.is_err() {
error(&format!("Error: malformed exclude pattern '{}'", pattern));
}
}
let overrides = override_builder.build().unwrap_or_else(|_| {
error("Mismatch in exclude patterns");
});
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let mut walker = WalkBuilder::new(first_path_buf.as_path());
walker
2017-10-12 08:01:51 +02:00
.hidden(config.ignore_hidden)
2018-02-21 21:41:52 +01:00
.ignore(false)
.parents(config.read_fdignore || config.read_vcsignore)
.git_ignore(config.read_vcsignore)
.git_global(config.read_vcsignore)
.git_exclude(config.read_vcsignore)
.overrides(overrides)
2017-10-12 08:01:51 +02:00
.follow_links(config.follow_links)
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
.max_depth(config.max_depth);
2017-10-10 08:01:17 +02:00
2018-02-21 21:41:52 +01:00
if config.read_fdignore {
walker.add_custom_ignore_filename(".fdignore");
}
2018-03-26 00:15:01 +02:00
for ignore_file in &config.ignore_files {
let result = walker.add_ignore(ignore_file);
if let Some(err) = result {
match err {
ignore::Error::Partial(_) => (),
_ => {
error(&format!(
"Error while parsing custom ignore file '{}': {}.",
ignore_file.to_string_lossy(),
err.description()
));
}
}
}
}
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
for path_entry in path_iter {
walker.add(path_entry.as_path());
}
let parallel_walker = walker.threads(threads).build_parallel();
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
let wants_to_quit = Arc::new(AtomicBool::new(false));
let receiver_wtq = Arc::clone(&wants_to_quit);
let sender_wtq = Arc::clone(&wants_to_quit);
if config.ls_colors.is_some() && config.command.is_none() {
let wq = Arc::clone(&receiver_wtq);
2018-01-01 12:16:43 +01:00
ctrlc::set_handler(move || {
wq.store(true, Ordering::Relaxed);
}).unwrap();
}
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
2017-10-10 08:01:17 +02:00
// Spawn the thread that receives all results through the channel.
let rx_config = Arc::clone(&config);
let receiver_thread = thread::spawn(move || {
2017-10-14 18:04:11 +02:00
// This will be set to `Some` if the `--exec` argument was supplied.
if let Some(ref cmd) = rx_config.command {
let shared_rx = Arc::new(Mutex::new(rx));
2017-10-14 23:59:36 +02:00
let out_perm = Arc::new(Mutex::new(()));
// TODO: the following line is a workaround to replace the `unsafe` block that was
// previously used here to avoid the (unnecessary?) cloning of the command. The
// `unsafe` block caused problems on some platforms (SIGILL instructions on Linux) and
// therefore had to be removed.
let cmd = Arc::new(cmd.clone());
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
// Each spawned job will store it's thread handle in here.
let mut handles = Vec::with_capacity(threads);
for _ in 0..threads {
2017-10-22 11:47:05 +02:00
let rx = Arc::clone(&shared_rx);
let cmd = Arc::clone(&cmd);
let out_perm = Arc::clone(&out_perm);
2017-10-10 08:01:17 +02:00
2017-10-14 20:04:04 +02:00
// Spawn a job thread that will listen for and execute inputs.
let handle = thread::spawn(move || exec::job(rx, cmd, out_perm));
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
// Push the handle of the spawned thread into the vector for later joining.
handles.push(handle);
}
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
// Wait for all threads to exit before exiting the program.
2017-10-14 20:04:04 +02:00
for h in handles {
h.join().unwrap();
}
2017-10-14 18:04:11 +02:00
} else {
let start = time::Instant::now();
let mut buffer = vec![];
// Start in buffering mode
let mut mode = ReceiverMode::Buffering;
// Maximum time to wait before we start streaming to the console.
2018-01-01 12:16:43 +01:00
let max_buffer_time = rx_config
.max_buffer_time
.unwrap_or_else(|| time::Duration::from_millis(100));
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
for value in rx {
match mode {
ReceiverMode::Buffering => {
buffer.push(value);
// Have we reached the maximum buffer size or maximum buffering time?
if buffer.len() > MAX_BUFFER_LENGTH
|| time::Instant::now() - start > max_buffer_time
{
2017-10-14 18:04:11 +02:00
// Flush the buffer
for v in &buffer {
output::print_entry(v, &rx_config, &receiver_wtq);
2017-10-14 18:04:11 +02:00
}
buffer.clear();
// Start streaming
mode = ReceiverMode::Streaming;
}
}
ReceiverMode::Streaming => {
output::print_entry(&value, &rx_config, &receiver_wtq);
2017-10-10 08:01:17 +02:00
}
}
}
2017-10-14 18:04:11 +02:00
// If we have finished fast enough (faster than max_buffer_time), we haven't streamed
// anything to the console, yet. In this case, sort the results and print them:
if !buffer.is_empty() {
buffer.sort();
for value in buffer {
output::print_entry(&value, &rx_config, &receiver_wtq);
2017-10-14 18:04:11 +02:00
}
2017-10-10 08:01:17 +02:00
}
}
});
// Spawn the sender threads.
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
parallel_walker.run(|| {
2017-10-10 08:01:17 +02:00
let config = Arc::clone(&config);
let pattern = Arc::clone(&pattern);
let tx_thread = tx.clone();
let wants_to_quit = Arc::clone(&sender_wtq);
2017-10-10 08:01:17 +02:00
Box::new(move |entry_o| {
if wants_to_quit.load(Ordering::Relaxed) {
return ignore::WalkState::Quit;
}
2017-10-10 08:01:17 +02:00
let entry = match entry_o {
Ok(e) => e,
Err(_) => return ignore::WalkState::Continue,
};
Add multiple path support (#182) * Adding support for multiple paths. (panic) - Started adding multiple file support - fd panics with multiple files right now * Moved the ctrlc handler to main. - Moved the ctrlc handler to main so we can search multiple files * Tests now allow custom directory setup - TestEnv::new() now takes two arguments, the directories to create and the files to create inside those directories. * rust-fmt changes * rust-fmt changes * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Moving code around, no need to do everything in one big loop - PathDisplay was never actually used for anything, removed it during refactor of main - Removed redundant logic for absolute paths - Moved code placed needlessly inside a loop in the last commit outside of that loop. * Removed commented code in testenv * Refactored walk::scan to accept the path buffer vector. Using the ParallelWalker allows for multithreaded searching of multiple directories * Moved ctrlc handler back into walker, it is only called once from main now. * Moved the colored output check back to it's original place * Removing shell-escape, not sure how it got added... * Added test for `fd 'a.foo' test1` to show that a.foo is only found in the test1 and not the test2 direcotry * Removing side effect from walk::scan, `dir_vec` is no longer a mutable reference and an iterator is being used instead. * Running rustfmt to format code correctly
2017-12-06 23:52:23 +01:00
if entry.depth() == 0 {
return ignore::WalkState::Continue;
}
2017-10-10 08:01:17 +02:00
// Filter out unwanted file types.
2018-03-25 12:19:51 +02:00
if let Some(ref file_types) = config.file_types {
if let Some(ref entry_type) = entry.file_type() {
if (entry_type.is_file() && !file_types.files)
|| (entry_type.is_dir() && !file_types.directories)
|| (entry_type.is_symlink() && !file_types.symlinks)
2018-03-25 19:48:09 +02:00
|| (entry.metadata().is_ok()
&& !fshelper::is_executable(&entry.metadata().unwrap())
2018-03-25 16:36:37 +02:00
&& file_types.executables_only)
{
return ignore::WalkState::Continue;
} else if !(entry_type.is_file() || entry_type.is_dir()
|| entry_type.is_symlink())
{
// This is probably a block device, char device, fifo or socket. Skip it.
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
2017-10-10 08:01:17 +02:00
}
2018-02-25 11:34:26 +01:00
let entry_path = entry.path();
2017-10-10 08:01:17 +02:00
// Filter out unwanted extensions.
if let Some(ref exts_regex) = config.extensions {
if let Some(path_str) = entry_path.file_name().map_or(None, |s| s.to_str()) {
if !exts_regex.is_match(path_str) {
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
2017-10-10 08:01:17 +02:00
}
}
2017-10-12 08:01:51 +02:00
let search_str_o = if config.search_full_path {
2017-10-22 11:47:05 +02:00
match fshelper::path_absolute_form(entry_path) {
Ok(path_abs_buf) => Some(path_abs_buf.to_string_lossy().into_owned().into()),
Err(_) => error("Error: unable to get full path."),
}
2017-10-12 08:01:51 +02:00
} else {
entry_path.file_name().map(|f| f.to_string_lossy())
2017-10-12 08:01:51 +02:00
};
2017-10-10 08:01:17 +02:00
if let Some(search_str) = search_str_o {
if pattern.is_match(&*search_str) {
// TODO: take care of the unwrap call
tx_thread.send(entry_path.to_owned()).unwrap()
}
2017-10-10 08:01:17 +02:00
}
ignore::WalkState::Continue
})
});
// Drop the initial sender. If we don't do this, the receiver will block even
// if all threads have finished, since there is still one sender around.
drop(tx);
// Wait for the receiver thread to print out all results.
receiver_thread.join().unwrap();
if wants_to_quit.load(Ordering::Relaxed) {
2018-01-03 10:00:22 +01:00
process::exit(EXITCODE_SIGINT);
}
2017-10-10 08:01:17 +02:00
}