fd/src/walk.rs

217 lines
7.6 KiB
Rust
Raw Normal View History

2017-10-14 18:04:11 +02:00
use exec::{self, TokenizedCommand};
2017-10-10 08:01:17 +02:00
use fshelper;
2017-10-14 22:42:47 +02:00
use internal::{error, FdOptions, PathDisplay};
2017-10-10 08:01:17 +02:00
use output;
use std::path::{Path, PathBuf};
2017-10-14 18:04:11 +02:00
use std::sync::{Arc, Mutex};
2017-10-10 08:01:17 +02:00
use std::sync::mpsc::channel;
use std::thread;
use std::time;
use ignore::{self, WalkBuilder};
2017-10-14 18:04:11 +02:00
use regex::Regex;
2017-10-10 08:01:17 +02:00
/// The receiver thread can either be buffering results or directly streaming to the console.
///
/// `scan` starts its receiver in `Buffering` mode and switches to `Streaming` once the
/// maximum buffer time has elapsed; it never switches back.
enum ReceiverMode {
    /// Receiver is still buffering in order to sort the results, if the search finishes fast
    /// enough.
    Buffering,

    /// Receiver is directly printing results to the output.
    Streaming,
}
/// The type of file to search for.
///
/// `scan` uses this to drop directory entries whose file type is known and does not match
/// the requested kind.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum FileType {
    /// Do not filter by file type.
    Any,

    /// Keep only entries whose file type reports `is_file()`.
    RegularFile,

    /// Keep only entries whose file type reports `is_dir()`.
    Directory,

    /// Keep only entries whose file type reports `is_symlink()`.
    SymLink,
}
2017-10-14 18:04:11 +02:00
/// Recursively scan the given search path for files / pathnames matching the pattern.
///
/// If the `--exec` argument was supplied, this will create a thread pool for executing
/// jobs in parallel from a given command line and the discovered paths. Otherwise, each
/// path will simply be written to standard output.
2017-10-10 08:01:17 +02:00
pub fn scan(root: &Path, pattern: Arc<Regex>, base: &Path, config: Arc<FdOptions>) {
let (tx, rx) = channel();
2017-10-14 18:04:11 +02:00
let threads = config.threads;
2017-10-10 08:01:17 +02:00
let walker = WalkBuilder::new(root)
2017-10-12 08:01:51 +02:00
.hidden(config.ignore_hidden)
.ignore(config.read_ignore)
.git_ignore(config.read_ignore)
.parents(config.read_ignore)
.git_global(config.read_ignore)
.git_exclude(config.read_ignore)
.follow_links(config.follow_links)
.max_depth(config.max_depth)
2017-10-14 18:04:11 +02:00
.threads(threads)
2017-10-12 08:01:51 +02:00
.build_parallel();
2017-10-10 08:01:17 +02:00
// Spawn the thread that receives all results through the channel.
let rx_config = Arc::clone(&config);
let rx_base = base.to_owned();
2017-10-14 22:42:47 +02:00
let is_absolute = config.path_display == PathDisplay::Absolute;
2017-10-10 08:01:17 +02:00
let receiver_thread = thread::spawn(move || {
2017-10-14 18:04:11 +02:00
// This will be set to `Some` if the `--exec` argument was supplied.
if let Some(ref cmd) = rx_config.command {
let shared_rx = Arc::new(Mutex::new(rx));
2017-10-14 22:42:47 +02:00
let base = Arc::new(if is_absolute { Some(rx_base) } else { None });
2017-10-10 08:01:17 +02:00
// This is safe because `cmd` will exist beyond the end of this scope.
2017-10-14 18:04:11 +02:00
// It's required to tell Rust that it's safe to share across threads.
let cmd = unsafe { Arc::from_raw(cmd as *const TokenizedCommand) };
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
// Each spawned job will store it's thread handle in here.
let mut handles = Vec::with_capacity(threads);
for _ in 0..threads {
let rx = shared_rx.clone();
let cmd = cmd.clone();
2017-10-14 22:42:47 +02:00
let base = base.clone();
2017-10-10 08:01:17 +02:00
2017-10-14 20:04:04 +02:00
// Spawn a job thread that will listen for and execute inputs.
2017-10-14 22:42:47 +02:00
let handle = thread::spawn(move || exec::job(rx, base, cmd));
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
// Push the handle of the spawned thread into the vector for later joining.
handles.push(handle);
}
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
// Wait for all threads to exit before exiting the program.
2017-10-14 20:04:04 +02:00
for h in handles {
h.join().unwrap();
}
2017-10-14 18:04:11 +02:00
} else {
let start = time::Instant::now();
let mut buffer = vec![];
// Start in buffering mode
let mut mode = ReceiverMode::Buffering;
// Maximum time to wait before we start streaming to the console.
let max_buffer_time = rx_config.max_buffer_time.unwrap_or_else(
|| time::Duration::from_millis(100),
);
2017-10-10 08:01:17 +02:00
2017-10-14 18:04:11 +02:00
for value in rx {
match mode {
ReceiverMode::Buffering => {
buffer.push(value);
// Have we reached the maximum time?
if time::Instant::now() - start > max_buffer_time {
// Flush the buffer
for v in &buffer {
output::print_entry(&rx_base, v, &rx_config);
}
buffer.clear();
// Start streaming
mode = ReceiverMode::Streaming;
}
}
ReceiverMode::Streaming => {
output::print_entry(&rx_base, &value, &rx_config);
2017-10-10 08:01:17 +02:00
}
}
}
2017-10-14 18:04:11 +02:00
// If we have finished fast enough (faster than max_buffer_time), we haven't streamed
// anything to the console, yet. In this case, sort the results and print them:
if !buffer.is_empty() {
buffer.sort();
for value in buffer {
output::print_entry(&rx_base, &value, &rx_config);
}
2017-10-10 08:01:17 +02:00
}
}
});
// Spawn the sender threads.
walker.run(|| {
let base = base.to_owned();
let config = Arc::clone(&config);
let pattern = Arc::clone(&pattern);
let tx_thread = tx.clone();
let root = root.to_owned();
2017-10-10 08:01:17 +02:00
Box::new(move |entry_o| {
let entry = match entry_o {
Ok(e) => e,
Err(_) => return ignore::WalkState::Continue,
};
let entry_path = entry.path();
if entry_path == root {
return ignore::WalkState::Continue;
}
2017-10-10 08:01:17 +02:00
// Filter out unwanted file types.
match config.file_type {
FileType::Any => (),
2017-10-12 08:01:51 +02:00
FileType::RegularFile => {
if entry.file_type().map_or(false, |ft| !ft.is_file()) {
return ignore::WalkState::Continue;
}
}
FileType::Directory => {
if entry.file_type().map_or(false, |ft| !ft.is_dir()) {
return ignore::WalkState::Continue;
}
}
FileType::SymLink => {
if entry.file_type().map_or(false, |ft| !ft.is_symlink()) {
return ignore::WalkState::Continue;
}
}
2017-10-10 08:01:17 +02:00
}
// Filter out unwanted extensions.
if let Some(ref filter_ext) = config.extension {
let entry_ext = entry_path.extension().map(
|e| e.to_string_lossy().to_lowercase(),
);
2017-10-10 08:01:17 +02:00
if entry_ext.map_or(true, |ext| ext != *filter_ext) {
return ignore::WalkState::Continue;
}
}
2017-10-12 08:01:51 +02:00
let search_str_o = if config.search_full_path {
Some(entry_path.to_string_lossy())
2017-10-12 08:01:51 +02:00
} else {
entry_path.file_name().map(|f| f.to_string_lossy())
2017-10-12 08:01:51 +02:00
};
2017-10-10 08:01:17 +02:00
if let Some(search_str) = search_str_o {
2017-10-12 08:01:51 +02:00
pattern.find(&*search_str).map(|_| {
let mut path_rel_buf = match fshelper::path_relative_from(entry_path, &*base) {
Some(p) => p,
None => error("Error: could not get relative path for directory entry."),
};
if path_rel_buf == PathBuf::new() {
path_rel_buf.push(".");
}
// TODO: take care of the unwrap call
2017-10-12 08:01:51 +02:00
tx_thread.send(path_rel_buf.to_owned()).unwrap()
});
2017-10-10 08:01:17 +02:00
}
ignore::WalkState::Continue
})
});
// Drop the initial sender. If we don't do this, the receiver will block even
// if all threads have finished, since there is still one sender around.
drop(tx);
// Wait for the receiver thread to print out all results.
receiver_thread.join().unwrap();
}