Check the pattern before anything else, since it doesn't require metadata

This should partially address #432 by decreasing the number of stat() calls:

    $ strace -c -f ./fd-before '\.h$' /usr -j1 -S +1k >/dev/null
    % time     seconds  usecs/call     calls    errors syscall
    ------ ----------- ----------- --------- --------- ----------------
     15.71    8.831948           7   1192279     46059 stat
    $ strace -c -f ./fd-after '\.h$' /usr -j1 -S +1k >/dev/null
    % time     seconds  usecs/call     calls    errors syscall
    ------ ----------- ----------- --------- --------- ----------------
      7.92    1.972474          10    183907     46046 stat

Though that is still not as few stat() calls as possible; find and bfs need fewer:

    $ strace -c -f find /usr -iname '*.h' -size +1k >/dev/null
    % time     seconds  usecs/call     calls    errors syscall
    ------ ----------- ----------- --------- --------- ----------------
     19.01    0.946500           5    161649           newfstatat
    $ strace -c -f bfs /usr -iname '*.h' -size +1k >/dev/null
    % time     seconds  usecs/call     calls    errors syscall
    ------ ----------- ----------- --------- --------- ----------------
     13.73    0.406565           5     69005           statx

Performance is much better when metadata is required:

    $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1 -S +1k"
    Benchmark #1: ./fd-before '\.h$' /usr -j1 -S +1k
      Time (mean ± σ):      4.623 s ±  0.154 s    [User: 1.465 s, System: 3.354 s]
      Range (min … max):    4.327 s …  4.815 s    10 runs

    Benchmark #2: ./fd-after '\.h$' /usr -j1 -S +1k
      Time (mean ± σ):      2.650 s ±  0.058 s    [User: 1.258 s, System: 1.592 s]
      Range (min … max):    2.568 s …  2.723 s    10 runs

    Summary
      './fd-after '\.h$' /usr -j1 -S +1k' ran
        1.74 ± 0.07 times faster than './fd-before '\.h$' /usr -j1 -S +1k'

Performance remains the same when metadata is not required:

    $ hyperfine ./fd-{before,after}" '\.h$' /usr -j1"
    Benchmark #1: ./fd-before '\.h$' /usr -j1
      Time (mean ± σ):      2.382 s ±  0.038 s    [User: 1.221 s, System: 1.286 s]
      Range (min … max):    2.325 s …  2.433 s    10 runs

    Benchmark #2: ./fd-after '\.h$' /usr -j1
      Time (mean ± σ):      2.362 s ±  0.034 s    [User: 1.193 s, System: 1.294 s]
      Range (min … max):    2.307 s …  2.422 s    10 runs

    Summary
      './fd-after '\.h$' /usr -j1' ran
        1.01 ± 0.02 times faster than './fd-before '\.h$' /usr -j1'
This commit is contained in:
Tavian Barnes 2019-04-25 21:17:42 -04:00 committed by David Peter
parent 35945c4a62
commit 5cbd8405ec
1 changed file with 36 additions and 32 deletions

View File

@@ -267,6 +267,38 @@ fn spawn_senders(
return ignore::WalkState::Continue;
}
// Check the name first, since it doesn't require metadata
let entry_path = entry.path();
let search_str_o = if config.search_full_path {
match fshelper::path_absolute_form(entry_path) {
Ok(path_abs_buf) => Some(path_abs_buf.to_string_lossy().into_owned().into()),
Err(_) => {
print_error_and_exit!("Unable to retrieve absolute path.");
}
}
} else {
entry_path.file_name().map(|f| f.to_string_lossy())
};
if let Some(search_str) = search_str_o {
if !pattern.is_match(&*search_str) {
return ignore::WalkState::Continue;
}
}
// Filter out unwanted extensions.
if let Some(ref exts_regex) = config.extensions {
if let Some(path_str) = entry_path.file_name().and_then(|s| s.to_str()) {
if !exts_regex.is_match(path_str) {
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
}
// Filter out unwanted file types.
if let Some(ref file_types) = config.file_types {
@@ -294,19 +326,6 @@ fn spawn_senders(
}
}
let entry_path = entry.path();
// Filter out unwanted extensions.
if let Some(ref exts_regex) = config.extensions {
if let Some(path_str) = entry_path.file_name().and_then(|s| s.to_str()) {
if !exts_regex.is_match(path_str) {
return ignore::WalkState::Continue;
}
} else {
return ignore::WalkState::Continue;
}
}
// Filter out unwanted sizes if it is a file and we have been given size constraints.
if !config.size_constraints.is_empty() {
if entry_path.is_file() {
@@ -345,25 +364,10 @@ fn spawn_senders(
}
}
let search_str_o = if config.search_full_path {
match fshelper::path_absolute_form(entry_path) {
Ok(path_abs_buf) => Some(path_abs_buf.to_string_lossy().into_owned().into()),
Err(_) => {
print_error_and_exit!("Unable to retrieve absolute path.");
}
}
} else {
entry_path.file_name().map(|f| f.to_string_lossy())
};
if let Some(search_str) = search_str_o {
if pattern.is_match(&*search_str) {
// TODO: take care of the unwrap call
tx_thread
.send(WorkerResult::Entry(entry_path.to_owned()))
.unwrap()
}
}
// TODO: take care of the unwrap call
tx_thread
.send(WorkerResult::Entry(entry_path.to_owned()))
.unwrap();
ignore::WalkState::Continue
})