Add --max-results=<count> option

This new option can be used instead of piping to `head -n <count>` for improved performance:

| Command | Mean [ms] | Min [ms] | Max [ms] | Relative |
|:---|---:|---:|---:|---:|
| `fd --max-buffer-time=0 flow.yaml` | 153.9 ± 2.5 | 151.3 | 170.3 | 4.21 ± 5.86 |
| `fd --max-buffer-time=0 flow.yaml \| head -n 1` | 145.3 ± 17.4 | 111.0 | 180.2 | 3.98 ± 5.55 |
| `fd --max-results=1 flow.yaml` | 36.5 ± 50.8 | 7.2 | 145.7 | 1.00 |

Note: there is a large standard deviation on the last result due to the non-deterministic file system traversal. With `--max-results`, we don't have to traverse the whole filesystem tree, so it's all about luck.

closes #472
closes #476
2020-04-02 17:52:44 +02:00 · 2020-04-02 17:52:44 +02:00 · d43827fe57
parent ee673c92d3
commit d43827fe57
8 changed files with 84 additions and 12 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,10 @@

 ## Features

+- Added `--max-results=<count>` option to limit the number of search results, see #472 and #476.
+  This can be useful to speed up searches in cases where you know that there are only N results.
+  Using this option is also (slightly) faster than piping to `head -n <count>`, where `fd` can only
+  exit once it has found search result number `<count> + 1`.
 - Support additional ANSI font styles in `LS_COLORS`: faint, slow blink, rapid blink, dimmed, hidden and strikethrough.

 ## Bugfixes
--- a/doc/fd.1
+++ b/doc/fd.1
@ -80,6 +80,9 @@ is matched against the full path.
 Separate search results by the null character (instead of newlines). Useful for piping results to
 .IR xargs .
 .TP
+.B \-\-max\-results count
+Limit the number of search results to 'count' and quit immediately.
+.TP
 .B \-\-show-errors
 Enable the display of filesystem errors for situations such as insufficient
 permissions or dead symlinks.
--- a/src/app.rs
+++ b/src/app.rs
@ -251,6 +251,14 @@ pub fn build_app() -> App<'static, 'static> {
                .value_name("date|dur")
                .number_of_values(1),
        )
+        .arg(
+            arg("max-results")
+                .long("max-results")
+                .takes_value(true)
+                .value_name("count")
+                .conflicts_with_all(&["exec", "exec-batch"])
+                .hidden_short_help(true),
+        )
        .arg(
            arg("show-errors")
                .long("show-errors")
@ -457,6 +465,9 @@ fn usage() -> HashMap<&'static str, Help> {
           Examples:\n    \
               --changed-before '2018-10-27 10:00:00'\n    \
               --change-older-than 2weeks");
+    doc!(h, "max-results"
+        , "(hidden)"
+        , "Limit the number of search results to 'count' and quit immediately.");
    doc!(h, "show-errors"
        , "Enable display of filesystem errors"
        , "Enable the display of filesystem errors for situations such as insufficient permissions \
--- a/src/internal/opts.rs
+++ b/src/internal/opts.rs
@ -81,4 +81,7 @@ pub struct FdOptions {

    /// The separator used to print file paths.
    pub path_separator: Option<String>,
+
+    /// The maximum number of search results
+    pub max_results: Option<usize>,
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -279,6 +279,10 @@ fn main() {
        time_constraints,
        show_filesystem_errors: matches.is_present("show-errors"),
        path_separator,
+        max_results: matches
+            .value_of("max-results")
+            .and_then(|n| usize::from_str_radix(n, 10).ok())
+            .filter(|&n| n != 0),
    };

    match RegexBuilder::new(&pattern_regex)
--- a/src/walk.rs
+++ b/src/walk.rs
@ -199,6 +199,8 @@ fn spawn_receiver(
            let stdout = io::stdout();
            let mut stdout = stdout.lock();

+            let mut num_results = 0;
+
            for worker_result in rx {
                match worker_result {
                    WorkerResult::Entry(value) => {
@ -229,6 +231,8 @@ fn spawn_receiver(
                                output::print_entry(&mut stdout, &value, &config, &wants_to_quit);
                            }
                        }
+
+                        num_results += 1;
                    }
                    WorkerResult::Error(err) => {
                        if show_filesystem_errors {
@ -236,6 +240,12 @@ fn spawn_receiver(
                        }
                    }
                }
+
+                if let Some(max_results) = config.max_results {
+                    if num_results >= max_results {
+                        break;
+                    }
+                }
            }

            // If we have finished fast enough (faster than max_buffer_time), we haven't streamed
--- a/tests/testenv/mod.rs
+++ b/tests/testenv/mod.rs
@ -192,6 +192,29 @@ impl TestEnv {
        PathBuf::from(components.next().expect("root directory").as_os_str())
    }

+    /// Run *fd* with the specified arguments in the specified path under the root working
+    /// directory, panic if it does not exit successfully, and return the captured output.
+    pub fn assert_success_and_get_output<P: AsRef<Path>>(
+        &self,
+        path: P,
+        args: &[&str],
+    ) -> process::Output {
+        // Set up the *fd* command; `path` is joined onto the temporary directory.
+        let mut cmd = process::Command::new(&self.fd_exe);
+        cmd.current_dir(self.temp_dir.path().join(path));
+        cmd.args(args);
+
+        // Run *fd* and collect its output.
+        let output = cmd.output().expect("fd output");
+
+        // Panic with the formatted exit error if *fd* did not exit successfully.
+        if !output.status.success() {
+            panic!(format_exit_error(args, &output));
+        }
+
+        output
+    }
+
    /// Assert that calling *fd* with the specified arguments produces the expected output.
    pub fn assert_output(&self, args: &[&str], expected: &str) {
        self.assert_output_subdirectory(".", args, expected)
@ -205,18 +228,7 @@ impl TestEnv {
        args: &[&str],
        expected: &str,
    ) {
-        // Setup *fd* command.
-        let mut cmd = process::Command::new(&self.fd_exe);
-        cmd.current_dir(self.temp_dir.path().join(path));
-        cmd.args(args);
-
-        // Run *fd*.
-        let output = cmd.output().expect("fd output");
-
-        // Check for exit status.
-        if !output.status.success() {
-            panic!(format_exit_error(args, &output));
-        }
+        let output = self.assert_success_and_get_output(path, args);

        // Normalize both expected and actual output.
        let expected = normalize_output(expected, true, self.normalize_line);
--- a/tests/tests.rs
+++ b/tests/tests.rs
@ -1470,3 +1470,28 @@ fn test_base_directory() {
        ),
    );
 }
+
+#[test]
+fn test_max_results() {
+    let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES);
+
+    // A count of 0 does not limit the results: both matches are expected
+    te.assert_output(
+        &["--max-results=0", "c.foo"],
+        "one/two/C.Foo2
+         one/two/c.foo",
+    );
+
+    // Limited to two results, which is exactly the number of matches
+    te.assert_output(
+        &["--max-results=2", "c.foo"],
+        "one/two/C.Foo2
+         one/two/c.foo",
+    );
+
+    // Limited to one result. Either C.Foo2 or c.foo is accepted as the single match
+    let output = te.assert_success_and_get_output(".", &["--max-results=1", "c.foo"]);
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stdout = stdout.trim();
+    assert!(stdout == "one/two/C.Foo2" || stdout == "one/two/c.foo");
+}