diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index e3182b3..228dbb4 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -209,7 +209,7 @@ jobs: DPKG_BASENAME=${{ needs.crate_metadata.outputs.name }} DPKG_CONFLICTS=${{ needs.crate_metadata.outputs.name }}-musl - case ${{ matrix.job.target }} in *-musl) DPKG_BASENAME=${{ needs.crate_metadata.outputs.name }}-musl ; DPKG_CONFLICTS=${{ needs.crate_metadata.outputs.name }} ;; esac; + case ${{ matrix.job.target }} in *-musl*) DPKG_BASENAME=${{ needs.crate_metadata.outputs.name }}-musl ; DPKG_CONFLICTS=${{ needs.crate_metadata.outputs.name }} ;; esac; DPKG_VERSION=${{ needs.crate_metadata.outputs.version }} unset DPKG_ARCH diff --git a/CHANGELOG.md b/CHANGELOG.md index 32667aa..e852038 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,21 +2,55 @@ ## Features + +## Bugfixes + +- Respect NO_COLOR environment variable with `--list-details` option. (#1455) + + +## Changes + + +## Other + + + + +# v9.0.0 + +## Performance + +- Performance has been *significantly improved*, both due to optimizations in the underlying `ignore` + crate (#1429), and in `fd` itself (#1422, #1408, #1362) - @tavianator. + [Benchmarks results](https://gist.github.com/tavianator/32edbe052f33ef60570cf5456b59de81) show gains + of 6-8x for full traversals of smaller directories (100k files) and up to 13x for larger directories (1M files). + +- The default number of threads is now constrained to be at most 64. This should improve startup time on + systems with many CPU cores. (#1203, #1410, #1412, #1431) - @tmccombs and @tavianator + +- New flushing behavior when writing output to stdout, providing better performance for TTY and non-TTY + use cases, see #1452 and #1313 (@tavianator). 
+ +## Features + +- Support character and block device file types, see #1213 and #1336 (@cgzones) - Breaking: `.git/` is now ignored by default when using `--hidden` / `-H`, use `--no-ignore` / `-I` or `--no-ignore-vcs` to override, see #1387 and #1396 (@skoriop) - ## Bugfixes - Fix `NO_COLOR` support, see #1421 (@acuteenvy) -## Changes - -- The default number of threads is now constrained to be at most 16. This should improve startup time on - systems with many CPU cores. (#1203) - ## Other +- Fixed documentation typos, see #1409 (@marcospb19) + +## Thanks + +Special thanks to @tavianator for his incredible work on performance in the `ignore` crate and `fd` itself. + + + # v8.7.1 ## Bugfixes diff --git a/Cargo.lock b/Cargo.lock index 28843dc..cb159cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.7" +version = "4.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" +checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272" dependencies = [ "clap_builder", "clap_derive", @@ -164,9 +164,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.7" +version = "4.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" +checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1" dependencies = [ "anstream", "anstyle", @@ -313,7 +313,7 @@ checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "fd-find" -version = "8.7.1" +version = "9.0.0" dependencies = [ "aho-corasick", "anyhow", @@ -465,9 +465,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.149" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "linux-raw-sys" @@ -483,9 +483,9 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lscolors" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7015a04103ad78abb77e4b79ed151e767922d1cfde5f62640471c629a2320d" +checksum = "ab0b209ec3976527806024406fe765474b9a1750a0ed4b8f0372364741f50e7b" dependencies = [ "nu-ansi-term", ] @@ -749,9 +749,9 @@ dependencies = [ [[package]] name = "test-case" -version = "3.2.1" +version = "3.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8f1e820b7f1d95a0cdbf97a5df9de10e1be731983ab943e56703ac1b8e9d425" +checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8" dependencies = [ "test-case-macros", ] diff --git a/Cargo.toml b/Cargo.toml index 4f2f464..6ca9f90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ license = "MIT OR Apache-2.0" name = "fd-find" readme = "README.md" repository = "https://github.com/sharkdp/fd" -version = "8.7.1" +version = "9.0.0" edition= "2021" rust-version = "1.70.0" @@ -51,7 +51,7 @@ clap_complete = {version = "4.4.4", optional = true} faccess = "0.2.4" [dependencies.clap] -version = "4.4.7" +version = "4.4.10" features = ["suggestions", "color", "wrap_help", "cargo", "derive"] [dependencies.chrono] @@ -60,7 +60,7 @@ default-features = false features = ["std", "clock"] [dependencies.lscolors] -version = "0.15" +version = "0.16" default-features = false features = ["nu-ansi-term"] @@ -80,7 +80,7 @@ jemallocator = {version = "0.5.4", optional = true} diff = "0.1" tempfile = "3.8" filetime = "0.2" -test-case = "3.1" +test-case = "3.3" [profile.release] lto = true diff --git a/README.md b/README.md index 9e71d63..18cbdb0 100644 --- a/README.md +++ b/README.md @@ -143,7 
+143,7 @@ target/debug/deps/libnum_cpus-f5ce7ef99006aa05.rlib ``` To really search *all* files and directories, simply combine the hidden and ignore features to show -everything (`-HI`). +everything (`-HI`) or use `-u`/`--unrestricted`. ### Matching the full path By default, *fd* only matches the filename of each file. However, using the `--full-path` or `-p` option, @@ -261,7 +261,9 @@ To make exclude-patterns like these permanent, you can create a `.fdignore` file /mnt/external-drive *.bak ``` -Note: `fd` also supports `.ignore` files that are used by other programs such as `rg` or `ag`. + +> [!NOTE] +> `fd` also supports `.ignore` files that are used by other programs such as `rg` or `ag`. If you want `fd` to ignore these patterns globally, you can put them in `fd`'s global ignore file. This is usually located in `~/.config/fd/ignore` in macOS or Linux, and `%APPDATA%\fd\ignore` in @@ -284,7 +286,8 @@ option: If you also want to remove a certain class of directories, you can use the same technique. You will have to use `rm`s `--recursive`/`-r` flag to remove directories. -Note: there are scenarios where using `fd … -X rm -r` can cause race conditions: if you have a +> [!NOTE] +> There are scenarios where using `fd … -X rm -r` can cause race conditions: if you have a path like `…/foo/bar/foo/…` and want to remove all directories named `foo`, you can end up in a situation where the outer `foo` directory is removed first, leading to (harmless) *"'foo/bar/foo': No such file or directory"* errors in the `rm` call. @@ -331,64 +334,57 @@ Options: ## Benchmark -Let's search my home folder for files that end in `[0-9].jpg`. It contains ~190.000 -subdirectories and about a million files. For averaging and statistical analysis, I'm using +Let's search my home folder for files that end in `[0-9].jpg`. It contains ~750.000 +subdirectories and about 4 million files. For averaging and statistical analysis, I'm using [hyperfine](https://github.com/sharkdp/hyperfine). 
The following benchmarks are performed with a "warm"/pre-filled disk-cache (results for a "cold" disk-cache show the same trends). Let's start with `find`: ``` -Benchmark #1: find ~ -iregex '.*[0-9]\.jpg$' - - Time (mean ± σ): 7.236 s ± 0.090 s - - Range (min … max): 7.133 s … 7.385 s +Benchmark 1: find ~ -iregex '.*[0-9]\.jpg$' + Time (mean ± σ): 19.922 s ± 0.109 s + Range (min … max): 19.765 s … 20.065 s ``` `find` is much faster if it does not need to perform a regular-expression search: ``` -Benchmark #2: find ~ -iname '*[0-9].jpg' - - Time (mean ± σ): 3.914 s ± 0.027 s - - Range (min … max): 3.876 s … 3.964 s +Benchmark 2: find ~ -iname '*[0-9].jpg' + Time (mean ± σ): 11.226 s ± 0.104 s + Range (min … max): 11.119 s … 11.466 s ``` -Now let's try the same for `fd`. Note that `fd` *always* performs a regular expression -search. The options `--hidden` and `--no-ignore` are needed for a fair comparison, -otherwise `fd` does not have to traverse hidden folders and ignored paths (see below): +Now let's try the same for `fd`. Note that `fd` performs a regular expression +search by default. The `-u`/`--unrestricted` option is needed here for +a fair comparison. Otherwise `fd` does not have to traverse hidden folders and +ignored paths (see below): ``` -Benchmark #3: fd -HI '.*[0-9]\.jpg$' ~ - - Time (mean ± σ): 811.6 ms ± 26.9 ms - - Range (min … max): 786.0 ms … 870.7 ms +Benchmark 3: fd -u '[0-9]\.jpg$' ~ + Time (mean ± σ): 854.8 ms ± 10.0 ms + Range (min … max): 839.2 ms … 868.9 ms ``` -For this particular example, `fd` is approximately nine times faster than `find -iregex` -and about five times faster than `find -iname`. By the way, both tools found the exact -same 20880 files :smile:. +For this particular example, `fd` is approximately **23 times faster** than `find -iregex` +and about **13 times faster** than `find -iname`. By the way, both tools found the exact +same 546 files :smile:. 
-Finally, let's run `fd` without `--hidden` and `--no-ignore` (this can lead to different -search results, of course). If *fd* does not have to traverse the hidden and git-ignored -folders, it is almost an order of magnitude faster: -``` -Benchmark #4: fd '[0-9]\.jpg$' ~ - - Time (mean ± σ): 123.7 ms ± 6.0 ms - - Range (min … max): 118.8 ms … 140.0 ms -``` - -**Note**: This is *one particular* benchmark on *one particular* machine. While I have -performed quite a lot of different tests (and found consistent results), things might -be different for you! I encourage everyone to try it out on their own. See +**Note**: This is *one particular* benchmark on *one particular* machine. While we have +performed a lot of different tests (and found consistent results), things might +be different for you! We encourage everyone to try it out on their own. See [this repository](https://github.com/sharkdp/fd-benchmarks) for all necessary scripts. -Concerning *fd*'s speed, the main credit goes to the `regex` and `ignore` crates that are also used -in [ripgrep](https://github.com/BurntSushi/ripgrep) (check it out!). +Concerning *fd*'s speed, a lot of credit goes to the `regex` and `ignore` crates that are +also used in [ripgrep](https://github.com/BurntSushi/ripgrep) (check it out!). ## Troubleshooting +### `fd` does not find my file! + +Remember that `fd` ignores hidden directories and files by default. It also ignores patterns +from `.gitignore` files. If you want to make sure to find absolutely every possible file, always +use the `-u`/`--unrestricted` option (or `-HI` to enable hidden and ignored files): +``` bash +> fd -u … +``` + ### Colorized output `fd` can colorize files by extension, just like `ls`. In order for this to work, the environment @@ -402,15 +398,6 @@ for alternative, more complete (or more colorful) variants, see [here](https://g `fd` also honors the [`NO_COLOR`](https://no-color.org/) environment variable. -### `fd` does not find my file! 
- -Remember that `fd` ignores hidden directories and files by default. It also ignores patterns -from `.gitignore` files. If you want to make sure to find absolutely every possible file, always -use the options `-H` and `-I` to disable these two features: -``` bash -> fd -HI … -``` - ### `fd` doesn't seem to interpret my regex pattern correctly A lot of special regex characters (like `[]`, `^`, `$`, ..) are also special characters in your @@ -543,7 +530,7 @@ Make sure that `$HOME/.local/bin` is in your `$PATH`. If you use an older version of Ubuntu, you can download the latest `.deb` package from the [release page](https://github.com/sharkdp/fd/releases) and install it via: ``` bash -sudo dpkg -i fd_8.7.1_amd64.deb # adapt version number and architecture +sudo dpkg -i fd_9.0.0_amd64.deb # adapt version number and architecture ``` ### On Debian @@ -677,7 +664,7 @@ With Rust's package manager [cargo](https://github.com/rust-lang/cargo), you can ``` cargo install fd-find ``` -Note that rust version *1.64.0* or later is required. +Note that rust version *1.70.0* or later is required. `make` is also needed for the build. @@ -708,8 +695,6 @@ cargo install --path . ## License -Copyright (c) 2017-2021 The fd developers - `fd` is distributed under the terms of both the MIT License and the Apache License 2.0. See the [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) files for license details. diff --git a/doc/release-checklist.md b/doc/release-checklist.md index b11e75d..0a20802 100644 --- a/doc/release-checklist.md +++ b/doc/release-checklist.md @@ -9,7 +9,7 @@ necessary changes for the upcoming release. - [ ] Update version in `Cargo.toml`. Run `cargo build` to update `Cargo.lock`. Make sure to `git add` the `Cargo.lock` changes as well. - [ ] Find the current min. supported Rust version by running - `grep '^\s*MIN_SUPPORTED_RUST_VERSION' .github/workflows/CICD.yml`. + `grep rust-version Cargo.toml`. - [ ] Update the `fd` version and the min. 
supported Rust version in `README.md`. - [ ] Update `CHANGELOG.md`. Change the heading of the *"Upcoming release"* section to the version of this release. diff --git a/src/cli.rs b/src/cli.rs index c17f0b8..64ea111 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -715,24 +715,14 @@ impl Opts { fn default_num_threads() -> NonZeroUsize { // If we can't get the amount of parallelism for some reason, then // default to a single thread, because that is safe. - // Note that the minimum value for a NonZeroUsize is 1. - // Unfortunately, we can't do `NonZeroUsize::new(1).unwrap()` - // in a const context. - const FALLBACK_PARALLELISM: NonZeroUsize = NonZeroUsize::MIN; - // As the number of threads increases, the startup time suffers from - // initializing the threads, and we get diminishing returns from additional - // parallelism. So set a maximum number of threads to use by default. - // - // This value is based on some empirical observations, but the ideal value - // probably depends on the exact hardware in use. - // - // Safety: The literal "20" is known not to be zero. - const MAX_DEFAULT_THREADS: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(20) }; + let fallback = NonZeroUsize::MIN; + // To limit startup overhead on massively parallel machines, don't use more + // than 64 threads. 
+ let limit = NonZeroUsize::new(64).unwrap(); - std::cmp::min( - std::thread::available_parallelism().unwrap_or(FALLBACK_PARALLELISM), - MAX_DEFAULT_THREADS, - ) + std::thread::available_parallelism() + .unwrap_or(fallback) + .min(limit) } #[derive(Copy, Clone, PartialEq, Eq, ValueEnum)] @@ -768,17 +758,6 @@ pub enum ColorWhen { Never, } -impl ColorWhen { - pub fn as_str(&self) -> &'static str { - use ColorWhen::*; - match *self { - Auto => "auto", - Never => "never", - Always => "always", - } - } -} - // there isn't a derive api for getting grouped values yet, // so we have to use hand-rolled parsing for exec and exec-batch pub struct Exec { diff --git a/src/dir_entry.rs b/src/dir_entry.rs index 3a19d59..f44f2be 100644 --- a/src/dir_entry.rs +++ b/src/dir_entry.rs @@ -8,11 +8,13 @@ use lscolors::{Colorable, LsColors, Style}; use crate::config::Config; use crate::filesystem::strip_current_dir; +#[derive(Debug)] enum DirEntryInner { Normal(ignore::DirEntry), BrokenSymlink(PathBuf), } +#[derive(Debug)] pub struct DirEntry { inner: DirEntryInner, metadata: OnceCell>, diff --git a/src/exec/job.rs b/src/exec/job.rs index af603cc..4864d6d 100644 --- a/src/exec/job.rs +++ b/src/exec/job.rs @@ -1,9 +1,6 @@ use std::sync::Mutex; -use crossbeam_channel::Receiver; - use crate::config::Config; -use crate::dir_entry::DirEntry; use crate::error::print_error; use crate::exit_codes::{merge_exitcodes, ExitCode}; use crate::walk::WorkerResult; @@ -14,7 +11,7 @@ use super::CommandSet; /// generate a command with the supplied command template. The generated command will then /// be executed, and this process will continue until the receiver's sender has closed. 
pub fn job( - rx: Receiver, + results: impl IntoIterator, cmd: &CommandSet, out_perm: &Mutex<()>, config: &Config, @@ -22,35 +19,39 @@ pub fn job( // Output should be buffered when only running a single thread let buffer_output: bool = config.threads > 1; - let mut results: Vec = Vec::new(); - loop { + let mut ret = ExitCode::Success; + for result in results { // Obtain the next result from the receiver, else if the channel // has closed, exit from the loop - let dir_entry: DirEntry = match rx.recv() { - Ok(WorkerResult::Entry(dir_entry)) => dir_entry, - Ok(WorkerResult::Error(err)) => { + let dir_entry = match result { + WorkerResult::Entry(dir_entry) => dir_entry, + WorkerResult::Error(err) => { if config.show_filesystem_errors { print_error(err.to_string()); } continue; } - Err(_) => break, }; // Generate a command, execute it and store its exit code. - results.push(cmd.execute( + let code = cmd.execute( dir_entry.stripped_path(config), config.path_separator.as_deref(), out_perm, buffer_output, - )) + ); + ret = merge_exitcodes([ret, code]); } // Returns error in case of any error. 
- merge_exitcodes(results) + ret } -pub fn batch(rx: Receiver, cmd: &CommandSet, config: &Config) -> ExitCode { - let paths = rx +pub fn batch( + results: impl IntoIterator, + cmd: &CommandSet, + config: &Config, +) -> ExitCode { + let paths = results .into_iter() .filter_map(|worker_result| match worker_result { WorkerResult::Entry(dir_entry) => Some(dir_entry.into_stripped_path(config)), diff --git a/src/main.rs b/src/main.rs index 5440601..bef4120 100644 --- a/src/main.rs +++ b/src/main.rs @@ -325,18 +325,22 @@ fn extract_command(opts: &mut Opts, colored_output: bool) -> Result Result> { +fn determine_ls_command(colored_output: bool) -> Result> { #[allow(unused)] let gnu_ls = |command_name| { + let color_arg = if colored_output { + "--color=always" + } else { + "--color=never" + }; // Note: we use short options here (instead of --long-options) to support more // platforms (like BusyBox). vec![ diff --git a/src/walk.rs b/src/walk.rs index 691c5d0..c81d2a4 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -4,12 +4,12 @@ use std::io::{self, Write}; use std::mem; use std::path::PathBuf; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Mutex, MutexGuard}; use std::thread; use std::time::{Duration, Instant}; use anyhow::{anyhow, Result}; -use crossbeam_channel::{bounded, Receiver, RecvTimeoutError, Sender}; +use crossbeam_channel::{bounded, Receiver, RecvTimeoutError, SendError, Sender}; use etcetera::BaseStrategy; use ignore::overrides::{Override, OverrideBuilder}; use ignore::{self, WalkBuilder, WalkParallel, WalkState}; @@ -36,6 +36,7 @@ enum ReceiverMode { /// The Worker threads can result in a valid entry having PathBuf or an error. 
#[allow(clippy::large_enum_variant)] +#[derive(Debug)] pub enum WorkerResult { // Errors should be rare, so it's probably better to allow large_enum_variant than // to box the Entry variant @@ -43,6 +44,83 @@ pub enum WorkerResult { Error(ignore::Error), } +/// A batch of WorkerResults to send over a channel. +#[derive(Clone)] +struct Batch { + items: Arc>>>, +} + +impl Batch { + fn new() -> Self { + Self { + items: Arc::new(Mutex::new(Some(vec![]))), + } + } + + fn lock(&self) -> MutexGuard<'_, Option>> { + self.items.lock().unwrap() + } +} + +impl IntoIterator for Batch { + type Item = WorkerResult; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.lock().take().unwrap().into_iter() + } +} + +/// Wrapper that sends batches of items at once over a channel. +struct BatchSender { + batch: Batch, + tx: Sender, + limit: usize, +} + +impl BatchSender { + fn new(tx: Sender, limit: usize) -> Self { + Self { + batch: Batch::new(), + tx, + limit, + } + } + + /// Check if we need to flush a batch. + fn needs_flush(&self, batch: Option<&Vec>) -> bool { + match batch { + // Limit the batch size to provide some backpressure + Some(vec) => vec.len() >= self.limit, + // Batch was already taken by the receiver, so make a new one + None => true, + } + } + + /// Add an item to a batch. + fn send(&mut self, item: WorkerResult) -> Result<(), SendError<()>> { + let mut batch = self.batch.lock(); + + if self.needs_flush(batch.as_ref()) { + drop(batch); + self.batch = Batch::new(); + batch = self.batch.lock(); + } + + let items = batch.as_mut().unwrap(); + items.push(item); + + if items.len() == 1 { + // New batch, send it over the channel + self.tx + .send(self.batch.clone()) + .map_err(|_| SendError(()))?; + } + + Ok(()) + } +} + /// Maximum size of the output buffer before flushing results to the console const MAX_BUFFER_LENGTH: usize = 1000; /// Default duration until output buffering switches to streaming. 
@@ -57,7 +135,7 @@ struct ReceiverBuffer<'a, W> { /// The ^C notifier. interrupt_flag: &'a AtomicBool, /// Receiver for worker results. - rx: Receiver, + rx: Receiver, /// Standard output. stdout: W, /// The current buffer mode. @@ -72,7 +150,7 @@ struct ReceiverBuffer<'a, W> { impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Create a new receiver buffer. - fn new(state: &'a WorkerState, rx: Receiver, stdout: W) -> Self { + fn new(state: &'a WorkerState, rx: Receiver, stdout: W) -> Self { let config = &state.config; let quit_flag = state.quit_flag.as_ref(); let interrupt_flag = state.interrupt_flag.as_ref(); @@ -103,7 +181,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { } /// Receive the next worker result. - fn recv(&self) -> Result { + fn recv(&self) -> Result { match self.mode { ReceiverMode::Buffering => { // Wait at most until we should switch to streaming @@ -119,34 +197,44 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Wait for a result or state change. fn poll(&mut self) -> Result<(), ExitCode> { match self.recv() { - Ok(WorkerResult::Entry(dir_entry)) => { - if self.config.quiet { - return Err(ExitCode::HasResults(true)); - } + Ok(batch) => { + for result in batch { + match result { + WorkerResult::Entry(dir_entry) => { + if self.config.quiet { + return Err(ExitCode::HasResults(true)); + } - match self.mode { - ReceiverMode::Buffering => { - self.buffer.push(dir_entry); - if self.buffer.len() > MAX_BUFFER_LENGTH { - self.stream()?; + match self.mode { + ReceiverMode::Buffering => { + self.buffer.push(dir_entry); + if self.buffer.len() > MAX_BUFFER_LENGTH { + self.stream()?; + } + } + ReceiverMode::Streaming => { + self.print(&dir_entry)?; + } + } + + self.num_results += 1; + if let Some(max_results) = self.config.max_results { + if self.num_results >= max_results { + return self.stop(); + } + } + } + WorkerResult::Error(err) => { + if self.config.show_filesystem_errors { + print_error(err.to_string()); + } } } - ReceiverMode::Streaming => { - 
self.print(&dir_entry)?; - self.flush()?; - } } - self.num_results += 1; - if let Some(max_results) = self.config.max_results { - if self.num_results >= max_results { - return self.stop(); - } - } - } - Ok(WorkerResult::Error(err)) => { - if self.config.show_filesystem_errors { - print_error(err.to_string()); + // If we don't have another batch ready, flush before waiting + if self.mode == ReceiverMode::Streaming && self.rx.is_empty() { + self.flush()?; } } Err(RecvTimeoutError::Timeout) => { @@ -201,7 +289,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Flush stdout if necessary. fn flush(&mut self) -> Result<(), ExitCode> { - if self.config.interactive_terminal && self.stdout.flush().is_err() { + if self.stdout.flush().is_err() { // Probably a broken pipe. Exit gracefully. return Err(ExitCode::GeneralError); } @@ -319,13 +407,13 @@ impl WorkerState { /// Run the receiver work, either on this thread or a pool of background /// threads (for --exec). - fn receive(&self, rx: Receiver) -> ExitCode { + fn receive(&self, rx: Receiver) -> ExitCode { let config = &self.config; // This will be set to `Some` if the `--exec` argument was supplied. if let Some(ref cmd) = config.command { if cmd.in_batch_mode() { - exec::batch(rx, cmd, &config) + exec::batch(rx.into_iter().flatten(), cmd, &config) } else { let out_perm = Mutex::new(()); @@ -337,7 +425,8 @@ impl WorkerState { let rx = rx.clone(); // Spawn a job thread that will listen for and execute inputs. - let handle = scope.spawn(|| exec::job(rx, cmd, &out_perm, &config)); + let handle = scope + .spawn(|| exec::job(rx.into_iter().flatten(), cmd, &out_perm, &config)); // Push the handle of the spawned thread into the vector for later joining. handles.push(handle); @@ -355,12 +444,20 @@ impl WorkerState { } /// Spawn the sender threads. 
- fn spawn_senders(&self, walker: WalkParallel, tx: Sender) { + fn spawn_senders(&self, walker: WalkParallel, tx: Sender) { walker.run(|| { let patterns = &self.patterns; let config = &self.config; let quit_flag = self.quit_flag.as_ref(); - let tx = tx.clone(); + + let mut limit = 0x100; + if let Some(cmd) = &config.command { + if !cmd.in_batch_mode() && config.threads > 1 { + // Evenly distribute work between multiple receivers + limit = 1; + } + } + let mut tx = BatchSender::new(tx.clone(), limit); Box::new(move |entry| { if quit_flag.load(Ordering::Relaxed) { @@ -545,8 +642,7 @@ impl WorkerState { .unwrap(); } - // Channel capacity was chosen empircally to perform similarly to an unbounded channel - let (tx, rx) = bounded(0x4000 * config.threads); + let (tx, rx) = bounded(2 * config.threads); let exit_code = thread::scope(|scope| { // Spawn the receiver thread(s)