walk: Add a cache for DirEntry metadata

This commit is contained in:
Tavian Barnes 2021-10-11 12:21:27 -04:00 committed by David Peter
parent 02e9850112
commit 7b5b3ec47b
6 changed files with 67 additions and 24 deletions

View file

@ -1,5 +1,10 @@
# Upcoming release # Upcoming release
## Performance improvements
- File metadata is now cached between the different filters that require it (e.g. `--owner`,
`--size`), reducing the number of `stat` syscalls when multiple filters are used; see #863
## Features ## Features
- Don't buffer command output from `--exec` when using a single thread. See #522 - Don't buffer command output from `--exec` when using a single thread. See #522
@ -15,6 +20,8 @@
- Properly handle write errors to devices that are full, see #737 - Properly handle write errors to devices that are full, see #737
- Use local time zone for time functions (`--change-newer-than`, `--change-older-than`), see #631 (@jacobmischka) - Use local time zone for time functions (`--change-newer-than`, `--change-older-than`), see #631 (@jacobmischka)
- Support `--list-details` on more platforms (like BusyBox), see #783 - Support `--list-details` on more platforms (like BusyBox), see #783
- The filters `--owner`, `--size`, and `--changed-{within,before}` now apply to symbolic links
themselves, rather than the link target, except when `--follow` is specified; see #863
## Changes ## Changes

1
Cargo.lock generated
View file

@ -177,6 +177,7 @@ dependencies = [
"lscolors", "lscolors",
"normpath", "normpath",
"num_cpus", "num_cpus",
"once_cell",
"regex", "regex",
"regex-syntax", "regex-syntax",
"tempdir", "tempdir",

View file

@ -49,6 +49,7 @@ anyhow = "1.0"
dirs-next = "2.0" dirs-next = "2.0"
normpath = "0.3" normpath = "0.3"
chrono = "0.4" chrono = "0.4"
once_cell = "1.8.0"
[dependencies.clap] [dependencies.clap]
version = "2.31.3" version = "2.31.3"

View file

@ -37,7 +37,7 @@ impl FileTypes {
|| (self.executables_only || (self.executables_only
&& !entry && !entry
.metadata() .metadata()
.map(|m| filesystem::is_executable(&m)) .map(|m| filesystem::is_executable(m))
.unwrap_or(false)) .unwrap_or(false))
|| (self.empty_only && !filesystem::is_empty(entry)) || (self.empty_only && !filesystem::is_empty(entry))
|| !(entry_type.is_file() || !(entry_type.is_file()

View file

@ -13,6 +13,7 @@ use std::time;
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use ignore::overrides::OverrideBuilder; use ignore::overrides::OverrideBuilder;
use ignore::{self, WalkBuilder}; use ignore::{self, WalkBuilder};
use once_cell::unsync::OnceCell;
use regex::bytes::Regex; use regex::bytes::Regex;
use crate::config::Config; use crate::config::Config;
@ -295,39 +296,58 @@ fn spawn_receiver(
}) })
} }
pub enum DirEntry { enum DirEntryInner {
Normal(ignore::DirEntry), Normal(ignore::DirEntry),
BrokenSymlink(PathBuf), BrokenSymlink(PathBuf),
} }
/// A directory entry together with a lazily-filled cache of its file metadata.
pub struct DirEntry {
/// The underlying entry: a regular `ignore::DirEntry` or the path of a broken symlink.
inner: DirEntryInner,
/// Filled on the first call to `metadata()`; a stored `None` records that the lookup failed.
metadata: OnceCell<Option<Metadata>>,
}
impl DirEntry { impl DirEntry {
/// Wraps an `ignore::DirEntry`; the metadata cache starts out empty.
fn normal(e: ignore::DirEntry) -> Self {
Self {
inner: DirEntryInner::Normal(e),
metadata: OnceCell::new(),
}
}
/// Builds an entry for a broken symlink known only by its path; the metadata cache starts out
/// empty.
fn broken_symlink(path: PathBuf) -> Self {
Self {
inner: DirEntryInner::BrokenSymlink(path),
metadata: OnceCell::new(),
}
}
pub fn path(&self) -> &Path { pub fn path(&self) -> &Path {
match self { match &self.inner {
DirEntry::Normal(e) => e.path(), DirEntryInner::Normal(e) => e.path(),
DirEntry::BrokenSymlink(pathbuf) => pathbuf.as_path(), DirEntryInner::BrokenSymlink(pathbuf) => pathbuf.as_path(),
} }
} }
pub fn file_type(&self) -> Option<FileType> { pub fn file_type(&self) -> Option<FileType> {
match self { match &self.inner {
DirEntry::Normal(e) => e.file_type(), DirEntryInner::Normal(e) => e.file_type(),
DirEntry::BrokenSymlink(pathbuf) => { DirEntryInner::BrokenSymlink(_) => self.metadata().map(|m| m.file_type()),
pathbuf.symlink_metadata().map(|m| m.file_type()).ok()
}
} }
} }
pub fn metadata(&self) -> Option<Metadata> { pub fn metadata(&self) -> Option<&Metadata> {
match self { self.metadata
DirEntry::Normal(e) => e.metadata().ok(), .get_or_init(|| match &self.inner {
DirEntry::BrokenSymlink(_) => None, DirEntryInner::Normal(e) => e.metadata().ok(),
} DirEntryInner::BrokenSymlink(path) => path.symlink_metadata().ok(),
})
.as_ref()
} }
pub fn depth(&self) -> Option<usize> { pub fn depth(&self) -> Option<usize> {
match self { match &self.inner {
DirEntry::Normal(e) => Some(e.depth()), DirEntryInner::Normal(e) => Some(e.depth()),
DirEntry::BrokenSymlink(_) => None, DirEntryInner::BrokenSymlink(_) => None,
} }
} }
} }
@ -355,7 +375,7 @@ fn spawn_senders(
// Skip the root directory entry. // Skip the root directory entry.
return ignore::WalkState::Continue; return ignore::WalkState::Continue;
} }
Ok(e) => DirEntry::Normal(e), Ok(e) => DirEntry::normal(e),
Err(ignore::Error::WithPath { Err(ignore::Error::WithPath {
path, path,
err: inner_err, err: inner_err,
@ -367,7 +387,7 @@ fn spawn_senders(
.ok() .ok()
.map_or(false, |m| m.file_type().is_symlink()) => .map_or(false, |m| m.file_type().is_symlink()) =>
{ {
DirEntry::BrokenSymlink(path) DirEntry::broken_symlink(path)
} }
_ => { _ => {
return match tx_thread.send(WorkerResult::Error(ignore::Error::WithPath { return match tx_thread.send(WorkerResult::Error(ignore::Error::WithPath {
@ -436,7 +456,7 @@ fn spawn_senders(
#[cfg(unix)] #[cfg(unix)]
{ {
if let Some(ref owner_constraint) = config.owner_constraint { if let Some(ref owner_constraint) = config.owner_constraint {
if let Ok(ref metadata) = entry_path.metadata() { if let Some(metadata) = entry.metadata() {
if !owner_constraint.matches(metadata) { if !owner_constraint.matches(metadata) {
return ignore::WalkState::Continue; return ignore::WalkState::Continue;
} }
@ -449,7 +469,7 @@ fn spawn_senders(
// Filter out unwanted sizes if it is a file and we have been given size constraints. // Filter out unwanted sizes if it is a file and we have been given size constraints.
if !config.size_constraints.is_empty() { if !config.size_constraints.is_empty() {
if entry_path.is_file() { if entry_path.is_file() {
if let Ok(metadata) = entry_path.metadata() { if let Some(metadata) = entry.metadata() {
let file_size = metadata.len(); let file_size = metadata.len();
if config if config
.size_constraints .size_constraints
@ -469,7 +489,7 @@ fn spawn_senders(
// Filter out unwanted modification times // Filter out unwanted modification times
if !config.time_constraints.is_empty() { if !config.time_constraints.is_empty() {
let mut matched = false; let mut matched = false;
if let Ok(metadata) = entry_path.metadata() { if let Some(metadata) = entry.metadata() {
if let Ok(modified) = metadata.modified() { if let Ok(modified) = metadata.modified() {
matched = config matched = config
.time_constraints .time_constraints

View file

@ -1652,9 +1652,22 @@ fn create_file_with_modified<P: AsRef<Path>>(path: P, duration_in_secs: u64) {
filetime::set_file_times(&path, ft, ft).expect("time modification failed"); filetime::set_file_times(&path, ft, ft).expect("time modification failed");
} }
/// Test helper that deletes the symlink at `path`.
///
/// Panics with the message "remove symlink" if the removal fails.
#[cfg(test)]
fn remove_symlink<P: AsRef<Path>>(path: P) {
// Unix build: a single `remove_file` call is used.
#[cfg(unix)]
fs::remove_file(path).expect("remove symlink");
// On Windows, symlinks remember whether they point to files or directories, so try both
#[cfg(windows)]
fs::remove_file(path.as_ref())
.or_else(|_| fs::remove_dir(path.as_ref()))
.expect("remove symlink");
}
#[test] #[test]
fn test_modified_relative() { fn test_modified_relative() {
let te = TestEnv::new(&[], &[]); let te = TestEnv::new(&[], &[]);
remove_symlink(te.test_root().join("symlink"));
create_file_with_modified(te.test_root().join("foo_0_now"), 0); create_file_with_modified(te.test_root().join("foo_0_now"), 0);
create_file_with_modified(te.test_root().join("bar_1_min"), 60); create_file_with_modified(te.test_root().join("bar_1_min"), 60);
create_file_with_modified(te.test_root().join("foo_10_min"), 600); create_file_with_modified(te.test_root().join("foo_10_min"), 600);
@ -1692,8 +1705,9 @@ fn change_file_modified<P: AsRef<Path>>(path: P, iso_date: &str) {
} }
#[test] #[test]
fn test_modified_asolute() { fn test_modified_absolute() {
let te = TestEnv::new(&[], &["15mar2018", "30dec2017"]); let te = TestEnv::new(&[], &["15mar2018", "30dec2017"]);
remove_symlink(te.test_root().join("symlink"));
change_file_modified(te.test_root().join("15mar2018"), "2018-03-15T12:00:00Z"); change_file_modified(te.test_root().join("15mar2018"), "2018-03-15T12:00:00Z");
change_file_modified(te.test_root().join("30dec2017"), "2017-12-30T23:59:00Z"); change_file_modified(te.test_root().join("30dec2017"), "2017-12-30T23:59:00Z");