walk: Add a cache for DirEntry metadata

This commit is contained in:
Tavian Barnes 2021-10-11 12:21:27 -04:00 committed by David Peter
parent 02e9850112
commit 7b5b3ec47b
6 changed files with 67 additions and 24 deletions

View File

@ -1,5 +1,10 @@
# Upcoming release
## Performance improvements
- File metadata is now cached between the different filters that require it (e.g. `--owner`,
`--size`), reducing the number of `stat` syscalls when multiple filters are used; see #863
## Features
- Don't buffer command output from `--exec` when using a single thread. See #522
@ -15,6 +20,8 @@
- Properly handle write errors to devices that are full, see #737
- Use local time zone for time functions (`--change-newer-than`, `--change-older-than`), see #631 (@jacobmischka)
- Support `--list-details` on more platforms (like BusyBox), see #783
- The filters `--owner`, `--size`, and `--changed-{within,before}` now apply to symbolic links
themselves, rather than the link target, except when `--follow` is specified; see #863
## Changes

1
Cargo.lock generated
View File

@ -177,6 +177,7 @@ dependencies = [
"lscolors",
"normpath",
"num_cpus",
"once_cell",
"regex",
"regex-syntax",
"tempdir",

View File

@ -49,6 +49,7 @@ anyhow = "1.0"
dirs-next = "2.0"
normpath = "0.3"
chrono = "0.4"
once_cell = "1.8.0"
[dependencies.clap]
version = "2.31.3"

View File

@ -37,7 +37,7 @@ impl FileTypes {
|| (self.executables_only
&& !entry
.metadata()
.map(|m| filesystem::is_executable(&m))
.map(|m| filesystem::is_executable(m))
.unwrap_or(false))
|| (self.empty_only && !filesystem::is_empty(entry))
|| !(entry_type.is_file()

View File

@ -13,6 +13,7 @@ use std::time;
use anyhow::{anyhow, Result};
use ignore::overrides::OverrideBuilder;
use ignore::{self, WalkBuilder};
use once_cell::unsync::OnceCell;
use regex::bytes::Regex;
use crate::config::Config;
@ -295,39 +296,58 @@ fn spawn_receiver(
})
}
pub enum DirEntry {
enum DirEntryInner {
Normal(ignore::DirEntry),
BrokenSymlink(PathBuf),
}
pub struct DirEntry {
inner: DirEntryInner,
metadata: OnceCell<Option<Metadata>>,
}
impl DirEntry {
fn normal(e: ignore::DirEntry) -> Self {
Self {
inner: DirEntryInner::Normal(e),
metadata: OnceCell::new(),
}
}
fn broken_symlink(path: PathBuf) -> Self {
Self {
inner: DirEntryInner::BrokenSymlink(path),
metadata: OnceCell::new(),
}
}
pub fn path(&self) -> &Path {
match self {
DirEntry::Normal(e) => e.path(),
DirEntry::BrokenSymlink(pathbuf) => pathbuf.as_path(),
match &self.inner {
DirEntryInner::Normal(e) => e.path(),
DirEntryInner::BrokenSymlink(pathbuf) => pathbuf.as_path(),
}
}
pub fn file_type(&self) -> Option<FileType> {
match self {
DirEntry::Normal(e) => e.file_type(),
DirEntry::BrokenSymlink(pathbuf) => {
pathbuf.symlink_metadata().map(|m| m.file_type()).ok()
}
match &self.inner {
DirEntryInner::Normal(e) => e.file_type(),
DirEntryInner::BrokenSymlink(_) => self.metadata().map(|m| m.file_type()),
}
}
pub fn metadata(&self) -> Option<Metadata> {
match self {
DirEntry::Normal(e) => e.metadata().ok(),
DirEntry::BrokenSymlink(_) => None,
}
pub fn metadata(&self) -> Option<&Metadata> {
self.metadata
.get_or_init(|| match &self.inner {
DirEntryInner::Normal(e) => e.metadata().ok(),
DirEntryInner::BrokenSymlink(path) => path.symlink_metadata().ok(),
})
.as_ref()
}
pub fn depth(&self) -> Option<usize> {
match self {
DirEntry::Normal(e) => Some(e.depth()),
DirEntry::BrokenSymlink(_) => None,
match &self.inner {
DirEntryInner::Normal(e) => Some(e.depth()),
DirEntryInner::BrokenSymlink(_) => None,
}
}
}
@ -355,7 +375,7 @@ fn spawn_senders(
// Skip the root directory entry.
return ignore::WalkState::Continue;
}
Ok(e) => DirEntry::Normal(e),
Ok(e) => DirEntry::normal(e),
Err(ignore::Error::WithPath {
path,
err: inner_err,
@ -367,7 +387,7 @@ fn spawn_senders(
.ok()
.map_or(false, |m| m.file_type().is_symlink()) =>
{
DirEntry::BrokenSymlink(path)
DirEntry::broken_symlink(path)
}
_ => {
return match tx_thread.send(WorkerResult::Error(ignore::Error::WithPath {
@ -436,7 +456,7 @@ fn spawn_senders(
#[cfg(unix)]
{
if let Some(ref owner_constraint) = config.owner_constraint {
if let Ok(ref metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
if !owner_constraint.matches(metadata) {
return ignore::WalkState::Continue;
}
@ -449,7 +469,7 @@ fn spawn_senders(
// Filter out unwanted sizes if it is a file and we have been given size constraints.
if !config.size_constraints.is_empty() {
if entry_path.is_file() {
if let Ok(metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
let file_size = metadata.len();
if config
.size_constraints
@ -469,7 +489,7 @@ fn spawn_senders(
// Filter out unwanted modification times
if !config.time_constraints.is_empty() {
let mut matched = false;
if let Ok(metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
if let Ok(modified) = metadata.modified() {
matched = config
.time_constraints

View File

@ -1652,9 +1652,22 @@ fn create_file_with_modified<P: AsRef<Path>>(path: P, duration_in_secs: u64) {
filetime::set_file_times(&path, ft, ft).expect("time modification failed");
}
#[cfg(test)]
fn remove_symlink<P: AsRef<Path>>(path: P) {
#[cfg(unix)]
fs::remove_file(path).expect("remove symlink");
// On Windows, symlinks remember whether they point to files or directories, so try both
#[cfg(windows)]
fs::remove_file(path.as_ref())
.or_else(|_| fs::remove_dir(path.as_ref()))
.expect("remove symlink");
}
#[test]
fn test_modified_relative() {
let te = TestEnv::new(&[], &[]);
remove_symlink(te.test_root().join("symlink"));
create_file_with_modified(te.test_root().join("foo_0_now"), 0);
create_file_with_modified(te.test_root().join("bar_1_min"), 60);
create_file_with_modified(te.test_root().join("foo_10_min"), 600);
@ -1692,8 +1705,9 @@ fn change_file_modified<P: AsRef<Path>>(path: P, iso_date: &str) {
}
#[test]
fn test_modified_asolute() {
fn test_modified_absolute() {
let te = TestEnv::new(&[], &["15mar2018", "30dec2017"]);
remove_symlink(te.test_root().join("symlink"));
change_file_modified(te.test_root().join("15mar2018"), "2018-03-15T12:00:00Z");
change_file_modified(te.test_root().join("30dec2017"), "2017-12-30T23:59:00Z");