2022-01-11 12:59:39 +01:00
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
|
|
|
use futures::stream::{FuturesUnordered, StreamExt};
|
|
|
|
use ignore::{
|
2022-06-15 05:25:05 +02:00
|
|
|
gitignore::{Gitignore, GitignoreBuilder, Glob},
|
2022-01-11 12:59:39 +01:00
|
|
|
Match,
|
|
|
|
};
|
2023-05-09 00:13:04 +02:00
|
|
|
use radix_trie::{Trie, TrieCommon};
|
2023-06-03 10:24:39 +02:00
|
|
|
use tokio::fs::{canonicalize, read_to_string};
|
2022-01-11 12:59:39 +01:00
|
|
|
use tracing::{trace, trace_span};
|
|
|
|
|
2022-06-15 05:25:05 +02:00
|
|
|
use crate::{Error, IgnoreFile};
|
2022-01-11 12:59:39 +01:00
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
#[derive(Clone, Debug)]
|
|
|
|
struct Ignore {
|
|
|
|
gitignore: Gitignore,
|
|
|
|
builder: Option<GitignoreBuilder>,
|
|
|
|
}
|
|
|
|
|
2022-06-15 05:25:05 +02:00
|
|
|
/// A mutable filter dedicated to ignore files and trees of ignore files.
|
2022-01-11 12:59:39 +01:00
|
|
|
///
|
|
|
|
/// This reads and compiles ignore files, and should be used for handling ignore files. It's created
|
|
|
|
/// with a project origin and a list of ignore files, and new ignore files can be added later
|
2023-03-18 11:23:46 +01:00
|
|
|
/// (unless [`finish`](IgnoreFilter::finish()) is called).
|
2022-01-16 02:49:14 +01:00
|
|
|
#[derive(Clone, Debug)]
|
2022-06-15 05:25:05 +02:00
|
|
|
pub struct IgnoreFilter {
|
2022-01-11 12:59:39 +01:00
|
|
|
origin: PathBuf,
|
2023-05-09 00:13:04 +02:00
|
|
|
ignores: Trie<String, Ignore>,
|
2022-01-11 12:59:39 +01:00
|
|
|
}
|
|
|
|
|
2022-06-15 05:25:05 +02:00
|
|
|
impl IgnoreFilter {
|
2022-01-16 02:49:14 +01:00
|
|
|
/// Create a new empty filterer.
|
|
|
|
///
|
2023-03-18 11:23:46 +01:00
|
|
|
/// Prefer [`new()`](IgnoreFilter::new()) if you have ignore files ready to use.
|
2022-01-16 02:49:14 +01:00
|
|
|
pub fn empty(origin: impl AsRef<Path>) -> Self {
|
|
|
|
let origin = origin.as_ref();
|
2023-05-09 00:13:04 +02:00
|
|
|
|
|
|
|
let mut ignores = Trie::new();
|
|
|
|
ignores.insert(
|
|
|
|
origin.display().to_string(),
|
|
|
|
Ignore {
|
|
|
|
gitignore: Gitignore::empty(),
|
|
|
|
builder: Some(GitignoreBuilder::new(origin)),
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
2022-01-16 02:49:14 +01:00
|
|
|
Self {
|
|
|
|
origin: origin.to_owned(),
|
2023-05-09 00:13:04 +02:00
|
|
|
ignores,
|
2022-01-16 02:49:14 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-11 12:59:39 +01:00
|
|
|
/// Read ignore files from disk and load them for filtering.
|
2022-01-16 02:49:14 +01:00
|
|
|
///
|
2023-03-18 11:23:46 +01:00
|
|
|
/// Use [`empty()`](IgnoreFilter::empty()) if you want an empty filterer,
|
2022-01-16 02:49:14 +01:00
|
|
|
/// or to construct one outside an async environment.
|
2023-01-06 14:53:49 +01:00
|
|
|
pub async fn new(origin: impl AsRef<Path> + Send, files: &[IgnoreFile]) -> Result<Self, Error> {
|
2023-06-03 10:24:39 +02:00
|
|
|
let origin = origin.as_ref().to_owned();
|
|
|
|
let origin = canonicalize(&origin)
|
|
|
|
.await
|
|
|
|
.map_err(move |err| Error::Canonicalize { path: origin, err })?;
|
|
|
|
|
|
|
|
let origin = dunce::simplified(&origin);
|
2022-01-15 11:46:06 +01:00
|
|
|
let _span = trace_span!("build_filterer", ?origin);
|
|
|
|
|
|
|
|
trace!(files=%files.len(), "loading file contents");
|
2022-01-11 12:59:39 +01:00
|
|
|
let (files_contents, errors): (Vec<_>, Vec<_>) = files
|
|
|
|
.iter()
|
|
|
|
.map(|file| async move {
|
|
|
|
trace!(?file, "loading ignore file");
|
2022-06-15 05:25:05 +02:00
|
|
|
let content = read_to_string(&file.path)
|
|
|
|
.await
|
|
|
|
.map_err(|err| Error::Read {
|
2022-01-11 12:59:39 +01:00
|
|
|
file: file.path.clone(),
|
|
|
|
err,
|
2022-06-15 05:25:05 +02:00
|
|
|
})?;
|
2022-01-11 12:59:39 +01:00
|
|
|
Ok((file.clone(), content))
|
|
|
|
})
|
|
|
|
.collect::<FuturesUnordered<_>>()
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
.await
|
|
|
|
.into_iter()
|
|
|
|
.map(|res| match res {
|
|
|
|
Ok(o) => (Some(o), None),
|
|
|
|
Err(e) => (None, Some(e)),
|
|
|
|
})
|
|
|
|
.unzip();
|
|
|
|
|
2022-06-15 05:25:05 +02:00
|
|
|
let errors: Vec<Error> = errors.into_iter().flatten().collect();
|
2022-01-11 12:59:39 +01:00
|
|
|
if !errors.is_empty() {
|
2022-01-15 11:46:06 +01:00
|
|
|
trace!("found {} errors", errors.len());
|
2022-06-15 05:25:05 +02:00
|
|
|
return Err(Error::Multi(errors));
|
2022-01-11 12:59:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: different parser/adapter for non-git-syntax ignore files?
|
|
|
|
|
2022-01-15 11:46:06 +01:00
|
|
|
trace!(files=%files_contents.len(), "building ignore list");
|
2023-05-09 00:13:04 +02:00
|
|
|
|
|
|
|
let mut ignores_trie = Trie::new();
|
|
|
|
|
|
|
|
// add builder for the root of the file system, so that we can handle global ignores and globs
|
|
|
|
ignores_trie.insert(
|
|
|
|
prefix(origin),
|
|
|
|
Ignore {
|
|
|
|
gitignore: Gitignore::empty(),
|
|
|
|
builder: Some(GitignoreBuilder::new(origin)),
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
|
|
|
let mut total_num_ignores = 0;
|
|
|
|
let mut total_num_whitelists = 0;
|
|
|
|
|
2022-01-11 12:59:39 +01:00
|
|
|
for (file, content) in files_contents.into_iter().flatten() {
|
|
|
|
let _span = trace_span!("loading ignore file", ?file).entered();
|
2023-05-09 00:13:04 +02:00
|
|
|
|
2023-05-14 06:59:59 +02:00
|
|
|
let applies_in = get_applies_in_path(origin, &file);
|
2023-05-09 00:13:04 +02:00
|
|
|
|
|
|
|
let parent_ignore = ignores_trie
|
|
|
|
.get_ancestor_value(&applies_in.display().to_string())
|
|
|
|
// unwrap will always succeed because we created an entry with the root of the origin
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
let mut builder = parent_ignore
|
|
|
|
.builder
|
|
|
|
.clone()
|
|
|
|
.unwrap_or_else(|| GitignoreBuilder::new(&applies_in));
|
|
|
|
|
2022-01-11 12:59:39 +01:00
|
|
|
for line in content.lines() {
|
|
|
|
if line.is_empty() || line.starts_with('#') {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace!(?line, "adding ignore line");
|
|
|
|
builder
|
2023-05-09 00:13:04 +02:00
|
|
|
.add_line(Some(applies_in.clone().to_owned()), line)
|
2022-06-15 05:25:05 +02:00
|
|
|
.map_err(|err| Error::Glob {
|
2022-01-16 03:18:15 +01:00
|
|
|
file: Some(file.path.clone()),
|
2022-01-11 12:59:39 +01:00
|
|
|
err,
|
|
|
|
})?;
|
|
|
|
}
|
2023-05-09 00:13:04 +02:00
|
|
|
trace!("compiling globset");
|
|
|
|
let compiled_builder = builder
|
|
|
|
.build()
|
|
|
|
.map_err(|err| Error::Glob { file: None, err })?;
|
2022-01-11 12:59:39 +01:00
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
total_num_ignores += compiled_builder.num_ignores();
|
|
|
|
total_num_whitelists += compiled_builder.num_whitelists();
|
|
|
|
|
|
|
|
ignores_trie.insert(
|
|
|
|
applies_in.display().to_string(),
|
|
|
|
Ignore {
|
|
|
|
gitignore: compiled_builder,
|
|
|
|
builder: Some(builder),
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|
2022-01-11 12:59:39 +01:00
|
|
|
|
|
|
|
trace!(
|
|
|
|
files=%files.len(),
|
2023-05-09 00:13:04 +02:00
|
|
|
trie=?ignores_trie,
|
|
|
|
ignores=%total_num_ignores,
|
|
|
|
allows=%total_num_whitelists,
|
2022-01-11 12:59:39 +01:00
|
|
|
"ignore files loaded and compiled",
|
|
|
|
);
|
|
|
|
|
|
|
|
Ok(Self {
|
|
|
|
origin: origin.to_owned(),
|
2023-05-09 00:13:04 +02:00
|
|
|
ignores: ignores_trie,
|
2022-01-11 12:59:39 +01:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2022-01-16 03:18:15 +01:00
|
|
|
/// Returns the number of ignores and allowlists loaded.
|
2023-01-06 14:53:49 +01:00
|
|
|
#[must_use]
|
2022-01-16 03:18:15 +01:00
|
|
|
pub fn num_ignores(&self) -> (u64, u64) {
|
2023-05-09 00:13:04 +02:00
|
|
|
self.ignores.iter().fold((0, 0), |mut acc, (_, ignore)| {
|
|
|
|
acc.0 += ignore.gitignore.num_ignores();
|
|
|
|
acc.1 += ignore.gitignore.num_whitelists();
|
|
|
|
acc
|
|
|
|
})
|
2022-01-16 03:18:15 +01:00
|
|
|
}
|
|
|
|
|
2022-01-11 12:59:39 +01:00
|
|
|
/// Deletes the internal builder, to save memory.
|
|
|
|
///
|
|
|
|
/// This makes it impossible to add new ignore files without re-compiling the whole set.
|
|
|
|
pub fn finish(&mut self) {
|
2023-05-09 00:13:04 +02:00
|
|
|
let keys = self.ignores.keys().cloned().collect::<Vec<_>>();
|
|
|
|
for key in keys {
|
|
|
|
if let Some(ignore) = self.ignores.get_mut(&key) {
|
|
|
|
ignore.builder = None;
|
|
|
|
}
|
|
|
|
}
|
2022-01-11 12:59:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Reads and adds an ignore file, if the builder is available.
|
|
|
|
///
|
|
|
|
/// Does nothing silently otherwise.
|
2022-06-15 05:25:05 +02:00
|
|
|
pub async fn add_file(&mut self, file: &IgnoreFile) -> Result<(), Error> {
|
2023-05-14 06:59:59 +02:00
|
|
|
let applies_in = get_applies_in_path(&self.origin, file)
|
2023-05-09 00:13:04 +02:00
|
|
|
.display()
|
|
|
|
.to_string();
|
2022-01-11 12:59:39 +01:00
|
|
|
|
2023-05-14 06:59:59 +02:00
|
|
|
let Some(Ignore { builder: Some(ref mut builder), ..}) = self.ignores.get_mut(&applies_in) else {
|
2023-05-09 00:13:04 +02:00
|
|
|
return Ok(());
|
|
|
|
};
|
2022-01-11 12:59:39 +01:00
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
trace!(?file, "reading ignore file");
|
|
|
|
let content = read_to_string(&file.path)
|
|
|
|
.await
|
|
|
|
.map_err(|err| Error::Read {
|
|
|
|
file: file.path.clone(),
|
|
|
|
err,
|
|
|
|
})?;
|
|
|
|
|
|
|
|
let _span = trace_span!("loading ignore file", ?file).entered();
|
|
|
|
for line in content.lines() {
|
|
|
|
if line.is_empty() || line.starts_with('#') {
|
|
|
|
continue;
|
2022-01-11 12:59:39 +01:00
|
|
|
}
|
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
trace!(?line, "adding ignore line");
|
|
|
|
builder
|
|
|
|
.add_line(file.applies_in.clone(), line)
|
|
|
|
.map_err(|err| Error::Glob {
|
|
|
|
file: Some(file.path.clone()),
|
|
|
|
err,
|
|
|
|
})?;
|
2022-01-15 14:36:22 +01:00
|
|
|
}
|
|
|
|
|
2023-05-14 06:59:59 +02:00
|
|
|
self.recompile(file)?;
|
2023-05-09 00:13:04 +02:00
|
|
|
|
2022-01-15 14:36:22 +01:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2023-05-14 06:59:59 +02:00
|
|
|
fn recompile(&mut self, file: &IgnoreFile) -> Result<(), Error> {
|
2023-05-09 00:13:04 +02:00
|
|
|
let applies_in = get_applies_in_path(&self.origin, file)
|
|
|
|
.display()
|
|
|
|
.to_string();
|
2022-01-11 12:59:39 +01:00
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
let Some(Ignore { gitignore: compiled, builder: Some(builder)}) = self.ignores.get(&applies_in) else {
|
|
|
|
return Ok(());
|
|
|
|
};
|
2022-01-11 12:59:39 +01:00
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
let pre_ignores = compiled.num_ignores();
|
|
|
|
let pre_allows = compiled.num_whitelists();
|
|
|
|
|
|
|
|
trace!("recompiling globset");
|
|
|
|
let recompiled = builder.build().map_err(|err| Error::Glob {
|
2023-05-14 06:59:59 +02:00
|
|
|
file: Some(file.path.clone()),
|
2023-05-09 00:13:04 +02:00
|
|
|
err,
|
|
|
|
})?;
|
|
|
|
|
|
|
|
trace!(
|
|
|
|
new_ignores=%(recompiled.num_ignores() - pre_ignores),
|
|
|
|
new_allows=%(recompiled.num_whitelists() - pre_allows),
|
|
|
|
"ignore file loaded and set recompiled",
|
|
|
|
);
|
|
|
|
|
|
|
|
self.ignores.insert(
|
|
|
|
applies_in,
|
|
|
|
Ignore {
|
|
|
|
gitignore: recompiled,
|
|
|
|
builder: Some(builder.to_owned()),
|
|
|
|
},
|
|
|
|
);
|
2022-01-11 12:59:39 +01:00
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2022-01-15 14:36:22 +01:00
|
|
|
/// Adds some globs manually, if the builder is available.
|
|
|
|
///
|
|
|
|
/// Does nothing silently otherwise.
|
2023-01-06 14:53:49 +01:00
|
|
|
pub fn add_globs(&mut self, globs: &[&str], applies_in: Option<&PathBuf>) -> Result<(), Error> {
|
2023-05-09 00:13:04 +02:00
|
|
|
let applies_in = applies_in.unwrap_or(&self.origin);
|
2022-01-15 14:36:22 +01:00
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
let Some(Ignore {builder: Some(builder), ..}) = self.ignores.get_mut(&applies_in.display().to_string()) else {
|
|
|
|
return Ok(());
|
|
|
|
};
|
|
|
|
|
|
|
|
let _span = trace_span!("loading ignore globs", ?globs).entered();
|
|
|
|
for line in globs {
|
|
|
|
if line.is_empty() || line.starts_with('#') {
|
|
|
|
continue;
|
2022-01-15 14:36:22 +01:00
|
|
|
}
|
|
|
|
|
2023-05-09 00:13:04 +02:00
|
|
|
trace!(?line, "adding ignore line");
|
|
|
|
builder
|
|
|
|
.add_line(Some(applies_in.clone()), line)
|
|
|
|
.map_err(|err| Error::Glob { file: None, err })?;
|
2022-01-15 14:36:22 +01:00
|
|
|
}
|
|
|
|
|
2023-05-14 06:59:59 +02:00
|
|
|
self.recompile(&IgnoreFile {
|
|
|
|
path: "manual glob".into(),
|
|
|
|
applies_in: Some(applies_in.clone()),
|
|
|
|
applies_to: None,
|
|
|
|
})?;
|
2023-05-09 00:13:04 +02:00
|
|
|
|
2022-01-15 14:36:22 +01:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2022-06-15 05:25:05 +02:00
|
|
|
/// Match a particular path against the ignore set.
|
|
|
|
pub fn match_path(&self, path: &Path, is_dir: bool) -> Match<&Glob> {
|
2023-05-09 00:13:04 +02:00
|
|
|
let path = dunce::simplified(path);
|
|
|
|
|
|
|
|
let Some(ignores) = self.ignores.get_ancestor_value(&path.display().to_string()) else {
|
|
|
|
trace!(?path, "no ignores for path");
|
|
|
|
return Match::None;
|
|
|
|
};
|
|
|
|
|
2022-06-15 05:25:05 +02:00
|
|
|
if path.strip_prefix(&self.origin).is_ok() {
|
|
|
|
trace!("checking against path or parents");
|
2023-05-09 00:13:04 +02:00
|
|
|
ignores.gitignore.matched_path_or_any_parents(path, is_dir)
|
2022-06-15 05:25:05 +02:00
|
|
|
} else {
|
|
|
|
trace!("checking against path only");
|
2023-05-09 00:13:04 +02:00
|
|
|
ignores.gitignore.matched(path, is_dir)
|
2022-06-15 05:25:05 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-11 12:59:39 +01:00
|
|
|
/// Check a particular folder path against the ignore set.
|
|
|
|
///
|
|
|
|
/// Returns `false` if the folder should be ignored.
|
|
|
|
///
|
2023-03-18 11:23:46 +01:00
|
|
|
/// Note that this is a slightly different implementation than watchexec's Filterer trait, as
|
|
|
|
/// the latter handles events with multiple associated paths.
|
2022-01-11 12:59:39 +01:00
|
|
|
pub fn check_dir(&self, path: &Path) -> bool {
|
|
|
|
let _span = trace_span!("check_dir", ?path).entered();
|
|
|
|
|
|
|
|
trace!("checking against compiled ignore files");
|
2022-06-15 05:25:05 +02:00
|
|
|
match self.match_path(path, true) {
|
2022-01-11 12:59:39 +01:00
|
|
|
Match::None => {
|
|
|
|
trace!("no match (pass)");
|
|
|
|
true
|
|
|
|
}
|
|
|
|
Match::Ignore(glob) => {
|
|
|
|
if glob.from().map_or(true, |f| path.strip_prefix(f).is_ok()) {
|
|
|
|
trace!(?glob, "positive match (fail)");
|
|
|
|
false
|
|
|
|
} else {
|
|
|
|
trace!(?glob, "positive match, but not in scope (pass)");
|
|
|
|
true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Match::Whitelist(glob) => {
|
|
|
|
trace!(?glob, "negative match (pass)");
|
|
|
|
true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-05-09 00:13:04 +02:00
|
|
|
|
2023-05-14 06:59:59 +02:00
|
|
|
fn get_applies_in_path(origin: &Path, ignore_file: &IgnoreFile) -> PathBuf {
|
2023-05-09 00:13:04 +02:00
|
|
|
let root_path = PathBuf::from(prefix(origin));
|
2023-05-14 06:59:59 +02:00
|
|
|
ignore_file
|
|
|
|
.applies_in
|
|
|
|
.as_ref()
|
|
|
|
.map(|p| PathBuf::from(dunce::simplified(p)))
|
|
|
|
.unwrap_or(root_path)
|
2023-05-09 00:13:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Gets the root component of a given path.
///
/// When the path starts with a prefix component (a drive such as `C:` or `D:` on Windows),
/// that prefix is returned as a string. In every other case the result is `/`: paths
/// starting at the root, relative paths, empty paths, and prefixes that are not valid UTF-8.
fn prefix<T: AsRef<Path>>(path: T) -> String {
    match path.as_ref().components().next() {
        Some(std::path::Component::Prefix(prefix_component)) => prefix_component
            .as_os_str()
            .to_str()
            .unwrap_or("/")
            .to_owned(),
        _ => String::from("/"),
    }
}
|
2023-06-03 10:24:39 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::IgnoreFilter;

    /// A relative origin must come out of `new` as an absolute path.
    #[tokio::test]
    async fn handle_relative_paths() {
        let filter = IgnoreFilter::new(".", &[]).await.unwrap();
        assert!(filter.origin.is_absolute());
    }
}
|