watchexec/crates/ignore-files/src/discover.rs

527 lines
13 KiB
Rust

use std::{
collections::HashSet,
env,
io::{Error, ErrorKind},
path::{Path, PathBuf},
};
use git_config::{path::interpolate::Context as InterpolateContext, File, Path as GitPath};
use project_origins::ProjectType;
use tokio::fs::{canonicalize, metadata, read_dir};
use tracing::{trace, trace_span};
use crate::{IgnoreFile, IgnoreFilter};
/// The separator for paths used in environment variables.
#[cfg(unix)]
const PATH_SEPARATOR: &str = ":";
/// The separator for paths used in environment variables.
#[cfg(not(unix))]
const PATH_SEPARATOR: &str = ";";
/// Finds all ignore files in the given directory and subdirectories.
///
/// This considers:
/// - Git ignore files (`.gitignore`)
/// - Mercurial ignore files (`.hgignore`)
/// - Tool-generic `.ignore` files
/// - `.git/info/exclude` files in the `path` directory only
/// - Git configurable project ignore files (with `core.excludesFile` in `.git/config`)
///
/// Importantly, this should be called from the origin of the project, not a subfolder. This
/// function will not discover the project origin, and will not traverse parent directories. Use the
/// [`project::origins`](crate::project::origins) function for that.
///
/// This function also does not distinguish between project folder types, and collects all files for
/// all supported VCSs and other project types. Use the `applies_to` field to filter the results.
///
/// All errors (permissions, etc) are collected and returned alongside the ignore files: you may
/// want to show them to the user while still using whatever ignores were successfully found. Errors
/// from files not being found are silently ignored (the files are just not returned).
///
/// ## Special case: project-local git config specifying `core.excludesFile`
///
/// If the project's `.git/config` specifies a value for `core.excludesFile`, this function will
/// return an `IgnoreFile { path: path/to/that/file, applies_in: None, applies_to: Some(ProjectType::Git) }`.
/// This is the only case in which the `applies_in` field is None from this function. When such is
/// received the global Git ignore files found by [`from_environment()`] **should be ignored**.
///
/// ## Async
///
/// This future is not `Send` due to [`git_config`] internals.
#[allow(clippy::future_not_send)]
pub async fn from_origin(path: impl AsRef<Path> + Send) -> (Vec<IgnoreFile>, Vec<Error>) {
let base = path.as_ref().to_owned();
let mut files = Vec::new();
let mut errors = Vec::new();
match find_file(base.join(".git/config")).await {
Err(err) => errors.push(err),
Ok(None) => {}
Ok(Some(path)) => match path.parent().map(File::from_git_dir) {
None => errors.push(Error::new(
ErrorKind::Other,
"unreachable: .git/config must have a parent",
)),
Some(Err(err)) => errors.push(Error::new(ErrorKind::Other, err)),
Some(Ok(config)) => {
let config_excludes = config.value::<GitPath<'_>>("core", None, "excludesFile");
if let Ok(excludes) = config_excludes {
match excludes.interpolate(InterpolateContext {
home_dir: env::var("HOME").ok().map(PathBuf::from).as_deref(),
..Default::default()
}) {
Ok(e) => {
discover_file(
&mut files,
&mut errors,
None,
Some(ProjectType::Git),
e.into(),
)
.await;
}
Err(err) => {
errors.push(Error::new(ErrorKind::Other, err));
}
}
}
}
},
}
discover_file(
&mut files,
&mut errors,
Some(base.clone()),
Some(ProjectType::Bazaar),
base.join(".bzrignore"),
)
.await;
discover_file(
&mut files,
&mut errors,
Some(base.clone()),
Some(ProjectType::Darcs),
base.join("_darcs/prefs/boring"),
)
.await;
discover_file(
&mut files,
&mut errors,
Some(base.clone()),
Some(ProjectType::Fossil),
base.join(".fossil-settings/ignore-glob"),
)
.await;
discover_file(
&mut files,
&mut errors,
Some(base.clone()),
Some(ProjectType::Git),
base.join(".git/info/exclude"),
)
.await;
trace!("visiting child directories for ignore files");
match DirTourist::new(&base, &files).await {
Ok(mut dirs) => {
loop {
match dirs.next().await {
Visit::Done => break,
Visit::Skip => continue,
Visit::Find(dir) => {
if discover_file(
&mut files,
&mut errors,
Some(dir.clone()),
None,
dir.join(".ignore"),
)
.await
{
dirs.add_last_file_to_filter(&mut files, &mut errors).await;
}
if discover_file(
&mut files,
&mut errors,
Some(dir.clone()),
Some(ProjectType::Git),
dir.join(".gitignore"),
)
.await
{
dirs.add_last_file_to_filter(&mut files, &mut errors).await;
}
if discover_file(
&mut files,
&mut errors,
Some(dir.clone()),
Some(ProjectType::Mercurial),
dir.join(".hgignore"),
)
.await
{
dirs.add_last_file_to_filter(&mut files, &mut errors).await;
}
}
}
}
errors.extend(dirs.errors);
}
Err(err) => {
errors.push(err);
}
}
(files, errors)
}
/// Finds all ignore files that apply to the current runtime.
///
/// Takes an optional `appname` for the calling application for looking at an environment variable
/// and an application-specific config location.
///
/// This considers:
/// - User-specific git ignore files (e.g. `~/.gitignore`)
/// - Git configurable ignore files (e.g. with `core.excludesFile` in system or user config)
/// - `$XDG_CONFIG_HOME/{appname}/ignore`, as well as other locations (APPDATA on Windows…)
/// - Files from the `{APPNAME}_IGNORE_FILES` environment variable (separated the same was as `PATH`)
///
/// All errors (permissions, etc) are collected and returned alongside the ignore files: you may
/// want to show them to the user while still using whatever ignores were successfully found. Errors
/// from files not being found are silently ignored (the files are just not returned).
///
/// ## Async
///
/// This future is not `Send` due to [`git_config`] internals.
#[allow(clippy::future_not_send)]
pub async fn from_environment(appname: Option<&str>) -> (Vec<IgnoreFile>, Vec<Error>) {
let mut files = Vec::new();
let mut errors = Vec::new();
if let Some(name) = appname {
for path in env::var(format!("{}_IGNORE_FILES", name.to_uppercase()))
.unwrap_or_default()
.split(PATH_SEPARATOR)
{
discover_file(&mut files, &mut errors, None, None, PathBuf::from(path)).await;
}
}
let mut found_git_global = false;
match File::from_environment_overrides().map(|mut env| {
File::from_globals().map(move |glo| {
env.append(glo);
env
})
}) {
Err(err) => errors.push(Error::new(ErrorKind::Other, err)),
Ok(Err(err)) => errors.push(Error::new(ErrorKind::Other, err)),
Ok(Ok(config)) => {
let config_excludes = config.value::<GitPath<'_>>("core", None, "excludesFile");
if let Ok(excludes) = config_excludes {
match excludes.interpolate(InterpolateContext {
home_dir: env::var("HOME").ok().map(PathBuf::from).as_deref(),
..Default::default()
}) {
Ok(e) => {
if discover_file(
&mut files,
&mut errors,
None,
Some(ProjectType::Git),
e.into(),
)
.await
{
found_git_global = true;
}
}
Err(err) => {
errors.push(Error::new(ErrorKind::Other, err));
}
}
}
}
}
if !found_git_global {
let mut tries = Vec::with_capacity(5);
if let Ok(home) = env::var("XDG_CONFIG_HOME") {
tries.push(Path::new(&home).join("git/ignore"));
}
if let Ok(home) = env::var("APPDATA") {
tries.push(Path::new(&home).join(".gitignore"));
}
if let Ok(home) = env::var("USERPROFILE") {
tries.push(Path::new(&home).join(".gitignore"));
}
if let Ok(home) = env::var("HOME") {
tries.push(Path::new(&home).join(".config/git/ignore"));
tries.push(Path::new(&home).join(".gitignore"));
}
for path in tries {
if discover_file(&mut files, &mut errors, None, Some(ProjectType::Git), path).await {
break;
}
}
}
let mut bzrs = Vec::with_capacity(5);
if let Ok(home) = env::var("APPDATA") {
bzrs.push(Path::new(&home).join("Bazzar/2.0/ignore"));
}
if let Ok(home) = env::var("HOME") {
bzrs.push(Path::new(&home).join(".bazarr/ignore"));
}
for path in bzrs {
if discover_file(
&mut files,
&mut errors,
None,
Some(ProjectType::Bazaar),
path,
)
.await
{
break;
}
}
if let Some(name) = appname {
let mut wgis = Vec::with_capacity(4);
if let Ok(home) = env::var("XDG_CONFIG_HOME") {
wgis.push(Path::new(&home).join(format!("{name}/ignore")));
}
if let Ok(home) = env::var("APPDATA") {
wgis.push(Path::new(&home).join(format!("{name}/ignore")));
}
if let Ok(home) = env::var("USERPROFILE") {
wgis.push(Path::new(&home).join(format!(".{name}/ignore")));
}
if let Ok(home) = env::var("HOME") {
wgis.push(Path::new(&home).join(format!(".{name}/ignore")));
}
for path in wgis {
if discover_file(&mut files, &mut errors, None, None, path).await {
break;
}
}
}
(files, errors)
}
// TODO: add context to these errors
/// Utility function to handle looking for an ignore file and adding it to a list if found.
///
/// This is mostly an internal function, but it is exposed for other filterers to use.
#[allow(clippy::future_not_send)]
#[tracing::instrument(skip(files, errors), level = "trace")]
#[inline]
pub async fn discover_file(
files: &mut Vec<IgnoreFile>,
errors: &mut Vec<Error>,
applies_in: Option<PathBuf>,
applies_to: Option<ProjectType>,
path: PathBuf,
) -> bool {
match find_file(path).await {
Err(err) => {
trace!(?err, "found an error");
errors.push(err);
false
}
Ok(None) => {
trace!("found nothing");
false
}
Ok(Some(path)) => {
trace!(?path, "found a file");
files.push(IgnoreFile {
path,
applies_in,
applies_to,
});
true
}
}
}
async fn find_file(path: PathBuf) -> Result<Option<PathBuf>, Error> {
match metadata(&path).await {
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(err),
Ok(meta) if meta.is_file() && meta.len() > 0 => Ok(Some(path)),
Ok(_) => Ok(None),
}
}
#[derive(Debug)]
struct DirTourist {
base: PathBuf,
to_visit: Vec<PathBuf>,
to_skip: HashSet<PathBuf>,
pub errors: Vec<std::io::Error>,
filter: IgnoreFilter,
}
#[derive(Debug)]
enum Visit {
Find(PathBuf),
Skip,
Done,
}
impl DirTourist {
pub async fn new(base: &Path, files: &[IgnoreFile]) -> Result<Self, Error> {
let base = canonicalize(base).await?;
trace!("create IgnoreFilterer for visiting directories");
let mut filter = IgnoreFilter::new(&base, files)
.await
.map_err(|err| Error::new(ErrorKind::Other, err))?;
filter
.add_globs(
&[
"/.git",
"/.hg",
"/.bzr",
"/_darcs",
"/.fossil-settings",
"/.svn",
"/.pijul",
],
Some(&base),
)
.map_err(|err| Error::new(ErrorKind::Other, err))?;
Ok(Self {
to_visit: vec![base.clone()],
base,
to_skip: HashSet::new(),
errors: Vec::new(),
filter,
})
}
#[allow(clippy::future_not_send)]
pub async fn next(&mut self) -> Visit {
if let Some(path) = self.to_visit.pop() {
self.visit_path(path).await
} else {
Visit::Done
}
}
#[allow(clippy::future_not_send)]
#[tracing::instrument(skip(self), level = "trace")]
async fn visit_path(&mut self, path: PathBuf) -> Visit {
if self.must_skip(&path) {
trace!("in skip list");
return Visit::Skip;
}
if !self.filter.check_dir(&path) {
trace!("path is ignored, adding to skip list");
self.skip(path);
return Visit::Skip;
}
let mut dir = match read_dir(&path).await {
Ok(dir) => dir,
Err(err) => {
trace!("failed to read dir: {}", err);
self.errors.push(err);
return Visit::Skip;
}
};
while let Some(entry) = match dir.next_entry().await {
Ok(entry) => entry,
Err(err) => {
trace!("failed to read dir entries: {}", err);
self.errors.push(err);
return Visit::Skip;
}
} {
let path = entry.path();
let _span = trace_span!("dir_entry", ?path).entered();
if self.must_skip(&path) {
trace!("in skip list");
continue;
}
match entry.file_type().await {
Ok(ft) => {
if ft.is_dir() {
if !self.filter.check_dir(&path) {
trace!("path is ignored, adding to skip list");
self.skip(path);
continue;
}
trace!("found a dir, adding to list");
self.to_visit.push(path);
} else {
trace!("not a dir");
}
}
Err(err) => {
trace!("failed to read filetype, adding to skip list: {}", err);
self.errors.push(err);
self.skip(path);
}
}
}
Visit::Find(path)
}
pub fn skip(&mut self, path: PathBuf) {
let check_path = path.as_path();
self.to_visit.retain(|p| !p.starts_with(check_path));
self.to_skip.insert(path);
}
pub(crate) async fn add_last_file_to_filter(
&mut self,
files: &mut [IgnoreFile],
errors: &mut Vec<Error>,
) {
if let Some(ig) = files.last() {
if let Err(err) = self.filter.add_file(ig).await {
errors.push(Error::new(ErrorKind::Other, err));
}
}
}
fn must_skip(&self, mut path: &Path) -> bool {
if self.to_skip.contains(path) {
return true;
}
while let Some(parent) = path.parent() {
if parent == self.base {
break;
}
if self.to_skip.contains(parent) {
return true;
}
path = parent;
}
false
}
}