From 3ad0e1aa57041a30c12c94f94ec796ef42243fa7 Mon Sep 17 00:00:00 2001 From: thislooksfun Date: Fri, 29 Dec 2023 19:12:59 -0600 Subject: [PATCH] Respect `applies_in` scope when processing nested ignores (#746) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, when importing multiple nested ignore files, some info from the parent—notably the "root" path—would be inherited. This lead to some problems with matching of "pseudo-absolute" rules (those with a leading slash) in nested ignore files (see #745 for more details). To fix this, we now fully isolate each path of the tree during the import process. This leads to more accurate, though unfortunately slightly less performant, rule matching. The only time a builder is reused now is if two input files have the same `applies_in` value, in which case they are merged together. I have added tests to ensure correctness and prevent a regression. I also was careful to make sure no previous tests broke in any way (all changes to existing tests were made in isolation, and thus are not affected by the logic changes). As far as I can tell, the only behavior change is that now some previously-ignored rules will now be applied, which could, in very rare configurations, lead to files being unintentionally ignored. However, due to the aforementioned logic bug, those files were all ignored by git already, so I suspect the number of people actually caught off guard by this change to be extremely low, likely zero. Fixes #745. --- Cargo.lock | 1 + crates/filterer/ignore/tests/filtering.rs | 6 +- crates/filterer/ignore/tests/helpers/mod.rs | 15 +- crates/ignore-files/Cargo.toml | 3 + crates/ignore-files/src/filter.rs | 60 +++++--- crates/ignore-files/tests/filtering.rs | 69 +++++++++ crates/ignore-files/tests/global/first | 1 + crates/ignore-files/tests/global/second | 1 + crates/ignore-files/tests/helpers/mod.rs | 159 ++++++++++++++++++++ crates/ignore-files/tests/tree/base | 3 + crates/ignore-files/tests/tree/branch/inner | 3 + 11 files changed, 294 insertions(+), 27 deletions(-) create mode 100644 crates/ignore-files/tests/filtering.rs create mode 100644 crates/ignore-files/tests/global/first create mode 100644 crates/ignore-files/tests/global/second create mode 100644 crates/ignore-files/tests/helpers/mod.rs create mode 100644 crates/ignore-files/tests/tree/base create mode 100644 crates/ignore-files/tests/tree/branch/inner diff --git a/Cargo.lock b/Cargo.lock index a3db401..4b71f92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1936,6 +1936,7 @@ dependencies = [ "thiserror", "tokio", "tracing", + "tracing-subscriber", ] [[package]] diff --git a/crates/filterer/ignore/tests/filtering.rs b/crates/filterer/ignore/tests/filtering.rs index 8d50234..2cf4154 100644 --- a/crates/filterer/ignore/tests/filtering.rs +++ b/crates/filterer/ignore/tests/filtering.rs @@ -60,7 +60,7 @@ fn folders_suite(filterer: &IgnoreFilterer, name: &str) { #[tokio::test] async fn globs() { - let filterer = filt("", &[file("globs")]).await; + let filterer = filt("", &[file("globs").applies_globally()]).await; // Unmatched filterer.file_does_pass("FINAL-FINAL.docx"); @@ -214,8 +214,8 @@ async fn scopes() { let filterer = filt( "", &[ - file("scopes-global"), - file("scopes-local").applies_in(""), + file("scopes-global").applies_globally(), + file("scopes-local"), file("scopes-sublocal").applies_in("tests"), file("none-allowed").applies_in("tests/child"), ], diff --git a/crates/filterer/ignore/tests/helpers/mod.rs b/crates/filterer/ignore/tests/helpers/mod.rs index 2a877d8..7bd5e37 100644 --- a/crates/filterer/ignore/tests/helpers/mod.rs +++ b/crates/filterer/ignore/tests/helpers/mod.rs @@ -187,24 +187,27 @@ pub async fn ignore_filt(origin: &str, ignore_files: &[IgnoreFile]) -> IgnoreFil } pub fn ig_file(name: &str) -> IgnoreFile { - let path = std::fs::canonicalize(".") - .unwrap() - .join("tests") - .join("ignores") - .join(name); + let origin = std::fs::canonicalize(".").unwrap(); + let path = origin.join("tests").join("ignores").join(name); IgnoreFile { path, - applies_in: None, + applies_in: Some(origin), applies_to: None, } } pub trait Applies { + fn applies_globally(self) -> Self; fn applies_in(self, origin: &str) -> Self; fn applies_to(self, project_type: ProjectType) -> Self; } impl Applies for IgnoreFile { + fn applies_globally(mut self) -> Self { + self.applies_in = None; + self + } + fn applies_in(mut self, origin: &str) -> Self { let origin = std::fs::canonicalize(".").unwrap().join(origin); self.applies_in = Some(origin); diff --git a/crates/ignore-files/Cargo.toml b/crates/ignore-files/Cargo.toml index d3bd64b..abb3142 100644 --- a/crates/ignore-files/Cargo.toml +++ b/crates/ignore-files/Cargo.toml @@ -36,3 +36,6 @@ features = [ [dependencies.project-origins] version = "1.2.1" path = "../project-origins" + +[dev-dependencies] +tracing-subscriber = "0.3.6" diff --git a/crates/ignore-files/src/filter.rs b/crates/ignore-files/src/filter.rs index ff50657..45ffbdb 100644 --- a/crates/ignore-files/src/filter.rs +++ b/crates/ignore-files/src/filter.rs @@ -115,14 +115,9 @@ impl IgnoreFilter { let applies_in = get_applies_in_path(origin, &file); - let parent_ignore = ignores_trie - .get_ancestor_value(&applies_in.display().to_string()) - // unwrap will always succeed because we created an entry with the root of the origin - .unwrap(); - - let mut builder = parent_ignore - .builder - .clone() + let mut builder = ignores_trie + .get(&applies_in.display().to_string()) + .and_then(|node| node.builder.clone()) .unwrap_or_else(|| GitignoreBuilder::new(&applies_in)); for line in content.lines() { @@ -313,17 +308,46 @@ impl IgnoreFilter { pub fn match_path(&self, path: &Path, is_dir: bool) -> Match<&Glob> { let path = dunce::simplified(path); - let Some(ignores) = self.ignores.get_ancestor_value(&path.display().to_string()) else { - trace!(?path, "no ignores for path"); - return Match::None; - }; + let mut search_path = path; + loop { + let Some(trie_node) = self + .ignores + .get_ancestor(&search_path.display().to_string()) + else { + trace!(?path, ?search_path, "no ignores for path"); + return Match::None; + }; - if path.strip_prefix(&self.origin).is_ok() { - trace!("checking against path or parents"); - ignores.gitignore.matched_path_or_any_parents(path, is_dir) - } else { - trace!("checking against path only"); - ignores.gitignore.matched(path, is_dir) + // Unwrap will always succeed because every node has an entry. + let ignores = trie_node.value().unwrap(); + + let match_ = if path.strip_prefix(&self.origin).is_ok() { + trace!(?path, ?search_path, "checking against path or parents"); + ignores.gitignore.matched_path_or_any_parents(path, is_dir) + } else { + trace!(?path, ?search_path, "checking against path only"); + ignores.gitignore.matched(path, is_dir) + }; + + match match_ { + Match::None => { + trace!( + ?path, + ?search_path, + "no match found, searching for parent ignores" + ); + // Unwrap will always succeed because every node has an entry. + let trie_path = Path::new(trie_node.key().unwrap()); + if let Some(trie_parent) = trie_path.parent() { + trace!(?path, ?search_path, "checking parent ignore"); + search_path = trie_parent; + } else { + trace!(?path, ?search_path, "no parent ignore found"); + return Match::None; + } + } + _ => return match_, + } } } diff --git a/crates/ignore-files/tests/filtering.rs b/crates/ignore-files/tests/filtering.rs new file mode 100644 index 0000000..929f9ff --- /dev/null +++ b/crates/ignore-files/tests/filtering.rs @@ -0,0 +1,69 @@ +mod helpers; + +use helpers::ignore_tests::*; + +#[tokio::test] +async fn globals() { + let filter = filt( + "tree", + &[ + file("global/first").applies_globally(), + file("global/second").applies_globally(), + ], + ) + .await; + + // Both ignores should be loaded as global + filter.agnostic_fail("/apples"); + filter.agnostic_fail("/oranges"); + + // Sanity check + filter.agnostic_pass("/kiwi"); +} + +#[tokio::test] +async fn tree() { + let filter = filt("tree", &[file("tree/base"), file("tree/branch/inner")]).await; + + // "oranges" is not ignored at any level + filter.agnostic_pass("tree/oranges"); + filter.agnostic_pass("tree/branch/oranges"); + filter.agnostic_pass("tree/branch/inner/oranges"); + filter.agnostic_pass("tree/other/oranges"); + + // "apples" should only be ignored at the root + filter.agnostic_fail("tree/apples"); + filter.agnostic_pass("tree/branch/apples"); + filter.agnostic_pass("tree/branch/inner/apples"); + filter.agnostic_pass("tree/other/apples"); + + // "carrots" should be ignored at any level + filter.agnostic_fail("tree/carrots"); + filter.agnostic_fail("tree/branch/carrots"); + filter.agnostic_fail("tree/branch/inner/carrots"); + filter.agnostic_fail("tree/other/carrots"); + + // "pineapples/grapes" should only be ignored at the root + filter.agnostic_fail("tree/pineapples/grapes"); + filter.agnostic_pass("tree/branch/pineapples/grapes"); + filter.agnostic_pass("tree/branch/inner/pineapples/grapes"); + filter.agnostic_pass("tree/other/pineapples/grapes"); + + // "cauliflowers" should only be ignored at the root of "branch/" + filter.agnostic_pass("tree/cauliflowers"); + filter.agnostic_fail("tree/branch/cauliflowers"); + filter.agnostic_pass("tree/branch/inner/cauliflowers"); + filter.agnostic_pass("tree/other/cauliflowers"); + + // "artichokes" should be ignored anywhere inside of "branch/" + filter.agnostic_pass("tree/artichokes"); + filter.agnostic_fail("tree/branch/artichokes"); + filter.agnostic_fail("tree/branch/inner/artichokes"); + filter.agnostic_pass("tree/other/artichokes"); + + // "bananas/pears" should only be ignored at the root of "branch/" + filter.agnostic_pass("tree/bananas/pears"); + filter.agnostic_fail("tree/branch/bananas/pears"); + filter.agnostic_pass("tree/branch/inner/bananas/pears"); + filter.agnostic_pass("tree/other/bananas/pears"); +} diff --git a/crates/ignore-files/tests/global/first b/crates/ignore-files/tests/global/first new file mode 100644 index 0000000..950a188 --- /dev/null +++ b/crates/ignore-files/tests/global/first @@ -0,0 +1 @@ +apples diff --git a/crates/ignore-files/tests/global/second b/crates/ignore-files/tests/global/second new file mode 100644 index 0000000..192019c --- /dev/null +++ b/crates/ignore-files/tests/global/second @@ -0,0 +1 @@ +oranges diff --git a/crates/ignore-files/tests/helpers/mod.rs b/crates/ignore-files/tests/helpers/mod.rs new file mode 100644 index 0000000..a5a06eb --- /dev/null +++ b/crates/ignore-files/tests/helpers/mod.rs @@ -0,0 +1,159 @@ +use std::path::{Path, PathBuf}; + +use ignore::{gitignore::Glob, Match}; +use ignore_files::{IgnoreFile, IgnoreFilter}; + +pub mod ignore_tests { + pub use super::ig_file as file; + pub use super::ignore_filt as filt; + pub use super::Applies; + pub use super::PathHarness; +} + +/// Get the drive letter of the current working directory. +#[cfg(windows)] +fn drive_root() -> String { + let path = std::fs::canonicalize(".").unwrap(); + + let Some(prefix) = path.components().next() else { + return r"C:\".into(); + }; + + match prefix { + std::path::Component::Prefix(prefix_component) => prefix_component + .as_os_str() + .to_str() + .map(|p| p.to_owned() + r"\") + .unwrap_or(r"C:\".into()), + _ => r"C:\".into(), + } +} + +fn normalize_path(path: &str) -> PathBuf { + #[cfg(windows)] + let path: &str = &String::from(path) + .strip_prefix("/") + .map_or(path.into(), |p| drive_root() + p); + + let path: PathBuf = if Path::new(path).has_root() { + path.into() + } else { + std::fs::canonicalize(".").unwrap().join("tests").join(path) + }; + + dunce::simplified(&path).into() +} + +pub trait PathHarness { + fn check_path(&self, path: &Path, is_dir: bool) -> Match<&Glob>; + + fn path_pass(&self, path: &str, is_dir: bool, pass: bool) { + let full_path = &normalize_path(path); + + tracing::info!(?path, ?is_dir, ?pass, "check"); + + let result = self.check_path(full_path, is_dir); + + assert_eq!( + match result { + Match::None => true, + Match::Ignore(glob) => !glob.from().map_or(true, |f| full_path.starts_with(f)), + Match::Whitelist(_glob) => true, + }, + pass, + "{} {:?} (expected {}) [result: {}]", + if is_dir { "dir" } else { "file" }, + full_path, + if pass { "pass" } else { "fail" }, + match result { + Match::None => String::from("None"), + Match::Ignore(glob) => format!( + "Ignore({})", + glob.from() + .map_or(String::from(""), |f| f.display().to_string()) + ), + Match::Whitelist(glob) => format!( + "Whitelist({})", + glob.from() + .map_or(String::from(""), |f| f.display().to_string()) + ), + }, + ); + } + + fn file_does_pass(&self, path: &str) { + self.path_pass(path, false, true); + } + + fn file_doesnt_pass(&self, path: &str) { + self.path_pass(path, false, false); + } + + fn dir_does_pass(&self, path: &str) { + self.path_pass(path, true, true); + } + + fn dir_doesnt_pass(&self, path: &str) { + self.path_pass(path, true, false); + } + + fn agnostic_pass(&self, path: &str) { + self.file_does_pass(path); + self.dir_does_pass(path); + } + + fn agnostic_fail(&self, path: &str) { + self.file_doesnt_pass(path); + self.dir_doesnt_pass(path); + } +} + +impl PathHarness for IgnoreFilter { + fn check_path(&self, path: &Path, is_dir: bool) -> Match<&Glob> { + self.match_path(&path, is_dir) + } +} + +fn tracing_init() { + use tracing_subscriber::{ + fmt::{format::FmtSpan, Subscriber}, + util::SubscriberInitExt, + EnvFilter, + }; + Subscriber::builder() + .pretty() + .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) + .with_env_filter(EnvFilter::from_default_env()) + .finish() + .try_init() + .ok(); +} + +pub async fn ignore_filt(origin: &str, ignore_files: &[IgnoreFile]) -> IgnoreFilter { + tracing_init(); + let origin = normalize_path(origin); + IgnoreFilter::new(origin, ignore_files) + .await + .expect("making filterer") +} + +pub fn ig_file(name: &str) -> IgnoreFile { + let path = normalize_path(name); + let parent: PathBuf = path.parent().unwrap_or(&path).into(); + IgnoreFile { + path, + applies_in: Some(parent), + applies_to: None, + } +} + +pub trait Applies { + fn applies_globally(self) -> Self; +} + +impl Applies for IgnoreFile { + fn applies_globally(mut self) -> Self { + self.applies_in = None; + self + } +} diff --git a/crates/ignore-files/tests/tree/base b/crates/ignore-files/tests/tree/base new file mode 100644 index 0000000..ab872a2 --- /dev/null +++ b/crates/ignore-files/tests/tree/base @@ -0,0 +1,3 @@ +/apples +carrots +pineapples/grapes diff --git a/crates/ignore-files/tests/tree/branch/inner b/crates/ignore-files/tests/tree/branch/inner new file mode 100644 index 0000000..396a2d3 --- /dev/null +++ b/crates/ignore-files/tests/tree/branch/inner @@ -0,0 +1,3 @@ +/cauliflowers +artichokes +bananas/pears