Use globset for ignore matching; closes #14 and #23

This commit is contained in:
Matt Green 2017-01-27 13:00:13 -05:00
parent ed474ccfe3
commit ebcb5976ba
5 changed files with 183 additions and 209 deletions

2
Cargo.lock generated
View File

@ -1,6 +1,6 @@
[root]
name = "watchexec"
version = "1.6.1"
version = "1.6.2"
dependencies = [
"clap 2.19.2 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -1,6 +1,6 @@
[package]
name = "watchexec"
version = "1.6.1"
version = "1.6.2"
authors = ["Matt Green <mattgreenrocks@gmail.com>"]
description = "Executes commands in response to file modifications"
documentation = "https://github.com/mattgreen/watchexec"

View File

@ -1,96 +1,141 @@
extern crate glob;
extern crate globset;
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
use std::fs;
use std::io;
use std::io::Read;
use std::path::{Path, PathBuf};
// Immutable, ordered set of Patterns
// Used to implement whitelisting
pub struct PatternSet {
patterns: Vec<Pattern>,
/// Locates and parses the nearest `.gitignore`, starting at `path` and
/// walking up through its parent directories.
///
/// Each directory on the way up is checked for a `.gitignore` entry. The first
/// one found is handed to `GitignoreFile::new`; if that fails, the error is
/// discarded and `None` is returned. The walk also ends with `None` at a
/// directory that contains a `.git` directory, or once there is no parent
/// left to move to.
pub fn load(path: &Path) -> Option<GitignoreFile> {
    let mut dir = path.to_owned();

    loop {
        let candidate = dir.join(".gitignore");
        if candidate.exists() {
            return GitignoreFile::new(&candidate).ok();
        }

        // Stop if we see a .git directory
        let has_git_dir = dir.join(".git")
            .metadata()
            .map(|meta| meta.is_dir())
            .unwrap_or(false);

        // `pop` reports false (and leaves `dir` alone) when there is no parent.
        if has_git_dir || !dir.pop() {
            return None;
        }
    }
}
// Represents a single gitignore rule
//
// Currently we ignore rules about whether to match
// only a directory since it's a bit weird for what
// we want to use a gitignore file for.
struct Pattern {
pattern: glob::Pattern,
str: String,
pub struct GitignoreFile {
set: GlobSet,
patterns: Vec<Pattern>,
root: PathBuf,
whitelist: bool,
#[allow(dead_code)]
directory: bool,
anchored: bool,
}
#[derive(Debug)]
pub enum Error {
Glob(glob::PatternError),
GlobSet(globset::Error),
Io(io::Error),
}
pub fn parse(path: &Path) -> Result<PatternSet, Error> {
let mut file = try!(fs::File::open(path));
let mut contents = String::new();
try!(file.read_to_string(&mut contents));
// If we've opened the file, we'll have at least one other path component
let root = path.parent().unwrap();
let patterns = try!(contents.lines()
.filter(|l| !l.is_empty())
.filter(|l| !l.starts_with("#"))
.map(|l| Pattern::new(l, root))
.collect());
Ok(PatternSet::new(patterns))
// A single rule parsed from one gitignore line.
struct Pattern {
// Rule text as stored by `Pattern::parse`, i.e. after its normalization
// (for example, a leading `!` is removed).
pattern: String,
// Whether a match on this rule ignores the path or whitelists it.
pattern_type: PatternType,
// When false, `GitignoreFile::from_strings` prefixes the glob with `**/`
// (unless the rule already starts with it).
anchored: bool,
}
impl PatternSet {
fn new(patterns: Vec<Pattern>) -> PatternSet {
PatternSet { patterns: patterns }
// What a matching rule means for a path.
enum PatternType {
// The path is excluded (`is_excluded` answers true).
Ignore,
// The rule was written with a leading `!`; the path is not excluded
// (`is_excluded` answers false).
Whitelist,
}
impl GitignoreFile {
/// Reads the gitignore file at `path` and builds a `GitignoreFile` from its
/// lines.
///
/// The directory containing `path` is passed to `from_strings` as the root.
///
/// Returns an error if the file cannot be opened or read as a string, or if
/// `from_strings` rejects the contents.
///
/// Panics if `path` has no parent component; this is only reached after the
/// file has been opened and read successfully.
pub fn new(path: &Path) -> Result<GitignoreFile, Error> {
    let mut contents = String::new();
    {
        let mut file = try!(fs::File::open(path));
        try!(file.read_to_string(&mut contents));
    }

    let root = path.parent().unwrap();
    GitignoreFile::from_strings(contents.lines().collect(), root)
}
/// Builds a `GitignoreFile` from raw gitignore lines, using `root` as the
/// directory that paths are made relative to before matching.
///
/// Each line that survives `GitignoreFile::parse` is turned into one glob:
/// unanchored rules get a `**/` prefix (unless already present) and every
/// rule gets a `/**` suffix (unless already present). Globs are compiled with
/// `literal_separator(true)`.
///
/// Returns an error if any glob fails to build or the set cannot be built.
pub fn from_strings(strs: Vec<&str>, root: &Path) -> Result<GitignoreFile, Error> {
    let mut set_builder = GlobSetBuilder::new();
    let mut rules = Vec::new();

    for rule in GitignoreFile::parse(strs) {
        let needs_prefix = !rule.anchored && !rule.pattern.starts_with("**/");
        let mut glob_text = if needs_prefix {
            format!("**/{}", rule.pattern)
        } else {
            rule.pattern.clone()
        };

        if !glob_text.ends_with("/**") {
            glob_text.push_str("/**");
        }

        let glob = try!(GlobBuilder::new(&glob_text)
            .literal_separator(true)
            .build());
        set_builder.add(glob);

        // Pushed in the same order as the globs, so `patterns[i]` lines up
        // with the i-th glob added to the set.
        rules.push(rule);
    }

    let set = try!(set_builder.build());
    Ok(GitignoreFile {
        set: set,
        patterns: rules,
        root: root.to_owned(),
    })
}
// Apply the patterns to the path one-by-one
//
// If there are whitelisting patterns, we need to run through the whole set.
// Otherwise, we can stop at the first exclusion.
pub fn is_excluded(&self, path: &Path) -> bool {
let mut excluded = false;
let has_whitelistings = self.patterns.iter().any(|p| p.whitelist);
let stripped = path.strip_prefix(&self.root);
if !stripped.is_ok() {
return false;
}
for pattern in &self.patterns {
let matched = pattern.matches(path);
let matches = self.set.matches(stripped.unwrap());
if matched {
if pattern.whitelist {
excluded = false;
} else {
excluded = true;
// We can stop running rules in this case
if !has_whitelistings {
break;
}
}
for &i in matches.iter().rev() {
let pattern = &self.patterns[i];
return match pattern.pattern_type {
PatternType::Whitelist => false,
PatternType::Ignore => true,
}
}
excluded
false
}
/// Converts raw gitignore lines into rules, preserving their order.
///
/// Empty lines and lines starting with `#` are skipped; every other line is
/// passed to `Pattern::parse` as-is.
fn parse(contents: Vec<&str>) -> Vec<Pattern> {
    let mut rules = Vec::new();

    for line in contents {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        rules.push(Pattern::parse(line));
    }

    rules
}
}
impl Pattern {
fn new(pattern: &str, root: &Path) -> Result<Pattern, Error> {
fn parse(pattern: &str) -> Pattern {
let mut normalized = String::from(pattern);
let whitelisted = if normalized.starts_with('!') {
let pattern_type = if normalized.starts_with('!') {
normalized.remove(0);
true
PatternType::Whitelist
} else {
false
PatternType::Ignore
};
let anchored = if normalized.starts_with('/') {
@ -100,63 +145,26 @@ impl Pattern {
false
};
let directory = if normalized.ends_with('/') {
if normalized.ends_with('/') {
normalized.pop();
true
} else {
false
};
}
if normalized.starts_with("\\#") || normalized.starts_with("\\!") {
normalized.remove(0);
}
let pat = try!(glob::Pattern::new(&normalized));
Ok(Pattern {
pattern: pat,
str: String::from(normalized),
root: root.to_path_buf(),
whitelist: whitelisted,
directory: directory,
Pattern {
pattern: normalized,
pattern_type: pattern_type,
anchored: anchored,
})
}
/// Tests whether `path` is matched by this rule.
///
/// `path` is first made relative to the rule's root; a path outside the root
/// never matches. Matching is case-insensitive and requires literal path
/// separators. Then:
///
/// * an anchored rule is compared against the first component only;
/// * a rule without a `/` is compared against every component in turn;
/// * any other rule is compared against the whole relative path.
fn matches(&self, path: &Path) -> bool {
    let relative = match path.strip_prefix(&self.root) {
        Ok(rel) => rel,
        Err(_) => return false,
    };

    let options = glob::MatchOptions {
        case_sensitive: false,
        require_literal_separator: true,
        require_literal_leading_dot: false,
    };
    let hit = |candidate: &Path| self.pattern.matches_path_with(candidate, &options);

    if self.anchored {
        // An empty relative path has no first component and cannot match.
        relative.iter().next().map_or(false, |first| hit(Path::new(first)))
    } else if self.str.contains('/') {
        hit(relative)
    } else {
        relative.iter().any(|component| hit(Path::new(component)))
    }
}
}
impl From<glob::PatternError> for Error {
fn from(error: glob::PatternError) -> Error {
Error::Glob(error)
impl From<globset::Error> for Error {
fn from(error: globset::Error) -> Error {
Error::GlobSet(error)
}
}
@ -166,126 +174,122 @@ impl From<io::Error> for Error {
}
}
// fn main() {
// let cwd = env::current_dir().unwrap();
// let gitignore_file = cwd.join(".gitignore");
// let file = File::new(&gitignore_file).unwrap();
// for arg in env::args().skip(1) {
// let path = cwd.join(&arg);
// let matches = file.is_excluded(&path);
// println!("File: {}, Excluded: {}", arg, matches);
// }
// }
#[cfg(test)]
mod tests {
use super::Pattern;
use super::GitignoreFile;
use std::path::PathBuf;
// Fixed root directory used by every test; paths under test are built by
// joining onto it.
fn base_dir() -> PathBuf {
PathBuf::from("/home/user/dir")
}
fn build_pattern(pattern: &str) -> Pattern {
Pattern::new(pattern, &base_dir()).unwrap()
// Builds a GitignoreFile rooted at base_dir() from a single rule line.
// Panics (via unwrap) if the rule cannot be compiled.
fn build_gitignore(pattern: &str) -> GitignoreFile {
GitignoreFile::from_strings(vec![pattern], &base_dir()).unwrap()
}
#[test]
fn test_matches_exact() {
let pattern = build_pattern("Cargo.toml");
let file = build_gitignore("Cargo.toml");
assert!(pattern.matches(&base_dir().join("Cargo.toml")));
}
#[test]
fn test_matches_simple_wildcard() {
let pattern = build_pattern("targ*");
assert!(pattern.matches(&base_dir().join("target")));
assert!(file.is_excluded(&base_dir().join("Cargo.toml")));
}
#[test]
fn test_does_not_match() {
let pattern = build_pattern("Cargo.toml");
let file = build_gitignore("Cargo.toml");
assert!(!pattern.matches(&base_dir().join("src").join("main.rs")));
assert!(!file.is_excluded(&base_dir().join("src").join("main.rs")));
}
// A `*` wildcard in a rule matches the rest of a path component:
// `targ*` excludes `target`.
#[test]
fn test_matches_simple_wildcard() {
let file = build_gitignore("targ*");
assert!(file.is_excluded(&base_dir().join("target")));
}
// A bare rule name excludes the entry itself when the path is given with a
// trailing slash.
#[test]
fn test_matches_subdir_exact() {
let file = build_gitignore("target");
assert!(file.is_excluded(&base_dir().join("target/")));
}
#[test]
fn test_matches_subdir() {
let pattern = build_pattern("target");
let file = build_gitignore("target");
assert!(pattern.matches(&base_dir().join("target").join("file")));
assert!(pattern.matches(&base_dir().join("target").join("subdir").join("file")));
assert!(file.is_excluded(&base_dir().join("target").join("file")));
assert!(file.is_excluded(&base_dir().join("target").join("subdir").join("file")));
}
#[test]
fn test_wildcard_with_dir() {
let pattern = build_pattern("target/f*");
let file = build_gitignore("target/f*");
assert!(pattern.matches(&base_dir().join("target").join("file")));
assert!(!pattern.matches(&base_dir().join("target").join("subdir").join("file")));
assert!(file.is_excluded(&base_dir().join("target").join("file")));
assert!(!file.is_excluded(&base_dir().join("target").join("subdir").join("file")));
}
#[test]
fn test_leading_slash() {
let pattern = build_pattern("/*.c");
let file = build_gitignore("/*.c");
assert!(pattern.matches(&base_dir().join("cat-file.c")));
assert!(!pattern.matches(&base_dir().join("mozilla-sha1").join("sha1.c")));
assert!(file.is_excluded(&base_dir().join("cat-file.c")));
assert!(!file.is_excluded(&base_dir().join("mozilla-sha1").join("sha1.c")));
}
#[test]
fn test_leading_double_wildcard() {
let pattern = build_pattern("**/foo");
let file = build_gitignore("**/foo");
assert!(pattern.matches(&base_dir().join("foo")));
assert!(pattern.matches(&base_dir().join("target").join("foo")));
assert!(pattern.matches(&base_dir().join("target").join("subdir").join("foo")));
assert!(file.is_excluded(&base_dir().join("foo")));
assert!(file.is_excluded(&base_dir().join("target").join("foo")));
assert!(file.is_excluded(&base_dir().join("target").join("subdir").join("foo")));
}
#[test]
fn test_trailing_double_wildcard() {
let pattern = build_pattern("abc/**");
let file = build_gitignore("abc/**");
assert!(!pattern.matches(&base_dir().join("def").join("foo")));
assert!(pattern.matches(&base_dir().join("abc").join("foo")));
assert!(pattern.matches(&base_dir().join("abc").join("subdir").join("foo")));
assert!(!file.is_excluded(&base_dir().join("def").join("foo")));
assert!(file.is_excluded(&base_dir().join("abc").join("foo")));
assert!(file.is_excluded(&base_dir().join("abc").join("subdir").join("foo")));
}
#[test]
fn test_sandwiched_double_wildcard() {
let pattern = build_pattern("a/**/b");
let file = build_gitignore("a/**/b");
assert!(pattern.matches(&base_dir().join("a").join("b")));
assert!(pattern.matches(&base_dir().join("a").join("x").join("b")));
assert!(pattern.matches(&base_dir().join("a").join("x").join("y").join("b")));
}
use super::PatternSet;
#[test]
fn test_empty_pattern_set_never_excludes() {
let set = PatternSet::new(vec![]);
assert!(!set.is_excluded(&base_dir().join("target")));
assert!(file.is_excluded(&base_dir().join("a").join("b")));
assert!(file.is_excluded(&base_dir().join("a").join("x").join("b")));
assert!(file.is_excluded(&base_dir().join("a").join("x").join("y").join("b")));
}
#[test]
fn test_set_tests_all_patterns() {
let patterns = vec![build_pattern("target"), build_pattern("target2")];
let set = PatternSet::new(patterns);
fn test_empty_file_never_excludes() {
let file = GitignoreFile::from_strings(vec![], &base_dir()).unwrap();
assert!(set.is_excluded(&base_dir().join("target").join("foo.txt")));
assert!(set.is_excluded(&base_dir().join("target2").join("bar.txt")));
assert!(!file.is_excluded(&base_dir().join("target")));
}
#[test]
fn test_set_handles_whitelisting() {
let patterns = vec![build_pattern("target"), build_pattern("!target/foo.txt")];
let set = PatternSet::new(patterns);
fn test_checks_all_patterns() {
let patterns = vec!["target", "target2"];
let file = GitignoreFile::from_strings(patterns, &base_dir()).unwrap();
assert!(!set.is_excluded(&base_dir().join("target").join("foo.txt")));
assert!(file.is_excluded(&base_dir().join("target").join("foo.txt")));
assert!(file.is_excluded(&base_dir().join("target2").join("bar.txt")));
}
// A later `!` rule re-includes a path that an earlier rule excluded, while
// other paths under the excluded directory stay excluded.
#[test]
fn test_handles_whitelisting() {
let patterns = vec!["target", "!target/foo.txt"];
let file = GitignoreFile::from_strings(patterns, &base_dir()).unwrap();
assert!(!file.is_excluded(&base_dir().join("target").join("foo.txt")));
assert!(file.is_excluded(&base_dir().join("target").join("blah.txt")));
}
}

View File

@ -28,43 +28,16 @@ mod watcher;
use std::collections::HashMap;
use std::env;
use std::path::Path;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::sync::mpsc::{channel, Receiver};
use std::time::Duration;
use std::path::PathBuf;
use notification_filter::NotificationFilter;
use process::Process;
use signal::Signal;
use watcher::{Event, Watcher};
/// Finds the nearest `.gitignore`, starting at `path` and walking up through
/// its parent directories.
///
/// Returns the path of the first `.gitignore` entry found. Returns `None` if
/// the walk reaches a directory that contains a `.git` directory, or runs out
/// of parents, without finding one.
fn find_gitignore(path: &Path) -> Option<PathBuf> {
    let mut dir = path.to_owned();

    loop {
        let candidate = dir.join(".gitignore");
        if candidate.exists() {
            return Some(candidate);
        }

        // Stop if we see a .git directory
        let has_git_dir = dir.join(".git")
            .metadata()
            .map(|meta| meta.is_dir())
            .unwrap_or(false);

        // `pop` reports false (and leaves `dir` alone) when there is no parent.
        if has_git_dir || !dir.pop() {
            return None;
        }
    }
}
fn init_logger(debug: bool) {
let mut log_builder = env_logger::LogBuilder::new();
let level = if debug {
@ -111,14 +84,11 @@ fn main() {
.canonicalize()
.expect("unable to canonicalize cwd");
let mut gitignore_file = None;
if !args.no_vcs_ignore {
if let Some(gitignore_path) = find_gitignore(&cwd) {
debug!("Found .gitignore file: {:?}", gitignore_path);
gitignore_file = gitignore::parse(&gitignore_path).ok();
}
}
let gitignore_file = if !args.no_vcs_ignore {
gitignore::load(&cwd)
} else {
None
};
let filter = NotificationFilter::new(args.filters, args.ignores, gitignore_file)
.expect("unable to create notification filter");

View File

@ -12,7 +12,7 @@ pub struct NotificationFilter {
filters: GlobSet,
filter_count: usize,
ignores: GlobSet,
ignore_file: Option<gitignore::PatternSet>,
ignore_file: Option<gitignore::GitignoreFile>,
}
#[derive(Debug)]
@ -24,7 +24,7 @@ pub enum Error {
impl NotificationFilter {
pub fn new(filters: Vec<String>,
ignores: Vec<String>,
ignore_file: Option<gitignore::PatternSet>)
ignore_file: Option<gitignore::GitignoreFile>)
-> Result<NotificationFilter, Error> {
let mut filter_set_builder = GlobSetBuilder::new();
for f in &filters {