fd/src/regex_helper.rs
2024-03-08 10:46:24 -08:00

105 lines
3.5 KiB
Rust

use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
/// Determine if a regex pattern contains a literal uppercase character.
pub fn pattern_has_uppercase_char(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().utf8(false).build();
parser
.parse(pattern)
.map(|hir| hir_has_uppercase_char(&hir))
.unwrap_or(false)
}
/// Determine if a regex expression contains a literal uppercase character.
fn hir_has_uppercase_char(hir: &Hir) -> bool {
use regex_syntax::hir::*;
match hir.kind() {
HirKind::Literal(Literal(bytes)) => match std::str::from_utf8(bytes) {
Ok(s) => s.chars().any(|c| c.is_uppercase()),
Err(_) => bytes.iter().any(|b| char::from(*b).is_uppercase()),
},
HirKind::Class(Class::Unicode(ranges)) => ranges
.iter()
.any(|r| r.start().is_uppercase() || r.end().is_uppercase()),
HirKind::Class(Class::Bytes(ranges)) => ranges
.iter()
.any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()),
HirKind::Capture(Capture { sub, .. }) | HirKind::Repetition(Repetition { sub, .. }) => {
hir_has_uppercase_char(sub)
}
HirKind::Concat(hirs) | HirKind::Alternation(hirs) => {
hirs.iter().any(hir_has_uppercase_char)
}
_ => false,
}
}
/// Determine if a regex pattern only matches strings starting with a literal dot (hidden files)
pub fn pattern_matches_strings_with_leading_dot(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().utf8(false).build();
parser
.parse(pattern)
.map(|hir| hir_matches_strings_with_leading_dot(&hir))
.unwrap_or(false)
}
/// See above.
fn hir_matches_strings_with_leading_dot(hir: &Hir) -> bool {
use regex_syntax::hir::*;
// Note: this only really detects the simplest case where a regex starts with
// "^\\.", i.e. a start text anchor and a literal dot character. There are a lot
// of other patterns that ONLY match hidden files, e.g. ^(\\.foo|\\.bar) which are
// not (yet) detected by this algorithm.
match hir.kind() {
HirKind::Concat(hirs) => {
let mut hirs = hirs.iter();
if let Some(hir) = hirs.next() {
if hir.kind() != &HirKind::Look(Look::Start) {
return false;
}
} else {
return false;
}
if let Some(hir) = hirs.next() {
match hir.kind() {
HirKind::Literal(Literal(bytes)) => bytes.starts_with(&[b'.']),
_ => false,
}
} else {
false
}
}
_ => false,
}
}
#[test]
fn pattern_has_uppercase_char_simple() {
assert!(pattern_has_uppercase_char("A"));
assert!(pattern_has_uppercase_char("foo.EXE"));
assert!(!pattern_has_uppercase_char("a"));
assert!(!pattern_has_uppercase_char("foo.exe123"));
}
#[test]
fn pattern_has_uppercase_char_advanced() {
assert!(pattern_has_uppercase_char("foo.[a-zA-Z]"));
assert!(!pattern_has_uppercase_char(r"\Acargo"));
assert!(!pattern_has_uppercase_char(r"carg\x6F"));
}
#[test]
fn matches_strings_with_leading_dot_simple() {
assert!(pattern_matches_strings_with_leading_dot("^\\.gitignore"));
assert!(!pattern_matches_strings_with_leading_dot("^.gitignore"));
assert!(!pattern_matches_strings_with_leading_dot("\\.gitignore"));
assert!(!pattern_matches_strings_with_leading_dot("^gitignore"));
}