fd/src/regex_helper.rs
2020-04-03 22:36:32 +02:00

52 lines
1.8 KiB
Rust

use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
/// Determine if a regex pattern contains a literal uppercase character.
pub fn pattern_has_uppercase_char(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
parser
.parse(pattern)
.map(|hir| hir_has_uppercase_char(&hir))
.unwrap_or(false)
}
/// Determine if a regex expression contains a literal uppercase character.
fn hir_has_uppercase_char(hir: &Hir) -> bool {
use regex_syntax::hir::*;
match *hir.kind() {
HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(),
HirKind::Literal(Literal::Byte(b)) => char::from(b).is_uppercase(),
HirKind::Class(Class::Unicode(ref ranges)) => ranges
.iter()
.any(|r| r.start().is_uppercase() || r.end().is_uppercase()),
HirKind::Class(Class::Bytes(ref ranges)) => ranges
.iter()
.any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()),
HirKind::Group(Group { ref hir, .. }) | HirKind::Repetition(Repetition { ref hir, .. }) => {
hir_has_uppercase_char(hir)
}
HirKind::Concat(ref hirs) | HirKind::Alternation(ref hirs) => {
hirs.iter().any(hir_has_uppercase_char)
}
_ => false,
}
}
#[test]
fn pattern_has_uppercase_char_simple() {
assert!(pattern_has_uppercase_char("A"));
assert!(pattern_has_uppercase_char("foo.EXE"));
assert!(!pattern_has_uppercase_char("a"));
assert!(!pattern_has_uppercase_char("foo.exe123"));
}
#[test]
fn pattern_has_uppercase_char_advanced() {
assert!(pattern_has_uppercase_char("foo.[a-zA-Z]"));
assert!(!pattern_has_uppercase_char(r"\Acargo"));
assert!(!pattern_has_uppercase_char(r"carg\x6F"));
}