Use regex for matcher parsing

- Somewhat better maintainability I think
This commit is contained in:
cyqsimon 2023-11-02 23:16:04 +08:00
parent 0fea82cff9
commit 9f7d70f642
No known key found for this signature in database
GPG Key ID: 1D8CE2F297390D65
2 changed files with 40 additions and 65 deletions

View File

@ -102,6 +102,8 @@ nix = { version = "0.26.4", default-features = false, features = ["term"] }
anyhow = "1.0.75"
indexmap = { version = "2.1.0", features = ["serde"] }
itertools = "0.11.0"
once_cell = "1.18"
regex = "1.10.2"
serde = { version = "1.0", features = ["derive"] }
serde_with = "3.4.0"
toml = { version = "0.8.6", features = ["preserve_order"] }

View File

@ -3,6 +3,8 @@ use std::{convert::Infallible, env, fs, path::Path, str::FromStr};
use anyhow::{anyhow, bail};
use indexmap::IndexMap;
use itertools::Itertools;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::Deserialize;
use serde_with::DeserializeFromStr;
use walkdir::WalkDir;
@ -43,8 +45,10 @@ impl MappingTarget {
struct Matcher(Vec<MatcherSegment>);
/// Parse a matcher.
///
/// Note that this implementation is rather strict: when it sees a '$', '{', or
/// '}' where it does not make sense, it will immediately hard-error.
/// Note that this implementation is rather strict: it will greedily interpret
/// every valid environment variable replacement as such, then immediately
/// hard-error if it finds a '$', '{', or '}' anywhere in the remaining text
/// segments.
///
/// The reason for this strictness is I currently cannot think of a valid reason
/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern.
@ -55,72 +59,41 @@ impl FromStr for Matcher {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use MatcherSegment as Seg;
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_\-]+)\}").unwrap());
if s.is_empty() {
bail!("Empty string is not a valid glob matcher");
let mut segments = vec![];
let mut text_start = 0;
for capture in VAR_REGEX.captures_iter(s) {
let match_0 = capture.get(0).unwrap();
// text before this var
let text_end = match_0.start();
segments.push(Seg::Text(s[text_start..text_end].into()));
text_start = match_0.end();
// this var
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
}
// possible trailing text
segments.push(Seg::Text(s[text_start..].into()));
// cleanup empty text segments
let non_empty_segments = segments
.into_iter()
.filter(|seg| match seg {
Seg::Text(t) => !t.is_empty(),
Seg::Env(_) => true,
})
.collect_vec();
if non_empty_segments.iter().any(|seg| match seg {
Seg::Text(t) => t.contains(['$', '{', '}']),
Seg::Env(_) => false,
}) {
bail!(r#"Invalid matcher: "{s}""#);
}
let mut segments = Vec::new();
let mut buf = String::new();
let mut is_in_var = false;
let mut char_it = s.chars();
loop {
match char_it.next() {
Some('$') => {
if is_in_var {
bail!(r#"Saw a '$' when already in a variable: "{s}""#);
}
match char_it.next() {
Some('{') => {
// push text unless empty
if !buf.is_empty() {
segments.push(Seg::Text(buf.clone()));
buf.clear();
}
// open var
is_in_var = true;
}
Some(_) | None => {
bail!(r#"Expected a '{{' after '$': "{s}""#);
}
}
}
Some('{') => {
bail!(r#"Saw a hanging '{{': "{s}""#);
}
Some('}') => {
if !is_in_var {
bail!(r#"Saw a '}}' when not in a variable: "{s}""#);
}
if buf.is_empty() {
// `${}`
bail!(r#"Variable name cannot be empty: "{s}""#);
}
// push variable
segments.push(Seg::Env(buf.clone()));
buf.clear();
// close var
is_in_var = false;
}
Some(' ') if is_in_var => {
bail!(r#"' ' Cannot be part of a variable's name: "{s}""#);
}
Some(c) => {
// either plaintext or variable name
buf.push(c);
}
None => {
if is_in_var {
bail!(r#"Variable unclosed: "{s}""#);
}
segments.push(Seg::Text(buf.clone()));
break;
}
}
}
Ok(Self(segments))
Ok(Self(non_empty_segments))
}
}
impl Matcher {