cheat/vendor/github.com/danwakefield/fnmatch/fnmatch.go
2019-10-27 12:04:31 -04:00

219 lines
5.6 KiB
Go

// Package fnmatch provides string matching based on fnmatch(3).
package fnmatch
// There are a few issues that I believe to be bugs, but this implementation is
// based as closely as possible on BSD fnmatch. These bugs are present in the
// source of BSD fnmatch, and so are replicated here. The issues are as follows:
//
// * FNM_PERIOD is no longer observed after the first * in a pattern.
//   This only affects matches that also use FNM_PATHNAME.
// * FNM_PERIOD doesn't apply to ranges. According to the documentation,
//   a period must be matched explicitly, but a range will match it too.
import (
"unicode"
"unicode/utf8"
)
// Flags understood by Match. Each of the first five occupies its own bit, so
// they can be combined with bitwise OR; the last two are aliases.
const (
	// FNM_NOESCAPE makes a backslash in the pattern an ordinary character
	// instead of an escape for the character that follows it.
	FNM_NOESCAPE = 1 << iota
	// FNM_PATHNAME requires a '/' in the string to be matched by a literal
	// '/' in the pattern; '?', '*' and bracket expressions will not match it.
	FNM_PATHNAME
	// FNM_PERIOD stops '?' and '*' from matching a '.' at the start of the
	// string (or directly after a '/' when FNM_PATHNAME is also set).
	FNM_PERIOD
	// FNM_LEADING_DIR lets the match succeed when the pattern matches an
	// initial part of the string and the remainder starts with '/'.
	FNM_LEADING_DIR
	// FNM_CASEFOLD compares characters case-insensitively.
	FNM_CASEFOLD
	// FNM_IGNORECASE is an alias for FNM_CASEFOLD.
	FNM_IGNORECASE = FNM_CASEFOLD
	// FNM_FILE_NAME is an alias for FNM_PATHNAME.
	FNM_FILE_NAME = FNM_PATHNAME
)
// unpackRune decodes the first UTF-8 encoded rune of *str, advances *str past
// the bytes that were consumed, and returns the decoded rune. Decoding is
// delegated to utf8.DecodeRuneInString, so whatever rune and width it reports
// (including for empty or malformed input) is what gets returned and skipped.
func unpackRune(str *string) rune {
	r, width := utf8.DecodeRuneInString(*str)
	*str = (*str)[width:]
	return r
}
// Match reports whether the string s matches the shell-style pattern, subject
// to the given combination of FNM_* flags. It is meant to follow fnmatch(3) as
// closely as possible.
//
// Pattern syntax handled here:
//   - '?' matches any single character.
//   - '*' matches any (possibly empty) sequence of characters.
//   - '[' starts a bracket expression, which is evaluated by rangematch.
//   - '\' makes the next pattern character literal unless FNM_NOESCAPE is set.
//   - any other character matches itself (case-insensitively with FNM_CASEFOLD).
//
// With FNM_PATHNAME a '/' in s is only matched by a literal '/' in the
// pattern. With FNM_PERIOD a '.' at the start of s (or right after a '/' when
// FNM_PATHNAME is set) is not matched by '?' or '*'. With FNM_LEADING_DIR the
// match also succeeds when the unmatched remainder of s starts with '/'.
func Match(pattern, s string, flags int) bool {
	// The implementation for this function was patterned after the BSD fnmatch.c
	// source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html
	noescape := (flags&FNM_NOESCAPE != 0)
	pathname := (flags&FNM_PATHNAME != 0)
	period := (flags&FNM_PERIOD != 0)
	leadingdir := (flags&FNM_LEADING_DIR != 0)
	casefold := (flags&FNM_CASEFOLD != 0)

	// The following is some bookkeeping that the original fnmatch.c
	// implementation did not do. We are forced to do this because we're not
	// keeping indexes into C strings but rather processing utf8-encoded
	// strings. Use a custom unpacker to maintain our state for us.
	sAtStart := true         // nothing has been consumed from s yet
	sLastAtStart := true     // value of sAtStart before the most recent unpackS
	sLastSlash := false      // the rune before the most recently unpacked one was '/'
	sLastUnpacked := rune(0) // most recently unpacked rune of s
	unpackS := func() rune {
		sLastSlash = (sLastUnpacked == '/')
		sLastUnpacked = unpackRune(&s)
		sLastAtStart = sAtStart
		sAtStart = false
		return sLastUnpacked
	}

	for len(pattern) > 0 {
		c := unpackRune(&pattern)
		switch c {
		case '?':
			if len(s) == 0 {
				return false
			}
			sc := unpackS()
			if pathname && sc == '/' {
				return false
			}
			// A leading period has to be matched explicitly.
			if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) {
				return false
			}

		case '*':
			// collapse multiple *'s
			// don't use unpackRune here, the only char we care to detect is ASCII
			for len(pattern) > 0 && pattern[0] == '*' {
				pattern = pattern[1:]
			}
			// A leading period has to be matched explicitly. s may already be
			// exhausted here, so its length must be checked before indexing.
			if period && len(s) > 0 && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) {
				return false
			}
			// optimize for patterns with * at end or before /
			if len(pattern) == 0 {
				if pathname {
					return leadingdir || (strchr(s, '/') == -1)
				}
				return true
			}
			if pathname && pattern[0] == '/' {
				offset := strchr(s, '/')
				if offset == -1 {
					return false
				}
				// we already know our pattern and string have a /, skip past it
				s = s[offset:]
				unpackS()             // use unpackS here to maintain our bookkeeping state
				pattern = pattern[1:] // we know / is one byte long
				continue
			}
			// general case, recurse: try the rest of the pattern against every
			// suffix of s, one rune at a time
			for test := s; len(test) > 0; unpackRune(&test) {
				// I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified
				// but this follows exactly from how fnmatch.c implements it
				if Match(pattern, test, (flags &^ FNM_PERIOD)) {
					return true
				}
				// with FNM_PATHNAME a * may not consume a /
				if pathname && test[0] == '/' {
					break
				}
			}
			return false

		case '[':
			if len(s) == 0 {
				return false
			}
			if pathname && s[0] == '/' {
				return false
			}
			sc := unpackS()
			if !rangematch(&pattern, sc, flags) {
				return false
			}

		case '\\':
			// An escape makes the following pattern character literal; a
			// trailing backslash stands for itself.
			if !noescape {
				if len(pattern) > 0 {
					c = unpackRune(&pattern)
				}
			}
			fallthrough

		default:
			if len(s) == 0 {
				return false
			}
			sc := unpackS()
			switch {
			case sc == c:
			case casefold && unicode.ToLower(sc) == unicode.ToLower(c):
			default:
				return false
			}
		}
	}

	// The pattern is exhausted: s must be exhausted too, unless
	// FNM_LEADING_DIR allows a trailing "/..." remainder.
	return len(s) == 0 || (leadingdir && s[0] == '/')
}
// rangematch evaluates a bracket expression against the rune test.
//
// On entry *pattern must point just past the opening '['. The function
// consumes the expression from *pattern; when it returns true, *pattern has
// been advanced past the closing ']'.
//
// A leading '^' or '!' negates the set. Elements are single characters or
// "lo-hi" ranges compared by rune value. A backslash escapes the following
// character unless FNM_NOESCAPE is set in flags, and FNM_CASEFOLD lower-cases
// both test and the set elements before comparing. An unescaped ']' ends the
// set wherever it appears.
//
// It returns true only if a closing ']' was found and the set membership of
// test, after applying negation, is positive. It returns false for an empty
// or unterminated expression and for a dangling escape.
func rangematch(pattern *string, test rune, flags int) bool {
	if len(*pattern) == 0 {
		return false
	}
	casefold := (flags&FNM_CASEFOLD != 0)
	noescape := (flags&FNM_NOESCAPE != 0)
	if casefold {
		test = unicode.ToLower(test)
	}

	var negate, matched bool
	if (*pattern)[0] == '^' || (*pattern)[0] == '!' {
		negate = true
		(*pattern) = (*pattern)[1:]
	}

	for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' {
		c := unpackRune(pattern)
		if !noescape && c == '\\' {
			if len(*pattern) <= 1 {
				return false
			}
			c = unpackRune(pattern)
		}
		if casefold {
			c = unicode.ToLower(c)
		}
		// Check the length before indexing: a multi-byte rune may have
		// consumed everything that was left of the pattern.
		if len(*pattern) > 1 && (*pattern)[0] == '-' && (*pattern)[1] != ']' {
			unpackRune(pattern) // skip the -
			c2 := unpackRune(pattern)
			if !noescape && c2 == '\\' {
				if len(*pattern) == 0 {
					return false
				}
				c2 = unpackRune(pattern)
			}
			if casefold {
				c2 = unicode.ToLower(c2)
			}
			// this really should be more intelligent, but it looks like
			// fnmatch.c does simple int comparisons, therefore we will as well
			if c <= test && test <= c2 {
				matched = true
			}
		} else if c == test {
			matched = true
		}
	}

	// skip past the rest of the pattern, up to and including the closing ].
	// A backslash only hides the next character when escapes are enabled,
	// mirroring the handling in the loop above.
	ok := false
	for !ok && len(*pattern) > 0 {
		c := unpackRune(pattern)
		if !noescape && c == '\\' && len(*pattern) > 0 {
			unpackRune(pattern)
		} else if c == ']' {
			ok = true
		}
	}
	return ok && matched != negate
}
// strchr is a minimal stand-in for C's strchr, defined here because
// strings.Index() seems a bit overkill. It returns the byte offset of the
// first occurrence of the rune c in s, or -1 if c does not occur in s.
func strchr(s string, c rune) int {
	found := -1
	for pos, r := range s {
		if r == c {
			found = pos
			break
		}
	}
	return found
}