blob: 8e5d7a7dae177ea33cf0ca638eaffdfa44349597 [file] [log] [blame]
// Copyright 2016 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pattern handles parsing and matching SQL LIKE-style glob patterns.
package pattern
import (
"bytes"
"fmt"
"regexp"
"v.io/v23/verror"
)
// DefaultEscapeChar is the escape character that Parse and Escape pass to
// their WithEscapeChar counterparts: a single backslash.
const DefaultEscapeChar = '\\'
// Pattern is a parsed LIKE-style glob pattern. It holds the compiled regular
// expression used for matching together with the literal prefix recorded
// while the pattern was parsed.
type Pattern struct {
// regex is the regular expression equivalent to the original LIKE pattern.
regex *regexp.Regexp
// fixedPrefix is the fixed prefix that all pattern matches must start with:
// the unescaped literal text preceding the first wildcard.
fixedPrefix string
// noWildcards is true if the pattern contains no unescaped wildcards; in
// this case, fixedPrefix is the entire unescaped expression.
noWildcards bool
}
// Parse converts a LIKE-style glob pattern into a Pattern, using
// DefaultEscapeChar ('\') as the escape character. It is shorthand for
// ParseWithEscapeChar(pattern, DefaultEscapeChar); see that function for the
// supported syntax.
func Parse(pattern string) (*Pattern, error) {
	parsed, err := ParseWithEscapeChar(pattern, DefaultEscapeChar)
	return parsed, err
}
// ParseWithEscapeChar parses a LIKE-style glob pattern.
//
// Supported wildcards are '_' (match any one character) and '%' (match zero or
// more characters); both also match newline characters. They can be escaped by
// escChar; escChar can also escape itself. Using escChar before any other
// character, or as the last character of the pattern, is an error. '_' and '%'
// cannot be used as escChar; '\x00' escChar disables escaping.
//
// On success the returned Pattern carries the compiled regular expression, the
// literal prefix preceding the first unescaped wildcard, and a flag telling
// whether the pattern contained no unescaped wildcard at all.
func ParseWithEscapeChar(pattern string, escChar rune) (*Pattern, error) {
	if escChar == '%' || escChar == '_' {
		return nil, NewErrIllegalEscapeChar(nil)
	}

	// The LIKE-style pattern is converted to a regex, converting:
	//   % to .*?
	//   _ to .
	// Everything else that would be incorrectly interpreted as a regex is
	// escaped. The approach this function takes is to collect characters to be
	// escaped into toBeEscapedBuf. When a wildcard is encountered, first
	// toBeEscapedBuf is escaped and written to the regex buffer, next the
	// wildcard is translated to regex (either ".*?" or ".") and written to the
	// regex buffer. At the end, any remaining chars in toBeEscapedBuf are
	// written.
	var buf bytes.Buffer            // buffer for return regex
	var toBeEscapedBuf bytes.Buffer // buffer to hold characters waiting to be escaped

	// Even though regexp.Regexp provides a LiteralPrefix() method, it doesn't
	// always return the longest fixed prefix, so we save it while parsing.
	var fixedPrefix string
	foundWildcard := false

	// The regex has the shape '(?s)^<regex_str>$'. The s flag is required
	// because Go's '.' does not match '\n' by default, which would make '_' and
	// '%' silently fail on strings that contain newlines.
	buf.WriteString("(?s)^")

	escapedMode := false
	for _, c := range pattern {
		if escapedMode {
			// Only a wildcard or escChar itself may follow escChar.
			switch c {
			case '%', '_', escChar:
				toBeEscapedBuf.WriteRune(c)
			default:
				return nil, NewErrInvalidEscape(nil, string(c))
			}
			escapedMode = false
			continue
		}
		switch c {
		case '%', '_':
			// Write out any chars waiting to be escaped, then write ".*?" or ".".
			buf.WriteString(regexp.QuoteMeta(toBeEscapedBuf.String()))
			if !foundWildcard {
				// First wildcard found, fixedPrefix is the pattern up to it.
				fixedPrefix = toBeEscapedBuf.String()
				foundWildcard = true
			}
			toBeEscapedBuf.Reset()
			if c == '%' {
				buf.WriteString(".*?")
			} else {
				buf.WriteString(".")
			}
		case escChar:
			if escChar != '\x00' {
				escapedMode = true
			} else {
				// nul is never an escape char, treat same as default.
				toBeEscapedBuf.WriteRune(c)
			}
		default:
			toBeEscapedBuf.WriteRune(c)
		}
	}
	if escapedMode {
		// The pattern ended right after an escChar with nothing to escape.
		return nil, NewErrInvalidEscape(nil, "<end>")
	}

	// Write any remaining chars in toBeEscapedBuf.
	buf.WriteString(regexp.QuoteMeta(toBeEscapedBuf.String()))
	if !foundWildcard {
		// No wildcard found, fixedPrefix is the entire pattern.
		fixedPrefix = toBeEscapedBuf.String()
	}
	buf.WriteString("$") // '(?s)^<regex_str>$'

	regex := buf.String()
	compRegex, err := regexp.Compile(regex)
	if err != nil {
		// TODO(ivanpi): Should never happen. Panic here?
		return nil, verror.New(verror.ErrInternal, nil, fmt.Sprintf("failed to compile pattern %q (regular expression %q): %v", pattern, regex, err))
	}
	return &Pattern{
		regex:       compRegex,
		fixedPrefix: fixedPrefix,
		noWildcards: !foundWildcard,
	}, nil
}
// MatchString reports whether the pattern matches the entire string s. The
// check is delegated to the regular expression compiled for the pattern.
func (p *Pattern) MatchString(s string) bool {
	matched := p.regex.MatchString(s)
	return matched
}
// FixedPrefix returns the unescaped literal prefix that every matching string
// must start with. The boolean result is true when the pattern contains no
// unescaped wildcards, i.e. when the prefix is the whole pattern.
func (p *Pattern) FixedPrefix() (string, bool) {
	prefix, isWholePattern := p.fixedPrefix, p.noWildcards
	return prefix, isWholePattern
}
// Escape returns s escaped for literal inclusion in a LIKE-style pattern,
// using DefaultEscapeChar ('\') as the escape character. It is shorthand for
// EscapeWithEscapeChar(s, DefaultEscapeChar).
func Escape(s string) string {
	escaped := EscapeWithEscapeChar(s, DefaultEscapeChar)
	return escaped
}
// EscapeWithEscapeChar escapes a literal string for inclusion in a LIKE-style
// pattern. It inserts escChar before each '_', '%', and escChar in the string.
//
// It panics if escChar is '\x00' (which disables escaping and therefore cannot
// escape anything), or if escChar is '%' or '_'.
func EscapeWithEscapeChar(s string, escChar rune) string {
	if escChar == '\x00' {
		// The backslash is doubled so that the message shows the text \x00
		// rather than embedding a raw NUL byte in the error string.
		panic(verror.New(verror.ErrBadArg, nil, "'\\x00' disables escaping, cannot be used in EscapeWithEscapeChar"))
	}
	if escChar == '%' || escChar == '_' {
		panic(NewErrIllegalEscapeChar(nil))
	}
	var buf bytes.Buffer
	buf.Grow(len(s)) // the output is never shorter than the input
	for _, c := range s {
		if c == '%' || c == '_' || c == escChar {
			buf.WriteRune(escChar)
		}
		buf.WriteRune(c)
	}
	return buf.String()
}