blob: 101cce12ed43ecb93d919b0e7be8bbcc74916a92 [file] [log] [blame]
package pathregex
import (
"bytes"
"io"
"regexp"
"strings"
"veyron2/verror"
)
// parser is a recursive-descent parser for path regular expressions.
type parser struct {
reader *strings.Reader
// commas are treated specially within braces, but are otherwise normal
// characters.
commaIsSpecial bool
// isErrored is true iff an error has been encountered.
isErrored bool
}
// Compile compiles a string to a finite automaton. Returns a StateSet of the
// initial states of the automaton.
func Compile(s string) (StateSet, error) {
re, err := compileRegex(s)
if err != nil {
return nil, err
}
return compileNFA(re), nil
}
// CompileReverse compiles a path regular expression to a finite automaton,
// reversing the regular expression. Returns a StateSet of the initial states
// of the automaton.
func CompileReverse(s string) (StateSet, error) {
re, err := compileRegex(s)
if err != nil {
return nil, err
}
re.reverse()
return compileNFA(re), nil
}
func compileRegex(s string) (regex, error) {
p := &parser{reader: strings.NewReader(s)}
re := p.parsePath()
if p.isErrored || p.reader.Len() != 0 {
pos, _ := p.reader.Seek(0, 1)
err := verror.BadArgf("Syntax error at char %d: %q", pos, s)
return nil, err
}
return re, nil
}
// parsePath reads a path regular expression. Reads as much of the input as
// possible, stopping at special characters, like '}' or ',' (if
// commaIsSpecial).
func (p *parser) parsePath() regex {
var path []regex
for !p.isErrored {
// If the next rune is '{', parse as an alternation; otherwise, parse
// the next component.
c, ok := p.readRune()
if !ok {
break
}
var re regex
if c == '{' {
re = p.parsePathAlt()
} else {
p.unreadRune()
re = p.parseComponent()
}
if re != nil {
path = append(path, re)
}
c, ok = p.readRune()
if !ok {
break
}
if c != '/' {
p.unreadRune()
break
}
}
return newSequence(path)
}
// parsePathAlt reads an alternation {p1,p2,...,pn}. Assumes the opening brace
// has already been read; consumes the closing brace.
func (p *parser) parsePathAlt() regex {
s := p.commaIsSpecial
defer func() { p.commaIsSpecial = s }()
p.commaIsSpecial = true
var choices []regex
parseLoop:
for !p.isErrored {
if re := p.parsePath(); re != nil {
choices = append(choices, re)
}
c, ok := p.readRune()
if !ok {
break parseLoop
}
switch c {
case ',':
// skip
case '}':
return newAlt(choices)
default:
break parseLoop
}
}
p.setError()
return nil
}
// parseComponent parses a single component of a path. This is a glob
// expression.
func (p *parser) parseComponent() regex {
// p.reader.Seek(0, 1) just returns the current position.
startPos, _ := p.reader.Seek(0, 1)
var buf bytes.Buffer
p.parseComponentPiece(&buf)
if buf.Len() == 0 {
return nil
}
endPos, _ := p.reader.Seek(0, 1)
// The ... component name is special.
if endPos-startPos == 3 {
var literal [3]byte
p.reader.ReadAt(literal[:], startPos)
if string(literal[:]) == "..." {
return dotDotDotRegex
}
}
// Everything else is a regular expression.
re, err := regexp.Compile("^" + buf.String() + "$")
if err != nil {
p.setError()
return nil
}
return newSingle(re)
}
// parseComponentPiece reads a glob expression and converts it to a regular
// expression.
func (p *parser) parseComponentPiece(buf *bytes.Buffer) {
for !p.isErrored {
c, ok := p.readRune()
if !ok {
return
}
switch c {
case '*':
buf.WriteString(".*")
case '?':
buf.WriteString(".")
case '.', '(', ')':
buf.WriteRune('\\')
buf.WriteRune(c)
case '\\':
p.parseEscapedRune(buf)
case '[':
buf.WriteRune('[')
p.parseCharRange(buf)
buf.WriteRune(']')
case '{':
buf.WriteRune('(')
p.parseComponentAlt(buf)
buf.WriteRune(')')
case '}', ']', '/':
p.unreadRune()
return
case ',':
if p.commaIsSpecial {
p.unreadRune()
return
} else {
buf.WriteRune(c)
}
default:
buf.WriteRune(c)
}
}
}
// parseCharRange copies the input range literally.
//
// TODO(jyh): Translate the glob range.
func (p *parser) parseCharRange(buf *bytes.Buffer) {
// Initial ] does not close the range.
c, ok := p.readRune()
if !ok {
p.setError()
return
}
buf.WriteRune(c)
for {
c, ok := p.readRune()
if !ok {
p.setError()
return
}
if c == ']' {
break
}
buf.WriteRune(c)
}
}
// parseComponentAlt parses an alternation.
func (p *parser) parseComponentAlt(buf *bytes.Buffer) {
s := p.commaIsSpecial
p.commaIsSpecial = true
defer func() { p.commaIsSpecial = s }()
for {
p.parseComponentPiece(buf)
c, ok := p.readRune()
if !ok {
p.setError()
return
}
switch c {
case ',':
buf.WriteRune('|')
case '}':
return
default:
p.setError()
return
}
}
}
// parseEscapedRune parses a rune immediately after a backslash.
func (p *parser) parseEscapedRune(buf *bytes.Buffer) {
c, ok := p.readRune()
if !ok {
return
}
// TODO(jyh): Are there any special escape sequences?
buf.WriteRune('\\')
buf.WriteRune(c)
}
// readRune reads the next rune from the input reader. If there is an error,
// returns false and sets the isErrored flag if the error is not io.EOF.
func (p *parser) readRune() (rune, bool) {
if p.isErrored {
return 0, false
}
c, _, err := p.reader.ReadRune()
switch {
case err == io.EOF:
return c, false
case err != nil:
p.setError()
return c, false
}
return c, true
}
// unreadRune pushes back the last rune read.
func (p *parser) unreadRune() {
p.reader.UnreadRune()
}
// setError sets the error flag.
func (p *parser) setError() {
p.isErrored = true
}