blob: 80b0f3db475a083a0591bb8fb59e69d0017ce7d0 [file] [log] [blame]
package pathregex
import (
"regexp"
"sort"
"unsafe"
)
// state is a state of the NFA.
type state struct {
// isFinal is true iff the state is a final state.
isFinal bool
// isClosed is true iff the epsilon transitions are transitively closed.
isClosed bool
// trans is a non-epsilon transitions, or nil.
trans *transition
// epsilon are epsilon transitions, meaning the automation can take a
// state transition without reading any input.
epsilon hashStateSet
}
// transition represents a non-empty transition. If the current input symbol matches the
// regular expression, the NFA can take a transition to the next states.
type transition struct {
re *regexp.Regexp // nil means accept anything.
next hashStateSet
}
// hashStateSet represents a set of states using a map.
type hashStateSet map[*state]struct{}
// StateSet represents a set of states of the NFA as a sorted slice of state pointers.
type StateSet []*state
// byPointer is used to sort the pointers in StateSet.
type byPointer StateSet
// tt accepts anything.
func (r tt) compile(final *state) *state {
t := &transition{next: hashStateSet{final: struct{}{}}}
return &state{trans: t, epsilon: make(hashStateSet)}
}
// ff has no transititons.
func (r *ff) compile(final *state) *state {
return &state{epsilon: make(hashStateSet)}
}
// epsilon doesn't require a transition.
func (r *epsilon) compile(final *state) *state {
return final
}
// single has a single transition from initial to final state.
func (r *single) compile(final *state) *state {
t := &transition{re: r.r, next: hashStateSet{final: struct{}{}}}
return &state{trans: t, epsilon: make(hashStateSet)}
}
// sequence composes the automata.
func (r *sequence) compile(final *state) *state {
return r.r1.compile(r.r2.compile(final))
}
// alt constructs the separate automata, then defines a new start state that
// includes epsilon transitions to the two separate automata.
func (r *alt) compile(final *state) *state {
s1 := r.r1.compile(final)
s2 := r.r2.compile(final)
return &state{epsilon: hashStateSet{s1: struct{}{}, s2: struct{}{}}}
}
// star contains a loop to accepts 0-or-more occurrences of r.re. There is an
// epsilon transition from the start state s1 to the final state (for 0
// occurrences), and a back epsilon-transition for 1-or-more occurrences.
func (r *star) compile(final *state) *state {
s2 := &state{epsilon: make(hashStateSet)}
s1 := r.re.compile(s2)
s2.epsilon = hashStateSet{s1: struct{}{}, final: struct{}{}}
s1.epsilon[s2] = struct{}{}
return s1
}
// close takes the transitive closure of the epsilon transitions.
func (s *state) close() {
if !s.isClosed {
s.isClosed = true
s.epsilon[s] = struct{}{}
s.epsilon.closeEpsilon()
if s.trans != nil {
s.trans.next.closeEpsilon()
}
}
}
// isUseless returns true iff the state has only epsilon transitions and it is
// not a final state.
func (s *state) isUseless() bool {
return s.trans == nil && !s.isFinal
}
// addStates folds the src states into the dst.
func (dst hashStateSet) addStates(src hashStateSet) {
for s, _ := range src {
dst[s] = struct{}{}
}
}
// stateSet converts the hashStateSet to a StateSet.
func (set hashStateSet) stateSet() StateSet {
states := StateSet{}
for s, _ := range set {
if !s.isUseless() {
states = append(states, s)
}
}
sort.Sort(byPointer(states))
return states
}
// closeEpsilon closes the state set under epsilon transitions.
func (states hashStateSet) closeEpsilon() {
for changed := true; changed; {
size := len(states)
for s, _ := range states {
s.close()
states.addStates(s.epsilon)
}
changed = len(states) != size
}
// Remove useless states.
for s, _ := range states {
if s.isUseless() {
delete(states, s)
}
}
}
// Step takes a transition for input name.
func (ss StateSet) Step(name string) StateSet {
states := make(hashStateSet)
for _, s := range ss {
if s.trans != nil && (s.trans.re == nil || s.trans.re.MatchString(name)) {
s.close()
states.addStates(s.trans.next)
}
}
return states.stateSet()
}
// IsFinal returns true iff the StateSet contains a final state.
func (ss StateSet) IsFinal() bool {
for _, s := range ss {
if s.isFinal {
return true
}
}
return false
}
// IsReject returns true iff the StateSet is empty.
func (ss StateSet) IsReject() bool {
return len(ss) == 0
}
// Union combines the state sets, returning a new StateSet.
func (s1 StateSet) Union(s2 StateSet) StateSet {
// As a space optimization, detect the case where the two states sets are
// equal. If so, return s1 unchanged.
if s1.Equals(s2) {
return s1
}
i1 := 0
i2 := 0
var result StateSet
for i1 < len(s1) && i2 < len(s2) {
p1 := uintptr(unsafe.Pointer(s1[i1]))
p2 := uintptr(unsafe.Pointer(s2[i2]))
switch {
case p1 == p2:
i2++
fallthrough
case p1 < p2:
result = append(result, s1[i1])
i1++
case p2 < p1:
result = append(result, s2[i2])
i2++
}
}
result = append(result, s1[i1:]...)
result = append(result, s2[i2:]...)
return result
}
// Equals returns true iff the state sets are equal.
func (s1 StateSet) Equals(s2 StateSet) bool {
if len(s1) != len(s2) {
return false
}
for i, s := range s1 {
if s2[i] != s {
return false
}
}
return true
}
// sorting methods.
func (a byPointer) Len() int { return len(a) }
func (a byPointer) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byPointer) Less(i, j int) bool {
return uintptr(unsafe.Pointer(a[i])) < uintptr(unsafe.Pointer(a[j]))
}
// Compile compiles the Regex to a NFA.
func compileNFA(r regex) StateSet {
s := r.compile(&state{isFinal: true, epsilon: make(hashStateSet)})
s.close()
return s.epsilon.stateSet()
}