store: implementing the trie and replacing the memstore impl.
Change-Id: Ib862b9ca09740290270ced4b50aad3b940158cfa
diff --git a/services/syncbase/store/leveldb/stream.go b/services/syncbase/store/leveldb/stream.go
index 98c3c2f..2d44f73 100644
--- a/services/syncbase/store/leveldb/stream.go
+++ b/services/syncbase/store/leveldb/stream.go
@@ -44,7 +44,7 @@
s := &stream{
node: store.NewResourceNode(),
cIter: cIter,
- limit: limit,
+ limit: store.CopyBytes(nil, limit),
}
parent.AddChild(s.node, func() {
s.Cancel()
diff --git a/services/syncbase/store/memstore/snapshot.go b/services/syncbase/store/memstore/snapshot.go
index 557b4dd..f6cc7d0 100644
--- a/services/syncbase/store/memstore/snapshot.go
+++ b/services/syncbase/store/memstore/snapshot.go
@@ -9,27 +9,24 @@
"v.io/v23/verror"
"v.io/x/ref/services/syncbase/store"
+ "v.io/x/ref/services/syncbase/store/ptrie"
)
type snapshot struct {
store.SnapshotSpecImpl
mu sync.Mutex
node *store.ResourceNode
- data map[string][]byte
+ data *ptrie.T
err error
}
var _ store.Snapshot = (*snapshot)(nil)
// Assumes st lock is held.
-func newSnapshot(st *memstore, parent *store.ResourceNode) *snapshot {
- dataCopy := make(map[string][]byte, len(st.data))
- for k, v := range st.data {
- dataCopy[k] = v
- }
+func newSnapshot(data *ptrie.T, parent *store.ResourceNode) *snapshot {
s := &snapshot{
node: store.NewResourceNode(),
- data: dataCopy,
+ data: data,
}
parent.AddChild(s.node, func() {
s.Abort()
@@ -56,11 +53,11 @@
if s.err != nil {
return valbuf, store.ConvertError(s.err)
}
- value, ok := s.data[string(key)]
- if !ok {
+ value := s.data.Get(key)
+ if value == nil {
return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
}
- return store.CopyBytes(valbuf, value), nil
+ return store.CopyBytes(valbuf, value.([]byte)), nil
}
// Scan implements the store.StoreReader interface.
@@ -70,5 +67,5 @@
if s.err != nil {
return &store.InvalidStream{Error: s.err}
}
- return newStream(s, s.node, start, limit)
+ return newStream(s.data, s.node, start, limit)
}
diff --git a/services/syncbase/store/memstore/store.go b/services/syncbase/store/memstore/store.go
index 699c2c5..6639e8d 100644
--- a/services/syncbase/store/memstore/store.go
+++ b/services/syncbase/store/memstore/store.go
@@ -12,21 +12,22 @@
"v.io/v23/verror"
"v.io/x/ref/services/syncbase/store"
+ "v.io/x/ref/services/syncbase/store/ptrie"
"v.io/x/ref/services/syncbase/store/transactions"
)
type memstore struct {
mu sync.Mutex
node *store.ResourceNode
- data map[string][]byte
+ data *ptrie.T
err error
}
// New creates a new memstore.
func New() store.Store {
return transactions.Wrap(&memstore{
- data: map[string][]byte{},
node: store.NewResourceNode(),
+ data: ptrie.New(true),
})
}
@@ -49,11 +50,11 @@
if st.err != nil {
return valbuf, store.ConvertError(st.err)
}
- value, ok := st.data[string(key)]
- if !ok {
+ value := st.data.Get(key)
+ if value == nil {
return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
}
- return store.CopyBytes(valbuf, value), nil
+ return store.CopyBytes(valbuf, value.([]byte)), nil
}
// Scan implements the store.StoreReader interface.
@@ -63,8 +64,7 @@
if st.err != nil {
return &store.InvalidStream{Error: st.err}
}
- // TODO(sadovsky): Close snapshot once stream is closed or canceled.
- return newSnapshot(st, st.node).Scan(start, limit)
+ return newStream(st.data.Copy(), st.node, start, limit)
}
// NewSnapshot implements the store.Store interface.
@@ -74,7 +74,7 @@
if st.err != nil {
return &store.InvalidSnapshot{Error: st.err}
}
- return newSnapshot(st, st.node)
+ return newSnapshot(st.data.Copy(), st.node)
}
// WriteBatch implements the transactions.BatchStore interface.
@@ -87,9 +87,9 @@
for _, write := range batch {
switch write.T {
case transactions.PutOp:
- st.data[string(write.Key)] = write.Value
+ st.data.Put(write.Key, store.CopyBytes(nil, write.Value))
case transactions.DeleteOp:
- delete(st.data, string(write.Key))
+ st.data.Delete(write.Key)
default:
panic(fmt.Sprintf("unknown write operation type: %v", write.T))
}
diff --git a/services/syncbase/store/memstore/stream.go b/services/syncbase/store/memstore/stream.go
index 5f776d9..d7d8ba6 100644
--- a/services/syncbase/store/memstore/stream.go
+++ b/services/syncbase/store/memstore/stream.go
@@ -5,39 +5,27 @@
package memstore
import (
- "sort"
"sync"
"v.io/v23/verror"
"v.io/x/ref/services/syncbase/store"
+ "v.io/x/ref/services/syncbase/store/ptrie"
)
type stream struct {
- mu sync.Mutex
- node *store.ResourceNode
- sn *snapshot
- keys []string
- currIndex int
- currKey *string
- err error
- done bool
+ mu sync.Mutex
+ node *store.ResourceNode
+ pstream *ptrie.Stream
+ err error
+ done bool
}
var _ store.Stream = (*stream)(nil)
-func newStream(sn *snapshot, parent *store.ResourceNode, start, limit []byte) *stream {
- keys := []string{}
- for k := range sn.data {
- if k >= string(start) && (len(limit) == 0 || k < string(limit)) {
- keys = append(keys, k)
- }
- }
- sort.Strings(keys)
+func newStream(data *ptrie.T, parent *store.ResourceNode, start, limit []byte) *stream {
s := &stream{
- node: store.NewResourceNode(),
- sn: sn,
- keys: keys,
- currIndex: -1,
+ node: store.NewResourceNode(),
+ pstream: data.Scan(start, limit),
}
parent.AddChild(s.node, func() {
s.Cancel()
@@ -49,16 +37,11 @@
func (s *stream) Advance() bool {
s.mu.Lock()
defer s.mu.Unlock()
- s.currKey = nil
if s.done {
return false
}
- s.currIndex++
- if s.currIndex < len(s.keys) {
- s.currKey = &s.keys[s.currIndex]
- } else {
- s.done = true
- s.currKey = nil
+ if s.done = !s.pstream.Advance(); s.done {
+ s.node.Close()
}
return !s.done
}
@@ -67,20 +50,14 @@
func (s *stream) Key(keybuf []byte) []byte {
s.mu.Lock()
defer s.mu.Unlock()
- if s.currKey == nil {
- panic("nothing staged")
- }
- return store.CopyBytes(keybuf, []byte(*s.currKey))
+ return s.pstream.Key(keybuf)
}
// Value implements the store.Stream interface.
func (s *stream) Value(valbuf []byte) []byte {
s.mu.Lock()
defer s.mu.Unlock()
- if s.currKey == nil {
- panic("nothing staged")
- }
- return store.CopyBytes(valbuf, s.sn.data[*s.currKey])
+ return store.CopyBytes(valbuf, s.pstream.Value().([]byte))
}
// Err implements the store.Stream interface.
@@ -93,11 +70,10 @@
// Cancel implements the store.Stream interface.
func (s *stream) Cancel() {
s.mu.Lock()
- defer s.mu.Unlock()
- if s.done {
- return
+ if !s.done {
+ s.done = true
+ s.node.Close()
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgCanceledStream)
}
- s.done = true
- s.node.Close()
- s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgCanceledStream)
+ s.mu.Unlock()
}
diff --git a/services/syncbase/store/ptrie/ptrie.go b/services/syncbase/store/ptrie/ptrie.go
new file mode 100644
index 0000000..a3e5be4
--- /dev/null
+++ b/services/syncbase/store/ptrie/ptrie.go
@@ -0,0 +1,324 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ptrie provides a ptrie to store a mapping from bit strings to
+// arbitrary values. Ptrie exposes a simple interface: Get(key),
+// Put(key, value), Delete(key), and Scan(start, limit). Conceptually a ptrie
+// can be thought of as a map[[]byte]interface{}, designed to support fast
+// range queries and immutable views.
+//
+// For performance reasons, bit strings are represented as byte slices.
+// The ptrie consists of three concepts: a binary trie, path contraction and
+// copy-on-write modifications.
+//
+// 1) A binary trie consists of nodes and arcs. Each node has a value and two
+// children: 0 and 1. The value and the children might be nil. An arc connects
+// a node with its child. A node N has a value V and S is the bit string of
+// the path from the root to N iff the trie maps S to V. Using this rule we can
+// build maps and sets. For example, a set of {'b', 'c'} can be represented as:
+// 'b': 0110 0010 o
+// 'c': 0110 0011 0/
+// 1\
+// 1\
+// 0/
+// 0/
+// 0/
+// 1\
+// o
+// 0/1\
+// o o
+// This trie has 10 nodes. This representation is not efficient.
+// To reduce the number of nodes, we use the path contraction technique.
+//
+// 2) Path contraction. If a path consists only of nodes that have one child
+// and don't have a value, then the path can be replaced with one arc.
+// The new arc has the whole bit string written on the path. The example above
+// becomes smaller: o
+// 0110001/
+// o
+// 0/1\
+// o o
+// This structure is stored in memory in a slightly different way. The trie consists
+// of nodes, each node has a value and two children. Each non-nil child is
+// a triple: the child node, the bit string written on the arc and the length of
+// the bit string. For convenience, bits in the bit string are aligned so that
+// the bit string might be a sub slice of a bit string representing a path from
+// a root of the trie to the child.
+//
+// 3) Copy-on-write modifications. In order to support immutable views of
+// the data in the ptrie, the Put() and the Delete() functions have the makeCopy
+// flag. If the makeCopy flag is true, then the algorithm doesn't modify the
+// current ptrie, but it returns a new ptrie with some nodes reused from
+// the current one.
+package ptrie
+
+import (
+ "v.io/x/ref/services/syncbase/store"
+)
+
+// T represents a ptrie.
+type T struct {
+ root *pnode
+ copyOnWrite bool
+}
+
+// New returns a new empty ptrie. If copyOnWrite is true, then the new ptrie
+// performs copy-on-write modifications for Put/Delete operations and it is
+// allowed to make a copy of the ptrie by calling Copy().
+func New(copyOnWrite bool) *T {
+ return &T{copyOnWrite: copyOnWrite}
+}
+
+// Put maps the given value to the given key. The value is not copied, so
+// the client must keep the value unchanged.
+func (t *T) Put(key []byte, value interface{}) {
+ t.root = t.root.put(key, value, t.copyOnWrite)
+}
+
+// Get returns a value mapped to the given key. Get returns nil if the given
+// key has no mapped value. The client must not modify the returned value as
+// the returned value points directly to the value stored in the ptrie.
+func (t *T) Get(key []byte) interface{} {
+ return t.root.get(key)
+}
+
+// Delete removes mapping to the given key.
+func (t *T) Delete(key []byte) {
+ t.root = t.root.delete(key, t.copyOnWrite)
+}
+
+// Scan returns all key-value pairs with keys in range [start, limit).
+// If limit is "", all key-value pairs with keys >= start are included.
+func (t *T) Scan(start, limit []byte) *Stream {
+ return t.root.Scan(start, limit)
+}
+
+// Copy returns a copy of the ptrie. This operation is only allowed if the ptrie
+// was created with the copyOnWrite flag. Copy is a very fast operation since
+// it just copies the pointer to the root of the ptrie.
+// Panics if the ptrie was created with copyOnWrite=false.
+func (t *T) Copy() *T {
+ if !t.copyOnWrite {
+ panic("the ptrie was not created in persistent mode")
+ }
+ return &T{
+ root: t.root,
+ copyOnWrite: true,
+ }
+}
+
+// pnode represents a node in the ptrie.
+type pnode struct {
+ value interface{}
+ child [2]*pchild
+}
+
+// pchild represents a child of a node in a ptrie.
+type pchild struct {
+ node *pnode
+ bitstr []byte
+ bitlen uint32
+}
+
+// put maps the given value to the given key, assuming the given node is
+// the root of the ptrie. The value is not copied, so the client must keep
+// the value unchanged. Put returns the new root of the ptrie.
+// The client must not modify the returned value as the returned value points
+// directly to the value stored in the ptrie.
+//
+// If the makeCopy flag is true, then the Put performs a copy-on-write
+// modification.
+//
+// A nil node is treated as an empty ptrie.
+func (node *pnode) put(key []byte, value interface{}, makeCopy bool) *pnode {
+ if value == nil {
+ return node.delete(key, makeCopy)
+ }
+ if node == nil {
+ node = &pnode{}
+ }
+ return putInternal(node, 0, key, value, makeCopy)
+}
+
+// get returns a value mapped to the given key, assuming the given node is
+// the root of the ptrie. Get returns nil if the given key has no mapped value.
+//
+// A nil node is treated as an empty ptrie.
+func (node *pnode) get(key []byte) interface{} {
+ if node == nil {
+ return nil
+ }
+ return getInternal(node, 0, key)
+}
+
+// delete removes mapping to the given key, assuming the given node is
+// the root of the ptrie. Delete returns the new root of the ptrie.
+//
+// If the makeCopy flag is true, then the Delete performs a copy-on-write
+// modification.
+//
+// A nil node is treated as an empty ptrie.
+func (node *pnode) delete(key []byte, makeCopy bool) *pnode {
+ if node == nil {
+ return nil
+ }
+ newNode, _ := deleteInternal(node, 0, key, makeCopy)
+ return newNode
+}
+
+// putInternal does a DFS through the ptrie to find a node corresponding to
+// the key and updates the value.
+//
+// Invariant: the first bitIndex bits of the key represent the path from
+// the root to the current node.
+func putInternal(node *pnode, bitIndex uint32, key []byte, value interface{}, makeCopy bool) *pnode {
+ if makeCopy {
+ node = copyNode(node)
+ }
+ if bitlen(key) == bitIndex {
+ // The node corresponding to the key is found, update the value.
+ node.value = value
+ return node
+ }
+ // Pick the appropriate child and check that o - node
+ // the bit string of the path to the child node \
+ // matches the corresponding substring of the key. ?
+ // If not, then we need to insert a node / \
+ // in the middle of the path to the child. ? o - child.node
+ currBit := getBit(key, bitIndex)
+ if makeCopy {
+ node.child[currBit] = copyChild(node.child[currBit])
+ }
+ child := node.child[currBit]
+ lcp := bitLCP(child, key[bitIndex>>3:], bitIndex&7)
+ if child != nil && lcp == child.bitlen {
+ // child.bitstr matches the substring of the key.
+ // Continue the DFS.
+ child.node = putInternal(child.node, bitIndex+lcp, key, value, makeCopy)
+ return node
+ }
+ // child.bitstr doesn't match the substring of the key.
+ // We need to insert a node in the middle of the path to the child.
+ // o - node
+ // \A
+ // o - middleNode
+ // / \B
+ // C/ o - child.node
+ // o - newChild.node
+ newChild := &pchild{
+ node: &pnode{value: value},
+ bitstr: store.CopyBytes(nil, key[(bitIndex+lcp)>>3:]),
+ bitlen: bitlen(key) - bitIndex - lcp,
+ }
+ if child == nil {
+ // This case means that paths A and B are empty. Just attach the
+ // new child to the node.
+ node.child[currBit] = newChild
+ return node
+ }
+ // Since the child.node exists and we picked the child based on the currBit
+ // (a bit from the key), the path A is not empty.
+ // The path B also can't be empty since lcp < child.bitlen.
+ middleNode := new(pnode)
+ // Update the child of the node, i.e. the A part.
+ node.child[currBit] = &pchild{
+ node: middleNode,
+ bitstr: store.CopyBytes(nil, child.bitstr[:((bitIndex&7)+lcp+7)>>3]),
+ bitlen: lcp,
+ }
+ // Pick the first bit on path C. Since C can be empty, we pick the first
+ // bit on B and invert it.
+ nextBit := getBit(child.bitstr, (bitIndex&7)+lcp) ^ 1
+ // Set the C part only if C is not empty.
+ if bitIndex+lcp < bitlen(key) {
+ middleNode.child[nextBit] = newChild
+ }
+ // Set the B part.
+ middleNode.child[nextBit^1] = &pchild{
+ node: child.node,
+ bitstr: store.CopyBytes(nil, child.bitstr[((bitIndex&7)+lcp)>>3:]),
+ bitlen: child.bitlen - lcp,
+ }
+ return node
+}
+
+// getInternal does a DFS through the ptrie to find a node corresponding to
+// the key and returns the value.
+//
+// Invariant: the first bitIndex bits of the key represent the path from
+// the root to the current node.
+func getInternal(node *pnode, bitIndex uint32, key []byte) interface{} {
+ if bitlen(key) == bitIndex {
+ return node.value
+ }
+ child := node.child[getBit(key, bitIndex)]
+ lcp := bitLCP(child, key[bitIndex>>3:], bitIndex&7)
+ if child == nil || lcp != child.bitlen {
+ return nil
+ }
+ return getInternal(child.node, bitIndex+lcp, key)
+}
+
+// deleteInternal does a DFS through the ptrie to find a node corresponding to
+// the key and deletes the value. deleteInternal removes the whole subtree if
+// no nodes in the subtree have values.
+//
+// Invariant: the first bitIndex bits of the key represent the path from
+// the root to the current node.
+func deleteInternal(node *pnode, bitIndex uint32, key []byte, makeCopy bool) (newNode *pnode, deleted bool) {
+ if bitlen(key) == bitIndex {
+ // The node corresponding to the key is found.
+ if node.child[0] == nil && node.child[1] == nil {
+ return nil, true
+ }
+ if makeCopy {
+ node = copyNode(node)
+ }
+ node.value = nil
+ return node, true
+ }
+ // Pick the appropriate child and check that
+ // the bit string of the path to the child node
+ // matches the corresponding substring of the key.
+ currBit := getBit(key, bitIndex)
+ child := node.child[currBit]
+ lcp := bitLCP(child, key[bitIndex>>3:], bitIndex&7)
+ if child == nil || lcp != child.bitlen {
+ // child.bitstr doesn't match the substring of the key, so the key
+ // was not found in the ptrie.
+ return node, false
+ }
+ // Delete the key in the subtree.
+ if newNode, deleted = deleteInternal(child.node, bitIndex+lcp, key, makeCopy); !deleted {
+ return node, false
+ }
+ if makeCopy {
+ node = copyNode(node)
+ }
+ if newNode == nil {
+ // If the whole subtree was removed, just remove the child.
+ // It is possible that the node has no value and only one child. In this
+ // case the node will be contracted one step out of the recursion.
+ node.child[currBit] = nil
+ return node, true
+ }
+ if makeCopy {
+ node.child[currBit] = copyChild(node.child[currBit])
+ }
+ if newNode.value == nil && (newNode.child[0] == nil || newNode.child[1] == nil) {
+ // Contract the new node if necessary.
+ // Note: both children of the new node can't be nil since deleteInternal
+ // automatically removes empty subtrees.
+ child := newNode.child[0]
+ if child == nil {
+ child = newNode.child[1]
+ }
+ node.child[currBit].node = child.node
+ node.child[currBit].bitstr = appendBits(node.child[currBit].bitstr, (bitIndex&7)+node.child[currBit].bitlen, child.bitstr)
+ node.child[currBit].bitlen += child.bitlen
+ } else {
+ node.child[currBit].node = newNode
+ }
+ return node, true
+}
diff --git a/services/syncbase/store/ptrie/ptrie_test.go b/services/syncbase/store/ptrie/ptrie_test.go
new file mode 100644
index 0000000..e0da1dc
--- /dev/null
+++ b/services/syncbase/store/ptrie/ptrie_test.go
@@ -0,0 +1,49 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(rogulenko): Add a benchmark to compare a ptrie with
+// a map[[]byte]interface{}.
+package ptrie
+
+import (
+ "testing"
+)
+
+// TestPutGetDelete verifies basic functionality of Put/Get/Delete.
+// More Put/Get/Delete/Scan tests can be found in store/memstore.
+// TODO(rogulenko): Add more tests, don't rely on tests in store/memstore.
+func TestPutGetDelete(t *testing.T) {
+ data := New(true)
+ data.Put([]byte("a"), "a")
+ data.Put([]byte("ab"), "ab")
+ if got, want := data.Get([]byte("a")).(string), "a"; got != want {
+ t.Fatalf("unexpected Get result: got %q, want %q", got, want)
+ }
+ if got, want := data.Get([]byte("ab")).(string), "ab"; got != want {
+ t.Fatalf("unexpected Get result: got %q, want %q", got, want)
+ }
+ // Verify that copy-on-write works.
+ newData := data.Copy()
+ newData.Delete([]byte("a"))
+ if got, want := data.Get([]byte("a")).(string), "a"; got != want {
+ t.Fatalf("unexpected Get result: got %q, want %q", got, want)
+ }
+ if value := newData.Get([]byte("a")); value != nil {
+ t.Fatalf("Get returned a non-nil value %v", value)
+ }
+ // Verify path contraction after Delete().
+ if newData.root.child[0].bitlen != 16 {
+ t.Fatal("path was not contracted after Delete()")
+ }
+ // Verify path contraction after Put("ac") and Delete("ac").
+ data = newData.Copy()
+ data.Put([]byte("ac"), "ac")
+ if got, want := data.Get([]byte("ac")).(string), "ac"; got != want {
+ t.Fatalf("unexpected Get result: got %q, want %q", got, want)
+ }
+ data.Delete([]byte("ab"))
+ if data.root.child[0].bitlen != 16 {
+ t.Fatal("path was not contracted after Delete()")
+ }
+}
diff --git a/services/syncbase/store/ptrie/stream.go b/services/syncbase/store/ptrie/stream.go
new file mode 100644
index 0000000..a61a7ef
--- /dev/null
+++ b/services/syncbase/store/ptrie/stream.go
@@ -0,0 +1,224 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ptrie
+
+import (
+ "bytes"
+
+ "v.io/x/ref/services/syncbase/store"
+)
+
+// Stream is a struct for iterating through a ptrie.
+// WARNING: The stream is not thread-safe.
+//
+// The best way to walk through nodes of a tree is to perform a DFS.
+// To support the Advance() method, we save the current state of the DFS
+// by capturing the DFS stack.
+type Stream struct {
+ dfsStack []*dfsStackElement
+ limit []byte
+ hasAdvanced bool // true iff the stream has been advanced at least once
+ hasValue bool // true iff a value has been staged
+ key []byte
+}
+
+type dfsStackElement struct {
+ node *pnode
+ childID int8
+}
+
+// Scan returns all key-value pairs with keys in range [start, limit).
+// If limit is "", all key-value pairs with keys >= start are included.
+//
+// A nil node is treated as an empty ptrie.
+func (node *pnode) Scan(start, limit []byte) *Stream {
+ if node == nil {
+ node = &pnode{}
+ }
+ s := &Stream{
+ limit: store.CopyBytes(nil, limit),
+ }
+ // Locate the first key-value pair with key >= start and capture
+ // the DFS stack.
+ s.lowerBound(node, 0, start)
+ return s
+}
+
+// Advance stages an element so the client can retrieve it with Key or Value.
+// Advance returns true iff there is an element to retrieve. The client must
+// call Advance before calling Key or Value.
+func (s *Stream) Advance() bool {
+ s.hasValue = false
+ if len(s.dfsStack) == 0 {
+ return false
+ }
+ if !s.hasAdvanced {
+ s.hasAdvanced = true
+ } else {
+ s.advanceInternal()
+ }
+ s.key = s.keyFromDFSStack()
+ if len(s.dfsStack) == 0 || (len(s.limit) != 0 && bytes.Compare(s.key, s.limit) >= 0) {
+ s.dfsStack = nil
+ return false
+ }
+ s.hasValue = true
+ return true
+}
+
+// Key returns the key of the element that was staged by Advance. The returned
+// slice may be a sub-slice of keybuf if keybuf was large enough to hold the
+// entire key. Otherwise, a newly allocated slice will be returned. It is
+// valid to pass a nil keybuf.
+// Key may panic if Advance returned false or was not called at all.
+func (s *Stream) Key(keybuf []byte) []byte {
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value returns the value of the element that was staged by Advance.
+// The client should not modify the returned value as the returned
+// value points directly to the value stored in the ptrie.
+// Value may panic if Advance returned false or was not called at all.
+func (s *Stream) Value() interface{} {
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return s.dfsStack[len(s.dfsStack)-1].node.value
+}
+
+// advanceInternal simulates the DFS until the next node with a value is found
+// or the stream reaches the end of the ptrie.
+func (s *Stream) advanceInternal() {
+ // The code below simulates the following recursive function:
+ // func dfs(node *Node) {
+ // if node.Value != nil {
+ // // The next key-value pair is found.
+ // }
+ // for i := 0; i < 2; i++ {
+ // if node.child[i] != nil {
+ // dfs(node.child[i].node)
+ // }
+ // }
+ // }
+ for len(s.dfsStack) > 0 {
+ top := s.dfsStack[len(s.dfsStack)-1]
+ // childID can be -1, 0, or 1.
+ top.childID++
+ for top.childID <= 1 && top.node.child[top.childID] == nil {
+ top.childID++
+ }
+ if top.childID > 1 {
+ s.popFromDFSStack()
+ continue
+ }
+ child := top.node.child[top.childID].node
+ s.dfsStack = append(s.dfsStack, &dfsStackElement{
+ node: child,
+ childID: -1,
+ })
+ if child.value != nil {
+ return
+ }
+ }
+}
+
+// keyFromDFSStack returns the bit string written on the path from the root
+// to the node on the top of the DFS stack.
+func (s *Stream) keyFromDFSStack() []byte {
+ // Calculate the buffer size.
+ var bitlen uint32 = 0
+ for i := 0; i < len(s.dfsStack)-1; i++ {
+ element := s.dfsStack[i]
+ bitlen += element.node.child[element.childID].bitlen
+ }
+ if bitlen%8 != 0 {
+ panic("the number of bytes in a key is not an integer")
+ }
+ // Allocate the buffer.
+ buf := make([]byte, bitlen>>3)
+ bitlen = 0
+ // Concatenate all key parts.
+ for i := 0; i < len(s.dfsStack)-1; i++ {
+ element := s.dfsStack[i]
+ buf = appendBits(buf, bitlen, element.node.child[element.childID].bitstr)
+ bitlen += element.node.child[element.childID].bitlen
+ }
+ return buf
+}
+
+// lowerBound initializes the DFS stack to store a path to the first node with
+// a key >= start and a non-nil value. lowerBound returns true iff that node
+// was found in the subtree.
+//
+// Invariant: the first bitIndex bits of start represent the path from
+// the root to the current node.
+func (s *Stream) lowerBound(node *pnode, bitIndex uint32, start []byte) bool {
+ if bitIndex >= bitlen(start) {
+ s.findFirst(node)
+ return true
+ }
+ top := s.pushToDFSStack(node)
+ // Pick the appropriate child and check that
+ // the bit string of the path to the child node
+ // matches the corresponding substring of start.
+ currBit := getBit(start, bitIndex)
+ top.childID = int8(currBit)
+ if child := node.child[currBit]; child != nil {
+ lcp := bitLCP(child, start[bitIndex>>3:], bitIndex&7)
+ if lcp < child.bitlen {
+ // child.bitstr doesn't match the substring of start.
+ // Find the first node with a value in a subtree of the child if
+ // child.bitstr is greater than the corresponding substring of
+ // start.
+ if bitIndex+lcp == bitlen(start) || (bitIndex+lcp < bitlen(start) && getBit(start, bitIndex+lcp) == 0) {
+ s.findFirst(child.node)
+ return true
+ }
+ } else if s.lowerBound(child.node, bitIndex+lcp, start) {
+ return true
+ }
+ }
+ if currBit == 0 && node.child[1] != nil {
+ top.childID = 1
+ s.findFirst(node.child[1].node)
+ return true
+ }
+ s.popFromDFSStack()
+ return false
+}
+
+// findFirst simulates the DFS to find the first node with a value in a subtree.
+// NOTE: since the Put/Delete implementation automatically removes
+// subtrees without values, each subtree of a ptrie has a value.
+func (s *Stream) findFirst(node *pnode) {
+ top := s.pushToDFSStack(node)
+ if node.value != nil {
+ return
+ }
+ for top.childID < 1 {
+ top.childID++
+ if child := node.child[top.childID]; child != nil {
+ s.findFirst(child.node)
+ return
+ }
+ }
+ panic("subtree has no nodes with values")
+}
+
+func (s *Stream) pushToDFSStack(node *pnode) *dfsStackElement {
+ top := &dfsStackElement{
+ node: node,
+ childID: -1,
+ }
+ s.dfsStack = append(s.dfsStack, top)
+ return top
+}
+
+func (s *Stream) popFromDFSStack() {
+ s.dfsStack = s.dfsStack[:len(s.dfsStack)-1]
+}
diff --git a/services/syncbase/store/ptrie/util.go b/services/syncbase/store/ptrie/util.go
new file mode 100644
index 0000000..d1fa68d
--- /dev/null
+++ b/services/syncbase/store/ptrie/util.go
@@ -0,0 +1,109 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ptrie
+
+// mostSignificantBit stores the mapping from a byte to the position of its most
+// significant bit set to 1.
+var mostSignificantBit [256]uint32
+
+func init() {
+ for i := 2; i < 256; i++ {
+ mostSignificantBit[i] = mostSignificantBit[i>>1] + 1
+ }
+}
+
+// bitlen returns the length of a slice in bits, i.e. 8 * len(slice).
+func bitlen(slice []byte) uint32 {
+ return uint32(len(slice)) << 3
+}
+
+// getBit returns i-th lexicographically significant bit of a slice.
+// In other words, if we think of a byte slice as a big integer, getBit returns
+// the i-th bit of that integer counting from the highest to the lowest.
+func getBit(slice []byte, i uint32) byte {
+ return (slice[i>>3] >> (7 - (i & 7))) & 1
+}
+
+// sliceBitLCP returns the length of the longest common prefix between two
+// slices. The slices are compared as two bit strings. The first skipbits of
+// both slices are virtually removed and ignored.
+// NOTE: sliceBitLCP assumes that 0 <= skipbits < 8.
+func sliceBitLCP(a, b []byte, skipbits uint32) uint32 {
+ // Check that the first byte (without the first skipbits) is the same in
+ // both slices.
+ if x := (a[0] ^ b[0]) << skipbits; x != 0 {
+ return 7 - mostSignificantBit[x]
+ }
+ minlen := len(a)
+ if len(b) < minlen {
+ minlen = len(b)
+ }
+ for i := 1; i < minlen; i++ {
+ // (uint32(i<<3) - skipbits) --- the number of already compared bits.
+ if x := a[i] ^ b[i]; x != 0 {
+ return (uint32(i<<3) - skipbits) + (7 - mostSignificantBit[x])
+ }
+ }
+ return uint32(minlen<<3) - skipbits
+}
+
+// copyNode returns a copy of the provided pnode struct.
+func copyNode(node *pnode) *pnode {
+ if node == nil {
+ return nil
+ }
+ return &pnode{
+ value: node.value,
+ child: node.child,
+ }
+}
+
+// copyNode returns a copy of the provided child struct.
+func copyChild(child *pchild) *pchild {
+ if child == nil {
+ return nil
+ }
+ return &pchild{
+ node: child.node,
+ bitstr: child.bitstr,
+ bitlen: child.bitlen,
+ }
+}
+
+// bitLCP returns the length of the longest common bit-prefix between the Path
+// of a child and a slice. The first skipbits of both slices are virtually
+// removed and ignored. Returns 0 if the child is nil.
+func bitLCP(child *pchild, slice []byte, skipbits uint32) uint32 {
+ if child == nil {
+ return 0
+ }
+ lcp := sliceBitLCP(child.bitstr, slice, skipbits)
+ if lcp > child.bitlen {
+ lcp = child.bitlen
+ }
+ return lcp
+}
+
+// appendBits appends 'b' to 'a', assuming that 'a' has the provided bit length.
+// If the bit length is a multiple of 8, appendBits just appends 'b' to 'a'.
+// Otherwise appendBits appends 'b' to 'a' overlapping the first byte of 'b'
+// with the last byte of 'a' so that the first bitlen bits of 'a' are unchanged.
+//
+// If 'a' has not enough capacity to hold the result, appendBits creates a new
+// slice to hold the result. Otherwise the result is stored in 'a'.
+func appendBits(a []byte, bitlen uint32, b []byte) []byte {
+ newlen := int(bitlen>>3) + len(b)
+ if newlen > cap(a) {
+ tmp := make([]byte, newlen)
+ copy(tmp, a)
+ a = tmp
+ }
+ a = a[:newlen]
+ oldByte := a[bitlen>>3]
+ copy(a[bitlen>>3:], b)
+ var bitmask byte = (1 << (8 - (bitlen & 7))) - 1
+ a[bitlen>>3] = (bitmask & a[bitlen>>3]) | (^bitmask & oldByte)
+ return a
+}
diff --git a/services/syncbase/store/ptrie/util_test.go b/services/syncbase/store/ptrie/util_test.go
new file mode 100644
index 0000000..0ab8892
--- /dev/null
+++ b/services/syncbase/store/ptrie/util_test.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ptrie
+
+import (
+ "bytes"
+ "testing"
+)
+
+// TestGetBit verifies the getBit() function.
+func TestGetBit(t *testing.T) {
+ x := []byte{0x1b, 0x1b, 0x1b}
+ xbits := []byte{
+ 0, 0, 0, 1, 1, 0, 1, 1,
+ 0, 0, 0, 1, 1, 0, 1, 1,
+ 0, 0, 0, 1, 1, 0, 1, 1,
+ }
+ for i := 0; i < 24; i++ {
+ if got, want := getBit(x, uint32(i)), xbits[i]; got != want {
+ t.Fatalf("unexpected bit %d: got %d, want %d", i, got, want)
+ }
+ }
+}
+
+// TestGetBit verifies the appendBits() function.
+func TestAppendBits(t *testing.T) {
+ a := []byte{0x99, 0x99}
+ b := []byte{0xff, 0xff}
+ if got, want := appendBits(a, 8, b), []byte{0x99, 0xff, 0xff}; !bytes.Equal(got, want) {
+ t.Fatalf("unexpected appendBits result: got %v, want %v", got, want)
+ }
+ if got, want := appendBits(a, 9, b), []byte{0x99, 0xff, 0xff}; !bytes.Equal(got, want) {
+ t.Fatalf("unexpected appendBits result: got %v, want %v", got, want)
+ }
+ if got, want := appendBits(a, 10, b), []byte{0x99, 0xbf, 0xff}; !bytes.Equal(got, want) {
+ t.Fatalf("unexpected appendBits result: got %v, want %v", got, want)
+ }
+ if got, want := appendBits(a, 11, b), []byte{0x99, 0x9f, 0xff}; !bytes.Equal(got, want) {
+ t.Fatalf("unexpected appendBits result: got %v, want %v", got, want)
+ }
+ if got, want := appendBits(a, 14, b), []byte{0x99, 0x9b, 0xff}; !bytes.Equal(got, want) {
+ t.Fatalf("unexpected appendBits result: got %v, want %v", got, want)
+ }
+ if got, want := appendBits(a, 16, b), []byte{0x99, 0x99, 0xff, 0xff}; !bytes.Equal(got, want) {
+ t.Fatalf("unexpected appendBits result: got %v, want %v", got, want)
+ }
+}
+
+// TestSliceBitLCP verifies the sliceBitLCP() function.
+func TestSliceBitLCP(t *testing.T) {
+ for i := 0; i < 8; i++ {
+ if got, want := sliceBitLCP([]byte{0x99, 0x99}, []byte{0x99, 0x99}, uint32(i)), uint32(16-i); got != want {
+ t.Fatalf("unexpected sliceBitLCP result: got %d, want %d", got, want)
+ }
+ }
+ for i := 0; i < 8; i++ {
+ if got, want := sliceBitLCP([]byte{0x99, 0x99}, []byte{0x99, 0xf9}, uint32(i)), uint32(9-i); got != want {
+ t.Fatalf("unexpected sliceBitLCP result: got %d, want %d", got, want)
+ }
+ }
+
+ for i := 0; i < 1; i++ {
+ if got, want := sliceBitLCP([]byte{0x99, 0x99}, []byte{0xd9, 0xf9}, uint32(i)), uint32(1-i); got != want {
+ t.Fatalf("unexpected sliceBitLCP result: got %d, want %d", got, want)
+ }
+ }
+ for i := 2; i < 8; i++ {
+ if got, want := sliceBitLCP([]byte{0x99, 0x99}, []byte{0xd9, 0xf9}, uint32(i)), uint32(9-i); got != want {
+ t.Fatalf("unexpected sliceBitLCP result: got %d, want %d", got, want)
+ }
+ }
+}