blob: a3e5be4d4ca0aa499e80cb2f5bd679fafa8a56d5 [file] [log] [blame]
Sergey Rogulenko61bbd612015-09-04 12:03:36 -07001// Copyright 2015 The Vanadium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package ptrie provides a ptrie to store a mapping from bit strings to
6// arbitrary values. Ptrie exposes a simple interface: Get(key),
7// Put(key, value), Delete(key), and Scan(start, limit). Conceptually a ptrie
8// can be thought of as a map[[]byte]interface{}, designed to support fast
9// range queries and immutable views.
10//
11// For performance reasons, bit strings are represented as byte slices.
12// The ptrie consists of three concepts: a binary trie, path contraction and
13// copy-on-write modifications.
14//
15// 1) A binary trie consists of nodes and arcs. Each node has a value and two
16// children: 0 and 1. The value and the children might be nil. An arc connects
17// a node with its child. A node N has a value V and S is the bit string of
18// the path from the root to N iff the trie maps S to V. Using this rule we can
19// build maps and sets. For example, a set of {'b', 'c'} can be represented as:
20// 'b': 0110 0010 o
21// 'c': 0110 0011 0/
22// 1\
23// 1\
24// 0/
25// 0/
26// 0/
27// 1\
28// o
29// 0/1\
30// o o
31// This trie has 10 nodes. This representation is not efficient.
32// To reduce the number of nodes, we use the path contraction technique.
33//
34// 2) Path contraction. If a path consists only of nodes that have one child
35// and don't have a value, then the path can be replaced with one arc.
36// The new arc has the whole bit string written on the path. The example above
37// becomes smaller: o
38// 0110001/
39// o
40// 0/1\
41// o o
42// This structure is stored in memory in a slightly different way. The trie consists
43// of nodes, each node has a value and two children. Each non-nil child is
44// a triple: the child node, the bit string written on the arc and the length of
45// the bit string. For convenience, bits in the bit string are aligned so that
46// the bit string might be a sub slice of a bit string representing a path from
47// a root of the trie to the child.
48//
49// 3) Copy-on-write modifications. In order to support immutable views of
50// the data in the ptrie, the Put() and the Delete() functions have the makeCopy
51// flag. If the makeCopy flag is true, then the algorithm doesn't modify the
52// current ptrie, but it returns a new ptrie with some nodes reused from
53// the current one.
54package ptrie
55
56import (
57 "v.io/x/ref/services/syncbase/store"
58)
59
60// T represents a ptrie.
61type T struct {
62 root *pnode
63 copyOnWrite bool
64}
65
66// New returns a new empty ptrie. If copyOnWrite is true, then the new ptrie
67// performs copy-on-write modifications for Put/Delete operations and it is
68// allowed to make a copy of the ptrie by calling Copy().
69func New(copyOnWrite bool) *T {
70 return &T{copyOnWrite: copyOnWrite}
71}
72
73// Put maps the given value to the given key. The value is not copied, so
74// the client must keep the value unchanged.
75func (t *T) Put(key []byte, value interface{}) {
76 t.root = t.root.put(key, value, t.copyOnWrite)
77}
78
79// Get returns a value mapped to the given key. Get returns nil if the given
80// key has no mapped value. The client must not modify the returned value as
81// the returned value points directly to the value stored in the ptrie.
82func (t *T) Get(key []byte) interface{} {
83 return t.root.get(key)
84}
85
86// Delete removes mapping to the given key.
87func (t *T) Delete(key []byte) {
88 t.root = t.root.delete(key, t.copyOnWrite)
89}
90
91// Scan returns all key-value pairs with keys in range [start, limit).
92// If limit is "", all key-value pairs with keys >= start are included.
93func (t *T) Scan(start, limit []byte) *Stream {
94 return t.root.Scan(start, limit)
95}
96
97// Copy returns a copy of the ptrie. This operation is only allowed if the ptrie
98// was created with the copyOnWrite flag. Copy is a very fast operation since
99// it just copies the pointer to the root of the ptrie.
100// Panics if the ptrie was created with copyOnWrite=false.
101func (t *T) Copy() *T {
102 if !t.copyOnWrite {
103 panic("the ptrie was not created in persistent mode")
104 }
105 return &T{
106 root: t.root,
107 copyOnWrite: true,
108 }
109}
110
111// pnode represents a node in the ptrie.
112type pnode struct {
113 value interface{}
114 child [2]*pchild
115}
116
117// pchild represents a child of a node in a ptrie.
118type pchild struct {
119 node *pnode
120 bitstr []byte
121 bitlen uint32
122}
123
124// put maps the given value to the given key, assuming the given node is
125// the root of the ptrie. The value is not copied, so the client must keep
126// the value unchanged. Put returns the new root of the ptrie.
127// The client must not modify the returned value as the returned value points
128// directly to the value stored in the ptrie.
129//
130// If the makeCopy flag is true, then the Put performs a copy-on-write
131// modification.
132//
133// A nil node is treated as an empty ptrie.
134func (node *pnode) put(key []byte, value interface{}, makeCopy bool) *pnode {
135 if value == nil {
136 return node.delete(key, makeCopy)
137 }
138 if node == nil {
139 node = &pnode{}
140 }
141 return putInternal(node, 0, key, value, makeCopy)
142}
143
144// get returns a value mapped to the given key, assuming the given node is
145// the root of the ptrie. Get returns nil if the given key has no mapped value.
146//
147// A nil node is treated as an empty ptrie.
148func (node *pnode) get(key []byte) interface{} {
149 if node == nil {
150 return nil
151 }
152 return getInternal(node, 0, key)
153}
154
155// delete removes mapping to the given key, assuming the given node is
156// the root of the ptrie. Delete returns the new root of the ptrie.
157//
158// If the makeCopy flag is true, then the Delete performs a copy-on-write
159// modification.
160//
161// A nil node is treated as an empty ptrie.
162func (node *pnode) delete(key []byte, makeCopy bool) *pnode {
163 if node == nil {
164 return nil
165 }
166 newNode, _ := deleteInternal(node, 0, key, makeCopy)
167 return newNode
168}
169
170// putInternal does a DFS through the ptrie to find a node corresponding to
171// the key and updates the value.
172//
173// Invariant: the first bitIndex bits of the key represent the path from
174// the root to the current node.
175func putInternal(node *pnode, bitIndex uint32, key []byte, value interface{}, makeCopy bool) *pnode {
176 if makeCopy {
177 node = copyNode(node)
178 }
179 if bitlen(key) == bitIndex {
180 // The node corresponding to the key is found, update the value.
181 node.value = value
182 return node
183 }
184 // Pick the appropriate child and check that o - node
185 // the bit string of the path to the child node \
186 // matches the corresponding substring of the key. ?
187 // If not, then we need to insert a node / \
188 // in the middle of the path to the child. ? o - child.node
189 currBit := getBit(key, bitIndex)
190 if makeCopy {
191 node.child[currBit] = copyChild(node.child[currBit])
192 }
193 child := node.child[currBit]
194 lcp := bitLCP(child, key[bitIndex>>3:], bitIndex&7)
195 if child != nil && lcp == child.bitlen {
196 // child.bitstr matches the substring of the key.
197 // Continue the DFS.
198 child.node = putInternal(child.node, bitIndex+lcp, key, value, makeCopy)
199 return node
200 }
201 // child.bitstr doesn't match the substring of the key.
202 // We need to insert a node in the middle of the path to the child.
203 // o - node
204 // \A
205 // o - middleNode
206 // / \B
207 // C/ o - child.node
208 // o - newChild.node
209 newChild := &pchild{
210 node: &pnode{value: value},
211 bitstr: store.CopyBytes(nil, key[(bitIndex+lcp)>>3:]),
212 bitlen: bitlen(key) - bitIndex - lcp,
213 }
214 if child == nil {
215 // This case means that paths A and B are empty. Just attach the
216 // new child to the node.
217 node.child[currBit] = newChild
218 return node
219 }
220 // Since the child.node exists and we picked the child based on the currBit
221 // (a bit from the key), the path A is not empty.
222 // The path B also can't be empty since lcp < child.bitlen.
223 middleNode := new(pnode)
224 // Update the child of the node, i.e. the A part.
225 node.child[currBit] = &pchild{
226 node: middleNode,
227 bitstr: store.CopyBytes(nil, child.bitstr[:((bitIndex&7)+lcp+7)>>3]),
228 bitlen: lcp,
229 }
230 // Pick the first bit on path C. Since C can be empty, we pick the first
231 // bit on B and invert it.
232 nextBit := getBit(child.bitstr, (bitIndex&7)+lcp) ^ 1
233 // Set the C part only if C is not empty.
234 if bitIndex+lcp < bitlen(key) {
235 middleNode.child[nextBit] = newChild
236 }
237 // Set the B part.
238 middleNode.child[nextBit^1] = &pchild{
239 node: child.node,
240 bitstr: store.CopyBytes(nil, child.bitstr[((bitIndex&7)+lcp)>>3:]),
241 bitlen: child.bitlen - lcp,
242 }
243 return node
244}
245
246// getInternal does a DFS through the ptrie to find a node corresponding to
247// the key and returns the value.
248//
249// Invariant: the first bitIndex bits of the key represent the path from
250// the root to the current node.
251func getInternal(node *pnode, bitIndex uint32, key []byte) interface{} {
252 if bitlen(key) == bitIndex {
253 return node.value
254 }
255 child := node.child[getBit(key, bitIndex)]
256 lcp := bitLCP(child, key[bitIndex>>3:], bitIndex&7)
257 if child == nil || lcp != child.bitlen {
258 return nil
259 }
260 return getInternal(child.node, bitIndex+lcp, key)
261}
262
263// deleteInternal does a DFS through the ptrie to find a node corresponding to
264// the key and deletes the value. deleteInternal removes the whole subtree if
265// no nodes in the subtree have values.
266//
267// Invariant: the first bitIndex bits of the key represent the path from
268// the root to the current node.
269func deleteInternal(node *pnode, bitIndex uint32, key []byte, makeCopy bool) (newNode *pnode, deleted bool) {
270 if bitlen(key) == bitIndex {
271 // The node corresponding to the key is found.
272 if node.child[0] == nil && node.child[1] == nil {
273 return nil, true
274 }
275 if makeCopy {
276 node = copyNode(node)
277 }
278 node.value = nil
279 return node, true
280 }
281 // Pick the appropriate child and check that
282 // the bit string of the path to the child node
283 // matches the corresponding substring of the key.
284 currBit := getBit(key, bitIndex)
285 child := node.child[currBit]
286 lcp := bitLCP(child, key[bitIndex>>3:], bitIndex&7)
287 if child == nil || lcp != child.bitlen {
288 // child.bitstr doesn't match the substring of the key, so the key
289 // was not found in the ptrie.
290 return node, false
291 }
292 // Delete the key in the subtree.
293 if newNode, deleted = deleteInternal(child.node, bitIndex+lcp, key, makeCopy); !deleted {
294 return node, false
295 }
296 if makeCopy {
297 node = copyNode(node)
298 }
299 if newNode == nil {
300 // If the whole subtree was removed, just remove the child.
301 // It is possible that the node has no value and only one child. In this
302 // case the node will be contracted one step out of the recursion.
303 node.child[currBit] = nil
304 return node, true
305 }
306 if makeCopy {
307 node.child[currBit] = copyChild(node.child[currBit])
308 }
309 if newNode.value == nil && (newNode.child[0] == nil || newNode.child[1] == nil) {
310 // Contract the new node if necessary.
311 // Note: both children of the new node can't be nil since deleteInternal
312 // automatically removes empty subtrees.
313 child := newNode.child[0]
314 if child == nil {
315 child = newNode.child[1]
316 }
317 node.child[currBit].node = child.node
318 node.child[currBit].bitstr = appendBits(node.child[currBit].bitstr, (bitIndex&7)+node.child[currBit].bitlen, child.bitstr)
319 node.child[currBit].bitlen += child.bitlen
320 } else {
321 node.child[currBit].node = newNode
322 }
323 return node, true
324}