Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 1 | // Copyright 2015 The Vanadium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 5 | // Package leveldb provides a LevelDB-based implementation of store.Store. |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 6 | package leveldb |
| 7 | |
Sergey Rogulenko | 120385a | 2015-05-18 14:47:55 -0700 | [diff] [blame] | 8 | // #cgo LDFLAGS: -lleveldb -lsnappy |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 9 | // #include <stdlib.h> |
| 10 | // #include "leveldb/c.h" |
| 11 | // #include "syncbase_leveldb.h" |
| 12 | import "C" |
| 13 | import ( |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 14 | "container/list" |
| 15 | "fmt" |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 16 | "sync" |
| 17 | "unsafe" |
| 18 | |
| 19 | "v.io/syncbase/x/ref/services/syncbase/store" |
Sergey Rogulenko | a53e60f | 2015-05-22 11:05:01 -0700 | [diff] [blame] | 20 | "v.io/v23/verror" |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 21 | ) |
| 22 | |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 23 | // db is a wrapper around LevelDB that implements the store.Store interface. |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 24 | type db struct { |
Sergey Rogulenko | def3b30 | 2015-05-20 17:33:24 -0700 | [diff] [blame] | 25 | // mu protects the state of the db. |
| 26 | mu sync.RWMutex |
Sergey Rogulenko | 95baa66 | 2015-05-22 15:07:06 -0700 | [diff] [blame] | 27 | node *store.ResourceNode |
Sergey Rogulenko | def3b30 | 2015-05-20 17:33:24 -0700 | [diff] [blame] | 28 | cDb *C.leveldb_t |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 29 | // Default read/write options. |
| 30 | readOptions *C.leveldb_readoptions_t |
| 31 | writeOptions *C.leveldb_writeoptions_t |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 32 | err error |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 33 | |
Adam Sadovsky | a3fc33c | 2015-06-02 18:44:46 -0700 | [diff] [blame] | 34 | // txmu protects the transaction-related variables below, and is also held |
| 35 | // during transaction commits. It must always be acquired before mu. |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 36 | txmu sync.Mutex |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 37 | // txEvents is a queue of create/commit transaction events. |
| 38 | txEvents *list.List |
| 39 | txSequenceNumber uint64 |
| 40 | // txTable is a set of keys written by recent transactions. This set |
| 41 | // includes all write sets of transactions committed after the oldest living |
Adam Sadovsky | a3fc33c | 2015-06-02 18:44:46 -0700 | [diff] [blame] | 42 | // (in-flight) transaction. |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 43 | txTable *trie |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 44 | } |
| 45 | |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 46 | var _ store.Store = (*db)(nil) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 47 | |
| 48 | // Open opens the database located at the given path, creating it if it doesn't |
| 49 | // exist. |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 50 | func Open(path string) (store.Store, error) { |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 51 | var cError *C.char |
| 52 | cPath := C.CString(path) |
| 53 | defer C.free(unsafe.Pointer(cPath)) |
| 54 | |
| 55 | cOpts := C.leveldb_options_create() |
| 56 | C.leveldb_options_set_create_if_missing(cOpts, 1) |
| 57 | C.leveldb_options_set_paranoid_checks(cOpts, 1) |
| 58 | defer C.leveldb_options_destroy(cOpts) |
| 59 | |
| 60 | cDb := C.leveldb_open(cOpts, cPath, &cError) |
| 61 | if err := goError(cError); err != nil { |
| 62 | return nil, err |
| 63 | } |
| 64 | readOptions := C.leveldb_readoptions_create() |
| 65 | C.leveldb_readoptions_set_verify_checksums(readOptions, 1) |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 66 | return &db{ |
Sergey Rogulenko | 95baa66 | 2015-05-22 15:07:06 -0700 | [diff] [blame] | 67 | node: store.NewResourceNode(), |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 68 | cDb: cDb, |
| 69 | readOptions: readOptions, |
| 70 | writeOptions: C.leveldb_writeoptions_create(), |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 71 | txEvents: list.New(), |
| 72 | txTable: newTrie(), |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 73 | }, nil |
| 74 | } |
| 75 | |
| 76 | // Close implements the store.Store interface. |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 77 | func (d *db) Close() error { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 78 | d.mu.Lock() |
| 79 | defer d.mu.Unlock() |
| 80 | if d.err != nil { |
Adam Sadovsky | a3fc33c | 2015-06-02 18:44:46 -0700 | [diff] [blame] | 81 | return convertError(d.err) |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 82 | } |
Sergey Rogulenko | 95baa66 | 2015-05-22 15:07:06 -0700 | [diff] [blame] | 83 | d.node.Close() |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 84 | C.leveldb_close(d.cDb) |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 85 | d.cDb = nil |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 86 | C.leveldb_readoptions_destroy(d.readOptions) |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 87 | d.readOptions = nil |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 88 | C.leveldb_writeoptions_destroy(d.writeOptions) |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 89 | d.writeOptions = nil |
Adam Sadovsky | 8db7443 | 2015-05-29 17:37:32 -0700 | [diff] [blame] | 90 | d.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 91 | return nil |
| 92 | } |
| 93 | |
| 94 | // Destroy removes all physical data of the database located at the given path. |
| 95 | func Destroy(path string) error { |
| 96 | var cError *C.char |
| 97 | cPath := C.CString(path) |
| 98 | defer C.free(unsafe.Pointer(cPath)) |
| 99 | cOpts := C.leveldb_options_create() |
| 100 | defer C.leveldb_options_destroy(cOpts) |
| 101 | C.leveldb_destroy_db(cOpts, cPath, &cError) |
| 102 | return goError(cError) |
| 103 | } |
| 104 | |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 105 | // Get implements the store.StoreReader interface. |
| 106 | func (d *db) Get(key, valbuf []byte) ([]byte, error) { |
| 107 | return d.getWithOpts(key, valbuf, d.readOptions) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 108 | } |
| 109 | |
| 110 | // Scan implements the store.StoreReader interface. |
Adam Sadovsky | f437f33 | 2015-05-19 23:03:22 -0700 | [diff] [blame] | 111 | func (d *db) Scan(start, limit []byte) store.Stream { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 112 | d.mu.RLock() |
| 113 | defer d.mu.RUnlock() |
| 114 | if d.err != nil { |
| 115 | return &store.InvalidStream{d.err} |
| 116 | } |
Sergey Rogulenko | def3b30 | 2015-05-20 17:33:24 -0700 | [diff] [blame] | 117 | return newStream(d, d.node, start, limit, d.readOptions) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 118 | } |
| 119 | |
| 120 | // Put implements the store.StoreWriter interface. |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 121 | func (d *db) Put(key, value []byte) error { |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 122 | write := store.WriteOp{ |
| 123 | T: store.PutOp, |
| 124 | Key: key, |
| 125 | Value: value, |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 126 | } |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 127 | return d.write([]store.WriteOp{write}, d.writeOptions) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | // Delete implements the store.StoreWriter interface. |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 131 | func (d *db) Delete(key []byte) error { |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 132 | write := store.WriteOp{ |
| 133 | T: store.DeleteOp, |
| 134 | Key: key, |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 135 | } |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 136 | return d.write([]store.WriteOp{write}, d.writeOptions) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 137 | } |
| 138 | |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 139 | // NewTransaction implements the store.Store interface. |
| 140 | func (d *db) NewTransaction() store.Transaction { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 141 | d.txmu.Lock() |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 142 | defer d.txmu.Unlock() |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 143 | d.mu.RLock() |
| 144 | defer d.mu.RUnlock() |
| 145 | if d.err != nil { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 146 | return &store.InvalidTransaction{d.err} |
| 147 | } |
Sergey Rogulenko | def3b30 | 2015-05-20 17:33:24 -0700 | [diff] [blame] | 148 | return newTransaction(d, d.node) |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 149 | } |
| 150 | |
| 151 | // NewSnapshot implements the store.Store interface. |
| 152 | func (d *db) NewSnapshot() store.Snapshot { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 153 | d.mu.RLock() |
| 154 | defer d.mu.RUnlock() |
| 155 | if d.err != nil { |
| 156 | return &store.InvalidSnapshot{d.err} |
| 157 | } |
Sergey Rogulenko | def3b30 | 2015-05-20 17:33:24 -0700 | [diff] [blame] | 158 | return newSnapshot(d, d.node) |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 159 | } |
| 160 | |
Adam Sadovsky | 8db7443 | 2015-05-29 17:37:32 -0700 | [diff] [blame] | 161 | // write writes a batch and adds all written keys to txTable. |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 162 | // TODO(rogulenko): remove this method. |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 163 | func (d *db) write(batch []store.WriteOp, cOpts *C.leveldb_writeoptions_t) error { |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 164 | d.txmu.Lock() |
| 165 | defer d.txmu.Unlock() |
| 166 | return d.writeLocked(batch, cOpts) |
| 167 | } |
| 168 | |
| 169 | // writeLocked is like write(), but it assumes txmu is held. |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 170 | func (d *db) writeLocked(batch []store.WriteOp, cOpts *C.leveldb_writeoptions_t) error { |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 171 | d.mu.Lock() |
| 172 | defer d.mu.Unlock() |
| 173 | if d.err != nil { |
| 174 | return d.err |
| 175 | } |
| 176 | cBatch := C.leveldb_writebatch_create() |
| 177 | defer C.leveldb_writebatch_destroy(cBatch) |
| 178 | for _, write := range batch { |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 179 | switch write.T { |
| 180 | case store.PutOp: |
| 181 | cKey, cKeyLen := cSlice(write.Key) |
| 182 | cVal, cValLen := cSlice(write.Value) |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 183 | C.leveldb_writebatch_put(cBatch, cKey, cKeyLen, cVal, cValLen) |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 184 | case store.DeleteOp: |
| 185 | cKey, cKeyLen := cSlice(write.Key) |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 186 | C.leveldb_writebatch_delete(cBatch, cKey, cKeyLen) |
| 187 | default: |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 188 | panic(fmt.Sprintf("unknown write operation type: %v", write.T)) |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 189 | } |
| 190 | } |
| 191 | var cError *C.char |
| 192 | C.leveldb_write(d.cDb, cOpts, cBatch, &cError) |
| 193 | if err := goError(cError); err != nil { |
| 194 | return err |
| 195 | } |
| 196 | if d.txEvents.Len() == 0 { |
| 197 | return nil |
| 198 | } |
| 199 | d.trackBatch(batch) |
| 200 | return nil |
| 201 | } |
| 202 | |
Adam Sadovsky | 8db7443 | 2015-05-29 17:37:32 -0700 | [diff] [blame] | 203 | // trackBatch writes the batch to txTable and adds a commit event to txEvents. |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 204 | func (d *db) trackBatch(batch []store.WriteOp) { |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 205 | // TODO(rogulenko): do GC. |
| 206 | d.txSequenceNumber++ |
| 207 | seq := d.txSequenceNumber |
| 208 | var keys [][]byte |
| 209 | for _, write := range batch { |
John Kline | 18834bd | 2015-06-26 10:07:46 -0700 | [diff] [blame^] | 210 | d.txTable.add(write.Key, seq) |
| 211 | keys = append(keys, write.Key) |
Sergey Rogulenko | 8a1ae3a | 2015-05-29 17:13:44 -0700 | [diff] [blame] | 212 | } |
| 213 | tx := &commitedTransaction{ |
| 214 | seq: seq, |
| 215 | batch: keys, |
| 216 | } |
| 217 | d.txEvents.PushBack(tx) |
| 218 | } |
| 219 | |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 220 | // getWithOpts returns the value for the given key. |
| 221 | // cOpts may contain a pointer to a snapshot. |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 222 | func (d *db) getWithOpts(key, valbuf []byte, cOpts *C.leveldb_readoptions_t) ([]byte, error) { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 223 | d.mu.RLock() |
| 224 | defer d.mu.RUnlock() |
| 225 | if d.err != nil { |
Adam Sadovsky | a3fc33c | 2015-06-02 18:44:46 -0700 | [diff] [blame] | 226 | return valbuf, convertError(d.err) |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 227 | } |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 228 | var cError *C.char |
| 229 | var valLen C.size_t |
| 230 | cStr, cLen := cSlice(key) |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 231 | val := C.leveldb_get(d.cDb, cOpts, cStr, cLen, &valLen, &cError) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 232 | if err := goError(cError); err != nil { |
Sergey Rogulenko | 0dbfe07 | 2015-05-19 20:10:18 -0700 | [diff] [blame] | 233 | return valbuf, err |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 234 | } |
| 235 | if val == nil { |
Sergey Rogulenko | a53e60f | 2015-05-22 11:05:01 -0700 | [diff] [blame] | 236 | return valbuf, verror.New(store.ErrUnknownKey, nil, string(key)) |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 237 | } |
| 238 | defer C.leveldb_free(unsafe.Pointer(val)) |
Adam Sadovsky | c18c8ca | 2015-05-08 18:05:46 -0700 | [diff] [blame] | 239 | return store.CopyBytes(valbuf, goBytes(val, valLen)), nil |
Sergey Rogulenko | b0081cf | 2015-05-05 22:39:37 -0700 | [diff] [blame] | 240 | } |