-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpacked_table.go
337 lines (295 loc) · 9.08 KB
/
packed_table.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
package dory
import (
"bytes"
"encoding/binary"
"errors"
"github.com/dgryski/go-farm"
)
const (
// The 1GiB table limit puts an effective cap on key size to 1GiB-8
// (single key, 0-size value). This lets us use the top 2 bits of the key
// size to store flags (same can be done with value size). Yay!
keySizeFlagMask = 3 << 30
// Flag to indicate this key/value entry has been deleted.
keySizeDeletedFlag = 1 << 31
// Length of the size prefix for a key/value pair.
prefixLen = 8
)
var (
ErrNoSpace = errors.New("insufficent space left")
)
// PackedTable is a simple key/value table that stores key and value data
// contiguously within a single []byte slice. The only data stored outside the
// slice is an index used to locate entries in the slice.
//
// The primary goals are to minimise heap fragmentation, and allow the user to
// instantly free memory used by this table (i.e. using munmap()). A good side
// effect is to also reduce Go GC pressure, although this is not a primary
// goal.
//
// Note: PackedTable is not thread-safe.
type PackedTable struct {
buf []byte
autoGcThreshold int
off int
// This is essentially a hash-table with linear-probed open-addressing.
// Instead of having a dynamically sized table, we just use a uint32-keyed
// map as a 2^32 element array. An added advantage is that we don't need to
// have a special "empty" element, the map provides that. But we do need to
// track deleted keys, which we do by using the value -1. Keys are the
// key-hashes. Values are an offset into the byte buffer where the key/value
// pair is stored.
keys map[uint32]int32
hashFn func([]byte) uint32
added int
deleted int
deletedSpace int
}
// Construct a new PackedTable using the given slice to store key/value data.
// autoGcThreshold is the number of bytes of data deleted before the table
// automatically performs a garbage collection (technically a compaction).
// If autoGcThreshold is 0, automatic GC is disabled.
// Note: The slice MUST be smaller than 1GiB in length.
func NewPackedTable(buf []byte, autoGcThreshold int) *PackedTable {
if len(buf) > 1<<30 {
panic("len(buf) > 1GiB")
}
return &PackedTable{
buf: buf,
autoGcThreshold: autoGcThreshold,
keys: make(map[uint32]int32),
hashFn: farm.Hash32,
}
}
// Reset erases all data in the table.
func (t *PackedTable) Reset() {
t.off = 0
t.added = 0
t.deleted = 0
t.deletedSpace = 0
t.keys = make(map[uint32]int32)
}
func (t *PackedTable) readSize(off int) (int, int) {
// Avoid using binary.LittleEndian here because this function is called a
// lot, and the compiler only inlines leaf functions.
b := t.buf[off : off+8]
keySize := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) |
(uint32(b[3]) << 24)
valSize := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) |
(uint32(b[7]) << 24)
return int(keySize), int(valSize)
}
func (t *PackedTable) writeSize(key, val int) int {
off := t.off
t.off += prefixLen
binary.LittleEndian.PutUint32(t.buf[off:], uint32(key))
binary.LittleEndian.PutUint32(t.buf[off+4:], uint32(val))
return off
}
func (t *PackedTable) tagDeleted(off int) {
// Flag only affects the most significant bits. This is shorter, and now
// eligable for inlining.
t.buf[off+3] |= (keySizeDeletedFlag >> 24)
}
func (t *PackedTable) hashEntry(key []byte) uint32 {
hash := t.hashFn(key)
for ; ; hash++ {
off, ok := t.keys[hash]
if !ok {
break
} else if off < 0 {
continue
}
keySize, _ := t.readSize(int(off))
keyOff := int(off) + prefixLen
if keySize == len(key) && bytes.Compare(key, t.buf[keyOff:keyOff+len(key)]) == 0 {
break
}
}
return hash
}
func (t *PackedTable) findKey(key []byte) int {
hash := t.hashFn(key)
for ; ; hash++ {
off, ok := t.keys[hash]
if !ok {
break
} else if off < 0 {
continue
}
keySize, _ := t.readSize(int(off))
keyOff := int(off) + prefixLen
if keySize == len(key) && bytes.Compare(key, t.buf[keyOff:keyOff+len(key)]) == 0 {
return int(off)
}
}
return -1
}
// EntrySize returns the amount of space used in the table's slice by the given
// key/value. May be used to determine if there is sufficient space to store
// the key/value.
func (t *PackedTable) EntrySize(key, val []byte) int {
return len(key) + len(val) + prefixLen
}
// FreeSpace returns the number of bytes of usable free space in the table.
func (t *PackedTable) FreeSpace() int {
return len(t.buf) - t.off
}
// LiveSpace return the number of bytes used by entries in the table.
func (t *PackedTable) LiveSpace() int {
return t.off - t.deletedSpace
}
// DeletedSpace returns the number of bytes used by deleted entries in the
// table. This space may be reclaimed by running a garbage collection.
// Note: LiveSpace + FreeSpace + DeletedSpace == len(buf)
func (t *PackedTable) DeletedSpace() int {
return t.deletedSpace
}
// NumEntries returns the number of entries in the table. Should probably be
// renamed to Len().
func (t *PackedTable) NumEntries() int {
return t.added - t.deleted
}
// NumDeleted returns the number of deleted entries in the table. This space
// may be reclaimed by running a garbage collection.
func (t *PackedTable) NumDeleted() int {
return t.deleted
}
// Has returns whether or not the table contains the requested key.
func (t *PackedTable) Has(key []byte) bool {
if len(key) == 0 {
panic("zero-sized key")
}
return t.findKey(key) >= 0
}
// Get returns a slice of the value for the key, if it exists in the table,
// or nil if the key does not exist. The returned slice will be a slice into
// the table's memory and MUST NOT be modified, and is only valid until the
// next call into PackedTable.
func (t *PackedTable) Get(key []byte) []byte {
if len(key) == 0 {
panic("zero-sized key")
}
off := t.findKey(key)
if off < 0 {
return nil
}
keySize, valSize := t.readSize(off)
valOff := off + prefixLen + keySize
return t.buf[valOff : valOff+valSize]
}
func (t *PackedTable) deleteEntry(hash uint32, off int, deleteHashEntry bool) {
keySize, valSize := t.readSize(off)
t.tagDeleted(off)
if deleteHashEntry {
_, ok := t.keys[hash+1]
if ok {
t.keys[hash] = -1
} else {
delete(t.keys, hash)
}
} else {
// So that the key doesn't get caught up in GC.
t.keys[hash] = -1
}
t.deleted++
t.deletedSpace += keySize + valSize + prefixLen
if t.autoGcThreshold > 0 && t.deletedSpace > t.autoGcThreshold {
t.GC()
}
}
// Put adds the key/value into the table, if there is sufficient free space.
// Returns nil on success, or ErrNoSpace if there is insufficient free space.
// If the table already contains the key, the existing key/value will be
// deleted (as if Delete() was called), and the new entry inserted.
func (t *PackedTable) Put(key, val []byte) error {
if len(key) == 0 {
panic("zero-sized key")
}
size := t.EntrySize(key, val)
if size > t.FreeSpace() {
return ErrNoSpace
}
hash := t.hashEntry(key)
if off, ok := t.keys[hash]; ok && off >= 0 {
t.deleteEntry(hash, int(off), false)
}
off := t.writeSize(len(key), len(val))
n := copy(t.buf[t.off:], key)
t.off += n
if n != len(key) {
panic("n != len(key)")
}
n = copy(t.buf[t.off:], val)
t.off += n
if n != len(val) {
panic("n != len(val)")
}
t.keys[hash] = int32(off)
t.added++
return nil
}
// Delete removes the key, and returns true if the key existed. Deleting a
// key may not automatically free space used by that key/value. Space is only
// reclaimed if the amount of deleted space exceeds the auto-GC threshold, or
// a GC is explicitly performed.
func (t *PackedTable) Delete(key []byte) bool {
if len(key) == 0 {
panic("zero-sized key")
}
hash := t.hashEntry(key)
off, ok := t.keys[hash]
if ok && off >= 0 {
t.deleteEntry(hash, int(off), true)
return true
}
return false
}
// Keys returns a list of keys. Returned keys are slices into this table's
// memory, MUST NOT be modified, and are only valid until the next call
// into PackedTable.
func (t *PackedTable) Keys() [][]byte {
if t.NumEntries() == 0 {
return nil
}
keys := make([][]byte, 0, t.NumEntries())
for off := 0; off < t.off; {
keySize, valSize := t.readSize(off)
entrySize := (keySize & ^keySizeFlagMask) + valSize + prefixLen
if (keySize & keySizeDeletedFlag) == 0 {
key := t.buf[off+prefixLen : off+prefixLen+keySize]
keys = append(keys, key)
}
off += entrySize
}
return keys
}
// GC performs a garbage collection to reclaim free space.
func (t *PackedTable) GC() {
if t.deleted == 0 {
// No deleted entries => no GC needed.
return
}
oldLen := t.off
if len(t.keys) > 8 && len(t.keys) > (2*t.NumEntries()) {
// This is when there are too many deleted entries in the table.
t.keys = make(map[uint32]int32, t.NumEntries())
}
t.off = 0
t.added = 0
t.deleted = 0
t.deletedSpace = 0
for off := 0; off < oldLen; {
keySize, valSize := t.readSize(off)
entrySize := (keySize & ^keySizeFlagMask) + valSize + prefixLen
if (keySize & keySizeDeletedFlag) == 0 {
key := t.buf[off+prefixLen : off+prefixLen+keySize]
hash := t.hashEntry(key)
copy(t.buf[t.off:], t.buf[off:off+entrySize])
t.keys[hash] = int32(t.off)
t.off += entrySize
t.added++
}
off += entrySize
}
}