-
Notifications
You must be signed in to change notification settings - Fork 4
/
config.hpp
324 lines (291 loc) · 16.4 KB
/
config.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
// Copyright (c) Lorenz Hübschle-Schneider
// Copyright (c) Facebook, Inc. and its affiliates.
// All Rights Reserved. This source code is licensed under the Apache 2.0
// License (found in the LICENSE file in the root directory).
#pragma once
#include <tlx/math/integer_log2.hpp>
#include <cstdint>
#include <iomanip>
#include <utility>
// requires xxhash3, i.e. libxxhash v0.8.0 or later
// available from https://github.com/Cyan4973/xxHash/releases/tag/v0.8.0
#define XXH_INLINE_ALL
#include <xxhash.h>
namespace std {
ostream &operator<<(ostream &os, const __uint128_t &v) {
return os << "0x" << ::std::hex << static_cast<uint64_t>(v >> 64)
<< ::std::setw(16) << static_cast<uint64_t>(v) << ::std::dec;
}
} // namespace std
namespace ribbon {
// clang-format off
template <size_t bits>
using at_least_t = std::conditional_t<bits <= 8, uint8_t,
std::conditional_t<bits <= 16, uint16_t,
std::conditional_t<bits <= 32, uint32_t,
std::conditional_t<bits <= 64, uint64_t,
std::conditional_t<bits <= 128, __uint128_t,
/* error */ void>>>>>;
template <size_t bits>
using at_least_fast_t = std::conditional_t<bits <= 8, uint_fast8_t,
// use uint32_t here, it's much faster
std::conditional_t<bits <= 16, uint32_t,
std::conditional_t<bits <= 32, uint_fast32_t,
std::conditional_t<bits <= 64, uint_fast64_t,
std::conditional_t<bits <= 128, __uint128_t,
/* error */ void>>>>>;
template <typename uint_type>
using make_fast_t = at_least_fast_t<8u * sizeof(uint_type)>;
// clang-format on
// threshold compression modes
enum class ThreshMode : int { normal = 0, onebit = 1, twobit = 2 };
template <typename Config>
constexpr unsigned thresh_meta_bits =
Config::kThreshMode == ThreshMode::normal
? (Config::kSparseCoeffs
? tlx::integer_log2_floor(
Config::kBucketSize /
// sparse start pos alignment: L = 16,32: 4, L = 64,128: 8
(sizeof(typename Config::CoeffRow) >= 8 ? 8 : 4))
: tlx::integer_log2_floor(Config::kBucketSize))
: (Config::kThreshMode == ThreshMode::onebit
? 1
: (Config::kThreshMode == ThreshMode::twobit ? 2 :
/* fallthrough */ 9999));
// To avoid writing 'typename' everywhere that we use types like 'Index'
#define IMPORT_RIBBON_CONFIG(RibbonConfig) \
using CoeffRow = typename RibbonConfig::CoeffRow; \
using ResultRow = typename RibbonConfig::ResultRow; \
using Index = typename RibbonConfig::Index; \
using Hash = typename RibbonConfig::Hash; \
using Key = typename RibbonConfig::Key; \
\
/* Some more additions */ \
[[maybe_unused]] static constexpr auto kCoeffBits = \
static_cast<Index>(sizeof(CoeffRow) * 8U); \
[[maybe_unused]] static constexpr auto kResultBits = \
RibbonConfig::kResultBits; \
[[maybe_unused]] static constexpr Index kBucketSize = \
RibbonConfig::kBucketSize; \
\
/* Export to algorithm */ \
[[maybe_unused]] static constexpr bool kIsFilter = RibbonConfig::kIsFilter; \
[[maybe_unused]] static constexpr bool kFirstCoeffAlwaysOne = \
RibbonConfig::kFirstCoeffAlwaysOne; \
[[maybe_unused]] static constexpr ThreshMode kThreshMode = \
RibbonConfig::kThreshMode; \
[[maybe_unused]] static constexpr bool kSparseCoeffs = \
RibbonConfig::kSparseCoeffs; \
[[maybe_unused]] static constexpr bool kUseInterleavedSol = \
RibbonConfig::kUseInterleavedSol; \
[[maybe_unused]] static constexpr bool kUseCacheLineStorage = \
RibbonConfig::kUseCacheLineStorage; \
[[maybe_unused]] static constexpr bool kUseMHC = RibbonConfig::kUseMHC; \
[[maybe_unused]] static constexpr bool kUseMultiplyShiftHash = \
RibbonConfig::kUseMultiplyShiftHash; \
\
static_assert(!kUseInterleavedSol || !kUseCacheLineStorage, \
"can't have both"); \
static_assert(1 << tlx::integer_log2_floor(kBucketSize) == kBucketSize, \
"bucket size must be a power of two"); \
static_assert( \
sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) + sizeof(Hash) + \
sizeof(Key) + kBucketSize + kResultBits + kCoeffBits + \
kFirstCoeffAlwaysOne + (int)kThreshMode + kSparseCoeffs + \
kUseInterleavedSol + kUseCacheLineStorage + kUseMHC + \
kUseMultiplyShiftHash > \
0, \
"avoid unused warnings, semicolon expected after macro call")
// for printing sqlplottools RESULT lines
template <typename Config>
std::string dump_config() {
IMPORT_RIBBON_CONFIG(Config);
std::stringstream s;
s << " L=" << kCoeffBits << " B=" << kBucketSize << " r=" << kResultBits
<< " mode=" << (int)kThreshMode << " sparse=" << kSparseCoeffs << " sol="
<< (kUseInterleavedSol ? "int" : (kUseCacheLineStorage ? "cls" : "basic"))
<< " fcao=" << kFirstCoeffAlwaysOne << " idxbits=" << 8u * sizeof(Index)
<< " keybits=" << 8u * sizeof(Key) << " filter=" << kIsFilter;
return s.str();
}
// This is a very basic config that has all required attributes but doesn't
// necessarily make the best choices! Do not use this as is, instead have a look
// at RConfig and its derivatives below. This class is mainly useful as it
// documents the effect of the various settings.
template <typename CoeffRow_, typename ResultRow_, typename Key_>
class DefaultConfig {
public:
// An unsigned integer type. The size of this type determines L, the width
// of the ribbon. A good choice is uint64_t. Fewer than 16 bits yield high
// overheads without improving performance, but correctness is maintained
// down to uint8_t. You can also use __uint128_t, which can be a bit slow
// but yields excellent overhead (sub-0.1% possible).
using CoeffRow = CoeffRow_;
// A type that is big enough to hold the data that is stored in the data
// structure (retrieval) / the fingerprints (filter)
using ResultRow = ResultRow_;
// The key type. For retrieval, input consists of pairs of Key and
// ResultRow, while for filters, it's just Key.
using Key = Key_;
// An unsigned type that is large enough to hold the maximum index in the
// input (i.e., input size muts fit into Index). Use uint64_t if you have
// ribbons holding billions of items.
using Index = uint32_t;
// An unsigned type to hold hash values. This should likely always be
// uint64_t.
using Hash = uint64_t;
// How many items form a bucket. This should be O(L^2/log(L)) - see
// recommended_bucket_size below, don't use this value
static constexpr Index kBucketSize = 16u * sizeof(CoeffRow);
// How many bits the retrieval data structure should store per key / how
// many fingerprint bits to use in a filter. When using interleaved
// storage, values other than 8*sizeof(ResultRow) are efficiently supported.
static constexpr Index kResultBits = 8u * sizeof(ResultRow);
// Whether to use a shift-multiply hash function for arithmetic keys instead
// of xxhash. This can be problematic if keys aren't random. Should likely
// stay disabled.
static constexpr bool kUseMultiplyShiftHash = false;
// The hash function to use for mapping Key to Hash. This should be a good
// hash function that yields sufficiently independent values for different
// seeds.
static constexpr Hash HashFn(const Key &key, uint64_t seed) {
if constexpr (std::is_arithmetic_v<Key>) {
return XXH3_64bits_withSeed(reinterpret_cast<const char *>(&key),
sizeof(Key), seed);
} else {
return XXH3_64bits_withSeed(key.data(), key.size(), seed);
}
}
// Whether the data structure is used as a filter or for retrieval.
static constexpr bool kIsFilter = true;
// Whether the first coefficient should always be set. This makes things
// slightly faster.
static constexpr bool kFirstCoeffAlwaysOne = true;
// Which threshold compressor to use. `twobit` is fast and pretty good,
// `onebit` yields improved compression at the cost of some (query)
// performance. `normal` uses uncompressed thresholds and should not
// normally be used.
static constexpr ThreshMode kThreshMode = ThreshMode::twobit;
// Whether to use sparse coefficients. This speeds up queries for
// non-interleaved solution storage, but is not fully optimized - the
// threshold parameters need separate tuning for the sparse case. As is,
// sparse mode is a prototype and should not be used productively.
static constexpr bool kSparseCoeffs = false;
// Whether to use interleaved storage. This is usually faster than the
// alternatives.
static constexpr bool kUseInterleavedSol = false;
// Whether to use contiguous storage with embedded metadata. This is
// cache-efficient but slow in practice and should likely not be used.
static constexpr bool kUseCacheLineStorage = false;
// Whether to use Master Hash Codes. This causes keys to be hashed only
// once during insertion and query, using hash remixing to derive the next
// level's hash if the key was bumped. This is almost always a good idea.
static constexpr bool kUseMHC = false;
// Whether to print timings and other information about the construction.
static constexpr bool log = true;
};
namespace {
// Internal. Whether compressed metadata should be used for Cache-Line
// Storage. For other storage types, the answer is always yes - for CLS, it
// depends how much metadata space is available and whether uncompressed
// thresholds would fit. This is because we don't save anything by using less
// than the available space.
template <typename Index, Index kBucketSize, Index kResultBits>
struct shouldCompressMeta {
// internal
static constexpr Index _kBucketIdxBits = tlx::integer_log2_ceil(kBucketSize),
_kBucketBits = kBucketSize * kResultBits,
_clbits = 512,
_buckets_per_cl = (_kBucketBits > _clbits)
? 1
: _clbits / _kBucketBits;
static constexpr bool value = _kBucketIdxBits * _buckets_per_cl > kResultBits;
};
} // namespace
template <size_t coeff_bits, ThreshMode mode>
constexpr size_t recommended_bucket_size =
// round down to next power of two
size_t{1} << tlx::integer_log2_floor(
// w * w / (factor * log(w))
coeff_bits * coeff_bits /
((mode == ThreshMode::normal ? 2 : 4) * tlx::integer_log2_ceil(coeff_bits)));
// Reasonable default config, but specify sizes in bits not types and make
// smarter choices. This exposes a lot of options that users may not want to
// think about, see below for some reasonable presets.
template <size_t coeff_bits, size_t result_bits, ThreshMode mode = ThreshMode::twobit,
bool sparse = false, bool interleaved = false, bool cls = false,
int bucket_sh = 0, typename Key = int>
struct RConfig
: public DefaultConfig<at_least_t<coeff_bits>, at_least_t<result_bits>, Key> {
using Super =
DefaultConfig<at_least_t<coeff_bits>, at_least_t<result_bits>, int>;
using Index = uint32_t;
static constexpr Index kResultBits = result_bits;
static constexpr ThreshMode kThreshMode =
// if using CLS, override compression mode
Super::kUseCacheLineStorage
? (shouldCompressMeta<Index, Super::kBucketSize, kResultBits>::value
? ThreshMode::twobit
: ThreshMode::normal)
: mode;
// Bucket size w^2 / 2 log w (uncompressed) or w^2 / 4 log w (compressed),
// rounded down to next power of two. Optionally shift by 'bucket_sh'
static constexpr Index kBucketSize =
bucket_sh < 0 ? (recommended_bucket_size<coeff_bits, mode> >> -bucket_sh)
: (recommended_bucket_size<coeff_bits, mode> << bucket_sh);
static constexpr bool kSparseCoeffs = sparse,
kUseInterleavedSol = interleaved,
kUseCacheLineStorage = cls, kUseMHC = true;
};
/******************************************************************************/
// Suggested configurations for users
// A fast filter config. This uses L=64, 2-bit thresholds, and interleaved
// storage with dense coefficients and master hash codes. It is a good
// all-round preset for filters. You can expect <0.5% overhead if result_bits
// is at least 4; we measured 1.6% overhead for result_bits = 1. For
// result_bits = 8 we saw 0.23% overhead, declining further as result_bits is
// increased.
template <size_t result_bits, typename Key>
struct FastFilterConfig
: public RConfig</* L */ 64, result_bits, ThreshMode::twobit, /* sparse */ false,
/* interleaved */ true, /* cls */ false, /* shift */ 0, Key> {
};
// A very space-efficient filter config. This uses L=128 and 2-bit thresholds,
// with the other choices as for FastFilterConfig. Expect <0.1% overhead for
// result_bits >= 7, and <0.2% for result_bits between 3 and 6. We measured
// 0.45% overhead for result_bits = 1 and 0.18% overhead for result_bits = 3.
// For result_bits = 8, we saw 0.09% overhead, declining further as result_bits
// is increased.
template <size_t result_bits, typename Key>
struct CompactFilterConfig
: public RConfig</* L */ 128, result_bits, ThreshMode::twobit, /* sparse */ false,
/* interleaved */ true, /* cls */ false, /* shift */ 0, Key> {
};
// An extremely space-efficient filter config. This uses L=128 and 1-bit
// thresholds, with the other choices as for FastFilterConfig. Expect <0.1%
// overhead for result_bits >= 4. We measured 0.33% overhead for result_bits =
// 1 and 0.12% overhead for result_bits = 3. For result_bits = 8, we saw 0.05%
// overhead, declining further as result_bits is increased.
template <size_t result_bits, typename Key>
struct UltraCompactFilterConfig
: public RConfig</* L */ 128, result_bits, ThreshMode::onebit, /* sparse */ false,
/* interleaved */ true, /* cls */ false, /* shift */ 0, Key> {
};
// A fast retrieval config, see FastFilterConfig for details
template <size_t result_bits, typename Key>
struct FastRetrievalConfig : public FastFilterConfig<result_bits, Key> {
static constexpr bool kIsFilter = false;
};
// A very space-efficient retrieval config, see CompactFilterConfig for details
template <size_t result_bits, typename Key>
struct CompactRetrievalConfig : public CompactFilterConfig<result_bits, Key> {
static constexpr bool kIsFilter = false;
};
// An extremely space-efficient retrieval config, see UltraCompactFilterConfig
// for details
template <size_t result_bits, typename Key>
struct UltraCompactRetrievalConfig
: public UltraCompactFilterConfig<result_bits, Key> {
static constexpr bool kIsFilter = false;
};
} // namespace ribbon