forked from UWQuickstep/quickstep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBloomFilterIndexSubBlock.hpp
199 lines (168 loc) · 7.42 KB
/
BloomFilterIndexSubBlock.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_BLOOM_FILTER_INDEX_SUB_BLOCK_HPP_
#define QUICKSTEP_STORAGE_BLOOM_FILTER_INDEX_SUB_BLOCK_HPP_
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
#include "expressions/predicate/PredicateCost.hpp"
#include "storage/IndexSubBlock.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/SubBlockTypeRegistryMacros.hpp"
#include "utility/BloomFilter.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
class CatalogRelationSchema;
class ComparisonPredicate;
class IndexSubBlockDescription;
class TupleIdSequence;
class TupleStorageSubBlock;
QUICKSTEP_DECLARE_SUB_BLOCK_TYPE_REGISTERED(BloomFilterIndexSubBlock);
/** \addtogroup Storage
* @{
*/
/**
* @brief An IndexSubBlock which implements a Bloom Filter index.
* @note This IndexSubBlock supports both fixed-length attributes, and the
* variable length attributes.
**/
class BloomFilterIndexSubBlock : public IndexSubBlock {
public:
/**
* @brief An enum describing the various selecitivity factors provided by
* a BloomFilter index.
* @note A kSelectivityNone will correspond to a BloomFilter miss (true negative),
* when the data is certainly guaranteed to be not present in the StorageBlock.
* A kSelectivityAll, kSelectivityNone will correspond to a BloomFilter hit
* when the data may or may not be present in the StorageBlock.
**/
enum class BloomFilterSelectivity {
kSelectivityUnknown = 0,
kSelectivityAll,
kSelectivityNone
};
/**
* @brief A random seed to initialize the bloom filter hash functions.
**/
static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
BloomFilterIndexSubBlock(const TupleStorageSubBlock &tuple_store,
const IndexSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size);
~BloomFilterIndexSubBlock() override;
/**
* @brief Determine whether an IndexSubBlockDescription is valid for this
* type of IndexSubBlock.
*
* @param relation The relation an index described by description would
* belong to.
* @param description A description of the parameters for this type of
* IndexSubBlock, which will be checked for validity.
* @return Whether description is well-formed and valid for this type of
* IndexSubBlock belonging to relation (i.e. whether an IndexSubBlock
* of this type, belonging to relation, can be constructed according
* to description).
**/
static bool DescriptionIsValid(const CatalogRelationSchema &relation,
const IndexSubBlockDescription &description);
/**
* @brief Estimate the average number of bytes (including any applicable
* overhead) used to index a single tuple in this type of
* IndexSubBlock. Used by StorageBlockLayout::finalize() to divide
* block memory amongst sub-blocks.
* @warning description must be valid. DescriptionIsValid() should be called
* first if necessary.
*
* @param relation The relation tuples belong to.
* @param description A description of the parameters for this type of
* IndexSubBlock.
* @return The average/ammortized number of bytes used to index a single
* tuple of relation in an IndexSubBlock of this type described by
* description.
**/
static std::size_t EstimateBytesPerTuple(const CatalogRelationSchema &relation,
const IndexSubBlockDescription &description);
/**
* @brief Estimate the total number of bytes (including any applicable
* overhead) occupied by this IndexSubBlock within the StorageBlock.
* This function is to be used by those indicies whose occupied size
* in the block does not depend on the number of tuples being indexed.
* @warning description must be valid. DescriptionIsValid() should be called
* first if necessary.
* @note This function will be invoked by StorageBlockLayout::finalize()
* if and only when EstimateBytesPerTuple() returns a zero size.
*
* @param relation The relation tuples belong to.
* @param description A description of the parameters for this type of
* IndexSubBlock.
* @return The total number of bytes occupied by this IndexSubBlock within
* the StorageBlock.
**/
static std::size_t EstimateBytesPerBlock(const CatalogRelationSchema &relation,
const IndexSubBlockDescription &description);
IndexSubBlockType getIndexSubBlockType() const override {
return kBloomFilter;
}
bool supportsAdHocAdd() const override {
return true;
}
bool supportsAdHocRemove() const override {
return false;
}
bool addEntry(const tuple_id tuple) override;
bool bulkAddEntries(const TupleIdSequence &tuples) override;
void removeEntry(const tuple_id tuple) override;
void bulkRemoveEntries(const TupleIdSequence &tuples) override;
predicate_cost_t estimatePredicateEvaluationCost(
const ComparisonPredicate &predicate) const override;
/**
* @note Currently this version only supports predicates with
* equality comparison of a static literal value with a non-composite key.
**/
TupleIdSequence* getMatchesForPredicate(const ComparisonPredicate &predicate,
const TupleIdSequence *filter) const override;
bool rebuild() override;
/**
* @brief Get the selectivity provided by the index for a given predicate.
* There are only two selectivity levels returned by this function-
* either a zero(none) selectivity for bloom filter miss
* or a one(all) selectivity for bloom filter hit.
* A none selectivity predicate will return no tuples, whereas
* an all selectivity predicate will possibly return all tuples.
*
* @param predicate The comparison predicate to evaluate selectivity for.
* @return An enum describing the selectivity factor.
**/
BloomFilterSelectivity getSelectivityForPredicate(const ComparisonPredicate &predicate) const;
private:
bool is_initialized_;
bool is_consistent_;
const std::uint64_t random_seed_;
const std::uint64_t bit_array_size_in_bytes_;
std::vector<attribute_id> indexed_attribute_ids_;
std::unique_ptr<unsigned char> bit_array_;
std::unique_ptr<BloomFilter> bloom_filter_;
friend class BloomFilterIndexSubBlockTest;
DISALLOW_COPY_AND_ASSIGN(BloomFilterIndexSubBlock);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_STORAGE_BLOOM_FILTER_INDEX_SUB_BLOCK_HPP_