forked from UWQuickstep/quickstep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTupleStorageSubBlock.hpp
534 lines (486 loc) · 22.2 KB
/
TupleStorageSubBlock.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_TUPLE_STORAGE_SUB_BLOCK_HPP_
#define QUICKSTEP_STORAGE_TUPLE_STORAGE_SUB_BLOCK_HPP_
#include <unordered_map>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/TupleIdSequence.hpp"
#include "types/TypedValue.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
class CatalogRelationSchema;
class ComparisonPredicate;
class Tuple;
class TupleStorageSubBlockDescription;
class ValueAccessor;
/** \addtogroup Storage
* @{
*/
// TODO(chasseur): Abstract more common safety checks into base class.
/**
* @brief SubBlock which stores complete tuples.
**/
class TupleStorageSubBlock {
public:
/**
* @brief Structure describing the result of an insertion of a single tuple.
**/
struct InsertResult {
InsertResult(const tuple_id inserted_id_arg, const bool ids_mutated_arg)
: inserted_id(inserted_id_arg),
ids_mutated(ids_mutated_arg) {
}
/**
* @brief The ID of the inserted tuple, or -1 if unable to insert.
**/
tuple_id inserted_id;
/**
* @brief True if other tuples in the TupleStorageSubBlock had their ids
* mutated (requiring that indexes be rebuilt), false otherwise.
**/
bool ids_mutated;
};
/**
* @brief Constructor.
*
* @param relation The CatalogRelationSchema which this SubBlock belongs to.
* @param description A description containing any parameters needed to
* construct this SubBlock. Implementation-specific parameters are
* defined as extensions in StorageBlockLayout.proto.
* @param new_block Whether this is a newly-created block.
* @param sub_block_memory The memory slot to use for the block's contents.
* @param sub_block_memory_size The size of the memory slot in bytes.
* @exception BlockMemoryTooSmall This TupleStorageSubBlock hasn't been
* provided enough memory to store metadata.
**/
TupleStorageSubBlock(const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size)
: relation_(relation),
description_(description),
sub_block_memory_(sub_block_memory),
sub_block_memory_size_(sub_block_memory_size) {
}
/**
* @brief Virtual destructor.
**/
virtual ~TupleStorageSubBlock() {
}
/**
* @brief Get the CatalogRelationSchema this TupleStorageSubBlock belongs to.
*
* @return The relation this TupleStorageSubBlock belongs to.
**/
inline const CatalogRelationSchema& getRelation() const {
return relation_;
}
/**
* @brief Identify the type of this TupleStorageSubBlock.
*
* @return This TupleStorageSubBlock's type.
**/
virtual TupleStorageSubBlockType getTupleStorageSubBlockType() const = 0;
/**
* @brief Determine whether this SubBlock supports the getAttributeValue()
* method to get an untyped pointer to a value for a particular
* attribute.
*
* @param attr The ID of the attribute which getAttributeValue() would be
* used with.
* @return Whether the getAttributeValue() method can be used on this
* SubBlock with the specified attr.
**/
virtual bool supportsUntypedGetAttributeValue(const attribute_id attr) const = 0;
/**
* @brief Determine whether this SubBlock supports ad-hoc insertion of
* individual tuples via the insertTuple() method.
* @note If this method returns false, then tuples can only be inserted via
* insertTupleInBatch(), which should be followed by a call to
* rebuild() when a batch is fully inserted.
* @note Even if this method returns false, it is still legal to call
* insertTuple(), although it will always fail to actually insert.
*
* @return Whether the insertTuple() can be used on this SubBlock.
**/
virtual bool supportsAdHocInsert() const = 0;
/**
* @brief Determine whether inserting tuples one-at-a-time via the
* insertTuple() method is efficient (i.e. has constant time and space
* costs and does not require expensive reorganization of other tuples
* in this SubBlock).
*
* @return Whether insertTuple() is efficient for this SubBlock.
**/
virtual bool adHocInsertIsEfficient() const = 0;
/**
* @brief Determine whether this block has any tuples in it.
*
* @return True if this SubBlock is empty, false otherwise.
**/
virtual bool isEmpty() const = 0;
/**
* @brief Determine whether this block is packed, i.e. there are no holes in
* the tuple-id sequence.
*
* @return True if this SubBlock is packed, false otherwise.
**/
virtual bool isPacked() const = 0;
/**
* @brief Get the highest tuple-id of a valid tuple in this SubBlock.
*
* @return The highest tuple-id of a tuple stored in this SubBlock (-1 if
* this SubBlock is empty).
**/
virtual tuple_id getMaxTupleID() const = 0;
/**
* @brief Get the number of tuples contained in this SubBlock.
* @note This method returns tuple_id for ease of comparison with other
* tuple_id values referring to tuples in this block. The return value
* will always be non-negative, though.
* @note The default implementation is O(1) for packed TupleStorageSubBlocks,
* but TupleStorageSubBlock implementations which may be non-packed
* should override this where possible, as the default version of this
* method is O(N) when the SubBlock is non-packed.
*
* @return The number of tuples contained in this TupleStorageSubBlock.
**/
virtual tuple_id numTuples() const;
/**
* @brief Determine whether a tuple with the given id exists in this
* SubBlock.
*
* @param tuple The ID to check.
* @return True if tuple exists, false if it does not.
**/
virtual bool hasTupleWithID(const tuple_id tuple) const = 0;
/**
* @brief Insert a single tuple into this TupleStorageSubBlock.
*
* @param tuple The tuple to insert, whose values must be in the correct
* order.
* @return The result of the insertion.
**/
virtual InsertResult insertTuple(const Tuple &tuple) = 0;
/**
* @brief Insert a single tuple as part of a batch.
* @note This method is intended to allow a large number of tuples to be
* loaded without incurring the full cost of maintaining a "clean"
* internal block structure. Once a batch of tuples have been inserted,
* rebuild() should be called to put this TupleStorageSubBlock into a
* consistent state.
* @warning The inserted tuple may be placed in an "incorrect" or sub-optimal
* location in this TupleStorageSubBlock. The only methods which are
* safe to call between bulkInsertTuples() and rebuild() are
* insertTupleInBatch(), bulkInsertTuples(), and
* bulkInsertTuplesWithRemappedAttributes().
*
* @param tuple The tuple to insert, whose values must be in the correct
* order.
* @return True if the insertion was successful, false if out of space.
**/
virtual bool insertTupleInBatch(const Tuple &tuple) = 0;
/**
* @brief Insert as many tuples as possible from a ValueAccessor (all of the
* tuples accessible or as many as will fit in this
* TupleStorageSubBlock) as a single batch.
*
* @warning The inserted tuples may be placed in an "incorrect" or
* sub-optimal locations in this TupleStorageSubBlock. The only
* methods which are safe to call between bulkInsertTuples() and
* rebuild() are insertTupleInBatch(), bulkInsertTuples(), and
* bulkInsertTuplesWithRemappedAttributes().
*
* @param accessor A ValueAccessor to insert tuples from. This method assumes
* that the attributes in the ValueAccessor are in exactly the same
* order (and the same type as) the attributes of this
* TupleStorageStorageBlock's relation. The accessor's iteration will
* be advanced to the first non-inserted tuple or, if all accessible
* tuples were inserted in this sub-block, to the end position.
* @return The number of tuples inserted from accessor.
**/
virtual tuple_id bulkInsertTuples(ValueAccessor *accessor) = 0;
/**
* @brief Insert as many tuples as possible from a ValueAccessor (all of the
* tuples accessible or as many as will fit in this
* TupleStorageSubBlock) as a single batch. This version remaps
* attribute IDs from the ValueAccessor to the correct order of
* attributes for this StorageBlock's relation.
*
* @warning The inserted tuples may be placed in an "incorrect" or
* sub-optimal locations in this TupleStorageSubBlock. The only
* methods which are safe to call between bulkInsertTuples() and
* rebuild() are insertTupleInBatch(), bulkInsertTuples(), and
* bulkInsertTuplesWithRemappedAttributes().
*
* @param attribute_map A vector which maps the attributes of this
* TupleStorageSubBlock's relation (in order with no gaps) to the
* corresponding attributes which should be read from accessor.
* @param accessor A ValueAccessor to insert tuples from. The accessor's
* iteration will be advanced to the first non-inserted tuple or, if
* all accessible tuples were inserted in this sub-block, to the end
* position.
* @return The number of tuples inserted from accessor.
**/
virtual tuple_id bulkInsertTuplesWithRemappedAttributes(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor) = 0;
/**
* @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
* as a single batch, using the attribute_map to project and reorder
* columns from the input ValueAccessor. Does not update header.
*
* @note Typical usage is where you want to bulk-insert columns from two
* or more value accessors. Instead of writing out the columns into
* one or more column vector value accessors, you can simply use this
* function with the appropriate attribute_map for each value
* accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
* handles all the details) to insert tuples without an extra temp copy.
*
* @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
* until which point, the insertion is not visible to others.
* @warning The inserted tuples may be placed in a suboptimal position in the
* block.
*
* @param attribute_map A vector which maps the attributes of this
* TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId)
* to the corresponding attributes which should be read from accessor.
* @param accessor A ValueAccessor to insert tuples from. The accessor's
* iteration will be advanced to the first non-inserted tuple or, if
* all accessible tuples were inserted in this sub-block, to the end
* position.
* @return The number of tuples inserted from accessor.
**/
virtual tuple_id bulkInsertPartialTuples(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor,
const tuple_id max_num_tuples_to_insert) {
LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
<< getTupleStorageSubBlockType() << ").";
}
/**
* @brief Update header after a bulkInsertPartialTuples.
*
* @warning Only call this after a bulkInsertPartialTuples, passing in the
* number of tuples that were inserted (return value of that function).
*
* @param num_tuples_inserted Number of tuples inserted (i.e., how much to
* advance the header.num_tuples by). Should be equal to the return
* value of bulkInsertPartialTuples.
**/
virtual void bulkInsertPartialTuplesFinalize(
const tuple_id num_tuples_inserted) {
LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
<< getTupleStorageSubBlockType() << ").";
}
/**
* @brief Get the (untyped) value of an attribute in a tuple in this buffer.
* @warning This method may not be supported for all implementations of
* TupleStorageSubBlock. supportsUntypedGetAttributeValue() MUST be
* called first to determine if this method is usable.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
*
* @param tuple The desired tuple in this SubBlock.
* @param attr The attribute id of the desired attribute.
* @return An untyped pointer to the value of the specified attribute in the
* specified tuple.
**/
virtual const void* getAttributeValue(const tuple_id tuple, const attribute_id attr) const = 0;
/**
* @brief Get the value of the specified attribute of the specified tuple as
* a TypedValue.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
*
* @param tuple The desired tuple in this SubBlock.
* @param attr The attribute id of the desired attribute.
* @return The data as a TypedValue.
**/
virtual TypedValue getAttributeValueTyped(const tuple_id tuple, const attribute_id attr) const = 0;
/**
* @brief Create a ValueAccessor object that can read attribute values from
* within this TupleStorageSubBlock.
*
* @param sequence If non-null, the ValueAccessor returned with only iterate
* over the tuples in sequence. Note that the returned ValueAccessor
* will only be valid so long as sequence exists.
* @return A ValueAccessor object which can iterate over and read values from
* this TupleStorageSubBlock.
**/
virtual ValueAccessor* createValueAccessor(const TupleIdSequence *sequence = nullptr) const = 0;
/**
* @brief Determine whether it is possible to update some attribute values
* in-place without relocating the tuple.
*
* @param tuple The desired tuple in this SubBlock.
* @param new_values A map of attribute IDs to corresponding new (typed)
* values.
* @return True if an in-place update for all values in new_values is
* possible, false otherwise.
**/
virtual bool canSetAttributeValuesInPlaceTyped(
const tuple_id tuple,
const std::unordered_map<attribute_id, TypedValue> &new_values) const = 0;
/**
* @brief Set (update) the value of an attribute in an existing tuple.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
* @warning Sometimes it is not possible to update an attribute value
* in-place, and the tuple must be relocated. ALWAYS call
* canSetAttributeValuesInPlaceTyped() first to check whether this
* method is safe to use.
*
* @param tuple The desired tuple in this SubBlock.
* @param attr The attribute id of the desired attribute.
* @param value A TypedValue containing the new value for the attribute.
**/
virtual void setAttributeValueInPlaceTyped(const tuple_id tuple,
const attribute_id attr,
const TypedValue &value) = 0;
/**
* @brief Delete a single tuple from this TupleStorageSubBlock.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
* @warning Always check the return value of this call to see whether indexes
* must be totally rebuilt.
*
* @param tuple The tuple to delete.
* @return True if other tuples have had their ids mutated (requiring indexes
* to be rebuilt), false if other tuple IDs are stable.
**/
virtual bool deleteTuple(const tuple_id tuple) = 0;
/**
* @brief Delete multiple tuples from this TupleStorageSubBlock.
* @warning For debug builds, an assertion checks whether the specified
* tuples actually exist. In general, this method should only be
* called for tuples which are known to exist (from
* hasTupleWithID(), isPacked() and getMaxTupleID(), or presence
* in an index).
* @warning Always check the return value of this call to see whether indexes
* must be totally rebuilt.
*
* @param tuples A sequence of tuple IDs which indicate the tuples to be
* deleted.
* @return True if other tuples have had their ids mutated (requiring
* indexes to be rebuilt), false if other tuple IDs are stable.
**/
virtual bool bulkDeleteTuples(TupleIdSequence *tuples) = 0;
/**
* @brief Estimate the cost of evaluating a ComparisonPredicate on this
* TupleStorageStorageBlock.
*
* @param predicate The predicate to be evaluated.
* @return A cost estimate for evaluating the predicate using this
* TupleStorageSubBlock's getMatchesForPredicate() method. Lower
* values indicate a cheaper/faster estimated cost.
**/
virtual predicate_cost_t estimatePredicateEvaluationCost(
const ComparisonPredicate &predicate) const = 0;
/**
* @brief Get the IDs of tuples in this SubBlock which match a given
* ComparisonPredicate using a "fast-path" technique specific to this
* TupleStorageStorageBlock.
* @warning This should only be called if estimatePredicateEvaluationCost()
* returns a value OTHER than kInfinite, kRowScan, or kColumnScan
* for the predicate.
*
* @param predicate The predicate to match.
* @param filter If non-NULL, then only tuple IDs which are set in the
* filter will be checked (all others will be assumed to be false).
* @return The IDs of tuples matching the specified predicate.
**/
virtual TupleIdSequence* getMatchesForPredicate(const ComparisonPredicate &predicate,
const TupleIdSequence *filter) const {
FATAL_ERROR("Called TupleStorageSubBlock::getMatchesForPredicate() on a "
"TupleStorageStorageBlock that does not provide any non-scan "
"method for evaluating predicates.");
}
/**
* @brief Get the IDs of all tuples which exist in this SubBlock.
* @note As with numTuples(), the default implementation of this method is
* O(1) for packed blocks, but O(n) for non-packed blocks. New
* TupleStorageSubBlock implementations which are not packed should
* override this with a faster version if possible.
*
* @return The IDs of all tuples which exist in this SubBlock.
**/
virtual TupleIdSequence* getExistenceMap() const;
/**
* @brief Get the IDs of all tuples which exist in this SubBlock.
* @note The default implementation of this method is O(n) for both packed and
* non-packed blocks. However, with packed with packed blocks there is
* not branching in the tight loop.
*
* @return The list of IDs of all tuples which exist in this SubBlock.
**/
virtual OrderedTupleIdSequence* getExistenceList() const;
/**
* @brief Rebuild this TupleStorageSubBlock, compacting storage and
* reordering tuples where applicable.
* @note This method may use an unbounded amount of out-of-band memory.
**/
virtual void rebuild() = 0;
/**
* @brief Check if this TupleStorageSubBlock is compressed, i.e. whether it
* can safely be cast to CompressedTupleStorageSubBlock.
*
* @return true if this is a CompressedTupleStorageSubBlock, false otherwise.
**/
virtual bool isCompressed() const {
return false;
}
/**
* @brief Check if the instance of tuple storage sub-block order preserving.
* This is useful in merging of sorted runs, where insertion order
* should be preserved.
*
* @return \c true if this tuple storage sub-block is order preserving.
**/
virtual bool isInsertOrderPreserving() const = 0;
protected:
/**
* @brief In debug builds, performs assertions to make sure that the values
* in tuple can be inserted into this TupleStorageSubBlock.
*
* @param tuple The tuple to check.
**/
void paranoidInsertTypeCheck(const Tuple &tuple);
const CatalogRelationSchema &relation_;
const TupleStorageSubBlockDescription &description_;
void *sub_block_memory_;
const std::size_t sub_block_memory_size_;
private:
DISALLOW_COPY_AND_ASSIGN(TupleStorageSubBlock);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_STORAGE_TUPLE_STORAGE_SUB_BLOCK_HPP_