forked from UWQuickstep/quickstep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPackedPayloadHashTable.hpp
1016 lines (925 loc) · 39 KB
/
PackedPayloadHashTable.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_PACKED_PAYLOAD_HASH_TABLE_HPP_
#define QUICKSTEP_STORAGE_PACKED_PAYLOAD_HASH_TABLE_HPP_
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/aggregation/AggregationHandle.hpp"
#include "storage/HashTableBase.hpp"
#include "storage/HashTableKeyManager.hpp"
#include "storage/StorageBlob.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/ValueAccessorMultiplexer.hpp"
#include "storage/ValueAccessorUtil.hpp"
#include "threading/SpinMutex.hpp"
#include "threading/SpinSharedMutex.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/ColumnVectorsValueAccessor.hpp"
#include "utility/HashPair.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
namespace quickstep {
class StorageManager;
class Type;
class ValueAccessor;
/** \addtogroup Storage
* @{
*/
/**
* @brief Aggregation hash table implementation in which the payload can be just
* a bunch of bytes. This implementation is suitable for aggregation with
* multiple aggregation handles (e.g. SUM, MAX, MIN etc).
*
* At present the hash table uses separate chaining to resolve collisions, i.e.
* Keys/values are stored in a separate region of memory from the base hash
* table slot array. Every bucket has a "next" pointer so that entries that
* collide (i.e. map to the same base slot) form chains of pointers with each
* other.
**/
class PackedPayloadHashTable : public AggregationStateHashTableBase {
 public:
  /**
   * @brief Constructor.
   *
   * @param key_types A vector of one or more types (>1 indicates a composite
   *        key).
   * @param num_entries The estimated number of entries this hash table will
   *        hold.
   * @param handles The aggregation handles.
   * @param storage_manager The StorageManager to use (a StorageBlob will be
   *        allocated to hold this hash table's contents).
   **/
  PackedPayloadHashTable(
      const std::vector<const Type *> &key_types,
      const std::size_t num_entries,
      const std::vector<AggregationHandle *> &handles,
      StorageManager *storage_manager);

  ~PackedPayloadHashTable() override;

  HashTableImplType getImplType() const override {
    return HashTableImplType::kSeparateChaining;
  }

  /**
   * @brief Erase all entries in this hash table.
   *
   * @warning This method is not guaranteed to be threadsafe.
   **/
  void clear();

  void destroyPayload() override;

  /**
   * @brief Use aggregation handles to update (multiple) aggregation states in
   *        this hash table, with group-by keys and arguments drawn from the
   *        given ValueAccessors. New states are first inserted if not already
   *        present.
   *
   * @note This method is threadsafe with regard to other calls to
   *       upsertCompositeKey() and upsertValueAccessorCompositeKey().
   *
   * @param argument_ids The multi-source attribute IDs of each argument
   *        component to be read from \p accessor_mux.
   * @param key_ids The multi-source attribute IDs of each group-by key
   *        component to be read from \p accessor_mux.
   * @param accessor_mux A ValueAccessorMultiplexer object that contains the
   *        ValueAccessors which will be used to access keys. beginIteration()
   *        should be called on the accessors before calling this method.
   * @return True on success, false if upsert failed because there was not
   *         enough space to insert new entries for all the keys in accessor
   *         (note that some entries may still have been upserted, and
   *         accessors' iterations will be left on the first tuple which could
   *         not be inserted).
   **/
  bool upsertValueAccessorCompositeKey(
      const std::vector<std::vector<MultiSourceAttributeId>> &argument_ids,
      const std::vector<MultiSourceAttributeId> &key_ids,
      const ValueAccessorMultiplexer &accessor_mux) override;

  /**
   * @return The ID of the StorageBlob used to store this hash table.
   **/
  inline block_id getBlobId() const {
    return blob_->getID();
  }

  /**
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call).
   *          Concurrent calls to getSingleCompositeKey(), forEach(), and
   *          forEachCompositeKey() are safe.
   *
   * @return The number of entries in this HashTable.
   **/
  inline std::size_t numEntries() const {
    return header_->buckets_allocated.load(std::memory_order_relaxed);
  }

  /**
   * @brief Use aggregation handles to merge the given aggregation states into
   *        the aggregation states mapped to the given key. New states are first
   *        inserted if not already present.
   *
   * @warning The key must not be null.
   * @note This method is threadsafe with regard to other calls to
   *       upsertCompositeKey() and upsertValueAccessorCompositeKey().
   *
   * @param key The key.
   * @param source_state The source aggregation states to be merged into this
   *        hash table.
   * @return True on success, false if upsert failed because there was not
   *         enough space to insert a new entry in this hash table.
   **/
  inline bool upsertCompositeKey(const std::vector<TypedValue> &key,
                                 const std::uint8_t *source_state);

  /**
   * @brief Apply a functor to an aggregation state mapped to the given key.
   *        First inserting a new state if one is not already present.
   *
   * @warning The key must not be null.
   * @note This method is threadsafe with regard to other calls to
   *       upsertCompositeKey() and upsertValueAccessorCompositeKey().
   *
   * @param key The key.
   * @param functor A pointer to a functor, which should provide a call
   *        operator which takes an aggregation state (of type std::uint8_t *)
   *        as an argument.
   * @param index The index of the target aggregation state among those states
   *        mapped to \p key.
   * @return True on success, false if upsert failed because there was not
   *         enough space to insert a new entry in this hash table.
   **/
  template <typename FunctorT>
  inline bool upsertCompositeKey(const std::vector<TypedValue> &key,
                                 FunctorT *functor,
                                 const std::size_t index);

  /**
   * @brief Lookup a composite key against this hash table to find a matching
   *        entry.
   *
   * @warning The key must not be null.
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call and as long as the returned pointer may be
   *          dereferenced). Concurrent calls to getSingleCompositeKey(),
   *          forEach(), and forEachCompositeKey() are safe.
   *
   * @param key The key to look up.
   * @return The value of a matched entry if a matching key is found.
   *         Otherwise, return NULL.
   **/
  inline const std::uint8_t* getSingleCompositeKey(
      const std::vector<TypedValue> &key) const;

  /**
   * @brief Lookup a composite key against this hash table to find a matching
   *        entry. Then return the aggregation state component with the
   *        specified index.
   *
   * @warning The key must not be null.
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call and as long as the returned pointer may be
   *          dereferenced). Concurrent calls to getSingleCompositeKey(),
   *          forEach(), and forEachCompositeKey() are safe.
   *
   * @param key The key to look up.
   * @param index The index of the target aggregation state among those states
   *        mapped to \p key.
   * @return The aggregation state of the specified index if a matching key is
   *         found. Otherwise, return NULL.
   **/
  inline const std::uint8_t* getSingleCompositeKey(
      const std::vector<TypedValue> &key,
      const std::size_t index) const;

  /**
   * @brief Apply a functor to each (key, value) pair in this hash table.
   *
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call and as long as the returned pointer may be
   *          dereferenced). Concurrent calls to getSingleCompositeKey(),
   *          forEach(), and forEachCompositeKey() are safe.
   *
   * @param functor A pointer to a functor, which should provide a call operator
   *        which takes 2 arguments: const TypedValue&, const std::uint8_t*.
   *        The call operator will be invoked once on each key, value pair in
   *        this hash table.
   * @return The number of key-value pairs visited.
   **/
  template <typename FunctorT>
  inline std::size_t forEach(FunctorT *functor) const;

  /**
   * @brief Apply a functor to each (key, aggregation state) pair in this hash
   *        table, where the aggregation state is retrieved from the value
   *        that maps to the corresponding key with the specified index.
   *
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call and as long as the returned pointer may be
   *          dereferenced). Concurrent calls to getSingleCompositeKey(),
   *          forEach(), and forEachCompositeKey() are safe.
   *
   * @param functor A pointer to a functor, which should provide a call operator
   *        which takes 2 arguments: const TypedValue&, const std::uint8_t*.
   *        The call operator will be invoked once on each (key, aggregation state)
   *        pair in this hash table.
   * @param index The index of the target aggregation state among those states
   *        mapped to \p key.
   * @return The number of key-value pairs visited.
   **/
  template <typename FunctorT>
  inline std::size_t forEach(FunctorT *functor, const int index) const;

  /**
   * @brief Apply a functor to each key, value pair in this hash table.
   *        Composite key version.
   *
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call and as long as the returned pointer may be
   *          dereferenced). Concurrent calls to getSingleCompositeKey(),
   *          forEach(), and forEachCompositeKey() are safe.
   *
   * @param functor A pointer to a functor, which should provide a call operator
   *        which takes 2 arguments: const TypedValue&, const std::uint8_t*.
   *        The call operator will be invoked once on each key, value pair in
   *        this hash table.
   * @return The number of key-value pairs visited.
   **/
  template <typename FunctorT>
  inline std::size_t forEachCompositeKey(FunctorT *functor) const;

  /**
   * @brief Apply a functor to each (key, aggregation state) pair in this hash
   *        table, where the aggregation state is retrieved from the value
   *        that maps to the corresponding key with the specified index.
   *        Composite key version.
   *
   * @warning This method assumes that no concurrent calls to
   *          upsertCompositeKey() or upsertValueAccessorCompositeKey() are
   *          taking place (i.e. that this HashTable is immutable for the
   *          duration of the call and as long as the returned pointer may be
   *          dereferenced). Concurrent calls to getSingleCompositeKey(),
   *          forEach(), and forEachCompositeKey() are safe.
   *
   * @param functor A pointer to a functor, which should provide a call operator
   *        which takes 2 arguments: const TypedValue&, const std::uint8_t*.
   *        The call operator will be invoked once on each (key, aggregation state)
   *        pair in this hash table.
   * @param index The index of the target aggregation state among those states
   *        mapped to \p key.
   * @return The number of key-value pairs visited.
   **/
  template <typename FunctorT>
  inline std::size_t forEachCompositeKey(FunctorT *functor,
                                         const std::size_t index) const;

  // NOTE(review): this counts only the header and allocated buckets; the slot
  // array and the variable-length key region are not included — confirm that
  // is the intended accounting.
  std::size_t getMemoryConsumptionBytes() const override {
    return sizeof(Header) + numEntries() * bucket_size_;
  }

 private:
  void resize(const std::size_t extra_buckets,
              const std::size_t extra_variable_storage,
              const std::size_t retry_num = 0);

  // Total bytes of out-of-line (non-inline) key storage that a copy of
  // 'key' would require. Inline components contribute nothing.
  inline std::size_t calculateVariableLengthCompositeKeyCopySize(
      const std::vector<TypedValue> &key) const {
    std::size_t total = 0;
    for (std::vector<TypedValue>::size_type idx = 0; idx < key.size(); ++idx) {
      if (!(*key_inline_)[idx]) {
        total += key[idx].getDataSize();
      }
    }
    return total;
  }

  inline bool getNextEntry(TypedValue *key,
                           const std::uint8_t **value,
                           std::size_t *entry_num) const;

  inline bool getNextEntryCompositeKey(std::vector<TypedValue> *key,
                                       const std::uint8_t **value,
                                       std::size_t *entry_num) const;

  template <bool key_only = false>
  inline std::uint8_t* upsertCompositeKeyInternal(
      const std::vector<TypedValue> &key,
      const std::size_t variable_key_size);

  template <bool use_two_accessors, bool key_only, bool has_variable_size,
            typename ValueAccessorT>
  inline bool upsertValueAccessorCompositeKeyInternal(
      const std::vector<std::vector<MultiSourceAttributeId>> &argument_ids,
      const std::vector<MultiSourceAttributeId> &key_ids,
      ValueAccessor *base_accessor,
      ValueAccessorT *derived_accessor);

  // Generate a hash for a composite key by hashing each component of 'key' and
  // mixing their bits with CombineHashes().
  inline std::size_t hashCompositeKey(const std::vector<TypedValue> &key) const;

  // Set information about which key components are stored inline. This usually
  // comes from a HashTableKeyManager, and is set by the constructor of a
  // subclass of HashTable.
  inline void setKeyInline(const std::vector<bool> *key_inline) {
    key_inline_ = key_inline;
    // Use std::all_of (from <algorithm>, which this file includes) rather
    // than std::accumulate with std::logical_and: the previous formulation
    // depended on <numeric>, which this header never included, and all_of
    // additionally short-circuits on the first non-inline component.
    all_keys_inline_ =
        std::all_of(key_inline_->begin(), key_inline_->end(),
                    [](const bool component_inline) { return component_inline; });
  }

  // Total payload bytes per bucket: a leading SpinMutex guarding the states,
  // followed by each handle's aggregation state.
  inline static std::size_t ComputeTotalPayloadSize(
      const std::vector<AggregationHandle *> &handles) {
    std::size_t total_payload_size = sizeof(SpinMutex);
    for (const auto *handle : handles) {
      total_payload_size += handle->getPayloadSize();
    }
    return total_payload_size;
  }

  // Assign '*key_vector' with the attribute values specified by 'key_ids' at
  // the current position of 'accessor'. If 'check_for_null_keys' is true, stops
  // and returns true if any of the values is null, otherwise returns false.
  template <bool use_two_accessors,
            bool check_for_null_keys,
            typename BaseAccessorT,
            typename DerivedAccessorT>
  inline static bool GetCompositeKeyFromValueAccessor(
      const std::vector<MultiSourceAttributeId> &key_ids,
      const BaseAccessorT *accessor,
      const DerivedAccessorT *derived_accessor,
      std::vector<TypedValue> *key_vector) {
    for (std::size_t key_idx = 0; key_idx < key_ids.size(); ++key_idx) {
      const MultiSourceAttributeId &key_id = key_ids[key_idx];
      if (use_two_accessors && key_id.source == ValueAccessorSource::kDerived) {
        (*key_vector)[key_idx] = derived_accessor->getTypedValue(key_id.attr_id);
      } else {
        (*key_vector)[key_idx] = accessor->getTypedValue(key_id.attr_id);
      }
      if (check_for_null_keys && (*key_vector)[key_idx].isNull()) {
        return true;
      }
    }
    return false;
  }

  // Hash table metadata stored at the front of the blob. The two atomic
  // counters live on their own cache lines to avoid false sharing between
  // concurrently-inserting threads.
  struct Header {
    std::size_t num_slots;
    std::size_t num_buckets;
    alignas(kCacheLineBytes) std::atomic<std::size_t> buckets_allocated;
    alignas(kCacheLineBytes)
        std::atomic<std::size_t> variable_length_bytes_allocated;
  };

  // Type(s) of keys.
  const std::vector<const Type *> key_types_;

  // Information about whether key components are stored inline or in a
  // separate variable-length storage region. This is usually determined by a
  // HashTableKeyManager and set by calling setKeyInline().
  bool all_keys_inline_;
  const std::vector<bool> *key_inline_;

  const std::size_t num_handles_;
  const std::vector<AggregationHandle *> handles_;

  std::size_t total_payload_size_;
  std::vector<std::size_t> payload_offsets_;
  std::uint8_t *init_payload_;

  StorageManager *storage_manager_;
  MutableBlobReference blob_;

  // Locked in shared mode for most operations, exclusive mode during resize.
  // Not locked at all for non-resizable HashTables.
  alignas(kCacheLineBytes) SpinSharedMutex<true> resize_shared_mutex_;

  // Bucket size granularity used by ComputeBucketSize(). Despite the
  // constant-style 'k' prefix these are mutable members — presumably set
  // once by the constructor (defined elsewhere); confirm before relying on
  // them being immutable.
  std::size_t kBucketAlignment;

  // Value's offset in a bucket is the first alignof(ValueT) boundary after the
  // next pointer and hash code.
  std::size_t kValueOffset;

  // Round bucket size up to a multiple of kBucketAlignment.
  inline std::size_t ComputeBucketSize(const std::size_t fixed_key_size) {
    return (((kValueOffset + this->total_payload_size_ + fixed_key_size - 1) /
             kBucketAlignment) +
            1) *
           kBucketAlignment;
  }

  // Attempt to find an empty bucket to insert 'hash_code' into, starting after
  // '*bucket' in the chain (or, if '*bucket' is NULL, starting from the slot
  // array). Returns true and stores SIZE_T_MAX in '*pending_chain_ptr' if an
  // empty bucket is found. Returns false if 'allow_duplicate_keys' is false
  // and a hash collision is found (caller should then check whether there is a
  // genuine key collision or the hash collision is spurious). Returns false
  // and sets '*bucket' to NULL if there are no more empty buckets in the hash
  // table. If 'variable_key_allocation_required' is nonzero, this method will
  // attempt to allocate storage for a variable-length key BEFORE allocating a
  // bucket, so that no bucket number below 'header_->num_buckets' is ever
  // deallocated after being allocated.
  inline bool locateBucketForInsertion(
      const std::size_t hash_code,
      const std::size_t variable_key_allocation_required,
      void **bucket,
      std::atomic<std::size_t> **pending_chain_ptr,
      std::size_t *pending_chain_ptr_finish_value);

  // Write a scalar 'key' and its 'hash_code' into the '*bucket', which was
  // found by locateBucketForInsertion(). Assumes that storage for a
  // variable-length key copy (if any) was already allocated by a successful
  // call to allocateVariableLengthKeyStorage().
  inline void writeScalarKeyToBucket(
      const TypedValue &key,
      const std::size_t hash_code,
      void *bucket);

  // Write a composite 'key' and its 'hash_code' into the '*bucket', which was
  // found by locateBucketForInsertion(). Assumes that storage for
  // variable-length key copies (if any) was already allocated by a successful
  // call to allocateVariableLengthKeyStorage().
  inline void writeCompositeKeyToBucket(
      const std::vector<TypedValue> &key,
      const std::size_t hash_code,
      void *bucket);

  // Determine whether it is actually necessary to resize this hash table.
  // Checks that there is at least one unallocated bucket, and that there is
  // at least 'extra_variable_storage' bytes of variable-length storage free.
  inline bool isFull(const std::size_t extra_variable_storage) const;

  // Helper object to manage key storage.
  HashTableKeyManager<false, true> key_manager_;

  // In-memory structure is as follows:
  //   - SeparateChainingHashTable::Header
  //   - Array of slots, interpreted as follows:
  //       - 0 = Points to nothing (empty)
  //       - SIZE_T_MAX = Pending (some thread is starting a chain from this
  //         slot and will overwrite it soon)
  //       - Anything else = The number of the first bucket in the chain for
  //         this slot PLUS ONE (i.e. subtract one to get the actual bucket
  //         number).
  //   - Array of buckets, each of which is:
  //       - atomic size_t "next" pointer, interpreted the same as slots above.
  //       - size_t hash value
  //       - possibly some unused bytes as needed so that ValueT's alignment
  //         requirement is met
  //       - ValueT value slot
  //       - fixed-length key storage (which may include pointers to external
  //         memory or offsets of variable length keys stored within this hash
  //         table)
  //       - possibly some additional unused bytes so that bucket size is a
  //         multiple of both alignof(std::atomic<std::size_t>) and
  //         alignof(ValueT)
  //   - Variable-length key storage region (referenced by offsets stored in
  //     fixed-length keys).
  Header *header_;

  std::atomic<std::size_t> *slots_;
  void *buckets_;
  const std::size_t bucket_size_;

  DISALLOW_COPY_AND_ASSIGN(PackedPayloadHashTable);
};
/** @} */
// ----------------------------------------------------------------------------
// Implementations of template class methods follow.
// Functor that merges every (key, state) pair it is applied to into a
// destination PackedPayloadHashTable; intended for use with
// forEachCompositeKey() on a source hash table.
class HashTableMerger {
 public:
  /**
   * @brief Constructor
   *
   * @param destination_hash_table The destination hash table to which other
   *        hash tables will be merged.
   **/
  explicit HashTableMerger(PackedPayloadHashTable *destination_hash_table)
      : destination_hash_table_(destination_hash_table) {}

  /**
   * @brief The operator for the functor.
   *
   * @param group_by_key The group by key being merged.
   * @param source_state The aggregation state for the given key in the source
   *        aggregation hash table.
   **/
  inline void operator()(const std::vector<TypedValue> &group_by_key,
                         const std::uint8_t *source_state) {
    destination_hash_table_->upsertCompositeKey(group_by_key, source_state);
  }

 private:
  // Not owned by this functor; must outlive it.
  PackedPayloadHashTable *destination_hash_table_;

  DISALLOW_COPY_AND_ASSIGN(HashTableMerger);
};
inline std::size_t PackedPayloadHashTable::hashCompositeKey(
const std::vector<TypedValue> &key) const {
DEBUG_ASSERT(!key.empty());
DEBUG_ASSERT(key.size() == key_types_.size());
std::size_t hash = key.front().getHash();
for (std::vector<TypedValue>::const_iterator key_it = key.begin() + 1;
key_it != key.end();
++key_it) {
hash = CombineHashes(hash, key_it->getHash());
}
return hash;
}
// Scalar-key iteration helper: visits allocated buckets in allocation order.
// '*entry_num' is the cursor and is advanced on success, so repeated calls
// walk every allocated bucket exactly once. Returns false when exhausted.
inline bool PackedPayloadHashTable::getNextEntry(TypedValue *key,
                                                 const std::uint8_t **value,
                                                 std::size_t *entry_num) const {
  if (*entry_num < header_->buckets_allocated.load(std::memory_order_relaxed)) {
    const char *bucket =
        static_cast<const char *>(buckets_) + (*entry_num) * bucket_size_;
    // Component 0 is the (single) scalar key; the payload starts at
    // kValueOffset within the bucket.
    *key = key_manager_.getKeyComponentTyped(bucket, 0);
    *value = reinterpret_cast<const std::uint8_t *>(bucket + kValueOffset);
    ++(*entry_num);
    return true;
  } else {
    return false;
  }
}
// Composite-key iteration helper: like getNextEntry(), but reads every key
// component of the current bucket into '*key'.
// NOTE(review): components are appended via emplace_back without clearing
// '*key' first — callers appear expected to pass an empty (or freshly
// cleared) vector each call; confirm against call sites.
inline bool PackedPayloadHashTable::getNextEntryCompositeKey(
    std::vector<TypedValue> *key,
    const std::uint8_t **value,
    std::size_t *entry_num) const {
  if (*entry_num < header_->buckets_allocated.load(std::memory_order_relaxed)) {
    const char *bucket =
        static_cast<const char *>(buckets_) + (*entry_num) * bucket_size_;
    for (std::vector<const Type *>::size_type key_idx = 0;
         key_idx < this->key_types_.size();
         ++key_idx) {
      key->emplace_back(key_manager_.getKeyComponentTyped(bucket, key_idx));
    }
    *value = reinterpret_cast<const std::uint8_t *>(bucket + kValueOffset);
    ++(*entry_num);
    return true;
  } else {
    return false;
  }
}
// Lock-free search for the insertion point of 'hash_code'. Chain links (slot
// entries and bucket "next" fields) hold: 0 = empty, SIZE_T_MAX = pending
// (another thread is mid-insert), otherwise bucket number PLUS ONE.
inline bool PackedPayloadHashTable::locateBucketForInsertion(
    const std::size_t hash_code,
    const std::size_t variable_key_allocation_required,
    void **bucket,
    std::atomic<std::size_t> **pending_chain_ptr,
    std::size_t *pending_chain_ptr_finish_value) {
  // Start either from the slot array (fresh search) or from the bucket the
  // previous iteration stopped at (continuing down a chain).
  if (*bucket == nullptr) {
    *pending_chain_ptr = &(slots_[hash_code % header_->num_slots]);
  } else {
    *pending_chain_ptr = static_cast<std::atomic<std::size_t> *>(*bucket);
  }
  for (;;) {
    std::size_t existing_chain_ptr = 0;
    // Try to claim the end of the chain by CASing 0 -> SIZE_T_MAX ("pending").
    if ((*pending_chain_ptr)
            ->compare_exchange_strong(existing_chain_ptr,
                                      std::numeric_limits<std::size_t>::max(),
                                      std::memory_order_acq_rel)) {
      // Got to the end of the chain. Allocate a new bucket.

      // First, allocate variable-length key storage, if needed (i.e. if this
      // is an upsert and we didn't allocate up-front).
      if (!key_manager_.allocateVariableLengthKeyStorage(
              variable_key_allocation_required)) {
        // Ran out of variable-length storage: roll the link back to 0 so
        // other threads can proceed.
        (*pending_chain_ptr)->store(0, std::memory_order_release);
        *bucket = nullptr;
        return false;
      }
      const std::size_t allocated_bucket_num =
          header_->buckets_allocated.fetch_add(1, std::memory_order_relaxed);
      if (allocated_bucket_num >= header_->num_buckets) {
        // Ran out of buckets: undo the counter bump and roll back the link.
        header_->buckets_allocated.fetch_sub(1, std::memory_order_relaxed);
        (*pending_chain_ptr)->store(0, std::memory_order_release);
        *bucket = nullptr;
        return false;
      } else {
        // Success: hand back the new bucket and the link value (bucket
        // number PLUS ONE) the caller must publish when the bucket is ready.
        *bucket =
            static_cast<char *>(buckets_) + allocated_bucket_num * bucket_size_;
        *pending_chain_ptr_finish_value = allocated_bucket_num + 1;
        return true;
      }
    }
    // Spin until the real "next" pointer is available.
    while (existing_chain_ptr == std::numeric_limits<std::size_t>::max()) {
      existing_chain_ptr =
          (*pending_chain_ptr)->load(std::memory_order_acquire);
    }
    if (existing_chain_ptr == 0) {
      // Other thread had to roll back, so try again.
      continue;
    }
    // Chase the next pointer.
    *bucket =
        static_cast<char *>(buckets_) + (existing_chain_ptr - 1) * bucket_size_;
    *pending_chain_ptr = static_cast<std::atomic<std::size_t> *>(*bucket);
    // The stored hash code sits immediately after the atomic "next" field.
    const std::size_t hash_in_bucket = *reinterpret_cast<const std::size_t *>(
        static_cast<const char *>(*bucket) +
        sizeof(std::atomic<std::size_t>));
    if (hash_in_bucket == hash_code) {
      // Hash collision: return false with '*bucket' non-null so the caller
      // can check for a genuine key match.
      return false;
    }
  }
}
inline const std::uint8_t* PackedPayloadHashTable::getSingleCompositeKey(
const std::vector<TypedValue> &key) const {
DEBUG_ASSERT(this->key_types_.size() == key.size());
const std::size_t hash_code = this->hashCompositeKey(key);
std::size_t bucket_ref =
slots_[hash_code % header_->num_slots].load(std::memory_order_relaxed);
while (bucket_ref != 0) {
DEBUG_ASSERT(bucket_ref != std::numeric_limits<std::size_t>::max());
const char *bucket =
static_cast<const char *>(buckets_) + (bucket_ref - 1) * bucket_size_;
const std::size_t bucket_hash = *reinterpret_cast<const std::size_t *>(
bucket + sizeof(std::atomic<std::size_t>));
if ((bucket_hash == hash_code) &&
key_manager_.compositeKeyCollisionCheck(key, bucket)) {
// Match located.
return reinterpret_cast<const std::uint8_t *>(bucket + kValueOffset);
}
bucket_ref =
reinterpret_cast<const std::atomic<std::size_t> *>(bucket)->load(
std::memory_order_relaxed);
}
// Reached the end of the chain and didn't find a match.
return nullptr;
}
// As getSingleCompositeKey(key), but returns a pointer into the payload at
// the offset of the aggregation state selected by 'index' rather than the
// start of the payload.
inline const std::uint8_t* PackedPayloadHashTable::getSingleCompositeKey(
    const std::vector<TypedValue> &key,
    const std::size_t index) const {
  DEBUG_ASSERT(this->key_types_.size() == key.size());
  const std::size_t hash_code = this->hashCompositeKey(key);
  std::size_t bucket_ref =
      slots_[hash_code % header_->num_slots].load(std::memory_order_relaxed);
  while (bucket_ref != 0) {
    DEBUG_ASSERT(bucket_ref != std::numeric_limits<std::size_t>::max());
    // Chain links are bucket numbers PLUS ONE; zero terminates the chain.
    const char *bucket =
        static_cast<const char *>(buckets_) + (bucket_ref - 1) * bucket_size_;
    const std::size_t bucket_hash = *reinterpret_cast<const std::size_t *>(
        bucket + sizeof(std::atomic<std::size_t>));
    if ((bucket_hash == hash_code) &&
        key_manager_.compositeKeyCollisionCheck(key, bucket)) {
      // Match located.
      return reinterpret_cast<const std::uint8_t *>(bucket + kValueOffset) +
             this->payload_offsets_[index];
    }
    bucket_ref =
        reinterpret_cast<const std::atomic<std::size_t> *>(bucket)->load(
            std::memory_order_relaxed);
  }
  // Reached the end of the chain and didn't find a match.
  return nullptr;
}
// Merge 'source_state' into the states stored under 'key', inserting a fresh
// entry first if the key is absent. Retries after an exclusive resize()
// whenever the insert fails for lack of space; the shared resize lock must be
// released before resize() can take it exclusively, hence the scoped block.
inline bool PackedPayloadHashTable::upsertCompositeKey(
    const std::vector<TypedValue> &key,
    const std::uint8_t *source_state) {
  const std::size_t variable_size =
      calculateVariableLengthCompositeKeyCopySize(key);
  for (;;) {
    {
      SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_);
      std::uint8_t *value =
          upsertCompositeKeyInternal(key, variable_size);
      if (value != nullptr) {
        // The payload begins with a SpinMutex guarding the states that follow.
        SpinMutexLock lock(*(reinterpret_cast<SpinMutex *>(value)));
        for (unsigned int k = 0; k < num_handles_; ++k) {
          handles_[k]->mergeStates(source_state + payload_offsets_[k],
                                   value + payload_offsets_[k]);
        }
        return true;
      }
    }
    // Out of space: grow the table (exclusive lock inside resize) and retry.
    resize(0, variable_size);
  }
}
// Apply '*functor' to the aggregation state selected by 'index' under 'key',
// inserting a fresh entry first if the key is absent. Same resize-and-retry
// protocol as the merge overload above.
// NOTE(review): unlike the merge overload, the payload's SpinMutex is NOT
// taken before invoking the functor — presumably the functor (or handle) is
// responsible for its own synchronization; confirm with callers.
template <typename FunctorT>
inline bool PackedPayloadHashTable::upsertCompositeKey(
    const std::vector<TypedValue> &key,
    FunctorT *functor,
    const std::size_t index) {
  const std::size_t variable_size =
      calculateVariableLengthCompositeKeyCopySize(key);
  for (;;) {
    {
      SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_);
      std::uint8_t *value =
          upsertCompositeKeyInternal(key, variable_size);
      if (value != nullptr) {
        (*functor)(value + payload_offsets_[index]);
        return true;
      }
    }
    // Out of space: grow the table and retry.
    resize(0, variable_size);
  }
}
// Find-or-insert core used by both public upsert overloads. Returns a pointer
// to the bucket's payload (existing or newly initialized), or nullptr if the
// table is out of bucket or variable-key space (the caller then resizes and
// retries). When 'key_only' is true the payload is left uninitialized.
template <bool key_only>
inline std::uint8_t* PackedPayloadHashTable::upsertCompositeKeyInternal(
    const std::vector<TypedValue> &key,
    const std::size_t variable_key_size) {
  if (variable_key_size > 0) {
    // Don't allocate yet, since the key may already be present. However, we
    // do check if either the allocated variable storage space OR the free
    // space is big enough to hold the key (at least one must be true: either
    // the key is already present and allocated, or we need to be able to
    // allocate enough space for it).
    std::size_t allocated_bytes = header_->variable_length_bytes_allocated.load(
        std::memory_order_relaxed);
    if ((allocated_bytes < variable_key_size) &&
        (allocated_bytes + variable_key_size >
         key_manager_.getVariableLengthKeyStorageSize())) {
      return nullptr;
    }
  }
  const std::size_t hash_code = this->hashCompositeKey(key);
  void *bucket = nullptr;
  std::atomic<std::size_t> *pending_chain_ptr;
  std::size_t pending_chain_ptr_finish_value;
  for (;;) {
    if (locateBucketForInsertion(hash_code,
                                 variable_key_size,
                                 &bucket,
                                 &pending_chain_ptr,
                                 &pending_chain_ptr_finish_value)) {
      // Found an empty bucket.
      break;
    } else if (bucket == nullptr) {
      // Ran out of buckets or variable-key space.
      return nullptr;
    } else if (key_manager_.compositeKeyCollisionCheck(key, bucket)) {
      // Found an already-existing entry for this key.
      return reinterpret_cast<std::uint8_t *>(static_cast<char *>(bucket) +
                                              kValueOffset);
    }
    // Otherwise: spurious hash collision — keep walking the chain from
    // this bucket.
  }

  // We are now writing to an empty bucket.
  // Write the key and hash.
  writeCompositeKeyToBucket(key, hash_code, bucket);

  std::uint8_t *value = static_cast<unsigned char *>(bucket) + kValueOffset;
  if (!key_only) {
    std::memcpy(value, init_payload_, this->total_payload_size_);
  }

  // Update the previous chain pointer to point to the new bucket, making it
  // visible to concurrent readers/inserters.
  pending_chain_ptr->store(pending_chain_ptr_finish_value,
                           std::memory_order_release);

  // Return the value.
  return value;
}
// Bulk-upserts every tuple from 'base_accessor' (and, when 'use_two_accessors'
// is true, the parallel 'derived_accessor'): for each tuple's composite key,
// locates or inserts a bucket and applies each aggregation handle's unary or
// nullary state update.  If the table fills up mid-scan, the shared lock is
// released, the table is resized, the accessors are stepped back one position
// so the failed tuple is re-read, and the scan resumes.  Returns true.
template <bool use_two_accessors, bool key_only, bool has_variable_size,
          typename ValueAccessorT>
inline bool PackedPayloadHashTable::upsertValueAccessorCompositeKeyInternal(
    const std::vector<std::vector<MultiSourceAttributeId>> &argument_ids,
    const std::vector<MultiSourceAttributeId> &key_ids,
    ValueAccessor *base_accessor,
    ValueAccessorT *derived_accessor) {
  std::size_t variable_size = 0;
  std::vector<TypedValue> key_vector;
  key_vector.resize(key_ids.size());
  return InvokeOnAnyValueAccessor(
      base_accessor,
      [&](auto *accessor) -> bool {  // NOLINT(build/c++11)
        // 'continuing' is set when an upsert fails for lack of space; the
        // outer loop then resizes and re-enters the scan.
        bool continuing = true;
        while (continuing) {
          {
            continuing = false;
            // Block resizes while we hold buckets via raw pointers.
            SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
            while (accessor->next()) {
              if (use_two_accessors) {
                // Keep the derived accessor in lockstep with the base one.
                derived_accessor->next();
              }
              // A true return appears to indicate the key could not be used
              // (e.g. contains a NULL component) — skip this tuple.
              // NOTE(review): confirm the semantics of the boolean result.
              if (this->GetCompositeKeyFromValueAccessor<use_two_accessors, true>(
                      key_ids,
                      accessor,
                      derived_accessor,
                      &key_vector)) {
                continue;
              }
              if (has_variable_size) {
                variable_size =
                    this->calculateVariableLengthCompositeKeyCopySize(key_vector);
              }
              std::uint8_t *value =
                  this->template upsertCompositeKeyInternal<key_only>(
                      key_vector, variable_size);
              if (value == nullptr) {
                // Out of space: leave the inner scan and resize below.
                continuing = true;
                break;
              } else if (!key_only) {
                // The value region starts with a SpinMutex guarding this
                // bucket's states; hold it while updating every handle.
                SpinMutexLock lock(*(reinterpret_cast<SpinMutex *>(value)));
                for (unsigned int k = 0; k < num_handles_; ++k) {
                  const auto &ids = argument_ids[k];
                  if (ids.empty()) {
                    // Nullary aggregate (e.g. COUNT(*)): no argument value.
                    handles_[k]->updateStateNullary(value + payload_offsets_[k]);
                  } else {
                    // Only the first argument id is used per handle.
                    const MultiSourceAttributeId &arg_id = ids.front();
                    if (use_two_accessors && arg_id.source == ValueAccessorSource::kDerived) {
                      DCHECK_NE(arg_id.attr_id, kInvalidAttributeID);
                      handles_[k]->updateStateUnary(derived_accessor->getTypedValue(arg_id.attr_id),
                                                    value + payload_offsets_[k]);
                    } else {
                      handles_[k]->updateStateUnary(accessor->getTypedValue(arg_id.attr_id),
                                                    value + payload_offsets_[k]);
                    }
                  }
                }
              }
            }
          }
          if (continuing) {
            // Shared lock released (scope above): grow the table, then step
            // both accessors back so the tuple that failed is re-processed.
            this->resize(0, variable_size);
            accessor->previous();
            if (use_two_accessors) {
              derived_accessor->previous();
            }
          }
        }
        return true;
      });
}
// Stores 'hash_code' and the single scalar 'key' into an empty 'bucket'.
// The hash value lives immediately after the bucket's atomic chain pointer.
inline void PackedPayloadHashTable::writeScalarKeyToBucket(
    const TypedValue &key,
    const std::size_t hash_code,
    void *bucket) {
  char *const hash_slot =
      static_cast<char *>(bucket) + sizeof(std::atomic<std::size_t>);
  *reinterpret_cast<std::size_t *>(hash_slot) = hash_code;
  // A scalar key is written as component 0 of a one-component key.
  key_manager_.writeKeyComponentToBucket(key, 0, bucket, nullptr);
}
// Stores 'hash_code' and every component of the composite 'key' into an
// empty 'bucket'.  The hash value lives immediately after the bucket's
// atomic chain pointer.
inline void PackedPayloadHashTable::writeCompositeKeyToBucket(
    const std::vector<TypedValue> &key,
    const std::size_t hash_code,
    void *bucket) {
  DEBUG_ASSERT(key.size() == this->key_types_.size());
  char *const hash_slot =
      static_cast<char *>(bucket) + sizeof(std::atomic<std::size_t>);
  *reinterpret_cast<std::size_t *>(hash_slot) = hash_code;
  const std::size_t num_components = this->key_types_.size();
  for (std::size_t component = 0; component < num_components; ++component) {
    key_manager_.writeKeyComponentToBucket(
        key[component], component, bucket, nullptr);
  }
}
// Returns true when the table cannot accept another entry: either every
// bucket is allocated, or 'extra_variable_storage' additional bytes of
// variable-length key storage would not fit.
inline bool PackedPayloadHashTable::isFull(
    const std::size_t extra_variable_storage) const {
  const std::size_t allocated_buckets =
      header_->buckets_allocated.load(std::memory_order_relaxed);
  if (allocated_buckets >= header_->num_buckets) {
    // All buckets are allocated.
    return true;
  }
  if (extra_variable_storage == 0) {
    // No variable-length storage needed, and a bucket is available.
    return false;
  }
  const std::size_t variable_bytes_in_use =
      header_->variable_length_bytes_allocated.load(std::memory_order_relaxed);
  // Full if the requested bytes would overflow the variable-key region.
  return extra_variable_storage + variable_bytes_in_use >
         key_manager_.getVariableLengthKeyStorageSize();
}
// Invokes '*functor' on the (key, full payload) pair of every entry in the
// table.  Returns the number of entries visited.
//
// Fix: clear 'key' after each invocation, matching the indexed forEach
// overload and forEachCompositeKey, so any resources the TypedValue holds
// are released before the next getNextEntry() overwrites it.
template <typename FunctorT>
inline std::size_t PackedPayloadHashTable::forEach(FunctorT *functor) const {
  std::size_t entries_visited = 0;
  std::size_t entry_num = 0;  // Scan cursor advanced by getNextEntry().
  TypedValue key;
  const std::uint8_t *value_ptr;
  while (getNextEntry(&key, &value_ptr, &entry_num)) {
    ++entries_visited;
    (*functor)(key, value_ptr);
    key.clear();
  }
  return entries_visited;
}
template <typename FunctorT>
inline std::size_t PackedPayloadHashTable::forEach(
FunctorT *functor, const int index) const {
std::size_t entries_visited = 0;
std::size_t entry_num = 0;
TypedValue key;
const std::uint8_t *value_ptr;
while (getNextEntry(&key, &value_ptr, &entry_num)) {
++entries_visited;
(*functor)(key, value_ptr + payload_offsets_[index]);
key.clear();
}
return entries_visited;
}
template <typename FunctorT>
inline std::size_t PackedPayloadHashTable::forEachCompositeKey(
FunctorT *functor) const {
std::size_t entries_visited = 0;
std::size_t entry_num = 0;
std::vector<TypedValue> key;
const std::uint8_t *value_ptr;
while (getNextEntryCompositeKey(&key, &value_ptr, &entry_num)) {
++entries_visited;
(*functor)(key, value_ptr);
key.clear();
}
return entries_visited;
}
template <typename FunctorT>
inline std::size_t PackedPayloadHashTable::forEachCompositeKey(
FunctorT *functor,