-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Enhancement] SIMD for bit unpacking from Arrow
Signed-off-by: zombee0 <[email protected]>
- Loading branch information
Showing
4 changed files
with
145 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
// Copyright 2021-present StarRocks, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <algorithm>
#include <climits>
#include <cstdint>
#include <utility>

#ifdef __ARM_NEON
#include <arrow/util/bpacking.h>
#include <arrow/util/bpacking_neon.h>
#endif
#ifdef __AVX2__
#include <arrow/util/bpacking.h>
#include <arrow/util/bpacking_avx2.h>
#endif

#include "common/config.h"
#include "common/logging.h"
#include "util/bit_packing.h"
|
||
namespace starrocks { | ||
|
||
class BitPackingAdapter { | ||
public: | ||
template <typename OutType> | ||
static std::pair<const uint8_t*, int64_t> UnpackValues(int bit_width, const uint8_t* __restrict__ in, | ||
int64_t in_bytes, int64_t num_values, | ||
OutType* __restrict__ out) { | ||
#if defined(__AVX2__) || defined(__ARM_NEON) | ||
|
||
#pragma push_macro("UNPACK_ADAPTER_VALUES_CASE") | ||
#define UNPACK_ADAPTER_VALUES_CASE(ignore1, i, ignore2) \ | ||
case i: \ | ||
return UnpackValues<OutType, i>(in, in_bytes, num_values, out); | ||
|
||
switch (bit_width) { | ||
// Expand cases from 0 to 64. | ||
BOOST_PP_REPEAT_FROM_TO(0, 65, UNPACK_ADAPTER_VALUES_CASE, ignore); | ||
default: | ||
DCHECK(false); | ||
return std::make_pair(nullptr, -1); | ||
} | ||
#pragma pop_macro("UNPACK_ADAPTER_VALUES_CASE") | ||
|
||
#else | ||
|
||
return BitPacking::UnpackValues(bit_width, in, in_bytes, num_values, out); | ||
|
||
#endif | ||
} | ||
|
||
template <typename OutType, int BIT_WIDTH> | ||
static std::pair<const uint8_t*, int64_t> UnpackValues(const uint8_t* __restrict__ in, int64_t in_bytes, | ||
int64_t num_values, OutType* __restrict__ out) { | ||
constexpr int BATCH_SIZE = 32; | ||
const int64_t values_to_read = BitPacking::NumValuesToUnpack(BIT_WIDTH, in_bytes, num_values); | ||
const int64_t batches_to_read = values_to_read / BATCH_SIZE; | ||
const int64_t remainder_values = values_to_read % BATCH_SIZE; | ||
const uint8_t* in_pos = in; | ||
OutType* out_pos = out; | ||
|
||
// First unpack as many full batches as possible. | ||
if (config::enable_bit_unpack_simd && batches_to_read > 0) { | ||
in_pos = BitPackingAdapter::UnpackValues_32_SIMD<OutType, BIT_WIDTH>(in_pos, in_bytes, out_pos, | ||
batches_to_read * BATCH_SIZE); | ||
} else { | ||
for (int64_t i = 0; i < batches_to_read; ++i) { | ||
in_pos = BitPacking::Unpack32Values<OutType, BIT_WIDTH>(in_pos, in_bytes, out_pos); | ||
out_pos += BATCH_SIZE; | ||
in_bytes -= (BATCH_SIZE * BIT_WIDTH) / CHAR_BIT; | ||
} | ||
} | ||
|
||
// Then unpack the final partial batch. | ||
if (remainder_values > 0) { | ||
in_pos = BitPacking::UnpackUpTo31Values<OutType, BIT_WIDTH>(in_pos, in_bytes, remainder_values, out_pos); | ||
} | ||
return std::make_pair(in_pos, values_to_read); | ||
} | ||
|
||
template <typename OutType, int BIT_WIDTH> | ||
static const uint8_t* UnpackValues_32_SIMD(const uint8_t* __restrict__ in, int64_t in_bytes, | ||
OutType* __restrict__ out, int64_t num_values) { | ||
int batch_size = num_values; | ||
const int byte_width = 8; | ||
if constexpr (sizeof(OutType) == 4) { | ||
#if defined(__AVX2__) | ||
int num_unpacked = arrow::internal::unpack32_avx2(reinterpret_cast<const uint32_t*>(in), | ||
reinterpret_cast<uint32_t*>(out), batch_size, BIT_WIDTH); | ||
#elif defined(__ARM_NEON) | ||
int num_unpacked = arrow::internal::unpack32_neon(reinterpret_cast<const uint32_t*>(in), | ||
reinterpret_cast<uint32_t*>(out), batch_size, BIT_WIDTH); | ||
#else | ||
#error "Not supported instruction set" | ||
#endif | ||
|
||
DCHECK(num_unpacked == batch_size); | ||
in += num_unpacked * BIT_WIDTH / byte_width; | ||
} else if constexpr (sizeof(OutType) == 8 && BIT_WIDTH > 32) { | ||
// Use unpack64 only if BIT_WIDTH is larger than 32 | ||
// TODO (ARROW-13677): improve the performance of internal::unpack64 | ||
// and remove the restriction of BIT_WIDTH | ||
int num_unpacked = arrow::internal::unpack64(in, reinterpret_cast<uint64_t*>(out), batch_size, BIT_WIDTH); | ||
DCHECK(num_unpacked == batch_size); | ||
in += num_unpacked * BIT_WIDTH / byte_width; | ||
} else { | ||
// TODO: revisit this limit if necessary | ||
DCHECK_LE(BIT_WIDTH, 32); | ||
const int buffer_size = 32; | ||
uint32_t unpack_buffer[buffer_size]; | ||
|
||
#if defined(__AVX2__) | ||
int num_unpacked = arrow::internal::unpack32_avx2(reinterpret_cast<const uint32_t*>(in), unpack_buffer, | ||
batch_size, BIT_WIDTH); | ||
#elif defined(__ARM_NEON) | ||
int num_unpacked = arrow::internal::unpack32_neon(reinterpret_cast<const uint32_t*>(in), unpack_buffer, | ||
batch_size, BIT_WIDTH); | ||
#else | ||
#error "Not supported instruction set" | ||
#endif | ||
DCHECK(num_unpacked == batch_size); | ||
for (int k = 0; k < num_unpacked; ++k) { | ||
out[k] = static_cast<OutType>(unpack_buffer[k]); | ||
} | ||
in += num_unpacked * BIT_WIDTH / byte_width; | ||
} | ||
return in; | ||
} | ||
}; | ||
|
||
} // namespace starrocks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters