Skip to content

Commit

Permalink
refine
Browse files Browse the repository at this point in the history
Signed-off-by: guo-shaoge <[email protected]>
  • Loading branch information
guo-shaoge committed Jan 26, 2025
1 parent d9aeae7 commit eb01d9e
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 119 deletions.
32 changes: 28 additions & 4 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,17 +266,41 @@ void ColumnArray::serializeToPosForCmp(
String * sort_key_container) const
{
if (nullmap != nullptr)
serializeToPosImpl</*has_null=*/false, /*compare_semantics=*/true, /*has_nullmap=*/true>(pos, start, length, collator, sort_key_container, nullmap);
serializeToPosImpl</*has_null=*/false, /*compare_semantics=*/true, /*has_nullmap=*/true>(
pos,
start,
length,
collator,
sort_key_container,
nullmap);
else
serializeToPosImpl</*has_null=*/false, /*compare_semantics=*/true, /*has_nullmap=*/false>(pos, start, length, collator, sort_key_container, nullptr);
serializeToPosImpl</*has_null=*/false, /*compare_semantics=*/true, /*has_nullmap=*/false>(
pos,
start,
length,
collator,
sort_key_container,
nullptr);
}

/// Serialize this array column into the buffers referenced by `pos` without compare semantics.
///
/// `pos`, `start` and `length` are forwarded unchanged to serializeToPosImpl. The runtime flag
/// `has_null` selects the matching compile-time `has_null` template argument. No collator,
/// sort-key container or nullmap is passed (all three are nullptr), and `has_nullmap` is
/// always false on this path.
void ColumnArray::serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const
{
    // Exactly one serializeToPosImpl call per branch.
    if (has_null)
        serializeToPosImpl</*has_null=*/true, /*compare_semantics=*/false, /*has_nullmap=*/false>(
            pos,
            start,
            length,
            nullptr,
            nullptr,
            nullptr);
    else
        serializeToPosImpl</*has_null=*/false, /*compare_semantics=*/false, /*has_nullmap=*/false>(
            pos,
            start,
            length,
            nullptr,
            nullptr,
            nullptr);
}

template <bool has_null, bool compare_semantics, bool has_nullmap>
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ void ColumnDecimal<T>::serializeToPosForColumnArrayImpl(
size());

static_assert(!(has_null && has_nullmap));
assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.back()));
RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));

for (size_t i = 0; i < length; ++i)
{
Expand All @@ -283,9 +283,9 @@ void ColumnDecimal<T>::serializeToPosForColumnArrayImpl(
{
for (size_t j = 0; j < len; ++j)
tiflash_compiler_builtin_memcpy(
pos[i] + j * sizeof(T),
&data[array_offsets[start + i - 1] + j],
sizeof(T));
pos[i] + j * sizeof(T),
&data[array_offsets[start + i - 1] + j],
sizeof(T));
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ void ColumnFixedString::serializeToPosForColumnArrayImpl(
size());

static_assert(!(has_null && has_nullmap));
assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));
RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));

for (size_t i = 0; i < length; ++i)
{
Expand Down
104 changes: 16 additions & 88 deletions dbms/src/Columns/ColumnNullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,22 +319,10 @@ void ColumnNullable::serializeToPosForCmp(
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const
{
if unlikely (nullmap != nullptr)
{
// This code path is not efficient, because of the temporary `new_nullmap_col`.
// But only got this code path when the column is like ColumnNullable(ColumnTuple(ColumnNullable)),
// which is rare for TiFlash, because ColumnTuple is not used for now.
auto new_nullmap_col = ColumnUInt8::create();
DB::mergeNullMap(start, length, *nullmap, getNullMapData(), new_nullmap_col->getData());
new_nullmap_col->serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container);
getNestedColumn()
.serializeToPosForCmp(pos, start, length, &(new_nullmap_col->getData()), collator, sort_key_container);
}
else
{
getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container);
}
// Nested ColumnNullable like ColumnNullable(ColumnArray(ColumnNullable(ColumnXXX))) is not supported.
RUNTIME_CHECK_MSG(!nullmap, "serializeToPosForCmp cannot handle nested nullable");
getNullMapColumn().serializeToPosForCmp(pos, start, length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(pos, start, length, &getNullMapData(), collator, sort_key_container);
}

void ColumnNullable::serializeToPos(PaddedPODArray<char *> & pos, size_t start, size_t length, bool has_null) const
Expand All @@ -344,78 +332,18 @@ void ColumnNullable::serializeToPos(PaddedPODArray<char *> & pos, size_t start,
}

void ColumnNullable::serializeToPosForCmpColumnArray(
PaddedPODArray<char *> & pos,
size_t start,
size_t length,
const NullMap * nullmap,
const IColumn::Offsets & array_offsets,
const TiDB::TiDBCollatorPtr & collator,
String * sort_key_container) const
{
const auto & nested_nullmap = getNullMapData();
RUNTIME_CHECK(nested_nullmap.size() == array_offsets.back());
const auto nested_start = array_offsets[start - 1];
const auto nested_length = array_offsets[start + length - 1] - array_offsets[start - 1];

if (nullmap != nullptr)
{
// Got this code path when the column is like ColumnNullable(ColumnArray(ColumnNullable)),
RUNTIME_CHECK(nullmap->size() == array_offsets.size());
auto new_nullmap_col = ColumnUInt8::create();
auto & new_nullmap_data = new_nullmap_col->getData();
new_nullmap_data.assign(nested_nullmap);
for (size_t i = start; i < start + length; ++i)
{
if (DB::isNullAt(*nullmap, i))
{
const auto row_size = array_offsets[i] - array_offsets[i - 1];
const auto row_offset = array_offsets[i - 1];
for (size_t j = row_offset; j < row_offset + row_size; ++j)
setNullAt(new_nullmap_data, j);
}
}
// new_nullmap_col
// ->serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container);
// getNestedColumn().serializeToPosForCmpColumnArray(
// pos,
// start,
// length,
// &new_nullmap_data,
// array_offsets,
// collator,
// sort_key_container);
new_nullmap_col
->serializeToPosForCmp(pos, nested_start, nested_length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(
pos,
nested_start,
nested_length,
&new_nullmap_data,
collator,
sort_key_container);
}
else
{
// getNullMapColumn()
// .serializeToPosForCmpColumnArray(pos, start, length, nullptr, array_offsets, collator, sort_key_container);
// getNestedColumn().serializeToPosForCmpColumnArray(
// pos,
// start,
// length,
// &getNullMapData(),
// array_offsets,
// collator,
// sort_key_container);
getNullMapColumn()
.serializeToPosForCmp(pos, nested_start, nested_length, nullptr, collator, sort_key_container);
getNestedColumn().serializeToPosForCmp(
pos,
nested_start,
nested_length,
&getNullMapData(),
collator,
sort_key_container);
}
PaddedPODArray<char *> & /* pos */,
size_t /* start */,
size_t /* length */,
const NullMap * /* nullmap */,
const IColumn::Offsets & /* array_offsets */,
const TiDB::TiDBCollatorPtr & /* collator */,
String * /* sort_key_container */) const
{
// Doesn't support ColumnArray(ColumnNullable(ColumnXXX))
throw Exception(
"Method serializeToPosForCmpColumnArray is not supported for " + getName(),
ErrorCodes::NOT_IMPLEMENTED);
}
void ColumnNullable::serializeToPosForColumnArray(
PaddedPODArray<char *> & pos,
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ void ColumnString::serializeToPosForColumnArrayImpl(
array_offsets.back(),
size());

assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));
RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));

/// countSerializeByteSizeForCmpColumnArray has already checked that the size of one element is not greater than UINT32_MAX
if constexpr (compare_semantics)
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Columns/ColumnVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ void ColumnVector<T>::serializeToPosForColumnArrayImpl(
size());

static_assert(!(has_null && has_nullmap));
assert(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));
RUNTIME_CHECK(!has_nullmap || (nullmap && nullmap->size() == array_offsets.size()));

for (size_t i = 0; i < length; ++i)
{
Expand All @@ -216,9 +216,9 @@ void ColumnVector<T>::serializeToPosForColumnArrayImpl(
{
for (size_t j = 0; j < len; ++j)
tiflash_compiler_builtin_memcpy(
pos[i] + j * sizeof(T),
&data[array_offsets[start + i - 1] + j],
sizeof(T));
pos[i] + j * sizeof(T),
&data[array_offsets[start + i - 1] + j],
sizeof(T));
}
else
{
Expand Down
41 changes: 24 additions & 17 deletions dbms/src/Columns/tests/gtest_column_serialize_deserialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,20 +618,25 @@ try
testSerializeAndDeserialize(col_nullable_array_vec);
testSerializeAndDeserialize(col_nullable_array_vec, true, nullptr, nullptr);

// ColumnNullable(ColumnArray(ColumnNullable(ColumnString)))
auto col_offsets_1 = createColumn<IColumn::Offset>({1, 3, 6}).column;
auto col_array_string = ColumnArray::create(col_nullable_string, col_offsets_1);
auto col_nullable_array_string = ColumnNullable::create(col_array_string, createColumn<UInt8>({0, 1, 0}).column);
testCountSerializeByteSize(col_nullable_array_string,
{1 + 4 + 1 + 4 + 4,
1 + 4 + 2 + 8 + 4,
1 + 4 + 3 + 12 + 7}, true, nullptr);
testSerializeAndDeserialize(col_nullable_array_string);
testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_bin, &sort_key_container);
testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_general_ci, &sort_key_container);
testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_unicode_ci, &sort_key_container);

// ColumnNullable(ColumnTuple(ColumnNullable(ColumnString)))
// ColumnNullable(ColumnArray(ColumnString))
auto col_string = createColumn<String>({"123", "2", "34", "456", "5678", "6"}).column;
auto col_array_string = ColumnArray::create(col_vector, col_offsets);
auto col_nullable_array_string = ColumnNullable::create(col_array_vec, createColumn<UInt8>({1, 0, 1}).column);
testSerializeAndDeserialize(col_nullable_array_vec);
testSerializeAndDeserialize(col_nullable_array_vec, true, nullptr, nullptr);

// Nested ColumnNullable like ColumnNullable(ColumnArray(ColumnNullable(ColumnString))) is not supported.
// auto col_offsets_1 = createColumn<IColumn::Offset>({1, 3, 6}).column;
// auto col_array_string = ColumnArray::create(col_nullable_string, col_offsets_1);
// auto col_nullable_array_string = ColumnNullable::create(col_array_string, createColumn<UInt8>({0, 1, 0}).column);
// testCountSerializeByteSize(col_nullable_array_string,
// {1 + 4 + 1 + 4 + 4,
// 1 + 4 + 2 + 8 + 4,
// 1 + 4 + 3 + 12 + 7}, true, nullptr);
// testSerializeAndDeserialize(col_nullable_array_string);
// testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_bin, &sort_key_container);
// testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_general_ci, &sort_key_container);
// testSerializeAndDeserialize(col_nullable_array_string, true, collator_utf8_unicode_ci, &sort_key_container);
}
CATCH

Expand Down Expand Up @@ -667,9 +672,10 @@ try
auto col_array_nullable_string = ColumnArray::create(col_nullable_string, col_offsets);
testCountSerializeByteSize(col_array_nullable_string, {4 + 5 + 4, 4 + 10 + 4, 4 + 15 + 7});
testSerializeAndDeserialize(col_array_nullable_string);
testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_bin, &sort_key_container);
testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_general_ci, &sort_key_container);
testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_unicode_ci, &sort_key_container);
// Compare semantics do not support ColumnArray(ColumnNullable(ColumnString)).
// testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_bin, &sort_key_container);
// testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_general_ci, &sort_key_container);
// testSerializeAndDeserialize(col_array_nullable_string, true, collator_utf8_unicode_ci, &sort_key_container);

// ColumnArray(ColumnDecimal)
auto col_decimal_256 = createColumn<Decimal256>(
Expand Down Expand Up @@ -820,6 +826,7 @@ try
true,
collator_utf8_unicode_ci);

// ColumnString
String sort_key_container;
testSerializeAndDeserialize(col_string, true, collator_utf8_bin, &sort_key_container);
testSerializeAndDeserialize(col_string, true, collator_utf8_general_ci, &sort_key_container);
Expand Down

0 comments on commit eb01d9e

Please sign in to comment.