Skip to content

Commit

Permalink
fix 10
Browse files Browse the repository at this point in the history
  • Loading branch information
eldenmoon committed Dec 24, 2024
1 parent b3aa843 commit 3c60280
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 12 deletions.
8 changes: 4 additions & 4 deletions be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,11 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF
if (self_column_pb.has_variant_statistics()) {
_statistics = std::make_unique<VariantStatistics>();
const auto& variant_stats = self_column_pb.variant_statistics();
for (const auto& [path, _] : variant_stats.sparse_column_non_null_size()) {
_statistics->sparse_column_non_null_size.emplace(path.data(), path.size());
for (const auto& [path, size] : variant_stats.sparse_column_non_null_size()) {
_statistics->sparse_column_non_null_size.emplace(path, size);
}
for (const auto& [path, _] : variant_stats.subcolumn_non_null_size()) {
_statistics->subcolumns_non_null_size.emplace(path.data(), path.size());
for (const auto& [path, size] : variant_stats.subcolumn_non_null_size()) {
_statistics->subcolumns_non_null_size.emplace(path, size);
}
}
return Status::OK();
Expand Down
18 changes: 18 additions & 0 deletions be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,26 @@ Status HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject&
auto sub_path = get_sub_path(path, path_prefix);
sparse_data_paths->insert_data(sub_path.data(), sub_path.size());
sparse_data_values->insert_from(src_sparse_data_values, lower_bound_index);
} else {
// insert into root column, example: access v['b'] and b is in sparse column
// data example:
// {"b" : 123}
// {"b" : {"c" : 456}}
// b may be in the sparse column while b.c is in a subcolumn; put `b` into the root column to
// distinguish it from "", which is the empty path and the root
if (container_variant.is_null_root()) {
container_variant.add_sub_column({}, sparse_data_offsets.size());
}
const auto& data = ColumnObject::deserialize_from_sparse_column(
&src_sparse_data_values, lower_bound_index);
container_variant.get_subcolumn({})->insert(data.first, data.second);
}
}
// if root was created, and not seen in sparse data, insert default
if (!container_variant.is_null_root() &&
container_variant.get_subcolumn({})->size() == sparse_data_offsets.size()) {
container_variant.get_subcolumn({})->insert_default();
}
sparse_data_offsets.push_back(sparse_data_paths->size());
}
}
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ Status VariantColumnWriterImpl::_get_subcolumn_paths_from_stats(std::set<std::st
// reserve 1 for root column
for (const auto& [size, path] : paths_with_sizes) {
if (paths.size() < vectorized::ColumnObject::MAX_SUBCOLUMNS - 1) {
VLOG_DEBUG << "pick " << path << " as subcolumn";
paths.emplace(path);
}
// // todo : Add all remaining paths into shared data statistics until we reach its max size;
Expand All @@ -120,6 +121,7 @@ Status VariantColumnWriterImpl::_get_subcolumn_paths_from_stats(std::set<std::st
} else {
// Use all dynamic paths from all source columns.
for (const auto& [path, _] : path_to_total_number_of_non_null_values) {
VLOG_DEBUG << "pick " << path << " as subcolumn";
paths.emplace(path);
}
}
Expand Down
14 changes: 14 additions & 0 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1308,6 +1308,7 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
auto it = src_path_and_subcoumn_for_sparse_column.begin();
auto end = src_path_and_subcoumn_for_sparse_column.end();
while (it != end) {
VLOG_DEBUG << "pick " << it->first << " as sparse column";
sorted_src_subcolumn_for_sparse_column.emplace_back(it->first, it->second);
++it;
}
Expand Down Expand Up @@ -1967,6 +1968,10 @@ Status ColumnObject::finalize(FinalizeMode mode) {

// 3. pick MAX_SUBCOLUMNS selected subcolumns
for (size_t i = 0; i < std::min(MAX_SUBCOLUMNS, sorted_by_size.size()); ++i) {
// if too many null values, then consider it as sparse column
if (sorted_by_size[i].second < num_rows * 0.95) {
continue;
}
selected_path.insert(sorted_by_size[i].first);
}
std::map<std::string_view, Subcolumn> remaing_subcolumns;
Expand All @@ -1975,6 +1980,7 @@ Status ColumnObject::finalize(FinalizeMode mode) {
if (selected_path.find(entry->path.get_path()) != selected_path.end()) {
new_subcolumns.add(entry->path, entry->data);
} else {
VLOG_DEBUG << "pick " << entry->path.get_path() << " as sparse column";
remaing_subcolumns.emplace(entry->path.get_path(), entry->data);
}
}
Expand Down Expand Up @@ -2143,7 +2149,15 @@ const DataTypePtr ColumnObject::NESTED_TYPE = std::make_shared<vectorized::DataT
std::make_shared<vectorized::DataTypeArray>(std::make_shared<vectorized::DataTypeNullable>(
std::make_shared<vectorized::DataTypeObject>())));

// Limit on the number of subcolumns ColumnObject picks (see the subcolumn selection
// step in ColumnObject::finalize).
// Release builds (NDEBUG defined) use a fixed limit of 5. Debug builds instead choose a
// pseudo-random limit in [1, 10] once, when this static member is initialized.
// NOTE(review): the debug branch reseeds the process-wide std::rand() state through
// std::srand(); confirm nothing else relies on a deterministic seed.
#ifdef NDEBUG
const size_t ColumnObject::MAX_SUBCOLUMNS = 5;
#else
const size_t ColumnObject::MAX_SUBCOLUMNS = []() -> size_t {
    // Seed the C random number generator from the current time.
    std::srand(static_cast<unsigned int>(std::time(nullptr)));
    // std::rand() % 10 lies in [0, 9]; adding 1 shifts the result to [1, 10].
    const int zero_based = std::rand() % 10;
    return 1 + zero_based;
}();
#endif

DataTypePtr ColumnObject::get_root_type() const {
return subcolumns.get_root()->data.get_least_common_type();
Expand Down
16 changes: 8 additions & 8 deletions regression-test/data/variant_p0/compaction/test_compaction.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
3 {"x":[3]}
4 {"y":1}
4 {"y":1}
5 {"z":2.0}
5 {"z":2.0}
5 {"z":2}
5 {"z":2}
6 {"x":111}
6 {"x":111}
7 {"m":1}
Expand Down Expand Up @@ -96,8 +96,8 @@
3 {"x":[3]}
4 {"y":1}
4 {"y":1}
5 {"z":2.0}
5 {"z":2.0}
5 {"z":2}
5 {"z":2}
6 {"x":111}
6 {"x":111}
7 {"m":1}
Expand Down Expand Up @@ -180,7 +180,7 @@
2 {"a":"1"}
3 {"x":[3]}
4 {"y":1}
5 {"z":2.0}
5 {"z":2}
6 {"x":111}
7 {"m":1}
8 {"l":2}
Expand Down Expand Up @@ -233,7 +233,7 @@
2 {"a":"1"}
3 {"x":[3]}
4 {"y":1}
5 {"z":2.0}
5 {"z":2}
6 {"x":111}
7 {"m":1}
8 {"l":2}
Expand Down Expand Up @@ -284,7 +284,7 @@
2 {"a":"1"}
3 {"x":[3]}
4 {"y":1}
5 {"z":2.0}
5 {"z":2}
6 {"x":111}
7 {"m":1}
8 {"l":2}
Expand Down Expand Up @@ -337,7 +337,7 @@
2 {"a":"1"}
3 {"x":[3]}
4 {"y":1}
5 {"z":2.0}
5 {"z":2}
6 {"x":111}
7 {"m":1}
8 {"l":2}
Expand Down

0 comments on commit 3c60280

Please sign in to comment.