Skip to content

Commit

Permalink
[fix](cluster key) fix some cluster key problems (#43574)
Browse files Browse the repository at this point in the history
  • Loading branch information
mymeiyi authored Nov 19, 2024
1 parent 8be13b0 commit cd94cf1
Show file tree
Hide file tree
Showing 29 changed files with 11,681 additions and 21 deletions.
4 changes: 4 additions & 0 deletions be/src/olap/base_tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1566,6 +1566,10 @@ Status BaseTablet::check_rowid_conversion(
VLOG_DEBUG << "check_rowid_conversion, location_map is empty";
return Status::OK();
}
if (!tablet_schema()->cluster_key_idxes().empty()) {
VLOG_DEBUG << "skip check_rowid_conversion for mow tables with cluster keys";
return Status::OK();
}
std::vector<segment_v2::SegmentSharedPtr> dst_segments;

RETURN_IF_ERROR(
Expand Down
6 changes: 5 additions & 1 deletion be/src/olap/memtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,12 @@ Status MemTable::_sort_by_cluster_keys() {
for (int i = 0; i < row_in_blocks.size(); i++) {
row_pos_vec.emplace_back(row_in_blocks[i]->_row_pos);
}
std::vector<int> column_offset;
for (int i = 0; i < _column_offset.size(); ++i) {
column_offset.emplace_back(i);
}
return _output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
row_pos_vec.data() + in_block.rows(), &_column_offset);
row_pos_vec.data() + in_block.rows(), &column_offset);
}

void MemTable::_sort_one_column(std::vector<RowInBlock*>& row_in_blocks, Tie& tie,
Expand Down
5 changes: 4 additions & 1 deletion be/src/olap/merger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,10 @@ Status Merger::vertical_compact_one_group(
}

reader_params.tablet_schema = merge_tablet_schema;
bool has_cluster_key = false;
if (!tablet->tablet_schema()->cluster_key_idxes().empty()) {
reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap();
has_cluster_key = true;
}

if (is_key && stats_output && stats_output->rowid_conversion) {
Expand Down Expand Up @@ -290,7 +292,8 @@ Status Merger::vertical_compact_one_group(
"failed to read next block when merging rowsets of tablet " +
std::to_string(tablet->tablet_id()));
RETURN_NOT_OK_STATUS_WITH_WARN(
dst_rowset_writer->add_columns(&block, column_group, is_key, max_rows_per_segment),
dst_rowset_writer->add_columns(&block, column_group, is_key, max_rows_per_segment,
has_cluster_key),
"failed to write block when merging rowsets of tablet " +
std::to_string(tablet->tablet_id()));

Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/primary_key_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ Status PrimaryKeyIndexBuilder::add_item(const Slice& key) {
if (UNLIKELY(_num_rows == 0)) {
_min_key.append(key.get_data(), key.get_size());
}
DCHECK(key.compare(_max_key) > 0)
<< "found duplicate key or key is not sorted! current key: " << key
<< ", last max key: " << _max_key;
_max_key.clear();
_max_key.append(key.get_data(), key.get_size());
_num_rows++;
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/rowset_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class RowsetWriter {
"RowsetWriter not support add_block");
}
virtual Status add_columns(const vectorized::Block* block, const std::vector<uint32_t>& col_ids,
bool is_key, uint32_t max_rows_per_segment) {
bool is_key, uint32_t max_rows_per_segment, bool has_cluster_key) {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(
"RowsetWriter not support add_columns");
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/rowset/segment_creator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ Status SegmentFlusher::close() {
bool SegmentFlusher::need_buffering() {
// buffer during schema change when the tablet has variant columns or cluster keys
return _context.write_type == DataWriteType::TYPE_SCHEMA_CHANGE &&
_context.tablet_schema->num_variant_columns() > 0;
(_context.tablet_schema->num_variant_columns() > 0 ||
!_context.tablet_schema->cluster_key_idxes().empty());
}

Status SegmentFlusher::_add_rows(std::unique_ptr<segment_v2::SegmentWriter>& segment_writer,
Expand Down
8 changes: 6 additions & 2 deletions be/src/olap/rowset/vertical_beta_rowset_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ template <class T>
requires std::is_base_of_v<BaseBetaRowsetWriter, T>
Status VerticalBetaRowsetWriter<T>::add_columns(const vectorized::Block* block,
const std::vector<uint32_t>& col_ids, bool is_key,
uint32_t max_rows_per_segment) {
uint32_t max_rows_per_segment,
bool has_cluster_key) {
auto& context = this->_context;

VLOG_NOTICE << "VerticalBetaRowsetWriter::add_columns, columns: " << block->columns();
Expand All @@ -71,7 +72,10 @@ Status VerticalBetaRowsetWriter<T>::add_columns(const vectorized::Block* block,
_cur_writer_idx = 0;
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows));
} else if (is_key) {
if (_segment_writers[_cur_writer_idx]->num_rows_written() > max_rows_per_segment) {
// TODO: for cluster-key tables we currently always create a new segment writer, because the
// primary keys are sorted in SegmentWriter::_generate_primary_key_index; this may produce too many segments.
if (_segment_writers[_cur_writer_idx]->num_rows_written() > max_rows_per_segment ||
has_cluster_key) {
// segment is full (or the table has cluster keys): flush columns and create a new segment writer
RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get(), true));

Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/vertical_beta_rowset_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class VerticalBetaRowsetWriter final : public T {
~VerticalBetaRowsetWriter() override = default;

Status add_columns(const vectorized::Block* block, const std::vector<uint32_t>& col_ids,
bool is_key, uint32_t max_rows_per_segment) override;
bool is_key, uint32_t max_rows_per_segment, bool has_cluster_key) override;

// flush last segment's column
Status flush_columns(bool is_key) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,10 @@ public class Config extends ConfigBase {
"Default timeout for insert load job, in seconds."})
public static int insert_load_default_timeout_second = 14400; // 4 hour

// Test-only switch, off by default. When enabled, table creation may pick a random
// subset of eligible columns as cluster keys for merge-on-write tables that declare none,
// and the "Unique keys and cluster keys should be different" and
// "short keys is a part of unique keys" validations are skipped.
@ConfField(mutable = true, masterOnly = true, description = {"对mow表随机设置cluster keys,用于测试",
"random set cluster keys for mow table for test"})
public static boolean random_add_cluster_keys_for_mow = false;

@ConfField(mutable = true, masterOnly = true, description = {
"等内部攒批真正写入完成才返回;insert into和stream load默认开启攒批",
"Wait for the internal batch to be written before returning; "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.catalog.KeysType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;

Expand Down Expand Up @@ -138,7 +139,7 @@ private void analyzeClusterKeys(List<ColumnDef> cols) throws AnalysisException {
break;
}
}
if (sameKey) {
if (sameKey && !Config.random_add_cluster_keys_for_mow) {
throw new AnalysisException("Unique keys and cluster keys should be different.");
}
// check that cluster key column exists
Expand Down
4 changes: 2 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -4691,13 +4691,13 @@ public static short calcShortKeyColumnCount(List<Column> columns, Map<String, St

if (clusterColumns.size() > 0 && shortKeyColumnCount < clusterColumns.size()) {
boolean sameKey = true;
for (int i = 0; i < shortKeyColumnCount; i++) {
for (int i = 0; i < shortKeyColumnCount && i < indexColumns.size(); i++) {
if (!clusterColumns.get(i).getName().equals(indexColumns.get(i).getName())) {
sameKey = false;
break;
}
}
if (sameKey) {
if (sameKey && !Config.random_add_cluster_keys_for_mow) {
throw new DdlException(shortKeyColumnCount + " short keys is a part of unique keys");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,17 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
Expand All @@ -99,7 +103,6 @@
* table info in creating table.
*/
public class CreateTableInfo {

public static final String ENGINE_OLAP = "olap";
public static final String ENGINE_JDBC = "jdbc";
public static final String ENGINE_ELASTICSEARCH = "elasticsearch";
Expand All @@ -111,6 +114,8 @@ public class CreateTableInfo {
private static final ImmutableSet<AggregateType> GENERATED_COLUMN_ALLOW_AGG_TYPE =
ImmutableSet.of(AggregateType.REPLACE, AggregateType.REPLACE_IF_NOT_NULL);

private static final Logger LOG = LogManager.getLogger(CreateTableInfo.class);

private final boolean ifNotExists;
private String ctlName;
private String dbName;
Expand Down Expand Up @@ -422,6 +427,35 @@ public void validate(ConnectContext ctx) {
}
}

try {
if (Config.random_add_cluster_keys_for_mow && isEnableMergeOnWrite && clusterKeysColumnNames.isEmpty()
&& PropertyAnalyzer.analyzeUseLightSchemaChange(new HashMap<>(properties))) {
// exclude columns whose data type can not be cluster key, see {@link ColumnDefinition#validate}
List<ColumnDefinition> clusterKeysCandidates = columns.stream().filter(c -> {
DataType type = c.getType();
return !(type.isFloatLikeType() || type.isStringType() || type.isArrayType()
|| type.isBitmapType() || type.isHllType() || type.isQuantileStateType()
|| type.isJsonType()
|| type.isVariantType()
|| type.isMapType()
|| type.isStructType());
}).collect(Collectors.toList());
if (clusterKeysCandidates.size() > 0) {
clusterKeysColumnNames = new ArrayList<>();
Random random = new Random();
int randomClusterKeysCount = random.nextInt(clusterKeysCandidates.size()) + 1;
Collections.shuffle(clusterKeysCandidates);
for (int i = 0; i < randomClusterKeysCount; i++) {
clusterKeysColumnNames.add(clusterKeysCandidates.get(i).getName());
}
LOG.info("Randomly add cluster keys for table {}.{}: {}",
dbName, tableName, clusterKeysColumnNames);
}
}
} catch (Exception e) {
throw new AnalysisException(e.getMessage(), e.getCause());
}

validateKeyColumns();
if (!clusterKeysColumnNames.isEmpty()) {
if (!isEnableMergeOnWrite) {
Expand Down Expand Up @@ -830,7 +864,7 @@ private void validateKeyColumns() {
break;
}
}
if (sameKey) {
if (sameKey && !Config.random_add_cluster_keys_for_mow) {
throw new AnalysisException("Unique keys and cluster keys should be different.");
}
// check that cluster key column exists
Expand Down
40 changes: 40 additions & 0 deletions regression-test/data/point_query_p0/load_ck.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
-2147475406 true 45 23794 -11023 915989078 2115356192 15927.068 1.392557423391501E9 45951348783208518.810 8340516346665031.310 2022-01-26 2022-04-13T11:13:48 2022-01-31 2022-02-16T06:07:21 130.50.6.0 [email protected] Londonderry Alley 61
-2147424303 false -28 -5177 -1409 149417728 553396597 -10123.558 -1.268722910924068E9 67354830622005524.848 52407243294991364.348 2022-06-29 2022-05-06T09:30:02 2023-01-09 2022-03-12T14:26 109.50.92.119 [email protected] Heath Drive 38
-2147413967 true -75 30533 -5435 -727385447 32929830 9577.564 1.334766997510087E9 39973144022098028.800 5886463393340733.108 2022-06-23 2022-05-10T19:13:50 2022-01-17 2022-11-26T22:49:36 157.38.90.25 [email protected] Loeprich Crossing 43
-2147380173 true -79 -5785 9752 1851350218 1121852298 25652.402 -1.618061059513558E9 95821873014545736.897 38923569966532828.626 2022-10-30 2022-05-02T17:06:33 2022-08-11 2022-02-08T10:19:47 217.198.98.239 [email protected] Lawn Lane 78
-2147374459 false -118 -30267 -14606 262497842 -1811881704 8211.805 2.37851933046663E8 37354136531251060.755 63024710145324035.720 2022-10-11 2022-01-17T10:20:18 2022-04-12 2022-10-24T18:14:38 16.243.195.81 [email protected] Annamark Pass 72
-2147369329 false -121 -22859 4733 -378861997 385323541 -22969.846 1.483825622420542E9 50940877800950041.950 87108729227937387.294 2022-06-05 2022-08-18T05:39:56 2022-08-21 2022-12-12T08:43:59 16.27.107.167 [email protected] Village Green Terrace 55
-2147367329 true 84 21471 -29331 1823545950 1200800855 -13832.219 8.01505090724918E8 45495296019797580.477 45196001436348967.557 2022-02-17 2022-05-23T01:44:31 2022-08-01 2022-08-16T10:32:36 84.110.209.128 [email protected] Packers Street 34
-2147339287 true 62 28989 -32018 650184880 -365849435 -21644.414 -7.8648426469503E7 92593387160450273.870 39588697152489527.185 2022-07-23 2023-01-03T11:54:35 2022-08-02 2022-05-19T18:35:36 30.194.6.115 [email protected] Basil Street 79
-2147336695 false 42 -7202 27919 1898713395 1177326785 -302.0104 -1.268944460183375E9 61604656210729534.717 6683002058708470.832 2022-08-20 2022-08-14T01:41:12 2022-11-02 2022-05-15T04:22:07 36.86.77.214 [email protected] Briar Crest Crossing 37
-2147330925 false -122 -21211 -2331 1906695924 -1342280417 5545.3013 -1.286038914681617E9 31911132334645267.930 84364209624711210.131 2022-02-16 2022-03-11T12:05:33 2022-11-24 2022-12-17T19:56:16 6.87.14.74 [email protected] Forest Run Terrace 13

-- !sql --
-2145739104 true 10 -22603 6132 -984517723 138439036 8683.904 1.681202635040786E9 49683339998558535.395 38251259739648714.297 2022-04-26 2022-09-12T00:32:18 2022-11-20 2023-01-09T16:19:06 180.215.212.86 [email protected] Darwin Center 26
-2140012242 false 10 30893 -16192 -175522451 -1382546546 21324.643 2.017216342012696E9 41477187479096470.647 25445001389089818.791 2022-11-06 2022-09-02T12:04:05 2022-05-29 2022-02-04T22:21:46 24.25.69.81 [email protected] Jay Way 9
-2130269306 false 10 30342 -18732 1461226453 -1257020753 -10751.815 3.44246067782915E8 2456538047280540.838 37394928326629689.946 2022-11-28 2022-05-04T20:40:19 2022-08-25 2022-03-18T10:17:35 179.198.200.96 [email protected] Tennyson Street 83
-2122709724 true 10 -8985 -30620 -1375603501 631094037 14711.055 -1.210030062083139E9 96220820029888063.156 42161382030214480.728 2022-05-28 2023-01-03T20:44:27 2022-06-11 2022-07-26T22:49:22 13.249.135.222 [email protected] Riverside Parkway 72
-2117749737 false 10 26335 30644 1841596444 283308539 18848.148 3.5339747538014E8 11924963560520504.166 28287350935413049.601 2022-08-01 2022-04-21T02:28:54 2022-02-27 2022-09-02T17:11:17 183.108.102.1 [email protected] Maple Wood Street 40
-2113239713 false 10 27624 31311 711781944 -1838033894 -12299.482 -1.88263132184351E9 9480201396831049.605 52114965946122870.302 2022-06-11 2022-08-31T08:54:30 2022-03-26 2023-01-08T23:28:27 200.161.156.176 [email protected] Westport Drive 82
-2107773486 false 10 27096 10368 1579374450 1370327646 -15339.031 2.110010890135424E9 54514853031265543.378 38546969634312019.180 2022-12-31 2022-10-07T10:18:27 2022-10-01 2022-07-09T11:41:11 121.120.227.53 [email protected] Sugar Crossing 43
-2107242025 true 10 25215 26566 1292568651 -2126795906 11912.074 -2.140044503516609E9 98695561934257164.368 18845397264645075.775 2022-05-21 2022-09-24T23:00:21 2022-02-12 2022-11-24T19:17:03 141.226.90.50 [email protected] Cody Street 78
-2106969609 true 10 29572 16738 1736115820 -957295886 -13319.206 -1.333603562816737E9 91224478600376111.942 69457425159617037.453 2022-09-06 2022-05-08T19:52:36 2022-04-05 2022-08-17T19:23:31 222.79.139.99 [email protected] Oxford Alley 77
-2102307005 true 10 -23674 24613 -1810828490 -47095409 -14686.167 2.072108685694799E9 39847820962230526.125 584354832299375.156 2022-03-27 2022-02-11T13:46:06 2022-12-25 2022-11-28T09:37:49 213.146.33.250 [email protected] Eagle Crest Terrace 84

-- !sql --
26743529

-- !sql --
103

-- !sql --
2999834

-- !sql --
-2147303679 2022-12-02T04:39:45

-- !sql --
-2147303679 84525658185172942.967

Loading

0 comments on commit cd94cf1

Please sign in to comment.