Skip to content

Commit

Permalink
Merge pull request ClickHouse#69731 from Avogar/dynamic-constraints
Browse files Browse the repository at this point in the history
Don't allow Variant/Dynamic types in ORDER BY/GROUP BY/PARTITION BY/PRIMARY KEY by default
  • Loading branch information
Avogar authored Nov 7, 2024
2 parents e3c716c + 4d3dba2 commit 3efeccd
Show file tree
Hide file tree
Showing 39 changed files with 531 additions and 45 deletions.
10 changes: 6 additions & 4 deletions docs/en/sql-reference/data-types/dynamic.md
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,8 @@ The result of operator `<` for values `d1` with underlying type `T1` and `d2` wi
- If `T1 = T2 = T`, the result will be `d1.T < d2.T` (underlying values will be compared).
- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared).

By default `Dynamic` type is not allowed in `GROUP BY`/`ORDER BY` keys, if you want to use it consider its special comparison rule and enable `allow_suspicious_types_in_group_by`/`allow_suspicious_types_in_order_by` settings.

Examples:
```sql
CREATE TABLE test (d Dynamic) ENGINE=Memory;
Expand All @@ -535,7 +537,7 @@ SELECT d, dynamicType(d) FROM test;
```

```sql
SELECT d, dynamicType(d) FROM test ORDER BY d;
SELECT d, dynamicType(d) FROM test ORDER BY d SETTINGS allow_suspicious_types_in_order_by=1;
```

```sql
Expand All @@ -557,7 +559,7 @@ Example:
```sql
CREATE TABLE test (d Dynamic) ENGINE=Memory;
INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64);
SELECT d, dynamicType(d) FROM test ORDER by d;
SELECT d, dynamicType(d) FROM test ORDER BY d SETTINGS allow_suspicious_types_in_order_by=1;
```

```text
Expand All @@ -570,7 +572,7 @@ SELECT d, dynamicType(d) FROM test ORDER by d;
```

```sql
SELECT d, dynamicType(d) FROM test GROUP by d;
SELECT d, dynamicType(d) FROM test GROUP by d SETTINGS allow_suspicious_types_in_group_by=1;
```

```text
Expand All @@ -582,7 +584,7 @@ SELECT d, dynamicType(d) FROM test GROUP by d;
└─────┴────────────────┘
```

**Note**: the described comparison rule is not applied during execution of comparison functions like `<`/`>`/`=` and others because of [special work](#using-dynamic-type-in-functions) of functions with `Dynamic` type
**Note:** the described comparison rule is not applied during execution of comparison functions like `<`/`>`/`=` and others because of [special work](#using-dynamic-type-in-functions) of functions with `Dynamic` type

## Reaching the limit in number of different data types stored inside Dynamic

Expand Down
2 changes: 2 additions & 0 deletions docs/en/sql-reference/data-types/variant.md
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,8 @@ SELECT v, variantType(v) FROM test ORDER by v;
└─────┴────────────────┘
```

**Note** by default `Variant` type is not allowed in `GROUP BY`/`ORDER BY` keys, if you want to use it consider its special comparison rule and enable `allow_suspicious_types_in_group_by`/`allow_suspicious_types_in_order_by` settings.

## JSONExtract functions with Variant

All `JSONExtract*` functions support `Variant` type:
Expand Down
58 changes: 53 additions & 5 deletions src/Analyzer/Resolve/QueryAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ namespace Setting
extern const SettingsBool single_join_prefer_left_table;
extern const SettingsBool transform_null_in;
extern const SettingsUInt64 use_structure_from_insertion_table_in_table_functions;
extern const SettingsBool allow_suspicious_types_in_group_by;
extern const SettingsBool allow_suspicious_types_in_order_by;
extern const SettingsBool use_concurrency_control;
}

Expand Down Expand Up @@ -4028,6 +4030,8 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
sort_node.getExpression() = sort_column_list_node->getNodes().front();
}

validateSortingKeyType(sort_node.getExpression()->getResultType(), scope);

size_t sort_expression_projection_names_size = sort_expression_projection_names.size();
if (sort_expression_projection_names_size != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
Expand Down Expand Up @@ -4142,6 +4146,26 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
return result_projection_names;
}

void QueryAnalyzer::validateSortingKeyType(const DataTypePtr & sorting_key_type, const IdentifierResolveScope & scope) const
{
if (scope.context->getSettingsRef()[Setting::allow_suspicious_types_in_order_by])
return;

auto check = [](const IDataType & type)
{
if (isDynamic(type) || isVariant(type))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Data types Variant/Dynamic are not allowed in ORDER BY keys, because it can lead to unexpected results. "
"Consider using a subcolumn with a specific data type instead (for example 'column.Int64' or 'json.some.path.:Int64' if "
"its a JSON path subcolumn) or casting this column to a specific data type. "
"Set setting allow_suspicious_types_in_order_by = 1 in order to allow it");
};

check(*sorting_key_type);
sorting_key_type->forEachChild(check);
}

namespace
{

Expand Down Expand Up @@ -4181,11 +4205,12 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR
expandTuplesInList(group_by_list);
}

if (scope.group_by_use_nulls)
for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes())
{
for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes())
for (const auto & group_by_elem : grouping_set->as<ListNode>()->getNodes())
{
for (const auto & group_by_elem : grouping_set->as<ListNode>()->getNodes())
validateGroupByKeyType(group_by_elem->getResultType(), scope);
if (scope.group_by_use_nulls)
scope.nullable_group_by_keys.insert(group_by_elem);
}
}
Expand All @@ -4201,14 +4226,37 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR
auto & group_by_list = query_node_typed.getGroupBy().getNodes();
expandTuplesInList(group_by_list);

if (scope.group_by_use_nulls)
for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes())
{
for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes())
validateGroupByKeyType(group_by_elem->getResultType(), scope);
if (scope.group_by_use_nulls)
scope.nullable_group_by_keys.insert(group_by_elem);
}
}
}

/** Validate data types of GROUP BY key.
*/
void QueryAnalyzer::validateGroupByKeyType(const DataTypePtr & group_by_key_type, const IdentifierResolveScope & scope) const
{
if (scope.context->getSettingsRef()[Setting::allow_suspicious_types_in_group_by])
return;

auto check = [](const IDataType & type)
{
if (isDynamic(type) || isVariant(type))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Data types Variant/Dynamic are not allowed in GROUP BY keys, because it can lead to unexpected results. "
"Consider using a subcolumn with a specific data type instead (for example 'column.Int64' or 'json.some.path.:Int64' if "
"its a JSON path subcolumn) or casting this column to a specific data type. "
"Set setting allow_suspicious_types_in_group_by = 1 in order to allow it");
};

check(*group_by_key_type);
group_by_key_type->forEachChild(check);
}

/** Resolve interpolate columns nodes list.
*/
void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpolate_node_list, IdentifierResolveScope & scope)
Expand Down
4 changes: 4 additions & 0 deletions src/Analyzer/Resolve/QueryAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,12 @@ class QueryAnalyzer

ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope);

void validateSortingKeyType(const DataTypePtr & sorting_key_type, const IdentifierResolveScope & scope) const;

void resolveGroupByNode(QueryNode & query_node_typed, IdentifierResolveScope & scope);

void validateGroupByKeyType(const DataTypePtr & group_by_key_type, const IdentifierResolveScope & scope) const;

void resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpolate_node_list, IdentifierResolveScope & scope);

void resolveWindowNodeList(QueryTreeNodePtr & window_node_list, IdentifierResolveScope & scope);
Expand Down
6 changes: 6 additions & 0 deletions src/Core/Settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,12 @@ In CREATE TABLE statement allows specifying Variant type with similar variant ty
)", 0) \
DECLARE(Bool, allow_suspicious_primary_key, false, R"(
Allow suspicious `PRIMARY KEY`/`ORDER BY` for MergeTree (i.e. SimpleAggregateFunction).
)", 0) \
DECLARE(Bool, allow_suspicious_types_in_group_by, false, R"(
Allows or restricts using [Variant](../../sql-reference/data-types/variant.md) and [Dynamic](../../sql-reference/data-types/dynamic.md) types in GROUP BY keys.
)", 0) \
DECLARE(Bool, allow_suspicious_types_in_order_by, false, R"(
Allows or restricts using [Variant](../../sql-reference/data-types/variant.md) and [Dynamic](../../sql-reference/data-types/dynamic.md) types in ORDER BY keys.
)", 0) \
DECLARE(Bool, compile_expressions, false, R"(
Compile some scalar functions and operators to native code. Due to a bug in the LLVM compiler infrastructure, on AArch64 machines, it is known to lead to a nullptr dereference and, consequently, server crash. Do not enable this setting.
Expand Down
2 changes: 2 additions & 0 deletions src/Core/SettingsChangesHistory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.11",
{
{"allow_suspicious_types_in_group_by", true, false, "Don't allow Variant/Dynamic types in GROUP BY by default"},
{"allow_suspicious_types_in_order_by", true, false, "Don't allow Variant/Dynamic types in ORDER BY by default"},
{"distributed_cache_discard_connection_if_unread_data", true, true, "New setting"},
{"filesystem_cache_enable_background_download_for_metadata_files_in_packed_storage", true, true, "New setting"},
{"filesystem_cache_enable_background_download_during_fetch", true, true, "New setting"},
Expand Down
2 changes: 2 additions & 0 deletions src/Databases/enableAllExperimentalSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ void enableAllExperimentalSettings(ContextMutablePtr context)

context->setSetting("allow_suspicious_low_cardinality_types", 1);
context->setSetting("allow_suspicious_fixed_string_types", 1);
context->setSetting("allow_suspicious_types_in_group_by", 1);
context->setSetting("allow_suspicious_types_in_order_by", 1);
context->setSetting("allow_suspicious_indices", 1);
context->setSetting("allow_suspicious_codecs", 1);
context->setSetting("allow_hyperscan", 1);
Expand Down
58 changes: 58 additions & 0 deletions src/Interpreters/ExpressionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ namespace Setting
extern const SettingsBool query_plan_aggregation_in_order;
extern const SettingsBool query_plan_read_in_order;
extern const SettingsUInt64 use_index_for_in_with_subqueries_max_values;
extern const SettingsBool allow_suspicious_types_in_group_by;
extern const SettingsBool allow_suspicious_types_in_order_by;
}


Expand All @@ -118,6 +120,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
extern const int ILLEGAL_COLUMN;
}

namespace
Expand Down Expand Up @@ -1368,13 +1371,15 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join);

ASTs asts = select_query->groupBy()->children;
NameSet group_by_keys;
if (select_query->group_by_with_grouping_sets)
{
for (const auto & ast : asts)
{
for (const auto & ast_element : ast->children)
{
step.addRequiredOutput(ast_element->getColumnName());
group_by_keys.insert(ast_element->getColumnName());
getRootActions(ast_element, only_types, step.actions()->dag);
}
}
Expand All @@ -1384,10 +1389,17 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
for (const auto & ast : asts)
{
step.addRequiredOutput(ast->getColumnName());
group_by_keys.insert(ast->getColumnName());
getRootActions(ast, only_types, step.actions()->dag);
}
}

for (const auto & result_column : step.getResultColumns())
{
if (group_by_keys.contains(result_column.name))
validateGroupByKeyType(result_column.type);
}

if (optimize_aggregation_in_order)
{
for (auto & child : asts)
Expand All @@ -1402,6 +1414,26 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
return true;
}

void SelectQueryExpressionAnalyzer::validateGroupByKeyType(const DB::DataTypePtr & key_type) const
{
if (getContext()->getSettingsRef()[Setting::allow_suspicious_types_in_group_by])
return;

auto check = [](const IDataType & type)
{
if (isDynamic(type) || isVariant(type))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Data types Variant/Dynamic are not allowed in GROUP BY keys, because it can lead to unexpected results. "
"Consider using a subcolumn with a specific data type instead (for example 'column.Int64' or 'json.some.path.:Int64' if "
"its a JSON path subcolumn) or casting this column to a specific data type. "
"Set setting allow_suspicious_types_in_group_by = 1 in order to allow it");
};

check(*key_type);
key_type->forEachChild(check);
}

void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = getAggregatingQuery();
Expand Down Expand Up @@ -1599,6 +1631,12 @@ ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendOrderBy(
with_fill = true;
}

for (const auto & result_column : step.getResultColumns())
{
if (order_by_keys.contains(result_column.name))
validateOrderByKeyType(result_column.type);
}

if (auto interpolate_list = select_query->interpolate())
{

Expand Down Expand Up @@ -1664,6 +1702,26 @@ ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendOrderBy(
return actions;
}

void SelectQueryExpressionAnalyzer::validateOrderByKeyType(const DataTypePtr & key_type) const
{
if (getContext()->getSettingsRef()[Setting::allow_suspicious_types_in_order_by])
return;

auto check = [](const IDataType & type)
{
if (isDynamic(type) || isVariant(type))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Data types Variant/Dynamic are not allowed in ORDER BY keys, because it can lead to unexpected results. "
"Consider using a subcolumn with a specific data type instead (for example 'column.Int64' or 'json.some.path.:Int64' if "
"its a JSON path subcolumn) or casting this column to a specific data type. "
"Set setting allow_suspicious_types_in_order_by = 1 in order to allow it");
};

check(*key_type);
key_type->forEachChild(check);
}

bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = getSelectQuery();
Expand Down
2 changes: 2 additions & 0 deletions src/Interpreters/ExpressionAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
ActionsAndProjectInputsFlagPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types);
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
void validateGroupByKeyType(const DataTypePtr & key_type) const;
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
void appendWindowFunctionsArguments(ExpressionActionsChain & chain, bool only_types);

Expand All @@ -408,6 +409,7 @@ class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
bool appendHaving(ExpressionActionsChain & chain, bool only_types);
/// appendSelect
ActionsAndProjectInputsFlagPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &);
void validateOrderByKeyType(const DataTypePtr & key_type) const;
bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
/// appendProjectResult
};
Expand Down
12 changes: 12 additions & 0 deletions src/Storages/KeyDescription.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ KeyDescription KeyDescription::getSortingKeyFromAST(
throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY,
"Column {} with type {} is not allowed in key expression, it's not comparable",
backQuote(result.sample_block.getByPosition(i).name), result.data_types.back()->getName());

auto check = [&](const IDataType & type)
{
if (isDynamic(type) || isVariant(type))
throw Exception(
ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY,
"Column with type Variant/Dynamic is not allowed in key expression. Consider using a subcolumn with a specific data "
"type instead (for example 'column.Int64' or 'json.some.path.:Int64' if its a JSON path subcolumn) or casting this column to a specific data type");
};

check(*result.data_types.back());
result.data_types.back()->forEachChild(check);
}

return result;
Expand Down
1 change: 1 addition & 0 deletions tests/queries/0_stateless/01825_new_type_json_10.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
-- Tags: no-fasttest

SET allow_experimental_json_type = 1;
SET allow_suspicious_types_in_order_by = 1;

DROP TABLE IF EXISTS t_json_10;
CREATE TABLE t_json_10 (o JSON) ENGINE = Memory;
Expand Down
6 changes: 3 additions & 3 deletions tests/queries/0_stateless/01825_new_type_json_11.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ $CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) as
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key_1[]))) as path FROM t_json_11 order by path;"
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(obj.key_1[].key_3[])))) as path FROM t_json_11 order by path;"
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arrayJoin(obj.key_1[].key_3[].key_4[]))))) as path FROM t_json_11 order by path;"
$CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_11 ORDER BY obj.id FORMAT JSONEachRow"
$CLICKHOUSE_CLIENT -q "SELECT obj.key_1[].key_3 FROM t_json_11 ORDER BY obj.id FORMAT JSONEachRow"
$CLICKHOUSE_CLIENT -q "SELECT obj.key_1[].key_3[].key_4[].key_5, obj.key_1[].key_3[].key_7 FROM t_json_11 ORDER BY obj.id"
$CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_11 ORDER BY obj.id FORMAT JSONEachRow" --allow_suspicious_types_in_order_by 1
$CLICKHOUSE_CLIENT -q "SELECT obj.key_1[].key_3 FROM t_json_11 ORDER BY obj.id FORMAT JSONEachRow" --allow_suspicious_types_in_order_by 1
$CLICKHOUSE_CLIENT -q "SELECT obj.key_1[].key_3[].key_4[].key_5, obj.key_1[].key_3[].key_7 FROM t_json_11 ORDER BY obj.id" --allow_suspicious_types_in_order_by 1

$CLICKHOUSE_CLIENT -q "DROP TABLE t_json_11;"
4 changes: 2 additions & 2 deletions tests/queries/0_stateless/01825_new_type_json_12.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ $CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) as
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key_0[]))) as path FROM t_json_12 order by path;"
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(obj.key_0[].key_1[])))) as path FROM t_json_12 order by path;"
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arrayJoin(obj.key_0[].key_1[].key_3[]))))) as path FROM t_json_12 order by path;"
$CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_12 ORDER BY obj.id FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
$CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_12 ORDER BY obj.id FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 --allow_suspicious_types_in_order_by 1
$CLICKHOUSE_CLIENT -q "SELECT obj.key_0[].key_1[].key_3[].key_4, obj.key_0[].key_1[].key_3[].key_5, \
obj.key_0[].key_1[].key_3[].key_6, obj.key_0[].key_1[].key_3[].key_7 FROM t_json_12 ORDER BY obj.id"
obj.key_0[].key_1[].key_3[].key_6, obj.key_0[].key_1[].key_3[].key_7 FROM t_json_12 ORDER BY obj.id" --allow_suspicious_types_in_order_by 1

$CLICKHOUSE_CLIENT -q "DROP TABLE t_json_12;"
Loading

0 comments on commit 3efeccd

Please sign in to comment.