Skip to content

Commit

Permalink
support lossless schema change for enum type (#1334) (#1336)
Browse files Browse the repository at this point in the history
  • Loading branch information
windtalker authored Dec 31, 2020
1 parent d54685e commit 6665d6e
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 15 deletions.
6 changes: 6 additions & 0 deletions dbms/src/DataTypes/DataTypeEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ class DataTypeEnum final : public IDataTypeEnum
return it->second;
}

bool hasElement(StringRef name) const
{
// todo consider collation
return name_to_value_map.find(name) != name_to_value_map.end();
}

FieldType getValue(StringRef name) const
{
const auto it = name_to_value_map.find(name);
Expand Down
46 changes: 34 additions & 12 deletions dbms/src/DataTypes/isSupportedDataTypeCast.cpp
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
#include <DataTypes/isSupportedDataTypeCast.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/isSupportedDataTypeCast.h>
#include <Functions/FunctionHelpers.h>

namespace DB
{

bool isSupportedDataTypeCast(const DataTypePtr &from, const DataTypePtr &to)
bool isSupportedDataTypeCast(const DataTypePtr & from, const DataTypePtr & to)
{
assert(from != nullptr && to != nullptr);
/// `to` is equal to `from`
Expand Down Expand Up @@ -96,17 +97,38 @@ bool isSupportedDataTypeCast(const DataTypePtr &from, const DataTypePtr &to)
bool to_is_decimal = IsDecimalDataType(to);
if (from_is_decimal || to_is_decimal)
{
if (from_is_decimal && to_is_decimal)
// not support change Decimal to other type, neither other type to Decimal
return false;
}
}

if (from->isEnum() && to->isEnum())
{
/// support cast Enum to Enum if the from type is a subset of the target type
const auto from_enum8 = checkAndGetDataType<DataTypeEnum8>(from.get());
const auto to_enum8 = checkAndGetDataType<DataTypeEnum8>(to.get());
if (from_enum8 && to_enum8)
{
for (auto & value : from_enum8->getValues())
{
// not support change Decimal to other type, neither other type to Decimal
return false;
if (!to_enum8->hasElement(value.first) || to_enum8->getValue(value.first) != value.second)
return false;
}

return from->equals(*to);
}
const auto from_enum16 = checkAndGetDataType<DataTypeEnum16>(from.get());
const auto to_enum16 = checkAndGetDataType<DataTypeEnum16>(to.get());
if (from_enum16 && to_enum16)
{
for (auto & value : from_enum16->getValues())
{
if (!to_enum16->hasElement(value.first) || to_enum16->getValue(value.first) != value.second)
return false;
}
}
return true;
}

// TODO enums, set?
// TODO set?

/// some DataTypes that support in ClickHouse but not in TiDB

Expand Down
43 changes: 42 additions & 1 deletion dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,16 @@ bool castNonNullNumericColumn(const DataTypePtr & disk_type_not_null_,
return true;
}
}
else if (checkDataType<DataTypeEnum8>(disk_type_not_null) && checkDataType<DataTypeEnum8>(read_type_not_null))
{
memory_col_not_null->insertRangeFrom(*disk_col_not_null, rows_offset, rows_limit);
return true;
}
else if (checkDataType<DataTypeEnum16>(disk_type_not_null) && checkDataType<DataTypeEnum16>(read_type_not_null))
{
memory_col_not_null->insertRangeFrom(*disk_col_not_null, rows_offset, rows_limit);
return true;
}

// else is not support
return false;
Expand Down Expand Up @@ -316,19 +326,50 @@ void convertColumnByColumnDefine(const DataTypePtr & disk_type,
}
}

std::pair<bool, bool> checkColumnTypeCompatibility(const DataTypePtr & source_type, const DataTypePtr & target_type)
{
if (unlikely(!isSupportedDataTypeCast(source_type, target_type)))
{
return std::make_pair(false, false);
}

bool need_cast_data = true;
/// Currently, cast Enum to Enum is allowed only if from_type
/// is a subset of the target_type, so when cast from source
/// enum type to target enum type, the data do not need to be
/// casted.
bool source_is_null = source_type->isNullable();
bool target_is_null = source_type->isNullable();
if (source_is_null && target_is_null)
{
need_cast_data = !(typeid_cast<const DataTypeNullable *>(source_type.get())->isEnum()
&& typeid_cast<const DataTypeNullable *>(target_type.get())->isEnum());
}
else if (!source_is_null && !target_is_null)
{
need_cast_data = !(source_type->isEnum() && target_type->isEnum());
}
return std::make_pair(true, need_cast_data);
}

ColumnPtr convertColumnByColumnDefineIfNeed(const DataTypePtr & from_type, ColumnPtr && from_col, const ColumnDefine & to_column_define)
{
// No need to convert
if (likely(from_type->equals(*to_column_define.type)))
return std::move(from_col);

// Check if support
if (unlikely(!isSupportedDataTypeCast(from_type, to_column_define.type)))
auto [compatible, need_data_cast] = checkColumnTypeCompatibility(from_type, to_column_define.type);
if (unlikely(!compatible))
{
throw Exception("Reading mismatch data type pack. Cast from " + from_type->getName() + " to " + to_column_define.type->getName()
+ " is NOT supported!",
ErrorCodes::NOT_IMPLEMENTED);
}
if (unlikely(!need_data_cast))
{
return std::move(from_col);
}

// Cast column's data from DataType in disk to what we need now
auto to_col = to_column_define.type->createColumn();
Expand Down
25 changes: 23 additions & 2 deletions dbms/src/Storages/Transaction/SchemaBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,28 @@ using ColumnInfosModifier = std::function<void(ColumnInfos & column_infos)>;
using SchemaChange = std::pair<AlterCommands, ColumnInfosModifier>;
using SchemaChanges = std::vector<SchemaChange>;

bool typeDiffers(const TiDB::ColumnInfo & a, const TiDB::ColumnInfo & b)
{
if (a.tp != b.tp || a.hasNotNullFlag() != b.hasNotNullFlag() || a.hasUnsignedFlag() != b.hasUnsignedFlag())
return true;
if (a.tp == TypeEnum || a.tp == TypeSet)
{
if (a.elems.size() != b.elems.size())
return true;
for (size_t i = 0; i < a.elems.size(); i++)
{
if (a.elems[i].first != b.elems[i].first)
return true;
}
return false;
}
else if (a.tp == TypeNewDecimal)
{
return a.flen != b.flen || a.decimal != b.decimal;
}
return false;
}

/// When schema change detected, the modification to original table info must be preserved as well.
/// With the preserved table info modifications, table info changes along with applying alter commands.
/// In other words, table info and storage structure (altered by applied alter commands) are always identical,
Expand Down Expand Up @@ -186,8 +208,7 @@ inline SchemaChanges detectSchemaChanges(Logger * log, Context & context, const
if (column_info_.id == orig_column_info.id && column_info_.name != orig_column_info.name)
LOG_INFO(log, "detect column " << orig_column_info.name << " rename to " << column_info_.name);

return column_info_.id == orig_column_info.id
&& (column_info_.tp != orig_column_info.tp || column_info_.hasNotNullFlag() != orig_column_info.hasNotNullFlag());
return column_info_.id == orig_column_info.id && typeDiffers(column_info_, orig_column_info);
});

if (column_info != table_info.columns.end())
Expand Down
30 changes: 30 additions & 0 deletions tests/fullstack-test/ddl/alter_column_enum.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
mysql> drop table if exists test.a1
mysql> create table test.a1(id int(11) NOT NULL AUTO_INCREMENT, name enum('A','B','C') DEFAULT NULL, PRIMARY KEY (id));
mysql> insert into test.a1 values (1,'A'),(2,'B'),(3,'C');
mysql> alter table test.a1 set tiflash replica 1;

func> wait_table test a1

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.a1 order by id;
+----+------+
| id | name |
+----+------+
| 1 | A |
| 2 | B |
| 3 | C |
+----+------+

mysql> alter table test.a1 change name name enum('A','B','C', 'D');
mysql> insert into test.a1 values (4,'D');

mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.a1 order by id;
+----+------+
| id | name |
+----+------+
| 1 | A |
| 2 | B |
| 3 | C |
| 4 | D |
+----+------+

mysql> drop table if exists test.a1

0 comments on commit 6665d6e

Please sign in to comment.