-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[BugFix] Fix check of overflow for converting floating to intergal (#…
…49707) Signed-off-by: zihe.liu <[email protected]> (cherry picked from commit ebc98dd) # Conflicts: # be/src/exprs/vectorized/cast_expr.cpp # be/src/formats/avro/numeric_column.cpp # be/test/CMakeLists.txt # test/sql/test_function/R/test_cast # test/sql/test_function/T/test_cast
- Loading branch information
1 parent
6d1e2a5
commit de14287
Showing
11 changed files
with
775 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
// Copyright 2021-present StarRocks, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "numeric_column.h" | ||
|
||
#include "column/fixed_length_column.h" | ||
#include "gutil/strings/substitute.h" | ||
#include "util/numeric_types.h" | ||
#include "util/string_parser.hpp" | ||
|
||
namespace starrocks { | ||
|
||
template <typename FromType, typename ToType> | ||
static inline bool checked_cast(const FromType& from, ToType* to) { | ||
*to = static_cast<ToType>(from); | ||
|
||
// NOTE: use lowest() because float and double needed. | ||
DIAGNOSTIC_PUSH | ||
#if defined(__clang__) | ||
DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") | ||
#endif | ||
return check_number_overflow<FromType, ToType>(from); | ||
DIAGNOSTIC_POP | ||
} | ||
|
||
template <typename T> | ||
static Status add_column_with_numeric_value(FixedLengthColumn<T>* column, const TypeDescriptor& type_desc, | ||
const std::string& name, const avro_value_t& value) { | ||
switch (avro_value_get_type(&value)) { | ||
case AVRO_INT32: { | ||
int in; | ||
if (avro_value_get_int(&value, &in) != 0) { | ||
auto err_msg = strings::Substitute("Get int value error. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
T out{}; | ||
|
||
if (!checked_cast(in, &out)) { | ||
column->append_numbers(&out, sizeof(out)); | ||
} else { | ||
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, in); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
return Status::OK(); | ||
} | ||
case AVRO_INT64: { | ||
int64_t in; | ||
if (avro_value_get_long(&value, &in) != 0) { | ||
auto err_msg = strings::Substitute("Get int64 value error. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
T out{}; | ||
|
||
if (!checked_cast(in, &out)) { | ||
column->append_numbers(&out, sizeof(out)); | ||
} else { | ||
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, in); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
return Status::OK(); | ||
} | ||
case AVRO_BOOLEAN: { | ||
int in; | ||
if (avro_value_get_boolean(&value, &in) != 0) { | ||
auto err_msg = strings::Substitute("Get boolean value error. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
T out{}; | ||
|
||
if (!checked_cast(in, &out)) { | ||
column->append_numbers(&out, sizeof(out)); | ||
} else { | ||
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, in); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
case AVRO_FLOAT: { | ||
float in; | ||
if (avro_value_get_float(&value, &in) != 0) { | ||
auto err_msg = strings::Substitute("Get float value error. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
|
||
T out{}; | ||
|
||
if (!checked_cast(in, &out)) { | ||
column->append_numbers(&out, sizeof(out)); | ||
} else { | ||
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, in); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
case AVRO_DOUBLE: { | ||
double in; | ||
if (avro_value_get_double(&value, &in) != 0) { | ||
auto err_msg = strings::Substitute("Get double value error. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
|
||
T out{}; | ||
|
||
if (!checked_cast(in, &out)) { | ||
column->append_numbers(&out, sizeof(out)); | ||
} else { | ||
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, in); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
default: { | ||
auto err_msg = strings::Substitute("Unsupported value type. column=$0", name); | ||
return Status::DataQualityError(err_msg); | ||
} | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
template <typename T> | ||
static Status add_column_with_string_value_numeric(FixedLengthColumn<T>* column, const TypeDescriptor& type_desc, | ||
const std::string& name, const avro_value_t& value) { | ||
const char* in; | ||
size_t size; | ||
if (avro_value_get_string(&value, &in, &size) != 0) { | ||
auto err_msg = strings::Substitute("Get string value error. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
|
||
// The size returned for a string object will include the NUL terminator, | ||
// it will be one more than you’d get from calling strlen on the content. | ||
// Please refer to this link: https://avro.apache.org/docs/1.11.1/api/c/ | ||
--size; | ||
|
||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; | ||
|
||
T v{}; | ||
if constexpr (std::is_floating_point<T>::value) { | ||
v = StringParser::string_to_float<T>(in, size, &parse_result); | ||
} else { | ||
v = StringParser::string_to_int<T>(in, size, &parse_result); | ||
} | ||
|
||
if (parse_result == StringParser::PARSE_SUCCESS) { | ||
column->append_numbers(&v, sizeof(v)); | ||
return Status::OK(); | ||
} else { | ||
// Attemp to parse the string as float. | ||
auto d = StringParser::string_to_float<double>(in, size, &parse_result); | ||
if (parse_result == StringParser::PARSE_SUCCESS) { | ||
if (!checked_cast(d, &v)) { | ||
column->append_numbers(&v, sizeof(v)); | ||
return Status::OK(); | ||
} else { | ||
auto err_msg = strings::Substitute("Value is overflow. column=$0, value=$1", name, d); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
} | ||
|
||
std::string err_msg = strings::Substitute("Unable to cast string value to BIGINT. value=$0, column=$1", | ||
std::string(in, size), name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
} | ||
|
||
template <typename T> | ||
Status add_numeric_column(Column* column, const TypeDescriptor& type_desc, const std::string& name, | ||
const avro_value_t& value) { | ||
auto numeric_column = down_cast<FixedLengthColumn<T>*>(column); | ||
avro_type_t type = avro_value_get_type(&value); | ||
switch (type) { | ||
case AVRO_INT32: | ||
case AVRO_INT64: | ||
case AVRO_FLOAT: | ||
case AVRO_DOUBLE: | ||
case AVRO_BOOLEAN: { | ||
return add_column_with_numeric_value(numeric_column, type_desc, name, value); | ||
} | ||
|
||
case AVRO_STRING: { | ||
return add_column_with_string_value_numeric(numeric_column, type_desc, name, value); | ||
} | ||
|
||
default: { | ||
auto err_msg = strings::Substitute("Unsupported value type. Numeric type is required. column=$0", name); | ||
return Status::InvalidArgument(err_msg); | ||
} | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
// Explicit instantiations of add_numeric_column for each supported column element type.
// Integer element types, narrowest first.
template Status add_numeric_column<int8_t>(Column* column, const TypeDescriptor& type_desc,
                                           const std::string& name, const avro_value_t& value);
template Status add_numeric_column<uint8_t>(Column* column, const TypeDescriptor& type_desc,
                                            const std::string& name, const avro_value_t& value);
template Status add_numeric_column<int16_t>(Column* column, const TypeDescriptor& type_desc,
                                            const std::string& name, const avro_value_t& value);
template Status add_numeric_column<int32_t>(Column* column, const TypeDescriptor& type_desc,
                                            const std::string& name, const avro_value_t& value);
template Status add_numeric_column<int64_t>(Column* column, const TypeDescriptor& type_desc,
                                            const std::string& name, const avro_value_t& value);
// Floating-point element types.
template Status add_numeric_column<float>(Column* column, const TypeDescriptor& type_desc,
                                          const std::string& name, const avro_value_t& value);
template Status add_numeric_column<double>(Column* column, const TypeDescriptor& type_desc,
                                           const std::string& name, const avro_value_t& value);
|
||
} // namespace starrocks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// Copyright 2021-present StarRocks, Inc. All rights reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <limits> | ||
#include <type_traits> | ||
|
||
namespace starrocks { | ||
|
||
/// Inclusive lower bound, expressed in the floating-point `FromType`, of the values that fit
/// in the integral `ToType`. `lowest()` is 0 or `-2^n`, so the conversion is exact.
template <typename FromType, typename ToType>
inline constexpr FromType floating_to_intergral_lower_bound =
        static_cast<FromType>(std::numeric_limits<ToType>::lowest());

/// Exclusive upper bound, expressed in the floating-point `FromType`, of the values that fit
/// in the integral `ToType`. It equals `max() + 1` (a power of two), computed as
/// `2 * (max() / 2 + 1)` so that the integer arithmetic itself cannot overflow.
template <typename FromType, typename ToType>
inline constexpr FromType floating_to_intergral_upper_bound =
        static_cast<FromType>(2) * static_cast<FromType>(std::numeric_limits<ToType>::max() / 2 + 1);

/// Check whether the value of type `FromType` overflows when converted to type `ToType`.
/// If overflow, return true; otherwise, return false.
///
/// - floating-point -> integral: true unless `lowest() <= value < max() + 1`; NaN is reported
///   as overflow. Values strictly between `lowest() - 1` and `lowest()` are also reported.
/// - integral -> integral: exact, sign-aware range check; widening conversions never overflow.
/// - everything else (the target is floating-point): true when the value lies outside
///   `[lowest(), max()]` of `ToType`, so +/-infinity is reported while NaN is not.
template <typename FromType, typename ToType>
bool check_number_overflow(FromType value) {
    if constexpr (std::is_floating_point_v<FromType> && std::is_integral_v<ToType>) {
        // For floating-point numbers, we cannot use `value > (FromType)std::numeric_limits<ToType>::max()` to
        // determine whether `value` exceeds the maximum value of ToType. The reason is as follows:
        //
        // `std::numeric_limits<ToType>::max()` is `2^n-1`, where n is 63, 31, 15 or 7. This number may not be
        // exactly representable in FromType, in which case the conversion rounds it up to `2^n`.
        // Then, when `value` is `2^n`, `value > (FromType)std::numeric_limits<ToType>::max()` returns false,
        // although the actual conversion overflows and yields the maximum or minimum value of ToType,
        // depending on the architecture, compiler, and compilation parameters.
        //
        // Because `2^n` can be exactly represented by floating-point numbers, we use `value >= (FromType)2^n`
        // to detect overflow, rather than `value > (FromType)(2^n-1)`.
        // The negated form makes NaN (for which every comparison is false) count as overflow.
        return !(value >= floating_to_intergral_lower_bound<FromType, ToType> &&
                 value < floating_to_intergral_upper_bound<FromType, ToType>);
    } else if constexpr (std::is_integral_v<FromType> && std::is_integral_v<ToType>) {
        // A bound of ToType is cast to FromType only when it is representable there. Casting it
        // unconditionally truncates the bound for widening conversions (e.g. int32 -> int64) and
        // would flag every value as overflow.
        constexpr bool to_is_narrower =
                std::numeric_limits<ToType>::digits < std::numeric_limits<FromType>::digits;
        bool below = false;
        bool above = false;
        if constexpr (std::is_signed_v<FromType>) {
            if constexpr (!std::is_signed_v<ToType>) {
                below = value < 0;
            } else if constexpr (to_is_narrower) {
                below = value < static_cast<FromType>(std::numeric_limits<ToType>::lowest());
            }
        }
        if constexpr (to_is_narrower) {
            above = value > static_cast<FromType>(std::numeric_limits<ToType>::max());
        }
        return below | above;
    } else {
        // std::numeric_limits<T>::lowest() is a finite value x such that there is no other
        // finite value y where y < x.
        // This is different from std::numeric_limits<T>::min() for floating-point types.
        // So we use lowest instead of min for lower bound of all types.
        //
        // Compare in the common type of both sides: casting the bounds of ToType to a narrower
        // FromType (e.g. float limits to int64, or double limits to float) is out of range and
        // therefore undefined behavior.
        using Common = std::common_type_t<FromType, ToType>;
        const Common wide_value = static_cast<Common>(value);
        return (wide_value < static_cast<Common>(std::numeric_limits<ToType>::lowest())) |
               (wide_value > static_cast<Common>(std::numeric_limits<ToType>::max()));
    }
}
|
||
} // namespace starrocks |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.