diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index 5698913c82d6b3..b932dcacae808a 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -20,8 +20,8 @@ #include #include #include -#include +#include #include #include #include @@ -35,6 +35,7 @@ #include "olap/rowset/rowset_fwd.h" #include "olap/tablet_fwd.h" #include "runtime/runtime_state.h" +#include "vec/core/block.h" #include "vec/exec/format/generic_reader.h" namespace doris { @@ -48,7 +49,6 @@ class TTabletInfo; class StorageEngine; namespace vectorized { -class Block; class GenericReader; class VExprContext; } // namespace vectorized diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index 5771434c4c6321..0182e5273f7216 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -23,8 +23,8 @@ #include #include #include -#include +#include // IWYU pragma: no_include #include // IWYU pragma: keep #include @@ -40,6 +40,7 @@ #include "runtime/large_int_value.h" #include "runtime/primitive_type.h" #include "vec/common/int_exp.h" +#include "vec/common/string_utils/string_utils.h" #include "vec/core/extended_types.h" #include "vec/core/wide_integer.h" #include "vec/data_types/data_type_decimal.h" diff --git a/be/src/vec/data_types/data_type_ipv4.h b/be/src/vec/data_types/data_type_ipv4.h index 88e6a3756bce64..65eb0d57da9aa0 100644 --- a/be/src/vec/data_types/data_type_ipv4.h +++ b/be/src/vec/data_types/data_type_ipv4.h @@ -18,15 +18,15 @@ #pragma once #include -#include -#include #include +#include #include #include "common/status.h" #include "olap/olap_common.h" #include "runtime/define_primitive_type.h" +#include "vec/common/pod_array.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_number_base.h" @@ -45,9 +45,7 @@ namespace doris::vectorized { class DataTypeIPv4 final : public DataTypeNumberBase { public: TypeIndex get_type_id() const override { return TypeIndex::IPv4; } - TypeDescriptor get_type_as_type_descriptor() const override { - return TypeDescriptor(TYPE_IPV4); - } + TypeDescriptor get_type_as_type_descriptor() const override { return {TYPE_IPV4}; } const char* get_family_name() const override { return "IPv4"; } std::string do_get_name() const override { return "IPv4"; } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index a56eb00dbdd6fb..d654e3ae22d042 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -19,7 +19,6 @@ #include -#include "common/exception.h" #include "common/status.h" #include "util/jsonb_document.h" #include "vec/columns/column.h" @@ -28,9 +27,7 @@ #include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" -namespace doris { - -namespace vectorized { +namespace doris::vectorized { class Arena; #include "common/compile_check_begin.h" @@ -47,8 +44,8 @@ Status DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int ColumnPtr ptr = result.first; row_num = result.second; - auto& data_column = assert_cast(*ptr); - auto& offsets = data_column.get_offsets(); + const auto& data_column = assert_cast(*ptr); + const auto& offsets = data_column.get_offsets(); size_t offset = offsets[row_num - 1]; size_t next_offset = offsets[row_num]; @@ -68,7 +65,7 @@ Status DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int Status DataTypeArraySerDe::deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -145,7 +142,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice } } - int elem_deserialized = 0; + uint64_t elem_deserialized = 0; Status st = nested_serde->deserialize_column_from_json_vector(nested_column, slices, &elem_deserialized, options); offsets.emplace_back(offsets.back() + elem_deserialized); @@ -178,7 +175,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text( } } - int elem_deserialized = 0; + uint64_t elem_deserialized = 0; Status status = nested_serde->deserialize_column_from_hive_text_vector( nested_column, slices, &elem_deserialized, options, hive_text_complex_type_delimiter_level + 1); @@ -187,7 +184,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text( } Status DataTypeArraySerDe::deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level) const { DESERIALIZE_COLUMN_FROM_HIVE_TEXT_VECTOR(); return Status::OK(); @@ -200,8 +197,8 @@ Status DataTypeArraySerDe::serialize_one_cell_to_hive_text( ColumnPtr ptr = result.first; row_num = result.second; - auto& data_column = assert_cast(*ptr); - auto& offsets = data_column.get_offsets(); + const auto& data_column = assert_cast(*ptr); + const auto& offsets = data_column.get_offsets(); size_t start = offsets[row_num - 1]; size_t end = offsets[row_num]; @@ -237,7 +234,7 @@ Status DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, rapidjs Arena& mem_pool, int64_t row_num) const { // Use allocator instead of stack memory, since rapidjson hold the reference of String value // otherwise causes stack use after free - auto& column_array = static_cast(column); + const auto& column_array = static_cast(column); if (row_num > column_array.size()) { return Status::InternalError("row num {} out of range {}!", row_num, column_array.size()); } @@ -246,7 +243,7 @@ Status DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, rapidjs if (!mem) { return Status::InternalError("Malloc failed"); } - vectorized::Field* array = new (mem) vectorized::Field(column_array[row_num]); + auto* array = new (mem) vectorized::Field(column_array[row_num]); convert_field_to_rapidjson(*array, result, allocator); return Status::OK(); @@ -270,18 +267,18 @@ Status DataTypeArraySerDe::read_one_cell_from_json(IColumn& column, } void DataTypeArraySerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { - auto blob = static_cast(arg); + const auto* blob = static_cast(arg); column.deserialize_and_insert_from_arena(blob->getBlob()); } void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const { - auto& array_column = static_cast(column); - auto& offsets = array_column.get_offsets(); - auto& nested_data = array_column.get_data(); + const auto& array_column = static_cast(column); + const auto& offsets = array_column.get_offsets(); + const auto& nested_data = array_column.get_data(); auto& builder = assert_cast(*array_builder); - auto nested_builder = builder.value_builder(); + auto* nested_builder = builder.value_builder(); for (size_t array_idx = start; array_idx < end; ++array_idx) { if (null_map && (*null_map)[array_idx]) { checkArrowStatus(builder.AppendNull(), column.get_name(), @@ -295,17 +292,17 @@ void DataTypeArraySerDe::write_column_to_arrow(const IColumn& column, const Null } void DataTypeArraySerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& column_array = static_cast(column); auto& offsets_data = column_array.get_offsets(); - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); auto arrow_offsets_array = concrete_array->offsets(); - auto arrow_offsets = dynamic_cast(arrow_offsets_array.get()); + auto* arrow_offsets = dynamic_cast(arrow_offsets_array.get()); auto prev_size = offsets_data.back(); auto arrow_nested_start_offset = arrow_offsets->Value(start); auto arrow_nested_end_offset = arrow_offsets->Value(end); - for (int64_t i = start + 1; i < end + 1; ++i) { + for (auto i = start + 1; i < end + 1; ++i) { // convert to doris offset, start from offsets.back() offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) - arrow_nested_start_offset); } @@ -319,9 +316,9 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int64_t row_idx_of_mysql, bool col_const, const FormatOptions& options) const { - auto& column_array = assert_cast(column); - auto& offsets = column_array.get_offsets(); - auto& data = column_array.get_data(); + const auto& column_array = assert_cast(column); + const auto& offsets = column_array.get_offsets(); + const auto& data = column_array.get_data(); bool is_nested_string = data.is_column_string(); const auto row_idx_of_col_arr = index_check_const(row_idx_of_mysql, col_const); result.open_dynamic_mode(); @@ -434,5 +431,4 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a } return Status::OK(); } -} // namespace vectorized -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index cdd2115576030e..f9d852b3843468 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -18,9 +18,9 @@ #pragma once #include -#include -#include +#include +#include #include "common/status.h" #include "data_type_serde.h" @@ -36,8 +36,8 @@ class Arena; class DataTypeArraySerDe : public DataTypeSerDe { public: - DataTypeArraySerDe(const DataTypeSerDeSPtr& _nested_serde, int nesting_level = 1) - : DataTypeSerDe(nesting_level), nested_serde(_nested_serde) {} + DataTypeArraySerDe(DataTypeSerDeSPtr _nested_serde, int nesting_level = 1) + : DataTypeSerDe(nesting_level), nested_serde(std::move(_nested_serde)) {} Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num, BufferWritable& bw, FormatOptions& options) const override; @@ -49,14 +49,14 @@ class DataTypeArraySerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; Status deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; Status serialize_one_cell_to_hive_text( @@ -81,8 +81,8 @@ class DataTypeArraySerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, @@ -101,7 +101,7 @@ class DataTypeArraySerDe : public DataTypeSerDe { nested_serde->set_return_object_as_string(value); } - virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; } + DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; } private: template diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp b/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp index 5d024a418340e8..2e4f8d72c6fe00 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp @@ -60,7 +60,7 @@ Status DataTypeBitMapSerDe::serialize_one_cell_to_json(const IColumn& column, in } Status DataTypeBitMapSerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR() return Status::OK(); diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index 24c2e6f930d203..22c450cd27ec35 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -45,7 +45,7 @@ class DataTypeBitMapSerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start, @@ -61,8 +61,8 @@ class DataTypeBitMapSerDe : public DataTypeSerDe { arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override { + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "read_column_from_arrow with type " + column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp index c91db85be5bcda..16ebb1888eb31d 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -19,8 +19,6 @@ #include -#include - #include "vec/columns/column_const.h" #include "vec/io/io_helper.h" @@ -63,7 +61,7 @@ Status DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, in } Status DataTypeDate64SerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -129,7 +127,7 @@ Status DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, } Status DataTypeDateTimeSerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR() return Status::OK(); @@ -162,11 +160,11 @@ Status DataTypeDateTimeSerDe::deserialize_one_cell_from_json(IColumn& column, Sl void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const { - auto& col_data = static_cast&>(column).get_data(); + const auto& col_data = static_cast&>(column).get_data(); auto& string_builder = assert_cast(*array_builder); for (size_t i = start; i < end; ++i) { char buf[64]; - const VecDateTimeValue* time_val = (const VecDateTimeValue*)(&col_data[i]); + const auto* time_val = (const VecDateTimeValue*)(&col_data[i]); size_t len = time_val->to_buffer(buf); if (null_map && (*null_map)[i]) { checkArrowStatus(string_builder.AppendNull(), column.get_name(), @@ -199,38 +197,38 @@ static int64_t time_unit_divisor(arrow::TimeUnit::type unit) { } void DataTypeDate64SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& col_data = static_cast&>(column).get_data(); int64_t divisor = 1; int64_t multiplier = 1; if (arrow_array->type()->id() == arrow::Type::DATE64) { - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); divisor = 1000; //ms => secs - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { VecDateTimeValue v; v.from_unixtime( static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); col_data.emplace_back(binary_cast(v)); } } else if (arrow_array->type()->id() == arrow::Type::TIMESTAMP) { - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); const auto type = std::static_pointer_cast(arrow_array->type()); divisor = time_unit_divisor(type->unit()); if (divisor == 0L) { throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Invalid Time Type: " + type->name()); } - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { VecDateTimeValue v; v.from_unixtime( static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); col_data.emplace_back(binary_cast(v)); } } else if (arrow_array->type()->id() == arrow::Type::DATE32) { - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); multiplier = 24 * 60 * 60; // day => secs - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { VecDateTimeValue v; v.from_unixtime( static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); @@ -245,7 +243,7 @@ Status DataTypeDate64SerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int64_t row_idx, bool col_const, const FormatOptions& options) const { - auto& data = assert_cast&>(column).get_data(); + const auto& data = assert_cast&>(column).get_data(); const auto col_index = index_check_const(row_idx, col_const); auto time_num = data[col_index]; VecDateTimeValue time_val = binary_cast(time_num); @@ -286,8 +284,8 @@ Status DataTypeDate64SerDe::write_column_to_orc(const std::string& timezone, con orc::ColumnVectorBatch* orc_col_batch, int64_t start, int64_t end, std::vector& buffer_list) const { - auto& col_data = static_cast&>(column).get_data(); - orc::StringVectorBatch* cur_batch = dynamic_cast(orc_col_batch); + const auto& col_data = static_cast&>(column).get_data(); + auto* cur_batch = dynamic_cast(orc_col_batch); INIT_MEMORY_FOR_ORC_WRITER() diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index c3b97b4273c9cf..e56b738d451225 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -53,14 +53,14 @@ class DataTypeDate64SerDe : public DataTypeNumberSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; @@ -93,7 +93,7 @@ class DataTypeDateTimeSerDe : public DataTypeDate64SerDe { Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; }; } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index fd030e85fe5e8e..f43b2919c40b4c 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -71,7 +71,7 @@ Status DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column } Status DataTypeDateTimeV2SerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -130,8 +130,9 @@ void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, const } void DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column, - const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const { + const arrow::Array* arrow_array, int64_t start, + int64_t end, + const cctz::time_zone& ctz) const { auto& col_data = static_cast(column).get_data(); int64_t divisor = 1; if (arrow_array->type()->id() == arrow::Type::TIMESTAMP) { @@ -159,7 +160,7 @@ void DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column, return; } } - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { auto utc_epoch = static_cast(concrete_array->Value(value_i)); DateV2Value v; @@ -244,7 +245,7 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone, } Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, + IColumn& column, Slice& slice, uint64_t rows, uint64_t* num_deserialized, const FormatOptions& options) const { if (rows < 1) [[unlikely]] { return Status::OK(); @@ -260,7 +261,7 @@ Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json( } void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column, - int times) const { + uint64_t times) const { if (times < 1) [[unlikely]] { return; } diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index 484df3df62a3e4..12c5d8d913dd1d 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -56,14 +56,14 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, @@ -77,10 +77,10 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { int64_t start, int64_t end, std::vector& buffer_list) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const override; - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + void insert_column_last_value_multiple_times(IColumn& column, uint64_t times) const override; private: template diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index a8455fd1c9eae6..9749f5f098064f 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -56,7 +56,7 @@ Status DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, in } Status DataTypeDateV2SerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -101,15 +101,15 @@ void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const Nul } void DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& col_data = static_cast&>(column).get_data(); - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); int64_t divisor = 1; int64_t multiplier = 1; multiplier = 24 * 60 * 60; // day => secs - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { DateV2Value v; v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, ctz); @@ -122,7 +122,7 @@ Status DataTypeDateV2SerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int64_t row_idx, bool col_const, const FormatOptions& options) const { - auto& data = assert_cast&>(column).get_data(); + const auto& data = assert_cast&>(column).get_data(); auto col_index = index_check_const(row_idx, col_const); DateV2Value date_val = binary_cast>(data[col_index]); @@ -178,7 +178,8 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con } Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice, - int rows, int* num_deserialized, + uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const { if (rows < 1) [[unlikely]] { return Status::OK(); @@ -193,7 +194,7 @@ Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, } void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column, - int times) const { + uint64_t times) const { if (times < 1) [[unlikely]] { return; } diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index c1f8bab15ad004..a28b23a51230ef 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -54,14 +54,14 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; @@ -74,11 +74,11 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe { int64_t start, int64_t end, std::vector& buffer_list) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const override; - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + void insert_column_last_value_multiple_times(IColumn& column, uint64_t times) const override; private: template diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index 32b08c3990fe16..e538141ef50ca5 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -62,7 +62,7 @@ Status DataTypeDecimalSerDe::serialize_one_cell_to_json(const IColumn& column template Status DataTypeDecimalSerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -163,9 +163,10 @@ void DataTypeDecimalSerDe::write_column_to_arrow(const IColumn& column, const template void DataTypeDecimalSerDe::read_column_from_arrow(IColumn& column, - const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const { - auto concrete_array = dynamic_cast(arrow_array); + const arrow::Array* arrow_array, int64_t start, + int64_t end, + const cctz::time_zone& ctz) const { + const auto* concrete_array = dynamic_cast(arrow_array); const auto* arrow_decimal_type = static_cast(arrow_array->type().get()); const auto arrow_scale = arrow_decimal_type->scale(); @@ -174,7 +175,7 @@ void DataTypeDecimalSerDe::read_column_from_arrow(IColumn& column, // Decimal for deicmalv3 if constexpr (std::is_same_v>) { // TODO check precision - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { auto value = *reinterpret_cast( concrete_array->Value(value_i)); // convert scale to 9; @@ -198,7 +199,7 @@ void DataTypeDecimalSerDe::read_column_from_arrow(IColumn& column, } } else if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { - for (size_t value_i = start; value_i < end; ++value_i) { + for (auto value_i = start; value_i < end; ++value_i) { column_data.emplace_back(*reinterpret_cast(concrete_array->Value(value_i))); } } else { @@ -281,7 +282,7 @@ Status DataTypeDecimalSerDe::write_column_to_orc(const std::string& timezone, template Status DataTypeDecimalSerDe::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, + IColumn& column, Slice& slice, uint64_t rows, uint64_t* num_deserialized, const FormatOptions& options) const { if (rows < 1) [[unlikely]] { return Status::OK(); @@ -298,7 +299,7 @@ Status DataTypeDecimalSerDe::deserialize_column_from_fixed_json( template void DataTypeDecimalSerDe::insert_column_last_value_multiple_times(IColumn& column, - int times) const { + uint64_t times) const { if (times < 1) [[unlikely]] { return; } diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 51867ced18fb04..4eae03b9d2d708 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -87,7 +87,7 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start, @@ -102,8 +102,8 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; @@ -116,11 +116,11 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { int64_t start, int64_t end, std::vector& buffer_list) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const override; - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + void insert_column_last_value_multiple_times(IColumn& column, uint64_t times) const override; private: template diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index 42260b092605e1..d7c9e7285cde12 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -65,7 +65,7 @@ Status DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int64 Status DataTypeHLLSerDe::deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index 36835b22db62c5..b096ac49d30580 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -42,7 +42,7 @@ class DataTypeHLLSerDe : public DataTypeSerDe { Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start, int64_t end) const override; @@ -55,8 +55,8 @@ class DataTypeHLLSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override { + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "read_column_from_arrow with type " + column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp b/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp index bd53277d743eae..44972cbfe4f063 100644 --- a/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp @@ -128,10 +128,10 @@ void DataTypeIPv4SerDe::write_column_to_arrow(const IColumn& column, const NullM } void DataTypeIPv4SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& col_data = assert_cast(column).get_data(); - int row_count = end - start; + int64_t row_count = end - start; /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = arrow_array->data()->buffers[1]; const auto* raw_data = reinterpret_cast(buffer->data()) + start; diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.h b/be/src/vec/data_types/serde/data_type_ipv4_serde.h index 44093412678165..f59efbe29f4c5f 100644 --- a/be/src/vec/data_types/serde/data_type_ipv4_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.h @@ -58,8 +58,8 @@ class DataTypeIPv4SerDe : public DataTypeNumberSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; private: template diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp index e899de93c90ce0..70d2165494bdfb 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp @@ -157,13 +157,13 @@ void DataTypeIPv6SerDe::write_column_to_arrow(const IColumn& column, const NullM } void DataTypeIPv6SerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& col_data = assert_cast(column).get_data(); const auto* concrete_array = assert_cast(arrow_array); std::shared_ptr buffer = concrete_array->value_data(); - for (size_t offset_i = start; offset_i < end; ++offset_i) { + for (auto offset_i = start; offset_i < end; ++offset_i) { if (!concrete_array->IsNull(offset_i)) { const char* raw_data = reinterpret_cast( buffer->data() + concrete_array->value_offset(offset_i)); @@ -186,7 +186,7 @@ Status DataTypeIPv6SerDe::write_column_to_orc(const std::string& timezone, const int64_t end, std::vector& buffer_list) const { const auto& col_data = assert_cast(column).get_data(); - orc::StringVectorBatch* cur_batch = assert_cast(orc_col_batch); + auto* cur_batch = assert_cast(orc_col_batch); INIT_MEMORY_FOR_ORC_WRITER() diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.h b/be/src/vec/data_types/serde/data_type_ipv6_serde.h index 2634c2efbef3d7..26d87ee5bf840c 100644 --- a/be/src/vec/data_types/serde/data_type_ipv6_serde.h +++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.h @@ -65,8 +65,8 @@ class DataTypeIPv6SerDe : public DataTypeNumberSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; void write_one_cell_to_jsonb(const IColumn& column, JsonbWriterT& result, Arena* mem_pool, int unique_id, int64_t row_num) const override; diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index e597cdba224376..88e8eb779ad271 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -102,7 +102,7 @@ Status DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int Status DataTypeJsonbSerDe::deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 5080b1ba46ed3c..f1fc1634b5c0b8 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -56,7 +56,7 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status write_column_to_orc(const std::string& timezone, const IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index e6b641ee6c9528..7a4921623f3bda 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -44,7 +44,7 @@ Status DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int64 ColumnPtr ptr = result.first; row_num = result.second; - const ColumnMap& map_column = assert_cast(*ptr); + const auto& map_column = assert_cast(*ptr); const ColumnArray::Offsets64& offsets = map_column.get_offsets(); size_t offset = offsets[row_num - 1]; @@ -113,7 +113,7 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text( } } - int num_keys = 0, num_values = 0; + uint64_t num_keys = 0, num_values = 0; Status st; st = key_serde->deserialize_column_from_hive_text_vector( nested_key_column, key_slices, &num_keys, options, @@ -136,7 +136,7 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text( } Status DataTypeMapSerDe::deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level) const { DESERIALIZE_COLUMN_FROM_HIVE_TEXT_VECTOR(); return Status::OK(); @@ -149,7 +149,7 @@ Status DataTypeMapSerDe::serialize_one_cell_to_hive_text( ColumnPtr ptr = result.first; row_num = result.second; - const ColumnMap& map_column = assert_cast(*ptr); + const auto& map_column = assert_cast(*ptr); const ColumnArray::Offsets64& offsets = map_column.get_offsets(); size_t start = offsets[row_num - 1]; @@ -178,7 +178,7 @@ Status DataTypeMapSerDe::serialize_one_cell_to_hive_text( Status DataTypeMapSerDe::deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR() return Status::OK(); @@ -314,7 +314,7 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& } void DataTypeMapSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { - auto blob = static_cast(arg); + const auto* blob = static_cast(arg); column.deserialize_and_insert_from_arena(blob->getBlob()); } @@ -334,17 +334,17 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const NullMa arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& builder = assert_cast(*array_builder); - auto& map_column = assert_cast(column); + const auto& map_column = assert_cast(column); const IColumn& nested_keys_column = map_column.get_keys(); const IColumn& nested_values_column = map_column.get_values(); // now we default set key value in map is nullable DCHECK(nested_keys_column.is_nullable()); DCHECK(nested_values_column.is_nullable()); - auto keys_nullmap_data = + const auto* keys_nullmap_data = check_and_get_column(nested_keys_column)->get_null_map_data().data(); - auto& offsets = map_column.get_offsets(); - auto key_builder = builder.key_builder(); - auto value_builder = builder.item_builder(); + const auto& offsets = map_column.get_offsets(); + auto* key_builder = builder.key_builder(); + auto* value_builder = builder.item_builder(); for (size_t r = start; r < end; ++r) { if ((null_map && (*null_map)[r])) { @@ -380,13 +380,13 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const NullMa } void DataTypeMapSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& column_map = static_cast(column); auto& offsets_data = column_map.get_offsets(); - auto concrete_map = dynamic_cast(arrow_array); + const auto* concrete_map = dynamic_cast(arrow_array); auto arrow_offsets_array = concrete_map->offsets(); - auto arrow_offsets = dynamic_cast(arrow_offsets_array.get()); + auto* arrow_offsets = dynamic_cast(arrow_offsets_array.get()); auto prev_size = offsets_data.back(); auto arrow_nested_start_offset = arrow_offsets->Value(start); auto arrow_nested_end_offset = arrow_offsets->Value(end); @@ -405,7 +405,7 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int64_t row_idx, bool col_const, const FormatOptions& options) const { - auto& map_column = assert_cast(column); + const auto& map_column = assert_cast(column); const IColumn& nested_keys_column = map_column.get_keys(); const IColumn& nested_values_column = map_column.get_values(); bool is_key_string = remove_nullable(nested_keys_column.get_ptr())->is_column_string(); @@ -416,7 +416,7 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const IColumn& column, if (0 != result.push_string("{", 1)) { return Status::InternalError("pack mysql buffer failed."); } - auto& offsets = map_column.get_offsets(); + const auto& offsets = map_column.get_offsets(); for (auto j = offsets[col_index - 1]; j < offsets[col_index]; ++j) { if (j != offsets[col_index - 1]) { if (0 != result.push_string(", ", 2)) { diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index 51c11300d44c01..7472d599377cac 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -49,7 +49,7 @@ class DataTypeMapSerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status deserialize_one_cell_from_hive_text( @@ -57,7 +57,7 @@ class DataTypeMapSerDe : public DataTypeSerDe { int hive_text_complex_type_delimiter_level = 1) const override; Status deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; @@ -75,8 +75,8 @@ class DataTypeMapSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, diff --git a/be/src/vec/data_types/serde/data_type_nothing_serde.h b/be/src/vec/data_types/serde/data_type_nothing_serde.h index 7bf7d2e64314cc..255fbc1097ed1d 100644 --- a/be/src/vec/data_types/serde/data_type_nothing_serde.h +++ b/be/src/vec/data_types/serde/data_type_nothing_serde.h @@ -53,7 +53,7 @@ class DataTypeNothingSerde : public DataTypeSerDe { column.get_name()); } Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override { return Status::NotSupported("deserialize_column_from_text_vector with type " + column.get_name()); @@ -83,8 +83,8 @@ class DataTypeNothingSerde : public DataTypeSerDe { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_column_to_arrow with type " + column.get_name()); } - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override { + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "read_column_from_arrow with type " + column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index f21f160fb0a891..b8151dc650c214 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -73,7 +73,7 @@ Status DataTypeNullableSerDe::serialize_one_cell_to_json(const IColumn& column, } Status DataTypeNullableSerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -122,14 +122,14 @@ Status DataTypeNullableSerDe::deserialize_one_cell_from_hive_text( } Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level) const { DESERIALIZE_COLUMN_FROM_HIVE_TEXT_VECTOR(); return Status::OK(); } Status DataTypeNullableSerDe::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, + IColumn& column, Slice& slice, uint64_t rows, uint64_t* num_deserialized, const FormatOptions& options) const { if (rows < 1) [[unlikely]] { return Status::OK(); @@ -263,7 +263,7 @@ Status DataTypeNullableSerDe::read_column_from_pb(IColumn& column, const PValues void DataTypeNullableSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int64_t row_num) const { - auto& nullable_col = assert_cast(column); + const auto& nullable_col = assert_cast(column); result.writeKey(cast_set(col_id)); if (nullable_col.is_null_at(row_num)) { result.writeNull(); @@ -298,11 +298,11 @@ void DataTypeNullableSerDe::write_column_to_arrow(const IColumn& column, const N } void DataTypeNullableSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& col = reinterpret_cast(column); NullMap& map_data = col.get_null_map_data(); - for (size_t i = start; i < end; ++i) { + for (auto i = start; i < end; ++i) { auto is_null = arrow_array->IsNull(i); map_data.emplace_back(is_null); } @@ -350,7 +350,7 @@ Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone, std::vector& buffer_list) const { const auto& column_nullable = assert_cast(column); orc_col_batch->hasNulls = true; - auto& null_map_tmp = column_nullable.get_null_map_data(); + const auto& null_map_tmp = column_nullable.get_null_map_data(); auto orc_null_map = revert_null_map(&null_map_tmp, start, end); // orc_col_batch->notNull.data() must add 'start' (+ start), // because orc_col_batch->notNull.data() begins at 0 @@ -368,8 +368,8 @@ Status DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int64_t row_num) const { - auto& col = static_cast(column); - auto& nested_col = col.get_nested_column(); + const auto& col = static_cast(column); + const auto& nested_col = col.get_nested_column(); if (col.is_null_at(row_num)) { result.SetNull(); } else { diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 159db890540dbc..8ed0bb6826d416 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -44,18 +44,18 @@ class DataTypeNullableSerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; Status deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; @@ -75,8 +75,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, const FormatOptions& options) const override; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 55c7b2c9505dae..62671543af33b7 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -77,7 +77,7 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const auto& col_data = assert_cast(column).get_data(); using ARROW_BUILDER_TYPE = typename TypeMapLookup::ValueType; auto arrow_null_map = revert_null_map(null_map, start, end); - auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data(); + auto* arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data(); if constexpr (std::is_same_v) { auto* null_builder = dynamic_cast(array_builder); if (null_builder) { @@ -86,7 +86,7 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const null_builder->type()->name()); } } else { - ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); + auto& builder = assert_cast(*array_builder); checkArrowStatus( builder.AppendValues(reinterpret_cast(col_data.data() + start), end - start, @@ -111,7 +111,7 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const } } else if constexpr (std::is_same_v || std::is_same_v) { } else { - ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); + auto& builder = assert_cast(*array_builder); checkArrowStatus( builder.AppendValues(col_data.data() + start, end - start, reinterpret_cast(arrow_null_map_data)), @@ -186,7 +186,7 @@ Status DataTypeNumberSerDe::serialize_one_cell_to_json(const IColumn& column, template Status DataTypeNumberSerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR(); return Status::OK(); @@ -194,14 +194,14 @@ Status DataTypeNumberSerDe::deserialize_column_from_json_vector( template void DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, - const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const { - int row_count = end - start; + const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const { + auto row_count = end - start; auto& col_data = static_cast&>(column).get_data(); // now uint8 for bool if constexpr (std::is_same_v) { - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); for (size_t bool_i = 0; bool_i != static_cast(concrete_array->length()); ++bool_i) { col_data.emplace_back(concrete_array->Value(bool_i)); } @@ -210,7 +210,7 @@ void DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, // only for largeint(int128) type if (arrow_array->type_id() == arrow::Type::STRING) { - auto concrete_array = dynamic_cast(arrow_array); + const auto* concrete_array = dynamic_cast(arrow_array); std::shared_ptr buffer = concrete_array->value_data(); for (size_t offset_i = start; offset_i < end; ++offset_i) { @@ -238,7 +238,7 @@ void DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, } template Status DataTypeNumberSerDe::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, + IColumn& column, Slice& slice, uint64_t rows, uint64_t* num_deserialized, const FormatOptions& options) const { if (rows < 1) [[unlikely]] { return Status::OK(); @@ -255,7 +255,7 @@ Status DataTypeNumberSerDe::deserialize_column_from_fixed_json( template void DataTypeNumberSerDe::insert_column_last_value_multiple_times(IColumn& column, - int times) const { + uint64_t times) const { if (times < 1) [[unlikely]] { return; } @@ -340,7 +340,7 @@ Status DataTypeNumberSerDe::write_column_to_orc(const std::string& timezone, auto& col_data = assert_cast(column).get_data(); if constexpr (std::is_same_v) { // largeint - orc::StringVectorBatch* cur_batch = dynamic_cast(orc_col_batch); + auto* cur_batch = dynamic_cast(orc_col_batch); INIT_MEMORY_FOR_ORC_WRITER() diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 203cd9dbf46d67..3309b5d08ba69e 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -67,14 +67,14 @@ class DataTypeNumberSerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const override; - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; + void insert_column_last_value_multiple_times(IColumn& column, uint64_t times) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start, int64_t end) const override; @@ -88,8 +88,8 @@ class DataTypeNumberSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index c08d4d0af0d2c3..4ba144760fb94e 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -51,7 +51,7 @@ class DataTypeObjectSerDe : public DataTypeSerDe { column.get_name()); } Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override { return Status::NotSupported("deserialize_column_from_text_vector with type " + column.get_name()); @@ -72,8 +72,8 @@ class DataTypeObjectSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override { + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "read_column_from_arrow with type " + column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index d3526ba389925f..b912257fb72314 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -70,7 +70,7 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe { } Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override { return Status::NotSupported("deserialize_column_from_text_vector with type " + column.get_name()); @@ -131,8 +131,8 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe { } } } - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override { + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "read_column_from_arrow with type " + column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 122a700cf9b20b..f54c4604499606 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -18,9 +18,9 @@ #pragma once #include -#include #include +#include #include #include #include @@ -31,12 +31,9 @@ #include "util/jsonb_writer.h" #include "util/mysql_row_buffer.h" #include "vec/columns/column_nullable.h" -#include "vec/common/pod_array.h" -#include "vec/common/pod_array_fwd.h" #include "vec/common/string_buffer.hpp" #include "vec/core/field.h" #include "vec/core/types.h" -#include "vec/io/reader_buffer.h" namespace arrow { class ArrayBuilder; @@ -171,7 +168,7 @@ class DataTypeSerDe { * null */ const char* null_format = "\\N"; - int null_len = 2; + size_t null_len = 2; /** * The wrapper char for string type in nested type. @@ -257,11 +254,11 @@ class DataTypeSerDe { const FormatOptions& options) const = 0; // deserialize text vector is to avoid virtual function call in complex type nested loop virtual Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const = 0; // deserialize fixed values.Repeatedly insert the value row times into the column. - virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const { //In this function implementation, we need to consider the case where rows is 0, 1, and other larger integers. if (rows < 1) [[unlikely]] { @@ -279,7 +276,7 @@ class DataTypeSerDe { return Status::OK(); } // Insert the last value to the end of this column multiple times. - virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const { + virtual void insert_column_last_value_multiple_times(IColumn& column, uint64_t times) const { if (times < 1) [[unlikely]] { return; } @@ -296,7 +293,7 @@ class DataTypeSerDe { return deserialize_one_cell_from_json(column, slice, options); }; virtual Status deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const { return deserialize_column_from_json_vector(column, slices, num_deserialized, options); }; @@ -335,8 +332,9 @@ class DataTypeSerDe { virtual void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const = 0; - virtual void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const = 0; + virtual void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, + int64_t start, int64_t end, + const cctz::time_zone& ctz) const = 0; // ORC serializer virtual Status write_column_to_orc(const std::string& timezone, const IColumn& column, @@ -384,7 +382,7 @@ inline static NullMap revert_null_map(const NullMap* null_bytemap, size_t start, } res.resize(end - start); - auto* __restrict src_data = (*null_bytemap).data(); + const auto* __restrict src_data = (*null_bytemap).data(); auto* __restrict res_data = res.data(); for (size_t i = 0; i < res.size(); ++i) { res_data[i] = !src_data[i + start]; diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 69a8cc2617191d..d0f428be67362b 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -198,7 +198,7 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { } Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override { DESERIALIZE_COLUMN_FROM_JSON_VECTOR() return Status::OK(); @@ -216,8 +216,8 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { return Status::OK(); } - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, + Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, uint64_t rows, + uint64_t* num_deserialized, const FormatOptions& options) const override { if (rows < 1) [[unlikely]] { return Status::OK(); @@ -232,7 +232,7 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { return Status::OK(); } - void insert_column_last_value_multiple_times(IColumn& column, int times) const override { + void insert_column_last_value_multiple_times(IColumn& column, uint64_t times) const override { if (times < 1) [[unlikely]] { return; } @@ -286,14 +286,14 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { column.get_name(), array_builder->type()->name()); } } - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override { + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override { if (arrow_array->type_id() == arrow::Type::STRING || arrow_array->type_id() == arrow::Type::BINARY) { const auto* concrete_array = dynamic_cast(arrow_array); std::shared_ptr buffer = concrete_array->value_data(); - for (size_t offset_i = start; offset_i < end; ++offset_i) { + for (auto offset_i = start; offset_i < end; ++offset_i) { if (!concrete_array->IsNull(offset_i)) { const auto* raw_data = buffer->data() + concrete_array->value_offset(offset_i); assert_cast(column).insert_data( diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index eb571dcc7a82d5..ead4d0b20881d9 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -35,7 +35,7 @@ std::optional DataTypeStructSerDe::try_get_position_by_name(const String size_t size = elem_serdes_ptrs.size(); for (size_t i = 0; i < size; ++i) { if (elem_names[i] == name) { - return std::optional(i); + return {i}; } } return std::nullopt; @@ -54,8 +54,7 @@ Status DataTypeStructSerDe::serialize_one_cell_to_json(const IColumn& column, in ColumnPtr ptr = result.first; row_num = result.second; - const ColumnStruct& struct_column = - assert_cast(*ptr); + const auto& struct_column = assert_cast(*ptr); bw.write('{'); for (int i = 0; i < struct_column.get_columns().size(); i++) { if (i != 0) { @@ -220,7 +219,7 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic } Status DataTypeStructSerDe::deserialize_column_from_json_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options) const { DESERIALIZE_COLUMN_FROM_JSON_VECTOR() return Status::OK(); @@ -254,7 +253,7 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_hive_text( if (options.escape_char != 0 && i > 0 && data[i - 1] == options.escape_char) { continue; } - slices.push_back({data + from, i - from}); + slices.emplace_back(data + from, i - from); from = i + 1; } } @@ -271,7 +270,7 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_hive_text( } Status DataTypeStructSerDe::deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level) const { DESERIALIZE_COLUMN_FROM_HIVE_TEXT_VECTOR(); return Status::OK(); @@ -284,8 +283,7 @@ Status DataTypeStructSerDe::serialize_one_cell_to_hive_text( ColumnPtr ptr = result.first; row_num = result.second; - const ColumnStruct& struct_column = - assert_cast(*ptr); + const auto& struct_column = assert_cast(*ptr); char collection_delimiter = options.get_collection_delimiter(hive_text_complex_type_delimiter_level); @@ -301,7 +299,7 @@ Status DataTypeStructSerDe::serialize_one_cell_to_hive_text( } void DataTypeStructSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { - auto blob = static_cast(arg); + const auto* blob = static_cast(arg); column.deserialize_and_insert_from_arena(blob->getBlob()); } @@ -309,7 +307,7 @@ void DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const Nul arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& builder = assert_cast(*array_builder); - auto& struct_column = assert_cast(column); + const auto& struct_column = assert_cast(column); for (auto r = start; r < end; ++r) { if (null_map != nullptr && (*null_map)[r]) { checkArrowStatus(builder.AppendNull(), struct_column.get_name(), @@ -326,10 +324,10 @@ void DataTypeStructSerDe::write_column_to_arrow(const IColumn& column, const Nul } void DataTypeStructSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, - int start, int end, + int64_t start, int64_t end, const cctz::time_zone& ctz) const { auto& struct_column = static_cast(column); - auto concrete_struct = dynamic_cast(arrow_array); + const auto* concrete_struct = dynamic_cast(arrow_array); DCHECK_EQ(struct_column.tuple_size(), concrete_struct->num_fields()); for (auto i = 0; i < struct_column.tuple_size(); ++i) { elem_serdes_ptrs[i]->read_column_from_arrow( @@ -342,7 +340,7 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int64_t row_idx, bool col_const, const FormatOptions& options) const { - auto& col = assert_cast(column); + const auto& col = assert_cast(column); const auto col_index = index_check_const(row_idx, col_const); result.open_dynamic_mode(); if (0 != result.push_string("{", 1)) { diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index fc77e26b71f882..a1f4cdd3b57816 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -18,13 +18,13 @@ #pragma once #include -#include -#include +#include #include "common/status.h" #include "data_type_serde.h" #include "util/jsonb_writer.h" +#include "vec/io/reader_buffer.h" namespace doris { class PValues; @@ -125,14 +125,14 @@ class DataTypeStructSerDe : public DataTypeSerDe { const FormatOptions& options) const override; Status deserialize_column_from_json_vector(IColumn& column, std::vector& slices, - int* num_deserialized, + uint64_t* num_deserialized, const FormatOptions& options) const override; Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; Status deserialize_column_from_hive_text_vector( - IColumn& column, std::vector& slices, int* num_deserialized, + IColumn& column, std::vector& slices, uint64_t* num_deserialized, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; Status serialize_one_cell_to_hive_text( @@ -150,8 +150,8 @@ class DataTypeStructSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int64_t start, int64_t end, const cctz::time_zone& ctz) const override; - void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, - int end, const cctz::time_zone& ctz) const override; + void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start, + int64_t end, const cctz::time_zone& ctz) const override; Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int64_t row_idx, bool col_const, diff --git a/be/src/vec/data_types/serde/data_type_time_serde.h b/be/src/vec/data_types/serde/data_type_time_serde.h index 0ebe79e522cbda..ee0946143eec9f 100644 --- a/be/src/vec/data_types/serde/data_type_time_serde.h +++ b/be/src/vec/data_types/serde/data_type_time_serde.h @@ -18,11 +18,10 @@ #pragma once #include #include -#include -#include + +#include #include "data_type_number_serde.h" -#include "vec/core/types.h" namespace doris { class JsonbOutStream; diff --git a/be/src/vec/exec/format/arrow/arrow_pip_input_stream.cpp b/be/src/vec/exec/format/arrow/arrow_pip_input_stream.cpp index 7bc32c7ab4bb91..6fb8877e0c503d 100644 --- a/be/src/vec/exec/format/arrow/arrow_pip_input_stream.cpp +++ b/be/src/vec/exec/format/arrow/arrow_pip_input_stream.cpp @@ -17,22 +17,17 @@ #include "arrow_pip_input_stream.h" -#include "arrow/array.h" +#include + #include "arrow/buffer.h" #include "arrow/io/buffered.h" -#include "arrow/io/stdio.h" -#include "arrow/ipc/options.h" -#include "arrow/ipc/reader.h" -#include "arrow/record_batch.h" #include "arrow/result.h" -#include "common/logging.h" -#include "io/fs/stream_load_pipe.h" -#include "runtime/runtime_state.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" ArrowPipInputStream::ArrowPipInputStream(io::FileReaderSPtr file_reader) - : _file_reader(file_reader), _pos(0), _begin(true), _read_buf(new uint8_t[4]) { + : _file_reader(std::move(file_reader)), _pos(0), _begin(true), _read_buf(new uint8_t[4]) { set_mode(arrow::io::FileMode::READ); } @@ -56,7 +51,7 @@ Status ArrowPipInputStream::HasNext(bool* get) { // and the `RecordBatchStreamReader::Open` function will directly report an error when it gets this buff Slice file_slice(_read_buf, 4); size_t read_length = 0; - RETURN_IF_ERROR(_file_reader->read_at(0, file_slice, &read_length, NULL)); + RETURN_IF_ERROR(_file_reader->read_at(0, file_slice, &read_length, nullptr)); if (read_length == 0) { *get = false; } else { @@ -68,7 +63,7 @@ Status ArrowPipInputStream::HasNext(bool* get) { arrow::Result ArrowPipInputStream::Read(int64_t nbytes, void* out) { // RecordBatchStreamReader::Open will create a new reader that will stream a batch of arrow data. // But the first four bytes of this batch of data were taken by the HasNext function, so they need to be copied back here. - uint8_t* out_ptr = (uint8_t*)out; + auto* out_ptr = (uint8_t*)out; if (_begin) { memmove(out_ptr, _read_buf, 4); out_ptr += 4; @@ -77,7 +72,7 @@ arrow::Result ArrowPipInputStream::Read(int64_t nbytes, void* out) { Slice file_slice(out_ptr, nbytes); size_t read_length = 0; - Status status = _file_reader->read_at(0, file_slice, &read_length, NULL); + Status status = _file_reader->read_at(0, file_slice, &read_length, nullptr); if (UNLIKELY(!status.ok())) { return arrow::Status::IOError("Error to read data from pip"); } @@ -97,4 +92,5 @@ arrow::Result> ArrowPipInputStream::Read(int64_t return buffer; } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/arrow/arrow_pip_input_stream.h b/be/src/vec/exec/format/arrow/arrow_pip_input_stream.h index fef4cf10903b4c..ab6bcf33a71f5c 100644 --- a/be/src/vec/exec/format/arrow/arrow_pip_input_stream.h +++ b/be/src/vec/exec/format/arrow/arrow_pip_input_stream.h @@ -17,32 +17,27 @@ #pragma once -#include #include #include -#include -#include -#include -#include #include "arrow/io/interfaces.h" -#include "io/file_factory.h" +#include "common/factory_creator.h" +#include "io/fs/file_reader.h" namespace doris { - namespace io { class FileSystem; struct IOContext; } // namespace io namespace vectorized { - +#include "common/compile_check_begin.h" class ArrowPipInputStream : public arrow::io::InputStream { ENABLE_FACTORY_CREATOR(ArrowPipInputStream); public: ArrowPipInputStream(io::FileReaderSPtr file_reader); - ~ArrowPipInputStream() override {} + ~ArrowPipInputStream() override = default; arrow::Status Close() override; bool closed() const override; @@ -63,4 +58,5 @@ class ArrowPipInputStream : public arrow::io::InputStream { }; } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp b/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp index d1a1a9d35c8861..efe8e36bf20368 100644 --- a/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp +++ b/be/src/vec/exec/format/arrow/arrow_stream_reader.cpp @@ -17,9 +17,7 @@ #include "arrow_stream_reader.h" -#include "arrow/array.h" #include "arrow/io/buffered.h" -#include "arrow/io/stdio.h" #include "arrow/ipc/options.h" #include "arrow/ipc/reader.h" #include "arrow/record_batch.h" @@ -29,9 +27,12 @@ #include "io/fs/stream_load_pipe.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" +#include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" #include "vec/utils/arrow_column_to_doris_column.h" namespace doris { +#include "common/compile_check_begin.h" class RuntimeProfile; } // namespace doris @@ -85,11 +86,11 @@ Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool* // convert arrow batch to block auto columns = block->mutate_columns(); - int batch_size = out_batches.size(); - for (int i = 0; i < batch_size; i++) { + size_t batch_size = out_batches.size(); + for (size_t i = 0; i < batch_size; i++) { arrow::RecordBatch& batch = *out_batches[i]; - int num_rows = batch.num_rows(); - int num_columns = batch.num_columns(); + auto num_rows = batch.num_rows(); + auto num_columns = batch.num_columns(); for (int c = 0; c < num_columns; ++c) { arrow::Array* column = batch.column(c).get(); @@ -113,10 +114,11 @@ Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool* Status ArrowStreamReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& slot : _file_slot_descs) { + for (const auto& slot : _file_slot_descs) { name_to_type->emplace(slot->col_name(), slot->type()); } return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exec/format/arrow/arrow_stream_reader.h b/be/src/vec/exec/format/arrow/arrow_stream_reader.h index eb0acca4aeec8e..830753326b9b16 100644 --- a/be/src/vec/exec/format/arrow/arrow_stream_reader.h +++ b/be/src/vec/exec/format/arrow/arrow_stream_reader.h @@ -18,7 +18,6 @@ #pragma once #include -#include #include #include #include @@ -30,19 +29,17 @@ #include "common/status.h" #include "io/file_factory.h" #include "io/fs/file_reader_writer_fwd.h" -#include "util/slice.h" -#include "vec/data_types/data_type.h" #include "vec/exec/format/file_reader/new_plain_text_line_reader.h" #include "vec/exec/format/generic_reader.h" namespace doris { - namespace io { class FileSystem; struct IOContext; } // namespace io namespace vectorized { +#include "common/compile_check_begin.h" struct ScannerCounter; class Block; @@ -73,4 +70,5 @@ class ArrowStreamReader : public GenericReader { cctz::time_zone _ctzz; }; } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/avro/avro_jni_reader.cpp b/be/src/vec/exec/format/avro/avro_jni_reader.cpp index 03135aa5c94e8c..648155187e1725 100644 --- a/be/src/vec/exec/format/avro/avro_jni_reader.cpp +++ b/be/src/vec/exec/format/avro/avro_jni_reader.cpp @@ -24,6 +24,7 @@ #include "runtime/types.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" AvroJNIReader::AvroJNIReader(RuntimeState* state, RuntimeProfile* profile, const TFileScanRangeParams& params, @@ -48,7 +49,7 @@ Status AvroJNIReader::get_next_block(Block* block, size_t* read_rows, bool* eof) Status AvroJNIReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -61,7 +62,7 @@ Status AvroJNIReader::init_fetch_table_reader( std::ostringstream columns_types; std::vector column_names; int index = 0; - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { std::string field = desc->col_name(); column_names.emplace_back(field); std::string type = JniConnector::get_jni_type(desc->type()); @@ -96,7 +97,7 @@ Status AvroJNIReader::init_fetch_table_reader( return _jni_connector->open(_state, _profile); } -TFileType::type AvroJNIReader::get_file_type() { +TFileType::type AvroJNIReader::get_file_type() const { TFileType::type type; if (_range.__isset.file_type) { // for compatibility @@ -128,7 +129,7 @@ Status AvroJNIReader::get_parsed_schema(std::vector* col_names, if (document.IsArray()) { for (int i = 0; i < document.Size(); ++i) { rapidjson::Value& column_schema = document[i]; - col_names->push_back(column_schema["name"].GetString()); + col_names->emplace_back(column_schema["name"].GetString()); col_types->push_back(convert_to_doris_type(column_schema)); } } @@ -175,4 +176,5 @@ TypeDescriptor AvroJNIReader::convert_to_doris_type(const rapidjson::Value& colu } } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/avro/avro_jni_reader.h b/be/src/vec/exec/format/avro/avro_jni_reader.h index 82388f32915027..8e956ac714322b 100644 --- a/be/src/vec/exec/format/avro/avro_jni_reader.h +++ b/be/src/vec/exec/format/avro/avro_jni_reader.h @@ -18,9 +18,8 @@ #pragma once #include -#include -#include +#include #include #include #include @@ -32,9 +31,7 @@ namespace doris { class RuntimeProfile; - class RuntimeState; - class SlotDescriptor; namespace vectorized { class Block; @@ -43,7 +40,7 @@ struct TypeDescriptor; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" /** * Read avro-format file */ @@ -73,7 +70,7 @@ class AvroJNIReader : public JniReader { Status init_fetch_table_reader( std::unordered_map* colname_to_value_range); - TFileType::type get_file_type(); + TFileType::type get_file_type() const; Status init_fetch_table_schema_reader(); @@ -88,4 +85,5 @@ class AvroJNIReader : public JniReader { std::unordered_map* _colname_to_value_range = nullptr; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/column_type_convert.cpp b/be/src/vec/exec/format/column_type_convert.cpp index 0442158b690c39..08418c7e94a3a2 100644 --- a/be/src/vec/exec/format/column_type_convert.cpp +++ b/be/src/vec/exec/format/column_type_convert.cpp @@ -18,6 +18,7 @@ #include "vec/exec/format/column_type_convert.h" namespace doris::vectorized::converter { +#include "common/compile_check_begin.h" #define FOR_LOGICAL_NUMERIC_TYPES(M) \ M(TYPE_BOOLEAN) \ @@ -112,7 +113,7 @@ ColumnPtr ColumnTypeConverter::get_column(const TypeDescriptor& src_type, Column // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. - auto doris_nullable_column = + auto* doris_nullable_column = const_cast(static_cast(dst_column.get())); return ColumnNullable::create(_cached_src_column, doris_nullable_column->get_null_map_column_ptr()); @@ -329,4 +330,5 @@ std::unique_ptr ColumnTypeConverter::get_converter( return std::make_unique(src_type, dst_type); } +#include "common/compile_check_end.h" } // namespace doris::vectorized::converter diff --git a/be/src/vec/exec/format/column_type_convert.h b/be/src/vec/exec/format/column_type_convert.h index d4a8186549ab1d..4c76a88f363d94 100644 --- a/be/src/vec/exec/format/column_type_convert.h +++ b/be/src/vec/exec/format/column_type_convert.h @@ -17,6 +17,10 @@ #pragma once +#include +#include + +#include "common/cast_set.h" #include "gutil/strings/numbers.h" #include "vec/columns/column_string.h" #include "vec/core/types.h" @@ -25,6 +29,7 @@ #include "vec/io/io_helper.h" namespace doris::vectorized::converter { +#include "common/compile_check_begin.h" template constexpr bool is_decimal_type_const() { @@ -234,9 +239,14 @@ struct SafeCastString { PrimitiveTypeTraits::ColumnType::value_type* value) { int32 cast_to_int = 0; bool can_cast = safe_strto32(startptr, buffer_size, &cast_to_int); - *value = cast_to_int; - return can_cast && cast_to_int <= std::numeric_limits::max() && - cast_to_int >= std::numeric_limits::min(); + if (can_cast && cast_to_int <= std::numeric_limits::max() && + cast_to_int >= std::numeric_limits::min()) { + // has checked the cast_to_int is in the range of int8 + *value = cast_set(cast_to_int); + return true; + } else { + return false; + } } }; @@ -247,9 +257,14 @@ struct SafeCastString { PrimitiveTypeTraits::ColumnType::value_type* value) { int32 cast_to_int = 0; bool can_cast = safe_strto32(startptr, buffer_size, &cast_to_int); - *value = cast_to_int; - return can_cast && cast_to_int <= std::numeric_limits::max() && - cast_to_int >= std::numeric_limits::min(); + if (can_cast && cast_to_int <= std::numeric_limits::max() && + cast_to_int >= std::numeric_limits::min()) { + // has checked the cast_to_int is in the range of int16 + *value = cast_set(cast_to_int); + return true; + } else { + return false; + } } }; @@ -364,7 +379,7 @@ class CastStringConverter : public ColumnTypeConverter { public: CastStringConverter() = default; - CastStringConverter(DataTypePtr dst_type_desc) : _dst_type_desc(dst_type_desc) {} + CastStringConverter(DataTypePtr dst_type_desc) : _dst_type_desc(std::move(dst_type_desc)) {} using DstCppType = typename PrimitiveTypeTraits::ColumnType::value_type; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; @@ -389,14 +404,15 @@ class CastStringConverter : public ColumnTypeConverter { bool can_cast = false; if constexpr (is_decimal_type_const()) { can_cast = SafeCastDecimalString::safe_cast_string( - string_value.data, string_value.size, &value, + string_value.data, cast_set(string_value.size), &value, _dst_type_desc->get_precision(), _dst_type_desc->get_scale()); } else if constexpr (DstPrimitiveType == TYPE_DATETIMEV2) { can_cast = SafeCastString::safe_cast_string( - string_value.data, string_value.size, &value, _dst_type_desc->get_scale()); + string_value.data, cast_set(string_value.size), &value, + _dst_type_desc->get_scale()); } else { can_cast = SafeCastString::safe_cast_string( - string_value.data, string_value.size, &value); + string_value.data, cast_set(string_value.size), &value); } if (!can_cast) { if (null_map == nullptr) { @@ -488,7 +504,9 @@ class NumericToDecimalConverter : public ColumnTypeConverter { auto& v = reinterpret_cast(data[start_idx + i]); v = (DstNativeType)decimal_int128; } else { - data[start_idx + i] = DstCppType::from_int_frac(src_data[i], 0, _scale); + // TODO: check cast overflow + data[start_idx + i] = DstCppType::from_int_frac( + static_cast(src_data[i]), 0, _scale); } } @@ -521,8 +539,8 @@ class DecimalToNumericConverter : public ColumnTypeConverter { int64_t scale_factor = common::exp10_i64(_scale); for (int i = 0; i < rows; ++i) { if constexpr (DstPrimitiveType == TYPE_FLOAT || DstPrimitiveType == TYPE_DOUBLE) { - data[start_idx + i] = - static_cast(src_data[i].value / (double)scale_factor); + data[start_idx + i] = static_cast( + static_cast(src_data[i].value) / (double)scale_factor); } else { data[start_idx + i] = static_cast(src_data[i].value / scale_factor); } @@ -532,4 +550,5 @@ class DecimalToNumericConverter : public ColumnTypeConverter { } }; +#include "common/compile_check_end.h" } // namespace doris::vectorized::converter diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index b58808413f48ab..b43b069f54d45f 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -45,7 +44,6 @@ #include "runtime/types.h" #include "util/string_util.h" #include "util/utf8_check.h" -#include "vec/common/typeid_cast.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/data_types/data_type_factory.hpp" @@ -58,7 +56,6 @@ class RuntimeProfile; namespace vectorized { class IColumn; } // namespace vectorized - namespace io { struct IOContext; enum class FileCachePolicy : uint8_t; @@ -66,7 +63,7 @@ enum class FileCachePolicy : uint8_t; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" const static Slice _s_null_slice = Slice("\\N"); void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector* splitted_values) { @@ -341,7 +338,7 @@ Status CsvReader::init_reader(bool is_load) { (_state != nullptr && _state->trim_tailing_spaces_for_external_table_query()); _options.escape_char = _escape; - if (_params.file_attributes.text_params.collection_delimiter.size() == 0) { + if (_params.file_attributes.text_params.collection_delimiter.empty()) { switch (_text_serde_type) { case TTextSerdeType::JSON_TEXT_SERDE: _options.collection_delim = ','; @@ -355,7 +352,7 @@ Status CsvReader::init_reader(bool is_load) { } else { _options.collection_delim = _params.file_attributes.text_params.collection_delimiter[0]; } - if (_params.file_attributes.text_params.mapkv_delimiter.size() == 0) { + if (_params.file_attributes.text_params.mapkv_delimiter.empty()) { switch (_text_serde_type) { case TTextSerdeType::JSON_TEXT_SERDE: _options.map_key_delim = ':'; @@ -475,7 +472,7 @@ Status CsvReader::init_reader(bool is_load) { } else { // For load task, the column order is same as file column order int i = 0; - for (auto& desc [[maybe_unused]] : _file_slot_descs) { + for (const auto& desc [[maybe_unused]] : _file_slot_descs) { _col_idxs.push_back(i++); } } @@ -575,7 +572,7 @@ Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { Status CsvReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& slot : _file_slot_descs) { + for (const auto& slot : _file_slot_descs) { name_to_type->emplace(slot->col_name(), slot->type()); } return Status::OK(); @@ -795,7 +792,7 @@ void CsvReader::_split_line(const Slice& line) { Status CsvReader::_check_array_format(std::vector& split_values, bool* is_success) { // if not the array format, filter this line and return error url for (int j = 0; j < _file_slot_descs.size(); ++j) { - auto slot_desc = _file_slot_descs[j]; + auto* slot_desc = _file_slot_descs[j]; if (!slot_desc->is_materialized()) { continue; } @@ -884,7 +881,7 @@ Status CsvReader::_prepare_parse(size_t* read_line, bool* is_parse_name) { _not_trim_enclose = (!_trim_double_quotes && _enclose == '\"'); _options.converted_from_string = _trim_double_quotes; _options.escape_char = _escape; - if (_params.file_attributes.text_params.collection_delimiter.size() == 0) { + if (_params.file_attributes.text_params.collection_delimiter.empty()) { switch (_text_serde_type) { case TTextSerdeType::JSON_TEXT_SERDE: _options.collection_delim = ','; @@ -898,7 +895,7 @@ Status CsvReader::_prepare_parse(size_t* read_line, bool* is_parse_name) { } else { _options.collection_delim = _params.file_attributes.text_params.collection_delimiter[0]; } - if (_params.file_attributes.text_params.mapkv_delimiter.size() == 0) { + if (_params.file_attributes.text_params.mapkv_delimiter.empty()) { switch (_text_serde_type) { case TTextSerdeType::JSON_TEXT_SERDE: _options.collection_delim = ':'; @@ -976,8 +973,8 @@ Status CsvReader::_parse_col_names(std::vector* col_names) { } ptr = _remove_bom(ptr, size); _split_line(Slice(ptr, size)); - for (size_t idx = 0; idx < _split_values.size(); ++idx) { - col_names->emplace_back(_split_values[idx].to_string()); + for (auto _split_value : _split_values) { + col_names->emplace_back(_split_value.to_string()); } return Status::OK(); } @@ -1019,4 +1016,5 @@ Status CsvReader::close() { return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/csv/csv_reader.h b/be/src/vec/exec/format/csv/csv_reader.h index 8ffd704c6e4b2f..b59cd0523b42c8 100644 --- a/be/src/vec/exec/format/csv/csv_reader.h +++ b/be/src/vec/exec/format/csv/csv_reader.h @@ -19,14 +19,14 @@ #include #include -#include -#include #include +#include #include #include #include #include +#include #include #include "common/status.h" @@ -38,6 +38,7 @@ #include "vec/exec/format/generic_reader.h" namespace doris { +#include "common/compile_check_begin.h" class LineReader; class Decompressor; @@ -51,7 +52,6 @@ struct IOContext; struct TypeDescriptor; namespace vectorized { - struct ScannerCounter; class Block; @@ -73,7 +73,7 @@ class BaseLineFieldSplitter : public LineFieldSplitterIf { class CsvProtoFieldSplitter final : public BaseLineFieldSplitter { public: inline void split_line_impl(const Slice& line, std::vector* splitted_values) { - PDataRow** row_ptr = reinterpret_cast(line.data); + auto** row_ptr = reinterpret_cast(line.data); PDataRow* row = *row_ptr; for (const PDataColumn& col : row->col()) { splitted_values->emplace_back(col.value()); @@ -142,7 +142,7 @@ class EncloseCsvTextFieldSplitter : public BaseCsvTextFieldSplitter line_reader_ctx, size_t value_sep_len = 1, char trimming_char = 0) : BaseCsvTextFieldSplitter(trim_tailing_space, trim_ends, value_sep_len, trimming_char), - _text_line_reader_ctx(line_reader_ctx) {} + _text_line_reader_ctx(std::move(line_reader_ctx)) {} void do_split(const Slice& line, std::vector* splitted_values); @@ -153,10 +153,10 @@ class EncloseCsvTextFieldSplitter : public BaseCsvTextFieldSplitter { public: explicit PlainCsvTextFieldSplitter(bool trim_tailing_space, bool trim_ends, - const std::string& value_sep, size_t value_sep_len = 1, + std::string value_sep, size_t value_sep_len = 1, char trimming_char = 0) : BaseCsvTextFieldSplitter(trim_tailing_space, trim_ends, value_sep_len, trimming_char), - _value_sep(value_sep) { + _value_sep(std::move(value_sep)) { is_single_char_delim = (value_sep_len == 1); } @@ -173,10 +173,10 @@ class PlainCsvTextFieldSplitter : public BaseCsvTextFieldSplitter { public: explicit HiveCsvTextFieldSplitter(bool trim_tailing_space, bool trim_ends, - const string& value_sep, size_t value_sep_len = 1, + std::string value_sep, size_t value_sep_len = 1, char trimming_char = 0, char escape_char = 0) : BaseCsvTextFieldSplitter(trim_tailing_space, trim_ends, value_sep_len, trimming_char), - _value_sep(value_sep), + _value_sep(std::move(value_sep)), _escape_char(escape_char) {} void do_split(const Slice& line, std::vector* splitted_values); @@ -296,8 +296,8 @@ class CsvReader : public GenericReader { vectorized::DataTypeSerDeSPtrs _serdes; vectorized::DataTypeSerDe::FormatOptions _options; - int _value_separator_length; - int _line_delimiter_length; + size_t _value_separator_length; + size_t _line_delimiter_length; bool _trim_double_quotes = false; bool _trim_tailing_spaces = false; // `should_not_trim` is to manage the case that: user do not expect to trim double quotes but enclose is double quotes @@ -313,4 +313,5 @@ class CsvReader : public GenericReader { std::vector _use_nullable_string_opt; }; } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.cpp index 5623af2e183864..93cc19d7be146b 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.cpp +++ b/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.cpp @@ -19,16 +19,19 @@ #include +#include + #include "io/fs/file_reader.h" #include "io/fs/stream_load_pipe.h" namespace doris { +#include "common/compile_check_begin.h" namespace io { struct IOContext; } // namespace io NewPlainBinaryLineReader::NewPlainBinaryLineReader(io::FileReaderSPtr file_reader) - : _file_reader(file_reader) {} + : _file_reader(std::move(file_reader)) {} NewPlainBinaryLineReader::~NewPlainBinaryLineReader() { close(); @@ -51,4 +54,5 @@ Status NewPlainBinaryLineReader::read_line(const uint8_t** ptr, size_t* size, bo return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.h b/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.h index aa6a5e63ea7718..e81afb162c3439 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.h +++ b/be/src/vec/exec/format/file_reader/new_plain_binary_line_reader.h @@ -17,9 +17,8 @@ #pragma once -#include -#include - +#include +#include #include #include "common/status.h" @@ -27,6 +26,7 @@ #include "io/fs/file_reader_writer_fwd.h" namespace doris { +#include "common/compile_check_begin.h" class PDataRow; namespace io { @@ -55,4 +55,5 @@ class NewPlainBinaryLineReader : public LineReader { std::unique_ptr _cur_row; }; +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp index ad86cca212b6ed..83bca5439ae944 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp +++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp @@ -19,16 +19,14 @@ #include #include -#include - #ifdef __AVX2__ #include #endif #include #include #include -#include #include +#include #include "exec/decompressor.h" #include "io/fs/file_reader.h" @@ -45,6 +43,7 @@ // leave these 2 size small for debugging namespace doris { +#include "common/compile_check_begin.h" const uint8_t* EncloseCsvLineReaderContext::read_line_impl(const uint8_t* start, const size_t length) { _total_len = length; @@ -192,11 +191,11 @@ NewPlainTextLineReader::NewPlainTextLineReader(RuntimeProfile* profile, TextLineReaderCtxPtr line_reader_ctx, size_t length, size_t current_offset) : _profile(profile), - _file_reader(file_reader), + _file_reader(std::move(file_reader)), _decompressor(decompressor), _min_length(length), _total_read_bytes(0), - _line_reader_ctx(line_reader_ctx), + _line_reader_ctx(std::move(line_reader_ctx)), _input_buf(new uint8_t[INPUT_CHUNK]), _input_buf_size(INPUT_CHUNK), _input_buf_pos(0), @@ -272,7 +271,7 @@ void NewPlainTextLineReader::extend_input_buf() { _input_buf_size = _input_buf_size * 2; } - uint8_t* new_input_buf = new uint8_t[_input_buf_size]; + auto* new_input_buf = new uint8_t[_input_buf_size]; memmove(new_input_buf, _input_buf + _input_buf_pos, input_buf_read_remaining()); delete[] _input_buf; @@ -309,7 +308,7 @@ void NewPlainTextLineReader::extend_output_buf() { _output_buf_size = _output_buf_size * 2; } - uint8_t* new_output_buf = new uint8_t[_output_buf_size]; + auto* new_output_buf = new uint8_t[_output_buf_size]; memmove(new_output_buf, _output_buf + _output_buf_pos, output_buf_read_remaining()); delete[] _output_buf; @@ -327,7 +326,7 @@ Status NewPlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool return Status::OK(); } _line_reader_ctx->refresh(); - int found_line_delimiter = 0; + size_t found_line_delimiter = 0; size_t offset = 0; bool stream_end = true; while (!done()) { @@ -493,4 +492,5 @@ void NewPlainTextLineReader::_collect_profile_before_close() { } } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h index c91b503cbe5c0d..e1c30607e1b3c3 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h +++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h @@ -16,11 +16,9 @@ // under the License. #pragma once -#include - #include +#include #include -#include #include #include #include @@ -29,9 +27,9 @@ #include "exec/line_reader.h" #include "io/fs/file_reader_writer_fwd.h" #include "util/runtime_profile.h" -#include "util/slice.h" namespace doris { +#include "common/compile_check_begin.h" namespace io { struct IOContext; } @@ -60,9 +58,9 @@ class BaseTextLineReaderContext : public TextLineReaderContextIf { using FindDelimiterFunc = const uint8_t* (*)(const uint8_t*, size_t, const char*, size_t); public: - explicit BaseTextLineReaderContext(const std::string& line_delimiter_, + explicit BaseTextLineReaderContext(std::string line_delimiter_, const size_t line_delimiter_len_, const bool keep_cr_) - : line_delimiter(line_delimiter_), + : line_delimiter(std::move(line_delimiter_)), line_delimiter_len(line_delimiter_len_), keep_cr(keep_cr_) { use_memmem = line_delimiter_len != 1 || line_delimiter != "\n" || keep_cr; @@ -113,14 +111,15 @@ class BaseTextLineReaderContext : public TextLineReaderContextIf { int mask_carriage_return = _mm256_movemask_epi8(cmp_carriage_return); if (mask_newline != 0 || mask_carriage_return != 0) { - int pos_lf = (mask_newline != 0) ? i + __builtin_ctz(mask_newline) : INT32_MAX; - int pos_cr = (mask_carriage_return != 0) ? i + __builtin_ctz(mask_carriage_return) - : INT32_MAX; + size_t pos_lf = (mask_newline != 0) ? i + __builtin_ctz(mask_newline) : INT32_MAX; + size_t pos_cr = (mask_carriage_return != 0) + ? i + __builtin_ctz(mask_carriage_return) + : INT32_MAX; if (pos_lf < pos_cr) { return start + pos_lf; } else if (pos_cr < pos_lf) { if (pos_lf != INT32_MAX) { - if (pos_lf - 1 >= 0 && start[pos_lf - 1] == '\r') { + if (pos_lf >= 1 && start[pos_lf - 1] == '\r') { //check xxx\r\r\r\nxxx line_crlf = true; return start + pos_lf - 1; @@ -202,15 +201,14 @@ class EncloseCsvLineReaderContext final public: explicit EncloseCsvLineReaderContext(const std::string& line_delimiter_, - const size_t line_delimiter_len_, - const std::string& column_sep_, + const size_t line_delimiter_len_, std::string column_sep_, const size_t column_sep_len_, size_t col_sep_num, const char enclose, const char escape, const bool keep_cr_) : BaseTextLineReaderContext(line_delimiter_, line_delimiter_len_, keep_cr_), _enclose(enclose), _escape(escape), _column_sep_len(column_sep_len_), - _column_sep(column_sep_) { + _column_sep(std::move(column_sep_)) { if (column_sep_len_ == 1) { find_col_sep_func = &EncloseCsvLineReaderContext::look_for_column_sep_pos; } else { @@ -227,7 +225,7 @@ class EncloseCsvLineReaderContext final _state.reset(); } - [[nodiscard]] inline const std::vector column_sep_positions() const { + [[nodiscard]] inline std::vector column_sep_positions() const { return _column_sep_positions; } @@ -337,4 +335,5 @@ class NewPlainTextLineReader : public LineReader { RuntimeProfile::Counter* _bytes_decompress_counter = nullptr; RuntimeProfile::Counter* _decompress_timer = nullptr; }; +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/format_common.h b/be/src/vec/exec/format/format_common.h index 3edf021ad2797a..5e62912fd27c0c 100644 --- a/be/src/vec/exec/format/format_common.h +++ b/be/src/vec/exec/format/format_common.h @@ -21,6 +21,7 @@ #include "vec/core/types.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" struct DecimalScaleParams { enum ScaleType { @@ -133,7 +134,7 @@ class ShardedKVCache { } private: - uint32_t _get_idx(const std::string& key) { + uint32_t _get_idx(const std::string& key) const { return (uint32_t)std::hash()(key) % _num_shards; } @@ -141,4 +142,5 @@ class ShardedKVCache { KVCache** _shards = nullptr; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/generic_reader.h b/be/src/vec/exec/format/generic_reader.h index e32928e4b95de4..35abe1ed1d98c9 100644 --- a/be/src/vec/exec/format/generic_reader.h +++ b/be/src/vec/exec/format/generic_reader.h @@ -19,13 +19,14 @@ #include -#include "common/factory_creator.h" #include "common/status.h" +#include "runtime/descriptors.h" #include "runtime/types.h" #include "util/profile_collector.h" -#include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" class Block; // This a reader interface for all file readers. @@ -49,7 +50,7 @@ class GenericReader : public ProfileCollector { std::vector* col_types) { return Status::NotSupported("get_parsed_schema is not implemented for this reader."); } - virtual ~GenericReader() = default; + ~GenericReader() override = default; /// If the underlying FileReader has filled the partition&missing columns, /// The FileScanner does not need to fill @@ -72,7 +73,8 @@ class GenericReader : public ProfileCollector { /// Whether the underlying FileReader has filled the partition&missing columns bool _fill_all_columns = false; - TPushAggOp::type _push_down_agg_type; + TPushAggOp::type _push_down_agg_type {}; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/jni_reader.cpp b/be/src/vec/exec/format/jni_reader.cpp index 563f6cbea5181e..927b3cc2edd227 100644 --- a/be/src/vec/exec/format/jni_reader.cpp +++ b/be/src/vec/exec/format/jni_reader.cpp @@ -25,6 +25,7 @@ #include "vec/core/types.h" namespace doris { +#include "common/compile_check_begin.h" class RuntimeProfile; class RuntimeState; @@ -42,7 +43,7 @@ MockJniReader::MockJniReader(const std::vector& file_slot_descs std::ostringstream columns_types; std::vector column_names; int index = 0; - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { std::string field = desc->col_name(); std::string type = JniConnector::get_jni_type(desc->type()); column_names.emplace_back(field); @@ -72,7 +73,7 @@ Status MockJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) Status MockJniReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -84,4 +85,5 @@ Status MockJniReader::init_reader( RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range)); return _jni_connector->open(_state, _profile); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/jni_reader.h b/be/src/vec/exec/format/jni_reader.h index 3f156f598a0881..17f18a12f130df 100644 --- a/be/src/vec/exec/format/jni_reader.h +++ b/be/src/vec/exec/format/jni_reader.h @@ -17,8 +17,7 @@ #pragma once -#include - +#include #include #include #include @@ -31,6 +30,7 @@ #include "vec/exec/jni_connector.h" namespace doris { +#include "common/compile_check_begin.h" class RuntimeProfile; class RuntimeState; class SlotDescriptor; @@ -109,4 +109,5 @@ class MockJniReader : public JniReader { std::unordered_map* _colname_to_value_range; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index 8b3802b0af0645..9d40f1759a8e26 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -22,19 +22,18 @@ #include #include #include -#include #include #include #include #include #include // IWYU pragma: keep -#include -#include #include +#include +#include +#include #include #include -#include #include #include @@ -50,9 +49,7 @@ #include "runtime/descriptors.h" #include "runtime/runtime_state.h" #include "runtime/types.h" -#include "util/defer_op.h" #include "util/slice.h" -#include "util/uid_util.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_map.h" @@ -60,12 +57,9 @@ #include "vec/columns/column_string.h" #include "vec/columns/column_struct.h" #include "vec/common/assert_cast.h" -#include "vec/common/typeid_cast.h" -#include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/exec/format/file_reader/new_plain_text_line_reader.h" #include "vec/exec/scan/vscanner.h" -#include "vec/json/simd_json_parser.h" namespace doris::io { struct IOContext; @@ -73,6 +67,7 @@ enum class FileCachePolicy : uint8_t; } // namespace doris::io namespace doris::vectorized { +#include "common/compile_check_begin.h" using namespace ErrorCode; NewJsonReader::NewJsonReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter, @@ -807,7 +802,8 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl } } else { it = objectValue.FindMember( - rapidjson::Value(slot_desc->col_name().c_str(), slot_desc->col_name().size())); + rapidjson::Value(slot_desc->col_name().c_str(), + cast_set(slot_desc->col_name().size()))); } if (it != objectValue.MemberEnd()) { @@ -977,7 +973,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator const auto& struct_value = value->GetObject(); auto sub_serdes = data_serde->get_nested_serdes(); - auto struct_column_ptr = assert_cast(data_column_ptr); + auto* struct_column_ptr = assert_cast(data_column_ptr); std::map sub_col_name_to_idx; for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { @@ -991,7 +987,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator "Json file struct column `{}` subfield name isn't a String", column_name); } - auto sub_key_char = sub.name.GetString(); + const auto* sub_key_char = sub.name.GetString(); auto sub_key_length = sub.name.GetStringLength(); std::string sub_key(sub_key_char, sub_key_length); @@ -1005,7 +1001,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator } for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { - auto sub_value = sub_values[sub_col_idx]; + const auto* sub_value = sub_values[sub_col_idx]; const auto& sub_col_type = type_desc.children[sub_col_idx]; @@ -1022,7 +1018,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator } const auto& object_value = value->GetObject(); auto sub_serdes = data_serde->get_nested_serdes(); - auto map_column_ptr = assert_cast(data_column_ptr); + auto* map_column_ptr = assert_cast(data_column_ptr); for (const auto& member_value : object_value) { RETURN_IF_ERROR(_write_data_to_column( @@ -1045,7 +1041,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator } const auto& array_value = value->GetArray(); auto sub_serdes = data_serde->get_nested_serdes(); - auto array_column_ptr = assert_cast(data_column_ptr); + auto* array_column_ptr = assert_cast(data_column_ptr); for (const auto& sub_value : array_value) { RETURN_IF_ERROR(_write_data_to_column(&sub_value, type_desc.children[0], @@ -1150,7 +1146,7 @@ std::string NewJsonReader::_print_json_value(const rapidjson::Value& value) { buffer.Clear(); rapidjson::Writer writer(buffer); value.Accept(writer); - return std::string(buffer.GetString()); + return {buffer.GetString()}; } Status NewJsonReader::_read_one_message(std::unique_ptr* file_buf, size_t* read_size) { @@ -1613,8 +1609,8 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val "partial update, missing key column: {}", slot_desc->col_name(), valid)); // remove this line in block - for (int i = 0; i < block.columns(); ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); + for (size_t index = 0; index < block.columns(); ++index) { + auto column = block.get_by_position(index).column->assume_mutable(); if (column->size() != cur_row_count) { DCHECK(column->size() == cur_row_count + 1); column->pop_back(1); @@ -1699,7 +1695,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& auto sub_col_size = type_desc.children.size(); simdjson::ondemand::object struct_value = value.get_object(); auto sub_serdes = data_serde->get_nested_serdes(); - auto struct_column_ptr = assert_cast(data_column_ptr); + auto* struct_column_ptr = assert_cast(data_column_ptr); std::map sub_col_name_to_idx; for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { @@ -1732,7 +1728,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& //fill missing subcolumn for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) { - if (has_value[sub_col_idx] == true) { + if (has_value[sub_col_idx]) { continue; } @@ -1754,17 +1750,17 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& simdjson::ondemand::object object_value = value.get_object(); auto sub_serdes = data_serde->get_nested_serdes(); - auto map_column_ptr = assert_cast(data_column_ptr); + auto* map_column_ptr = assert_cast(data_column_ptr); size_t field_count = 0; for (simdjson::ondemand::field member_value : object_value) { auto f = [](std::string_view key_view, const TypeDescriptor& type_desc, vectorized::IColumn* column_ptr, DataTypeSerDeSPtr serde, vectorized::DataTypeSerDe::FormatOptions serde_options, bool* valid) { - auto data_column_ptr = column_ptr; + auto* data_column_ptr = column_ptr; auto data_serde = serde; if (column_ptr->is_nullable()) { - auto nullable_column = static_cast(column_ptr); + auto* nullable_column = static_cast(column_ptr); nullable_column->get_null_map_data().push_back(0); data_column_ptr = nullable_column->get_nested_column().get_ptr().get(); @@ -1801,7 +1797,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& simdjson::ondemand::array array_value = value.get_array(); auto sub_serdes = data_serde->get_nested_serdes(); - auto array_column_ptr = assert_cast(data_column_ptr); + auto* array_column_ptr = assert_cast(data_column_ptr); int field_count = 0; for (simdjson::ondemand::value sub_value : array_value) { @@ -2158,4 +2154,5 @@ void NewJsonReader::_collect_profile_before_close() { } } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/json/new_json_reader.h b/be/src/vec/exec/format/json/new_json_reader.h index 482a1ced747cef..a9c006cb121b4a 100644 --- a/be/src/vec/exec/format/json/new_json_reader.h +++ b/be/src/vec/exec/format/json/new_json_reader.h @@ -23,8 +23,6 @@ #include #include #include // IWYU pragma: keep -#include -#include #include #include @@ -42,19 +40,13 @@ #include "vec/common/string_ref.h" #include "vec/core/types.h" #include "vec/exec/format/generic_reader.h" -#include "vec/json/json_parser.h" -#include "vec/json/simd_json_parser.h" -namespace simdjson { -namespace fallback { -namespace ondemand { +namespace simdjson::fallback::ondemand { class object; -} // namespace ondemand -} // namespace fallback -} // namespace simdjson +} // namespace simdjson::fallback::ondemand namespace doris { - +#include "common/compile_check_begin.h" class SlotDescriptor; class RuntimeState; class TFileRangeDesc; @@ -230,10 +222,10 @@ class NewJsonReader : public GenericReader { bool _skip_first_line; std::string _line_delimiter; - int _line_delimiter_length; + size_t _line_delimiter_length; - int _next_row; - int _total_rows; + uint32_t _next_row; + size_t _total_rows; std::string _jsonpaths; std::string _json_root; @@ -315,4 +307,5 @@ class NewJsonReader : public GenericReader { }; } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/orc/orc_memory_pool.h b/be/src/vec/exec/format/orc/orc_memory_pool.h index 1df3d63f95291e..caccb47d9838ae 100644 --- a/be/src/vec/exec/format/orc/orc_memory_pool.h +++ b/be/src/vec/exec/format/orc/orc_memory_pool.h @@ -27,6 +27,7 @@ using ORC_MEMORY_ALLOCATOR = ORCMemoryAllocator; #endif namespace doris::vectorized { +#include "common/compile_check_begin.h" class ORCMemoryPool : public orc::MemoryPool { public: @@ -50,4 +51,5 @@ class ORCMemoryPool : public orc::MemoryPool { Allocator _allocator; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index a488bef7455071..dbf035b6521b75 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -83,7 +83,6 @@ namespace doris { class RuntimeState; - namespace io { struct IOContext; enum class FileCachePolicy : uint8_t; @@ -91,7 +90,7 @@ enum class FileCachePolicy : uint8_t; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" // TODO: we need to determine it by test. static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits::max(); static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = ""; @@ -317,7 +316,7 @@ Status OrcReader::init_reader( Status OrcReader::get_parsed_schema(std::vector* col_names, std::vector* col_types) { RETURN_IF_ERROR(_create_file_reader()); - auto& root_type = _is_acid ? _remove_acid(_reader->getType()) : _reader->getType(); + const auto& root_type = _is_acid ? _remove_acid(_reader->getType()) : _reader->getType(); for (int i = 0; i < root_type.getSubtypeCount(); ++i) { col_names->emplace_back(get_field_name_lower_case(&root_type, i)); col_types->emplace_back(convert_to_doris_type(root_type.getSubtype(i))); @@ -329,7 +328,7 @@ Status OrcReader::get_schema_col_name_attribute(std::vector* col_na std::vector* col_attributes, std::string attribute) { RETURN_IF_ERROR(_create_file_reader()); - auto& root_type = _is_acid ? _remove_acid(_reader->getType()) : _reader->getType(); + const auto& root_type = _is_acid ? _remove_acid(_reader->getType()) : _reader->getType(); for (int i = 0; i < root_type.getSubtypeCount(); ++i) { col_names->emplace_back(get_field_name_lower_case(&root_type, i)); col_attributes->emplace_back( @@ -339,7 +338,7 @@ Status OrcReader::get_schema_col_name_attribute(std::vector* col_na } Status OrcReader::_init_read_columns() { - auto& root_type = _reader->getType(); + const auto& root_type = _reader->getType(); std::vector orc_cols; std::vector orc_cols_lower_case; bool is_hive1_orc = false; @@ -350,7 +349,7 @@ Status OrcReader::_init_read_columns() { _is_hive1_orc_or_use_idx = (is_hive1_orc || _is_hive1_orc_or_use_idx) && _scan_params.__isset.slot_name_to_schema_pos; for (size_t i = 0; i < _column_names->size(); ++i) { - auto& col_name = (*_column_names)[i]; + const auto& col_name = (*_column_names)[i]; if (_is_hive1_orc_or_use_idx) { auto iter = _scan_params.slot_name_to_schema_pos.find(col_name); if (iter != _scan_params.slot_name_to_schema_pos.end()) { @@ -371,7 +370,7 @@ Status OrcReader::_init_read_columns() { if (iter == orc_cols_lower_case.end()) { _missing_cols.emplace_back(col_name); } else { - int pos = std::distance(orc_cols_lower_case.begin(), iter); + auto pos = std::distance(orc_cols_lower_case.begin(), iter); std::string read_col; if (_is_acid && i < _column_names->size() - TransactionalHive::READ_PARAMS.size()) { read_col = fmt::format( @@ -402,7 +401,7 @@ Status OrcReader::_init_read_columns() { void OrcReader::_init_orc_cols(const orc::Type& type, std::vector& orc_cols, std::vector& orc_cols_lower_case, std::unordered_map& type_map, - bool* is_hive1_orc) { + bool* is_hive1_orc) const { bool hive1_orc = true; for (int i = 0; i < type.getSubtypeCount(); ++i) { orc_cols.emplace_back(type.getFieldName(i)); @@ -658,7 +657,7 @@ bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) { } // check if the literal of expr can be pushed down to orc reader and make orc literal -bool OrcReader::_check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id) { +bool OrcReader::_check_literal_can_push_down(const VExprSPtr& expr, size_t child_id) { if (!expr->children()[child_id]->is_literal()) { return false; } @@ -940,7 +939,7 @@ Status OrcReader::set_fill_columns( // std::unordered_map> std::unordered_map> predicate_columns; std::function visit_slot = [&](VExpr* expr) { - if (VSlotRef* slot_ref = typeid_cast(expr)) { + if (auto* slot_ref = typeid_cast(expr)) { auto expr_name = slot_ref->expr_name(); auto iter = _table_col_to_file_col.find(expr_name); if (iter != _table_col_to_file_col.end()) { @@ -952,24 +951,23 @@ Status OrcReader::set_fill_columns( _lazy_read_ctx.resize_first_column = false; } return; - } else if (VRuntimeFilterWrapper* runtime_filter = - typeid_cast(expr)) { - auto filter_impl = const_cast(runtime_filter->get_impl().get()); - if (VBloomPredicate* bloom_predicate = typeid_cast(filter_impl)) { - for (auto& child : bloom_predicate->children()) { + } else if (auto* runtime_filter = typeid_cast(expr)) { + auto* filter_impl = const_cast(runtime_filter->get_impl().get()); + if (auto* bloom_predicate = typeid_cast(filter_impl)) { + for (const auto& child : bloom_predicate->children()) { visit_slot(child.get()); } - } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { + } else if (auto* in_predicate = typeid_cast(filter_impl)) { if (!in_predicate->children().empty()) { visit_slot(in_predicate->children()[0].get()); } } else { - for (auto& child : filter_impl->children()) { + for (const auto& child : filter_impl->children()) { visit_slot(child.get()); } } } else { - for (auto& child : expr->children()) { + for (const auto& child : expr->children()) { visit_slot(child.get()); } } @@ -981,7 +979,7 @@ Status OrcReader::set_fill_columns( for (auto& read_col : _read_cols_lower_case) { _lazy_read_ctx.all_read_columns.emplace_back(read_col); - if (predicate_columns.size() > 0) { + if (!predicate_columns.empty()) { auto iter = predicate_columns.find(read_col); if (iter == predicate_columns.end()) { _lazy_read_ctx.lazy_read_columns.emplace_back(read_col); @@ -996,7 +994,7 @@ Status OrcReader::set_fill_columns( } if (_tuple_descriptor != nullptr) { - for (auto& each : _tuple_descriptor->slots()) { + for (const auto& each : _tuple_descriptor->slots()) { PrimitiveType column_type = each->col_type(); if (column_type == TYPE_ARRAY || column_type == TYPE_MAP || column_type == TYPE_STRUCT) { @@ -1006,7 +1004,7 @@ Status OrcReader::set_fill_columns( } } - for (auto& kv : partition_columns) { + for (const auto& kv : partition_columns) { auto iter = predicate_columns.find(kv.first); if (iter == predicate_columns.end()) { _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second); @@ -1016,7 +1014,7 @@ Status OrcReader::set_fill_columns( } } - for (auto& kv : missing_columns) { + for (const auto& kv : missing_columns) { auto iter = predicate_columns.find(kv.first); if (iter == predicate_columns.end()) { _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second); @@ -1024,7 +1022,8 @@ Status OrcReader::set_fill_columns( //For check missing column : missing column == xx, missing column is null,missing column is not null. if (_slot_id_to_filter_conjuncts->find(iter->second.second) != _slot_id_to_filter_conjuncts->end()) { - for (auto& ctx : _slot_id_to_filter_conjuncts->find(iter->second.second)->second) { + for (const auto& ctx : + _slot_id_to_filter_conjuncts->find(iter->second.second)->second) { _filter_conjuncts.emplace_back(ctx); } } @@ -1035,9 +1034,8 @@ Status OrcReader::set_fill_columns( } } - if (!_has_complex_type && _enable_lazy_mat && - _lazy_read_ctx.predicate_columns.first.size() > 0 && - _lazy_read_ctx.lazy_read_columns.size() > 0) { + if (!_has_complex_type && _enable_lazy_mat && !_lazy_read_ctx.predicate_columns.first.empty() && + !_lazy_read_ctx.lazy_read_columns.empty()) { _lazy_read_ctx.can_lazy_read = true; } @@ -1115,7 +1113,7 @@ Status OrcReader::set_fill_columns( // create orc row reader if (_lazy_read_ctx.can_lazy_read) { _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns); - _orc_filter = std::unique_ptr(new ORCFilterImpl(this)); + _orc_filter = std::make_unique(this); } if (!_lazy_read_ctx.conjuncts.empty()) { _string_dict_filter = std::make_unique(this); @@ -1123,7 +1121,7 @@ Status OrcReader::set_fill_columns( _row_reader = _reader->createRowReader(_row_reader_options, _orc_filter.get(), _string_dict_filter.get()); _batch = _row_reader->createRowBatch(_batch_size); - auto& selected_type = _row_reader->getSelectedType(); + const auto& selected_type = _row_reader->getSelectedType(); int idx = 0; RETURN_IF_ERROR(_init_select_types(selected_type, idx)); @@ -1147,7 +1145,7 @@ Status OrcReader::set_fill_columns( auto& [value, slot_desc] = kv.second; auto iter = _slot_id_to_filter_conjuncts->find(slot_desc->id()); if (iter != _slot_id_to_filter_conjuncts->end()) { - for (auto& ctx : iter->second) { + for (const auto& ctx : iter->second) { _filter_conjuncts.push_back(ctx); } } @@ -1176,17 +1174,17 @@ Status OrcReader::_init_select_types(const orc::Type& type, int idx) { } Status OrcReader::_fill_partition_columns( - Block* block, size_t rows, + Block* block, uint64_t rows, const std::unordered_map>& partition_columns) { DataTypeSerDe::FormatOptions _text_formatOptions; - for (auto& kv : partition_columns) { + for (const auto& kv : partition_columns) { auto doris_column = block->get_by_name(kv.first).column; - IColumn* col_ptr = const_cast(doris_column.get()); - auto& [value, slot_desc] = kv.second; + auto* col_ptr = const_cast(doris_column.get()); + const auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); - int num_deserialized = 0; + uint64_t num_deserialized = 0; if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, &num_deserialized, _text_formatOptions) != Status::OK()) { @@ -1206,9 +1204,9 @@ Status OrcReader::_fill_partition_columns( } Status OrcReader::_fill_missing_columns( - Block* block, size_t rows, + Block* block, uint64_t rows, const std::unordered_map& missing_columns) { - for (auto& kv : missing_columns) { + for (const auto& kv : missing_columns) { if (kv.second == nullptr) { // no default column, fill with null auto mutable_column = block->get_by_name(kv.first).column->assume_mutable(); @@ -1216,7 +1214,7 @@ Status OrcReader::_fill_missing_columns( nullable_column->insert_many_defaults(rows); } else { // fill with default value - auto& ctx = kv.second; + const auto& ctx = kv.second; auto origin_column_num = block->columns(); int result_column_id = -1; // PT1 => dest primitive type @@ -1275,40 +1273,40 @@ void OrcReader::_init_file_description() { TypeDescriptor OrcReader::convert_to_doris_type(const orc::Type* orc_type) { switch (orc_type->getKind()) { case orc::TypeKind::BOOLEAN: - return TypeDescriptor(PrimitiveType::TYPE_BOOLEAN); + return {PrimitiveType::TYPE_BOOLEAN}; case orc::TypeKind::BYTE: - return TypeDescriptor(PrimitiveType::TYPE_TINYINT); + return {PrimitiveType::TYPE_TINYINT}; case orc::TypeKind::SHORT: - return TypeDescriptor(PrimitiveType::TYPE_SMALLINT); + return {PrimitiveType::TYPE_SMALLINT}; case orc::TypeKind::INT: - return TypeDescriptor(PrimitiveType::TYPE_INT); + return {PrimitiveType::TYPE_INT}; case orc::TypeKind::LONG: - return TypeDescriptor(PrimitiveType::TYPE_BIGINT); + return {PrimitiveType::TYPE_BIGINT}; case orc::TypeKind::FLOAT: - return TypeDescriptor(PrimitiveType::TYPE_FLOAT); + return {PrimitiveType::TYPE_FLOAT}; case orc::TypeKind::DOUBLE: - return TypeDescriptor(PrimitiveType::TYPE_DOUBLE); + return {PrimitiveType::TYPE_DOUBLE}; case orc::TypeKind::STRING: - return TypeDescriptor(PrimitiveType::TYPE_STRING); + return {PrimitiveType::TYPE_STRING}; case orc::TypeKind::BINARY: - return TypeDescriptor(PrimitiveType::TYPE_STRING); + return {PrimitiveType::TYPE_STRING}; case orc::TypeKind::TIMESTAMP: - return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2); + return {PrimitiveType::TYPE_DATETIMEV2}; case orc::TypeKind::DECIMAL: if (orc_type->getPrecision() == 0) { return TypeDescriptor::create_decimalv3_type(decimal_precision_for_hive11, decimal_scale_for_hive11); } - return TypeDescriptor::create_decimalv3_type(orc_type->getPrecision(), - orc_type->getScale()); + return TypeDescriptor::create_decimalv3_type(cast_set(orc_type->getPrecision()), + cast_set(orc_type->getScale())); case orc::TypeKind::DATE: - return TypeDescriptor(PrimitiveType::TYPE_DATEV2); + return {PrimitiveType::TYPE_DATEV2}; case orc::TypeKind::VARCHAR: - return TypeDescriptor::create_varchar_type(orc_type->getMaximumLength()); + return TypeDescriptor::create_varchar_type(cast_set(orc_type->getMaximumLength())); case orc::TypeKind::CHAR: - return TypeDescriptor::create_char_type(orc_type->getMaximumLength()); + return TypeDescriptor::create_char_type(cast_set(orc_type->getMaximumLength())); case orc::TypeKind::TIMESTAMP_INSTANT: - return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2); + return {PrimitiveType::TYPE_DATETIMEV2}; case orc::TypeKind::LIST: { TypeDescriptor list_type(PrimitiveType::TYPE_ARRAY); list_type.add_sub_type(convert_to_doris_type(orc_type->getSubtype(0))); @@ -1329,13 +1327,13 @@ TypeDescriptor OrcReader::convert_to_doris_type(const orc::Type* orc_type) { return struct_type; } default: - return TypeDescriptor(PrimitiveType::INVALID_TYPE); + return {PrimitiveType::INVALID_TYPE}; } } Status OrcReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - auto& root_type = _reader->getType(); + const auto& root_type = _reader->getType(); for (int i = 0; i < root_type.getSubtypeCount(); ++i) { name_to_type->emplace(get_field_name_lower_case(&root_type, i), convert_to_doris_type(root_type.getSubtype(i))); @@ -1358,10 +1356,10 @@ static inline size_t trim_right(const char* s, size_t size) { template Status OrcReader::_decode_string_column(const std::string& col_name, const MutableColumnPtr& data_column, - const orc::TypeKind& type_kind, orc::ColumnVectorBatch* cvb, - size_t num_values) { + const orc::TypeKind& type_kind, + const orc::ColumnVectorBatch* cvb, size_t num_values) { SCOPED_RAW_TIMER(&_statistics.decode_value_time); - auto* data = dynamic_cast(cvb); + const auto* data = dynamic_cast(cvb); if (data == nullptr) { return Status::InternalError( "Wrong data type for column '{}', expected EncodedStringVectorBatch", col_name); @@ -1379,7 +1377,7 @@ template Status OrcReader::_decode_string_non_dict_encoded_column(const std::string& col_name, const MutableColumnPtr& data_column, const orc::TypeKind& type_kind, - orc::EncodedStringVectorBatch* cvb, + const orc::EncodedStringVectorBatch* cvb, size_t num_values) { const static std::string empty_string; std::vector string_values; @@ -1425,7 +1423,7 @@ Status OrcReader::_decode_string_non_dict_encoded_column(const std::string& col_ } } if (!string_values.empty()) { - data_column->insert_many_strings(&string_values[0], num_values); + data_column->insert_many_strings(string_values.data(), num_values); } return Status::OK(); } @@ -1434,7 +1432,7 @@ template Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name, const MutableColumnPtr& data_column, const orc::TypeKind& type_kind, - orc::EncodedStringVectorBatch* cvb, + const orc::EncodedStringVectorBatch* cvb, size_t num_values) { std::vector string_values; size_t max_value_length = 0; @@ -1531,7 +1529,7 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name } } if (!string_values.empty()) { - data_column->insert_many_strings_overflow(&string_values[0], string_values.size(), + data_column->insert_many_strings_overflow(string_values.data(), string_values.size(), max_value_length); } return Status::OK(); @@ -1540,14 +1538,14 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name template Status OrcReader::_decode_int32_column(const std::string& col_name, const MutableColumnPtr& data_column, - orc::ColumnVectorBatch* cvb, size_t num_values) { + const orc::ColumnVectorBatch* cvb, size_t num_values) { SCOPED_RAW_TIMER(&_statistics.decode_value_time); - if (dynamic_cast(cvb) != nullptr) { + if (dynamic_cast(cvb) != nullptr) { return _decode_flat_column(col_name, data_column, cvb, num_values); - } else if (dynamic_cast(cvb) != nullptr) { - auto* data = static_cast(cvb); - auto* cvb_data = data->index.data(); + } else if (dynamic_cast(cvb) != nullptr) { + const auto* data = static_cast(cvb); + const auto* cvb_data = data->index.data(); auto& column_data = static_cast&>(*data_column).get_data(); auto origin_size = column_data.size(); column_data.resize(origin_size + num_values); @@ -1563,11 +1561,11 @@ Status OrcReader::_decode_int32_column(const std::string& col_name, Status OrcReader::_fill_doris_array_offsets(const std::string& col_name, ColumnArray::Offsets64& doris_offsets, - orc::DataBuffer& orc_offsets, + const orc::DataBuffer& orc_offsets, size_t num_values, size_t* element_size) { SCOPED_RAW_TIMER(&_statistics.decode_value_time); if (num_values > 0) { - if (orc_offsets.size() < num_values + 1) { + if (const_cast&>(orc_offsets).size() < num_values + 1) { return Status::InternalError("Wrong array offsets in orc file for column '{}'", col_name); } @@ -1588,7 +1586,7 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, MutableColumnPtr& data_column, const DataTypePtr& data_type, const orc::Type* orc_column_type, - orc::ColumnVectorBatch* cvb, size_t num_values) { + const orc::ColumnVectorBatch* cvb, size_t num_values) { TypeIndex logical_type = remove_nullable(data_type)->get_type_id(); switch (logical_type) { #define DISPATCH(FlatType, CppType, OrcColumnType) \ @@ -1633,9 +1631,9 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, "Wrong data type for column '{}', expected list, actual {}", col_name, orc_column_type->getKind()); } - auto* orc_list = dynamic_cast(cvb); + const auto* orc_list = dynamic_cast(cvb); auto& doris_offsets = static_cast(*data_column).get_offsets(); - auto& orc_offsets = orc_list->offsets; + const auto& orc_offsets = orc_list->offsets; size_t element_size = 0; RETURN_IF_ERROR(_fill_doris_array_offsets(col_name, doris_offsets, orc_offsets, num_values, &element_size)); @@ -1653,7 +1651,7 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, return Status::InternalError("Wrong data type for column '{}', expected map, actual {}", col_name, orc_column_type->getKind()); } - auto* orc_map = dynamic_cast(cvb); + const auto* orc_map = dynamic_cast(cvb); auto& doris_map = static_cast(*data_column); size_t element_size = 0; RETURN_IF_ERROR(_fill_doris_array_offsets(col_name, doris_map.get_offsets(), @@ -1683,7 +1681,7 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, "Wrong data type for column '{}', expected struct, actual {}", col_name, orc_column_type->getKind()); } - auto* orc_struct = dynamic_cast(cvb); + const auto* orc_struct = dynamic_cast(cvb); auto& doris_struct = static_cast(*data_column); std::map read_fields; std::set missing_fields; @@ -1737,7 +1735,8 @@ template Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, ColumnPtr& doris_column, const DataTypePtr& data_type, const orc::Type* orc_column_type, - orc::ColumnVectorBatch* cvb, size_t num_values) { + const orc::ColumnVectorBatch* cvb, + size_t num_values) { TypeDescriptor src_type = convert_to_doris_type(orc_column_type); bool is_dict_filter_col = false; for (const std::pair& dict_col : _dict_filter_cols) { @@ -1777,7 +1776,7 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, Colum auto origin_size = map_data_column.size(); map_data_column.resize(origin_size + num_values); if (cvb->hasNulls) { - auto* cvb_nulls = reinterpret_cast(cvb->notNull.data()); + const auto* cvb_nulls = cvb->notNull.data(); for (int i = 0; i < num_values; ++i) { map_data_column[origin_size + i] = !cvb_nulls[i]; } @@ -2070,7 +2069,7 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof void OrcReader::_fill_batch_vec(std::vector& result, orc::ColumnVectorBatch* batch, int idx) { - for (auto* field : dynamic_cast(batch)->fields) { + for (auto* field : dynamic_cast(batch)->fields) { result.push_back(field); if (_is_acid && _col_orc_type[idx++]->getKind() == orc::TypeKind::STRUCT) { _fill_batch_vec(result, field, idx); @@ -2081,20 +2080,18 @@ void OrcReader::_fill_batch_vec(std::vector& result, void OrcReader::_build_delete_row_filter(const Block* block, size_t rows) { // transactional hive orc delete row if (_delete_rows != nullptr) { - _delete_rows_filter_ptr.reset(new IColumn::Filter(rows, 1)); + _delete_rows_filter_ptr = std::make_unique(rows, 1); auto* __restrict _pos_delete_filter_data = _delete_rows_filter_ptr->data(); - const ColumnInt64& original_transaction_column = - assert_cast(*remove_nullable( - block->get_by_name(TransactionalHive::ORIGINAL_TRANSACTION_LOWER_CASE) - .column)); - const ColumnInt32& bucket_id_column = assert_cast( + const auto& original_transaction_column = assert_cast(*remove_nullable( + block->get_by_name(TransactionalHive::ORIGINAL_TRANSACTION_LOWER_CASE).column)); + const auto& bucket_id_column = assert_cast( *remove_nullable(block->get_by_name(TransactionalHive::BUCKET_LOWER_CASE).column)); - const ColumnInt64& row_id_column = assert_cast( + const auto& row_id_column = assert_cast( *remove_nullable(block->get_by_name(TransactionalHive::ROW_ID_LOWER_CASE).column)); for (int i = 0; i < rows; ++i) { - Int64 original_transaction = original_transaction_column.get_int(i); - Int32 bucket_id = bucket_id_column.get_int(i); - Int64 row_id = row_id_column.get_int(i); + auto original_transaction = original_transaction_column.get_int(i); + auto bucket_id = bucket_id_column.get_int(i); + auto row_id = row_id_column.get_int(i); TransactionalHiveReader::AcidRowID transactional_row_id = {original_transaction, bucket_id, row_id}; @@ -2106,7 +2103,7 @@ void OrcReader::_build_delete_row_filter(const Block* block, size_t rows) { } Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t size, void* arg) { - Block* block = (Block*)arg; + auto* block = (Block*)arg; size_t origin_column_num = block->columns(); if (!_dict_cols_has_converted && !_dict_filter_cols.empty()) { @@ -2162,7 +2159,7 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s // transactional hive orc delete row _build_delete_row_filter(block, size); - _filter.reset(new IColumn::Filter(size, 1)); + _filter = std::make_unique(size, 1); auto* __restrict result_filter_data = _filter->data(); bool can_filter_all = false; VExprContextSPtrs filter_conjuncts; @@ -2225,15 +2222,15 @@ Status OrcReader::fill_dict_filter_column_names( const std::list& predicate_col_names = _lazy_read_ctx.predicate_columns.first; const std::vector& predicate_col_slot_ids = _lazy_read_ctx.predicate_columns.second; int i = 0; - for (auto& predicate_col_name : predicate_col_names) { + for (const auto& predicate_col_name : predicate_col_names) { int slot_id = predicate_col_slot_ids[i]; if (_can_filter_by_dict(slot_id)) { - _dict_filter_cols.emplace_back(std::make_pair(predicate_col_name, slot_id)); + _dict_filter_cols.emplace_back(predicate_col_name, slot_id); column_names.emplace_back(_col_name_to_file_col_name[predicate_col_name]); } else { if (_slot_id_to_filter_conjuncts->find(slot_id) != _slot_id_to_filter_conjuncts->end()) { - for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { + for (const auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { _non_dict_filter_conjuncts.push_back(ctx); } } @@ -2246,7 +2243,7 @@ Status OrcReader::fill_dict_filter_column_names( bool OrcReader::_can_filter_by_dict(int slot_id) { SlotDescriptor* slot = nullptr; const std::vector& slots = _tuple_descriptor->slots(); - for (auto each : slots) { + for (auto* each : slots) { if (each->id() == slot_id) { slot = each; break; @@ -2273,19 +2270,13 @@ bool OrcReader::_can_filter_by_dict(int slot_id) { if (expr->node_type() != TExprNodeType::SLOT_REF) { return false; } - for (auto& child : expr->children()) { - if (!visit_function_call(child.get())) { - return false; - } - } - return true; + return std::ranges::all_of(expr->children(), [&](const auto& child) { + return visit_function_call(child.get()); + }); }; - for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) { - if (!visit_function_call(ctx->root().get())) { - return false; - } - } - return true; + return std::ranges::all_of(_slot_id_to_filter_conjuncts->at(slot_id), [&](const auto& ctx) { + return visit_function_call(ctx->root().get()); + }); } Status OrcReader::on_string_dicts_loaded( @@ -2300,7 +2291,7 @@ Status OrcReader::on_string_dicts_loaded( VExprContextSPtrs ctxs; auto iter = _slot_id_to_filter_conjuncts->find(slot_id); if (iter != _slot_id_to_filter_conjuncts->end()) { - for (auto& ctx : iter->second) { + for (const auto& ctx : iter->second) { ctxs.push_back(ctx); } } else { @@ -2343,7 +2334,7 @@ Status OrcReader::on_string_dicts_loaded( } dict_values.emplace_back(dict_value); } - dict_value_column->insert_many_strings_overflow(&dict_values[0], dict_values.size(), + dict_value_column->insert_many_strings_overflow(dict_values.data(), dict_values.size(), max_value_length); size_t dict_value_column_size = dict_value_column->size(); // 2. Build a temp block from the dict string column, then execute conjuncts and filter block. @@ -2351,7 +2342,7 @@ Status OrcReader::on_string_dicts_loaded( Block temp_block; int dict_pos = -1; int index = 0; - for (const auto slot_desc : _tuple_descriptor->slots()) { + for (auto* const slot_desc : _tuple_descriptor->slots()) { if (!slot_desc->need_materialize()) { // should be ignored from reading continue; @@ -2456,7 +2447,7 @@ Status OrcReader::_rewrite_dict_conjuncts(std::vector& dict_codes, int { SlotDescriptor* slot = nullptr; const std::vector& slots = _tuple_descriptor->slots(); - for (auto each : slots) { + for (auto* each : slots) { if (each->id() == slot_id) { slot = each; break; @@ -2487,15 +2478,15 @@ Status OrcReader::_rewrite_dict_conjuncts(std::vector& dict_codes, int std::shared_ptr hybrid_set( create_set(PrimitiveType::TYPE_INT, dict_codes.size())); - for (int j = 0; j < dict_codes.size(); ++j) { - hybrid_set->insert(&dict_codes[j]); + for (int& dict_code : dict_codes) { + hybrid_set->insert(&dict_code); } root = vectorized::VDirectInPredicate::create_shared(node, hybrid_set); } { SlotDescriptor* slot = nullptr; const std::vector& slots = _tuple_descriptor->slots(); - for (auto each : slots) { + for (auto* each : slots) { if (each->id() == slot_id) { slot = each; break; @@ -2526,10 +2517,9 @@ Status OrcReader::_convert_dict_cols_to_string_cols( return Status::InternalError("Wrong read column '{}' in orc file", dict_filter_cols.first); } - if (auto* nullable_column = check_and_get_column(*column)) { + if (const auto* nullable_column = check_and_get_column(*column)) { const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr(); - const ColumnInt32* dict_column = - assert_cast(nested_column.get()); + const auto* dict_column = assert_cast(nested_column.get()); DCHECK(dict_column); const NullMap& null_map = nullable_column->get_null_map_data(); @@ -2548,7 +2538,7 @@ Status OrcReader::_convert_dict_cols_to_string_cols( pos, ColumnNullable::create(std::move(string_column), nullable_column->get_null_map_column_ptr())); } else { - const ColumnInt32* dict_column = assert_cast(column.get()); + const auto* dict_column = assert_cast(column.get()); MutableColumnPtr string_column; if (batch_vec != nullptr) { string_column = _convert_dict_column_to_string_column( @@ -2586,7 +2576,7 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( if (orc_column_type->getKind() == orc::TypeKind::CHAR) { // Possibly there are some zero padding characters in CHAR type, we have to strip them off. if (null_map) { - auto* null_map_data = null_map->data(); + const auto* null_map_data = null_map->data(); for (int i = 0; i < num_values; ++i) { if (!null_map_data[i]) { char* val_ptr; @@ -2622,7 +2612,7 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( } } else { if (null_map) { - auto* null_map_data = null_map->data(); + const auto* null_map_data = null_map->data(); for (int i = 0; i < num_values; ++i) { if (!null_map_data[i]) { char* val_ptr; @@ -2653,7 +2643,7 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( } } if (!string_values.empty()) { - res->insert_many_strings_overflow(&string_values[0], num_values, max_value_length); + res->insert_many_strings_overflow(string_values.data(), num_values, max_value_length); } return res; } @@ -2675,12 +2665,12 @@ void ORCFileInputStream::beforeReadStripe( ++stream_id) { std::unique_ptr stream = current_strip_information->getStreamInformation(stream_id); - uint32_t columnId = stream->getColumnId(); + uint64_t columnId = stream->getColumnId(); uint64_t length = stream->getLength(); if (selected_columns[columnId]) { total_io_size += length; doris::io::PrefetchRange prefetch_range = {offset, offset + length}; - prefetch_ranges.emplace_back(std::move(prefetch_range)); + prefetch_ranges.emplace_back(prefetch_range); } offset += length; } @@ -2714,4 +2704,5 @@ void OrcReader::_execute_filter_position_delete_rowids(IColumn::Filter& filter) } } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 6bbf3bead1efce..9cd8412db0ddca 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -59,7 +59,6 @@ namespace doris { class RuntimeState; class TFileRangeDesc; class TFileScanRangeParams; - namespace io { class FileSystem; struct IOContext; @@ -80,7 +79,7 @@ class DataBuffer; } // namespace orc namespace doris::vectorized { - +#include "common/compile_check_begin.h" class ORCFileInputStream; struct LazyReadContext { @@ -153,11 +152,11 @@ class OrcReader : public GenericReader { Status _init_select_types(const orc::Type& type, int idx); Status _fill_partition_columns( - Block* block, size_t rows, + Block* block, uint64_t rows, const std::unordered_map>& partition_columns); Status _fill_missing_columns( - Block* block, size_t rows, + Block* block, uint64_t rows, const std::unordered_map& missing_columns); Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; @@ -249,7 +248,7 @@ class OrcReader : public GenericReader { StringDictFilterImpl(OrcReader* orc_reader) : _orc_reader(orc_reader) {} ~StringDictFilterImpl() override = default; - virtual void fillDictFilterColumnNames( + void fillDictFilterColumnNames( std::unique_ptr current_strip_information, std::list& column_names) const override { if (_status.ok()) { @@ -257,7 +256,7 @@ class OrcReader : public GenericReader { std::move(current_strip_information), column_names); } } - virtual void onStringDictsLoaded( + void onStringDictsLoaded( std::unordered_map& column_name_to_dict_map, bool* is_stripe_filtered) const override { if (_status.ok()) { @@ -285,7 +284,7 @@ class OrcReader : public GenericReader { void _init_orc_cols(const orc::Type& type, std::vector& orc_cols, std::vector& orc_cols_lower_case, std::unordered_map& type_map, - bool* is_hive1_orc); + bool* is_hive1_orc) const; static bool _check_acid_schema(const orc::Type& type); static const orc::Type& _remove_acid(const orc::Type& type); @@ -293,7 +292,7 @@ class OrcReader : public GenericReader { std::tuple _make_orc_literal( const VSlotRef* slot_ref, const VLiteral* literal); bool _check_slot_can_push_down(const VExprSPtr& expr); - bool _check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id); + bool _check_literal_can_push_down(const VExprSPtr& expr, size_t child_id); bool _check_rest_children_can_push_down(const VExprSPtr& expr); bool _check_expr_can_push_down(const VExprSPtr& expr); void _build_less_than(const VExprSPtr& expr, @@ -317,19 +316,19 @@ class OrcReader : public GenericReader { template Status _fill_doris_data_column(const std::string& col_name, MutableColumnPtr& data_column, const DataTypePtr& data_type, const orc::Type* orc_column_type, - orc::ColumnVectorBatch* cvb, size_t num_values); + const orc::ColumnVectorBatch* cvb, size_t num_values); template Status _orc_column_to_doris_column(const std::string& col_name, ColumnPtr& doris_column, const DataTypePtr& data_type, const orc::Type* orc_column_type, - orc::ColumnVectorBatch* cvb, size_t num_values); + const orc::ColumnVectorBatch* cvb, size_t num_values); template Status _decode_flat_column(const std::string& col_name, const MutableColumnPtr& data_column, - orc::ColumnVectorBatch* cvb, size_t num_values) { + const orc::ColumnVectorBatch* cvb, size_t num_values) { SCOPED_RAW_TIMER(&_statistics.decode_value_time); - OrcColumnType* data = dynamic_cast(cvb); + auto* data = dynamic_cast(cvb); if (data == nullptr) { return Status::InternalError("Wrong data type for column '{}', expected {}", col_name, cvb->toString()); @@ -355,12 +354,14 @@ class OrcReader : public GenericReader { auto dest_scale = decimal_type->get_scale(); if (dest_scale > orc_decimal_scale) { scale_params.scale_type = DecimalScaleParams::SCALE_UP; - scale_params.scale_factor = DecimalScaleParams::get_scale_factor( - dest_scale - orc_decimal_scale); + scale_params.scale_factor = + cast_set(DecimalScaleParams::get_scale_factor( + dest_scale - orc_decimal_scale)); } else if (dest_scale < orc_decimal_scale) { scale_params.scale_type = DecimalScaleParams::SCALE_DOWN; - scale_params.scale_factor = DecimalScaleParams::get_scale_factor( - orc_decimal_scale - dest_scale); + scale_params.scale_factor = + cast_set(DecimalScaleParams::get_scale_factor( + orc_decimal_scale - dest_scale)); } else { scale_params.scale_type = DecimalScaleParams::NO_SCALE; scale_params.scale_factor = 1; @@ -371,8 +372,8 @@ class OrcReader : public GenericReader { Status _decode_explicit_decimal_column(const std::string& col_name, const MutableColumnPtr& data_column, const DataTypePtr& data_type, - orc::ColumnVectorBatch* cvb, size_t num_values) { - OrcColumnType* data = dynamic_cast(cvb); + const orc::ColumnVectorBatch* cvb, size_t num_values) { + auto* data = dynamic_cast(cvb); if (data == nullptr) { return Status::InternalError("Wrong data type for column '{}', expected {}", col_name, cvb->toString()); @@ -398,8 +399,10 @@ class OrcReader : public GenericReader { if constexpr (std::is_same_v) { value = static_cast(cvb_data[i]); } else { - uint64_t hi = data->values[i].getHighBits(); - uint64_t lo = data->values[i].getLowBits(); + // cast data to non const + auto* non_const_data = const_cast(data); + uint64_t hi = non_const_data->values[i].getHighBits(); + uint64_t lo = non_const_data->values[i].getLowBits(); value = (((int128_t)hi) << 64) | (int128_t)lo; } value *= scale_params.scale_factor; @@ -412,8 +415,10 @@ class OrcReader : public GenericReader { if constexpr (std::is_same_v) { value = static_cast(cvb_data[i]); } else { - uint64_t hi = data->values[i].getHighBits(); - uint64_t lo = data->values[i].getLowBits(); + // cast data to non const + auto* non_const_data = const_cast(data); + uint64_t hi = non_const_data->values[i].getHighBits(); + uint64_t lo = non_const_data->values[i].getLowBits(); value = (((int128_t)hi) << 64) | (int128_t)lo; } value /= scale_params.scale_factor; @@ -426,8 +431,10 @@ class OrcReader : public GenericReader { if constexpr (std::is_same_v) { value = static_cast(cvb_data[i]); } else { - uint64_t hi = data->values[i].getHighBits(); - uint64_t lo = data->values[i].getLowBits(); + // cast data to non const + auto* non_const_data = const_cast(data); + uint64_t hi = non_const_data->values[i].getHighBits(); + uint64_t lo = non_const_data->values[i].getLowBits(); value = (((int128_t)hi) << 64) | (int128_t)lo; } auto& v = reinterpret_cast(column_data[origin_size + i]); @@ -439,14 +446,14 @@ class OrcReader : public GenericReader { template Status _decode_int32_column(const std::string& col_name, const MutableColumnPtr& data_column, - orc::ColumnVectorBatch* cvb, size_t num_values); + const orc::ColumnVectorBatch* cvb, size_t num_values); template Status _decode_decimal_column(const std::string& col_name, const MutableColumnPtr& data_column, - const DataTypePtr& data_type, orc::ColumnVectorBatch* cvb, + const DataTypePtr& data_type, const orc::ColumnVectorBatch* cvb, size_t num_values) { SCOPED_RAW_TIMER(&_statistics.decode_value_time); - if (dynamic_cast(cvb) != nullptr) { + if (dynamic_cast(cvb) != nullptr) { return _decode_explicit_decimal_column(col_name, data_column, data_type, cvb, num_values); @@ -459,9 +466,9 @@ class OrcReader : public GenericReader { template Status _decode_time_column(const std::string& col_name, const MutableColumnPtr& data_column, - orc::ColumnVectorBatch* cvb, size_t num_values) { + const orc::ColumnVectorBatch* cvb, size_t num_values) { SCOPED_RAW_TIMER(&_statistics.decode_value_time); - auto* data = dynamic_cast(cvb); + auto* data = dynamic_cast(cvb); if (data == nullptr) { return Status::InternalError("Wrong data type for column '{}', expected {}", col_name, cvb->toString()); @@ -482,7 +489,9 @@ class OrcReader : public GenericReader { continue; } } - int64_t date_value = data->data[i] + _offset_days; + + // because the date api argument is int32_t, we should cast to int32_t. + int32_t date_value = cast_set(data->data[i]) + _offset_days; if constexpr (std::is_same_v) { v.create_from_date_v2(date_dict[date_value], TIME_DATE); // we should cast to date if using date v1. @@ -508,26 +517,26 @@ class OrcReader : public GenericReader { template Status _decode_string_column(const std::string& col_name, const MutableColumnPtr& data_column, - const orc::TypeKind& type_kind, orc::ColumnVectorBatch* cvb, + const orc::TypeKind& type_kind, const orc::ColumnVectorBatch* cvb, size_t num_values); template Status _decode_string_non_dict_encoded_column(const std::string& col_name, const MutableColumnPtr& data_column, const orc::TypeKind& type_kind, - orc::EncodedStringVectorBatch* cvb, + const orc::EncodedStringVectorBatch* cvb, size_t num_values); template Status _decode_string_dict_encoded_column(const std::string& col_name, const MutableColumnPtr& data_column, const orc::TypeKind& type_kind, - orc::EncodedStringVectorBatch* cvb, + const orc::EncodedStringVectorBatch* cvb, size_t num_values); Status _fill_doris_array_offsets(const std::string& col_name, ColumnArray::Offsets64& doris_offsets, - orc::DataBuffer& orc_offsets, size_t num_values, + const orc::DataBuffer& orc_offsets, size_t num_values, size_t* element_size); void _collect_profile_on_close(); @@ -543,7 +552,7 @@ class OrcReader : public GenericReader { const NullMap* null_map, orc::ColumnVectorBatch* cvb, const orc::Type* orc_column_typ); - int64_t get_remaining_rows() { return _remaining_rows; } + int64_t get_remaining_rows() const { return _remaining_rows; } void set_remaining_rows(int64_t rows) { _remaining_rows = rows; } // check if the given name is like _col0, _col1, ... @@ -701,4 +710,5 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { const io::IOContext* _io_ctx = nullptr; RuntimeProfile* _profile = nullptr; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index 1eae65b1a4db19..ff327d2b0260be 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -150,7 +150,7 @@ Status FieldDescriptor::parse_from_thrift(const std::vector _name_to_field; // Used in from_thrift, marking the next schema position that should be parsed size_t _next_schema_pos; - std::unordered_map _field_id_name_mapping; + std::unordered_map _field_id_name_mapping; void parse_physical_field(const tparquet::SchemaElement& physical_schema, bool is_nullable, FieldSchema* physical_field); @@ -135,7 +135,7 @@ class FieldDescriptor { bool has_parquet_field_id() const { return _field_id_name_mapping.size() > 0; } - const doris::Slice get_column_name_from_field_id(int32_t id) const; + const doris::Slice get_column_name_from_field_id(uint64_t id) const; }; } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index a18626066b1811..396cf1c38b4f90 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -659,7 +659,7 @@ Status RowGroupReader::_fill_partition_columns( auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); - int num_deserialized = 0; + uint64_t num_deserialized = 0; // Be careful when reading empty rows from parquet row groups. if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, &num_deserialized, diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 5b4d8a1a6cebc4..5267bda907a924 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -51,6 +51,7 @@ #include "vec/exec/format/parquet/vparquet_page_index.h" #include "vec/exprs/vbloom_predicate.h" #include "vec/exprs/vexpr.h" +#include "vec/exprs/vexpr_context.h" #include "vec/exprs/vin_predicate.h" #include "vec/exprs/vruntimefilter_wrapper.h" #include "vec/exprs/vslot_ref.h" diff --git a/be/src/vec/exec/format/table/equality_delete.cpp b/be/src/vec/exec/format/table/equality_delete.cpp index 94f807a408b8dd..bf31f81548cec0 100644 --- a/be/src/vec/exec/format/table/equality_delete.cpp +++ b/be/src/vec/exec/format/table/equality_delete.cpp @@ -17,7 +17,10 @@ #include "vec/exec/format/table/equality_delete.h" +#include "exprs/create_predicate_function.h" + namespace doris::vectorized { +#include "common/compile_check_begin.h" std::unique_ptr EqualityDeleteBase::get_delete_impl(Block* delete_block) { if (delete_block->columns() == 1) { @@ -65,7 +68,7 @@ Status SimpleEqualityDelete::filter_data_block(Block* data_block) { ->get_null_map_data(); _hybrid_set->find_batch_nullable( remove_nullable(column_and_type->column)->assume_mutable_ref(), rows, null_map, - *_filter.get()); + *_filter); if (_hybrid_set->contain_null()) { auto* filter_data = _filter->data(); for (size_t i = 0; i < rows; ++i) { @@ -73,8 +76,7 @@ Status SimpleEqualityDelete::filter_data_block(Block* data_block) { } } } else { - _hybrid_set->find_batch(column_and_type->column->assume_mutable_ref(), rows, - *_filter.get()); + _hybrid_set->find_batch(column_and_type->column->assume_mutable_ref(), rows, *_filter); } // should reverse _filter auto* filter_data = _filter->data(); @@ -82,7 +84,7 @@ Status SimpleEqualityDelete::filter_data_block(Block* data_block) { filter_data[i] = !filter_data[i]; } - Block::filter_block_internal(data_block, *_filter.get(), data_block->columns()); + Block::filter_block_internal(data_block, *_filter, data_block->columns()); return Status::OK(); } @@ -140,7 +142,7 @@ Status MultiEqualityDelete::filter_data_block(Block* data_block) { } } - Block::filter_block_internal(data_block, *_filter.get(), data_block->columns()); + Block::filter_block_internal(data_block, *_filter, data_block->columns()); return Status::OK(); } @@ -157,4 +159,5 @@ bool MultiEqualityDelete::_equal(Block* data_block, size_t data_row_index, return true; } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/equality_delete.h b/be/src/vec/exec/format/table/equality_delete.h index 6ac7d05a4c9818..fbcbcfdaa77fa7 100644 --- a/be/src/vec/exec/format/table/equality_delete.h +++ b/be/src/vec/exec/format/table/equality_delete.h @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -#include "exprs/create_predicate_function.h" +#include "exprs/hybrid_set.h" #include "util/runtime_profile.h" #include "vec/core/block.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" /** * Support Iceberg equality delete. @@ -102,4 +103,5 @@ class MultiEqualityDelete : public EqualityDeleteBase { Status filter_data_block(Block* data_block) override; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/hudi_jni_reader.cpp b/be/src/vec/exec/format/table/hudi_jni_reader.cpp index cb109bf05a2393..824ceb7e5d7c5f 100644 --- a/be/src/vec/exec/format/table/hudi_jni_reader.cpp +++ b/be/src/vec/exec/format/table/hudi_jni_reader.cpp @@ -27,14 +27,13 @@ namespace doris { class RuntimeProfile; class RuntimeState; - namespace vectorized { class Block; } // namespace vectorized } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" const std::string HudiJniReader::HOODIE_CONF_PREFIX = "hoodie."; const std::string HudiJniReader::HADOOP_CONF_PREFIX = "hadoop_conf."; @@ -46,7 +45,7 @@ HudiJniReader::HudiJniReader(const TFileScanRangeParams& scan_params, _scan_params(scan_params), _hudi_params(hudi_params) { std::vector required_fields; - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { required_fields.emplace_back(desc->col_name()); } @@ -89,7 +88,7 @@ Status HudiJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) Status HudiJniReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -101,4 +100,5 @@ Status HudiJniReader::init_reader( RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range)); return _jni_connector->open(_state, _profile); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/hudi_jni_reader.h b/be/src/vec/exec/format/table/hudi_jni_reader.h index bfa0291a61035c..363e024dc9f4ae 100644 --- a/be/src/vec/exec/format/table/hudi_jni_reader.h +++ b/be/src/vec/exec/format/table/hudi_jni_reader.h @@ -38,7 +38,7 @@ struct TypeDescriptor; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" class HudiJniReader : public JniReader { ENABLE_FACTORY_CREATOR(HudiJniReader); @@ -66,4 +66,5 @@ class HudiJniReader : public JniReader { std::unordered_map* _colname_to_value_range; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/iceberg/arrow_schema_util.cpp b/be/src/vec/exec/format/table/iceberg/arrow_schema_util.cpp index 35a4d51b7f1959..77ac705076b466 100644 --- a/be/src/vec/exec/format/table/iceberg/arrow_schema_util.cpp +++ b/be/src/vec/exec/format/table/iceberg/arrow_schema_util.cpp @@ -20,8 +20,8 @@ #include #include -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" const char* ArrowSchemaUtil::PARQUET_FIELD_ID = "PARQUET:field_id"; const char* ArrowSchemaUtil::ORIGINAL_TYPE = "originalType"; @@ -82,7 +82,7 @@ Status ArrowSchemaUtil::convert_to(const iceberg::NestedField& field, break; case iceberg::TypeID::DECIMAL: { - auto dt = dynamic_cast(field.field_type()); + auto* dt = dynamic_cast(field.field_type()); arrow_type = arrow::decimal(dt->get_precision(), dt->get_scale()); break; } @@ -130,5 +130,5 @@ Status ArrowSchemaUtil::convert_to(const iceberg::NestedField& field, return Status::OK(); } -} // namespace iceberg -} // namespace doris \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/arrow_schema_util.h b/be/src/vec/exec/format/table/iceberg/arrow_schema_util.h index 20b7dbc627cc44..74c07fe301e0eb 100644 --- a/be/src/vec/exec/format/table/iceberg/arrow_schema_util.h +++ b/be/src/vec/exec/format/table/iceberg/arrow_schema_util.h @@ -19,12 +19,10 @@ #include -#include - #include "vec/exec/format/table/iceberg/schema.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class ArrowSchemaUtil { public: @@ -41,5 +39,5 @@ class ArrowSchemaUtil { const std::string& timezone); }; -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/partition_spec.cpp b/be/src/vec/exec/format/table/iceberg/partition_spec.cpp index c3883fa6b81dab..e188c4a0e51600 100644 --- a/be/src/vec/exec/format/table/iceberg/partition_spec.cpp +++ b/be/src/vec/exec/format/table/iceberg/partition_spec.cpp @@ -22,10 +22,9 @@ #include #include "vec/exec/format/table/iceberg/schema.h" -#include "vec/exec/format/table/iceberg/struct_like.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" PartitionField::PartitionField(int source_id, int field_id, std::string name, std::string transform) : _source_id(source_id), @@ -67,5 +66,5 @@ PartitionSpec::PartitionSpec(std::shared_ptr schema, int spec_id, _fields(std::move(fields)), _last_assigned_field_id(last_assigned_field_id) {} -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/partition_spec.h b/be/src/vec/exec/format/table/iceberg/partition_spec.h index 19f27c70dd6f88..c3fe5d514fbc19 100644 --- a/be/src/vec/exec/format/table/iceberg/partition_spec.h +++ b/be/src/vec/exec/format/table/iceberg/partition_spec.h @@ -22,8 +22,8 @@ #include #include -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class StructLike; class Schema; @@ -78,7 +78,7 @@ class PartitionSpec { const Schema& schema() const { return *_schema; } - int spec_id() { return _spec_id; } + int spec_id() const { return _spec_id; } const std::vector& fields() const { return _fields; } @@ -93,5 +93,5 @@ class PartitionSpec { int _last_assigned_field_id; }; -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/partition_spec_parser.cpp b/be/src/vec/exec/format/table/iceberg/partition_spec_parser.cpp index 7a36d7f99b86f7..37b5b1ceed0a4c 100644 --- a/be/src/vec/exec/format/table/iceberg/partition_spec_parser.cpp +++ b/be/src/vec/exec/format/table/iceberg/partition_spec_parser.cpp @@ -21,8 +21,8 @@ #include "common/exception.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" const char* PartitionSpecParser::SPEC_ID = "spec-id"; const char* PartitionSpecParser::FIELDS = "fields"; const char* PartitionSpecParser::SOURCE_ID = "source-id"; @@ -49,7 +49,7 @@ std::unique_ptr PartitionSpecParser::from_json(const std::shared_ void PartitionSpecParser::_build_from_json_fields(UnboundPartitionSpec::Builder& builder, const rapidjson::Value& value) { DCHECK(value.IsArray()); - for (auto& element : value.GetArray()) { + for (const auto& element : value.GetArray()) { DCHECK(element.IsObject()); std::string name = element[NAME].GetString(); std::string transform = element[TRANSFORM].GetString(); @@ -63,5 +63,5 @@ void PartitionSpecParser::_build_from_json_fields(UnboundPartitionSpec::Builder& } } -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/partition_spec_parser.h b/be/src/vec/exec/format/table/iceberg/partition_spec_parser.h index 208e61eec78cad..145261b7001861 100644 --- a/be/src/vec/exec/format/table/iceberg/partition_spec_parser.h +++ b/be/src/vec/exec/format/table/iceberg/partition_spec_parser.h @@ -28,8 +28,8 @@ #include "vec/exec/format/table/iceberg/unbound_partition_spec.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class PartitionSpec; class Schema; @@ -52,5 +52,5 @@ class PartitionSpecParser { const rapidjson::Value& value); }; -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/schema.cpp b/be/src/vec/exec/format/table/iceberg/schema.cpp index 0652621a1daa1e..d83d420ba97212 100644 --- a/be/src/vec/exec/format/table/iceberg/schema.cpp +++ b/be/src/vec/exec/format/table/iceberg/schema.cpp @@ -17,8 +17,8 @@ #include "vec/exec/format/table/iceberg/schema.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" const std::string Schema::ALL_COLUMNS = "*"; const int Schema::DEFAULT_SCHEMA_ID = 0; @@ -49,5 +49,5 @@ const NestedField* Schema::find_field(int id) const { return nullptr; } -} // namespace iceberg -} // namespace doris \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/schema.h b/be/src/vec/exec/format/table/iceberg/schema.h index ce3a5afad4b1af..9a6266ec956d71 100644 --- a/be/src/vec/exec/format/table/iceberg/schema.h +++ b/be/src/vec/exec/format/table/iceberg/schema.h @@ -19,8 +19,8 @@ #include "vec/exec/format/table/iceberg/types.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class Type; class StructType; @@ -51,5 +51,5 @@ class Schema { std::unordered_map _id_to_field; }; -} // namespace iceberg -} // namespace doris \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/schema_parser.cpp b/be/src/vec/exec/format/table/iceberg/schema_parser.cpp index 49b2e2187edd54..9a76f6672af74e 100644 --- a/be/src/vec/exec/format/table/iceberg/schema_parser.cpp +++ b/be/src/vec/exec/format/table/iceberg/schema_parser.cpp @@ -20,14 +20,15 @@ #include #include +#include #include #include #include "vec/exec/format/table/iceberg/schema.h" #include "vec/exec/format/table/iceberg/types.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" const char* SchemaParser::SCHEMA_ID = "schema-id"; const char* SchemaParser::IDENTIFIER_FIELD_IDS = "identifier-field-ids"; @@ -77,7 +78,7 @@ std::unique_ptr SchemaParser::_struct_from_json(const rapidjson::Val std::vector fields; fields.reserve(field_array.Size()); - for (size_t i = 0; i < field_array.Size(); ++i) { + for (uint32_t i = 0; i < field_array.Size(); ++i) { const rapidjson::Value& field = field_array[i]; if (!field.IsObject()) { throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, @@ -147,7 +148,7 @@ std::unordered_set SchemaParser::_get_integer_set(const char* key, if (value.HasMember(key) && value[key].IsArray()) { const rapidjson::Value& arr = value[key]; - for (size_t i = 0; i < arr.Size(); i++) { + for (uint32_t i = 0; i < arr.Size(); i++) { if (arr[i].IsInt()) { integer_set.insert(arr[i].GetInt()); } else { @@ -159,5 +160,5 @@ std::unordered_set SchemaParser::_get_integer_set(const char* key, return integer_set; } -} // namespace iceberg -} // namespace doris \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/schema_parser.h b/be/src/vec/exec/format/table/iceberg/schema_parser.h index fa2f572d21bcf7..8d202e82c80362 100644 --- a/be/src/vec/exec/format/table/iceberg/schema_parser.h +++ b/be/src/vec/exec/format/table/iceberg/schema_parser.h @@ -22,8 +22,8 @@ #include #include -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class Type; class StructType; @@ -64,5 +64,5 @@ class SchemaParser { static std::unordered_set _get_integer_set(const char* key, const rapidjson::Value& value); }; -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/struct_like.h b/be/src/vec/exec/format/table/iceberg/struct_like.h index cf02e2d1eb8a25..7c967edf2a6abc 100644 --- a/be/src/vec/exec/format/table/iceberg/struct_like.h +++ b/be/src/vec/exec/format/table/iceberg/struct_like.h @@ -18,10 +18,8 @@ #pragma once #include -#include "vec/exec/format/table/iceberg/types.h" - -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class StructLike { public: @@ -30,5 +28,5 @@ class StructLike { virtual std::any get(size_t pos) const = 0; }; -} // namespace iceberg -} // namespace doris \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/types.cpp b/be/src/vec/exec/format/table/iceberg/types.cpp index bf643655ab8810..b8020a68d66af8 100644 --- a/be/src/vec/exec/format/table/iceberg/types.cpp +++ b/be/src/vec/exec/format/table/iceberg/types.cpp @@ -18,9 +18,10 @@ #include "types.h" #include +#include -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" std::unique_ptr MapType::of_optional(int key_id, int value_id, std::unique_ptr key_type, @@ -171,8 +172,8 @@ std::unique_ptr Types::from_primitive_string(const std::string& t } else if (lower_type_string == "binary") { return std::make_unique(); } else { - std::regex fixed("fixed\\[\\s*(\\d+)\\s*\\]"); - std::regex decimal("decimal\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)"); + std::regex fixed(R"(fixed\[\s*(\d+)\s*\])"); + std::regex decimal(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))"); std::smatch match; if (std::regex_match(lower_type_string, match, fixed)) { @@ -191,5 +192,5 @@ std::unique_ptr Types::from_primitive_string(const std::string& t } } -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/types.h b/be/src/vec/exec/format/table/iceberg/types.h index 91a2f705df0d0b..6b4dd6aeebe695 100644 --- a/be/src/vec/exec/format/table/iceberg/types.h +++ b/be/src/vec/exec/format/table/iceberg/types.h @@ -18,17 +18,14 @@ #pragma once #include -#include #include -#include -#include #include #include #include "common/exception.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" class PrimitiveType; class StructType; @@ -115,25 +112,25 @@ class NestedField { class NestedType : public Type { public: - virtual ~NestedType() override = default; + ~NestedType() override = default; bool is_nested_type() override { return true; } NestedType* as_nested_type() override { return this; } - virtual int field_count() const = 0; + virtual size_t field_count() const = 0; virtual Type* field_type(const std::string& field_name) = 0; virtual const NestedField* field(int field_id) = 0; }; class PrimitiveType : public Type { public: - virtual ~PrimitiveType() override = default; + ~PrimitiveType() override = default; bool is_primitive_type() override { return true; } PrimitiveType* as_primitive_type() override { return this; } }; class MapType : public NestedType { public: - ~MapType() = default; + ~MapType() override = default; static std::unique_ptr of_optional(int key_id, int value_id, std::unique_ptr key_type, std::unique_ptr value_type); @@ -154,11 +151,11 @@ class MapType : public NestedType { MapType* as_map_type() override { return this; } - virtual int field_count() const override { return 2; } + size_t field_count() const override { return 2; } - virtual Type* field_type(const std::string& field_name) override; + Type* field_type(const std::string& field_name) override; - virtual const NestedField* field(int field_id) override; + const NestedField* field(int field_id) override; std::string to_string() const override; @@ -179,13 +176,13 @@ class MapType : public NestedType { class ListType : public NestedType { public: - ~ListType() = default; + ~ListType() override = default; static std::unique_ptr of_optional(int element_id, std::unique_ptr element_type); static std::unique_ptr of_required(int element_id, std::unique_ptr element_type); - virtual TypeID type_id() const override { return TypeID::LIST; } + TypeID type_id() const override { return TypeID::LIST; } bool is_list_type() override { return true; } @@ -193,13 +190,13 @@ class ListType : public NestedType { const NestedField& element_field() const { return _element_field; } - virtual std::string to_string() const override; + std::string to_string() const override; - virtual int field_count() const override { return 1; } + size_t field_count() const override { return 1; } - virtual Type* field_type(const std::string& field_name) override; + Type* field_type(const std::string& field_name) override; - virtual const NestedField* field(int field_id) override; + const NestedField* field(int field_id) override; private: ListType(NestedField element_field) : _element_field(std::move(element_field)) {} @@ -214,7 +211,7 @@ class ListType : public NestedType { class StructType : public NestedType { public: - ~StructType() = default; + ~StructType() override = default; StructType(std::vector fields) : _fields(std::move(fields)) { for (const NestedField& field : _fields) { _fields_by_id.insert({field.field_id(), &field}); @@ -224,19 +221,19 @@ class StructType : public NestedType { StructType(const StructType& other) {} - virtual TypeID type_id() const override { return TypeID::STRUCT; } + TypeID type_id() const override { return TypeID::STRUCT; } bool is_struct_type() override { return true; } StructType* as_struct_type() override { return this; } - virtual std::string to_string() const override; + std::string to_string() const override; - virtual int field_count() const override { return _fields.size(); } + size_t field_count() const override { return _fields.size(); } - virtual Type* field_type(const std::string& field_name) override; + Type* field_type(const std::string& field_name) override; - virtual const NestedField* field(int field_id) override { return _fields_by_id[field_id]; } + const NestedField* field(int field_id) override { return _fields_by_id[field_id]; } const std::vector& fields() const { return _fields; } @@ -254,11 +251,11 @@ class DecimalType : public PrimitiveType { int precision; public: - ~DecimalType() = default; + ~DecimalType() override = default; DecimalType(int p, int s) : scale(s), precision(p) {} - virtual TypeID type_id() const override { return TypeID::DECIMAL; } + TypeID type_id() const override { return TypeID::DECIMAL; } std::string to_string() const override { std::stringstream ss; @@ -273,7 +270,7 @@ class DecimalType : public PrimitiveType { class BinaryType : public PrimitiveType { public: - ~BinaryType() = default; + ~BinaryType() override = default; TypeID type_id() const override { return TypeID::BINARY; } @@ -282,10 +279,10 @@ class BinaryType : public PrimitiveType { class FixedType : public PrimitiveType { public: - ~FixedType() = default; + ~FixedType() override = default; FixedType(int len) : length(len) {} - virtual TypeID type_id() const override { return TypeID::FIXED; } + TypeID type_id() const override { return TypeID::FIXED; } std::string to_string() const override { std::stringstream ss; @@ -299,7 +296,7 @@ class FixedType : public PrimitiveType { class UUIDType : public PrimitiveType { public: - ~UUIDType() = default; + ~UUIDType() override = default; TypeID type_id() const override { return TypeID::UUID; } @@ -308,7 +305,7 @@ class UUIDType : public PrimitiveType { class StringType : public PrimitiveType { public: - ~StringType() = default; + ~StringType() override = default; TypeID type_id() const override { return TypeID::STRING; } @@ -317,11 +314,11 @@ class StringType : public PrimitiveType { class TimestampType : public PrimitiveType { public: - ~TimestampType() = default; + ~TimestampType() override = default; TimestampType(bool adjust_to_utc) : _adjust_to_utc(adjust_to_utc) {} - virtual TypeID type_id() const override { return TypeID::TIMESTAMP; } + TypeID type_id() const override { return TypeID::TIMESTAMP; } bool should_adjust_to_utc() const { return _adjust_to_utc; } @@ -339,7 +336,7 @@ class TimestampType : public PrimitiveType { class TimeType : public PrimitiveType { public: - ~TimeType() = default; + ~TimeType() override = default; TypeID type_id() const override { return TypeID::TIME; } @@ -348,7 +345,7 @@ class TimeType : public PrimitiveType { class DateType : public PrimitiveType { public: - ~DateType() = default; + ~DateType() override = default; TypeID type_id() const override { return TypeID::DATE; } @@ -357,7 +354,7 @@ class DateType : public PrimitiveType { class DoubleType : public PrimitiveType { public: - ~DoubleType() = default; + ~DoubleType() override = default; TypeID type_id() const override { return TypeID::DOUBLE; } @@ -373,7 +370,7 @@ class FloatType : public PrimitiveType { class LongType : public PrimitiveType { public: - ~LongType() = default; + ~LongType() override = default; TypeID type_id() const override { return TypeID::LONG; } @@ -382,7 +379,7 @@ class LongType : public PrimitiveType { class IntegerType : public PrimitiveType { public: - ~IntegerType() = default; + ~IntegerType() override = default; TypeID type_id() const override { return TypeID::INTEGER; } @@ -391,7 +388,7 @@ class IntegerType : public PrimitiveType { class BooleanType : public PrimitiveType { public: - ~BooleanType() = default; + ~BooleanType() override = default; TypeID type_id() const override { return TypeID::BOOLEAN; } @@ -403,5 +400,5 @@ class Types { static std::unique_ptr from_primitive_string(const std::string& type_string); }; -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.cpp b/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.cpp index a2ff2c9784e068..2d3f47762e3b5d 100644 --- a/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.cpp +++ b/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.cpp @@ -22,8 +22,8 @@ #include "vec/exec/format/table/iceberg/partition_spec.h" #include "vec/exec/format/table/iceberg/schema.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" UnboundPartitionSpec::Builder& UnboundPartitionSpec::Builder::with_spec_id(int new_spec_id) { _spec_id = new_spec_id; @@ -62,14 +62,13 @@ std::unique_ptr UnboundPartitionSpec::_copy_to_builder( std::make_unique(schema); for (const auto& field : _fields) { if (field._partition_id != -1) { - builder->add(field._source_id, field._partition_id, std::move(field._name), - std::move(field._transform)); + builder->add(field._source_id, field._partition_id, field._name, field._transform); } else { - builder->add(field._source_id, std::move(field._name), std::move(field._transform)); + builder->add(field._source_id, field._name, field._transform); } } return builder; } -} // namespace iceberg -} // namespace doris \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.h b/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.h index cf24b8cb4d11a1..3a2bd92be0c798 100644 --- a/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.h +++ b/be/src/vec/exec/format/table/iceberg/unbound_partition_spec.h @@ -23,8 +23,8 @@ #include "partition_spec.h" #include "vec/exec/format/table/iceberg/partition_spec.h" -namespace doris { -namespace iceberg { +namespace doris::iceberg { +#include "common/compile_check_begin.h" struct UnboundPartitionField { std::string _transform; @@ -71,5 +71,5 @@ class UnboundPartitionSpec { std::vector _fields; }; -} // namespace iceberg -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::iceberg diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp index 21a98f79acb171..a10358bbcbc0cd 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.cpp +++ b/be/src/vec/exec/format/table/iceberg_reader.cpp @@ -55,6 +55,7 @@ #include "vec/exec/format/table/table_format_reader.h" namespace cctz { +#include "common/compile_check_begin.h" class time_zone; } // namespace cctz namespace doris { @@ -166,8 +167,8 @@ Status IcebergTableReader::init_row_filters(const TFileRangeDesc& range, io::IOC return Status::OK(); } - auto& table_desc = range.table_format_params.iceberg_params; - auto& version = table_desc.format_version; + const auto& table_desc = range.table_format_params.iceberg_params; + const auto& version = table_desc.format_version; if (version < MIN_SUPPORT_DELETE_FILES_VERSION) { return Status::OK(); } @@ -182,12 +183,12 @@ Status IcebergTableReader::init_row_filters(const TFileRangeDesc& range, io::IOC } } - if (position_delete_files.size() > 0) { + if (!position_delete_files.empty()) { RETURN_IF_ERROR( _position_delete_base(table_desc.original_file_path, position_delete_files)); _file_format_reader->set_push_down_agg_type(TPushAggOp::NONE); } - if (equality_delete_files.size() > 0) { + if (!equality_delete_files.empty()) { RETURN_IF_ERROR(_equality_delete_base(equality_delete_files)); _file_format_reader->set_push_down_agg_type(TPushAggOp::NONE); } @@ -206,7 +207,7 @@ Status IcebergTableReader::_equality_delete_base( std::unordered_map missing_columns; std::vector not_in_file_col_names; - for (auto& delete_file : delete_files) { + for (const auto& delete_file : delete_files) { TFileRangeDesc delete_desc; // must use __set() method to make sure __isset is true delete_desc.__set_fs_name(_range.fs_name); @@ -257,8 +258,7 @@ Status IcebergTableReader::_equality_delete_base( DataTypePtr data_type = DataTypeFactory::instance().create_data_type( equality_delete_col_types[i], true); MutableColumnPtr data_column = data_type->create_column(); - _expand_columns.emplace_back( - ColumnWithTypeAndName(std::move(data_column), data_type, delete_col)); + _expand_columns.emplace_back(std::move(data_column), data_type, delete_col); } } for (const std::string& delete_col : _expand_col_names) { @@ -303,7 +303,7 @@ Status IcebergTableReader::_position_delete_base( std::vector delete_rows_array; int64_t num_delete_rows = 0; std::vector erase_data; - for (auto& delete_file : delete_files) { + for (const auto& delete_file : delete_files) { SCOPED_TIMER(_iceberg_profile.delete_files_read_time); Status create_status = Status::OK(); auto* delete_file_cache = _kv_cache->get( @@ -334,7 +334,7 @@ Status IcebergTableReader::_position_delete_base( DeleteFile& delete_file_map = *((DeleteFile*)delete_file_cache); auto get_value = [&](const auto& v) { DeleteRows* row_ids = v.second.get(); - if (row_ids->size() > 0) { + if (!row_ids->empty()) { delete_rows_array.emplace_back(row_ids); num_delete_rows += row_ids->size(); erase_data.emplace_back(delete_file_cache); @@ -358,7 +358,7 @@ Status IcebergTableReader::_position_delete_base( IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range( const ColumnDictI32& file_path_column) { IcebergTableReader::PositionDeleteRange range; - int read_rows = file_path_column.get_data().size(); + size_t read_rows = file_path_column.get_data().size(); int* code_path = const_cast(file_path_column.get_data().data()); int* code_path_start = code_path; int* code_path_end = code_path + read_rows; @@ -366,8 +366,7 @@ IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range( int code = code_path[0]; int* code_end = std::upper_bound(code_path, code_path_end, code); range.data_file_path.emplace_back(file_path_column.get_value(code).to_string()); - range.range.emplace_back( - std::make_pair(code_path - code_path_start, code_end - code_path_start)); + range.range.emplace_back(code_path - code_path_start, code_end - code_path_start); code_path = code_end; } return range; @@ -376,14 +375,14 @@ IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range( IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range( const ColumnString& file_path_column) { IcebergTableReader::PositionDeleteRange range; - int read_rows = file_path_column.size(); - int index = 0; + size_t read_rows = file_path_column.size(); + size_t index = 0; while (index < read_rows) { StringRef data_path = file_path_column.get_data_at(index); - int left = index - 1; - int right = read_rows; + size_t left = index - 1; + size_t right = read_rows; while (left + 1 != right) { - int mid = left + (right - left) / 2; + size_t mid = left + (right - left) / 2; if (file_path_column.get_data_at(mid) > data_path) { right = mid; } else { @@ -391,7 +390,7 @@ IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range( } } range.data_file_path.emplace_back(data_path.to_string()); - range.range.emplace_back(std::make_pair(index, left + 1)); + range.range.emplace_back(index, left + 1); index = left + 1; } return range; @@ -404,7 +403,7 @@ void IcebergTableReader::_sort_delete_rows(std::vector*>& d } if (delete_rows_array.size() == 1) { _iceberg_delete_rows.resize(num_delete_rows); - memcpy(&_iceberg_delete_rows[0], &((*delete_rows_array.front())[0]), + memcpy(_iceberg_delete_rows.data(), delete_rows_array.front()->data(), sizeof(int64_t) * num_delete_rows); return; } @@ -421,16 +420,16 @@ void IcebergTableReader::_sort_delete_rows(std::vector*>& d auto row_id_iter = _iceberg_delete_rows.begin(); auto iter_end = _iceberg_delete_rows.end(); std::vector rows_array; - for (auto rows : delete_rows_array) { - if (rows->size() > 0) { - rows_array.push_back({rows->begin(), rows->end()}); + for (auto* rows : delete_rows_array) { + if (!rows->empty()) { + rows_array.emplace_back(rows->begin(), rows->end()); } } - int array_size = rows_array.size(); + size_t array_size = rows_array.size(); while (row_id_iter != iter_end) { - int min_index = 0; - int min = *rows_array[0].first; - for (int i = 0; i < array_size; ++i) { + int64_t min_index = 0; + int64_t min = *rows_array[0].first; + for (size_t i = 0; i < array_size; ++i) { if (*rows_array[i].first < min) { min_index = i; min = *rows_array[i].first; @@ -460,8 +459,7 @@ void IcebergTableReader::_sort_delete_rows(std::vector*>& d void IcebergTableReader::_gen_file_col_names() { _all_required_col_names.clear(); _not_in_file_col_names.clear(); - for (int i = 0; i < _file_col_names.size(); ++i) { - auto name = _file_col_names[i]; + for (auto name : _file_col_names) { auto iter = _table_col_to_file_col.find(name); if (iter == _table_col_to_file_col.end()) { // If the user creates the iceberg table, directly append the parquet file that already exists, @@ -489,12 +487,12 @@ void IcebergTableReader::_gen_file_col_names() { * _colname_to_value_range with column name in data file. */ void IcebergTableReader::_gen_new_colname_to_value_range() { - for (auto it = _colname_to_value_range->begin(); it != _colname_to_value_range->end(); it++) { - auto iter = _table_col_to_file_col.find(it->first); + for (auto& it : *_colname_to_value_range) { + auto iter = _table_col_to_file_col.find(it.first); if (iter == _table_col_to_file_col.end()) { - _new_colname_to_value_range.emplace(it->first, it->second); + _new_colname_to_value_range.emplace(it.first, it.second); } else { - _new_colname_to_value_range.emplace(iter->second, it->second); + _new_colname_to_value_range.emplace(iter->second, it.second); } } } @@ -535,7 +533,7 @@ void IcebergTableReader::_gen_position_delete_file_range(Block& block, DeleteFil Status IcebergParquetReader::init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, @@ -543,7 +541,7 @@ Status IcebergParquetReader::init_reader( const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts) { _file_format = Fileformat::PARQUET; - ParquetReader* parquet_reader = static_cast(_file_format_reader.get()); + auto* parquet_reader = static_cast(_file_format_reader.get()); _col_id_name_map = col_id_name_map; _file_col_names = file_col_names; _colname_to_value_range = colname_to_value_range; @@ -608,7 +606,7 @@ Status IcebergParquetReader ::_read_position_delete_file(const TFileRangeDesc* d Status IcebergOrcReader::init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, @@ -717,4 +715,5 @@ Status IcebergOrcReader::_gen_col_name_maps(OrcReader* orc_reader) { return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h index ee7dcdd68d24fa..e500538b6f9285 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.h +++ b/be/src/vec/exec/format/table/iceberg_reader.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -45,6 +44,7 @@ class KeyValue; } // namespace tparquet namespace doris { +#include "common/compile_check_begin.h" class RowDescriptor; class RuntimeState; class SlotDescriptor; @@ -101,8 +101,7 @@ class IcebergTableReader : public TableFormatReader { }; using DeleteRows = std::vector; using DeleteFile = phmap::parallel_flat_hash_map< - std::string, std::unique_ptr, std::hash, - std::equal_to, + std::string, std::unique_ptr, std::hash, std::equal_to<>, std::allocator>>, 8, std::mutex>; /** @@ -154,7 +153,7 @@ class IcebergTableReader : public TableFormatReader { // copy from _colname_to_value_range with new column name that is in parquet/orc file, to support schema evolution. std::unordered_map _new_colname_to_value_range; // column id to name map. Collect from FE slot descriptor. - std::unordered_map _col_id_name_map; + std::unordered_map _col_id_name_map; // col names in the parquet,orc file std::vector _all_required_col_names; // col names in table but not in parquet,orc file @@ -204,7 +203,7 @@ class IcebergParquetReader final : public IcebergTableReader { kv_cache, io_ctx) {} Status init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, @@ -250,7 +249,7 @@ class IcebergOrcReader final : public IcebergTableReader { Status init_reader( const std::vector& file_col_names, - const std::unordered_map& col_id_name_map, + const std::unordered_map& col_id_name_map, std::unordered_map* colname_to_value_range, const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, @@ -272,4 +271,5 @@ class IcebergOrcReader final : public IcebergTableReader { }; } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/table/lakesoul_jni_reader.cpp b/be/src/vec/exec/format/table/lakesoul_jni_reader.cpp index c285b72df25332..887a793b098232 100644 --- a/be/src/vec/exec/format/table/lakesoul_jni_reader.cpp +++ b/be/src/vec/exec/format/table/lakesoul_jni_reader.cpp @@ -18,15 +18,14 @@ #include "lakesoul_jni_reader.h" #include -#include -#include "common/logging.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" #include "runtime/types.h" #include "vec/core/types.h" namespace doris { +#include "common/compile_check_begin.h" class RuntimeProfile; class RuntimeState; @@ -44,7 +43,7 @@ LakeSoulJniReader::LakeSoulJniReader(const TLakeSoulFileDesc& lakesoul_params, _state(state), _profile(profile) { std::vector required_fields; - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { required_fields.emplace_back(desc->col_name()); } @@ -67,7 +66,7 @@ Status LakeSoulJniReader::get_next_block(Block* block, size_t* read_rows, bool* Status LakeSoulJniReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -79,4 +78,5 @@ Status LakeSoulJniReader::init_reader( RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range)); return _jni_connector->open(_state, _profile); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/lakesoul_jni_reader.h b/be/src/vec/exec/format/table/lakesoul_jni_reader.h index dc0db6c2c5dd1a..38f3aaeb27e986 100644 --- a/be/src/vec/exec/format/table/lakesoul_jni_reader.h +++ b/be/src/vec/exec/format/table/lakesoul_jni_reader.h @@ -30,6 +30,7 @@ #include "vec/exec/jni_connector.h" namespace doris { +#include "common/compile_check_begin.h" class RuntimeProfile; class RuntimeState; class SlotDescriptor; @@ -67,4 +68,5 @@ class LakeSoulJniReader : public ::doris::vectorized::GenericReader { std::unordered_map* _colname_to_value_range; std::unique_ptr<::doris::vectorized::JniConnector> _jni_connector; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp index 665e19b6bcebd9..229468cec54714 100644 --- a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp +++ b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp @@ -29,14 +29,13 @@ namespace doris { class RuntimeProfile; class RuntimeState; - namespace vectorized { class Block; } // namespace vectorized } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_desc, const TMaxComputeFileDesc& max_compute_params, const std::vector& file_slot_descs, @@ -50,7 +49,7 @@ MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_des std::ostringstream columns_types; std::vector column_names; int index = 0; - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { std::string field = desc->col_name(); std::string type = JniConnector::get_jni_type(desc->type()); column_names.emplace_back(field); @@ -93,7 +92,7 @@ Status MaxComputeJniReader::get_next_block(Block* block, size_t* read_rows, bool Status MaxComputeJniReader::get_columns( std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -105,4 +104,5 @@ Status MaxComputeJniReader::init_reader( RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range)); return _jni_connector->open(_state, _profile); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/max_compute_jni_reader.h b/be/src/vec/exec/format/table/max_compute_jni_reader.h index 9bfef59432d6d1..f59fc1a5f77ab7 100644 --- a/be/src/vec/exec/format/table/max_compute_jni_reader.h +++ b/be/src/vec/exec/format/table/max_compute_jni_reader.h @@ -17,9 +17,7 @@ #pragma once -#include - -#include +#include #include #include #include @@ -41,7 +39,7 @@ struct TypeDescriptor; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" /** * The demo usage of JniReader, showing how to read data from java scanner. * The java side is also a mock reader that provide values for each type. @@ -74,4 +72,5 @@ class MaxComputeJniReader : public JniReader { std::unordered_map* _colname_to_value_range = nullptr; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.cpp b/be/src/vec/exec/format/table/paimon_jni_reader.cpp index 83705426e890ec..cbcdd2d81bd57d 100644 --- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp +++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp @@ -18,7 +18,6 @@ #include "paimon_jni_reader.h" #include -#include #include "runtime/descriptors.h" #include "runtime/types.h" @@ -27,13 +26,13 @@ namespace doris { class RuntimeProfile; class RuntimeState; - namespace vectorized { class Block; } // namespace vectorized } // namespace doris namespace doris::vectorized { +#include "common/compile_check_begin.h" const std::string PaimonJniReader::PAIMON_OPTION_PREFIX = "paimon."; const std::string PaimonJniReader::HADOOP_OPTION_PREFIX = "hadoop."; @@ -45,7 +44,7 @@ PaimonJniReader::PaimonJniReader(const std::vector& file_slot_d : JniReader(file_slot_descs, state, profile) { std::vector column_names; std::vector column_types; - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { column_names.emplace_back(desc->col_name()); column_types.emplace_back(JniConnector::get_jni_type(desc->type())); } @@ -67,11 +66,11 @@ PaimonJniReader::PaimonJniReader(const std::vector& file_slot_d } // Used to create paimon option - for (auto& kv : range.table_format_params.paimon_params.paimon_options) { + for (const auto& kv : range.table_format_params.paimon_params.paimon_options) { params[PAIMON_OPTION_PREFIX + kv.first] = kv.second; } if (range.table_format_params.paimon_params.__isset.hadoop_conf) { - for (auto& kv : range.table_format_params.paimon_params.hadoop_conf) { + for (const auto& kv : range.table_format_params.paimon_params.hadoop_conf) { params[HADOOP_OPTION_PREFIX + kv.first] = kv.second; } } @@ -85,7 +84,7 @@ Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eo Status PaimonJniReader::get_columns(std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -97,4 +96,5 @@ Status PaimonJniReader::init_reader( RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range)); return _jni_connector->open(_state, _profile); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.h b/be/src/vec/exec/format/table/paimon_jni_reader.h index 220a6f1f2e9665..64ef962f0de66a 100644 --- a/be/src/vec/exec/format/table/paimon_jni_reader.h +++ b/be/src/vec/exec/format/table/paimon_jni_reader.h @@ -18,7 +18,6 @@ #pragma once #include -#include #include #include #include @@ -39,7 +38,7 @@ struct TypeDescriptor; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" /** * The demo usage of JniReader, showing how to read data from java scanner. * The java side is also a mock reader that provide values for each type. @@ -70,4 +69,5 @@ class PaimonJniReader : public JniReader { std::unordered_map* _colname_to_value_range; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/paimon_reader.cpp b/be/src/vec/exec/format/table/paimon_reader.cpp index 055d6179b2c422..8e4be026bab154 100644 --- a/be/src/vec/exec/format/table/paimon_reader.cpp +++ b/be/src/vec/exec/format/table/paimon_reader.cpp @@ -23,6 +23,7 @@ #include "util/deletion_vector.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" PaimonReader::PaimonReader(std::unique_ptr file_format_reader, RuntimeProfile* profile, const TFileScanRangeParams& params) : TableFormatReader(std::move(file_format_reader)), _profile(profile), _params(params) { @@ -98,4 +99,5 @@ Status PaimonReader::init_row_filters(const TFileRangeDesc& range, io::IOContext } return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/paimon_reader.h b/be/src/vec/exec/format/table/paimon_reader.h index 3d82b7a3b5c416..3776b485a4535c 100644 --- a/be/src/vec/exec/format/table/paimon_reader.h +++ b/be/src/vec/exec/format/table/paimon_reader.h @@ -25,6 +25,7 @@ #include "vec/exec/format/table/table_format_reader.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" class PaimonReader : public TableFormatReader { public: PaimonReader(std::unique_ptr file_format_reader, RuntimeProfile* profile, @@ -74,4 +75,5 @@ class PaimonParquetReader final : public PaimonReader { ->set_delete_rows(&_delete_rows); } }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/table_format_reader.cpp b/be/src/vec/exec/format/table/table_format_reader.cpp deleted file mode 100644 index ea8111d81b3d04..00000000000000 --- a/be/src/vec/exec/format/table/table_format_reader.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "table_format_reader.h" - -namespace doris::vectorized { - -TableFormatReader::TableFormatReader(std::unique_ptr file_format_reader) - : _file_format_reader(std::move(file_format_reader)) {} - -} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exec/format/table/table_format_reader.h b/be/src/vec/exec/format/table/table_format_reader.h index 5a102a7665e8f1..b143149c8a6a97 100644 --- a/be/src/vec/exec/format/table/table_format_reader.h +++ b/be/src/vec/exec/format/table/table_format_reader.h @@ -17,8 +17,7 @@ #pragma once -#include - +#include #include #include #include @@ -29,7 +28,6 @@ namespace doris { class TFileRangeDesc; - namespace vectorized { class Block; } // namespace vectorized @@ -37,10 +35,11 @@ struct TypeDescriptor; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" class TableFormatReader : public GenericReader { public: - TableFormatReader(std::unique_ptr file_format_reader); + TableFormatReader(std::unique_ptr file_format_reader) + : _file_format_reader(std::move(file_format_reader)) {} ~TableFormatReader() override = default; Status get_next_block(Block* block, size_t* read_rows, bool* eof) override { return _file_format_reader->get_next_block(block, read_rows, eof); @@ -67,15 +66,13 @@ class TableFormatReader : public GenericReader { virtual Status init_row_filters(const TFileRangeDesc& range, io::IOContext* io_ctx) = 0; protected: + std::string _table_format; // hudi, iceberg + std::unique_ptr _file_format_reader; // parquet, orc void _collect_profile_before_close() override { if (_file_format_reader != nullptr) { _file_format_reader->collect_profile_before_close(); } } - -protected: - std::string _table_format; // hudi, iceberg - std::unique_ptr _file_format_reader; // parquet, orc }; - +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/transactional_hive_common.cpp b/be/src/vec/exec/format/table/transactional_hive_common.cpp index 85e279031cae8e..7311418b06a4e5 100644 --- a/be/src/vec/exec/format/table/transactional_hive_common.cpp +++ b/be/src/vec/exec/format/table/transactional_hive_common.cpp @@ -18,6 +18,7 @@ #include "transactional_hive_common.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" const std::string TransactionalHive::OPERATION = "operation"; const std::string TransactionalHive::ORIGINAL_TRANSACTION = "originalTransaction"; @@ -69,4 +70,5 @@ const std::vector TransactionalHive::ACID_COLUMN_NAMES_LOWER_CASE = OPERATION_LOWER_CASE, ORIGINAL_TRANSACTION_LOWER_CASE, BUCKET_LOWER_CASE, ROW_ID_LOWER_CASE, CURRENT_TRANSACTION_LOWER_CASE, ROW_LOWER_CASE}; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/transactional_hive_common.h b/be/src/vec/exec/format/table/transactional_hive_common.h index 368af350cff525..25943e796708a2 100644 --- a/be/src/vec/exec/format/table/transactional_hive_common.h +++ b/be/src/vec/exec/format/table/transactional_hive_common.h @@ -23,6 +23,7 @@ #include "runtime/define_primitive_type.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" struct TransactionalHive { static const std::string OPERATION; static const std::string ORIGINAL_TRANSACTION; @@ -51,4 +52,5 @@ struct TransactionalHive { static const std::vector ACID_COLUMN_NAMES; static const std::vector ACID_COLUMN_NAMES_LOWER_CASE; }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/transactional_hive_reader.cpp b/be/src/vec/exec/format/table/transactional_hive_reader.cpp index 18642ab1218b4d..d6c185422a5b85 100644 --- a/be/src/vec/exec/format/table/transactional_hive_reader.cpp +++ b/be/src/vec/exec/format/table/transactional_hive_reader.cpp @@ -25,6 +25,7 @@ #include "vec/exec/format/orc/vorc_reader.h" namespace doris { +#include "common/compile_check_begin.h" namespace io { struct IOContext; @@ -63,7 +64,7 @@ Status TransactionalHiveReader::init_reader( const RowDescriptor* row_descriptor, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts) { - OrcReader* orc_reader = static_cast(_file_format_reader.get()); + auto* orc_reader = static_cast(_file_format_reader.get()); _col_names.insert(_col_names.end(), column_names.begin(), column_names.end()); _col_names.insert(_col_names.end(), TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(), TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end()); @@ -74,12 +75,12 @@ Status TransactionalHiveReader::init_reader( } Status TransactionalHiveReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { - for (int i = 0; i < TransactionalHive::READ_PARAMS.size(); ++i) { - DataTypePtr data_type = DataTypeFactory::instance().create_data_type( - TypeDescriptor(TransactionalHive::READ_PARAMS[i].type), false); + for (const auto& i : TransactionalHive::READ_PARAMS) { + DataTypePtr data_type = + DataTypeFactory::instance().create_data_type(TypeDescriptor(i.type), false); MutableColumnPtr data_column = data_type->create_column(); - block->insert(ColumnWithTypeAndName(std::move(data_column), data_type, - TransactionalHive::READ_PARAMS[i].column_lower_case)); + block->insert( + ColumnWithTypeAndName(std::move(data_column), data_type, i.column_lower_case)); } auto res = _file_format_reader->get_next_block(block, read_rows, eof); Block::erase_useless_column(block, block->columns() - TransactionalHive::READ_PARAMS.size()); @@ -104,7 +105,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, } } - OrcReader* orc_reader = (OrcReader*)(_file_format_reader.get()); + auto* orc_reader = (OrcReader*)(_file_format_reader.get()); std::vector delete_file_col_names; int64_t num_delete_rows = 0; int64_t num_delete_files = 0; @@ -126,7 +127,8 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, }; SCOPED_TIMER(_transactional_orc_profile.delete_files_read_time); - for (auto& delete_delta : range.table_format_params.transactional_hive_params.delete_deltas) { + for (const auto& delete_delta : + range.table_format_params.transactional_hive_params.delete_deltas) { const std::string file_name = file_path.filename().string(); //need opt. @@ -166,13 +168,11 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, bool eof = false; while (!eof) { Block block; - for (int i = 0; i < TransactionalHive::DELETE_ROW_PARAMS.size(); ++i) { - DataTypePtr data_type = DataTypeFactory::instance().create_data_type( - TransactionalHive::DELETE_ROW_PARAMS[i].type, false); + for (const auto& i : TransactionalHive::DELETE_ROW_PARAMS) { + DataTypePtr data_type = DataTypeFactory::instance().create_data_type(i.type, false); MutableColumnPtr data_column = data_type->create_column(); - block.insert(ColumnWithTypeAndName( - std::move(data_column), data_type, - TransactionalHive::DELETE_ROW_PARAMS[i].column_lower_case)); + block.insert(ColumnWithTypeAndName(std::move(data_column), data_type, + i.column_lower_case)); } eof = false; size_t read_rows = 0; @@ -181,11 +181,11 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, static int ORIGINAL_TRANSACTION_INDEX = 0; static int BUCKET_ID_INDEX = 1; static int ROW_ID_INDEX = 2; - const ColumnInt64& original_transaction_column = assert_cast( + const auto& original_transaction_column = assert_cast( *block.get_by_position(ORIGINAL_TRANSACTION_INDEX).column); - const ColumnInt32& bucket_id_column = assert_cast( + const auto& bucket_id_column = assert_cast( *block.get_by_position(BUCKET_ID_INDEX).column); - const ColumnInt64& row_id_column = assert_cast( + const auto& row_id_column = assert_cast( *block.get_by_position(ROW_ID_INDEX).column); DCHECK_EQ(original_transaction_column.size(), read_rows); @@ -194,7 +194,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, for (int i = 0; i < read_rows; ++i) { Int64 original_transaction = original_transaction_column.get_int(i); - Int32 bucket_id = bucket_id_column.get_int(i); + Int64 bucket_id = bucket_id_column.get_int(i); Int64 row_id = row_id_column.get_int(i); AcidRowID delete_row_id = {original_transaction, bucket_id, row_id}; _delete_rows.insert(delete_row_id); @@ -211,4 +211,5 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range, } return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/transactional_hive_reader.h b/be/src/vec/exec/format/table/transactional_hive_reader.h index a24fee18694b97..9c3f284464cba9 100644 --- a/be/src/vec/exec/format/table/transactional_hive_reader.h +++ b/be/src/vec/exec/format/table/transactional_hive_reader.h @@ -17,24 +17,22 @@ #pragma once -#include -#include - +#include +#include #include -#include #include #include -#include #include +#include "common/factory_creator.h" #include "common/status.h" #include "exec/olap_common.h" #include "table_format_reader.h" #include "util/runtime_profile.h" -#include "vec/columns/column_dictionary.h" #include "vec/common/hash_table/phmap_fwd_decl.h" namespace doris { +#include "common/compile_check_begin.h" class RuntimeState; class SlotDescriptor; class TFileRangeDesc; @@ -57,7 +55,7 @@ class TransactionalHiveReader : public TableFormatReader { public: struct AcidRowID { int64_t original_transaction; - int32_t bucket; + int64_t bucket; int64_t row_id; struct Hash { @@ -65,7 +63,7 @@ class TransactionalHiveReader : public TableFormatReader { size_t hash_value = 0; hash_value ^= std::hash {}(transactional_row_id.original_transaction) + 0x9e3779b9 + (hash_value << 6) + (hash_value >> 2); - hash_value ^= std::hash {}(transactional_row_id.bucket) + 0x9e3779b9 + + hash_value ^= std::hash {}(transactional_row_id.bucket) + 0x9e3779b9 + (hash_value << 6) + (hash_value >> 2); hash_value ^= std::hash {}(transactional_row_id.row_id) + 0x9e3779b9 + (hash_value << 6) + (hash_value >> 2); @@ -137,4 +135,5 @@ inline bool operator<(const TransactionalHiveReader::AcidRowID& lhs, } } // namespace vectorized +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/vec/exec/format/table/trino_connector_jni_reader.cpp b/be/src/vec/exec/format/table/trino_connector_jni_reader.cpp index eaced8572b94b2..c2beca0471c416 100644 --- a/be/src/vec/exec/format/table/trino_connector_jni_reader.cpp +++ b/be/src/vec/exec/format/table/trino_connector_jni_reader.cpp @@ -18,7 +18,6 @@ #include "trino_connector_jni_reader.h" #include -#include #include "runtime/descriptors.h" #include "runtime/types.h" @@ -35,7 +34,7 @@ class Block; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" const std::string TrinoConnectorJniReader::TRINO_CONNECTOR_OPTION_PREFIX = "trino."; TrinoConnectorJniReader::TrinoConnectorJniReader( @@ -91,7 +90,7 @@ Status TrinoConnectorJniReader::get_next_block(Block* block, size_t* read_rows, Status TrinoConnectorJniReader::get_columns( std::unordered_map* name_to_type, std::unordered_set* missing_cols) { - for (auto& desc : _file_slot_descs) { + for (const auto& desc : _file_slot_descs) { name_to_type->emplace(desc->col_name(), desc->type()); } return Status::OK(); @@ -128,4 +127,5 @@ Status TrinoConnectorJniReader::_set_spi_plugins_dir() { return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/table/trino_connector_jni_reader.h b/be/src/vec/exec/format/table/trino_connector_jni_reader.h index de0cf21a881129..3d0e7be4e7b017 100644 --- a/be/src/vec/exec/format/table/trino_connector_jni_reader.h +++ b/be/src/vec/exec/format/table/trino_connector_jni_reader.h @@ -17,9 +17,7 @@ #pragma once -#include - -#include +#include #include #include #include @@ -40,7 +38,7 @@ struct TypeDescriptor; } // namespace doris namespace doris::vectorized { - +#include "common/compile_check_begin.h" class TrinoConnectorJniReader : public JniReader { ENABLE_FACTORY_CREATOR(TrinoConnectorJniReader); @@ -63,4 +61,5 @@ class TrinoConnectorJniReader : public JniReader { private: Status _set_spi_plugins_dir(); }; +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/wal/wal_reader.cpp b/be/src/vec/exec/format/wal/wal_reader.cpp index a9a209b95a4ce1..f6b06c85ad46ea 100644 --- a/be/src/vec/exec/format/wal/wal_reader.cpp +++ b/be/src/vec/exec/format/wal/wal_reader.cpp @@ -23,9 +23,10 @@ #include "gutil/strings/split.h" #include "olap/wal/wal_manager.h" #include "runtime/runtime_state.h" -#include "vec/data_types/data_type_string.h" +#include "vec/core/block.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" WalReader::WalReader(RuntimeState* state) : _state(state) { _wal_id = state->wal_id(); } @@ -67,10 +68,10 @@ Status WalReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { return Status::DataQualityError("check be exec version fail when reading wal file {}", _wal_path); } - vectorized::Block src_block; + Block src_block; RETURN_IF_ERROR(src_block.deserialize(pblock)); //convert to dst block - vectorized::Block dst_block; + Block dst_block; int index = 0; auto output_block_columns = block->get_columns_with_type_and_name(); size_t output_block_column_size = output_block_columns.size(); @@ -85,7 +86,7 @@ Status WalReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { std::to_string(output_block_column_size), std::to_string(_tuple_descriptor->slots().size())); } - for (auto slot_desc : _tuple_descriptor->slots()) { + for (auto* slot_desc : _tuple_descriptor->slots()) { auto pos = _column_pos_map[slot_desc->col_unique_id()]; if (pos >= src_block.columns()) { return Status::InternalError("read wal {} fail, pos {}, columns size {}", _wal_path, @@ -116,7 +117,7 @@ Status WalReader::get_columns(std::unordered_map* n try { int64_t pos = 0; for (auto col_id_str : column_id_vector) { - auto col_id = std::strtoll(col_id_str.c_str(), NULL, 10); + auto col_id = std::strtoll(col_id_str.c_str(), nullptr, 10); _column_pos_map.emplace(col_id, pos); pos++; } @@ -126,4 +127,5 @@ Status WalReader::get_columns(std::unordered_map* n return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exec/format/wal/wal_reader.h b/be/src/vec/exec/format/wal/wal_reader.h index 5834d74efeaced..6fd0cc7883e669 100644 --- a/be/src/vec/exec/format/wal/wal_reader.h +++ b/be/src/vec/exec/format/wal/wal_reader.h @@ -20,8 +20,8 @@ #include "runtime/descriptors.h" #include "vec/exec/format/generic_reader.h" -namespace doris { -namespace vectorized { +namespace doris::vectorized { +#include "common/compile_check_begin.h" struct ScannerCounter; class WalReader : public GenericReader { public: @@ -50,5 +50,5 @@ class WalReader : public GenericReader { int64_t _column_id_count; uint32_t _version = 0; }; -} // namespace vectorized -} // namespace doris +#include "common/compile_check_end.h" +} // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 5b96b5561fba1c..0cf4bc1cdc0df6 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -465,7 +465,7 @@ Status VFileScanner::_fill_columns_from_path(size_t rows) { auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); - int num_deserialized = 0; + uint64_t num_deserialized = 0; if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, &num_deserialized, _text_formatOptions) != Status::OK()) { diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 86171d634ac693..4898cadc86fdb3 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -109,7 +109,7 @@ class VFileScanner : public VScanner { // col names from _file_slot_descs std::vector _file_col_names; // column id to name map. Collect from FE slot descriptor. - std::unordered_map _col_id_name_map; + std::unordered_map _col_id_name_map; // Partition source slot descriptors std::vector _partition_slot_descs; diff --git a/be/test/vec/data_types/common_data_type_serder_test.h b/be/test/vec/data_types/common_data_type_serder_test.h index 46206d5ed7eb15..88535e054ffd58 100644 --- a/be/test/vec/data_types/common_data_type_serder_test.h +++ b/be/test/vec/data_types/common_data_type_serder_test.h @@ -52,9 +52,9 @@ // serialize_one_cell_to_json (const IColumn &column, int row_num, BufferWritable &bw, FormatOptions &options) const =0 // serialize_column_to_json (const IColumn &column, int start_idx, int end_idx, BufferWritable &bw, FormatOptions &options) const =0 // deserialize_one_cell_from_json (IColumn &column, Slice &slice, const FormatOptions &options) const =0 -// deserialize_column_from_json_vector (IColumn &column, std::vector< Slice > &slices, int *num_deserialized, const FormatOptions &options) const =0 -// deserialize_column_from_fixed_json (IColumn &column, Slice &slice, int rows, int *num_deserialized, const FormatOptions &options) const -// insert_column_last_value_multiple_times (IColumn &column, int times) const +// deserialize_column_from_json_vector (IColumn &column, std::vector< Slice > &slices, uint64_t *num_deserialized, const FormatOptions &options) const =0 +// deserialize_column_from_fixed_json (IColumn &column, Slice &slice, uint64_t rows, uint64_t *num_deserialized, const FormatOptions &options) const +// insert_column_last_value_multiple_times (IColumn &column, uint64_t times) const // 3. fe|be protobuffer ser-deserialize // write_column_to_pb (const IColumn &column, PValues &result, int start, int end) const =0 // read_column_from_pb (IColumn &column, const PValues &arg) const =0 diff --git a/be/test/vec/data_types/common_data_type_test.h b/be/test/vec/data_types/common_data_type_test.h index 36abc3402e2156..e0d7d57e1a6427 100644 --- a/be/test/vec/data_types/common_data_type_test.h +++ b/be/test/vec/data_types/common_data_type_test.h @@ -19,22 +19,16 @@ #include #include -#include #include #include -#include "olap/schema.h" #include "vec/columns/column.h" -#include "vec/columns/column_array.h" -#include "vec/columns/column_map.h" -#include "vec/columns/columns_number.h" #include "vec/core/field.h" -#include "vec/core/sort_block.h" -#include "vec/core/sort_description.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_map.h" +#include "vec/io/reader_buffer.h" // this test is gonna to be a data type test template for all DataType which should make ut test to coverage the function defined // for example DataTypeIPv4 should test this function: