Skip to content

Commit

Permalink
fix lower and upper
Browse files Browse the repository at this point in the history
  • Loading branch information
hubgeter committed Nov 10, 2024
1 parent 4120ac9 commit 14b5e53
Show file tree
Hide file tree
Showing 12 changed files with 441 additions and 274 deletions.
4 changes: 1 addition & 3 deletions be/src/vec/data_types/serde/data_type_array_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,7 @@ class DataTypeArraySerDe : public DataTypeSerDe {
nested_serde->set_return_object_as_string(value);
}

virtual DataTypeSerDeSPtrs get_nested_serdes() const override {
return {nested_serde};
}
virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; }

private:
template <bool is_binary_format>
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_map_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class DataTypeMapSerDe : public DataTypeSerDe {
}

virtual DataTypeSerDeSPtrs get_nested_serdes() const override {
return {key_serde,value_serde};
return {key_serde, value_serde};
}

private:
Expand Down
4 changes: 1 addition & 3 deletions be/src/vec/data_types/serde/data_type_nullable_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,7 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
int row_num) const override;
Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;

virtual DataTypeSerDeSPtrs get_nested_serdes() const override {
return {nested_serde};
}
virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; }

private:
template <bool is_binary_format>
Expand Down
2 changes: 0 additions & 2 deletions be/src/vec/data_types/serde/data_type_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,6 @@ inline void checkArrowStatus(const arrow::Status& status, const std::string& col
}
}



DataTypeSerDeSPtrs create_data_type_serdes(
const std::vector<std::shared_ptr<const IDataType>>& types);
DataTypeSerDeSPtrs create_data_type_serdes(const std::vector<SlotDescriptor*>& slots);
Expand Down
4 changes: 1 addition & 3 deletions be/src/vec/data_types/serde/data_type_struct_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ class DataTypeStructSerDe : public DataTypeSerDe {
}
}

virtual DataTypeSerDeSPtrs get_nested_serdes() const override {
return elem_serdes_ptrs;
}
virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return elem_serdes_ptrs; }

private:
std::optional<size_t> try_get_position_by_name(const String& name) const;
Expand Down
532 changes: 296 additions & 236 deletions be/src/vec/exec/format/json/new_json_reader.cpp

Large diffs are not rendered by default.

35 changes: 19 additions & 16 deletions be/src/vec/exec/format/json/new_json_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ class NewJsonReader : public GenericReader {
~NewJsonReader() override = default;

Status init_reader(const std::unordered_map<std::string, vectorized::VExprContextSPtr>&
col_default_value_ctx);
col_default_value_ctx,
bool is_load);
Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
Status get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
std::unordered_set<std::string>* missing_cols) override;
Expand Down Expand Up @@ -129,15 +130,10 @@ class NewJsonReader : public GenericReader {
const std::vector<SlotDescriptor*>& slot_descs, bool* valid);

Status _write_data_to_column(rapidjson::Value::ConstValueIterator value,
SlotDescriptor* slot_desc, vectorized::IColumn* column_ptr,
const TypeDescriptor& type_desc, vectorized::IColumn* column_ptr,
const std::string& column_name, DataTypeSerDeSPtr serde,
bool* valid);

Status _write_data_to_column(rapidjson::Value::ConstValueIterator value,
const TypeDescriptor& type_desc, vectorized::IColumn* column_ptr,
DataTypeSerDeSPtr serde,
bool* valid);


Status _write_columns_by_jsonpath(rapidjson::Value& objectValue,
const std::vector<SlotDescriptor*>& slot_descs, Block& block,
bool* valid);
Expand All @@ -150,7 +146,7 @@ class NewJsonReader : public GenericReader {
Status _read_one_message(std::unique_ptr<uint8_t[]>* file_buf, size_t* read_size);

// simdjson, replace none simdjson function if it is ready
Status _simdjson_init_reader();
Status _simdjson_init_reader(bool is_load);
Status _simdjson_parse_json(size_t* size, bool* is_empty_row, bool* eof,
simdjson::error_code* error);
Status _get_json_value(size_t* size, bool* eof, simdjson::error_code* error,
Expand Down Expand Up @@ -184,14 +180,10 @@ class NewJsonReader : public GenericReader {
const std::vector<SlotDescriptor*>& slot_descs, bool* valid);

Status _simdjson_write_data_to_column(simdjson::ondemand::value& value,
SlotDescriptor* slot_desc,
vectorized::IColumn* column_ptr, bool* valid);

Status _simdjson_write_data_to_column(simdjson::ondemand::value& value,
const TypeDescriptor& type_desc,
const TypeDescriptor& type_desc,
vectorized::IColumn* column_ptr,
DataTypeSerDeSPtr serde,
bool* valid);
const std::string& column_name, DataTypeSerDeSPtr serde,
bool* valid);

Status _simdjson_write_columns_by_jsonpath(simdjson::ondemand::object* value,
const std::vector<SlotDescriptor*>& slot_descs,
Expand Down Expand Up @@ -305,6 +297,17 @@ class NewJsonReader : public GenericReader {
std::unordered_map<std::string, std::string> _col_default_value_map;

int32_t skip_bitmap_col_idx {-1};

bool _is_load = true;
bool _is_hive_table = false;
// In Hive: create table xxx ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe';
// Hive does not allow creating columns whose names differ only in case (including field names inside
// structs), and it automatically converts uppercase names in the CREATE statement to lowercase. However,
// when Hive loads data into the table, the column names in the data may be uppercase, and there may be
// multiple columns with the same name but different capitalization. We follow Hive's behavior: convert
// all column names in the data to lowercase and use the last occurrence as the inserted value.

DataTypeSerDeSPtrs _serdes;
};

} // namespace vectorized
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exec/scan/vfile_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -969,8 +969,8 @@ Status VFileScanner::_get_next_reader() {
_cur_reader =
NewJsonReader::create_unique(_state, _profile, &_counter, *_params, range,
_file_slot_descs, &_scanner_eof, _io_ctx.get());
init_status =
((NewJsonReader*)(_cur_reader.get()))->init_reader(_col_default_value_ctx);
init_status = ((NewJsonReader*)(_cur_reader.get()))
->init_reader(_col_default_value_ctx, _is_load);
break;
}
case TFileFormatType::FORMAT_AVRO: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Fixture script: defines a JSON-backed Hive table over preinstalled data files.
use `default`;


-- Table read through Hive's JsonSerDe; it has scalar, struct and map columns.
-- The rows come from the files already present under LOCATION.
CREATE TABLE json_load_data_table (
`id` int,
`col1` int,
`col2` struct< col2a:int, col2b:string>,
`col3` map<int,string>
) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION
'/user/doris/preinstalled_data/json/json_load_data_table';

-- NOTE(review): presumably run so the metastore picks up the files under LOCATION; confirm it is needed for this unpartitioned table.
msck repair table json_load_data_table;
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{"id":1,"col1":10,"col2":{"col2a":10,"col2b":"string1"},"col3":{"1":"string10"}}
{"id":2,"col1":10,"col1":20,"col2":{"col2b":"string2","col2a":0,"Col2A":20},"col3":{"2":"string2"}}
{"id":3,"col1":10,"col1":20,"COL1":30,"COL2":{"col2a":30,"col2b":"string3"}}
{"id":4,"COL1":40,"col2":{"col2a":10,"col2b":"string4","new_col":"new_val","col2a":40},"col3":{"4":"string4"}}
{"id":5}
{"id":6,"col1":60,"col2":{"COL2a":60,"col2b":600},"col3":{"6":600}}
{"id":7,"col1":70,"col3":{"7":"string7"},"col2":{"col2b":"string7","col2a":70}}
Loading

0 comments on commit 14b5e53

Please sign in to comment.