From da586b4872387e6e3f74e69b0f435c644a7383ce Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 17 Oct 2024 14:10:53 +0800 Subject: [PATCH 1/7] feat: support object insert --- Cargo.toml | 2 +- src/query/functions/src/scalars/variant.rs | 115 +++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c8273d4b01bd..e6c9b2d98f9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -415,7 +415,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "57795da" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } -jsonb = { git = "https://github.com/databendlabs/jsonb", rev = "c7525d9" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "348a303efb1c200e9577b26aefcbeaac7096a754" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } orc-rust = { git = "https://github.com/datafuse-extras/datafusion-orc", rev = "03372b97" } recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "6af35a1" } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 44772c5ec997..da0695e38c16 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -1644,6 +1644,41 @@ pub fn register(registry: &mut FunctionRegistry) { }, ), ); + + registry.register_function_factory("json_object_insert", |_, args_type| { + if args_type.len() != 3 && args_type.len() != 4 { + return None; + } + if (args_type[0].remove_nullable() != DataType::Variant && args_type[0] != DataType::Null) + || (args_type[1].remove_nullable() != DataType::String + && args_type[1] != DataType::Null) + { + return None; + } + if args_type.len() == 4 { + if (args_type[3].remove_nullable() != DataType::Boolean && args_type[3] != DataType::Null) { + return None; + } + } + let is_nullable = args_type[0].is_nullable_or_null() { + let return_type = is_nullable { + DataType::Nullable(Box::new(DataType::Variant)) + } else { + DataType::Variant + }; + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "json_object_insert".to_string(), + args_type: args_type.to_vec(), + return_type, + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::MayThrow), + eval: Box::new(|args, ctx| json_object_insert_fn(args, ctx, is_nullable)), + }, + })) + }); } fn json_array_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value { @@ -2017,6 +2052,86 @@ where } } +fn json_object_insert_fn( + args: &[ValueRef], + ctx: &mut EvalContext, + is_nullable: bool, +) -> Value { + let len_opt = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + let len = len_opt.unwrap_or(1); + let mut validity = MutableBitmap::with_capacity(len); + let mut builder = BinaryColumnBuilder::with_capacity(len, len * 50); + for idx in 0..len { + let value = unsafe { columns[0].index_unchecked(idx) }; + if value == ScalarRef::Null { + builder.commit_row(); + validity.push(false); + continue; + } + let new_key = unsafe { columns[1].index_unchecked(idx) }; + let new_val = unsafe { columns[2].index_unchecked(idx) }; + if new_key == ScalarRef::Null || new_val == ScalarRef::Null { + builder.push(value) + builder.commit_row(); + validity.push(true); + continue; + } + let update_flag = if args.len() == 4 { + let v = unsafe { columns[3].index_unchecked(idx) }; + match v { + ScalarRef::Boolean(v) => v, + _ => false, + } + } else { + false + }; + let value = value.as_variant().unwrap(); + let new_key = new_key.as_string().unwrap(); + match new_val { + ScalarRef::Variant(new_val) => { + if let Err(err) = + jsonb::object_insert(value, new_key, new_val, update_flag, &mut builder.data) + { + ctx.set_error(builder.len(), err.to_string()); + } + } + _ => { + let mut new_val_buf = vec![]; + cast_scalar_to_variant(new_val, ctx.func_ctx.tz, &mut new_val_buf); + if let Err(err) = + jsonb::object_insert(value, new_key, &new_val_buf, update_flag, &mut builder.data) + { + ctx.set_error(builder.len(), err.to_string()); + } + } + } + builder.commit_row(); + validity.push(true); + } + if is_nullable { + let validity: Bitmap = validity.into(); + match len_opt { + Some(_) => Value::Column(Column::Variant(builder.build())).wrap_nullable(Some(validity)), + None => { + if !validity.get_bit(0) { + Value::Scalar(Scalar::Null) + } else { + Value::Scalar(Scalar::Variant(builder.build_scalar())) + } + } + } + } else { + match len_opt { + Some(_) => Value::Column(Column::Variant(builder.build())), + None => Value::Scalar(Scalar::Variant(builder.build_scalar())), + } + } +} + + // Extract string for string type, other types convert to JSON string. fn cast_to_string(v: &[u8]) -> String { match to_str(v) { From 7d7d6067bc7295c6913ea3a6fedd2cbf0df3ddee Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 17 Oct 2024 23:43:10 +0800 Subject: [PATCH 2/7] fix --- src/query/functions/src/scalars/variant.rs | 49 +++++--- .../functions/tests/it/scalars/variant.rs | 105 ++++++++++++++++++ 2 files changed, 138 insertions(+), 16 deletions(-) diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index da0695e38c16..0e0cedb623f9 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -1656,17 +1656,17 @@ pub fn register(registry: &mut FunctionRegistry) { return None; } if args_type.len() == 4 { - if (args_type[3].remove_nullable() != DataType::Boolean && args_type[3] != DataType::Null) { + if args_type[3].remove_nullable() != DataType::Boolean && args_type[3] != DataType::Null + { return None; } } - let is_nullable = args_type[0].is_nullable_or_null() { - let return_type = is_nullable { + let is_nullable = args_type[0].is_nullable_or_null(); + let return_type = if is_nullable { DataType::Nullable(Box::new(DataType::Variant)) } else { DataType::Variant }; - Some(Arc::new(Function { signature: FunctionSignature { name: "json_object_insert".to_string(), @@ -1675,7 +1675,7 @@ pub fn register(registry: &mut FunctionRegistry) { }, eval: FunctionEval::Scalar { calc_domain: Box::new(|_, _| FunctionDomain::MayThrow), - eval: Box::new(|args, ctx| json_object_insert_fn(args, ctx, is_nullable)), + eval: Box::new(move |args, ctx| json_object_insert_fn(args, ctx, is_nullable)), }, })) }); @@ -2065,22 +2065,35 @@ fn json_object_insert_fn( let mut validity = MutableBitmap::with_capacity(len); let mut builder = BinaryColumnBuilder::with_capacity(len, len * 50); for idx in 0..len { - let value = unsafe { columns[0].index_unchecked(idx) }; + let value = match &args[0] { + ValueRef::Scalar(scalar) => scalar.clone(), + ValueRef::Column(col) => unsafe { col.index_unchecked(idx) }, + }; if value == ScalarRef::Null { builder.commit_row(); validity.push(false); continue; } - let new_key = unsafe { columns[1].index_unchecked(idx) }; - let new_val = unsafe { columns[2].index_unchecked(idx) }; + let value = value.as_variant().unwrap(); + let new_key = match &args[1] { + ValueRef::Scalar(scalar) => scalar.clone(), + ValueRef::Column(col) => unsafe { col.index_unchecked(idx) }, + }; + let new_val = match &args[2] { + ValueRef::Scalar(scalar) => scalar.clone(), + ValueRef::Column(col) => unsafe { col.index_unchecked(idx) }, + }; if new_key == ScalarRef::Null || new_val == ScalarRef::Null { - builder.push(value) + builder.put(value); builder.commit_row(); validity.push(true); continue; } let update_flag = if args.len() == 4 { - let v = unsafe { columns[3].index_unchecked(idx) }; + let v = match &args[3] { + ValueRef::Scalar(scalar) => scalar.clone(), + ValueRef::Column(col) => unsafe { col.index_unchecked(idx) }, + }; match v { ScalarRef::Boolean(v) => v, _ => false, @@ -2088,7 +2101,6 @@ fn json_object_insert_fn( } else { false }; - let value = value.as_variant().unwrap(); let new_key = new_key.as_string().unwrap(); match new_val { ScalarRef::Variant(new_val) => { @@ -2101,9 +2113,13 @@ fn json_object_insert_fn( _ => { let mut new_val_buf = vec![]; cast_scalar_to_variant(new_val, ctx.func_ctx.tz, &mut new_val_buf); - if let Err(err) = - jsonb::object_insert(value, new_key, &new_val_buf, update_flag, &mut builder.data) - { + if let Err(err) = jsonb::object_insert( + value, + new_key, + &new_val_buf, + update_flag, + &mut builder.data, + ) { ctx.set_error(builder.len(), err.to_string()); } } @@ -2114,7 +2130,9 @@ fn json_object_insert_fn( if is_nullable { let validity: Bitmap = validity.into(); match len_opt { - Some(_) => Value::Column(Column::Variant(builder.build())).wrap_nullable(Some(validity)), + Some(_) => { + Value::Column(Column::Variant(builder.build())).wrap_nullable(Some(validity)) + } None => { if !validity.get_bit(0) { Value::Scalar(Scalar::Null) @@ -2131,7 +2149,6 @@ fn json_object_insert_fn( } } - // Extract string for string type, other types convert to JSON string. fn cast_to_string(v: &[u8]) -> String { match to_str(v) { diff --git a/src/query/functions/tests/it/scalars/variant.rs b/src/query/functions/tests/it/scalars/variant.rs index 3130c6ce53dd..43bbae7d6038 100644 --- a/src/query/functions/tests/it/scalars/variant.rs +++ b/src/query/functions/tests/it/scalars/variant.rs @@ -69,6 +69,7 @@ fn test_variant() { test_json_array_intersection(file); test_json_array_except(file); test_json_array_overlap(file); + test_json_object_insert(file); } fn test_parse_json(file: &mut impl Write) { @@ -1934,3 +1935,107 @@ fn test_json_array_overlap(file: &mut impl Write) { ], ); } + + +fn test_json_object_insert(file: &mut impl Write) { + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello')"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'n', 100)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'z', [10,20])"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'x', '{"a":"b"}'::variant)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'v', null)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{}'::variant, 'v', 'vv')"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello', true)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'n', 100, true)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'z', [10,20], true)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'x', '{"a":"b"}'::variant, true)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'v', null, true)"#, + &[], + ); + run_ast( + file, + r#"json_object_insert('{}'::variant, 'v', 'vv', true)"#, + &[], + ); + + run_ast( + file, + "json_object_insert(parse_json(v), 'x', parse_json(n))", + &[ + ( + "v", + StringType::from_data_with_validity( + vec![r#"{"k":"v"}"#, r#"{"m":"n"}"#, "", r#"{"a":"b","c":"d","y":"z"}"#], + vec![true, true, false, true], + ), + ), + ( + "n", + StringType::from_data_with_validity(vec![r#""hi""#, "", "true", "[1,2,3]"], vec![ + true, false, true, true, + ]), + ), + ], + ); + run_ast( + file, + "json_object_insert(parse_json(v), 'c', parse_json(n), true)", + &[ + ( + "v", + StringType::from_data_with_validity( + vec![r#"{"k":"v"}"#, r#"{"m":"n"}"#, "", r#"{"a":"b","c":"d","y":"z"}"#], + vec![true, true, false, true], + ), + ), + ( + "n", + StringType::from_data_with_validity(vec![r#""hi""#, "", "true", "[1,2,3]"], vec![ + true, false, true, true, + ]), + ), + ], + ); +} + From 59f71b61a897bb82d8e15c0409738eb5d25eeb45 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 18 Oct 2024 11:05:37 +0800 Subject: [PATCH 3/7] fix --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/query/functions/src/scalars/variant.rs | 10 ++++----- .../functions/tests/it/scalars/variant.rs | 22 +++++++++++-------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b5e4c190107..75c5e0b968ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9252,7 +9252,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.4.3" -source = "git+https://github.com/databendlabs/jsonb?rev=c7525d9#c7525d93ad7b1e800df982e49a6e0e6454bd308e" +source = "git+https://github.com/b41sh/jsonb?rev=10462ca#10462cacfe769e7e046ae8722ecf8aa9af8b40c3" dependencies = [ "byteorder", "fast-float", diff --git a/Cargo.toml b/Cargo.toml index e6c9b2d98f9d..5804a48235ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -415,7 +415,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "57795da" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "348a303efb1c200e9577b26aefcbeaac7096a754" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "10462ca" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } orc-rust = { git = "https://github.com/datafuse-extras/datafusion-orc", rev = "03372b97" } recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "6af35a1" } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 0e0cedb623f9..762e8f617f21 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -1655,11 +1655,11 @@ pub fn register(registry: &mut FunctionRegistry) { { return None; } - if args_type.len() == 4 { - if args_type[3].remove_nullable() != DataType::Boolean && args_type[3] != DataType::Null - { - return None; - } + if args_type.len() == 4 + && args_type[3].remove_nullable() != DataType::Boolean + && args_type[3] != DataType::Null + { + return None; } let is_nullable = args_type[0].is_nullable_or_null(); let return_type = if is_nullable { diff --git a/src/query/functions/tests/it/scalars/variant.rs b/src/query/functions/tests/it/scalars/variant.rs index 43bbae7d6038..c9966594d499 100644 --- a/src/query/functions/tests/it/scalars/variant.rs +++ b/src/query/functions/tests/it/scalars/variant.rs @@ -1936,7 +1936,6 @@ fn test_json_array_overlap(file: &mut impl Write) { ); } - fn test_json_object_insert(file: &mut impl Write) { run_ast( file, @@ -1963,11 +1962,7 @@ fn test_json_object_insert(file: &mut impl Write) { r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'v', null)"#, &[], ); - run_ast( - file, - r#"json_object_insert('{}'::variant, 'v', 'vv')"#, - &[], - ); + run_ast(file, r#"json_object_insert('{}'::variant, 'v', 'vv')"#, &[]); run_ast( file, r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello', true)"#, @@ -2006,7 +2001,12 @@ fn test_json_object_insert(file: &mut impl Write) { ( "v", StringType::from_data_with_validity( - vec![r#"{"k":"v"}"#, r#"{"m":"n"}"#, "", r#"{"a":"b","c":"d","y":"z"}"#], + vec![ + r#"{"k":"v"}"#, + r#"{"m":"n"}"#, + "", + r#"{"a":"b","c":"d","y":"z"}"#, + ], vec![true, true, false, true], ), ), @@ -2025,7 +2025,12 @@ fn test_json_object_insert(file: &mut impl Write) { ( "v", StringType::from_data_with_validity( - vec![r#"{"k":"v"}"#, r#"{"m":"n"}"#, "", r#"{"a":"b","c":"d","y":"z"}"#], + vec![ + r#"{"k":"v"}"#, + r#"{"m":"n"}"#, + "", + r#"{"a":"b","c":"d","y":"z"}"#, + ], vec![true, true, false, true], ), ), @@ -2038,4 +2043,3 @@ fn test_json_object_insert(file: &mut impl Write) { ], ); } - From 69b2488cf1a82e242674a049fa54f16d7fff9bd6 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 18 Oct 2024 11:23:42 +0800 Subject: [PATCH 4/7] fix tests --- .../it/scalars/testdata/function_list.txt | 1 + .../tests/it/scalars/testdata/variant.txt | 171 ++++++++++++++++++ .../functions/tests/it/scalars/variant.rs | 7 + 3 files changed, 179 insertions(+) diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 3cfcf1ca8423..d14e8fd8697c 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -2312,6 +2312,7 @@ Functions overloads: 0 json_extract_path_text(String, String) :: String NULL 1 json_extract_path_text(String NULL, String NULL) :: String NULL 0 json_object FACTORY +0 json_object_insert FACTORY 0 json_object_keep_null FACTORY 0 json_object_keys(Variant NULL) :: Variant NULL 0 json_path_exists FACTORY diff --git a/src/query/functions/tests/it/scalars/testdata/variant.txt b/src/query/functions/tests/it/scalars/testdata/variant.txt index e67880991dc4..88eebbd52769 100644 --- a/src/query/functions/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions/tests/it/scalars/testdata/variant.txt @@ -5402,3 +5402,174 @@ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello') +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'a', 'hello') +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "a", "hello") +optimized expr : 0x400000051000000110000001100000011000000110000001100000052000000220000002500000105000000e6162646d7868656c6c6f500c5022800000022000000220000002500150024000000110000001100000016b76 +output type : Variant +output domain : Undefined +output : '{"a":"hello","b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'n', 100) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'n', 100) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "n", 100_u8) +optimized expr : 0x400000051000000110000001100000011000000110000001200000022000000250000010200000025000000e62646d6e78500c50228000000220000002200000025001500250644000000110000001100000016b76 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"n":100,"x":{"k":"v"}}' + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'z', [10,20]) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'z', array(10, 20)) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "z", array(10_u8, 20_u8)) +optimized expr : 0x4000000510000001100000011000000110000001100000012000000220000002500000105000000e5000001062646d787a500c5022800000022000000220000002500150024000000110000001100000016b76800000022000000220000002500a5014 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"x":{"k":"v"},"z":[10,20]}' + + +error: + --> SQL:1:1 + | +1 | json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'x', '{"a":"b"}'::variant) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ObjectDuplicateKey while evaluating function `json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}', 'x', '{"a":"b"}')` in expr `json_object_insert(parse_json('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'), 'x', parse_json('{"a":"b"}'))` + + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'v', null) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'v', NULL) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "v", NULL) +optimized expr : 0x40000004100000011000000110000001100000012000000220000002500000105000000e62646d78500c5022800000022000000220000002500150024000000110000001100000016b76 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' + + +ast : json_object_insert('{}'::variant, 'v', 'vv') +raw expr : json_object_insert(CAST('{}' AS Variant), 'v', 'vv') +checked expr : json_object_insert(parse_json("{}"), "v", "vv") +optimized expr : 0x400000011000000110000002767676 +output type : Variant +output domain : Undefined +output : '{"v":"vv"}' + + +error: + --> SQL:1:1 + | +1 | json_object_insert('123'::variant, 'v', 'vv') + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ InvalidObject while evaluating function `json_object_insert('123', 'v', 'vv')` in expr `json_object_insert(parse_json('123'), 'v', 'vv')` + + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello', true) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'a', 'hello', true) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "a", "hello", true) +optimized expr : 0x400000051000000110000001100000011000000110000001100000052000000220000002500000105000000e6162646d7868656c6c6f500c5022800000022000000220000002500150024000000110000001100000016b76 +output type : Variant +output domain : Undefined +output : '{"a":"hello","b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'n', 100, true) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'n', 100, true) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "n", 100_u8, true) +optimized expr : 0x400000051000000110000001100000011000000110000001200000022000000250000010200000025000000e62646d6e78500c50228000000220000002200000025001500250644000000110000001100000016b76 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"n":100,"x":{"k":"v"}}' + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'z', [10,20], true) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'z', array(10, 20), true) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "z", array(10_u8, 20_u8), true) +optimized expr : 0x4000000510000001100000011000000110000001100000012000000220000002500000105000000e5000001062646d787a500c5022800000022000000220000002500150024000000110000001100000016b76800000022000000220000002500a5014 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"x":{"k":"v"},"z":[10,20]}' + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'x', '{"a":"b"}'::variant, true) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'x', CAST('{"a":"b"}' AS Variant), true) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "x", parse_json("{\"a\":\"b\"}"), true) +optimized expr : 0x40000004100000011000000110000001100000012000000220000002500000105000000e62646d78500c5022800000022000000220000002500150024000000110000001100000016162 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"x":{"a":"b"}}' + + +ast : json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'v', null, true) +raw expr : json_object_insert(CAST('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' AS Variant), 'v', NULL, true) +checked expr : json_object_insert(parse_json("{\"b\":12,\"d\":34,\"m\":[1,2],\"x\":{\"k\":\"v\"}}"), "v", NULL, true) +optimized expr : 0x40000004100000011000000110000001100000012000000220000002500000105000000e62646d78500c5022800000022000000220000002500150024000000110000001100000016b76 +output type : Variant +output domain : Undefined +output : '{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}' + + +ast : json_object_insert('{}'::variant, 'v', 'vv', true) +raw expr : json_object_insert(CAST('{}' AS Variant), 'v', 'vv', true) +checked expr : json_object_insert(parse_json("{}"), "v", "vv", true) +optimized expr : 0x400000011000000110000002767676 +output type : Variant +output domain : Undefined +output : '{"v":"vv"}' + + +error: + --> SQL:1:1 + | +1 | json_object_insert('123'::variant, 'v', 'vv', true) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ InvalidObject while evaluating function `json_object_insert('123', 'v', 'vv', true)` in expr `json_object_insert(parse_json('123'), 'v', 'vv', true)` + + + +ast : json_object_insert(parse_json(v), 'x', parse_json(n)) +raw expr : json_object_insert(parse_json(v::String NULL), 'x', parse_json(n::String NULL)) +checked expr : json_object_insert(parse_json(v), "x", parse_json(n)) +evaluation: ++--------+---------------------------------+------------------------+-----------------------------------------+ +| | v | n | Output | ++--------+---------------------------------+------------------------+-----------------------------------------+ +| Type | String NULL | String NULL | Variant NULL | +| Domain | {""..="{\"m\":\"n\"}"} ∪ {NULL} | {""..="true"} ∪ {NULL} | Unknown | +| Row 0 | '{"k":"v"}' | '"hi"' | '{"k":"v","x":"hi"}' | +| Row 1 | '{"m":"n"}' | NULL | '{"m":"n"}' | +| Row 2 | NULL | 'true' | NULL | +| Row 3 | '{"a":"b","c":"d","y":"z"}' | '[1,2,3]' | '{"a":"b","c":"d","x":[1,2,3],"y":"z"}' | ++--------+---------------------------------+------------------------+-----------------------------------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| v | NullableColumn { column: StringColumn { data: 0x7b226b223a2276227d7b226d223a226e227d7b2261223a2262222c2263223a2264222c2279223a227a227d, offsets: [0, 9, 18, 18, 43] }, validity: [0b____1011] } | +| n | NullableColumn { column: StringColumn { data: 0x22686922747275655b312c322c335d, offsets: [0, 4, 4, 8, 15] }, validity: [0b____1101] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x40000002100000011000000110000001100000026b787668694000000110000001100000016d6e400000041000000110000001100000011000000110000001100000015000001610000001616378796264800000032000000220000002200000025001500250037a, offsets: [0, 25, 39, 39, 104] }, validity: [0b____1011] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : json_object_insert(parse_json(v), 'c', parse_json(n), true) +raw expr : json_object_insert(parse_json(v::String NULL), 'c', parse_json(n::String NULL), true) +checked expr : json_object_insert(parse_json(v), "c", parse_json(n), true) +evaluation: ++--------+---------------------------------+------------------------+---------------------------------+ +| | v | n | Output | ++--------+---------------------------------+------------------------+---------------------------------+ +| Type | String NULL | String NULL | Variant NULL | +| Domain | {""..="{\"m\":\"n\"}"} ∪ {NULL} | {""..="true"} ∪ {NULL} | Unknown | +| Row 0 | '{"k":"v"}' | '"hi"' | '{"c":"hi","k":"v"}' | +| Row 1 | '{"m":"n"}' | NULL | '{"m":"n"}' | +| Row 2 | NULL | 'true' | NULL | +| Row 3 | '{"a":"b","c":"d","y":"z"}' | '[1,2,3]' | '{"a":"b","c":[1,2,3],"y":"z"}' | ++--------+---------------------------------+------------------------+---------------------------------+ +evaluation (internal): ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| v | NullableColumn { column: StringColumn { data: 0x7b226b223a2276227d7b226d223a226e227d7b2261223a2262222c2263223a2264222c2279223a227a227d, offsets: [0, 9, 18, 18, 43] }, validity: [0b____1011] } | +| n | NullableColumn { column: StringColumn { data: 0x22686922747275655b312c322c335d, offsets: [0, 4, 4, 8, 15] }, validity: [0b____1101] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x4000000210000001100000011000000210000001636b6869764000000110000001100000016d6e4000000310000001100000011000000110000001500000161000000161637962800000032000000220000002200000025001500250037a, offsets: [0, 25, 39, 39, 94] }, validity: [0b____1011] } | ++--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/src/query/functions/tests/it/scalars/variant.rs b/src/query/functions/tests/it/scalars/variant.rs index c9966594d499..5d961e312c28 100644 --- a/src/query/functions/tests/it/scalars/variant.rs +++ b/src/query/functions/tests/it/scalars/variant.rs @@ -1963,6 +1963,8 @@ fn test_json_object_insert(file: &mut impl Write) { &[], ); run_ast(file, r#"json_object_insert('{}'::variant, 'v', 'vv')"#, &[]); + run_ast(file, r#"json_object_insert('123'::variant, 'v', 'vv')"#, &[ + ]); run_ast( file, r#"json_object_insert('{"b":12,"d":34,"m":[1,2],"x":{"k":"v"}}'::variant, 'a', 'hello', true)"#, @@ -1993,6 +1995,11 @@ fn test_json_object_insert(file: &mut impl Write) { r#"json_object_insert('{}'::variant, 'v', 'vv', true)"#, &[], ); + run_ast( + file, + r#"json_object_insert('123'::variant, 'v', 'vv', true)"#, + &[], + ); run_ast( file, From 5e2d769c93aca6d7e4957c1e986d3322e8929bf7 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 18 Oct 2024 11:48:39 +0800 Subject: [PATCH 5/7] add tests --- .../functions/02_0065_function_json.test | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/sqllogictests/suites/query/functions/02_0065_function_json.test b/tests/sqllogictests/suites/query/functions/02_0065_function_json.test index 11db15d9ce87..28c92fc25c20 100644 --- a/tests/sqllogictests/suites/query/functions/02_0065_function_json.test +++ b/tests/sqllogictests/suites/query/functions/02_0065_function_json.test @@ -1372,3 +1372,67 @@ SELECT id, json_array_overlap(v1, v2) from t4 statement ok DROP TABLE IF EXISTS t4 + + +statement ok +CREATE OR REPLACE TABLE t5(id Int, v1 Variant) + +statement ok +INSERT INTO t5(id, v1) VALUES + (1, '{"k1":"v1","k2":"v2"}'), + (2, '{"a":[1,2,3],"c":{"c1":"v1","c2":"v2"},"m":true}'), + (3, '{"a":1,"h":2,"m":3,"n":4}') + +query T +SELECT json_object_insert('{"a":1,"b":2,"d":4}'::variant, 'c', 3) +---- +{"a":1,"b":2,"c":3,"d":4} + +query T +SELECT json_object_insert('{"a":1,"b":2,"d":4}'::variant, 'x', 10) +---- +{"a":1,"b":2,"d":4,"x":10} + +query T +SELECT json_object_insert('{"a":1,"b":2,"d":4}'::variant, 'A', 10) +---- +{"A":10,"a":1,"b":2,"d":4} + +statement error 1006 +SELECT json_object_insert('{"a":1,"b":2,"d":4}'::variant, 'a', 10) + +statement error 1006 +SELECT json_object_insert('1234'::variant, 'a', 10) + +statement error 1006 +SELECT json_object_insert('{"city":"San Mateo","state":"CA"}'::variant, 'city', 'San Jose', false) + +query T +SELECT json_object_insert('{"city":"San Mateo","state":"CA"}'::variant, 'city', 'San Jose', true) +---- +{"city":"San Jose","state":"CA"} + +query T +SELECT json_object_insert('{"city":"San Mateo","state":"CA"}'::variant, 'zip_code', 94402::DOUBLE, false) +---- +{"city":"San Mateo","state":"CA","zip_code":94402.0} + +query T +SELECT json_object_insert('{"city":"San Mateo","state":"CA"}'::variant, 'zip_code', null) +---- +{"city":"San Mateo","state":"CA"} + +query T +SELECT json_object_insert('{"city":"San Mateo","state":"CA"}'::variant, 'zip_code', 'null'::variant) +---- +{"city":"San Mateo","state":"CA","zip_code":null} + +query IT +SELECT id, json_object_insert(v1, 'b', '100'::variant) from t5 +---- +1 {"b":100,"k1":"v1","k2":"v2"} +2 {"a":[1,2,3],"b":100,"c":{"c1":"v1","c2":"v2"},"m":true} +3 {"a":1,"b":100,"h":2,"m":3,"n":4} + +statement ok +DROP TABLE IF EXISTS t5 From fb49c92a4583f125f0939aea1177b1d3ff875782 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 18 Oct 2024 12:35:17 +0800 Subject: [PATCH 6/7] fix --- src/query/functions/src/scalars/variant.rs | 34 +++++++++---------- .../tests/it/scalars/testdata/variant.txt | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 762e8f617f21..0081f4b5e625 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -2075,6 +2075,12 @@ fn json_object_insert_fn( continue; } let value = value.as_variant().unwrap(); + if !is_object(value) { + ctx.set_error(builder.len(), "Invalid json object"); + builder.commit_row(); + validity.push(false); + continue; + } let new_key = match &args[1] { ValueRef::Scalar(scalar) => scalar.clone(), ValueRef::Column(col) => unsafe { col.index_unchecked(idx) }, @@ -2102,30 +2108,24 @@ fn json_object_insert_fn( false }; let new_key = new_key.as_string().unwrap(); - match new_val { + let res = match new_val { ScalarRef::Variant(new_val) => { - if let Err(err) = - jsonb::object_insert(value, new_key, new_val, update_flag, &mut builder.data) - { - ctx.set_error(builder.len(), err.to_string()); - } + jsonb::object_insert(value, new_key, new_val, update_flag, &mut builder.data) } _ => { + // if the new value is not a json value, cast it to json. let mut new_val_buf = vec![]; - cast_scalar_to_variant(new_val, ctx.func_ctx.tz, &mut new_val_buf); - if let Err(err) = jsonb::object_insert( - value, - new_key, - &new_val_buf, - update_flag, - &mut builder.data, - ) { - ctx.set_error(builder.len(), err.to_string()); - } + cast_scalar_to_variant(new_val.clone(), ctx.func_ctx.tz, &mut new_val_buf); + jsonb::object_insert(value, new_key, &new_val_buf, update_flag, &mut builder.data) } + }; + if let Err(err) = res { + validity.push(false); + ctx.set_error(builder.len(), err.to_string()); + } else { + validity.push(true); } builder.commit_row(); - validity.push(true); } if is_nullable { let validity: Bitmap = validity.into(); diff --git a/src/query/functions/tests/it/scalars/testdata/variant.txt b/src/query/functions/tests/it/scalars/testdata/variant.txt index 88eebbd52769..00ea67f28e02 100644 --- a/src/query/functions/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions/tests/it/scalars/testdata/variant.txt @@ -5459,7 +5459,7 @@ error: --> SQL:1:1 | 1 | json_object_insert('123'::variant, 'v', 'vv') - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ InvalidObject while evaluating function `json_object_insert('123', 'v', 'vv')` in expr `json_object_insert(parse_json('123'), 'v', 'vv')` + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Invalid json object while evaluating function `json_object_insert('123', 'v', 'vv')` in expr `json_object_insert(parse_json('123'), 'v', 'vv')` @@ -5521,7 +5521,7 @@ error: --> SQL:1:1 | 1 | json_object_insert('123'::variant, 'v', 'vv', true) - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ InvalidObject while evaluating function `json_object_insert('123', 'v', 'vv', true)` in expr `json_object_insert(parse_json('123'), 'v', 'vv', true)` + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Invalid json object while evaluating function `json_object_insert('123', 'v', 'vv', true)` in expr `json_object_insert(parse_json('123'), 'v', 'vv', true)` From 521710603111e503d661f43fa96694cca1beeffb Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 18 Oct 2024 13:52:55 +0800 Subject: [PATCH 7/7] fix --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 75c5e0b968ad..e3984fe9051a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9252,7 +9252,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.4.3" -source = "git+https://github.com/b41sh/jsonb?rev=10462ca#10462cacfe769e7e046ae8722ecf8aa9af8b40c3" +source = "git+https://github.com/databendlabs/jsonb?rev=672e423#672e4234758889b8fcb79ba43ac00af8c0aef120" dependencies = [ "byteorder", "fast-float", diff --git a/Cargo.toml b/Cargo.toml index 5804a48235ae..36927b95c90c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -415,7 +415,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "57795da" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } -jsonb = { git = "https://github.com/b41sh/jsonb", rev = "10462ca" } +jsonb = { git = "https://github.com/databendlabs/jsonb", rev = "672e423" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } orc-rust = { git = "https://github.com/datafuse-extras/datafusion-orc", rev = "03372b97" } recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "6af35a1" }