From 6691067e51ed7ee29f50663b7ce9f467cb1dc367 Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 13 Aug 2025 16:30:21 +0800 Subject: [PATCH 1/6] feat: support the `to_binary` with format --- src/query/functions/src/scalars/binary.rs | 58 ++++++++++++++++++- .../sql/src/planner/semantic/type_check.rs | 34 +++++++++++ .../03_common/03_0041_insert_into_binary.test | 7 ++- .../02_0019_function_strings_hex.test | 9 +++ .../02_0024_function_strings_base_64.test | 5 ++ 5 files changed, 109 insertions(+), 4 deletions(-) diff --git a/src/query/functions/src/scalars/binary.rs b/src/query/functions/src/scalars/binary.rs index bfc4f54ed54a5..6582c0d83a589 100644 --- a/src/query/functions/src/scalars/binary.rs +++ b/src/query/functions/src/scalars/binary.rs @@ -35,8 +35,12 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::Value; pub fn register(registry: &mut FunctionRegistry) { - registry.register_aliases("to_hex", &["hex"]); - registry.register_aliases("from_hex", &["unhex"]); + registry.register_aliases("to_hex", &["hex", "hex_encode"]); + registry.register_aliases("from_hex", &["unhex", "hex_decode_binary"]); + registry.register_aliases("try_from_hex", &["try_hex_decode_binary"]); + registry.register_aliases("to_base64", &["base64_encode"]); + registry.register_aliases("from_base64", &["base64_decode_binary"]); + registry.register_aliases("try_from_base64", &["try_base64_decode_binary"]); registry.register_passthrough_nullable_1_arg::, _, _>( "length", @@ -137,6 +141,32 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); + registry.register_passthrough_nullable_2_arg::( + "to_binary", + |_, _, _| FunctionDomain::Full, + |val, format, ctx| { + let Some(format) = format.as_scalar() else { + ctx.set_error( + 0, + "`format` parameter must be a scalar constant, not a column or expression", + ); + return Value::Scalar(Vec::new()); + }; + match format.to_ascii_lowercase().as_str() { + "hex" => eval_unhex(val, ctx), + "base64" => eval_from_base64(val, ctx), + "utf-8" => match val { + Value::Scalar(val) => Value::Scalar(val.as_bytes().to_vec()), + Value::Column(col) => Value::Column(col.into()), + }, + _ => { + ctx.set_error(0, "The format option only supports hex, base64, and utf-8"); + Value::Scalar(Vec::new()) + } + } + }, + ); + registry.register_combine_nullable_1_arg::( "try_to_binary", |_, _| FunctionDomain::Full, @@ -149,6 +179,30 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); + registry.register_combine_nullable_2_arg::( + "try_to_binary", + |_, _, _| FunctionDomain::Full, + |val, format, ctx| { + let Some(format) = format.as_scalar() else { + return Value::Scalar(None); + }; + match format.to_ascii_lowercase().as_str() { + "hex" => error_to_null(eval_unhex)(val, ctx), + "base64" => error_to_null(eval_from_base64)(val, ctx), + "utf-8" => match val { + Value::Scalar(val) => Value::Scalar(Some(val.as_bytes().to_vec())), + Value::Column(col) => { + let validity = Bitmap::new_constant(true, col.len()); + Value::Column(NullableColumn::new_unchecked(col.into(), validity)) + } + }, + _ => { + Value::Scalar(None) + } + } + }, + ); + registry.register_passthrough_nullable_1_arg::( "to_hex", |_, _| FunctionDomain::Full, diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 48073b7e37ba8..7ba6cd3f961aa 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -3644,6 +3644,10 @@ impl<'a> TypeChecker<'a> { Ascii::new("stream_has_data"), Ascii::new("getvariable"), Ascii::new("equal_null"), + Ascii::new("hex_decode_string"), + Ascii::new("base64_decode_string"), + Ascii::new("try_hex_decode_string"), + Ascii::new("try_base64_decode_string"), ]; FUNCTIONS } @@ -4302,6 +4306,36 @@ impl<'a> TypeChecker<'a> { Some(self.resolve_map_access(span, expr, paths)) } } + (func_name, &[expr]) + if matches!( + func_name, + "hex_decode_string" + | "try_hex_decode_string" + | "base64_decode_string" + | "try_base64_decode_string" + ) => + { + Some(self.resolve(&Expr::Cast { + span, + expr: Box::new(Expr::FunctionCall { + span, + func: ASTFunctionCall { + distinct: func_name.starts_with("try_"), + name: Identifier::from_name( + span, + func_name.replace("_string", "_binary"), + ), + args: vec![expr.clone()], + params: vec![], + order_by: vec![], + window: None, + lambda: None, + }, + }), + target_type: TypeName::String, + pg_style: false, + })) + } _ => None, } } diff --git a/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test b/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test index 0c31827150be2..e50d41bec4b5b 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test +++ b/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test @@ -11,10 +11,10 @@ statement ok CREATE TABLE IF NOT EXISTS t1(id Int, v binary) Engine = Fuse statement ok -INSERT INTO t1 (id, v) VALUES(1, to_binary('aaa')),(2, from_hex('616161')),(3, from_base64('YWFh')) +INSERT INTO t1 (id, v) VALUES(1, to_binary('aaa')),(2, from_hex('616161')),(3, from_base64('YWFh')),(4, to_binary('aaa', 'utf-8')),(5, to_binary('616161', 'hex')),(6, to_binary('YWFh', 'base64')) statement ok -INSERT INTO t1 (id, v) VALUES(4, 'aaa') +INSERT INTO t1 (id, v) VALUES(7, 'aaa') query IT SELECT id, v FROM t1 order by id @@ -23,6 +23,9 @@ SELECT id, v FROM t1 order by id 2 616161 3 616161 4 616161 +5 616161 +6 616161 +7 616161 statement ok ALTER TABLE t1 MODIFY COLUMN v string diff --git a/tests/sqllogictests/suites/query/functions/02_0019_function_strings_hex.test b/tests/sqllogictests/suites/query/functions/02_0019_function_strings_hex.test index 2cdffb831861f..27201b094af65 100644 --- a/tests/sqllogictests/suites/query/functions/02_0019_function_strings_hex.test +++ b/tests/sqllogictests/suites/query/functions/02_0019_function_strings_hex.test @@ -18,3 +18,12 @@ select hex(null) ---- NULL +query T +SELECT FROM_HEX(TO_HEX('abc'))::STRING +---- +abc + +query T +SELECT HEX_DECODE_STRING(TO_HEX('abc')) +---- +abc diff --git a/tests/sqllogictests/suites/query/functions/02_0024_function_strings_base_64.test b/tests/sqllogictests/suites/query/functions/02_0024_function_strings_base_64.test index 1402122e5a609..c6ea57c5d3643 100644 --- a/tests/sqllogictests/suites/query/functions/02_0024_function_strings_base_64.test +++ b/tests/sqllogictests/suites/query/functions/02_0024_function_strings_base_64.test @@ -8,6 +8,11 @@ SELECT FROM_BASE64(TO_BASE64('abc'))::STRING ---- abc +query T +SELECT BASE64_DECODE_STRING(TO_BASE64('abc')) +---- +abc + query T SELECT TO_BASE64(NULL) ---- From 6b6be8b3ea4b59bb87d61116eb3649d79a62523e Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 13 Aug 2025 17:05:00 +0800 Subject: [PATCH 2/6] chore: codefmt --- src/query/functions/src/scalars/binary.rs | 4 +--- .../tests/it/scalars/testdata/function_list.txt | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/query/functions/src/scalars/binary.rs b/src/query/functions/src/scalars/binary.rs index 6582c0d83a589..26c224629a02f 100644 --- a/src/query/functions/src/scalars/binary.rs +++ b/src/query/functions/src/scalars/binary.rs @@ -196,9 +196,7 @@ pub fn register(registry: &mut FunctionRegistry) { Value::Column(NullableColumn::new_unchecked(col.into(), validity)) } }, - _ => { - Value::Scalar(None) - } + _ => Value::Scalar(None) } }, ); diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 04355bae76276..1ced92fcd9caf 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -6,6 +6,8 @@ array_get -> get array_length -> length array_size -> length array_slice -> slice +base64_decode_binary -> from_base64 +base64_encode -> to_base64 between_dows -> between_days between_doys -> between_days between_epochs -> between_seconds @@ -28,6 +30,8 @@ diff_doys -> diff_days diff_epochs -> diff_seconds diff_isodows -> diff_days hex -> to_hex +hex_decode_binary -> from_hex +hex_encode -> to_hex intdiv -> div ipv4_num_to_string -> inet_ntoa ipv4_string_to_num -> inet_aton @@ -95,6 +99,8 @@ to_start_of_iso_week -> to_monday to_text -> to_string to_varchar -> to_string trunc -> truncate +try_base64_decode_binary -> try_from_base64 +try_hex_decode_binary -> try_from_hex try_ipv4_num_to_string -> try_inet_ntoa try_ipv4_string_to_num -> try_inet_aton try_json_object -> try_object_construct @@ -3944,6 +3950,8 @@ Functions overloads: 7 to_binary(Geography NULL) :: Binary NULL 8 to_binary(String) :: Binary 9 to_binary(String NULL) :: Binary NULL +10 to_binary(String, String) :: Binary +11 to_binary(String NULL, String NULL) :: Binary NULL 0 to_bitmap(String) :: Bitmap 1 to_bitmap(String NULL) :: Bitmap NULL 2 to_bitmap(UInt64) :: Bitmap @@ -4616,6 +4624,8 @@ Functions overloads: 7 try_to_binary(Geography NULL) :: Binary NULL 8 try_to_binary(String) :: Binary NULL 9 try_to_binary(String NULL) :: Binary NULL +10 try_to_binary(String, String) :: Binary NULL +11 try_to_binary(String NULL, String NULL) :: Binary NULL 0 try_to_boolean(Variant) :: Boolean NULL 1 try_to_boolean(Variant NULL) :: Boolean NULL 2 try_to_boolean(String) :: Boolean NULL From 9ffbeeeb667b39adbc5202448a329f12022b7668 Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 13 Aug 2025 17:10:39 +0800 Subject: [PATCH 3/6] chore: codefmt --- src/query/functions/src/scalars/binary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/functions/src/scalars/binary.rs b/src/query/functions/src/scalars/binary.rs index 26c224629a02f..73ac59c605a1c 100644 --- a/src/query/functions/src/scalars/binary.rs +++ b/src/query/functions/src/scalars/binary.rs @@ -196,7 +196,7 @@ pub fn register(registry: &mut FunctionRegistry) { Value::Column(NullableColumn::new_unchecked(col.into(), validity)) } }, - _ => Value::Scalar(None) + _ => Value::Scalar(None), } }, ); From f760ff0dd3f8d294aa663edd7c012f7c0c27471b Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 13 Aug 2025 21:44:49 +0800 Subject: [PATCH 4/6] chore: codefmt --- .../suites/base/03_common/03_0041_insert_into_binary.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test b/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test index e50d41bec4b5b..c19c48725b522 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test +++ b/tests/sqllogictests/suites/base/03_common/03_0041_insert_into_binary.test @@ -37,6 +37,9 @@ SELECT id, v FROM t1 order by id 2 aaa 3 aaa 4 aaa +5 aaa +6 aaa +7 aaa statement ok ALTER TABLE t1 MODIFY COLUMN v binary @@ -48,6 +51,9 @@ SELECT id, v FROM t1 order by id 2 616161 3 616161 4 616161 +5 616161 +6 616161 +7 616161 statement ok create table t2(a int, b binary NOT NULL DEFAULT 'abc', c double default 'inf', e float default 'nan' ); From ef54cebc83615ccb9288f093642a568fe250f609 Mon Sep 17 00:00:00 2001 From: kould Date: Thu, 14 Aug 2025 11:50:22 +0800 Subject: [PATCH 5/6] chore: codefmt --- tests/suites/0_stateless/20+_others/20_0013_pretty_error.result | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result b/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result index 37398dc50a4b9..ff2144877535a 100644 --- a/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result +++ b/tests/suites/0_stateless/20+_others/20_0013_pretty_error.result @@ -16,7 +16,7 @@ Error: APIError: QueryFailed: [1008]error: --> SQL:1:8 | 1 | select base64(1) - | ^^^^^^^^^ no function matches the given name: 'base64', do you mean 'to_base64'? + | ^^^^^^^^^ no function matches the given name: 'base64', do you mean 'base64_encode', 'base64_decode_binary', 'base64_decode_string', 'to_base64'? Error: APIError: QueryFailed: [1065]error: From aa0dc459cff99c574717583a895c1736cec220af Mon Sep 17 00:00:00 2001 From: kould Date: Fri, 15 Aug 2025 10:15:10 +0800 Subject: [PATCH 6/6] chore: codefmt --- src/query/sql/src/planner/semantic/type_check.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 7ba6cd3f961aa..3d9b54afac8c4 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -4320,7 +4320,7 @@ impl<'a> TypeChecker<'a> { expr: Box::new(Expr::FunctionCall { span, func: ASTFunctionCall { - distinct: func_name.starts_with("try_"), + distinct: false, name: Identifier::from_name( span, func_name.replace("_string", "_binary"),