Skip to content

Commit 37b7375

Browse files
authored
fix: coalesce function should return correct data type (#9459)
* fix: Remove supported coalesce types
* Use comparison_coercion
* Fix test
* Fix
* Add comment
* More
* fix
1 parent 8d58b03 commit 37b7375

File tree

7 files changed

+84
-35
lines changed

7 files changed

+84
-35
lines changed

datafusion/expr/src/built_in_function.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@ use std::sync::{Arc, OnceLock};
2626
use crate::signature::TIMEZONE_WILDCARD;
2727
use crate::type_coercion::binary::get_wider_type;
2828
use crate::type_coercion::functions::data_types;
29-
use crate::{
30-
conditional_expressions, FuncMonotonicity, Signature, TypeSignature, Volatility,
31-
};
29+
use crate::{FuncMonotonicity, Signature, TypeSignature, Volatility};
3230

3331
use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
3432
use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
@@ -899,10 +897,9 @@ impl BuiltinScalarFunction {
899897
| BuiltinScalarFunction::ConcatWithSeparator => {
900898
Signature::variadic(vec![Utf8], self.volatility())
901899
}
902-
BuiltinScalarFunction::Coalesce => Signature::variadic(
903-
conditional_expressions::SUPPORTED_COALESCE_TYPES.to_vec(),
904-
self.volatility(),
905-
),
900+
BuiltinScalarFunction::Coalesce => {
901+
Signature::variadic_equal(self.volatility())
902+
}
906903
BuiltinScalarFunction::SHA224
907904
| BuiltinScalarFunction::SHA256
908905
| BuiltinScalarFunction::SHA384
@@ -1575,4 +1572,13 @@ mod tests {
15751572
assert_eq!(func_from_str, *func_original);
15761573
}
15771574
}
1575+
1576+
#[test]
1577+
fn test_coalesce_return_types() {
1578+
let coalesce = BuiltinScalarFunction::Coalesce;
1579+
let return_type = coalesce
1580+
.return_type(&[DataType::Date32, DataType::Date32])
1581+
.unwrap();
1582+
assert_eq!(return_type, DataType::Date32);
1583+
}
15781584
}

datafusion/expr/src/conditional_expressions.rs

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,6 @@ use arrow::datatypes::DataType;
2222
use datafusion_common::{plan_err, DFSchema, Result};
2323
use std::collections::HashSet;
2424

25-
/// Currently supported types by the coalesce function.
26-
/// The order of these types correspond to the order on which coercion applies
27-
/// This should thus be from least informative to most informative
28-
pub static SUPPORTED_COALESCE_TYPES: &[DataType] = &[
29-
DataType::Boolean,
30-
DataType::UInt8,
31-
DataType::UInt16,
32-
DataType::UInt32,
33-
DataType::UInt64,
34-
DataType::Int8,
35-
DataType::Int16,
36-
DataType::Int32,
37-
DataType::Int64,
38-
DataType::Float32,
39-
DataType::Float64,
40-
DataType::Utf8,
41-
DataType::LargeUtf8,
42-
];
43-
4425
/// Helper struct for building [Expr::Case]
4526
pub struct CaseBuilder {
4627
expr: Option<Box<Expr>>,

datafusion/expr/src/type_coercion/binary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ fn string_temporal_coercion(
361361

362362
/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
363363
/// where both are numeric
364-
fn comparison_binary_numeric_coercion(
364+
pub(crate) fn comparison_binary_numeric_coercion(
365365
lhs_type: &DataType,
366366
rhs_type: &DataType,
367367
) -> Option<DataType> {

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use arrow::{
2828
use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims};
2929
use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result};
3030

31-
use super::binary::comparison_coercion;
31+
use super::binary::{comparison_binary_numeric_coercion, comparison_coercion};
3232

3333
/// Performs type coercion for function arguments.
3434
///
@@ -187,6 +187,10 @@ fn get_valid_types(
187187
let new_type = current_types.iter().skip(1).try_fold(
188188
current_types.first().unwrap().clone(),
189189
|acc, x| {
190+
// The coerced types found by `comparison_coercion` are not guaranteed to be
191+
// coercible for the arguments. `comparison_coercion` returns looser
192+
// types that can be coerced to both `acc` and `x` for comparison purposes.
193+
// See `maybe_data_types` for the actual coercion.
190194
let coerced_type = comparison_coercion(&acc, x);
191195
if let Some(coerced_type) = coerced_type {
192196
Ok(coerced_type)
@@ -276,9 +280,9 @@ fn maybe_data_types(
276280
if current_type == valid_type {
277281
new_type.push(current_type.clone())
278282
} else {
279-
// attempt to coerce
280-
if let Some(valid_type) = coerced_from(valid_type, current_type) {
281-
new_type.push(valid_type)
283+
// attempt to coerce.
284+
if let Some(coerced_type) = coerced_from(valid_type, current_type) {
285+
new_type.push(coerced_type)
282286
} else {
283287
// not possible
284288
return None;
@@ -427,8 +431,19 @@ fn coerced_from<'a>(
427431
Some(type_into.clone())
428432
}
429433

430-
// cannot coerce
431-
_ => None,
434+
// More coercion rules.
435+
// Note that not all rules in `comparison_coercion` can be reused here.
436+
// For example, all numeric types can be coerced into Utf8 for comparison,
437+
// but not for function arguments.
438+
_ => comparison_binary_numeric_coercion(type_into, type_from).and_then(
439+
|coerced_type| {
440+
if *type_into == coerced_type {
441+
Some(coerced_type)
442+
} else {
443+
None
444+
}
445+
},
446+
),
432447
}
433448
}
434449

datafusion/physical-expr/src/math_expressions.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use std::sync::Arc;
2525
use arrow::array::ArrayRef;
2626
use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array};
2727
use arrow::datatypes::DataType;
28+
use arrow_array::Array;
2829
use rand::{thread_rng, Rng};
2930

3031
use datafusion_common::ScalarValue::{Float32, Int64};
@@ -92,8 +93,9 @@ macro_rules! downcast_arg {
9293
($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
9394
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
9495
DataFusionError::Internal(format!(
95-
"could not cast {} to {}",
96+
"could not cast {} from {} to {}",
9697
$NAME,
98+
$ARG.data_type(),
9799
type_name::<$ARRAY_TYPE>()
98100
))
99101
})?

datafusion/sqllogictest/test_files/joins.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1782,7 +1782,7 @@ AS VALUES
17821782
('BB', 6, 1),
17831783
('BB', 6, 1);
17841784

1785-
query TIR
1785+
query TII
17861786
select col1, col2, coalesce(sum_col3, 0) as sum_col3
17871787
from (select distinct col2 from tbl) AS q1
17881788
cross join (select distinct col1 from tbl) AS q2

datafusion/sqllogictest/test_files/scalar.slt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,51 @@ SELECT COALESCE(c1 * c2, 0) FROM test
18411841
statement ok
18421842
drop table test
18431843

1844+
# coalesce date32
1845+
1846+
statement ok
1847+
CREATE TABLE test(
1848+
d1_date DATE,
1849+
d2_date DATE,
1850+
d3_date DATE
1851+
) as VALUES
1852+
('2022-12-12','2022-12-12','2022-12-12'),
1853+
(NULL,'2022-12-11','2022-12-12'),
1854+
('2022-12-12','2022-12-10','2022-12-12'),
1855+
('2022-12-12',NULL,'2022-12-12'),
1856+
('2022-12-12','2022-12-8','2022-12-12'),
1857+
('2022-12-12','2022-12-7',NULL),
1858+
('2022-12-12',NULL,'2022-12-12'),
1859+
(NULL,'2022-12-5','2022-12-12')
1860+
;
1861+
1862+
query D
1863+
SELECT COALESCE(d1_date, d2_date, d3_date) FROM test
1864+
----
1865+
2022-12-12
1866+
2022-12-11
1867+
2022-12-12
1868+
2022-12-12
1869+
2022-12-12
1870+
2022-12-12
1871+
2022-12-12
1872+
2022-12-05
1873+
1874+
query T
1875+
SELECT arrow_typeof(COALESCE(d1_date, d2_date, d3_date)) FROM test
1876+
----
1877+
Date32
1878+
Date32
1879+
Date32
1880+
Date32
1881+
Date32
1882+
Date32
1883+
Date32
1884+
Date32
1885+
1886+
statement ok
1887+
drop table test
1888+
18441889
statement ok
18451890
CREATE TABLE test(
18461891
i32 INT,

0 commit comments

Comments
 (0)