Fix Float and Decimal coercion

findepi · findepi · commit 056e2d0f324f · 2025-01-24T15:57:51.000+01:00
Before the change, Float* and Decimal* would coerce to a decimal type.
However, decimal cannot store all the float values: different range,
NaN, and infinity. Coercing to floating point is more desirable and also
what other systems typically do.
diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs
@@ -184,7 +184,13 @@ impl TestOutput {
 /// and the appropriate scenario
 impl ContextWithParquet {
     async fn new(scenario: Scenario, unit: Unit) -> Self {
-        Self::with_config(scenario, unit, SessionConfig::new()).await
+        let mut session_config = SessionConfig::new();
+        // TODO (https://github.com/apache/datafusion/issues/12817) once this is the default behavior, remove from here
+        session_config
+            .options_mut()
+            .sql_parser
+            .parse_float_as_decimal = true;
+        Self::with_config(scenario, unit, session_config).await
     }
 
     async fn with_config(
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -928,9 +928,6 @@ fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
         Int16 | UInt16 => Some(Decimal128(5, 0)),
         Int32 | UInt32 => Some(Decimal128(10, 0)),
         Int64 | UInt64 => Some(Decimal128(20, 0)),
-        // TODO if we convert the floating-point data to the decimal type, it maybe overflow.
-        Float32 => Some(Decimal128(14, 7)),
-        Float64 => Some(Decimal128(30, 15)),
         _ => None,
     }
 }
@@ -946,9 +943,6 @@ fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType
         Int16 | UInt16 => Some(Decimal256(5, 0)),
         Int32 | UInt32 => Some(Decimal256(10, 0)),
         Int64 | UInt64 => Some(Decimal256(20, 0)),
-        // TODO if we convert the floating-point data to the decimal type, it maybe overflow.
-        Float32 => Some(Decimal256(14, 7)),
-        Float64 => Some(Decimal256(30, 15)),
         _ => None,
     }
 }
@@ -1494,8 +1488,8 @@ mod tests {
             DataType::Decimal128(20, 3),
             DataType::Decimal128(20, 3),
             DataType::Decimal128(23, 3),
-            DataType::Decimal128(24, 7),
-            DataType::Decimal128(32, 15),
+            DataType::Float32,
+            DataType::Float64,
             DataType::Decimal128(38, 10),
             DataType::Decimal128(25, 8),
             DataType::Decimal128(20, 3),
@@ -1541,14 +1535,8 @@ mod tests {
             coerce_numeric_type_to_decimal(&DataType::Int64).unwrap(),
             DataType::Decimal128(20, 0)
         );
-        assert_eq!(
-            coerce_numeric_type_to_decimal(&DataType::Float32).unwrap(),
-            DataType::Decimal128(14, 7)
-        );
-        assert_eq!(
-            coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(),
-            DataType::Decimal128(30, 15)
-        );
+        assert_eq!(coerce_numeric_type_to_decimal(&DataType::Float32), None);
+        assert_eq!(coerce_numeric_type_to_decimal(&DataType::Float64), None);
     }
 
     #[test]
@@ -2013,7 +2001,7 @@ mod tests {
             DataType::Float64,
             DataType::Decimal128(10, 3),
             Operator::Gt,
-            DataType::Decimal128(30, 15)
+            DataType::Float64
         );
         test_coercion_binary_rule!(
             DataType::Int64,
diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt
@@ -694,3 +694,9 @@ select FACTORIAL(350943270);
 
 statement ok
 drop table signed_integers
+
+# Should not fail. The operands should coerce to float
+query B
+SELECT '1'::decimal(10,0) = '1e40'::double;
+----
+false
diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt
@@ -836,3 +836,18 @@ physical_plan
 # Clean up after the test
 statement ok
 drop table aggregate_test_100;
+
+query T
+SELECT DISTINCT arrow_typeof(a) FROM (SELECT '1'::float UNION ALL SELECT '1'::decimal(10)) t(a)
+----
+Float32
+
+query T
+SELECT DISTINCT arrow_typeof(a) FROM (SELECT '1'::decimal(10) UNION ALL SELECT '1'::float ) t(a)
+----
+Float32
+
+query T
+SELECT DISTINCT arrow_typeof(a) FROM (SELECT '1'::decimal(10) UNION ALL SELECT '1'::double) t(a)
+----
+Float64