Skip to content

Commit 420a46c

Browse files
committed
Datum based arithmetic
1 parent 137bf81 commit 420a46c

File tree

17 files changed: 136 additions and 3,426 deletions.

Cargo.toml

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,10 +47,11 @@ rust-version = "1.64"
4747

4848
[workspace.dependencies]
4949
arrow = { version = "43.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
50-
arrow-flight = { version = "43.0.0", features = ["flight-sql-experimental"] }
50+
arrow-arith = { version = "43.0.0" }
51+
arrow-array = { version = "43.0.0", default-features = false, features = ["chrono-tz"] }
5152
arrow-buffer = { version = "43.0.0", default-features = false }
53+
arrow-flight = { version = "43.0.0", features = ["flight-sql-experimental"] }
5254
arrow-schema = { version = "43.0.0", default-features = false }
53-
arrow-array = { version = "43.0.0", default-features = false, features = ["chrono-tz"] }
5455
parquet = { version = "43.0.0", features = ["arrow", "async", "object_store"] }
5556
sqlparser = { version = "0.35", features = ["visitor"] }
5657

@@ -71,3 +72,12 @@ opt-level = 3
7172
overflow-checks = false
7273
panic = 'unwind'
7374
rpath = false
75+
76+
[patch.crates-io]
77+
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
78+
arrow-arith = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
79+
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
80+
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
81+
arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
82+
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
83+
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }

datafusion-cli/Cargo.lock

Lines changed: 21 additions & 30 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-cli/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,12 @@ assert_cmd = "2.0"
4949
ctor = "0.2.0"
5050
predicates = "3.0"
5151
rstest = "0.17"
52+
53+
[patch.crates-io]
54+
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
55+
arrow-arith = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
56+
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
57+
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
58+
arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
59+
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }
60+
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "41a7e5ac8691b181199e9d2fc8b90c383c6a8cd6" }

datafusion/expr/src/type_coercion/binary.rs

Lines changed: 0 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -517,35 +517,6 @@ fn create_decimal_type(precision: u8, scale: i8) -> DataType {
517517
)
518518
}
519519

520-
/// Returns the coerced type of applying mathematics operations on decimal types.
521-
/// Two sides of the mathematics operation will be coerced to the same type. Note
522-
/// that we don't coerce the decimal operands in analysis phase, but do it in the
523-
/// execution phase because this is not idempotent.
524-
pub fn coercion_decimal_mathematics_type(
525-
mathematics_op: &Operator,
526-
left_decimal_type: &DataType,
527-
right_decimal_type: &DataType,
528-
) -> Option<DataType> {
529-
// TODO: Move this logic into kernel implementations
530-
use arrow::datatypes::DataType::*;
531-
match (left_decimal_type, right_decimal_type) {
532-
// The promotion rule from spark
533-
// https://github.com/apache/spark/blob/c20af535803a7250fef047c2bf0fe30be242369d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala#L35
534-
(Decimal128(_, _), Decimal128(_, _)) => match mathematics_op {
535-
Operator::Plus | Operator::Minus => decimal_op_mathematics_type(
536-
mathematics_op,
537-
left_decimal_type,
538-
right_decimal_type,
539-
),
540-
Operator::Divide | Operator::Modulo => {
541-
get_wider_decimal_type(left_decimal_type, right_decimal_type)
542-
}
543-
_ => None,
544-
},
545-
_ => None,
546-
}
547-
}
548-
549520
/// Returns the output type of applying mathematics operations on two decimal types.
550521
/// The rule is from spark. Note that this is different to the coerced type applied
551522
/// to two sides of the arithmetic operation.
@@ -917,49 +888,17 @@ mod tests {
917888
DataType::Decimal128(30, 15)
918889
);
919890

920-
let op = Operator::Plus;
921891
let left_decimal_type = DataType::Decimal128(10, 3);
922892
let right_decimal_type = DataType::Decimal128(20, 4);
923-
let result = coercion_decimal_mathematics_type(
924-
&op,
925-
&left_decimal_type,
926-
&right_decimal_type,
927-
);
928-
assert_eq!(DataType::Decimal128(21, 4), result.unwrap());
929-
let op = Operator::Minus;
930-
let result = coercion_decimal_mathematics_type(
931-
&op,
932-
&left_decimal_type,
933-
&right_decimal_type,
934-
);
935-
assert_eq!(DataType::Decimal128(21, 4), result.unwrap());
936893
let op = Operator::Multiply;
937-
let result = coercion_decimal_mathematics_type(
938-
&op,
939-
&left_decimal_type,
940-
&right_decimal_type,
941-
);
942-
assert_eq!(None, result);
943894
let result =
944895
decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type);
945896
assert_eq!(DataType::Decimal128(31, 7), result.unwrap());
946897
let op = Operator::Divide;
947-
let result = coercion_decimal_mathematics_type(
948-
&op,
949-
&left_decimal_type,
950-
&right_decimal_type,
951-
);
952-
assert_eq!(DataType::Decimal128(20, 4), result.unwrap());
953898
let result =
954899
decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type);
955900
assert_eq!(DataType::Decimal128(35, 24), result.unwrap());
956901
let op = Operator::Modulo;
957-
let result = coercion_decimal_mathematics_type(
958-
&op,
959-
&left_decimal_type,
960-
&right_decimal_type,
961-
);
962-
assert_eq!(DataType::Decimal128(20, 4), result.unwrap());
963902
let result =
964903
decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type);
965904
assert_eq!(DataType::Decimal128(11, 4), result.unwrap());
@@ -1228,19 +1167,13 @@ mod tests {
12281167
mathematics_op: Operator,
12291168
expected_lhs_type: DataType,
12301169
expected_rhs_type: DataType,
1231-
expected_coerced_type: Option<DataType>,
12321170
expected_output_type: DataType,
12331171
) {
12341172
// The coerced types for lhs and rhs, if any of them is not decimal
12351173
let (lhs_type, rhs_type) = math_decimal_coercion(&lhs_type, &rhs_type).unwrap();
12361174
assert_eq!(lhs_type, expected_lhs_type);
12371175
assert_eq!(rhs_type, expected_rhs_type);
12381176

1239-
// The coerced type of decimal math expression, applied during expression evaluation
1240-
let coerced_type =
1241-
coercion_decimal_mathematics_type(&mathematics_op, &lhs_type, &rhs_type);
1242-
assert_eq!(coerced_type, expected_coerced_type);
1243-
12441177
// The output type of decimal math expression
12451178
let output_type =
12461179
decimal_op_mathematics_type(&mathematics_op, &lhs_type, &rhs_type).unwrap();
@@ -1255,7 +1188,6 @@ mod tests {
12551188
Operator::Plus,
12561189
DataType::Decimal128(10, 2),
12571190
DataType::Decimal128(10, 2),
1258-
Some(DataType::Decimal128(11, 2)),
12591191
DataType::Decimal128(11, 2),
12601192
);
12611193

@@ -1265,7 +1197,6 @@ mod tests {
12651197
Operator::Plus,
12661198
DataType::Decimal128(10, 0),
12671199
DataType::Decimal128(10, 2),
1268-
Some(DataType::Decimal128(13, 2)),
12691200
DataType::Decimal128(13, 2),
12701201
);
12711202

@@ -1275,7 +1206,6 @@ mod tests {
12751206
Operator::Minus,
12761207
DataType::Decimal128(10, 0),
12771208
DataType::Decimal128(10, 2),
1278-
Some(DataType::Decimal128(13, 2)),
12791209
DataType::Decimal128(13, 2),
12801210
);
12811211

@@ -1285,7 +1215,6 @@ mod tests {
12851215
Operator::Multiply,
12861216
DataType::Decimal128(10, 0),
12871217
DataType::Decimal128(10, 2),
1288-
None,
12891218
DataType::Decimal128(21, 2),
12901219
);
12911220

@@ -1295,7 +1224,6 @@ mod tests {
12951224
Operator::Divide,
12961225
DataType::Decimal128(10, 0),
12971226
DataType::Decimal128(10, 2),
1298-
Some(DataType::Decimal128(12, 2)),
12991227
DataType::Decimal128(23, 11),
13001228
);
13011229

@@ -1305,7 +1233,6 @@ mod tests {
13051233
Operator::Modulo,
13061234
DataType::Decimal128(10, 0),
13071235
DataType::Decimal128(10, 2),
1308-
Some(DataType::Decimal128(12, 2)),
13091236
DataType::Decimal128(10, 2),
13101237
);
13111238

datafusion/physical-expr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ unicode_expressions = ["unicode-segmentation"]
4545
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
4646
arrow = { workspace = true }
4747
arrow-array = { workspace = true }
48+
arrow-arith = { workspace = true }
4849
arrow-buffer = { workspace = true }
4950
arrow-schema = { workspace = true }
5051
blake2 = { version = "^0.10.2", optional = true }

0 commit comments

Comments
 (0)