Skip to content

Commit 1732978

Browse files
committed
update tests
1 parent 297b879 commit 1732978

File tree

14 files changed

+69
-39
lines changed

14 files changed

+69
-39
lines changed

Cargo.toml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717

1818
[workspace]
19-
exclude = ["datafusion-cli", "dev/depcheck"]
19+
exclude = ["datafusion-cli", "dev/depcheck", "datafusion-examples"]
2020
members = [
2121
"datafusion/common",
2222
"datafusion/common-runtime",
@@ -40,7 +40,6 @@ members = [
4040
"datafusion/sqllogictest",
4141
"datafusion/substrait",
4242
"datafusion/wasmtest",
43-
"datafusion-examples",
4443
"docs",
4544
"test-utils",
4645
"benchmarks",
@@ -158,3 +157,17 @@ large_futures = "warn"
158157
[workspace.lints.rust]
159158
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] }
160159
unused_imports = "deny"
160+
161+
[patch.crates-io]
162+
arrow = { git = "https://github.com/apache/arrow-rs.git" }
163+
arrow-array = { git = "https://github.com/apache/arrow-rs.git" }
164+
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git" }
165+
arrow-cast = { git = "https://github.com/apache/arrow-rs.git" }
166+
arrow-data = { git = "https://github.com/apache/arrow-rs.git" }
167+
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git" }
168+
arrow-schema = { git = "https://github.com/apache/arrow-rs.git" }
169+
arrow-select = { git = "https://github.com/apache/arrow-rs.git" }
170+
arrow-string = { git = "https://github.com/apache/arrow-rs.git" }
171+
arrow-ord = { git = "https://github.com/apache/arrow-rs.git" }
172+
arrow-flight = { git = "https://github.com/apache/arrow-rs.git" }
173+
parquet = { git = "https://github.com/apache/arrow-rs.git" }

benchmarks/Cargo.toml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,17 @@ tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
5353

5454
[dev-dependencies]
5555
datafusion-proto = { workspace = true }
56+
57+
[patch.crates-io]
58+
arrow = { git = "https://github.com/apache/arrow-rs.git" }
59+
arrow-array = { git = "https://github.com/apache/arrow-rs.git" }
60+
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git" }
61+
arrow-cast = { git = "https://github.com/apache/arrow-rs.git" }
62+
arrow-data = { git = "https://github.com/apache/arrow-rs.git" }
63+
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git" }
64+
arrow-schema = { git = "https://github.com/apache/arrow-rs.git" }
65+
arrow-select = { git = "https://github.com/apache/arrow-rs.git" }
66+
arrow-string = { git = "https://github.com/apache/arrow-rs.git" }
67+
arrow-ord = { git = "https://github.com/apache/arrow-rs.git" }
68+
arrow-flight = { git = "https://github.com/apache/arrow-rs.git" }
69+
parquet = { git = "https://github.com/apache/arrow-rs.git" }

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ libc = "0.2.140"
6060
num_cpus = { workspace = true }
6161
object_store = { workspace = true, optional = true }
6262
parquet = { workspace = true, optional = true, default-features = true }
63-
pyo3 = { version = "0.21.0", optional = true }
63+
pyo3 = { version = "0.22.0", optional = true }
6464
sqlparser = { workspace = true }
6565

6666
[target.'cfg(target_family = "wasm")'.dependencies]

datafusion/common/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ config_namespace! {
472472

473473
/// (reading) If true, parquet reader will read columns of `Utf8/LargeUtf8` with `Utf8View`,
474474
/// and `Binary/LargeBinary` with `BinaryView`.
475-
pub schema_force_string_view: bool, default = false
475+
pub schema_force_string_view: bool, default = true
476476
}
477477
}
478478

datafusion/common/src/scalar/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4329,7 +4329,7 @@ mod tests {
43294329
.strip_backtrace();
43304330
assert_eq!(
43314331
err,
4332-
"Arrow error: Compute error: Overflow happened on: 2147483647 - -2147483648"
4332+
"Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648"
43334333
)
43344334
}
43354335

@@ -4350,7 +4350,7 @@ mod tests {
43504350
.sub_checked(&int_value_2)
43514351
.unwrap_err()
43524352
.strip_backtrace();
4353-
assert_eq!(err, "Arrow error: Compute error: Overflow happened on: 9223372036854775807 - -9223372036854775808")
4353+
assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808")
43544354
}
43554355

43564356
#[test]
@@ -5866,7 +5866,7 @@ mod tests {
58665866
let root_err = err.find_root();
58675867
match root_err{
58685868
DataFusionError::ArrowError(
5869-
ArrowError::ComputeError(_),
5869+
ArrowError::ArithmeticOverflow(_),
58705870
_,
58715871
) => {}
58725872
_ => return Err(err),

datafusion/core/src/datasource/file_format/parquet.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,7 +1248,7 @@ mod tests {
12481248
use arrow_schema::{DataType, Field};
12491249
use async_trait::async_trait;
12501250
use datafusion_common::cast::{
1251-
as_binary_array, as_boolean_array, as_float32_array, as_float64_array,
1251+
as_binary_view_array, as_boolean_array, as_float32_array, as_float64_array,
12521252
as_int32_array, as_timestamp_nanosecond_array,
12531253
};
12541254
use datafusion_common::config::ParquetOptions;
@@ -1799,8 +1799,8 @@ mod tests {
17991799
bigint_col: Int64\n\
18001800
float_col: Float32\n\
18011801
double_col: Float64\n\
1802-
date_string_col: Binary\n\
1803-
string_col: Binary\n\
1802+
date_string_col: BinaryView\n\
1803+
string_col: BinaryView\n\
18041804
timestamp_col: Timestamp(Nanosecond, None)",
18051805
y
18061806
);
@@ -1956,7 +1956,7 @@ mod tests {
19561956
assert_eq!(1, batches[0].num_columns());
19571957
assert_eq!(8, batches[0].num_rows());
19581958

1959-
let array = as_binary_array(batches[0].column(0))?;
1959+
let array = as_binary_view_array(batches[0].column(0))?;
19601960
let mut values: Vec<&str> = vec![];
19611961
for i in 0..batches[0].num_rows() {
19621962
values.push(std::str::from_utf8(array.value(i)).unwrap());
@@ -2070,7 +2070,7 @@ mod tests {
20702070
let int_col_offset = offset_index.get(4).unwrap();
20712071

20722072
// 325 pages in int_col
2073-
assert_eq!(int_col_offset.len(), 325);
2073+
assert_eq!(int_col_offset.page_locations().len(), 325);
20742074
match int_col_index {
20752075
Index::INT32(index) => {
20762076
assert_eq!(index.indexes.len(), 325);

datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ impl<'a> PagesPruningStatistics<'a> {
406406
converter,
407407
column_index,
408408
offset_index,
409-
page_offsets,
409+
page_offsets: &page_offsets.page_locations,
410410
})
411411
}
412412

datafusion/functions/src/regex/regexpreplace.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
402402
let string_view_array = as_string_view_array(&args[0])?;
403403

404404
let mut builder = StringViewBuilder::with_capacity(string_view_array.len())
405-
.with_block_size(1024 * 1024 * 2);
405+
.with_fixed_block_size(1024 * 1024 * 2);
406406

407407
for val in string_view_array.iter() {
408408
if let Some(val) = val {

datafusion/physical-expr-common/src/binary_view_map.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ where
149149
output_type,
150150
map: hashbrown::raw::RawTable::with_capacity(INITIAL_MAP_CAPACITY),
151151
map_size: 0,
152-
builder: GenericByteViewBuilder::new().with_block_size(2 * 1024 * 1024),
152+
builder: GenericByteViewBuilder::new().with_fixed_block_size(2 * 1024 * 1024),
153153
random_state: RandomState::new(),
154154
hashes_buffer: vec![],
155155
null: None,

datafusion/physical-plan/src/coalesce_batches.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
494494
// See https://github.com/apache/arrow-rs/issues/6094 for more details.
495495
let mut builder = StringViewBuilder::with_capacity(s.len());
496496
if ideal_buffer_size > 0 {
497-
builder = builder.with_block_size(ideal_buffer_size as u32);
497+
builder = builder.with_fixed_block_size(ideal_buffer_size as u32);
498498
}
499499

500500
for v in s.iter() {
@@ -804,7 +804,8 @@ mod tests {
804804
impl StringViewTest {
805805
/// Create a `StringViewArray` with the parameters specified in this struct
806806
fn build(self) -> StringViewArray {
807-
let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192);
807+
let mut builder =
808+
StringViewBuilder::with_capacity(100).with_fixed_block_size(8192);
808809
loop {
809810
for &v in self.strings.iter() {
810811
builder.append_option(v);

datafusion/sql/src/unparser/expr.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2077,49 +2077,49 @@ mod tests {
20772077
"1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND",
20782078
),
20792079
IntervalStyle::PostgresVerbose,
2080-
r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#,
2080+
r#"INTERVAL '13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#,
20812081
),
20822082
(
20832083
interval_month_day_nano_lit("1.5 MONTH"),
20842084
IntervalStyle::PostgresVerbose,
2085-
r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#,
2085+
r#"INTERVAL '1 MONS 15 DAYS'"#,
20862086
),
20872087
(
20882088
interval_month_day_nano_lit("-3 MONTH"),
20892089
IntervalStyle::PostgresVerbose,
2090-
r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#,
2090+
r#"INTERVAL '-3 MONS'"#,
20912091
),
20922092
(
20932093
interval_month_day_nano_lit("1 MONTH")
20942094
.add(interval_month_day_nano_lit("1 DAY")),
20952095
IntervalStyle::PostgresVerbose,
2096-
r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#,
2096+
r#"(INTERVAL '1 MONS' + INTERVAL '1 DAYS')"#,
20972097
),
20982098
(
20992099
interval_month_day_nano_lit("1 MONTH")
21002100
.sub(interval_month_day_nano_lit("1 DAY")),
21012101
IntervalStyle::PostgresVerbose,
2102-
r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#,
2102+
r#"(INTERVAL '1 MONS' - INTERVAL '1 DAYS')"#,
21032103
),
21042104
(
21052105
interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"),
21062106
IntervalStyle::PostgresVerbose,
2107-
r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#,
2107+
r#"INTERVAL '10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#,
21082108
),
21092109
(
21102110
interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"),
21112111
IntervalStyle::PostgresVerbose,
2112-
r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#,
2112+
r#"INTERVAL '10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#,
21132113
),
21142114
(
21152115
interval_year_month_lit("1 YEAR 1 MONTH"),
21162116
IntervalStyle::PostgresVerbose,
2117-
r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#,
2117+
r#"INTERVAL '1 YEARS 1 MONS'"#,
21182118
),
21192119
(
21202120
interval_year_month_lit("1.5 YEAR 1 MONTH"),
21212121
IntervalStyle::PostgresVerbose,
2122-
r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#,
2122+
r#"INTERVAL '1 YEARS 7 MONS'"#,
21232123
),
21242124
(
21252125
interval_year_month_lit("1 YEAR 1 MONTH"),

datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,9 @@ pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
267267
| DataType::Float64
268268
| DataType::Decimal128(_, _)
269269
| DataType::Decimal256(_, _) => DFColumnType::Float,
270-
DataType::Utf8 | DataType::LargeUtf8 => DFColumnType::Text,
270+
DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => {
271+
DFColumnType::Text
272+
}
271273
DataType::Date32
272274
| DataType::Date64
273275
| DataType::Time32(_)

datafusion/sqllogictest/test_files/arrow_typeof.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ select arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)');
424424
[1, 2, 3]
425425

426426
# Tests for Utf8View
427-
query ?T
427+
query TT
428428
select arrow_cast('MyAwesomeString', 'Utf8View'), arrow_typeof(arrow_cast('MyAwesomeString', 'Utf8View'))
429429
----
430430
MyAwesomeString Utf8View

datafusion/sqllogictest/test_files/math.slt

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -252,19 +252,19 @@ select abs(c1), abs(c2), abs(c3), abs(c4) from test_nullable_integer where datas
252252
NULL NULL NULL NULL
253253

254254
# abs: Int8 overflow
255-
statement error DataFusion error: Arrow error: Arithmetic overflow: Int8Array overflow on abs\(-128\)
255+
statement error DataFusion error: Arrow error: Compute error: Int8Array overflow on abs\(-128\)
256256
select abs(c1) from test_nullable_integer where dataset = 'mins'
257257

258258
# abs: Int16 overflow
259-
statement error DataFusion error: Arrow error: Arithmetic overflow: Int16Array overflow on abs\(-32768\)
259+
statement error DataFusion error: Arrow error: Compute error: Int16Array overflow on abs\(-32768\)
260260
select abs(c2) from test_nullable_integer where dataset = 'mins'
261261

262262
# abs: Int32 overflow
263-
statement error DataFusion error: Arrow error: Arithmetic overflow: Int32Array overflow on abs\(-2147483648\)
263+
statement error DataFusion error: Arrow error: Compute error: Int32Array overflow on abs\(-2147483648\)
264264
select abs(c3) from test_nullable_integer where dataset = 'mins'
265265

266266
# abs: Int64 overflow
267-
statement error DataFusion error: Arrow error: Arithmetic overflow: Int64Array overflow on abs\(-9223372036854775808\)
267+
statement error DataFusion error: Arrow error: Compute error: Int64Array overflow on abs\(-9223372036854775808\)
268268
select abs(c4) from test_nullable_integer where dataset = 'mins'
269269

270270
statement ok
@@ -620,15 +620,15 @@ select gcd(a, b), gcd(c*d + 1, abs(e)) + f from signed_integers;
620620
NULL NULL
621621

622622
# gcd(i64::MIN, i64::MIN)
623-
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(\-9223372036854775808, \-9223372036854775808\)
623+
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(\-9223372036854775808, \-9223372036854775808\)
624624
select gcd(-9223372036854775808, -9223372036854775808);
625625

626626
# gcd(i64::MIN, 0)
627-
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(\-9223372036854775808, 0\)
627+
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(\-9223372036854775808, 0\)
628628
select gcd(-9223372036854775808, 0);
629629

630630
# gcd(0, i64::MIN)
631-
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in GCD\(0, \-9223372036854775808\)
631+
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in GCD\(0, \-9223372036854775808\)
632632
select gcd(0, -9223372036854775808);
633633

634634

@@ -662,22 +662,22 @@ select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
662662
NULL NULL NULL
663663

664664
# Result cannot fit in i64
665-
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
665+
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
666666
select lcm(-9223372036854775808, -9223372036854775808);
667667

668-
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(1, \-9223372036854775808\)
668+
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(1, \-9223372036854775808\)
669669
select lcm(1, -9223372036854775808);
670670

671671
# Overflow on multiplication
672-
query error DataFusion error: Arrow error: Arithmetic overflow:Signed integer overflow in LCM\(2, 9223372036854775803\)
672+
query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(2, 9223372036854775803\)
673673
select lcm(2, 9223372036854775803);
674674

675675

676676
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 2107754225 \^ 1221660777
677677
select power(2107754225, 1221660777);
678678

679679
# factorial overflow
680-
query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on FACTORIAL\(350943270\)
680+
query error DataFusion error: Arrow error: Compute error: Overflow happened on FACTORIAL\(350943270\)
681681
select FACTORIAL(350943270);
682682

683683
statement ok

0 commit comments

Comments
 (0)