Skip to content

Commit faa8c1b

Browse files
Chen-Yuan-Lai, Cheng-Yuan-Lai, Ian Lai, alamb
authored
refactor: remove remaining uses of arrow_array and use reexport in arrow instead (#14528)
* refactor: remove remaining uses of arrow_array and use reexport in arrow instead * fix: remove blank * fix: run cargo fmt * fix: import record_batch * fix: update cargo.lock * fix: add chrono-tz feature in arrow * fix: remove remaining arrow-array & update cargo.lock --------- Co-authored-by: Cheng-Yuan-Lai <a186235@g,ail.com> Co-authored-by: Ian Lai <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent 91c0975 commit faa8c1b

File tree

132 files changed

+297
-354
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

132 files changed

+297
-354
lines changed

Cargo.toml

-2
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ ahash = { version = "0.8", default-features = false, features = [
8080
] }
8181
arrow = { version = "54.1.0", features = [
8282
"prettyprint",
83-
] }
84-
arrow-array = { version = "54.1.0", default-features = false, features = [
8583
"chrono-tz",
8684
] }
8785
arrow-buffer = { version = "54.1.0", default-features = false }

datafusion-cli/Cargo.lock

+8-13
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-examples/examples/ffi/ffi_example_table_provider/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ publish = false
2424
[dependencies]
2525
abi_stable = "0.11.3"
2626
arrow = { workspace = true }
27-
arrow-array = { workspace = true }
2827
arrow-schema = { workspace = true }
2928
datafusion = { workspace = true }
3029
datafusion-ffi = { workspace = true }

datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,9 @@
1818
use std::sync::Arc;
1919

2020
use abi_stable::{export_root_module, prefix_type::PrefixTypeTrait};
21-
use arrow_array::RecordBatch;
22-
use datafusion::{
23-
arrow::datatypes::{DataType, Field, Schema},
24-
common::record_batch,
25-
datasource::MemTable,
26-
};
21+
use arrow::array::RecordBatch;
22+
use arrow::datatypes::{DataType, Field, Schema};
23+
use datafusion::{common::record_batch, datasource::MemTable};
2724
use datafusion_ffi::table_provider::FFI_TableProvider;
2825
use ffi_module_interface::{TableProviderModule, TableProviderModuleRef};
2926

datafusion/common/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ apache-avro = { version = "0.17", default-features = false, features = [
5151
"zstandard",
5252
], optional = true }
5353
arrow = { workspace = true }
54-
arrow-array = { workspace = true }
5554
arrow-ipc = { workspace = true }
5655
arrow-schema = { workspace = true }
5756
base64 = "0.22.1"

datafusion/common/src/cast.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
//! kernels in arrow-rs such as `as_boolean_array` do.
2222
2323
use crate::{downcast_value, DataFusionError, Result};
24+
use arrow::array::{
25+
BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray,
26+
LargeStringArray, StringViewArray, UInt16Array,
27+
};
2428
use arrow::{
2529
array::{
2630
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
@@ -36,10 +40,6 @@ use arrow::{
3640
},
3741
datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType},
3842
};
39-
use arrow_array::{
40-
BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray,
41-
LargeStringArray, StringViewArray, UInt16Array,
42-
};
4343

4444
// Downcast ArrayRef to Date32Array
4545
pub fn as_date32_array(array: &dyn Array) -> Result<&Date32Array> {

datafusion/common/src/pyarrow.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717

1818
//! Conversions between PyArrow and DataFusion types
1919
20-
use arrow::array::ArrayData;
20+
use arrow::array::{Array, ArrayData};
2121
use arrow::pyarrow::{FromPyArrow, ToPyArrow};
22-
use arrow_array::Array;
2322
use pyo3::exceptions::PyException;
2423
use pyo3::prelude::PyErr;
2524
use pyo3::types::{PyAnyMethods, PyList};

datafusion/common/src/scalar/mod.rs

+18-16
Original file line numberDiff line numberDiff line change
@@ -40,22 +40,24 @@ use crate::cast::{
4040
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
4141
use crate::hash_utils::create_hashes;
4242
use crate::utils::SingleRowListArrayBuilder;
43-
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
43+
use arrow::array::{
44+
types::{IntervalDayTime, IntervalMonthDayNano},
45+
*,
46+
};
4447
use arrow::buffer::ScalarBuffer;
45-
use arrow::compute::kernels::numeric::*;
46-
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
47-
use arrow::{
48-
array::*,
49-
compute::kernels::cast::{cast_with_options, CastOptions},
50-
datatypes::{
51-
i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
52-
Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
53-
Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
54-
IntervalYearMonthType, TimeUnit, TimestampMicrosecondType,
55-
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
56-
UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
57-
},
48+
use arrow::compute::kernels::{
49+
cast::{cast_with_options, CastOptions},
50+
numeric::*,
5851
};
52+
use arrow::datatypes::{
53+
i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
54+
Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
55+
Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
56+
IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
57+
TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
58+
UInt8Type, DECIMAL128_MAX_PRECISION,
59+
};
60+
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
5961
use arrow_schema::{UnionFields, UnionMode};
6062

6163
use crate::format::DEFAULT_CAST_OPTIONS;
@@ -165,7 +167,7 @@ pub use struct_builder::ScalarStructBuilder;
165167
/// ```
166168
/// # use std::sync::Arc;
167169
/// # use arrow::datatypes::{DataType, Field, Fields};
168-
/// # use arrow_array::{ArrayRef, Int32Array, StructArray, StringArray};
170+
/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
169171
/// # use datafusion_common::ScalarValue;
170172
/// // Build a struct like: {a: 1, b: "foo"}
171173
/// // Field description
@@ -1674,7 +1676,7 @@ impl ScalarValue {
16741676
///
16751677
/// assert_eq!(&result, &expected);
16761678
/// ```
1677-
/// [`Datum`]: arrow_array::Datum
1679+
/// [`Datum`]: arrow::array::Datum
16781680
pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
16791681
Ok(Scalar::new(self.to_array_of_size(1)?))
16801682
}

datafusion/common/src/test_util.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use std::{error::Error, path::PathBuf};
3434
/// ```
3535
/// # use std::sync::Arc;
3636
/// # use arrow::record_batch::RecordBatch;
37-
/// # use arrow_array::{ArrayRef, Int32Array};
37+
/// # use arrow::array::{ArrayRef, Int32Array};
3838
/// # use datafusion_common::assert_batches_eq;
3939
/// let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
4040
/// let batch = RecordBatch::try_from_iter([("column", col)]).unwrap();
@@ -344,7 +344,7 @@ macro_rules! record_batch {
344344
)*
345345
]));
346346

347-
let batch = arrow_array::RecordBatch::try_new(
347+
let batch = arrow::array::RecordBatch::try_new(
348348
schema,
349349
vec![$(
350350
$crate::create_array!($type, $values),
@@ -416,7 +416,7 @@ mod tests {
416416

417417
#[test]
418418
fn test_create_record_batch() -> Result<()> {
419-
use arrow_array::Array;
419+
use arrow::array::Array;
420420

421421
let batch = record_batch!(
422422
("a", Int32, vec![1, 2, 3, 4]),

datafusion/common/src/utils/mod.rs

+6-7
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,13 @@ pub mod string_utils;
2424

2525
use crate::error::{_internal_datafusion_err, _internal_err};
2626
use crate::{DataFusionError, Result, ScalarValue};
27-
use arrow::array::ArrayRef;
27+
use arrow::array::{
28+
cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
29+
OffsetSizeTrait,
30+
};
2831
use arrow::buffer::OffsetBuffer;
2932
use arrow::compute::{partition, SortColumn, SortOptions};
3033
use arrow::datatypes::{Field, SchemaRef};
31-
use arrow_array::cast::AsArray;
32-
use arrow_array::{
33-
Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait,
34-
};
3534
use arrow_schema::DataType;
3635
use sqlparser::ast::Ident;
3736
use sqlparser::dialect::GenericDialect;
@@ -329,8 +328,8 @@ pub fn longest_consecutive_prefix<T: Borrow<usize>>(
329328
/// # Example
330329
/// ```
331330
/// # use std::sync::Arc;
332-
/// # use arrow_array::{Array, ListArray};
333-
/// # use arrow_array::types::Int64Type;
331+
/// # use arrow::array::{Array, ListArray};
332+
/// # use arrow::array::types::Int64Type;
334333
/// # use datafusion_common::utils::SingleRowListArrayBuilder;
335334
/// // Array is [1, 2, 3]
336335
/// let arr = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![

datafusion/common/src/utils/string_utils.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
//! Utilities for working with strings
1919
20-
use arrow::{array::AsArray, datatypes::DataType};
21-
use arrow_array::Array;
20+
use arrow::array::{Array, AsArray};
21+
use arrow::datatypes::DataType;
2222

2323
/// Convenient function to convert an Arrow string array to a vector of strings
2424
pub fn string_array_to_vec(array: &dyn Array) -> Vec<Option<&str>> {

datafusion/core/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ extended_tests = []
8585
[dependencies]
8686
apache-avro = { version = "0.17", optional = true }
8787
arrow = { workspace = true }
88-
arrow-array = { workspace = true }
8988
arrow-ipc = { workspace = true }
9089
arrow-schema = { workspace = true }
9190
async-compression = { version = "0.4.0", features = [

datafusion/core/benches/data_utils/mod.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,11 @@
1717

1818
//! This module provides the in-memory table for more realistic benchmarking.
1919
20-
use arrow::{
21-
array::Float32Array,
22-
array::Float64Array,
23-
array::StringArray,
24-
array::UInt64Array,
25-
datatypes::{DataType, Field, Schema, SchemaRef},
26-
record_batch::RecordBatch,
20+
use arrow::array::{
21+
builder::{Int64Builder, StringBuilder},
22+
Float32Array, Float64Array, RecordBatch, StringArray, UInt64Array,
2723
};
28-
use arrow_array::builder::{Int64Builder, StringBuilder};
24+
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
2925
use datafusion::datasource::MemTable;
3026
use datafusion::error::Result;
3127
use datafusion_common::DataFusionError;

datafusion/core/benches/map_query_sql.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
20+
use arrow::array::{ArrayRef, Int32Array, RecordBatch};
2121
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2222
use parking_lot::Mutex;
2323
use rand::prelude::ThreadRng;

datafusion/core/benches/sql_planner.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ extern crate datafusion;
2323
mod data_utils;
2424

2525
use crate::criterion::Criterion;
26+
use arrow::array::{ArrayRef, RecordBatch};
2627
use arrow::datatypes::{DataType, Field, Fields, Schema};
27-
use arrow_array::{ArrayRef, RecordBatch};
2828
use criterion::Bencher;
2929
use datafusion::datasource::MemTable;
3030
use datafusion::execution::context::SessionContext;

datafusion/core/benches/sql_query_with_io.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717

1818
use std::{fmt::Write, sync::Arc, time::Duration};
1919

20-
use arrow::array::{Int64Builder, UInt64Builder};
21-
use arrow_array::RecordBatch;
20+
use arrow::array::{Int64Builder, RecordBatch, UInt64Builder};
2221
use arrow_schema::{DataType, Field, Schema, SchemaRef};
2322
use bytes::Bytes;
2423
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};

datafusion/core/src/datasource/file_format/csv.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -760,10 +760,10 @@ mod tests {
760760
use crate::prelude::{CsvReadOptions, SessionConfig, SessionContext};
761761
use crate::test_util::arrow_test_data;
762762

763+
use arrow::array::{BooleanArray, Float64Array, Int32Array, StringArray};
763764
use arrow::compute::concat_batches;
764765
use arrow::csv::ReaderBuilder;
765766
use arrow::util::pretty::pretty_format_batches;
766-
use arrow_array::{BooleanArray, Float64Array, Int32Array, StringArray};
767767
use datafusion_common::cast::as_string_array;
768768
use datafusion_common::internal_err;
769769
use datafusion_common::stats::Precision;

datafusion/core/src/datasource/file_format/json.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ use crate::physical_plan::{
4242
DisplayAs, DisplayFormatType, SendableRecordBatchStream, Statistics,
4343
};
4444

45+
use arrow::array::RecordBatch;
4546
use arrow::datatypes::Schema;
4647
use arrow::datatypes::SchemaRef;
4748
use arrow::json;
4849
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
49-
use arrow_array::RecordBatch;
5050
use arrow_schema::ArrowError;
5151
use datafusion_catalog::Session;
5252
use datafusion_common::config::{ConfigField, ConfigFileType, JsonOptions};

datafusion/core/src/datasource/file_format/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ use std::fmt::{self, Debug, Display};
3737
use std::sync::Arc;
3838
use std::task::Poll;
3939

40+
use crate::arrow::array::RecordBatch;
4041
use crate::arrow::datatypes::SchemaRef;
4142
use crate::datasource::physical_plan::{FileScanConfig, FileSinkConfig};
4243
use crate::error::Result;
4344
use crate::physical_plan::{ExecutionPlan, Statistics};
4445

45-
use arrow_array::RecordBatch;
4646
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
4747
use datafusion_catalog::Session;
4848
use datafusion_common::file_options::file_type::FileType;

datafusion/core/src/datasource/file_format/parquet.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -1309,9 +1309,10 @@ mod tests {
13091309
use crate::datasource::file_format::parquet::test_util::store_parquet;
13101310
use crate::physical_plan::metrics::MetricValue;
13111311
use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
1312-
use arrow::array::{Array, ArrayRef, StringArray};
1313-
use arrow_array::types::Int32Type;
1314-
use arrow_array::{DictionaryArray, Int32Array, Int64Array};
1312+
use arrow::array::{
1313+
types::Int32Type, Array, ArrayRef, DictionaryArray, Int32Array, Int64Array,
1314+
StringArray,
1315+
};
13151316
use arrow_schema::{DataType, Field};
13161317
use async_trait::async_trait;
13171318
use datafusion_common::cast::{

0 commit comments

Comments
 (0)