Skip to content

Commit 3b5085e

Browse files
Upgrade deps to datafusion 41 (#802)
* update datafusion deps to point to github.com/apache/datafusion Datafusion 41 is not yet released on crates.io. * update TableProvider::scan Ref: apache/datafusion#11516 * use SessionStateBuilder The old constructor is deprecated. Ref: apache/datafusion#11403 * update AggregateFunction Upstream Changes: - The field name was switched from `func_name` to `func`. - AggregateFunctionDefinition was removed Ref: apache/datafusion#11803 * update imports in catalog Catalog API was extracted to a separate crate. Ref: apache/datafusion#11516 * use appropriate path for approx_distinct Ref: apache/datafusion#11644 * migrate AggregateExt to ExprFunctionExt Also removed `sqlparser` dependency since it's re-exported upstream. Ref: apache/datafusion#11550 * update regr_count tests for new return type Ref: apache/datafusion#11731 * migrate from function-array to functions-nested The package was renamed upstream. Ref: apache/datafusion#11602 * cargo fmt * lock datafusion deps to 41 * remove todo from cargo.toml All the datafusion dependencies are re-exported, but I still need to figure out *why*.
1 parent 805183b commit 3b5085e

File tree

10 files changed

+126
-95
lines changed

10 files changed

+126
-95
lines changed

Cargo.lock

+72-36
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+7-8
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync
3838
rand = "0.8"
3939
pyo3 = { version = "0.21", features = ["extension-module", "abi3", "abi3-py38"] }
4040
arrow = { version = "52", feature = ["pyarrow"] }
41-
datafusion = { version = "40.0.0", features = ["pyarrow", "avro", "unicode_expressions"] }
42-
datafusion-common = { version = "40.0.0", features = ["pyarrow"] }
43-
datafusion-expr = "40.0.0"
44-
datafusion-functions-array = "40.0.0"
45-
datafusion-optimizer = "40.0.0"
46-
datafusion-sql = "40.0.0"
47-
datafusion-substrait = { version = "40.0.0", optional = true }
41+
datafusion = { version = "41.0.0", features = ["pyarrow", "avro", "unicode_expressions"] }
42+
datafusion-common = { version = "41.0.0", features = ["pyarrow"] }
43+
datafusion-expr = { version = "41.0.0" }
44+
datafusion-functions-nested = { version = "41.0.0" }
45+
datafusion-optimizer = { version = "41.0.0" }
46+
datafusion-sql = { version = "41.0.0" }
47+
datafusion-substrait = { version = "41.0.0", optional = true }
4848
prost = "0.12" # keep in line with `datafusion-substrait`
4949
prost-types = "0.12" # keep in line with `datafusion-substrait`
5050
uuid = { version = "1.9", features = ["v4"] }
@@ -56,7 +56,6 @@ parking_lot = "0.12"
5656
regex-syntax = "0.8"
5757
syn = "2.0.68"
5858
url = "2"
59-
sqlparser = "0.47.0"
6059

6160
[build-dependencies]
6261
pyo3-build-config = "0.21"

python/datafusion/tests/test_functions.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,7 @@ def test_regr_funcs_sql(df):
808808

809809
assert result[0].column(0) == pa.array([None], type=pa.float64())
810810
assert result[0].column(1) == pa.array([None], type=pa.float64())
811-
assert result[0].column(2) == pa.array([1], type=pa.float64())
811+
assert result[0].column(2) == pa.array([1], type=pa.uint64())
812812
assert result[0].column(3) == pa.array([None], type=pa.float64())
813813
assert result[0].column(4) == pa.array([1], type=pa.float64())
814814
assert result[0].column(5) == pa.array([1], type=pa.float64())
@@ -840,7 +840,7 @@ def test_regr_funcs_sql_2():
840840
# Assertions for SQL results
841841
assert result_sql[0].column(0) == pa.array([2], type=pa.float64())
842842
assert result_sql[0].column(1) == pa.array([0], type=pa.float64())
843-
assert result_sql[0].column(2) == pa.array([3], type=pa.float64()) # todo: i would not expect this to be float
843+
assert result_sql[0].column(2) == pa.array([3], type=pa.uint64())
844844
assert result_sql[0].column(3) == pa.array([1], type=pa.float64())
845845
assert result_sql[0].column(4) == pa.array([2], type=pa.float64())
846846
assert result_sql[0].column(5) == pa.array([4], type=pa.float64())
@@ -852,7 +852,7 @@ def test_regr_funcs_sql_2():
852852
@pytest.mark.parametrize("func, expected", [
853853
pytest.param(f.regr_slope, pa.array([2], type=pa.float64()), id="regr_slope"),
854854
pytest.param(f.regr_intercept, pa.array([0], type=pa.float64()), id="regr_intercept"),
855-
pytest.param(f.regr_count, pa.array([3], type=pa.float64()), id="regr_count"), # TODO: I would expect this to return an int array
855+
pytest.param(f.regr_count, pa.array([3], type=pa.uint64()), id="regr_count"),
856856
pytest.param(f.regr_r2, pa.array([1], type=pa.float64()), id="regr_r2"),
857857
pytest.param(f.regr_avgx, pa.array([2], type=pa.float64()), id="regr_avgx"),
858858
pytest.param(f.regr_avgy, pa.array([4], type=pa.float64()), id="regr_avgy"),

src/catalog.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::errors::DataFusionError;
2525
use crate::utils::wait_for_future;
2626
use datafusion::{
2727
arrow::pyarrow::ToPyArrow,
28-
catalog::{schema::SchemaProvider, CatalogProvider},
28+
catalog::{CatalogProvider, SchemaProvider},
2929
datasource::{TableProvider, TableType},
3030
};
3131

src/common/data_type.rs

+9-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
use datafusion::arrow::array::Array;
1919
use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
2020
use datafusion_common::{DataFusionError, ScalarValue};
21+
use datafusion_expr::sqlparser::ast::NullTreatment as DFNullTreatment;
2122
use pyo3::{exceptions::PyValueError, prelude::*};
2223

2324
use crate::errors::py_datafusion_err;
@@ -775,20 +776,20 @@ pub enum NullTreatment {
775776
RESPECT_NULLS,
776777
}
777778

778-
impl From<NullTreatment> for sqlparser::ast::NullTreatment {
779-
fn from(null_treatment: NullTreatment) -> sqlparser::ast::NullTreatment {
779+
impl From<NullTreatment> for DFNullTreatment {
780+
fn from(null_treatment: NullTreatment) -> DFNullTreatment {
780781
match null_treatment {
781-
NullTreatment::IGNORE_NULLS => sqlparser::ast::NullTreatment::IgnoreNulls,
782-
NullTreatment::RESPECT_NULLS => sqlparser::ast::NullTreatment::RespectNulls,
782+
NullTreatment::IGNORE_NULLS => DFNullTreatment::IgnoreNulls,
783+
NullTreatment::RESPECT_NULLS => DFNullTreatment::RespectNulls,
783784
}
784785
}
785786
}
786787

787-
impl From<sqlparser::ast::NullTreatment> for NullTreatment {
788-
fn from(null_treatment: sqlparser::ast::NullTreatment) -> NullTreatment {
788+
impl From<DFNullTreatment> for NullTreatment {
789+
fn from(null_treatment: DFNullTreatment) -> NullTreatment {
789790
match null_treatment {
790-
sqlparser::ast::NullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
791-
sqlparser::ast::NullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
791+
DFNullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
792+
DFNullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
792793
}
793794
}
794795
}

0 commit comments

Comments
 (0)