Skip to content

Commit 7402981

Browse files
committed
Merge remote-tracking branch 'upstream/main' into research
2 parents f44b7ed + 405b99c commit 7402981

File tree

382 files changed

+14040
-6516
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

382 files changed

+14040
-6516
lines changed

.github/workflows/rust.yml

+27-20
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,12 @@ jobs:
8080
- name: Check datafusion-common without default features
8181
run: cargo check --all-targets --no-default-features -p datafusion-common
8282

83-
- name: Check datafusion-functions
83+
- name: Check datafusion-functions without default features
8484
run: cargo check --all-targets --no-default-features -p datafusion-functions
8585

86+
- name: Check datafusion-substrait without default features
87+
run: cargo check --all-targets --no-default-features -p datafusion-substrait
88+
8689
- name: Check workspace in debug mode
8790
run: cargo check --all-targets --workspace
8891

@@ -323,22 +326,26 @@ jobs:
323326
env:
324327
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
325328

326-
windows:
327-
name: cargo test (win64)
328-
runs-on: windows-latest
329-
steps:
330-
- uses: actions/checkout@v4
331-
with:
332-
submodules: true
333-
- name: Setup Rust toolchain
334-
uses: ./.github/actions/setup-windows-builder
335-
- name: Run tests (excluding doctests)
336-
shell: bash
337-
run: |
338-
export PATH=$PATH:$HOME/d/protoc/bin
339-
cargo test --lib --tests --bins --features avro,json,backtrace
340-
cd datafusion-cli
341-
cargo test --lib --tests --bins --all-features
329+
# Temporarily commenting out the Windows flow because its build runs enormously slowly
330+
# Waiting for new Windows 2025 github runner
331+
# Details: https://github.com/apache/datafusion/issues/13726
332+
#
333+
# windows:
334+
# name: cargo test (win64)
335+
# runs-on: windows-latest
336+
# steps:
337+
# - uses: actions/checkout@v4
338+
# with:
339+
# submodules: true
340+
# - name: Setup Rust toolchain
341+
# uses: ./.github/actions/setup-windows-builder
342+
# - name: Run tests (excluding doctests)
343+
# shell: bash
344+
# run: |
345+
# export PATH=$PATH:$HOME/d/protoc/bin
346+
# cargo test --lib --tests --bins --features avro,json,backtrace
347+
# cd datafusion-cli
348+
# cargo test --lib --tests --bins --all-features
342349

343350
macos:
344351
name: cargo test (macos)
@@ -582,9 +589,9 @@ jobs:
582589
#
583590
# To reproduce:
584591
# 1. Install the version of Rust that is failing. Example:
585-
# rustup install 1.79.0
592+
# rustup install 1.80.1
586593
# 2. Run the command that failed with that version. Example:
587-
# cargo +1.79.0 check -p datafusion
594+
# cargo +1.80.1 check -p datafusion
588595
#
589596
# To resolve, either:
590597
# 1. Change your code to use older Rust features,
@@ -603,4 +610,4 @@ jobs:
603610
run: cargo msrv --output-format json --log-target stdout verify
604611
- name: Check datafusion-cli
605612
working-directory: datafusion-cli
606-
run: cargo msrv --output-format json --log-target stdout verify
613+
run: cargo msrv --output-format json --log-target stdout verify

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ datafusion/sqllogictest/test_files/scratch*
6767
# temp file for core
6868
datafusion/core/*.parquet
6969

70+
# Generated core benchmark data
71+
datafusion/core/benches/data/*
72+
7073
# rat
7174
filtered_rat.txt
7275
rat.txt

Cargo.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ homepage = "https://datafusion.apache.org"
6666
license = "Apache-2.0"
6767
readme = "README.md"
6868
repository = "https://github.com/apache/datafusion"
69-
rust-version = "1.79"
69+
rust-version = "1.80.1"
7070
version = "43.0.0"
7171

7272
[workspace.dependencies]
@@ -95,7 +95,7 @@ arrow-ord = { path = "../arrow-rs/arrow-ord", default-features = false }
9595
arrow-schema = { path = "../arrow-rs/arrow-schema", default-features = false }
9696
arrow-string = { path = "../arrow-rs/arrow-string", default-features = false }
9797
async-trait = "0.1.73"
98-
bigdecimal = "0.4.6"
98+
bigdecimal = "0.4.7"
9999
bytes = "1.4"
100100
chrono = { version = "0.4.38", default-features = false }
101101
ctor = "0.2.0"
@@ -152,7 +152,7 @@ recursive = "0.1.1"
152152
regex = "1.8"
153153
rstest = "0.23.0"
154154
serde_json = "1"
155-
sqlparser = { version = "0.52.0", features = ["visitor"] }
155+
sqlparser = { version = "0.53.0", features = ["visitor"] }
156156
tempfile = "3"
157157
tokio = { version = "1.36", features = ["macros", "rt", "sync"] }
158158
url = "2.2"

README.md

+15-8
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ Default features:
112112
- `parquet`: support for reading the [Apache Parquet] format
113113
- `regex_expressions`: regular expression functions, such as `regexp_match`
114114
- `unicode_expressions`: Include unicode aware functions such as `character_length`
115-
- `unparser` : enables support to reverse LogicalPlans back into SQL
115+
- `unparser`: enables support to reverse LogicalPlans back into SQL
116+
- `recursive-protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection.
116117

117118
Optional features:
118119

@@ -126,16 +127,22 @@ Optional features:
126127

127128
## Rust Version Compatibility Policy
128129

129-
DataFusion's Minimum Required Stable Rust Version (MSRV) policy is to support stable [4 latest
130-
Rust versions](https://releases.rs) OR the stable minor Rust version as of 4 months, whichever is lower.
130+
The Rust toolchain releases are tracked at [Rust Versions](https://releases.rs) and follow
131+
[semantic versioning](https://semver.org/). A Rust toolchain release can be identified
132+
by a version string like `1.80.0`, or more generally `major.minor.patch`.
133+
134+
DataFusion supports the last 4 stable Rust minor versions released and any such versions released within the last 4 months.
131135

132136
For example, given the releases `1.78.0`, `1.79.0`, `1.80.0`, `1.80.1` and `1.81.0`, DataFusion will support `1.78.0`, which is 3 minor versions prior to the most recent minor version, `1.81`.
133137

134-
If a hotfix is released for the minimum supported Rust version (MSRV), the MSRV will be the minor version with all hotfixes, even if it surpasses the four-month window.
138+
Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes; this takes precedence over the other policies.
139+
140+
DataFusion enforces its MSRV policy using an [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
135141

136-
We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
142+
## DataFusion API Evolution and Deprecation Guidelines
137143

138-
## DataFusion API evolution policy
144+
Public methods in Apache DataFusion evolve over time: while we try to maintain a
145+
stable API, we also improve the API over time. As a result, we typically
146+
deprecate methods before removing them, according to the [deprecation guidelines].
139147

140-
Public methods in Apache DataFusion are subject to evolve as part of the API lifecycle.
141-
Deprecated methods will be phased out in accordance with the [policy](https://datafusion.apache.org/library-user-guide/api-health.html), ensuring the API is stable and healthy.
148+
[deprecation guidelines]: https://datafusion.apache.org/library-user-guide/api-health.html

benchmarks/src/bin/external_aggr.rs

+18-17
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
use std::collections::HashMap;
2121
use std::path::PathBuf;
2222
use std::sync::Arc;
23-
use std::sync::OnceLock;
23+
use std::sync::LazyLock;
2424
use structopt::StructOpt;
2525

2626
use arrow::record_batch::RecordBatch;
@@ -33,7 +33,8 @@ use datafusion::datasource::{MemTable, TableProvider};
3333
use datafusion::error::Result;
3434
use datafusion::execution::memory_pool::FairSpillPool;
3535
use datafusion::execution::memory_pool::{human_readable_size, units};
36-
use datafusion::execution::runtime_env::RuntimeConfig;
36+
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
37+
use datafusion::execution::SessionStateBuilder;
3738
use datafusion::physical_plan::display::DisplayableExecutionPlan;
3839
use datafusion::physical_plan::{collect, displayable};
3940
use datafusion::prelude::*;
@@ -90,7 +91,13 @@ struct QueryResult {
9091
/// Memory limits to run: 64MiB, 32MiB, 16MiB
9192
/// Q2 requires 250MiB for aggregation
9293
/// Memory limits to run: 512MiB, 256MiB, 128MiB, 64MiB, 32MiB
93-
static QUERY_MEMORY_LIMITS: OnceLock<HashMap<usize, Vec<u64>>> = OnceLock::new();
94+
static QUERY_MEMORY_LIMITS: LazyLock<HashMap<usize, Vec<u64>>> = LazyLock::new(|| {
95+
use units::*;
96+
let mut map = HashMap::new();
97+
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
98+
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
99+
map
100+
});
94101

95102
impl ExternalAggrConfig {
96103
const AGGR_TABLES: [&'static str; 1] = ["lineitem"];
@@ -113,16 +120,6 @@ impl ExternalAggrConfig {
113120
"#,
114121
];
115122

116-
fn init_query_memory_limits() -> &'static HashMap<usize, Vec<u64>> {
117-
use units::*;
118-
QUERY_MEMORY_LIMITS.get_or_init(|| {
119-
let mut map = HashMap::new();
120-
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
121-
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
122-
map
123-
})
124-
}
125-
126123
/// If `--query` and `--memory-limit` are not specified, run all queries
127124
/// with pre-configured memory limits
128125
/// If only `--query` is specified, run the query with all memory limits
@@ -160,8 +157,7 @@ impl ExternalAggrConfig {
160157
query_executions.push((query_id, limit));
161158
}
162159
None => {
163-
let memory_limits_table = Self::init_query_memory_limits();
164-
let memory_limits = memory_limits_table.get(&query_id).unwrap();
160+
let memory_limits = QUERY_MEMORY_LIMITS.get(&query_id).unwrap();
165161
for limit in memory_limits {
166162
query_executions.push((query_id, *limit));
167163
}
@@ -195,10 +191,15 @@ impl ExternalAggrConfig {
195191
let query_name =
196192
format!("Q{query_id}({})", human_readable_size(mem_limit as usize));
197193
let config = self.common.config();
198-
let runtime_config = RuntimeConfig::new()
194+
let runtime_env = RuntimeEnvBuilder::new()
199195
.with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize)))
200196
.build_arc()?;
201-
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
197+
let state = SessionStateBuilder::new()
198+
.with_config(config)
199+
.with_runtime_env(runtime_env)
200+
.with_default_features()
201+
.build();
202+
let ctx = SessionContext::from(state);
202203

203204
// register tables
204205
self.register_tables(&ctx).await?;

benchmarks/src/sort_tpch.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use datafusion::datasource::listing::{
3232
};
3333
use datafusion::datasource::{MemTable, TableProvider};
3434
use datafusion::error::Result;
35-
use datafusion::execution::runtime_env::RuntimeConfig;
35+
use datafusion::execution::SessionStateBuilder;
3636
use datafusion::physical_plan::display::DisplayableExecutionPlan;
3737
use datafusion::physical_plan::{displayable, execute_stream};
3838
use datafusion::prelude::*;
@@ -188,9 +188,11 @@ impl RunOpt {
188188
/// Benchmark query `query_id` in `SORT_QUERIES`
189189
async fn benchmark_query(&self, query_id: usize) -> Result<Vec<QueryResult>> {
190190
let config = self.common.config();
191-
192-
let runtime_config = RuntimeConfig::new().build_arc()?;
193-
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
191+
let state = SessionStateBuilder::new()
192+
.with_config(config)
193+
.with_default_features()
194+
.build();
195+
let ctx = SessionContext::from(state);
194196

195197
// register tables
196198
self.register_tables(&ctx).await?;

0 commit comments

Comments
 (0)