Skip to content

Commit 83d15e8

Browse files
authored
Move SpawnedTask from datafusion_physical_plan to new datafusion_common_runtime crate (#9414)
* Initial commit * Tmp * Remove clippy ignores * Move SpawnedTask to under new crate * Minor changes * re-export from core
1 parent 10fbf42 commit 83d15e8

File tree

22 files changed

+186
-66
lines changed

22 files changed

+186
-66
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
exclude = ["datafusion-cli"]
2020
members = [
2121
"datafusion/common",
22+
"datafusion/common_runtime",
2223
"datafusion/core",
2324
"datafusion/expr",
2425
"datafusion/execution",
@@ -72,6 +73,7 @@ ctor = "0.2.0"
7273
dashmap = "5.4.0"
7374
datafusion = { path = "datafusion/core", version = "36.0.0", default-features = false }
7475
datafusion-common = { path = "datafusion/common", version = "36.0.0", default-features = false }
76+
datafusion-common-runtime = { path = "datafusion/common_runtime", version = "36.0.0" }
7577
datafusion-execution = { path = "datafusion/execution", version = "36.0.0" }
7678
datafusion-expr = { path = "datafusion/expr", version = "36.0.0" }
7779
datafusion-functions = { path = "datafusion/functions", version = "36.0.0" }

datafusion-cli/Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common_runtime/Cargo.toml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-common-runtime"
20+
description = "Common Runtime functionality for DataFusion query engine"
21+
keywords = ["arrow", "query", "sql"]
22+
readme = "README.md"
23+
version = { workspace = true }
24+
edition = { workspace = true }
25+
homepage = { workspace = true }
26+
repository = { workspace = true }
27+
license = { workspace = true }
28+
authors = { workspace = true }
29+
rust-version = { workspace = true }
30+
31+
[lib]
32+
name = "datafusion_common_runtime"
33+
path = "src/lib.rs"
34+
35+
[dependencies]
36+
tokio = { workspace = true }

datafusion/common_runtime/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# DataFusion Common Runtime
21+
22+
[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
23+
24+
This crate is a submodule of DataFusion that provides common runtime utilities, such as the `SpawnedTask` helper for spawning and joining Tokio tasks.
25+
26+
[df]: https://crates.io/crates/datafusion
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::future::Future;
19+
20+
use tokio::task::{JoinError, JoinSet};
21+
22+
/// Helper that provides a simple API to spawn a single task and join it.
23+
/// The task is aborted when the `SpawnedTask` is dropped, which keeps it cancel-safe.
24+
///
25+
/// Technically, it is just a wrapper around a `JoinSet` that always holds exactly
/// one task; no `Drop` impl is defined here, so the abort-on-drop behavior is the
/// one provided by `JoinSet` itself.
26+
#[derive(Debug)]
27+
pub struct SpawnedTask<R> {
28+
// The set holding the single task created by `spawn` / `spawn_blocking`.
inner: JoinSet<R>,
29+
}
30+
31+
impl<R: 'static> SpawnedTask<R> {
32+
/// Spawns the future `task` onto a fresh `JoinSet` and returns a handle to it.
///
/// The future must be `Send + 'static` and its output `R` must be `Send`.
/// Use [`SpawnedTask::join`] to wait for the result.
///
/// # Panics
///
/// Delegates to `JoinSet::spawn`, which tokio documents as panicking when
/// called outside of a Tokio runtime.
pub fn spawn<T>(task: T) -> Self
33+
where
34+
T: Future<Output = R>,
35+
T: Send + 'static,
36+
R: Send,
37+
{
38+
let mut inner = JoinSet::new();
39+
// The returned `AbortHandle` is intentionally discarded: the task is
// tracked through `inner` only.
inner.spawn(task);
40+
Self { inner }
41+
}
42+
43+
/// Spawns the blocking closure `task` via `JoinSet::spawn_blocking` and
/// returns a handle to it.
///
/// The closure must be `Send + 'static` and its return value `R` must be `Send`.
/// Use [`SpawnedTask::join`] to wait for the result.
///
/// NOTE(review): tokio documents that a blocking task cannot be aborted once
/// it has started running, so abort-on-drop may not stop such a task; confirm
/// this is acceptable for callers.
///
/// # Panics
///
/// Delegates to `JoinSet::spawn_blocking`, which tokio documents as panicking
/// when called outside of a Tokio runtime.
pub fn spawn_blocking<T>(task: T) -> Self
44+
where
45+
T: FnOnce() -> R,
46+
T: Send + 'static,
47+
R: Send,
48+
{
49+
let mut inner = JoinSet::new();
50+
// As in `spawn`, the returned `AbortHandle` is not needed.
inner.spawn_blocking(task);
51+
Self { inner }
52+
}
53+
54+
/// Waits for the spawned task to finish and returns its output.
///
/// Consumes `self`, so a task can be joined at most once.
///
/// # Errors
///
/// Returns the `JoinError` reported by `JoinSet::join_next` for the task
/// (per the tokio documentation, this happens when the task panicked or
/// was cancelled).
///
/// # Panics
///
/// Panics if the inner `JoinSet` is empty. Both constructors insert exactly
/// one task and this method consumes `self`, so this indicates a bug.
pub async fn join(mut self) -> Result<R, JoinError> {
55+
self.inner
56+
.join_next()
57+
.await
58+
// `join_next` yields `None` only for an empty set; see `# Panics` above.
.expect("`SpawnedTask` instance always contains exactly 1 task")
59+
}
60+
}

datafusion/common_runtime/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
pub mod common;
19+
20+
pub use common::SpawnedTask;

datafusion/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ bzip2 = { version = "0.4.3", optional = true }
8989
chrono = { workspace = true }
9090
dashmap = { workspace = true }
9191
datafusion-common = { workspace = true, features = ["object_store"] }
92+
datafusion-common-runtime = { workspace = true }
9293
datafusion-execution = { workspace = true }
9394
datafusion-expr = { workspace = true }
9495
datafusion-functions = { workspace = true }

datafusion/core/src/dataframe/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,6 +1510,7 @@ mod tests {
15101510
use arrow::array::{self, Int32Array};
15111511
use arrow::datatypes::DataType;
15121512
use datafusion_common::{Constraint, Constraints};
1513+
use datafusion_common_runtime::SpawnedTask;
15131514
use datafusion_expr::{
15141515
avg, cast, count, count_distinct, create_udf, expr, lit, max, min, sum,
15151516
BuiltInWindowFunction, ScalarFunctionImplementation, Volatility, WindowFrame,
@@ -2169,15 +2170,14 @@ mod tests {
21692170
}
21702171

21712172
#[tokio::test]
2172-
#[allow(clippy::disallowed_methods)]
21732173
async fn sendable() {
21742174
let df = test_table().await.unwrap();
21752175
// dataframes should be sendable between threads/tasks
2176-
let task = tokio::task::spawn(async move {
2176+
let task = SpawnedTask::spawn(async move {
21772177
df.select_columns(&["c1"])
21782178
.expect("should be usable in a task")
21792179
});
2180-
task.await.expect("task completed successfully");
2180+
task.join().await.expect("task completed successfully");
21812181
}
21822182

21832183
#[tokio::test]

datafusion/core/src/datasource/file_format/parquet.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ use arrow::datatypes::SchemaRef;
4040
use arrow::datatypes::{Fields, Schema};
4141
use bytes::{BufMut, BytesMut};
4242
use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType};
43+
use datafusion_common_runtime::SpawnedTask;
4344
use datafusion_execution::TaskContext;
4445
use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement};
45-
use datafusion_physical_plan::common::SpawnedTask;
4646
use futures::{StreamExt, TryStreamExt};
4747
use hashbrown::HashMap;
4848
use object_store::path::Path;

datafusion/core/src/datasource/file_format/write/demux.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,14 @@ use arrow_array::{downcast_dictionary_array, RecordBatch, StringArray, StructArr
3333
use arrow_schema::{DataType, Schema};
3434
use datafusion_common::cast::as_string_array;
3535
use datafusion_common::{exec_datafusion_err, DataFusionError};
36-
36+
use datafusion_common_runtime::SpawnedTask;
3737
use datafusion_execution::TaskContext;
3838

3939
use futures::StreamExt;
4040
use object_store::path::Path;
4141

4242
use rand::distributions::DistString;
4343

44-
use datafusion_physical_plan::common::SpawnedTask;
4544
use tokio::sync::mpsc::{self, Receiver, Sender, UnboundedReceiver, UnboundedSender};
4645

4746
type RecordBatchReceiver = Receiver<RecordBatch>;

0 commit comments

Comments
 (0)