From e91f0b81e1d4c4595ec9ac7efa9b69b88ff23bde Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 2 Nov 2023 14:47:08 -0400 Subject: [PATCH 1/3] Minor: Improve documentation for `PartitionStream` and `StreamingTableExec` --- datafusion/physical-plan/src/streaming.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 27f03b727c29..71916c46316c 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Execution plan for streaming [`PartitionStream`] +//! Generic plans for deferred execution: [`StreamingTableExec`] and [`PartitionStream`] use std::any::Any; use std::sync::Arc; @@ -35,6 +35,10 @@ use futures::stream::StreamExt; use log::debug; /// A partition that can be converted into a [`SendableRecordBatchStream`] +/// +/// Combined with [`StreamingTable`], you can use this trait to implement +/// [`ExecutionPlan`] for a custom source with less boiler plate than +/// implementing `ExecutionPlan` directly for many use cases. pub trait PartitionStream: Send + Sync { /// Returns the schema of this partition fn schema(&self) -> &SchemaRef; @@ -43,7 +47,10 @@ pub trait PartitionStream: Send + Sync { fn execute(&self, ctx: Arc) -> SendableRecordBatchStream; } -/// An [`ExecutionPlan`] for [`PartitionStream`] +/// An [`ExecutionPlan`] for one or more [`PartitionStream`]s. +/// +/// If your source can be represented as one or more [`PartitionStream`]s, you can +/// use this struct to implement [`ExecutionPlan`]. pub struct StreamingTableExec { partitions: Vec>, projection: Option>, From 76db218f93f8ccad2130659c5e2af3ab8063292b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 2 Nov 2023 14:52:14 -0400 Subject: [PATCH 2/3] fmt --- datafusion/core/src/datasource/file_format/arrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 2777805078c7..900aad95747e 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -105,7 +105,7 @@ impl FileFormat for ArrowFormat { const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; -/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. +/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. /// See https://github.com/apache/arrow-rs/issues/5021 async fn infer_schema_from_file_stream( mut stream: BoxStream<'static, object_store::Result>, From 6b955b4c703b134c351f0242e60735659a83e7f7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 2 Nov 2023 14:52:32 -0400 Subject: [PATCH 3/3] fmt --- datafusion/physical-plan/src/streaming.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 71916c46316c..77b56e1d7540 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -36,7 +36,7 @@ use log::debug; /// A partition that can be converted into a [`SendableRecordBatchStream`] /// -/// Combined with [`StreamingTable`], you can use this trait to implement +/// Combined with [`StreamingTableExec`], you can use this trait to implement /// [`ExecutionPlan`] for a custom source with less boiler plate than /// implementing `ExecutionPlan` directly for many use cases. pub trait PartitionStream: Send + Sync {