Skip to content

Commit

Permalink
Merge branch 'main' into vectorize-append-value
Browse files — browse the repository at this point in the history
  • Loading branch information
Rachelint committed Nov 2, 2024
2 parents 7a1ed90 + 89e96b4 commit e8c0aaa
Show file tree
Hide file tree
Showing 239 changed files with 6,460 additions and 1,768 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
under the License.
-->

* [DataFusion CHANGELOG](./datafusion/CHANGELOG.md)
Change logs for each release can be found [here](dev/changelog).


For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md).
50 changes: 26 additions & 24 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ members = [
"datafusion/expr",
"datafusion/expr-common",
"datafusion/execution",
"datafusion/ffi",
"datafusion/functions",
"datafusion/functions-aggregate",
"datafusion/functions-aggregate-common",
Expand Down Expand Up @@ -59,7 +60,7 @@ license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/datafusion"
rust-version = "1.79"
version = "42.1.0"
version = "42.2.0"

[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
Expand Down Expand Up @@ -92,29 +93,30 @@ bytes = "1.4"
chrono = { version = "0.4.38", default-features = false }
ctor = "0.2.0"
dashmap = "6.0.1"
datafusion = { path = "datafusion/core", version = "42.1.0", default-features = false }
datafusion-catalog = { path = "datafusion/catalog", version = "42.1.0" }
datafusion-common = { path = "datafusion/common", version = "42.1.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.1.0" }
datafusion-execution = { path = "datafusion/execution", version = "42.1.0" }
datafusion-expr = { path = "datafusion/expr", version = "42.1.0" }
datafusion-expr-common = { path = "datafusion/expr-common", version = "42.1.0" }
datafusion-functions = { path = "datafusion/functions", version = "42.1.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.1.0" }
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.1.0" }
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "42.1.0" }
datafusion-functions-window = { path = "datafusion/functions-window", version = "42.1.0" }
datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "42.1.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "42.1.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "42.1.0", default-features = false }
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "42.1.0", default-features = false }
datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "42.1.0" }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "42.1.0" }
datafusion-proto = { path = "datafusion/proto", version = "42.1.0" }
datafusion-proto-common = { path = "datafusion/proto-common", version = "42.1.0" }
datafusion-sql = { path = "datafusion/sql", version = "42.1.0" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "42.1.0" }
datafusion-substrait = { path = "datafusion/substrait", version = "42.1.0" }
datafusion = { path = "datafusion/core", version = "42.2.0", default-features = false }
datafusion-catalog = { path = "datafusion/catalog", version = "42.2.0" }
datafusion-common = { path = "datafusion/common", version = "42.2.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.2.0" }
datafusion-execution = { path = "datafusion/execution", version = "42.2.0" }
datafusion-expr = { path = "datafusion/expr", version = "42.2.0" }
datafusion-expr-common = { path = "datafusion/expr-common", version = "42.2.0" }
datafusion-ffi = { path = "datafusion/ffi", version = "42.2.0" }
datafusion-functions = { path = "datafusion/functions", version = "42.2.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.2.0" }
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.2.0" }
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "42.2.0" }
datafusion-functions-window = { path = "datafusion/functions-window", version = "42.2.0" }
datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "42.2.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "42.2.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "42.2.0", default-features = false }
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "42.2.0", default-features = false }
datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "42.2.0" }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "42.2.0" }
datafusion-proto = { path = "datafusion/proto", version = "42.2.0" }
datafusion-proto-common = { path = "datafusion/proto-common", version = "42.2.0" }
datafusion-sql = { path = "datafusion/sql", version = "42.2.0" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "42.2.0" }
datafusion-substrait = { path = "datafusion/substrait", version = "42.2.0" }
doc-comment = "0.3"
env_logger = "0.11"
futures = "0.3"
Expand Down
7 changes: 1 addition & 6 deletions benchmarks/src/bin/external_aggr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,7 @@ impl ExternalAggrConfig {
) -> Result<Vec<QueryResult>> {
let query_name =
format!("Q{query_id}({})", human_readable_size(mem_limit as usize));
let mut config = self.common.config();
config
.options_mut()
.execution
.parquet
.schema_force_view_types = self.common.force_view_types;
let config = self.common.config();
let runtime_config = RuntimeConfig::new()
.with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize)))
.build_arc()?;
Expand Down
1 change: 0 additions & 1 deletion benchmarks/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ impl RunOpt {
let mut config = self.common.config();
{
let parquet_options = &mut config.options_mut().execution.parquet;
parquet_options.schema_force_view_types = self.common.force_view_types;
// The hits_partitioned dataset specifies string columns
// as binary due to how it was written. Force it to strings
parquet_options.binary_as_string = true;
Expand Down
8 changes: 1 addition & 7 deletions benchmarks/src/imdb/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,7 @@ impl RunOpt {
.config()
.with_collect_statistics(!self.disable_statistics);
config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join;
config
.options_mut()
.execution
.parquet
.schema_force_view_types = self.common.force_view_types;

let ctx = SessionContext::new_with_config(config);

// register tables
Expand Down Expand Up @@ -517,7 +513,6 @@ mod tests {
partitions: Some(2),
batch_size: 8192,
debug: false,
force_view_types: false,
};
let opt = RunOpt {
query: Some(query),
Expand Down Expand Up @@ -551,7 +546,6 @@ mod tests {
partitions: Some(2),
batch_size: 8192,
debug: false,
force_view_types: false,
};
let opt = RunOpt {
query: Some(query),
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt};

use arrow::util::pretty;
use datafusion::common::Result;
use datafusion::physical_expr::PhysicalSortExpr;
use datafusion::physical_expr::{LexOrdering, LexOrderingRef, PhysicalSortExpr};
use datafusion::physical_plan::collect;
use datafusion::physical_plan::sorts::sort::SortExec;
use datafusion::prelude::{SessionConfig, SessionContext};
Expand Down Expand Up @@ -170,13 +170,13 @@ impl RunOpt {

async fn exec_sort(
ctx: &SessionContext,
expr: &[PhysicalSortExpr],
expr: LexOrderingRef<'_>,
test_file: &TestParquetFile,
debug: bool,
) -> Result<(usize, std::time::Duration)> {
let start = Instant::now();
let scan = test_file.create_scan(ctx, None).await?;
let exec = Arc::new(SortExec::new(expr.to_owned(), scan));
let exec = Arc::new(SortExec::new(LexOrdering::new(expr.to_owned()), scan));
let task_ctx = ctx.task_ctx();
let result = collect(exec, task_ctx).await?;
let elapsed = start.elapsed();
Expand Down
7 changes: 0 additions & 7 deletions benchmarks/src/tpch/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,6 @@ impl RunOpt {
.config()
.with_collect_statistics(!self.disable_statistics);
config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join;
config
.options_mut()
.execution
.parquet
.schema_force_view_types = self.common.force_view_types;
let ctx = SessionContext::new_with_config(config);

// register tables
Expand Down Expand Up @@ -345,7 +340,6 @@ mod tests {
partitions: Some(2),
batch_size: 8192,
debug: false,
force_view_types: false,
};
let opt = RunOpt {
query: Some(query),
Expand Down Expand Up @@ -379,7 +373,6 @@ mod tests {
partitions: Some(2),
batch_size: 8192,
debug: false,
force_view_types: false,
};
let opt = RunOpt {
query: Some(query),
Expand Down
5 changes: 0 additions & 5 deletions benchmarks/src/util/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@ pub struct CommonOpt {
/// Activate debug mode to see more details
#[structopt(short, long)]
pub debug: bool,

/// If true, will use StringView/BinaryViewArray instead of String/BinaryArray
/// when reading ParquetFiles
#[structopt(long)]
pub force_view_types: bool,
}

impl CommonOpt {
Expand Down
Loading

0 comments on commit e8c0aaa

Please sign in to comment.