Skip to content

Commit 85f3578

Browse files
jayzhan211 and alamb authored
Minor: Move hash utils to common (#7684)
* move hash utils to common
  Signed-off-by: jayzhan211 <[email protected]>
* support backward compatibility
  Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 2d6e768 commit 85f3578

File tree

9 files changed

+26
-13
lines changed

9 files changed

+26
-13
lines changed

datafusion-cli/Cargo.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,14 @@ default = ["parquet"]
3939
pyarrow = ["pyo3", "arrow/pyarrow"]
4040

4141
[dependencies]
42+
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
4243
apache-avro = { version = "0.16", default-features = false, features = ["snappy"], optional = true }
4344
arrow = { workspace = true }
4445
arrow-array = { workspace = true }
46+
arrow-buffer = { workspace = true }
47+
arrow-schema = { workspace = true }
4548
chrono = { workspace = true }
49+
half = { version = "2.1", default-features = false }
4650
num_cpus = "1.13.0"
4751
object_store = { version = "0.7.0", default-features = false, optional = true }
4852
parquet = { workspace = true, optional = true }

datafusion/physical-expr/src/hash_utils.rs renamed to datafusion/common/src/hash_utils.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@
1717

1818
//! Functionality used both on logical and physical plans
1919
20+
use std::sync::Arc;
21+
2022
use ahash::RandomState;
2123
use arrow::array::*;
2224
use arrow::datatypes::*;
2325
use arrow::row::Rows;
2426
use arrow::{downcast_dictionary_array, downcast_primitive_array};
2527
use arrow_buffer::i256;
26-
use datafusion_common::{
27-
cast::{
28-
as_boolean_array, as_generic_binary_array, as_primitive_array, as_string_array,
29-
},
30-
internal_err, DataFusionError, Result,
28+
29+
use crate::cast::{
30+
as_boolean_array, as_generic_binary_array, as_primitive_array, as_string_array,
3131
};
32-
use std::sync::Arc;
32+
use crate::error::{DataFusionError, Result, _internal_err};
3333

3434
// Combines two hashes into one hash
3535
#[inline]
@@ -51,7 +51,7 @@ fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col:
5151
}
5252
}
5353

54-
pub(crate) trait HashValue {
54+
pub trait HashValue {
5555
fn hash_one(&self, state: &RandomState) -> u64;
5656
}
5757

@@ -337,7 +337,7 @@ pub fn create_hashes<'a>(
337337
}
338338
_ => {
339339
// This is internal because we should have caught this before.
340-
return internal_err!(
340+
return _internal_err!(
341341
"Unsupported data type in hasher: {}",
342342
col.data_type()
343343
);

datafusion/common/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod error;
2525
pub mod file_options;
2626
pub mod format;
2727
mod functional_dependencies;
28+
pub mod hash_utils;
2829
mod join_type;
2930
pub mod parsers;
3031
#[cfg(feature = "pyarrow")]

datafusion/physical-expr/src/expressions/in_list.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use std::fmt::Debug;
2424
use std::hash::{Hash, Hasher};
2525
use std::sync::Arc;
2626

27-
use crate::hash_utils::HashValue;
2827
use crate::physical_expr::down_cast_any_ref;
2928
use crate::utils::expr_list_eq_any_order;
3029
use crate::PhysicalExpr;
@@ -37,6 +36,7 @@ use arrow::datatypes::*;
3736
use arrow::record_batch::RecordBatch;
3837
use arrow::util::bit_iterator::BitIndexIterator;
3938
use arrow::{downcast_dictionary_array, downcast_primitive_array};
39+
use datafusion_common::hash_utils::HashValue;
4040
use datafusion_common::{
4141
cast::{as_boolean_array, as_generic_binary_array, as_string_array},
4242
internal_err, not_impl_err, DataFusionError, Result, ScalarValue,

datafusion/physical-expr/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ pub mod equivalence;
2828
pub mod execution_props;
2929
pub mod expressions;
3030
pub mod functions;
31-
pub mod hash_utils;
3231
pub mod intervals;
3332
pub mod math_expressions;
3433
mod partitioning;
@@ -49,6 +48,9 @@ pub mod utils;
4948
pub mod var_provider;
5049
pub mod window;
5150

51+
// For backwards compatibility
52+
pub use datafusion_common::hash_utils;
53+
5254
pub use aggregate::groups_accumulator::{
5355
EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter,
5456
};

datafusion/physical-plan/src/aggregates/group_values/row.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ use arrow::record_batch::RecordBatch;
2222
use arrow::row::{RowConverter, Rows, SortField};
2323
use arrow_array::{Array, ArrayRef};
2424
use arrow_schema::{DataType, SchemaRef};
25+
use datafusion_common::hash_utils::create_hashes;
2526
use datafusion_common::{DataFusionError, Result};
2627
use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
27-
use datafusion_physical_expr::hash_utils::create_hashes;
2828
use datafusion_physical_expr::EmitTo;
2929
use hashbrown::raw::RawTable;
3030

datafusion/physical-plan/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,10 +375,11 @@ pub mod windows;
375375

376376
use crate::repartition::RepartitionExec;
377377
use crate::sorts::sort_preserving_merge::SortPreservingMergeExec;
378+
pub use datafusion_common::hash_utils;
378379
pub use datafusion_common::utils::project_schema;
379380
use datafusion_execution::TaskContext;
380381
pub use datafusion_physical_expr::{
381-
expressions, functions, hash_utils, ordering_equivalence_properties_helper, udf,
382+
expressions, functions, ordering_equivalence_properties_helper, udf,
382383
};
383384

384385
#[cfg(test)]

datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ use arrow::{
4343
datatypes::{Schema, SchemaBuilder, SchemaRef},
4444
record_batch::RecordBatch,
4545
};
46+
47+
use datafusion_common::hash_utils::create_hashes;
4648
use datafusion_common::utils::{
4749
evaluate_partition_ranges, get_arrayref_at_indices, get_at_indices,
4850
get_record_batch_at_indices, get_row_at_idx,
@@ -51,7 +53,6 @@ use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
5153
use datafusion_execution::TaskContext;
5254
use datafusion_expr::window_state::{PartitionBatchState, WindowAggState};
5355
use datafusion_expr::ColumnarValue;
54-
use datafusion_physical_expr::hash_utils::create_hashes;
5556
use datafusion_physical_expr::window::{
5657
PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState,
5758
};

0 commit comments

Comments
 (0)