Skip to content

Commit

Permalink
Minor: Move hash utils to common (#7684)
Browse files Browse the repository at this point in the history
* move hash utils to common

Signed-off-by: jayzhan211 <[email protected]>

* support backward compatibility

Signed-off-by: jayzhan211 <[email protected]>

---------

Signed-off-by: jayzhan211 <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
jayzhan211 and alamb committed Sep 29, 2023
1 parent 2d6e768 commit 85f3578
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 13 deletions.
4 changes: 4 additions & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,14 @@ default = ["parquet"]
pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.16", default-features = false, features = ["snappy"], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
chrono = { workspace = true }
half = { version = "2.1", default-features = false }
num_cpus = "1.13.0"
object_store = { version = "0.7.0", default-features = false, optional = true }
parquet = { workspace = true, optional = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@

//! Functionality used both on logical and physical plans

use std::sync::Arc;

use ahash::RandomState;
use arrow::array::*;
use arrow::datatypes::*;
use arrow::row::Rows;
use arrow::{downcast_dictionary_array, downcast_primitive_array};
use arrow_buffer::i256;
use datafusion_common::{
cast::{
as_boolean_array, as_generic_binary_array, as_primitive_array, as_string_array,
},
internal_err, DataFusionError, Result,

use crate::cast::{
as_boolean_array, as_generic_binary_array, as_primitive_array, as_string_array,
};
use std::sync::Arc;
use crate::error::{DataFusionError, Result, _internal_err};

// Combines two hashes into one hash
#[inline]
Expand All @@ -51,7 +51,7 @@ fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col:
}
}

pub(crate) trait HashValue {
pub trait HashValue {
fn hash_one(&self, state: &RandomState) -> u64;
}

Expand Down Expand Up @@ -337,7 +337,7 @@ pub fn create_hashes<'a>(
}
_ => {
// Reported as an internal error: an unsupported data type should have been caught before reaching the hasher.
return internal_err!(
return _internal_err!(
"Unsupported data type in hasher: {}",
col.data_type()
);
Expand Down
1 change: 1 addition & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ mod error;
pub mod file_options;
pub mod format;
mod functional_dependencies;
pub mod hash_utils;
mod join_type;
pub mod parsers;
#[cfg(feature = "pyarrow")]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/expressions/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::sync::Arc;

use crate::hash_utils::HashValue;
use crate::physical_expr::down_cast_any_ref;
use crate::utils::expr_list_eq_any_order;
use crate::PhysicalExpr;
Expand All @@ -37,6 +36,7 @@ use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
use arrow::util::bit_iterator::BitIndexIterator;
use arrow::{downcast_dictionary_array, downcast_primitive_array};
use datafusion_common::hash_utils::HashValue;
use datafusion_common::{
cast::{as_boolean_array, as_generic_binary_array, as_string_array},
internal_err, not_impl_err, DataFusionError, Result, ScalarValue,
Expand Down
4 changes: 3 additions & 1 deletion datafusion/physical-expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ pub mod equivalence;
pub mod execution_props;
pub mod expressions;
pub mod functions;
pub mod hash_utils;
pub mod intervals;
pub mod math_expressions;
mod partitioning;
Expand All @@ -49,6 +48,9 @@ pub mod utils;
pub mod var_provider;
pub mod window;

// Re-exported for backwards compatibility: `hash_utils` now lives in `datafusion_common`
pub use datafusion_common::hash_utils;

pub use aggregate::groups_accumulator::{
EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter,
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ use arrow::record_batch::RecordBatch;
use arrow::row::{RowConverter, Rows, SortField};
use arrow_array::{Array, ArrayRef};
use arrow_schema::{DataType, SchemaRef};
use datafusion_common::hash_utils::create_hashes;
use datafusion_common::{DataFusionError, Result};
use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
use datafusion_physical_expr::hash_utils::create_hashes;
use datafusion_physical_expr::EmitTo;
use hashbrown::raw::RawTable;

Expand Down
3 changes: 2 additions & 1 deletion datafusion/physical-plan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,11 @@ pub mod windows;

use crate::repartition::RepartitionExec;
use crate::sorts::sort_preserving_merge::SortPreservingMergeExec;
pub use datafusion_common::hash_utils;
pub use datafusion_common::utils::project_schema;
use datafusion_execution::TaskContext;
pub use datafusion_physical_expr::{
expressions, functions, hash_utils, ordering_equivalence_properties_helper, udf,
expressions, functions, ordering_equivalence_properties_helper, udf,
};

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ use arrow::{
datatypes::{Schema, SchemaBuilder, SchemaRef},
record_batch::RecordBatch,
};

use datafusion_common::hash_utils::create_hashes;
use datafusion_common::utils::{
evaluate_partition_ranges, get_arrayref_at_indices, get_at_indices,
get_record_batch_at_indices, get_row_at_idx,
Expand All @@ -51,7 +53,6 @@ use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
use datafusion_execution::TaskContext;
use datafusion_expr::window_state::{PartitionBatchState, WindowAggState};
use datafusion_expr::ColumnarValue;
use datafusion_physical_expr::hash_utils::create_hashes;
use datafusion_physical_expr::window::{
PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState,
};
Expand Down

0 comments on commit 85f3578

Please sign in to comment.