Skip to content

Commit 2e52580

Browse files
Introduce HashMap and HashSet type aliases (#13236)
* Unite all references to hashbrown::HashMap by using a common type definition
* Replace some uses of std::collections::HashMap with hashbrown::HashMap
* Replace some uses of std::collections::HashMap with hashbrown::HashMap
* Replace some uses of std::collections::HashMap with hashbrown::HashMap
* Unite all references to hashbrown::HashSet by using a common type definition
* Replace some uses of std::collections::HashSet with hashbrown::HashSet
1 parent eeb9d58 commit 2e52580

File tree

41 files changed

+67
-79
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+67
-79
lines changed

datafusion/common/src/functional_dependencies.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818
//! FunctionalDependencies keeps track of functional dependencies
1919
//! inside DFSchema.
2020
21-
use std::collections::HashSet;
2221
use std::fmt::{Display, Formatter};
2322
use std::ops::Deref;
2423
use std::vec::IntoIter;
2524

2625
use crate::utils::{merge_and_order_indices, set_difference};
27-
use crate::{DFSchema, JoinType};
26+
use crate::{DFSchema, HashSet, JoinType};
2827

2928
/// This object defines a constraint on a table.
3029
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]

datafusion/common/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ pub use functional_dependencies::{
6666
get_target_functional_dependencies, Constraint, Constraints, Dependency,
6767
FunctionalDependence, FunctionalDependencies,
6868
};
69+
use hashbrown::hash_map::DefaultHashBuilder;
6970
pub use join_type::{JoinConstraint, JoinSide, JoinType};
7071
pub use param_value::ParamValues;
7172
pub use scalar::{ScalarType, ScalarValue};
@@ -87,6 +88,10 @@ pub use error::{
8788
_substrait_datafusion_err,
8889
};
8990

91+
// The HashMap and HashSet implementations that should be used as the uniform defaults
92+
pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
93+
pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;
94+
9095
/// Downcast an Arrow Array to a concrete type, return a `DataFusionError::Internal` if the cast is
9196
/// not possible. In normal usage of DataFusion the downcast should always succeed.
9297
///

datafusion/core/src/bin/print_functions_docs.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
// under the License.
1717

1818
use datafusion::execution::SessionStateDefaults;
19-
use datafusion_common::{not_impl_err, Result};
19+
use datafusion_common::{not_impl_err, HashSet, Result};
2020
use datafusion_expr::{
2121
aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF,
2222
DocSection, Documentation, ScalarUDF, WindowUDF,
2323
};
24-
use hashbrown::HashSet;
2524
use itertools::Itertools;
2625
use std::env::args;
2726
use std::fmt::Write as _;

datafusion/core/src/catalog_common/listing_schema.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,16 @@
1818
//! [`ListingSchemaProvider`]: [`SchemaProvider`] that scans ObjectStores for tables automatically
1919
2020
use std::any::Any;
21-
use std::collections::{HashMap, HashSet};
21+
use std::collections::HashSet;
2222
use std::path::Path;
2323
use std::sync::{Arc, Mutex};
2424

2525
use crate::catalog::{SchemaProvider, TableProvider, TableProviderFactory};
2626
use crate::execution::context::SessionState;
2727

28-
use datafusion_common::{Constraints, DFSchema, DataFusionError, TableReference};
28+
use datafusion_common::{
29+
Constraints, DFSchema, DataFusionError, HashMap, TableReference,
30+
};
2931
use datafusion_expr::CreateExternalTable;
3032

3133
use async_trait::async_trait;

datafusion/core/src/datasource/file_format/parquet.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ use datafusion_physical_plan::metrics::MetricsSet;
6363

6464
use async_trait::async_trait;
6565
use bytes::Bytes;
66-
use hashbrown::HashMap;
66+
use datafusion_common::HashMap;
6767
use log::debug;
6868
use object_store::buffered::BufWriter;
6969
use parquet::arrow::arrow_writer::{

datafusion/core/src/datasource/listing/helpers.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,14 @@
1717

1818
//! Helper functions for the table implementation
1919
20-
use std::collections::HashMap;
2120
use std::mem;
2221
use std::sync::Arc;
2322

2423
use super::ListingTableUrl;
2524
use super::PartitionedFile;
2625
use crate::execution::context::SessionState;
2726
use datafusion_common::internal_err;
28-
use datafusion_common::{Result, ScalarValue};
27+
use datafusion_common::{HashMap, Result, ScalarValue};
2928
use datafusion_expr::{BinaryExpr, Operator};
3029

3130
use arrow::{

datafusion/core/src/physical_optimizer/sort_pushdown.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
3232
use datafusion_common::tree_node::{
3333
ConcreteTreeNode, Transformed, TreeNode, TreeNodeRecursion,
3434
};
35-
use datafusion_common::{plan_err, JoinSide, Result};
35+
use datafusion_common::{plan_err, HashSet, JoinSide, Result};
3636
use datafusion_expr::JoinType;
3737
use datafusion_physical_expr::expressions::Column;
3838
use datafusion_physical_expr::utils::collect_columns;
@@ -41,8 +41,6 @@ use datafusion_physical_expr_common::sort_expr::{
4141
LexOrdering, LexOrderingRef, LexRequirement,
4242
};
4343

44-
use hashbrown::HashSet;
45-
4644
/// This is a "data class" we use within the [`EnforceSorting`] rule to push
4745
/// down [`SortExec`] in the plan. In some cases, we can reduce the total
4846
/// computational cost by pushing down `SortExec`s through some executors. The

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ use test_utils::{add_empty_batches, StringBatchGenerator};
4242
use crate::fuzz_cases::aggregation_fuzzer::{
4343
AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
4444
};
45+
use datafusion_common::HashMap;
4546
use datafusion_physical_expr_common::sort_expr::LexOrdering;
46-
use hashbrown::HashMap;
4747
use rand::rngs::StdRng;
4848
use rand::{Rng, SeedableRng};
4949
use tokio::task::JoinSet;

datafusion/core/tests/fuzz_cases/window_fuzz.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
4545
use test_utils::add_empty_batches;
4646

4747
use datafusion::functions_window::row_number::row_number_udwf;
48+
use datafusion_common::HashMap;
4849
use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf};
4950
use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf};
5051
use datafusion_physical_expr_common::sort_expr::LexOrdering;
51-
use hashbrown::HashMap;
5252
use rand::distributions::Alphanumeric;
5353
use rand::rngs::StdRng;
5454
use rand::{Rng, SeedableRng};

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::collections::HashMap;
2019
use std::hash::{DefaultHasher, Hash, Hasher};
2120
use std::sync::Arc;
2221

@@ -39,7 +38,8 @@ use datafusion_common::cast::{as_float64_array, as_int32_array};
3938
use datafusion_common::tree_node::{Transformed, TreeNode};
4039
use datafusion_common::{
4140
assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err,
42-
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
41+
not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, HashMap, Result,
42+
ScalarValue,
4343
};
4444
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
4545
use datafusion_expr::{

datafusion/execution/src/memory_pool/pool.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
// under the License.
1717

1818
use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation};
19+
use datafusion_common::HashMap;
1920
use datafusion_common::{resources_datafusion_err, DataFusionError, Result};
20-
use hashbrown::HashMap;
2121
use log::debug;
2222
use parking_lot::Mutex;
2323
use std::{

datafusion/expr/src/conditional_expressions.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919
use crate::expr::Case;
2020
use crate::{expr_schema::ExprSchemable, Expr};
2121
use arrow::datatypes::DataType;
22-
use datafusion_common::{plan_err, DFSchema, Result};
23-
use std::collections::HashSet;
22+
use datafusion_common::{plan_err, DFSchema, HashSet, Result};
2423

2524
/// Helper struct for building [Expr::Case]
2625
pub struct CaseBuilder {

datafusion/expr/src/execution_props.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
use crate::var_provider::{VarProvider, VarType};
1919
use chrono::{DateTime, TimeZone, Utc};
2020
use datafusion_common::alias::AliasGenerator;
21-
use std::collections::HashMap;
21+
use datafusion_common::HashMap;
2222
use std::sync::Arc;
2323

2424
/// Holds per-query execution properties and data (such as statement

datafusion/expr/src/expr.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
//! Logical Expressions: [`Expr`]
1919
20-
use std::collections::{HashMap, HashSet};
20+
use std::collections::HashSet;
2121
use std::fmt::{self, Display, Formatter, Write};
2222
use std::hash::{Hash, Hasher};
2323
use std::mem;
@@ -39,7 +39,7 @@ use datafusion_common::tree_node::{
3939
Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
4040
};
4141
use datafusion_common::{
42-
plan_err, Column, DFSchema, Result, ScalarValue, TableReference,
42+
plan_err, Column, DFSchema, HashMap, Result, ScalarValue, TableReference,
4343
};
4444
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
4545
use sqlparser::ast::{

datafusion/expr/src/registry.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
use crate::expr_rewriter::FunctionRewrite;
2121
use crate::planner::ExprPlanner;
2222
use crate::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
23-
use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
24-
use std::collections::{HashMap, HashSet};
23+
use datafusion_common::{not_impl_err, plan_datafusion_err, HashMap, Result};
24+
use std::collections::HashSet;
2525
use std::fmt::Debug;
2626
use std::sync::Arc;
2727

datafusion/expr/src/utils.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! Expression utilities
1919
2020
use std::cmp::Ordering;
21-
use std::collections::{HashMap, HashSet};
21+
use std::collections::HashSet;
2222
use std::ops::Deref;
2323
use std::sync::Arc;
2424

@@ -36,7 +36,7 @@ use datafusion_common::tree_node::{
3636
use datafusion_common::utils::get_at_indices;
3737
use datafusion_common::{
3838
internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef,
39-
DataFusionError, Result, TableReference,
39+
DataFusionError, HashMap, Result, TableReference,
4040
};
4141

4242
use indexmap::IndexSet;

datafusion/functions-aggregate/src/median.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::collections::HashSet;
1918
use std::fmt::{Debug, Formatter};
2019
use std::mem::{size_of, size_of_val};
2120
use std::sync::{Arc, OnceLock};
@@ -33,7 +32,7 @@ use arrow::array::Array;
3332
use arrow::array::ArrowNativeTypeOp;
3433
use arrow::datatypes::ArrowNativeType;
3534

36-
use datafusion_common::{DataFusionError, Result, ScalarValue};
35+
use datafusion_common::{DataFusionError, HashSet, Result, ScalarValue};
3736
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
3837
use datafusion_expr::function::StateFieldsArgs;
3938
use datafusion_expr::{

datafusion/functions-aggregate/src/regr.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ use arrow::{
2424
datatypes::DataType,
2525
datatypes::Field,
2626
};
27-
use datafusion_common::{downcast_value, plan_err, unwrap_or_internal_err, ScalarValue};
28-
use datafusion_common::{DataFusionError, Result};
27+
use datafusion_common::{
28+
downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, HashMap, Result,
29+
ScalarValue,
30+
};
2931
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
3032
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3133
use datafusion_expr::type_coercion::aggregates::NUMERICS;
@@ -34,7 +36,6 @@ use datafusion_expr::{
3436
Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
3537
};
3638
use std::any::Any;
37-
use std::collections::HashMap;
3839
use std::fmt::Debug;
3940
use std::mem::size_of_val;
4041
use std::sync::OnceLock;

datafusion/functions-nested/src/except.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,12 @@ use arrow_array::cast::AsArray;
2323
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
2424
use arrow_buffer::OffsetBuffer;
2525
use arrow_schema::{DataType, FieldRef};
26-
use datafusion_common::{exec_err, internal_err, Result};
26+
use datafusion_common::{exec_err, internal_err, HashSet, Result};
2727
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
2828
use datafusion_expr::{
2929
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3030
};
3131
use std::any::Any;
32-
use std::collections::HashSet;
3332
use std::sync::{Arc, OnceLock};
3433

3534
make_udf_expr_and_func!(

datafusion/functions-nested/src/map.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::collections::{HashSet, VecDeque};
19+
use std::collections::VecDeque;
2020
use std::sync::{Arc, OnceLock};
2121

2222
use arrow::array::ArrayData;
@@ -25,7 +25,7 @@ use arrow_buffer::{Buffer, ToByteSlice};
2525
use arrow_schema::{DataType, Field, SchemaBuilder};
2626

2727
use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
28-
use datafusion_common::{exec_err, Result, ScalarValue};
28+
use datafusion_common::{exec_err, HashSet, Result, ScalarValue};
2929
use datafusion_expr::expr::ScalarFunction;
3030
use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP;
3131
use datafusion_expr::{

datafusion/functions/src/core/named_struct.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,10 @@
1717

1818
use arrow::array::StructArray;
1919
use arrow::datatypes::{DataType, Field, Fields};
20-
use datafusion_common::{exec_err, internal_err, Result, ScalarValue};
20+
use datafusion_common::{exec_err, internal_err, HashSet, Result, ScalarValue};
2121
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRUCT;
2222
use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
2323
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
24-
use hashbrown::HashSet;
2524
use std::any::Any;
2625
use std::sync::{Arc, OnceLock};
2726

datafusion/functions/src/unicode/translate.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use arrow::array::{
2222
ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, OffsetSizeTrait,
2323
};
2424
use arrow::datatypes::DataType;
25-
use hashbrown::HashMap;
25+
use datafusion_common::HashMap;
2626
use unicode_segmentation::UnicodeSegmentation;
2727

2828
use crate::utils::{make_scalar_function, utf8_to_str_type};

datafusion/optimizer/src/decorrelate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
//! [`PullUpCorrelatedExpr`] converts correlated subqueries to `Joins`
1919
20-
use std::collections::{BTreeSet, HashMap};
20+
use std::collections::BTreeSet;
2121
use std::ops::Deref;
2222
use std::sync::Arc;
2323

@@ -27,7 +27,7 @@ use crate::utils::collect_subquery_cols;
2727
use datafusion_common::tree_node::{
2828
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
2929
};
30-
use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue};
30+
use datafusion_common::{plan_err, Column, DFSchemaRef, HashMap, Result, ScalarValue};
3131
use datafusion_expr::expr::Alias;
3232
use datafusion_expr::simplify::SimplifyContext;
3333
use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction};

datafusion/optimizer/src/optimize_projections/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
2020
mod required_indices;
2121

22-
use std::collections::{HashMap, HashSet};
22+
use std::collections::HashSet;
2323
use std::sync::Arc;
2424

2525
use crate::optimizer::ApplyOrder;
2626
use crate::{OptimizerConfig, OptimizerRule};
2727

2828
use datafusion_common::{
2929
get_required_group_by_exprs_indices, internal_datafusion_err, internal_err, Column,
30-
JoinType, Result,
30+
HashMap, JoinType, Result,
3131
};
3232
use datafusion_expr::expr::Alias;
3333
use datafusion_expr::Unnest;

datafusion/optimizer/src/optimizer.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
//! [`Optimizer`] and [`OptimizerRule`]
1919
20-
use std::collections::HashSet;
2120
use std::fmt::Debug;
2221
use std::sync::Arc;
2322

@@ -29,7 +28,7 @@ use datafusion_common::alias::AliasGenerator;
2928
use datafusion_common::config::ConfigOptions;
3029
use datafusion_common::instant::Instant;
3130
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
32-
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
31+
use datafusion_common::{internal_err, DFSchema, DataFusionError, HashSet, Result};
3332
use datafusion_expr::logical_plan::LogicalPlan;
3433

3534
use crate::common_subexpr_eliminate::CommonSubexprEliminate;

datafusion/optimizer/src/single_distinct_to_groupby.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ use std::sync::Arc;
2222
use crate::optimizer::ApplyOrder;
2323
use crate::{OptimizerConfig, OptimizerRule};
2424

25-
use datafusion_common::{internal_err, tree_node::Transformed, DataFusionError, Result};
25+
use datafusion_common::{
26+
internal_err, tree_node::Transformed, DataFusionError, HashSet, Result,
27+
};
2628
use datafusion_expr::builder::project;
2729
use datafusion_expr::{
2830
col,
@@ -31,8 +33,6 @@ use datafusion_expr::{
3133
Expr,
3234
};
3335

34-
use hashbrown::HashSet;
35-
3636
/// single distinct to group by optimizer rule
3737
/// ```text
3838
/// Before:

0 commit comments

Comments
 (0)