Skip to content

Commit 13a4225

Browse files
goldmedal and alamb authored
Introduce binary_as_string parquet option, upgrade to arrow/parquet 53.2.0 (#12816)
* Update to arrow-rs 53.2.0
* introduce binary_as_string parquet option
* Fix test

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 6a3c0b0 commit 13a4225

File tree

18 files changed

+581
-244
lines changed

18 files changed

+581
-244
lines changed

Cargo.toml

+9-9
Original file line numberDiff line numberDiff line change
@@ -70,22 +70,22 @@ version = "42.1.0"
7070
ahash = { version = "0.8", default-features = false, features = [
7171
"runtime-rng",
7272
] }
73-
arrow = { version = "53.1.0", features = [
73+
arrow = { version = "53.2.0", features = [
7474
"prettyprint",
7575
] }
76-
arrow-array = { version = "53.1.0", default-features = false, features = [
76+
arrow-array = { version = "53.2.0", default-features = false, features = [
7777
"chrono-tz",
7878
] }
79-
arrow-buffer = { version = "53.1.0", default-features = false }
80-
arrow-flight = { version = "53.1.0", features = [
79+
arrow-buffer = { version = "53.2.0", default-features = false }
80+
arrow-flight = { version = "53.2.0", features = [
8181
"flight-sql-experimental",
8282
] }
83-
arrow-ipc = { version = "53.1.0", default-features = false, features = [
83+
arrow-ipc = { version = "53.2.0", default-features = false, features = [
8484
"lz4",
8585
] }
86-
arrow-ord = { version = "53.1.0", default-features = false }
87-
arrow-schema = { version = "53.1.0", default-features = false }
88-
arrow-string = { version = "53.1.0", default-features = false }
86+
arrow-ord = { version = "53.2.0", default-features = false }
87+
arrow-schema = { version = "53.2.0", default-features = false }
88+
arrow-string = { version = "53.2.0", default-features = false }
8989
async-trait = "0.1.73"
9090
bigdecimal = "=0.4.1"
9191
bytes = "1.4"
@@ -126,7 +126,7 @@ log = "^0.4"
126126
num_cpus = "1.13.0"
127127
object_store = { version = "0.11.0", default-features = false }
128128
parking_lot = "0.12"
129-
parquet = { version = "53.1.0", default-features = false, features = [
129+
parquet = { version = "53.2.0", default-features = false, features = [
130130
"arrow",
131131
"async",
132132
"object_store",

benchmarks/src/clickbench.rs

+9-6
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,15 @@ impl RunOpt {
115115
None => queries.min_query_id()..=queries.max_query_id(),
116116
};
117117

118+
// configure parquet options
118119
let mut config = self.common.config();
119-
config
120-
.options_mut()
121-
.execution
122-
.parquet
123-
.schema_force_view_types = self.common.force_view_types;
120+
{
121+
let parquet_options = &mut config.options_mut().execution.parquet;
122+
parquet_options.schema_force_view_types = self.common.force_view_types;
123+
// The hits_partitioned dataset specifies string columns
124+
// as binary due to how it was written. Force it to strings
125+
parquet_options.binary_as_string = true;
126+
}
124127

125128
let ctx = SessionContext::new_with_config(config);
126129
self.register_hits(&ctx).await?;
@@ -148,7 +151,7 @@ impl RunOpt {
148151
Ok(())
149152
}
150153

151-
/// Registrs the `hits.parquet` as a table named `hits`
154+
/// Registers the `hits.parquet` as a table named `hits`
152155
async fn register_hits(&self, ctx: &SessionContext) -> Result<()> {
153156
let options = Default::default();
154157
let path = self.path.as_os_str().to_str().unwrap();

0 commit comments

Comments
 (0)