Skip to content

Commit 3104474

Browse files
committed
Add JSON writer benchmarks (#5314)
1 parent 639e81e commit 3104474

File tree

2 files changed

+193
-0
lines changed

2 files changed

+193
-0
lines changed

arrow/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,11 @@ name = "json_reader"
195195
harness = false
196196
required-features = ["test_utils", "json"]
197197

198+
[[bench]]
199+
name = "json_writer"
200+
harness = false
201+
required-features = ["test_utils", "json"]
202+
198203
[[bench]]
199204
name = "equal"
200205
harness = false

arrow/benches/json_writer.rs

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use criterion::*;
19+
20+
use arrow::datatypes::*;
21+
use arrow::util::bench_util::{
22+
create_primitive_array, create_string_array, create_string_array_with_len,
23+
create_string_dict_array,
24+
};
25+
use arrow::util::test_util::seedable_rng;
26+
use arrow_array::{Array, ListArray, RecordBatch, StructArray};
27+
use arrow_buffer::{BooleanBuffer, NullBuffer, OffsetBuffer};
28+
use arrow_json::LineDelimitedWriter;
29+
use rand::Rng;
30+
use std::sync::Arc;
31+
32+
const NUM_ROWS: usize = 65536;
33+
34+
fn do_bench(c: &mut Criterion, name: &str, batch: &RecordBatch) {
35+
c.bench_function(name, |b| {
36+
b.iter(|| {
37+
let mut out = Vec::with_capacity(1024);
38+
LineDelimitedWriter::new(&mut out).write(&batch).unwrap();
39+
out
40+
})
41+
});
42+
}
43+
44+
fn create_mixed(len: usize) -> RecordBatch {
45+
let c1 = Arc::new(create_string_array::<i32>(len, 0.));
46+
let c2 = Arc::new(create_primitive_array::<Int32Type>(len, 0.));
47+
let c3 = Arc::new(create_primitive_array::<UInt32Type>(len, 0.));
48+
let c4 = Arc::new(create_string_array_with_len::<i32>(len, 0.2, 10));
49+
let c5 = Arc::new(create_string_array_with_len::<i32>(len, 0.2, 20));
50+
let c6 = Arc::new(create_primitive_array::<Float32Type>(len, 0.2));
51+
RecordBatch::try_from_iter([
52+
("c1", c1 as _),
53+
("c2", c2 as _),
54+
("c3", c3 as _),
55+
("c4", c4 as _),
56+
("c5", c5 as _),
57+
("c6", c6 as _),
58+
])
59+
.unwrap()
60+
}
61+
62+
fn create_nulls(len: usize) -> NullBuffer {
63+
let mut rng = seedable_rng();
64+
BooleanBuffer::from_iter((0..len).map(|_| rng.gen_bool(0.2))).into()
65+
}
66+
67+
fn create_offsets(len: usize) -> (usize, OffsetBuffer<i32>) {
68+
let mut rng = seedable_rng();
69+
let mut last_offset = 0;
70+
let mut offsets = Vec::with_capacity(len + 1);
71+
offsets.push(0);
72+
for _ in 0..len {
73+
let len = rng.gen_range(0..10);
74+
offsets.push(last_offset + len);
75+
last_offset += len;
76+
}
77+
(
78+
*offsets.last().unwrap() as _,
79+
OffsetBuffer::new(offsets.into()),
80+
)
81+
}
82+
83+
fn create_nullable_struct(len: usize) -> StructArray {
84+
let c2 = StructArray::from(create_mixed(len));
85+
StructArray::new(
86+
c2.fields().clone(),
87+
c2.columns().to_vec(),
88+
Some(create_nulls(c2.len())),
89+
)
90+
}
91+
92+
fn bench_primitive(c: &mut Criterion) {
93+
let c1 = Arc::new(create_string_array::<i32>(NUM_ROWS, 0.));
94+
let c2 = Arc::new(create_primitive_array::<Int32Type>(NUM_ROWS, 0.));
95+
let c3 = Arc::new(create_primitive_array::<UInt32Type>(NUM_ROWS, 0.));
96+
97+
let batch =
98+
RecordBatch::try_from_iter([("c1", c1 as _), ("c2", c2 as _), ("c3", c3 as _)]).unwrap();
99+
100+
do_bench(c, "bench_primitive", &batch)
101+
}
102+
103+
fn bench_mixed(c: &mut Criterion) {
104+
let batch = create_mixed(NUM_ROWS);
105+
do_bench(c, "bench_mixed", &batch)
106+
}
107+
108+
fn bench_dict_array(c: &mut Criterion) {
109+
let c1 = Arc::new(create_string_array::<i32>(NUM_ROWS, 0.));
110+
let c2 = Arc::new(create_string_dict_array::<Int32Type>(NUM_ROWS, 0., 20));
111+
let c3 = Arc::new(create_string_dict_array::<Int32Type>(NUM_ROWS, 0.1, 20));
112+
113+
let batch =
114+
RecordBatch::try_from_iter([("c1", c1 as _), ("c2", c2 as _), ("c3", c3 as _)]).unwrap();
115+
116+
do_bench(c, "bench_dict_array", &batch)
117+
}
118+
119+
fn bench_string_array(c: &mut Criterion) {
120+
let c1 = Arc::new(create_string_array::<i32>(NUM_ROWS, 0.));
121+
let c2 = Arc::new(create_string_dict_array::<Int32Type>(NUM_ROWS, 0., 20));
122+
let c3 = Arc::new(create_string_dict_array::<Int32Type>(NUM_ROWS, 0.1, 20));
123+
124+
let batch =
125+
RecordBatch::try_from_iter([("c1", c1 as _), ("c2", c2 as _), ("c3", c3 as _)]).unwrap();
126+
127+
do_bench(c, "bench_dict_array", &batch)
128+
}
129+
130+
fn bench_struct(c: &mut Criterion) {
131+
let c1 = Arc::new(create_string_array::<i32>(NUM_ROWS, 0.));
132+
let c2 = Arc::new(StructArray::from(create_mixed(NUM_ROWS)));
133+
let batch = RecordBatch::try_from_iter([("c1", c1 as _), ("c2", c2 as _)]).unwrap();
134+
135+
do_bench(c, "bench_struct", &batch)
136+
}
137+
138+
fn bench_nullable_struct(c: &mut Criterion) {
139+
let c1 = Arc::new(create_string_array::<i32>(NUM_ROWS, 0.));
140+
let c2 = Arc::new(create_nullable_struct(NUM_ROWS));
141+
let batch = RecordBatch::try_from_iter([("c1", c1 as _), ("c2", c2 as _)]).unwrap();
142+
143+
do_bench(c, "bench_nullable_struct", &batch)
144+
}
145+
146+
fn bench_list(c: &mut Criterion) {
147+
let (values_len, offsets) = create_offsets(NUM_ROWS);
148+
let c1_values = Arc::new(create_string_array::<i32>(values_len, 0.));
149+
let c1_field = Arc::new(Field::new_list_field(c1_values.data_type().clone(), false));
150+
let c1 = Arc::new(ListArray::new(c1_field, offsets, c1_values, None));
151+
let batch = RecordBatch::try_from_iter([("c1", c1 as _)]).unwrap();
152+
do_bench(c, "bench_list", &batch)
153+
}
154+
155+
fn bench_nullable_list(c: &mut Criterion) {
156+
let (values_len, offsets) = create_offsets(NUM_ROWS);
157+
let c1_values = Arc::new(create_string_array::<i32>(values_len, 0.1));
158+
let c1_field = Arc::new(Field::new_list_field(c1_values.data_type().clone(), true));
159+
let c1_nulls = create_nulls(NUM_ROWS);
160+
let c1 = Arc::new(ListArray::new(c1_field, offsets, c1_values, Some(c1_nulls)));
161+
let batch = RecordBatch::try_from_iter([("c1", c1 as _)]).unwrap();
162+
do_bench(c, "bench_nullable_list", &batch)
163+
}
164+
165+
fn bench_struct_list(c: &mut Criterion) {
166+
let (values_len, offsets) = create_offsets(NUM_ROWS);
167+
let c1_values = Arc::new(create_nullable_struct(values_len));
168+
let c1_field = Arc::new(Field::new_list_field(c1_values.data_type().clone(), true));
169+
let c1_nulls = create_nulls(NUM_ROWS);
170+
let c1 = Arc::new(ListArray::new(c1_field, offsets, c1_values, Some(c1_nulls)));
171+
let batch = RecordBatch::try_from_iter([("c1", c1 as _)]).unwrap();
172+
do_bench(c, "bench_struct_list", &batch)
173+
}
174+
175+
fn criterion_benchmark(c: &mut Criterion) {
176+
bench_primitive(c);
177+
bench_string_array(c);
178+
bench_mixed(c);
179+
bench_dict_array(c);
180+
bench_struct(c);
181+
bench_nullable_struct(c);
182+
bench_list(c);
183+
bench_nullable_list(c);
184+
bench_struct_list(c);
185+
}
186+
187+
criterion_group!(benches, criterion_benchmark);
188+
criterion_main!(benches);

0 commit comments

Comments
 (0)