@@ -23,15 +23,20 @@ use std::sync::Arc;
23
23
24
24
use arrow:: ipc:: reader:: FileReader ;
25
25
use arrow:: util:: pretty;
26
+ use datafusion:: datasource:: physical_plan:: parquet:: Parquet7FileReaderFactory ;
27
+ use datafusion:: execution:: cache:: cache_unit:: Cache37 ;
28
+ use datafusion:: execution:: object_store:: ObjectStoreUrl ;
26
29
use datafusion:: physical_plan:: collect;
27
30
use datafusion:: physical_plan:: display:: DisplayableExecutionPlan ;
31
+ use datafusion:: prelude:: ParquetReadOptions ;
28
32
use datafusion:: {
29
33
error:: { DataFusionError , Result } ,
30
34
prelude:: SessionContext ,
31
35
} ;
32
36
use datafusion_common:: exec_datafusion_err;
33
37
use datafusion_common:: instant:: Instant ;
34
38
use object_store:: aws:: AmazonS3Builder ;
39
+ use object_store:: ObjectStore ;
35
40
use parquet:: arrow:: builder:: ArrowArrayCache ;
36
41
use structopt:: StructOpt ;
37
42
use url:: Url ;
@@ -89,6 +94,10 @@ pub struct RunOpt {
89
94
/// Check the answers against the stored answers
90
95
#[ structopt( long) ]
91
96
skip_answers : bool ,
97
+
98
+ /// Generate a flamegraph
99
+ #[ structopt( parse( from_os_str) , long) ]
100
+ flamegraph : Option < PathBuf > ,
92
101
}
93
102
94
103
struct AllQueries {
@@ -140,6 +149,7 @@ impl AllQueries {
140
149
self . queries . len ( ) - 1
141
150
}
142
151
}
152
+
143
153
impl RunOpt {
144
154
pub async fn run ( self ) -> Result < ( ) > {
145
155
println ! ( "Running benchmarks with the following options: {self:?}" ) ;
@@ -168,8 +178,18 @@ impl RunOpt {
168
178
println ! ( "Q{query_id}: {sql}" ) ;
169
179
170
180
for i in 0 ..iterations {
181
+ let profiler_guard = if self . flamegraph . is_some ( ) && i == iterations - 1 {
182
+ Some (
183
+ pprof:: ProfilerGuardBuilder :: default ( )
184
+ . frequency ( 1000 )
185
+ . blocklist ( & [ "libc" , "libgcc" , "pthread" , "vdso" ] )
186
+ . build ( )
187
+ . unwrap ( ) ,
188
+ )
189
+ } else {
190
+ None
191
+ } ;
171
192
let start = Instant :: now ( ) ;
172
- // let results = ctx.sql(sql).await?.collect().await?;
173
193
let plan = ctx. sql ( sql) . await ?;
174
194
let ( state, plan) = plan. into_parts ( ) ;
175
195
@@ -252,28 +272,50 @@ impl RunOpt {
252
272
println ! ( "Query {} iteration {} answer not checked" , query_id, i) ;
253
273
}
254
274
275
+ if let Some ( guard) = profiler_guard {
276
+ let flamegraph_path = self . flamegraph . as_ref ( ) . unwrap ( ) ;
277
+ if let Ok ( report) = guard. report ( ) . build ( ) {
278
+ let file = File :: create ( flamegraph_path) . unwrap ( ) ;
279
+ report. flamegraph ( file) . unwrap ( ) ;
280
+ }
281
+ }
282
+
255
283
benchmark_run. write_iter ( elapsed, row_count) ;
256
284
}
257
285
}
286
+
258
287
benchmark_run. set_cache_stats ( ArrowArrayCache :: get ( ) . stats ( ) ) ;
288
+ benchmark_run. set_parquet_cache_size ( Cache37 :: memory_usage ( ) ) ;
259
289
benchmark_run. maybe_write_json ( self . output_path . as_ref ( ) ) ?;
260
290
Ok ( ( ) )
261
291
}
262
292
263
293
/// Registers the `hits.parquet` as a table named `hits`
264
294
async fn register_hits ( & self , ctx : & SessionContext ) -> Result < ( ) > {
265
- let options = Default :: default ( ) ;
266
295
let path = self . path . as_os_str ( ) . to_str ( ) . unwrap ( ) ;
267
- let url = Url :: parse ( & "minio://parquet-oo" ) . unwrap ( ) ;
268
- let object_store = AmazonS3Builder :: new ( )
269
- . with_bucket_name ( "parquet-oo" )
270
- . with_endpoint ( "http://c220g5-110910.wisc.cloudlab.us:9000" )
271
- . with_allow_http ( true )
272
- . with_region ( "us-east-1" )
273
- . with_access_key_id ( env:: var ( "MINIO_ACCESS_KEY_ID" ) . unwrap ( ) )
274
- . with_secret_access_key ( env:: var ( "MINIO_SECRET_ACCESS_KEY" ) . unwrap ( ) )
275
- . build ( ) ?;
276
- ctx. register_object_store ( & url, Arc :: new ( object_store) ) ;
296
+
297
+ let object_store: Arc < dyn ObjectStore > = if path. starts_with ( "minio://" ) {
298
+ let url = Url :: parse ( path) . unwrap ( ) ;
299
+ let bucket_name = url. host_str ( ) . unwrap_or ( "parquet-oo" ) ;
300
+ let object_store = AmazonS3Builder :: new ( )
301
+ . with_bucket_name ( bucket_name)
302
+ . with_endpoint ( "http://c220g5-110910.wisc.cloudlab.us:9000" )
303
+ . with_allow_http ( true )
304
+ . with_region ( "us-east-1" )
305
+ . with_access_key_id ( env:: var ( "MINIO_ACCESS_KEY_ID" ) . unwrap ( ) )
306
+ . with_secret_access_key ( env:: var ( "MINIO_SECRET_ACCESS_KEY" ) . unwrap ( ) )
307
+ . build ( ) ?;
308
+ let object_store = Arc :: new ( object_store) ;
309
+ ctx. register_object_store ( & url, object_store. clone ( ) ) ;
310
+ object_store
311
+ } else {
312
+ let url = ObjectStoreUrl :: local_filesystem ( ) ;
313
+ let object_store = ctx. runtime_env ( ) . object_store ( url) . unwrap ( ) ;
314
+ Arc :: new ( object_store)
315
+ } ;
316
+
317
+ let mut options: ParquetReadOptions < ' _ > = Default :: default ( ) ;
318
+ options. reader = Some ( Arc :: new ( Parquet7FileReaderFactory :: new ( object_store) ) ) ;
277
319
278
320
ctx. register_parquet ( "hits" , & path, options)
279
321
. await
0 commit comments