@@ -23,12 +23,10 @@ use datafusion::datasource::listing::ListingOptions;
23
23
use datafusion:: datasource:: MemTable ;
24
24
use datafusion:: error:: { DataFusionError , Result } ;
25
25
use datafusion:: prelude:: SessionContext ;
26
- use datafusion_common:: exec_datafusion_err;
26
+ use datafusion_common:: { assert_batches_eq , exec_datafusion_err} ;
27
27
use object_store:: local:: LocalFileSystem ;
28
28
use std:: path:: Path ;
29
29
use std:: sync:: Arc ;
30
- use std:: time:: Duration ;
31
- use tokio:: time:: timeout;
32
30
33
31
/// Examples of various ways to execute queries using SQL
34
32
///
@@ -52,17 +50,30 @@ pub async fn query_memtable() -> Result<()> {
52
50
// Register the in-memory table containing the data
53
51
ctx. register_table ( "users" , Arc :: new ( mem_table) ) ?;
54
52
53
+ // running a SQL query results in a "DataFrame", which can be used
54
+ // to execute the query and collect the results
55
55
let dataframe = ctx. sql ( "SELECT * FROM users;" ) . await ?;
56
56
57
- timeout ( Duration :: from_secs ( 10 ) , async move {
58
- let result = dataframe. collect ( ) . await . unwrap ( ) ;
59
- let record_batch = result. first ( ) . unwrap ( ) ;
60
-
61
- assert_eq ! ( 1 , record_batch. column( 0 ) . len( ) ) ;
62
- dbg ! ( record_batch. columns( ) ) ;
63
- } )
64
- . await
65
- . unwrap ( ) ;
57
+ // Calling 'show' on the dataframe will execute the query and
58
+ // print the results
59
+ dataframe. clone ( ) . show ( ) . await ?;
60
+
61
+ // calling 'collect' on the dataframe will execute the query and
62
+ // buffer the results into a vector of RecordBatch. There are other
63
+ // APIs on DataFrame for incrementally generating results (e.g. streaming)
64
+ let result = dataframe. collect ( ) . await ?;
65
+
66
+ // Use the assert_batches_eq macro to compare the results
67
+ assert_batches_eq ! (
68
+ [
69
+ "+----+--------------+" ,
70
+ "| id | bank_account |" ,
71
+ "+----+--------------+" ,
72
+ "| 1 | 9000 |" ,
73
+ "+----+--------------+" ,
74
+ ] ,
75
+ & result
76
+ ) ;
66
77
67
78
Ok ( ( ) )
68
79
}
@@ -133,7 +144,16 @@ async fn query_parquet() -> Result<()> {
133
144
. await ?;
134
145
135
146
// print the results
136
- df. show ( ) . await ?;
147
+ let results = df. collect ( ) . await ?;
148
+ assert_batches_eq ! (
149
+ [
150
+ "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+" ,
151
+ "| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |" ,
152
+ "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+" ,
153
+ "| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |" ,
154
+ "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+" ,
155
+ ] ,
156
+ & results) ;
137
157
138
158
// Second example were we temporarily move into the test data's parent directory and
139
159
// simulate a relative path, this requires registering an ObjectStore.
@@ -173,7 +193,16 @@ async fn query_parquet() -> Result<()> {
173
193
. await ?;
174
194
175
195
// print the results
176
- df. show ( ) . await ?;
196
+ let results = df. collect ( ) . await ?;
197
+ assert_batches_eq ! (
198
+ [
199
+ "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+" ,
200
+ "| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |" ,
201
+ "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+" ,
202
+ "| 4 | true | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 30332f30312f3039 | 30 | 2009-03-01T00:00:00 |" ,
203
+ "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+" ,
204
+ ] ,
205
+ & results) ;
177
206
178
207
// Reset the current directory
179
208
std:: env:: set_current_dir ( cur_dir) ?;
0 commit comments