Skip to content

Commit 63ca714

Browse files
authored
Parse Sqllogictest column types from physical schema (#11929)
* Parse Sqllogictest column types from physical schema * Use execute_stream
1 parent fd237f8 commit 63ca714

File tree

14 files changed

+96
-90
lines changed

14 files changed

+96
-90
lines changed

datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::sync::Arc;
1819
use std::{path::PathBuf, time::Duration};
1920

2021
use arrow::record_batch::RecordBatch;
2122
use async_trait::async_trait;
23+
use datafusion::physical_plan::common::collect;
24+
use datafusion::physical_plan::execute_stream;
2225
use datafusion::prelude::SessionContext;
2326
use log::info;
2427
use sqllogictest::DBOutput;
@@ -69,9 +72,12 @@ impl sqllogictest::AsyncDB for DataFusion {
6972

7073
async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DFOutput> {
7174
let df = ctx.sql(sql.into().as_str()).await?;
75+
let task_ctx = Arc::new(df.task_ctx());
76+
let plan = df.create_physical_plan().await?;
7277

73-
let types = normalize::convert_schema_to_types(df.schema().fields());
74-
let results: Vec<RecordBatch> = df.collect().await?;
78+
let stream = execute_stream(plan, task_ctx)?;
79+
let types = normalize::convert_schema_to_types(stream.schema().fields());
80+
let results: Vec<RecordBatch> = collect(stream).await?;
7581
let rows = normalize::convert_batches(results)?;
7682

7783
if rows.is_empty() && types.is_empty() {

datafusion/sqllogictest/test_files/coalesce.slt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ select coalesce(1, 2, 3);
2323
1
2424

2525
# test with first null
26-
query ?T
26+
query IT
2727
select coalesce(null, 3, 2, 1), arrow_typeof(coalesce(null, 3, 2, 1));
2828
----
2929
3 Int64
@@ -35,7 +35,7 @@ select coalesce(null, null);
3535
NULL
3636

3737
# cast to float
38-
query IT
38+
query RT
3939
select
4040
coalesce(1, 2.0),
4141
arrow_typeof(coalesce(1, 2.0))
@@ -51,7 +51,7 @@ select
5151
----
5252
2 Float64
5353

54-
query IT
54+
query RT
5555
select
5656
coalesce(1, arrow_cast(2.0, 'Float32')),
5757
arrow_typeof(coalesce(1, arrow_cast(2.0, 'Float32')))
@@ -177,7 +177,7 @@ select
177177
2 Decimal256(22, 2)
178178

179179
# coalesce string
180-
query T?
180+
query TT
181181
select
182182
coalesce('', 'test'),
183183
coalesce(null, 'test');
@@ -246,7 +246,7 @@ drop table test1
246246
statement ok
247247
create table t(c varchar) as values ('a'), (null);
248248

249-
query TT
249+
query ?T
250250
select
251251
coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)')),
252252
arrow_typeof(coalesce(c, arrow_cast('b', 'Dictionary(Int32, Utf8)')))
@@ -295,7 +295,7 @@ statement ok
295295
drop table t;
296296

297297
# test dict(int32, int8)
298-
query I
298+
query ?
299299
select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)'));
300300
----
301301
34

datafusion/sqllogictest/test_files/copy.slt

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ statement ok
2020
create table source_table(col1 integer, col2 varchar) as values (1, 'Foo'), (2, 'Bar');
2121

2222
# Copy to directory as multiple files
23-
query IT
23+
query I
2424
COPY source_table TO 'test_files/scratch/copy/table/' STORED AS parquet OPTIONS ('format.compression' 'zstd(10)');
2525
----
2626
2
2727

2828
# Copy to directory as partitioned files
29-
query IT
29+
query I
3030
COPY source_table TO 'test_files/scratch/copy/partitioned_table1/' STORED AS parquet PARTITIONED BY (col2) OPTIONS ('format.compression' 'zstd(10)');
3131
----
3232
2
@@ -53,7 +53,7 @@ select * from validate_partitioned_parquet_bar order by col1;
5353
2
5454

5555
# Copy to directory as partitioned files
56-
query ITT
56+
query I
5757
COPY (values (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table2/' STORED AS parquet PARTITIONED BY (column2, column3)
5858
OPTIONS ('format.compression' 'zstd(10)');
5959
----
@@ -81,7 +81,7 @@ select * from validate_partitioned_parquet_a_x order by column1;
8181
1
8282

8383
# Copy to directory as partitioned files
84-
query TTT
84+
query I
8585
COPY (values ('1', 'a', 'x'), ('2', 'b', 'y'), ('3', 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table3/' STORED AS parquet PARTITIONED BY (column1, column3)
8686
OPTIONS ('format.compression' 'zstd(10)');
8787
----
@@ -167,7 +167,7 @@ physical_plan
167167
02)--MemoryExec: partitions=1, partition_sizes=[1]
168168

169169
# Copy to directory as partitioned files with keep_partition_by_columns enabled
170-
query TT
170+
query I
171171
COPY (values ('1', 'a'), ('2', 'b'), ('3', 'c')) TO 'test_files/scratch/copy/partitioned_table4/' STORED AS parquet PARTITIONED BY (column1)
172172
OPTIONS (execution.keep_partition_by_columns true);
173173
----
@@ -184,7 +184,7 @@ select column1, column2 from validate_partitioned_parquet4 order by column1,colu
184184
1 a
185185

186186
# Copy more files to directory via query
187-
query IT
187+
query I
188188
COPY (select * from source_table UNION ALL select * from source_table) to 'test_files/scratch/copy/table/' STORED AS PARQUET;
189189
----
190190
4
@@ -203,7 +203,7 @@ select * from validate_parquet;
203203
1 Foo
204204
2 Bar
205205

206-
query ?
206+
query I
207207
copy (values (struct(timestamp '2021-01-01 01:00:01', 1)), (struct(timestamp '2022-01-01 01:00:01', 2)),
208208
(struct(timestamp '2023-01-03 01:00:01', 3)), (struct(timestamp '2024-01-01 01:00:01', 4)))
209209
to 'test_files/scratch/copy/table_nested2/' STORED AS PARQUET;
@@ -221,7 +221,7 @@ select * from validate_parquet_nested2;
221221
{c0: 2023-01-03T01:00:01, c1: 3}
222222
{c0: 2024-01-01T01:00:01, c1: 4}
223223

224-
query ??
224+
query I
225225
COPY
226226
(values (struct ('foo', (struct ('foo', make_array(struct('a',1), struct('b',2))))), make_array(timestamp '2023-01-01 01:00:01',timestamp '2023-01-01 01:00:01')),
227227
(struct('bar', (struct ('foo', make_array(struct('aa',10), struct('bb',20))))), make_array(timestamp '2024-01-01 01:00:01', timestamp '2024-01-01 01:00:01')))
@@ -239,7 +239,7 @@ select * from validate_parquet_nested;
239239
{c0: foo, c1: {c0: foo, c1: [{c0: a, c1: 1}, {c0: b, c1: 2}]}} [2023-01-01T01:00:01, 2023-01-01T01:00:01]
240240
{c0: bar, c1: {c0: foo, c1: [{c0: aa, c1: 10}, {c0: bb, c1: 20}]}} [2024-01-01T01:00:01, 2024-01-01T01:00:01]
241241

242-
query ?
242+
query I
243243
copy (values ([struct('foo', 1), struct('bar', 2)]))
244244
to 'test_files/scratch/copy/array_of_struct/'
245245
STORED AS PARQUET;
@@ -255,7 +255,7 @@ select * from validate_array_of_struct;
255255
----
256256
[{c0: foo, c1: 1}, {c0: bar, c1: 2}]
257257

258-
query ?
258+
query I
259259
copy (values (struct('foo', [1,2,3], struct('bar', [2,3,4]))))
260260
to 'test_files/scratch/copy/struct_with_array/' STORED AS PARQUET;
261261
----
@@ -272,7 +272,7 @@ select * from validate_struct_with_array;
272272

273273

274274
# Copy parquet with all supported statement overrides
275-
query IT
275+
query I
276276
COPY source_table
277277
TO 'test_files/scratch/copy/table_with_options/'
278278
STORED AS PARQUET
@@ -378,7 +378,7 @@ select * from validate_parquet_with_options;
378378
2 Bar
379379

380380
# Copy from table to single file
381-
query IT
381+
query I
382382
COPY source_table to 'test_files/scratch/copy/table.parquet';
383383
----
384384
2
@@ -394,7 +394,7 @@ select * from validate_parquet_single;
394394
2 Bar
395395

396396
# copy from table to folder of compressed json files
397-
query IT
397+
query I
398398
COPY source_table to 'test_files/scratch/copy/table_json_gz' STORED AS JSON OPTIONS ('format.compression' gzip);
399399
----
400400
2
@@ -410,7 +410,7 @@ select * from validate_json_gz;
410410
2 Bar
411411

412412
# copy from table to folder of compressed csv files
413-
query IT
413+
query I
414414
COPY source_table to 'test_files/scratch/copy/table_csv' STORED AS CSV OPTIONS ('format.has_header' false, 'format.compression' gzip);
415415
----
416416
2
@@ -426,7 +426,7 @@ select * from validate_csv;
426426
2 Bar
427427

428428
# Copy from table to single csv
429-
query IT
429+
query I
430430
COPY source_table to 'test_files/scratch/copy/table.csv';
431431
----
432432
2
@@ -442,7 +442,7 @@ select * from validate_single_csv;
442442
2 Bar
443443

444444
# Copy from table to folder of json
445-
query IT
445+
query I
446446
COPY source_table to 'test_files/scratch/copy/table_json' STORED AS JSON;
447447
----
448448
2
@@ -458,7 +458,7 @@ select * from validate_json;
458458
2 Bar
459459

460460
# Copy from table to single json file
461-
query IT
461+
query I
462462
COPY source_table to 'test_files/scratch/copy/table.json' STORED AS JSON ;
463463
----
464464
2
@@ -474,7 +474,7 @@ select * from validate_single_json;
474474
2 Bar
475475

476476
# COPY csv files with all options set
477-
query IT
477+
query I
478478
COPY source_table
479479
to 'test_files/scratch/copy/table_csv_with_options'
480480
STORED AS CSV OPTIONS (
@@ -499,7 +499,7 @@ select * from validate_csv_with_options;
499499
2;Bar
500500

501501
# Copy from table to single arrow file
502-
query IT
502+
query I
503503
COPY source_table to 'test_files/scratch/copy/table.arrow' STORED AS ARROW;
504504
----
505505
2
@@ -517,7 +517,7 @@ select * from validate_arrow_file;
517517
2 Bar
518518

519519
# Copy from dict encoded values to single arrow file
520-
query T?
520+
query I
521521
COPY (values
522522
('c', arrow_cast('foo', 'Dictionary(Int32, Utf8)')), ('d', arrow_cast('bar', 'Dictionary(Int32, Utf8)')))
523523
to 'test_files/scratch/copy/table_dict.arrow' STORED AS ARROW;
@@ -538,7 +538,7 @@ d bar
538538

539539

540540
# Copy from table to folder of arrow files
541-
query IT
541+
query I
542542
COPY source_table to 'test_files/scratch/copy/table_arrow' STORED AS ARROW;
543543
----
544544
2
@@ -556,7 +556,7 @@ select * from validate_arrow;
556556
# Format Options Support without the 'format.' prefix
557557

558558
# Copy with format options for Parquet without the 'format.' prefix
559-
query IT
559+
query I
560560
COPY source_table TO 'test_files/scratch/copy/format_table.parquet'
561561
OPTIONS (
562562
compression snappy,
@@ -566,14 +566,14 @@ OPTIONS (
566566
2
567567

568568
# Copy with format options for JSON without the 'format.' prefix
569-
query IT
569+
query I
570570
COPY source_table to 'test_files/scratch/copy/format_table'
571571
STORED AS JSON OPTIONS (compression gzip);
572572
----
573573
2
574574

575575
# Copy with format options for CSV without the 'format.' prefix
576-
query IT
576+
query I
577577
COPY source_table to 'test_files/scratch/copy/format_table.csv'
578578
OPTIONS (
579579
has_header false,

datafusion/sqllogictest/test_files/csv_files.slt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,14 @@ CREATE TABLE src_table_2 (
115115
(7, 'ggg', 700, 2),
116116
(8, 'hhh', 800, 2);
117117

118-
query ITII
118+
query I
119119
COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv'
120120
STORED AS CSV;
121121
----
122122
4
123123

124124

125-
query ITII
125+
query I
126126
COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv'
127127
STORED AS CSV;
128128
----
@@ -175,7 +175,7 @@ CREATE TABLE table_with_necessary_quoting (
175175
(4, 'h|h|h');
176176

177177
# quote is required because `|` is delimiter and part of the data
178-
query IT
178+
query I
179179
COPY table_with_necessary_quoting TO 'test_files/scratch/csv_files/table_with_necessary_quoting.csv'
180180
STORED AS csv
181181
OPTIONS ('format.quote' '~',
@@ -247,7 +247,7 @@ id2 "value2"
247247
id3 "value3"
248248

249249
# ensure that double quote option is used when writing to csv
250-
query TT
250+
query I
251251
COPY csv_with_double_quote TO 'test_files/scratch/csv_files/table_with_double_quotes.csv'
252252
STORED AS csv
253253
OPTIONS ('format.double_quote' 'true');
@@ -271,7 +271,7 @@ id2 "value2"
271271
id3 "value3"
272272

273273
# ensure when double quote option is disabled that quotes are escaped instead
274-
query TT
274+
query I
275275
COPY csv_with_double_quote TO 'test_files/scratch/csv_files/table_with_escaped_quotes.csv'
276276
STORED AS csv
277277
OPTIONS ('format.double_quote' 'false', 'format.escape' '#');

0 commit comments

Comments
 (0)