Skip to content

Commit

Permalink
DRILL-6865: Added tests to verify the bug fix. (#524)
Browse files Browse the repository at this point in the history
Cases covered:
1. Filter pruning must not be applied if at least one filter cannot be pruned (a 'like' operator or an expression on a column).
2. Filter pruning should work for a single Parquet file with a single row group.
  • Loading branch information
agozhiy authored and Agirish committed Dec 27, 2018
1 parent f781bc4 commit d1b32d9
Show file tree
Hide file tree
Showing 268 changed files with 416 additions and 351 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create table if not exists dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_files` (c1, c2, c3, c4, c5)
create table if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_files` (c1, c2, c3, c4, c5)
partition by (c1) as
select cast(columns[0] as int) c1, columns[1] c2, columns[2] c3, columns[3] c4, columns[4] c5
from dfs.drillTestDir.`parquet_storage/DRILL_6118/DRILL_6118_data_source.csv`;
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_data_source.csv`;

create view if not exists dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_files_view` as
create view if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_files_view` as
select *
from dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_files`;
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_files`;
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
create table if not exists dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_folders/d1` (c1, c2, c3, c4, c5)
create table if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_folders/d1` (c1, c2, c3, c4, c5)
as select cast(columns[0] as int) c1, columns[1] c2, columns[2] c3, columns[3] c4, columns[4] c5
from dfs.drillTestDir.`parquet_storage/DRILL_6118/DRILL_6118_data_source.csv`
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_data_source.csv`
where columns[0] in (1, 3);
create table if not exists dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_folders/d2` (c1, c2, c3, c4, c5)
create table if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_folders/d2` (c1, c2, c3, c4, c5)
as select cast(columns[0] as int) c1, columns[1] c2, columns[2] c3, columns[3] c4, columns[4] c5
from dfs.drillTestDir.`parquet_storage/DRILL_6118/DRILL_6118_data_source.csv`
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_data_source.csv`
where columns[0]=2;
create table if not exists dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_folders/d3` (c1, c2, c3, c4, c5)
create table if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_folders/d3` (c1, c2, c3, c4, c5)
as select cast(columns[0] as int) c1, columns[1] c2, columns[2] c3, columns[3] c4, columns[4] c5
from dfs.drillTestDir.`parquet_storage/DRILL_6118/DRILL_6118_data_source.csv`
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_data_source.csv`
where columns[0]>3;

create view if not exists dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_folders_view` as
create view if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_folders_view` as
select *
from dfs.drillTestDir.`DRILL_6118_parquet_partitioned_by_folders`;
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_partitioned_by_folders`;
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
create table if not exists dfs.drillTestDir.`DRILL_6118_parquet_without_partitioning` (c1, c2, c3, c4, c5)
create table if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_without_partitioning` (c1, c2, c3, c4, c5)
as select cast(columns[0] as int) c1, columns[1] c2, columns[2] c3, columns[3] c4, columns[4] c5
from dfs.drillTestDir.`parquet_storage/DRILL_6118/DRILL_6118_data_source.csv`;
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_data_source.csv`;

create view if not exists dfs.drillTestDir.`DRILL_6118_parquet_without_partitioning_view` as
create view if not exists dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_without_partitioning_view` as
select *
from dfs.drillTestDir.`DRILL_6118_parquet_without_partitioning`;
from dfs.drillTestDir.`filter/pushdown/DRILL_6118_parquet_without_partitioning`;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 = false;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 = false;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 = false;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 = false;
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
numFiles=1
numRowGroups=1
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 = false and c3 is not null;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 = false and c3 is not null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 = false and c3 is not null;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 = false and c3 is not null;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c3 is not null;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c3 is not null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c3 is not null;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c3 is not null;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 is not true;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 is not true;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 is not true;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 is not true;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c3 is null;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c3 is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c3 is null;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c3 is null;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 is true;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 is true;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where c2 is true;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where c2 is true;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c2 = false;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c2 = false;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c2 = false;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c2 = false;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c2 is false;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c2 is false;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c2 is false;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c2 is false;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c2 = false or c3 is null;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c2 = false or c3 is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c2 = false or c3 is null;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c2 = false or c3 is null;
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c3 is null;
select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c3 is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
explain plan for select c1, c2, c3 from `parquet_storage/DRILL_6174/DRILL_6174_test_data` where not c3 is null;
explain plan for select c1, c2, c3 from `filter/pushdown/DRILL_6174_test_data` where not c3 is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select count(id) from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where str is null;
select count(id) from `filter/pushdown/DRILL_6603_test_data` where str is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select count(*) from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where str is null;
select count(*) from `filter/pushdown/DRILL_6603_test_data` where str is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select id from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where not str is null;
select id from `filter/pushdown/DRILL_6603_test_data` where not str is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select id from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where str is null;
select id from `filter/pushdown/DRILL_6603_test_data` where str is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select * from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where str is not null;
select * from `filter/pushdown/DRILL_6603_test_data` where str is not null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select * from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where str is null;
select * from `filter/pushdown/DRILL_6603_test_data` where str is null;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
select * from `parquet_storage/DRILL_6603/DRILL_6603_test_data` where str is null and id < 5;
select * from `filter/pushdown/DRILL_6603_test_data` where str is null and id < 5;
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
{
"mode": "cp",
"src": "Datasources/parquet_storage/DRILL_6174/DRILL_6174_test_data",
"dest": "parquet_storage/DRILL_6174/DRILL_6174_test_data"
"dest": "filter/pushdown/DRILL_6174_test_data"
},
{
"mode": "cp",
"src": "Datasources/parquet_storage/DRILL_6603/DRILL_6603_test_data",
"dest": "parquet_storage/DRILL_6603/DRILL_6603_test_data"
"dest": "filter/pushdown/DRILL_6603_test_data"
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
{
"mode": "cp",
"src": "Datasources/parquet_storage/DRILL_6174/DRILL_6174_test_data",
"dest": "parquet_storage/DRILL_6174/DRILL_6174_test_data"
"dest": "filter/pushdown/DRILL_6174_test_data"
}
]
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
numFiles=1
numRowGroups=1
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c5 < cast(date '2017-01-05' as date);
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
explain plan for
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c5 < cast(date '2017-01-05' as date);
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c4 > cast(date '2017-01-19' as timestamp);
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
explain plan for
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c4 > cast(date '2017-01-19' as timestamp);
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d3/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d3/0_0_0.parquet
numFiles=1
numRowGroups=1
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c5 > cast(date '2017-01-19' as varchar);
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
explain plan for
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c5 > cast(date '2017-01-19' as varchar);
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c6 = cast(time '03:08:47' as time);
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
explain plan for
select cast(c4 as varchar), cast(c5 as varchar), cast(c6 as varchar)
from `parquet_storage/DRILL_6174/DRILL_6174_test_data`
from `filter/pushdown/DRILL_6174_test_data`
where c6 = cast(time '03:08:47' as time);
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Scan
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/parquet_storage/DRILL_6174/DRILL_6174_test_data/d2/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d1/0_0_0.parquet
path=.*/filter/pushdown/DRILL_6174_test_data/d2/0_0_0.parquet
numFiles=2
numRowGroups=2
Loading

0 comments on commit d1b32d9

Please sign in to comment.