Skip to content

Commit 9eca58e

Browse files
wangyum and dongjoon-hyun
authored and committed
[SPARK-28334][SQL][TEST] Port select.sql
## What changes were proposed in this pull request? This PR ports select.sql from the PostgreSQL regression tests: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select.sql The expected results can be found at: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/select.out While porting the test cases, we found four PostgreSQL-specific features that do not exist in Spark SQL: [SPARK-28010](https://issues.apache.org/jira/browse/SPARK-28010): Support ORDER BY ... USING syntax [SPARK-28329](https://issues.apache.org/jira/browse/SPARK-28329): Support SELECT INTO syntax [SPARK-28330](https://issues.apache.org/jira/browse/SPARK-28330): Enhance query limit [SPARK-28296](https://issues.apache.org/jira/browse/SPARK-28296): Improved VALUES support We also found one inconsistent behavior: [SPARK-28333](https://issues.apache.org/jira/browse/SPARK-28333): `NULLS FIRST` for `DESC` and `NULLS LAST` for `ASC` ## How was this patch tested? N/A Closes apache#25096 from wangyum/SPARK-28334. Authored-by: Yuming Wang <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent f830005 commit 9eca58e

File tree

2 files changed

+828
-0
lines changed

2 files changed

+828
-0
lines changed
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
--
2+
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
3+
--
4+
--
5+
-- SELECT
6+
-- Test basic SELECT queries (ORDER BY, VALUES lists, NULL ordering).
7+
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select.sql
8+
--
9+
-- onek2 exposes every row and column of onek under a second name; the
-- onek2 queries later in this script read from it.
CREATE OR REPLACE TEMPORARY VIEW onek2 AS
SELECT * FROM onek;
10+
-- Five (q1, q2) rows of BIGINT values, built from padded strings, signed
-- strings and one signed numeric literal.
CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS
SELECT *
FROM VALUES
    (CAST(TRIM(' 123 ') AS BIGINT), CAST(TRIM(' 456') AS BIGINT)),
    (CAST(TRIM('123 ') AS BIGINT), CAST('4567890123456789' AS BIGINT)),
    (CAST('4567890123456789' AS BIGINT), CAST('123' AS BIGINT)),
    (CAST(+4567890123456789 AS BIGINT), CAST('4567890123456789' AS BIGINT)),
    (CAST('+4567890123456789' AS BIGINT), CAST('-4567890123456789' AS BIGINT))
AS INT8_TBL (q1, q2);
17+
18+
-- btree index
19+
-- awk '{if($1<10){print;}else{next;}}' onek.data | sort +0n -1
20+
--
21+
-- All onek columns for unique1 below 10, ascending by unique1.
SELECT *
FROM onek AS o
WHERE o.unique1 < 10
ORDER BY o.unique1;
24+
25+
-- [SPARK-28010] Support ORDER BY ... USING syntax
26+
--
27+
-- awk '{if($1<20){print $1,$14;}else{next;}}' onek.data | sort +0nr -1
28+
--
29+
-- unique1/stringu1 pairs for unique1 below 20, largest unique1 first.
SELECT
    o.unique1,
    o.stringu1
FROM onek AS o
WHERE o.unique1 < 20
ORDER BY o.unique1 DESC;
32+
33+
--
34+
-- awk '{if($1>980){print $1,$14;}else{next;}}' onek.data | sort +1d -2
35+
--
36+
-- unique1/stringu1 pairs for unique1 above 980, ascending by stringu1.
SELECT
    o.unique1,
    o.stringu1
FROM onek AS o
WHERE o.unique1 > 980
ORDER BY o.stringu1 ASC;
39+
40+
--
41+
-- awk '{if($1>980){print $1,$16;}else{next;}}' onek.data |
42+
-- sort +1d -2 +0nr -1
43+
--
44+
-- unique1/string4 pairs for unique1 above 980: string4 ascending, ties
-- broken by unique1 descending.
SELECT
    o.unique1,
    o.string4
FROM onek AS o
WHERE o.unique1 > 980
ORDER BY
    o.string4 ASC,
    o.unique1 DESC;
47+
48+
--
49+
-- awk '{if($1>980){print $1,$16;}else{next;}}' onek.data |
50+
-- sort +1dr -2 +0n -1
51+
--
52+
-- unique1/string4 pairs for unique1 above 980: string4 descending, ties
-- broken by unique1 ascending.
SELECT
    o.unique1,
    o.string4
FROM onek AS o
WHERE o.unique1 > 980
ORDER BY
    o.string4 DESC,
    o.unique1 ASC;
55+
56+
--
57+
-- awk '{if($1<20){print $1,$16;}else{next;}}' onek.data |
58+
-- sort +0nr -1 +1d -2
59+
--
60+
-- unique1/string4 pairs for unique1 below 20: unique1 descending, then
-- string4 ascending.
SELECT
    o.unique1,
    o.string4
FROM onek AS o
WHERE o.unique1 < 20
ORDER BY
    o.unique1 DESC,
    o.string4 ASC;
63+
64+
--
65+
-- awk '{if($1<20){print $1,$16;}else{next;}}' onek.data |
66+
-- sort +0n -1 +1dr -2
67+
--
68+
-- unique1/string4 pairs for unique1 below 20: unique1 ascending, then
-- string4 descending.
SELECT
    o.unique1,
    o.string4
FROM onek AS o
WHERE o.unique1 < 20
ORDER BY
    o.unique1 ASC,
    o.string4 DESC;
71+
72+
--
73+
-- test partial btree indexes
74+
--
75+
-- As of 7.2, planner probably won't pick an indexscan without stats,
76+
-- so ANALYZE first. Also, we want to prevent it from picking a bitmapscan
77+
-- followed by sort, because that could hide index ordering problems.
78+
--
79+
-- ANALYZE onek2;
80+
81+
-- SET enable_seqscan TO off;
82+
-- SET enable_bitmapscan TO off;
83+
-- SET enable_sort TO off;
84+
85+
--
86+
-- awk '{if($1<10){print $0;}else{next;}}' onek.data | sort +0n -1
87+
--
88+
-- The recipe comment above describes output sorted numerically on the first
-- field (`sort +0n -1`), but the statement carried no ORDER BY. The disabled
-- SET enable_* lines above suggest the upstream test relied on index
-- ordering; nothing in this script provides that, so ask for the order
-- explicitly.
-- NOTE(review): the recorded expected output for this statement must be
-- regenerated after this change.
SELECT onek2.*
FROM onek2
WHERE onek2.unique1 < 10
ORDER BY onek2.unique1;
89+
90+
--
91+
-- awk '{if($1<20){print $1,$14;}else{next;}}' onek.data | sort +0nr -1
92+
--
93+
-- unique1/stringu1 pairs from onek2 for unique1 below 20, largest first.
SELECT
    v.unique1,
    v.stringu1
FROM onek2 AS v
WHERE v.unique1 < 20
ORDER BY v.unique1 DESC;
96+
97+
--
98+
-- awk '{if($1>980){print $1,$14;}else{next;}}' onek.data | sort +1d -2
99+
--
100+
-- The recipe comment above describes output sorted on the second field
-- (`sort +1d -2`), i.e. stringu1, but the statement carried no ORDER BY.
-- Request that order explicitly, mirroring the equivalent query on onek
-- earlier in this script.
-- NOTE(review): the recorded expected output for this statement must be
-- regenerated after this change.
SELECT
    onek2.unique1,
    onek2.stringu1
FROM onek2
WHERE onek2.unique1 > 980
ORDER BY onek2.stringu1 ASC;
102+
103+
-- RESET enable_seqscan;
104+
-- RESET enable_bitmapscan;
105+
-- RESET enable_sort;
106+
107+
-- [SPARK-28329] SELECT INTO syntax
108+
-- SELECT two, stringu1, ten, string4
109+
-- INTO TABLE tmp
110+
-- FROM onek;
111+
-- Replacement for the commented-out SELECT ... INTO TABLE tmp above:
-- materialise the same four onek columns as a parquet table named tmp.
CREATE TABLE tmp
USING parquet
AS
SELECT
    two,
    stringu1,
    ten,
    string4
FROM onek;
114+
115+
-- Skip the person table because there is a point data type that we don't support.
116+
--
117+
-- awk '{print $1,$2;}' person.data |
118+
-- awk '{if(NF!=2){print $3,$2;}else{print;}}' - emp.data |
119+
-- awk '{if(NF!=2){print $3,$2;}else{print;}}' - student.data |
120+
-- awk 'BEGIN{FS=" ";}{if(NF!=2){print $4,$5;}else{print;}}' - stud_emp.data
121+
--
122+
-- SELECT name, age FROM person*; ??? check if different
123+
-- SELECT p.name, p.age FROM person* p;
124+
125+
--
126+
-- awk '{print $1,$2;}' person.data |
127+
-- awk '{if(NF!=2){print $3,$2;}else{print;}}' - emp.data |
128+
-- awk '{if(NF!=2){print $3,$2;}else{print;}}' - student.data |
129+
-- awk 'BEGIN{FS=" ";}{if(NF!=1){print $4,$5;}else{print;}}' - stud_emp.data |
130+
-- sort +1nr -2
131+
--
132+
-- SELECT p.name, p.age FROM person* p ORDER BY age DESC, name;
133+
134+
-- [SPARK-28330] Enhance query limit
135+
--
136+
-- Test some cases involving whole-row Var referencing a subquery
137+
--
138+
-- Expand every column of a derived table that yields a single literal row:
-- one integer, one NULL, then a string/integer/NULL triple.
SELECT foo.*
FROM (SELECT 1) AS foo;

SELECT foo.*
FROM (SELECT NULL) AS foo;

SELECT foo.*
FROM (SELECT 'xyzzy', 1, NULL) AS foo;
141+
142+
--
143+
-- Test VALUES lists
144+
--
145+
-- Match onek rows against a two-row inline table v(i, j) on both
-- unique1 and stringu1.
SELECT *
FROM
    onek,
    VALUES (147, 'RFAAAA'), (931, 'VJAAAA') AS v (i, j)
WHERE onek.unique1 = v.i
    AND onek.stringu1 = v.j;
147+
148+
-- [SPARK-28296] Improved VALUES support
149+
-- a more complex case
150+
-- looks like we're coding lisp :-)
151+
-- select * from onek,
152+
-- (values ((select i from
153+
-- (values(10000), (2), (389), (1000), (2000), ((select 10029))) as foo(i)
154+
-- order by i asc limit 1))) bar (i)
155+
-- where onek.unique1 = bar.i;
156+
157+
-- try VALUES in a subquery
158+
-- select * from onek
159+
-- where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99))
160+
-- order by unique1;
161+
162+
-- VALUES is also legal as a standalone query or a set-operation member
163+
-- A bare VALUES list used as a complete query.
VALUES
    (1, 2),
    (3, 4 + 4),
    (7, 77.7);
164+
165+
-- The same VALUES list as the first member of a UNION ALL chain, followed
-- by a plain SELECT and a TABLE reference.
VALUES
    (1, 2),
    (3, 4 + 4),
    (7, 77.7)
UNION ALL
SELECT 2 + 2, 57
UNION ALL
TABLE int8_tbl;
170+
171+
--
172+
-- Test ORDER BY options
173+
--
174+
175+
-- Single-column view f1 holding five integers and two NULLs; input for the
-- ORDER BY statements that follow.
CREATE OR REPLACE TEMPORARY VIEW foo AS
SELECT *
FROM (VALUES (42), (3), (10), (7), (NULL), (NULL), (1)) AS foo (f1);
177+
178+
-- [SPARK-28333] NULLS FIRST for DESC and NULLS LAST for ASC
179+
-- Default direction, no explicit null placement.
SELECT *
FROM foo
ORDER BY f1;

-- Explicit ASC; same thing as the statement above.
SELECT *
FROM foo
ORDER BY f1 ASC;

-- Default direction with NULLs requested first.
SELECT *
FROM foo
ORDER BY f1 NULLS FIRST;

-- Descending, no explicit null placement.
SELECT *
FROM foo
ORDER BY f1 DESC;

-- Descending with NULLs requested last.
SELECT *
FROM foo
ORDER BY f1 DESC NULLS LAST;
184+
185+
-- check if indexscans do the right things
186+
-- CREATE INDEX fooi ON foo (f1);
187+
-- SET enable_sort = false;
188+
189+
-- SELECT * FROM foo ORDER BY f1;
190+
-- SELECT * FROM foo ORDER BY f1 NULLS FIRST;
191+
-- SELECT * FROM foo ORDER BY f1 DESC;
192+
-- SELECT * FROM foo ORDER BY f1 DESC NULLS LAST;
193+
194+
-- DROP INDEX fooi;
195+
-- CREATE INDEX fooi ON foo (f1 DESC);
196+
197+
-- SELECT * FROM foo ORDER BY f1;
198+
-- SELECT * FROM foo ORDER BY f1 NULLS FIRST;
199+
-- SELECT * FROM foo ORDER BY f1 DESC;
200+
-- SELECT * FROM foo ORDER BY f1 DESC NULLS LAST;
201+
202+
-- DROP INDEX fooi;
203+
-- CREATE INDEX fooi ON foo (f1 DESC NULLS LAST);
204+
205+
-- SELECT * FROM foo ORDER BY f1;
206+
-- SELECT * FROM foo ORDER BY f1 NULLS FIRST;
207+
-- SELECT * FROM foo ORDER BY f1 DESC;
208+
-- SELECT * FROM foo ORDER BY f1 DESC NULLS LAST;
209+
210+
--
211+
-- Test planning of some cases with partial indexes
212+
--
213+
214+
-- partial index is usable
215+
-- explain (costs off)
216+
-- select * from onek2 where unique2 = 11 and stringu1 = 'ATAAAA';
217+
-- Run the query whose EXPLAIN form is commented out above.
SELECT *
FROM onek2
WHERE unique2 = 11
    AND stringu1 = 'ATAAAA';
218+
-- actually run the query with an analyze to use the partial index
219+
-- explain (costs off, analyze on, timing off, summary off)
220+
-- select * from onek2 where unique2 = 11 and stringu1 = 'ATAAAA';
221+
-- explain (costs off)
222+
-- select unique2 from onek2 where unique2 = 11 and stringu1 = 'ATAAAA';
223+
-- Same predicate as the query above, projecting only unique2.
SELECT unique2
FROM onek2
WHERE unique2 = 11
    AND stringu1 = 'ATAAAA';
224+
-- partial index predicate implies clause, so no need for retest
225+
-- explain (costs off)
226+
-- select * from onek2 where unique2 = 11 and stringu1 < 'B';
227+
-- Equality on unique2 combined with a range test on stringu1.
SELECT *
FROM onek2
WHERE unique2 = 11
    AND stringu1 < 'B';
228+
-- explain (costs off)
229+
-- select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
230+
-- Same predicate as the query above, projecting only unique2.
SELECT unique2
FROM onek2
WHERE unique2 = 11
    AND stringu1 < 'B';
231+
-- but if it's an update target, must retest anyway
232+
-- explain (costs off)
233+
-- select unique2 from onek2 where unique2 = 11 and stringu1 < 'B' for update;
234+
-- select unique2 from onek2 where unique2 = 11 and stringu1 < 'B' for update;
235+
-- partial index is not applicable
236+
-- explain (costs off)
237+
-- select unique2 from onek2 where unique2 = 11 and stringu1 < 'C';
238+
-- Range bound on stringu1 widened to 'C'.
SELECT unique2
FROM onek2
WHERE unique2 = 11
    AND stringu1 < 'C';
239+
-- partial index implies clause, but bitmap scan must recheck predicate anyway
240+
-- SET enable_indexscan TO off;
241+
-- explain (costs off)
242+
-- select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
243+
-- Re-run of the unique2 = 11 / stringu1 < 'B' query; the SET/RESET
-- enable_indexscan lines around it are commented out.
SELECT unique2
FROM onek2
WHERE unique2 = 11
    AND stringu1 < 'B';
244+
-- RESET enable_indexscan;
245+
-- check multi-index cases too
246+
-- explain (costs off)
247+
-- select unique1, unique2 from onek2
248+
-- where (unique2 = 11 or unique1 = 0) and stringu1 < 'B';
249+
-- Disjunction on unique2/unique1, with the stringu1 bound applied to both arms.
SELECT
    unique1,
    unique2
FROM onek2
WHERE (unique2 = 11 OR unique1 = 0)
    AND stringu1 < 'B';
251+
-- explain (costs off)
252+
-- select unique1, unique2 from onek2
253+
-- where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
254+
-- The stringu1 bound applies only to the unique2 arm; unique1 = 0 matches
-- regardless of stringu1.
SELECT
    unique1,
    unique2
FROM onek2
WHERE (unique2 = 11 AND stringu1 < 'B')
    OR unique1 = 0;
256+
257+
--
258+
-- Test some corner cases that have been known to confuse the planner
259+
--
260+
261+
-- ORDER BY on a constant doesn't really need any sorting
262+
-- Single constant row, ordered by its own alias.
SELECT 1 AS x
ORDER BY x;
263+
264+
-- But ORDER BY on a set-valued expression does
265+
-- create function sillysrf(int) returns setof int as
266+
-- 'values (1),(10),(2),($1)' language sql immutable;
267+
268+
-- select sillysrf(42);
269+
-- select sillysrf(-1) order by 1;
270+
271+
-- drop function sillysrf(int);
272+
273+
-- X = X isn't a no-op, it's effectively X IS NOT NULL assuming = is strict
274+
-- (see bug #5084)
275+
-- Self-equality filter over a three-row inline table containing one NULL,
-- first with an explicit ORDER BY k ...
SELECT *
FROM (VALUES (2), (NULL), (1)) v (k)
WHERE k = k
ORDER BY k;

-- ... then without any ordering.
SELECT *
FROM (VALUES (2), (NULL), (1)) v (k)
WHERE k = k;
277+
278+
-- Test partitioned tables with no partitions, which should be handled the
279+
-- same as the non-inheritance case when expanding its RTE.
280+
-- create table list_parted_tbl (a int,b int) partition by list (a);
281+
-- create table list_parted_tbl1 partition of list_parted_tbl
282+
-- for values in (1) partition by list(b);
283+
-- explain (costs off) select * from list_parted_tbl;
284+
-- drop table list_parted_tbl;
285+
-- Remove the tmp table created by the CREATE TABLE ... AS SELECT statement
-- earlier in this script.
DROP TABLE tmp;

0 commit comments

Comments
 (0)