Skip to content

Commit ad2030a

Browse files
alambDandandan
authored andcommitted
Add clickbench queries to sqllogictest coverage (apache#6836)
* Add clickbench queries to sqllogictest coverage * rowsort * Update datafusion/core/tests/sqllogictests/test_files/clickbench.slt Co-authored-by: Daniël Heres <[email protected]> * fix typo -- 🤦 * Update queries now that they pass --------- Co-authored-by: Daniël Heres <[email protected]>
1 parent 0bc2275 commit ad2030a

File tree

2 files changed

+275
-0
lines changed

2 files changed

+275
-0
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
# This file contains the clickbench schema and queries
20+
# and the first 10 rows of data. Since ClickBench contains case sensitive queries
21+
# this is also a good test of that usecase too
22+
23+
# create.sql came from
24+
# https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/create.sql
25+
# Data file made with DuckDB:
26+
# COPY (SELECT * FROM 'hits.parquet' LIMIT 10) TO 'clickbench_hits_10.parquet' (FORMAT PARQUET);
27+
28+
statement ok
29+
CREATE EXTERNAL TABLE hits
30+
STORED AS PARQUET
31+
LOCATION 'tests/data/clickbench_hits_10.parquet';
32+
33+
34+
# queries.sql came from
35+
# https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/queries.sql
36+
37+
query I
38+
SELECT COUNT(*) FROM hits;
39+
----
40+
10
41+
42+
query I
43+
SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
44+
----
45+
0
46+
47+
query IIR
48+
SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
49+
----
50+
0 10 0
51+
52+
query R
53+
SELECT AVG("UserID") FROM hits;
54+
----
55+
-304548765855551600
56+
57+
query I
58+
SELECT COUNT(DISTINCT "UserID") FROM hits;
59+
----
60+
5
61+
62+
query I
63+
SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
64+
----
65+
1
66+
67+
query DD
68+
SELECT MIN("EventDate"::INT::DATE), MAX("EventDate"::INT::DATE) FROM hits;
69+
----
70+
2013-07-15 2013-07-15
71+
72+
query II
73+
SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
74+
----
75+
76+
query II rowsort
77+
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
78+
----
79+
197 1
80+
229 1
81+
39 1
82+
839 2
83+
84+
query IIIRI rowsort
85+
SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
86+
----
87+
197 0 2 0 1
88+
229 0 1 0 1
89+
39 0 1 0 1
90+
839 0 6 0 2
91+
92+
query TI
93+
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
94+
----
95+
96+
query ITI
97+
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
98+
----
99+
100+
query TI
101+
SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
102+
----
103+
104+
query TI
105+
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
106+
----
107+
108+
query ITI
109+
SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
110+
----
111+
112+
query II rowsort
113+
SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10;
114+
----
115+
-2461439046089301801 5
116+
376160620089546609 1
117+
427738049800818189 1
118+
519640690937130534 2
119+
7418527520126366595 1
120+
121+
query ITI rowsort
122+
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
123+
----
124+
-2461439046089301801 (empty) 5
125+
376160620089546609 (empty) 1
126+
427738049800818189 (empty) 1
127+
519640690937130534 (empty) 2
128+
7418527520126366595 (empty) 1
129+
130+
query ITI rowsort
131+
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10;
132+
----
133+
-2461439046089301801 (empty) 5
134+
376160620089546609 (empty) 1
135+
427738049800818189 (empty) 1
136+
519640690937130534 (empty) 2
137+
7418527520126366595 (empty) 1
138+
139+
query IRTI rowsort
140+
SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
141+
----
142+
-2461439046089301801 18 (empty) 1
143+
-2461439046089301801 33 (empty) 1
144+
-2461439046089301801 38 (empty) 1
145+
-2461439046089301801 56 (empty) 1
146+
-2461439046089301801 58 (empty) 1
147+
376160620089546609 30 (empty) 1
148+
427738049800818189 40 (empty) 1
149+
519640690937130534 26 (empty) 1
150+
519640690937130534 36 (empty) 1
151+
7418527520126366595 18 (empty) 1
152+
153+
query I
154+
SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
155+
----
156+
157+
query I
158+
SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
159+
----
160+
0
161+
162+
query TTI
163+
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
164+
----
165+
166+
query TTTII
167+
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
168+
----
169+
170+
query IITIIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIII
171+
SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
172+
----
173+
174+
query T
175+
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
176+
----
177+
178+
query T
179+
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
180+
----
181+
182+
query T
183+
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10;
184+
----
185+
186+
query IRI
187+
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
188+
----
189+
190+
query TRIT
191+
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
192+
----
193+
194+
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
195+
SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits;
196+
----
197+
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890
198+
199+
query IIIIR
200+
SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
201+
----
202+
203+
query IIIIR
204+
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
205+
----
206+
207+
query IIIIR rowsort
208+
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
209+
----
210+
4894690465724379622 1568366281 1 0 0
211+
5206346422301499756 -1216690514 1 0 0
212+
6308646140879811077 -1216690514 1 0 0
213+
6635790769678439148 1427531677 1 0 0
214+
6864353419233967042 1568366281 1 0 0
215+
8120543446287442873 -1216690514 1 0 0
216+
8156744413230856864 -1216690514 1 0 0
217+
8740403056911509777 1615432634 1 0 0
218+
8924809397503602651 -1216690514 1 0 0
219+
9110818468285196899 -1216690514 1 0 0
220+
221+
query TI rowsort
222+
SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
223+
----
224+
(empty) 5
225+
http://afisha.mail.ru/catalog/314/women.ru/ency=1&page3/?errovat-pinniki 1
226+
http://bonprix.ru/index.ru/cinema/art/0 986 424 233 сезон 1
227+
http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1
228+
http://holodilnik.ru/russia/05jul2013&model=0 1
229+
http://tours/Ekategoriya%2F&sr=http://slovareniye 1
230+
231+
query ITI rowsort
232+
SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
233+
----
234+
1 (empty) 5
235+
1 http://afisha.mail.ru/catalog/314/women.ru/ency=1&page3/?errovat-pinniki 1
236+
1 http://bonprix.ru/index.ru/cinema/art/0 986 424 233 сезон 1
237+
1 http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1
238+
1 http://holodilnik.ru/russia/05jul2013&model=0 1
239+
1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1
240+
241+
query IIIII rowsort
242+
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
243+
----
244+
-1216690514 -1216690515 -1216690516 -1216690517 6
245+
1427531677 1427531676 1427531675 1427531674 1
246+
1568366281 1568366280 1568366279 1568366278 2
247+
1615432634 1615432633 1615432632 1615432631 1
248+
249+
query TI
250+
SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10;
251+
----
252+
253+
query TI
254+
SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10;
255+
----
256+
257+
query TI
258+
SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
259+
----
260+
261+
query IIITTI
262+
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
263+
----
264+
265+
query IDI
266+
SELECT "URLHash", "EventDate"::INT::DATE, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate"::INT::DATE ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
267+
----
268+
269+
query III
270+
SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
271+
----
272+
273+
query PI
274+
SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-14' AND "EventDate"::INT::DATE <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
275+
----

0 commit comments

Comments
 (0)