forked from max-webster/get-started-impala
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbillion_row_setup.sql
51 lines (41 loc) · 1.52 KB
/
billion_row_setup.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
-- Connect with: impala-shell -i localhost

-- Database that holds the example table.
CREATE DATABASE IF NOT EXISTS oreilly;
USE oreilly;

-- Delimited table: fields in the data files are separated by commas.
CREATE TABLE IF NOT EXISTS sample_data (
    id        BIGINT,
    val       INT,
    zerofill  STRING,
    name      STRING,
    assertion BOOLEAN,
    city      STRING,
    state     STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ",";

-- Confirm the column names and types.
DESCRIBE sample_data;
/* Expected output:
+-----------+---------+---------+
| name      | type    | comment |
+-----------+---------+---------+
| id        | bigint  |         |
| val       | int     |         |
| zerofill  | string  |         |
| name      | string  |         |
| assertion | boolean |         |
| city      | string  |         |
| state     | string  |         |
+-----------+---------+---------+
*/
-- Show the detailed table metadata, including the HDFS location of the data files.
DESCRIBE FORMATTED sample_data;
/* Expected output (the Location value will differ on each system):
...
| # Detailed Table Information | NULL
| Database:                    | oreilly
| Owner:                       | jrussell
| CreateTime:                  | Fri Jul 18 16:25:06 PDT 2014
| LastAccessTime:              | UNKNOWN
| Protect Mode:                | None
| Retention:                   | 0
| Location:                    | hdfs://a1730.abcde.example.com:8020
|                              | /user/impala/warehouse/oreilly.db/
|                              | sample_data
| Table Type:                  | MANAGED_TABLE
...
*/
/* Copy the local CSV file into the HDFS directory of the sample_data table.
   The -f flag overwrites a copy left behind by an earlier run, so the script
   can be re-run without the upload failing because the file already exists.
   TODO: script this in a reliable way, to work regardless of HDFS layout.
   The destination below assumes the table lives under /user/impala/warehouse.
   If the Location shown by DESCRIBE FORMATTED differs, adjust the path to match. */
!hdfs dfs -put -f billion_rows.csv '/user/impala/warehouse/oreilly.db/sample_data';
-- The file was added outside of Impala, so ask Impala to reload the file list.
REFRESH sample_data;
-- Sanity check: count the rows that are now visible in the table.
SELECT COUNT(*) FROM sample_data;