-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprep-queries.py
84 lines (78 loc) · 2.87 KB
/
prep-queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import psycopg2
from datetime import datetime
conn = psycopg2.connect(dbname='test_data', user='pauloarantes', host='/tmp')
c = conn.cursor()
c.execute(
'''
WITH cities AS(
SELECT data->'properties'->>'user id' as user_id,
MAX(data->'properties'->>'time') as max_time
FROM mp_began_session
WHERE data->'properties'->>'$city' IS NOT NULL
GROUP BY data->'properties'->>'user id'
)
SELECT CAST(mp.data->'properties'->>'user id' AS int) as user_id,
mp.data->'properties'->>'$city' as city
INTO user_city
FROM mp_began_session mp
JOIN user_cities_time c
ON c.user_id=mp.data->'properties'->>'user id'
AND c.max_time=mp.data->'properties'->>'time';
'''
)
c.execute(
'''
WITH genders AS(
SELECT data->'properties'->>'user id' as user_id,
MAX(data->'properties'->>'time') as max_time
FROM mp_began_session
WHERE data->'properties'->>'gender' IS NOT NULL
GROUP BY data->'properties'->>'user id'
)
SELECT CAST(mp.data->'properties'->>'user id' AS int) as user_id,
mp.data->'properties'->>'gender' as gender
INTO user_gender
FROM mp_began_session mp
JOIN genders
ON genders.user_id=mp.data->'properties'->>'user id'
AND genders.max_time=mp.data->'properties'->>'time';
'''
)
c.execute(
'''
WITH user_types AS(
SELECT data->'properties'->>'user id' as user_id,
MAX(data->'properties'->>'time') as max_time
FROM mp_began_session
WHERE data->'properties'->>'User Type' IS NOT NULL
GROUP BY data->'properties'->>'user id'
)
SELECT CAST(mp.data->'properties'->>'user id' AS int) as user_id,
mp.data->'properties'->>'User Type' as user_type
INTO user_type
FROM mp_began_session mp
JOIN user_types
ON user_types.user_id=mp.data->'properties'->>'user id'
AND user_types.max_time=mp.data->'properties'->>'time';
'''
)
c.execute(
'''
WITH oss AS(
SELECT data->'properties'->>'user id' as user_id,
MAX(data->'properties'->>'time') as max_time
FROM mp_began_session
WHERE data->'properties'->>'$os' IS NOT NULL
GROUP BY data->'properties'->>'user id'
)
SELECT CAST(mp.data->'properties'->>'user id' AS int) as user_id,
mp.data->'properties'->>'$os' as os
INTO user_os
FROM mp_began_session mp
JOIN oss
ON oss.user_id=mp.data->'properties'->>'user id'
AND oss.max_time=mp.data->'properties'->>'time';
'''
)
conn.commit()
conn.close()