-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql_data_to_csv.py
107 lines (83 loc) · 3.71 KB
/
sql_data_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import sqlite3
import scipy.sparse as sp
import numpy as np
import csv
from preview_routes import normalizeDifficulty, plot_holds
import pandas as pd
import matplotlib.pyplot as plt
import h5py
def scrape_routes(filepath: str='csv/routes.csv', ascensionist_filter: int=50, quality_filter: float=2.0, is_listed: int=1, layout_id: int=1, db_path: str='db/kilter.db'):
    """Export filtered, difficulty-sorted Kilter routes to a CSV file.

    Joins every climb with its stats row, drops duplicate uuids, keeps
    only sufficiently popular and well-rated climbs, sorts ascending by
    average difficulty and writes the result to ``filepath``.

    Args:
        filepath: Destination CSV path.
        ascensionist_filter: Keep climbs with strictly more ascents than this.
        quality_filter: Keep climbs with strictly higher average quality.
        is_listed: Value matched against ``climbs.is_listed``.
        layout_id: Value matched against ``climbs.layout_id``.
        db_path: SQLite database file (defaults to the previously
            hard-coded location, so existing callers are unaffected).
    """
    # Parameterized placeholders instead of f-string interpolation:
    # immune to SQL injection and lets SQLite cache the statement.
    sql_query = """
        SELECT c.uuid, c.name, c.frames, cs.angle, cs.ascensionist_count,
               cs.difficulty_average, cs.quality_average
        FROM climbs c
        JOIN climb_stats cs ON c.uuid = cs.climb_uuid
        WHERE c.is_listed = ?
          AND c.layout_id = ?;
    """
    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query(sql_query, conn, params=(is_listed, layout_id))
    finally:
        # Original leaked the connection if the query raised.
        conn.close()
    # A climb can have one stats row per wall angle; keep only the first.
    df = df.drop_duplicates(subset='uuid', keep='first')
    # Filter by popularity and quality (strict inequalities, as before).
    mask = (df['ascensionist_count'] > ascensionist_filter) & (df['quality_average'] > quality_filter)
    df_filtered_sorted = df.loc[mask].sort_values(by='difficulty_average')
    df_filtered_sorted.to_csv(filepath, index=False)
def scrape_holds(filepath: str='csv/holds.csv', db_path: str='db/kilter.db'):
    """Export every hold placement's id and x/y wall coordinates to CSV.

    Joins ``placements`` with ``holes`` so each placement id is paired
    with the physical coordinates of the hole it occupies, then writes
    the (id, x, y) table to ``filepath``.

    Args:
        filepath: Destination CSV path.
        db_path: SQLite database file (defaults to the previously
            hard-coded location, so existing callers are unaffected).
    """
    sql_query = """SELECT p.id, h.x, h.y
                   FROM placements p
                   JOIN holes h ON p.hole_id = h.id
                """
    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query(sql_query, conn)
    finally:
        # Original leaked the connection if the query raised.
        conn.close()
    df.to_csv(filepath, index=False)
def create_hdf5(filepath: str='csv/routes.csv', ascensionist_filter: int=20, quality_filter: float=1.5):
    """Build 'data.h5' with one 168x168 hold-occupancy matrix per route.

    Queries filtered routes from the database, converts each route's
    frame string into a binary 168x168 matrix of occupied (x, y) hold
    coordinates (looked up in 'csv/holds.csv'), and stores the matrices
    plus their normalized difficulty labels as int8 HDF5 datasets.

    Args:
        filepath: NOTE(review): accepted for interface compatibility but
            never used by the original implementation — output always
            goes to 'data.h5' and holds are read from 'csv/holds.csv'.
        ascensionist_filter: Keep routes with strictly more ascents.
        quality_filter: Keep routes with strictly higher average quality.
    """
    # Parameterized placeholders instead of f-string interpolation.
    sql_query = """
        SELECT climbs.frames, climb_stats.angle, climb_stats.difficulty_average
        FROM climb_stats
        INNER JOIN climbs ON climb_stats.climb_uuid = climbs.uuid
        WHERE climbs.is_listed = 1
          AND climbs.layout_id = 1
          AND climb_stats.ascensionist_count > ?
          AND climb_stats.quality_average > ?
    """
    conn = sqlite3.connect('db/kilter.db')
    try:
        df_routes = pd.read_sql_query(sql_query, conn, params=(ascensionist_filter, quality_filter))
    finally:
        # Original leaked the connection if the query raised.
        conn.close()
    # Map placement id -> (x, y) strings; includes the CSV header row as a
    # harmless 'id' -> ('x', 'y') entry, matching the original behavior.
    with open('csv/holds.csv', mode='r', newline='') as holds_csv:
        holddict = {row[0]: (row[1], row[2]) for row in csv.reader(holds_csv)}
    matrices = []
    labels = []
    for _, route in df_routes.iterrows():
        mat = sp.lil_matrix((168, 168))
        label = normalizeDifficulty(round(route['difficulty_average']))
        # frames looks like 'p1234r15p1372r12...': each 'p'-separated token
        # starts with a 4-digit placement id followed by role info.
        for frame in route['frames'].split("p")[1:]:
            hold_id = frame[0:4]
            x = int(holddict[hold_id][0])
            y = int(holddict[hold_id][1])
            mat[x, y] = 1
        matrices.append(mat.toarray())
        labels.append(label)
    # Original called ``hf.close`` without parentheses, so the handle was
    # never explicitly closed; a context manager guarantees it.
    with h5py.File('data.h5', 'w') as hf:
        hf.create_dataset('routes', data=matrices, dtype='int8')
        hf.create_dataset('labels', data=labels, dtype='int8')