-
Notifications
You must be signed in to change notification settings - Fork 2
/
data.py
37 lines (25 loc) · 788 Bytes
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Author: Mathieu Blondel, 2019
# License: BSD
import numpy as np
def load_label_ranking_data(fn):
    """Load a label-ranking dataset from a comma-separated text file.

    The first line of the file is treated as a header and skipped. On every
    other non-blank line, all fields but the last are parsed as float
    features, and the last field is a ranking of single-letter labels
    separated by ">", e.g. ``b>c>a``.

    A ranking such as ``b > c > a`` is encoded as ``y = [1, 2, 0]``, i.e.
    ``y[rank] = label`` with labels numbered from ``a = 0``.

    Parameters
    ----------
    fn : str or path-like
        Path of the file to read.

    Returns
    -------
    X : numpy.ndarray of float
        One row of features per data line.
    Y : numpy.ndarray of int
        One row of encoded rankings per data line.
    """
    X = []
    Y = []

    # The context manager closes the file even if a line fails to parse.
    with open(fn) as f:
        next(f, None)  # skip the header line (tolerating an empty file)

        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue

            arr = line.split(",")
            X.append(np.array(arr[:-1], dtype=float))

            # Strip each token so that both "b>c>a" and "b > c > a" parse.
            Y.append([ord(letter.strip()) - ord("a")
                      for letter in arr[-1].split(">")])

    return np.array(X), np.array(Y, dtype=int)
if __name__ == "__main__":
    # Manual smoke test: load the iris ranking file and show the encoded
    # rankings; the feature matrix is loaded but not displayed.
    features, rankings = load_label_ranking_data("data/ranking/iris.txt")
    print(rankings)