-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathelasticnet_test.seq
68 lines (64 loc) · 2.35 KB
/
elasticnet_test.seq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import python
import math
import sys
# Run the saved linear model over the GSE42700 female methylation matrix.
#
# adult_age: accepted for interface compatibility; not used in the body.
# directory: folder holding important_sk_variables.txt, enet_sk_betas.npy
#            and enet_sk_intercept.npy.
# id_dict:   maps a variable name (first comma-separated field of each line
#            of important_sk_variables.txt) to an integer index.
# Returns one prediction per row of the CSV, as a plain Python list.
pydef testing_pred(adult_age, directory, id_dict)->list[float]:
    import numpy as np
    import os
    def predict(X, coef_, intercept_):
        # Linear model: X . coef_ with the intercept added to every row.
        y = np.dot(X, coef_)
        y += intercept_*np.ones(len(y))
        return y
    # Column indices of the selected variables. They are only consumed by the
    # optional column-subsetting step below, which is currently disabled.
    indices = []
    with open(os.path.join(directory, "important_sk_variables.txt")) as vars_file:
        for line in vars_file:
            variable = line.rstrip().split(',')[0]
            indices.append(id_dict[variable])
    X_valid = np.genfromtxt('data/training/GSE42700_female.csv', delimiter=',')
    # Enable the next line only when the saved betas and intercept were fitted
    # on the selected variables alone:
    # X_valid = X_valid[:, indices]
    # Standard scaling. A constant column has zero standard deviation and would
    # become NaN (0/0), which np.dot then spreads into every prediction; use a
    # scale of 1 for such columns so they become all zeros instead.
    col_std = np.std(X_valid, axis=0)
    col_std = np.where(col_std == 0, 1.0, col_std)
    X_valid = (X_valid - np.mean(X_valid, axis=0)) / col_std
    # These must be the betas/intercept matching the columns kept above.
    beta = np.load(os.path.join(directory, "enet_sk_betas.npy"))
    intercept = np.load(os.path.join(directory, "enet_sk_intercept.npy"))
    pred_valid = predict(X_valid, beta, intercept)
    return pred_valid.tolist()
# Print the median absolute difference (MAD) and the mean squared error (MSE)
# between true values and predictions.
#
# y_list: true values.
# y_hat:  predictions; expected to have the same length as y_list.
# Writes "MAD: <value>" and "MSE: <value>" to stdout and returns nothing.
pydef testing_metrics(y_list, y_hat):
    import numpy as np
    def median_absolute_difference(y, y_hat):
        return np.median(np.abs(y-y_hat))
    def mean_squared_error(y, y_hat):
        delta = y_hat - y
        return delta.dot(delta)/len(y)
    # The np.float alias was removed in NumPy 1.24; the builtin float selects
    # the same float64 dtype and works on every NumPy version.
    y = np.array(y_list, dtype=float)
    y_h = np.array(y_hat, dtype=float)
    mad = median_absolute_difference(y, y_h)
    print(f"MAD: {mad}")
    mse = mean_squared_error(y, y_h)
    print(f"MSE: {mse}")
# Age threshold used by inverseF below.
adult_age = 20  # Humans


# Map a model output back to an age.
# Negative inputs follow the exponential branch, (1 + adult_age)*exp(age) - 1;
# zero and positive inputs follow the linear branch,
# (1 + adult_age)*age + adult_age.
# NOTE(review): presumably the inverse of a log-linear age transform applied
# to the training labels -- confirm against the training script.
def inverseF(age):
    scale = 1. + adult_age
    if age < 0:
        return scale*math.exp(age) - 1.
    return scale*age + adult_age
# Folder with the saved model files, taken from the first command-line argument.
directory = sys.argv[1]

# True ages: the first of the two comma-separated fields on every label row.
y_list = list[int]()
with open("data/training/GSE42700_female.labels") as lbl_file:
    for row in lbl_file:
        age_field, _second_field = row.rstrip().split(',')
        y_list.append(int(age_field))

# Variable name (field 0) -> integer index (field 1).
id_dict = dict[str, int]()
with open("data/training/methylation_ids.txt") as ids_file:
    for row in ids_file:
        parts = row.rstrip().split(',')
        id_dict[parts[0]] = int(parts[1])

pred_valid = testing_pred(adult_age, directory, id_dict)

# Convert each raw prediction with inverseF and print it next to the true age.
y_hat = list[float]()
for i, raw in enumerate(pred_valid):
    tmp = inverseF(raw)
    y_hat.append(tmp)
    print(f"{tmp} - {y_list[i]}")

testing_metrics(y_list, y_hat)