-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcross_validation_ridge_super.py
64 lines (53 loc) · 2.51 KB
/
cross_validation_ridge_super.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
import matplotlib.pyplot as plt
from regression_tools import *
from preprocessing import *
def cross_validation_ridge_super(y, phi, k_indices, k, lambda_, degree, not_poly_features):
    """
    Score one step of a k-fold cross-validation for ridge / linear regression.

    Row `k` of `k_indices` selects the held-out samples; all remaining rows
    are flattened and used for fitting. Both splits are expanded with
    `build_polinomial` before the model is fitted and evaluated.

    Args:
        y: targets, indexed with the sample indices stored in `k_indices`.
        phi: feature matrix; its rows are selected with the same indices.
        k_indices: 2-D array of sample indices, one row per fold.
        k: index of the row of `k_indices` to hold out.
        lambda_: regularisation strength passed to `ridge_regression`;
            a value of exactly 0 selects `least_squares` instead.
        degree: forwarded to `build_polinomial`.
        not_poly_features: forwarded to `build_polinomial`.

    Returns:
        Number of held-out targets equal to the thresholded prediction
        `tx.dot(w) > 0.5`, divided by the number of held-out targets.
    """
    # Drop the k-th fold, then flatten what is left into one index vector.
    other_folds = np.delete(k_indices, k, 0)
    n_fit = (k_indices.shape[0] - 1) * k_indices.shape[1]
    fit_idx = other_folds.reshape(n_fit)
    eval_idx = k_indices[k]

    # Fitting split.
    x_fit = phi[fit_idx, :]
    y_fit = y[fit_idx]

    # Held-out split.
    x_eval = phi[eval_idx, :]
    y_eval = y[eval_idx]

    # Same polynomial expansion for both splits.
    tx_fit = build_polinomial(x_fit, degree, not_poly_features)
    tx_eval = build_polinomial(x_eval, degree, not_poly_features)

    # A zero penalty means plain least squares; the returned loss is not needed.
    if lambda_ == 0:
        w, _ = least_squares(y_fit, tx_fit)
    else:
        w, _ = ridge_regression(y_fit, tx_fit, lambda_)

    # Threshold the linear output at 0.5 and compare it with the targets.
    predicted = tx_eval.dot(w) > 0.5
    n_correct = (y_eval == predicted).sum()
    return n_correct / y_eval.shape[0]
def cross_validation_super_demo(y_train,x_train,degrees,k_fold,lambdas,seed):
    """
    Performs cross-validation with ridge regression over a grid of
    regularisation strengths and polynomial degrees.

    For every lambda the cleaned data is preprocessed (super-feature
    augmentation when lambda is non-zero, then feature augmentation and
    normalisation), and every degree is scored with `k_fold` calls to
    `cross_validation_ridge_super`.

    Args:
        y_train: targets, forwarded to the helpers and to each fold evaluation.
        x_train: raw feature matrix, passed to `cleaning_function` together
            with -999 (presumably the missing-value marker -- confirm against
            `cleaning_function`).
        degrees: NumPy array of polynomial degrees (its `.size` is used).
        k_fold: number of folds.
        lambdas: NumPy array of regularisation strengths (its `.size` is used).
        seed: forwarded to `build_k_indices`.

    Returns:
        Matrix of shape (lambdas.size, degrees.size) which stores the mean
        proportion of correct classifications over the folds, where:
            rows: lambda
            columns: degree of polynomial of the features.
    """
    # Split data in k fold
    k_indices = build_k_indices(y_train, k_fold, seed)

    # Clean data. nmc_tr + 1 is handed to every preprocessing step below as
    # its "not_*_features" argument.
    x_train_cleaned, nmc_tr = cleaning_function(x_train, -999)
    protected = nmc_tr + 1

    # Cross validation steps
    cost_te = np.zeros((lambdas.size, degrees.size))

    for ind_lamb, lambda_ in enumerate(lambdas):
        print(lambda_)

        if lambda_ != 0:
            x_base, super_col = super_features_augmentation(
                x_train_cleaned, y_train, lambda_,
                not_super_features=protected,
                is_train=True, augmentation=False)
            super_col_nb = len(super_col)
        else:
            # The super-feature step is only run for a non-zero lambda.
            # Without this branch the matrix and the super-column count would
            # be unbound on a first iteration with lambda_ == 0, or silently
            # reused from the previous lambda (and augmented a second time).
            # Work on a copy so later steps cannot alter the cleaned data.
            x_base = np.copy(x_train_cleaned)
            super_col_nb = 0

        x_train_agm, noaf = features_augmentation(x_base, not_augm_features=protected)
        x_train_agm = norm_data(x_train_agm, not_norm_features=protected)

        # Columns excluded from the polynomial expansion.
        not_poly_features = protected + noaf + super_col_nb

        for ind_deg, degree_ in enumerate(degrees):
            # One score (proportion of correct classifications) per fold.
            fold_scores = np.zeros(k_fold)
            for k in range(k_fold):
                fold_scores[k] = cross_validation_ridge_super(
                    y_train, x_train_agm, k_indices, k,
                    lambda_, degree_, not_poly_features)
            print('new deg')
            cost_te[ind_lamb, ind_deg] = fold_scores.mean()

    return cost_te