-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproject_test.py
104 lines (76 loc) · 3.63 KB
/
project_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 8 12:06:50 2021
@author: inessarana
"""
# MACHINE LEARNING PROJECT
# PART 1
import numpy as np
import matplotlib.pyplot as plt
# Load the training inputs, training outcomes and test inputs for both project
# parts from the author's local project folder (absolute paths).
x_train1, x_train2, y_train1, y_train2, x_test1, x_test2 = (
    np.load(npy_path)
    for npy_path in (
        '/Users/inessarana/Documents/Faculdade/5º ano /1º Semestre/ML/Project/Xtrain_Regression_Part1.npy',
        '/Users/inessarana/Documents/Faculdade/5º ano /1º Semestre/ML/Project/Xtrain_Regression_Part2.npy',
        '/Users/inessarana/Documents/Faculdade/5º ano /1º Semestre/ML/Project/Ytrain_Regression_Part1.npy',
        '/Users/inessarana/Documents/Faculdade/5º ano /1º Semestre/ML/Project/Ytrain_Regression_Part2.npy',
        '/Users/inessarana/Documents/Faculdade/5º ano /1º Semestre/ML/Project/Xtest_Regression_Part1.npy',
        '/Users/inessarana/Documents/Faculdade/5º ano /1º Semestre/ML/Project/Xtest_Regression_Part2.npy',
    )
)
# PREDICTOR 1: LINEAR REGRESSION
# calculate parameters using normal equations (in lr now)
def lr_par(xt, yt):
    """Fit the beta parameters of a linear predictor by least squares.

    xt: 2-D training inputs, one row per sample and one column per feature
    yt: training outcomes, one entry (or one row) per sample

    Returns the beta parameters: the intercept first, followed by one
    coefficient per feature. The result has shape (features + 1,) when yt is
    1-D and (features + 1, columns of yt) when yt is 2-D.
    """
    xt = np.asarray(xt)
    yt = np.asarray(yt)
    c = len(xt)
    # design matrix: a leading column of ones provides the intercept term
    X = np.concatenate((np.ones((c, 1)), xt), axis=1)
    # Solve the least-squares problem directly instead of forming
    # inv(X^T X). When X^T X is invertible this gives the same beta as the
    # normal equations, but it is numerically more stable and still returns
    # a (minimum-norm) solution when the features are collinear.
    beta = np.linalg.lstsq(X, yt, rcond=None)[0]
    return beta
def lr(beta, xt):
    """Predict outcomes for the inputs xt with a fitted linear model.

    beta: model parameters, intercept first, then one coefficient per feature
    xt: 2-D inputs to predict for, one row per sample

    Returns the matrix product of the design matrix of xt with beta.
    """
    n_rows = len(xt)
    intercept_column = np.ones((n_rows, 1))
    # design matrix: the column of ones multiplies the intercept in beta
    design = np.concatenate((intercept_column, xt), axis=1)
    return design @ beta
def lrpredictor(xt, yt, x_test):
    """Train a linear regression on (xt, yt) and predict outcomes for x_test.

    xt: training inputs
    yt: training outcomes
    x_test: inputs to predict for

    Returns the predictions of lr for x_test using the parameters that
    lr_par fits on the training data.
    """
    fitted_beta = lr_par(xt, yt)
    return lr(fitted_beta, x_test)
def sse(y, yt):
    """Return the sum of squared errors between two sets of outcomes.

    y: test / predicted set
    yt: training set the predictions are compared against
    """
    residuals = yt - y
    squared_residuals = np.array(residuals ** 2)
    return squared_residuals.sum()
# CROSS VALIDATION
def cross_val(xt, yt, k):
    """Estimate the linear-regression error with k-fold cross-validation.

    The samples are split, in their given order, into k contiguous folds of
    equal size. Each fold is held out once: the model is trained on the
    remaining k-1 folds with lrpredictor and its predictions for the
    held-out fold are scored with sse.

    xt: training inputs, one row per sample
    yt: training outcomes, one entry (or one row) per sample
    k: number of folds; must be at least 2 and divide the number of samples

    Prints the mean SSE over the folds and returns an array holding the SSE
    of each of the k folds. If k is not usable, a message is printed and
    None is returned.
    """
    xt = np.asarray(xt)
    yt = np.asarray(yt)
    c = len(xt)  # length of training set

    if k < 1:
        # guard first: k == 0 would otherwise raise ZeroDivisionError below
        print("Cannot compute. The number of folds must be at least 2.")
        return None
    if (c % k) != 0:
        print("Cannot compute. Choose a divider of " + str(c))
        return None
    if k == 1:
        print("Cannot perform 1-fold classification since there is no test set.")
        return None

    fold = c // k
    errors = np.empty(k)
    for i in range(k):
        start = i * fold
        stop = start + fold
        # Select folds by position, not by value lookup, so repeated values
        # in the data cannot place a sample in the wrong set.
        x_held_out = xt[start:stop]
        y_held_out = yt[start:stop]
        x_train = np.concatenate((xt[:start], xt[stop:]), axis=0)
        y_train = np.concatenate((yt[:start], yt[stop:]), axis=0)
        # Train only on the data outside the fold and score on the fold
        # itself, so the error measures performance on unseen samples.
        y_pred = lrpredictor(x_train, y_train, x_held_out)
        errors[i] = sse(y_pred, y_held_out)

    print("The mean SSE for " + str(k) + "-folds is " + str(np.mean(errors)))
    return errors
# run 5-fold cross-validation on the Part 1 training data
cross_val(xt=x_train1, yt=y_train1, k=5)