-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpr_emsemble.py
101 lines (87 loc) · 3.77 KB
/
gpr_emsemble.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import numpy as np
import copy
from ML_Coupling.gpr_frame import GPR
import matplotlib.pyplot as plt
# Load the HOMO-HOMO and LUMO-LUMO pair arrays saved as .npy files.
homo_pairs, lumo_pairs = (
    np.load(npy_path)
    for npy_path in ('./data/homo_homo_pair.npy', './data/lumo_lumo_pair.npy')
)
# Load the coupling table: comma-separated, lines starting with '#' skipped.
raw_data = np.loadtxt('./data/results.csv', comments='#', delimiter=',')
# Column 3 is used as the HOMO coupling and column 4 as the LUMO coupling;
# only the magnitudes are kept.
c_homo = np.absolute(raw_data[:, 3])
c_lumo = np.absolute(raw_data[:, 4])
# Replace non-positive couplings with a small positive floor, in place, so a
# later logarithm of these arrays stays finite.
# NOTE(review): the HOMO floor is 1e-9 but the LUMO floor is 1e-8 -- confirm
# that the two different values are intentional.
c_homo[c_homo <= 0.0000] = 1e-9
lumo_zero_mask = c_lumo <= 0.0000
# `izero` ends up holding the positions of the replaced LUMO entries.
izero = np.flatnonzero(lumo_zero_mask).tolist()
c_lumo[lumo_zero_mask] = 1e-8
# Build the data set. The feature arrays are used as-is (aliases, not copies);
# the regression targets are the negative natural log of the couplings.
train_homo_pairs, train_lumo_pairs = homo_pairs, lumo_pairs
train_c_homo = np.negative(np.log(c_homo))
train_c_lumo = np.negative(np.log(c_lumo))
# start gpr training
n_ensemble = 8
MPEs = []
MAPEs = []

# Shared layout of the three-line error report printed for MPE and MAPE.
_REPORT = ('%s of full data set: %5.3f %% \n'
           'Error of training set with %d samples: %5.3f %% \n'
           'Error of testing set with %d samples: %5.3f %% ')


def _percentage_error(pred, target, absolute=False):
    """Return the mean percentage error of `pred` relative to `target`.

    With ``absolute=True`` the magnitude of each deviation is used (MAPE);
    otherwise signed deviations are averaged (MPE).
    """
    deviation = pred - target
    if absolute:
        deviation = np.abs(deviation)
    return np.mean(deviation / target * 100)


n_samples = len(train_c_homo)
# Number of features per sample once every non-leading axis is flattened.
n_features = int(np.prod(train_homo_pairs.shape[1:]))

# With n_ensemble = 8 only i = 7 runs: 70 % of the samples are held out for
# testing and the remaining 30 % are used for training.
for i in range(7, n_ensemble):
    # Randomly pick the HOMO samples to hold out; the rest is the training set.
    # np.delete returns a new array, so the source arrays are left untouched.
    homo_index = np.random.choice(
        n_samples, size=int(n_samples * 0.1 * i), replace=False)
    train_homo_ = np.delete(train_homo_pairs, homo_index, 0)
    train_chomo_ = np.delete(train_c_homo, homo_index, 0)

    # LUMO split, drawn with its own index so it cannot clobber the HOMO one.
    # It is not consumed by the HOMO model trained below.
    lumo_index = np.random.choice(
        len(train_c_lumo), size=int(len(train_c_lumo) * 0.1 * i), replace=False)
    train_lumo_ = np.delete(train_lumo_pairs, lumo_index, 0)
    train_clumo_ = np.delete(train_c_lumo, lumo_index, 0)

    # The test set is exactly the held-out HOMO samples (in ascending row
    # order), so it never overlaps with the training set.
    held_out = np.sort(homo_index)
    test_homo = train_homo_pairs[held_out]
    test_chomo = train_c_homo[held_out]

    gpr = GPR(setting_dict={'alpha':0.02, 'kernel':'RBF', 'optimizer':'fmin_l_bfgs_b', 'length_scale':0.1, 'length_scale_bounds':(1e-2, 1e1)})
    gpr.train(train_homo_.reshape((len(train_homo_), n_features)), train_chomo_)

    # Predict on the full data set, the training subset and the test subset.
    pred1, std1 = gpr.predict(train_homo_pairs.reshape((len(train_homo_pairs), n_features)))
    pred2, std2 = gpr.predict(train_homo_.reshape((len(train_homo_), n_features)))
    pred3, std3 = gpr.predict(test_homo.reshape((len(test_homo), n_features)))

    evaluated = ((pred1, train_c_homo), (pred2, train_chomo_), (pred3, test_chomo))
    n_train, n_test = len(train_homo_), len(test_homo)

    # MPE
    mpe = tuple(_percentage_error(p, t) for p, t in evaluated)
    MPEs.append(mpe)
    print('Ensemble %d:'%i)
    print(_REPORT % ('MPE', mpe[0], n_train, mpe[1], n_test, mpe[2]))

    # MAPE
    mape = tuple(_percentage_error(p, t, absolute=True) for p, t in evaluated)
    print(_REPORT % ('MAPE', mape[0], n_train, mape[1], n_test, mape[2]))
    print('\n')
    MAPEs.append(mape)

    # Parity plot for the training subset. Targets are -log(coupling), so
    # exp(-value) recovers the coupling; the 27.211 factor presumably converts
    # Hartree to eV (the axes are labelled in eV) -- TODO confirm.
    x = np.exp(-pred2) * 27.211
    y1 = np.exp(-train_chomo_) * 27.211
    fig, ax = plt.subplots()
    ax.scatter(x, y1)
    ax.plot([0, 1.25], [0, 1.25], color='r')  # y = x reference line
    ax.set_xlim(0, 1.25)
    ax.set_ylim(0, 1.25)
    ax.set_xlabel('GPR Electron Coupling (eV)')
    ax.set_ylabel('CDFT Electron Coupling (eV)')
    ax.set_aspect('equal')
    fig.savefig('gpr_%d.png'%len(train_homo_))
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)