import numpy as np
import pickle
import os
def model_pred(mod, X):
    """
    Model prediction (for XGBClassifier, GradientBoostingClassifier
    and Keras MLP)
    """
    if "XGB" in str(type(mod)):
        pred = mod.predict_proba(X)[:,1]
    elif "GradientBoostingClassifier" in str(type(mod)):
        pred = mod.predict_proba(X)[:,1]
    # Keras models: the class path contains "tensorflow" in older TF
    # versions and "keras" in newer ones, so accept either
    elif "tensorflow" in str(type(mod)) or "keras" in str(type(mod)).lower():
        # Scale slightly below 1, presumably to keep the predicted
        # probabilities strictly below 1.0 downstream
        pred = mod.predict(X)[:,0]*.999
    else:
        raise ValueError("Unsupported model type: " + str(type(mod)))
    return(pred)
def models_pred(models, X):
    """
    Params
    ======
    models : List of models (currently accepts XGBClassifier,
             GradientBoostingClassifier and Keras MLP)
    X      : X data to make predictions on
    Returns
    =======
    pred_arr : Returns predictions with shape (# samples in X, # models)
    """
    pred_arr = np.vstack([model_pred(mod, X) for mod in models]).T
    return(pred_arr)
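# Example (hypothetical usage; assumes `xgb_model` and `mlp_model` were fit
# elsewhere with the helpers below): stacking two models' predicted
# probabilities column-wise gives an array of shape (n_samples, 2),
# one column per model:
#
#   pred_arr = models_pred([xgb_model, mlp_model], X_test)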
#####################################
### Gradient boosting trees (XGB) ###
#####################################
from xgboost import XGBClassifier
def fit_xgb_trval(X_train, y_train, X_valid=None, y_valid=None):
    """
    Fit an xgboost model on the available data. If validation data is
    provided, use it as an evaluation set with early stopping.
    """
    xgb_model = XGBClassifier()
    if X_valid is not None and y_valid is not None:
        # Note: newer xgboost releases (>= 2.0) expect early_stopping_rounds
        # in the XGBClassifier constructor rather than in fit()
        xgb_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)],
                      early_stopping_rounds=10, verbose=False)
    else:
        xgb_model.fit(X_train, y_train, verbose=False)
    return(xgb_model)
def fit_xgb(X_train, y_train, X_valid=None, y_valid=None, mname=None):
    """
    Params
    ======
    X_train : Train X data
    y_train : Train y data
    X_valid : Validation X data
    y_valid : Validation y data
    mname   : The name of the model (for saving in models/ dir)
    Returns
    =======
    xgb_model : Return the trained model
    """
    # Don't save or try to load if no model name
    if mname is None:
        xgb_model = fit_xgb_trval(X_train, y_train, X_valid, y_valid)
        return(xgb_model)
    # Train or load the model depending on whether it already exists
    mpath = "models/" + mname
    if not os.path.exists(mpath):
        # Make sure the models/ directory exists before saving
        os.makedirs("models", exist_ok=True)
        xgb_model = fit_xgb_trval(X_train, y_train, X_valid, y_valid)
        with open(mpath, "wb") as f:
            pickle.dump(xgb_model, f)
    else:
        with open(mpath, "rb") as f:
            xgb_model = pickle.load(f)
    return(xgb_model)
#########################################
### Gradient boosting trees (sklearn) ###
#########################################
from sklearn.ensemble import GradientBoostingClassifier
def fit_gbc(X_train, y_train, X_valid=None, y_valid=None, mname=None):
    """
    Params
    ======
    X_train : Train X data
    y_train : Train y data
    X_valid : Validation X data (unused; kept for a uniform interface)
    y_valid : Validation y data (unused; kept for a uniform interface)
    mname   : The name of the model (for saving in models/ dir)
    Returns
    =======
    gbc_model : Return the trained model
    """
    # Don't save or try to load if no model name
    if mname is None:
        gbc_model = GradientBoostingClassifier()
        gbc_model.fit(X_train, y_train)
        return(gbc_model)
    # Train or load the model depending on whether it already exists
    mpath = "models/" + mname
    if not os.path.exists(mpath):
        # Make sure the models/ directory exists before saving
        os.makedirs("models", exist_ok=True)
        gbc_model = GradientBoostingClassifier()
        gbc_model.fit(X_train, y_train)
        with open(mpath, "wb") as f:
            pickle.dump(gbc_model, f)
    else:
        with open(mpath, "rb") as f:
            gbc_model = pickle.load(f)
    return(gbc_model)
#####################################
### Multi-layer perceptrons (MLP) ###
#####################################
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Input
def fit_mlp(X_train, y_train, X_valid=None, y_valid=None, mname=None, eta=0.001):
    """
    Params
    ======
    X_train : Train X data
    y_train : Train y data
    X_valid : Validation X data (unused; kept for a uniform interface)
    y_valid : Validation y data (unused; kept for a uniform interface)
    mname   : The name of the model (for saving in models/ dir)
    eta     : Learning rate for the SGD optimizer
    Returns
    =======
    mlp_model : Return the trained model
    """
    # Don't save or try to load if no model name
    if mname is None:
        mlp_model = Sequential()
        mlp_model.add(Dense(100, activation='relu', input_shape=(X_train.shape[1],)))
        mlp_model.add(Dense(100, activation='relu'))
        mlp_model.add(Dense(1, activation='sigmoid'))
        mlp_model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=eta))
        # mlp_model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.01))
        mlp_model.fit(X_train, y_train, epochs=10, verbose=False)
        return(mlp_model)
    # Train or load the model depending on whether it already exists
    mpath = "models/" + mname
    if not os.path.exists(mpath):
        # Make sure the models/ directory exists before saving
        os.makedirs("models", exist_ok=True)
        mlp_model = Sequential()
        mlp_model.add(Dense(100, activation='relu', input_shape=(X_train.shape[1],)))
        mlp_model.add(Dense(100, activation='relu'))
        mlp_model.add(Dense(1, activation='sigmoid'))
        mlp_model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=eta))
        # mlp_model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.01))
        mlp_model.fit(X_train, y_train, epochs=10, verbose=False)
        mlp_model.save(mpath)
    else:
        mlp_model = load_model(mpath)
    return(mlp_model)
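# Minimal end-to-end sketch (illustrative only, not part of the training
# pipeline above): fit the three model types on synthetic data and stack
# their predictions with models_pred. Assumes xgboost, scikit-learn and
# tensorflow are installed; no models are cached since mname is omitted.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 5))
    y = (X[:, 0] + 0.5 * rng.normal(size=200) > 0).astype(int)
    X_tr, y_tr = X[:150], y[:150]
    X_va, y_va = X[150:], y[150:]
    models = [
        fit_xgb(X_tr, y_tr),   # xgboost, no validation set / early stopping
        fit_gbc(X_tr, y_tr),   # sklearn gradient boosting
        fit_mlp(X_tr, y_tr),   # Keras MLP
    ]
    pred_arr = models_pred(models, X_va)
    print("prediction array shape:", pred_arr.shape)  # (50, 3)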