Commit 60d3eb4: Merges branch warm_start into master

ClimbsRocks committed Jul 9, 2017
2 parents 8f1a290 + e13c917
Showing 7 changed files with 62 additions and 26 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
@@ -12,12 +12,12 @@ python:

 # Command to install dependencies
 install:
-- docker pull climbsrocks/auto_ml_tests:0.0.3
+- docker pull climbsrocks/auto_ml_tests:0.0.5
 - pip install coveralls

 # Command to run tests
-script: docker run -v $PWD:/tmp/working -w=/tmp/working --rm -it climbsrocks/auto_ml_tests:0.0.3 python setup.py nosetests
+script: docker run -v $PWD:/tmp/working -w=/tmp/working --rm -it climbsrocks/auto_ml_tests:0.0.5 python setup.py nosetests

 after_success:
-- docker cp climbsrocks/auto_ml_tests:0.0.3:/.coverage
+- docker cp climbsrocks/auto_ml_tests:0.0.5:/.coverage
 - coveralls
4 changes: 4 additions & 0 deletions auto_ml/predictor.py
@@ -1427,6 +1427,10 @@ def score_uncertainty(self, X, y, advanced_scoring=True, verbose=2):
         return score


+    def transform_only(self, X):
+        return self.transformation_pipeline.transform(X)
+
+
     def save(self, file_name='auto_ml_saved_pipeline.dill', verbose=True):

         def save_one_step(pipeline_step, used_deep_learning):
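For orientation, here is a minimal usage sketch of the new method (the toy DataFrame and column_descriptions below are placeholders for illustration, not part of this commit):

import pandas as pd
from auto_ml import Predictor

# Toy training data, purely for illustration.
df_train = pd.DataFrame({'sqft': [1000, 1500, 2000, 2500],
                         'price': [199, 299, 399, 499]})
column_descriptions = {'price': 'output'}

ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
ml_predictor.train(df_train)

# New in this commit: run only the fitted transformation_pipeline,
# returning the engineered feature matrix rather than predictions.
X_transformed = ml_predictor.transform_only(df_train)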
9 changes: 9 additions & 0 deletions auto_ml/utils.py
@@ -165,3 +165,12 @@ def score_uncertainty(self, X):
         return self.steps[-1][-1].score_uncertainty(Xt)


+    @if_delegate_has_method(delegate='_final_estimator')
+    def transform_only(self, X):
+        Xt = X
+        for name, transform in self.steps[:-1]:
+            if transform is not None:
+                Xt = transform.transform(Xt)
+        return self.steps[-1][-1].transform_only(Xt)
+
+
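The decorator above is scikit-learn's duck-typing helper for meta-estimators. A rough standalone sketch of its behavior (the classes here are invented for illustration; recent scikit-learn releases replace if_delegate_has_method with available_if):

from sklearn.utils.metaestimators import if_delegate_has_method

class FinalStep:
    def transform_only(self, X):
        return X

class Wrapper:
    def __init__(self, final_estimator):
        self._final_estimator = final_estimator

    # The method only passes hasattr() checks when the delegate
    # (self._final_estimator) also defines transform_only.
    @if_delegate_has_method(delegate='_final_estimator')
    def transform_only(self, X):
        return self._final_estimator.transform_only(X)

print(hasattr(Wrapper(FinalStep()), 'transform_only'))  # True
print(hasattr(Wrapper(object()), 'transform_only'))     # False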
46 changes: 36 additions & 10 deletions auto_ml/utils_model_training.py
@@ -1,6 +1,7 @@
 from collections import Iterable
 from copy import deepcopy
 import os
+import random

 import numpy as np
 import pandas as pd
@@ -88,6 +89,21 @@ def fit(self, X, y):
             early_stopping = EarlyStopping(monitor='loss', patience=25, verbose=1)
             self.model.fit(X_fit, y, callbacks=[early_stopping])

+        elif self.model_name[:4] == 'LGBM':
+
+            X_fit, X_test, y, y_test = train_test_split(X_fit, y, test_size=0.15)
+
+            if self.type_of_estimator == 'regressor':
+                eval_metric = 'rmse'
+            elif self.type_of_estimator == 'classifier':
+                if len(set(y_test)) > 2:
+                    eval_metric = 'multi_logloss'
+                else:
+                    eval_metric = 'binary_logloss'
+
+
+            self.model.fit(X_fit, y, eval_set=[(X_test, y_test)], early_stopping_rounds=50, eval_metric=eval_metric, eval_names=['random_holdout_set_from_training_data'])
+
         elif self.model_name[:16] == 'GradientBoosting':
             if scipy.sparse.issparse(X_fit):
                 X_fit = X_fit.todense()
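A standalone sketch of the early-stopping pattern this hunk adds, on synthetic data (note that LightGBM releases after this commit moved early_stopping_rounds out of fit() and into callbacks):

import lightgbm as lgb
import numpy as np
from sklearn.model_selection import train_test_split

# Synthetic regression data, purely for illustration.
X = np.random.rand(1000, 10)
y = 3 * X[:, 0] + np.random.rand(1000)

# Hold out 15% of the training data solely to drive early stopping,
# mirroring the train_test_split call above.
X_fit, X_holdout, y_fit, y_holdout = train_test_split(X, y, test_size=0.15)

model = lgb.LGBMRegressor(n_estimators=2000, learning_rate=0.05, num_leaves=8)
model.fit(X_fit, y_fit,
          eval_set=[(X_holdout, y_holdout)],
          eval_metric='rmse',
          early_stopping_rounds=50)
# Training stops once holdout rmse fails to improve for 50 consecutive
# rounds, typically well short of the 2000-tree budget.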
@@ -121,7 +137,7 @@ def fit(self, X, y):
                         best_model = deepcopy(self.model)
                     else:
                         num_worse_rounds += 1
-
+                    print('[' + str(num_iter) + '] random_holdout_set_from_training_data\'s score is: ' + str(round(val_loss, 3)))
                     if num_worse_rounds >= patience:
                         break
         except KeyboardInterrupt:
@@ -299,7 +315,10 @@ def predict_proba(self, X, verbose=False):
             X = X.todense()

         try:
-            predictions = self.model.predict_proba(X)
+            if self.model_name[:4] == 'LGBM':
+                predictions = self.model.predict_proba(X, num_iteration=self.model.best_iteration)
+            else:
+                predictions = self.model.predict_proba(X)

         except AttributeError as e:
             try:
@@ -347,19 +366,22 @@ def predict(self, X, verbose=False):
         else:
             X_predict = X

-        prediction = self.model.predict(X_predict)
+        if self.model_name[:4] == 'LGBM':
+            predictions = self.model.predict(X_predict, num_iteration=self.model.best_iteration)
+        else:
+            predictions = self.model.predict(X_predict)
         # Handle cases of getting a prediction for a single item.
         # It makes a cleaner interface to get just the single prediction back, rather than a list with the prediction hidden inside.

-        if isinstance(prediction, np.ndarray):
-            prediction = prediction.tolist()
-        if isinstance(prediction, float) or isinstance(prediction, int) or isinstance(prediction, str):
-            return prediction
+        if isinstance(predictions, np.ndarray):
+            predictions = predictions.tolist()
+        if isinstance(predictions, float) or isinstance(predictions, int) or isinstance(predictions, str):
+            return predictions

-        if len(prediction) == 1:
-            return prediction[0]
+        if len(predictions) == 1:
+            return predictions[0]
         else:
-            return prediction
+            return predictions

     # transform is initially designed to be used with feature_learning
     def transform(self, X):
@@ -369,6 +391,10 @@ def transform(self, X):
             X = scipy.sparse.hstack([X, predicted_features], format='csr')
         return X

+    # Allows the user to get the fully transformed data
+    def transform_only(self, X):
+        return X
+
     def predict_uncertainty(self, X):
         if self.uncertainty_model is None:
             print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
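The num_iteration argument threaded through predict and predict_proba above is what makes the large n_estimators budget safe: inference uses only the trees up to the early-stopping point. Continuing the sketch from the fit() hunk (the attribute is best_iteration in the LightGBM of this era, and best_iteration_ on the sklearn wrapper in newer releases):

# Reusing `model` and the holdout split from the earlier sketch.
best_iter = getattr(model, 'best_iteration', None) or model.best_iteration_
predictions = model.predict(X_holdout, num_iteration=best_iter)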
8 changes: 4 additions & 4 deletions auto_ml/utils_models.py
@@ -80,8 +80,8 @@ def get_model_from_name(model_name, training_params=None):
         'AdaBoostRegressor': {'n_estimators': 10},
         'XGBRegressor': {'nthread':-1, 'n_estimators': 200},
         'XGBClassifier': {'nthread':-1, 'n_estimators': 200},
-        'LGBMRegressor': {},
-        'LGBMClassifier': {},
+        'LGBMRegressor': {'n_estimators': 2000, 'learning_rate': 0.05, 'num_leaves': 8, 'lambda_l2': 0.001},
+        'LGBMClassifier': {'n_estimators': 2000, 'learning_rate': 0.05, 'num_leaves': 8, 'lambda_l2': 0.001},
         'DeepLearningRegressor': {'epochs': epochs, 'batch_size': 50, 'verbose': 2},
         'DeepLearningClassifier': {'epochs': epochs, 'batch_size': 50, 'verbose': 2}
     }
@@ -454,7 +454,7 @@ def get_search_params(model_name):

     , 'LGBMClassifier': {
         # 'max_bin': [25, 50, 100, 200, 250, 300, 400, 500, 750, 1000]
-        'num_leaves': [10, 20, 30, 40, 50, 200]
+        'num_leaves': [2, 4, 7, 10, 15, 20, 25, 30, 35, 40, 50, 125, 200]
         , 'colsample_bytree': [0.7, 0.9, 1.0]
         , 'subsample': [0.7, 0.9, 1.0]
         # , 'subsample_freq': [0.3, 0.5, 0.7, 0.9, 1.0]
@@ -466,7 +466,7 @@

     , 'LGBMRegressor': {
         # 'max_bin': [25, 50, 100, 200, 250, 300, 400, 500, 750, 1000]
-        'num_leaves': [10, 20, 30, 40, 50, 200]
+        'num_leaves': [2, 4, 7, 10, 15, 20, 25, 30, 35, 40, 50, 125, 200]
         , 'colsample_bytree': [0.7, 0.9, 1.0]
         , 'subsample': [0.7, 0.9, 1.0]
         # , 'subsample_freq': [0.3, 0.5, 0.7, 0.9, 1.0]
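auto_ml feeds these grids into its hyperparameter search internally; as a rough standalone sketch of what searching the widened num_leaves range looks like (synthetic data, grid values copied from above):

import lightgbm as lgb
import numpy as np
from sklearn.model_selection import RandomizedSearchCV

# Synthetic binary-classification data, purely for illustration.
X = np.random.rand(500, 10)
y = (X[:, 0] > 0.5).astype(int)

param_distributions = {
    'num_leaves': [2, 4, 7, 10, 15, 20, 25, 30, 35, 40, 50, 125, 200],
    'colsample_bytree': [0.7, 0.9, 1.0],
    'subsample': [0.7, 0.9, 1.0],
}
search = RandomizedSearchCV(lgb.LGBMClassifier(), param_distributions, n_iter=10, cv=3)
search.fit(X, y)
print(search.best_params_)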
11 changes: 3 additions & 8 deletions tests/Dockerfile
@@ -8,11 +8,6 @@ RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list &
     cd /usr/local/src && \
     pip install tensorflow && \

-    # Microsoft's LightGBM
-    cd /usr/local/src && git clone --recursive --depth 1 https://github.com/Microsoft/LightGBM && \
-    cd LightGBM && mkdir build && cd build && cmake .. && make -j $(nproc) && \
-    cd /usr/local/src/LightGBM/python-package && python setup.py install && \
-
     # XGBoost
     cd /usr/local/src && mkdir xgboost && cd xgboost && \
     git clone --depth 1 --recursive https://github.com/dmlc/xgboost.git && cd xgboost && \
@@ -21,9 +16,9 @@ RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list &
     apt-get autoremove -y && apt-get clean

 RUN pip install --upgrade pip
-RUN pip install --upgrade numpy dill h5py scikit-learn scipy python-dateutil pandas pathos keras coveralls nose
+RUN pip install --upgrade numpy dill h5py scikit-learn scipy python-dateutil pandas pathos keras coveralls nose lightgbm tabulate imblearn

 # To update this image and upload it:
 # docker build -t testdocker .
-# docker tag testdocker climbsrocks/auto_ml_tests:0.0.4
-# docker push testdocker climbsrocks/auto_ml_tests:0.0.4
+# docker tag testdocker climbsrocks/auto_ml_tests:0.0.6
+# docker push climbsrocks/auto_ml_tests:0.0.6
4 changes: 3 additions & 1 deletion tests/classifiers.py
@@ -42,6 +42,8 @@ def optimize_final_model_classification(model_name=None):

     if model_name == 'DeepLearningClassifier':
         lower_bound = -0.235
+    if model_name == 'LGBMClassifier':
+        lower_bound = -0.221

     assert lower_bound < test_score < -0.17

@@ -429,7 +431,7 @@ def feature_learning_categorical_ensembling_getting_single_predictions_classific
     if model_name == 'GradientBoostingClassifier' or model_name is None:
         lower_bound = -0.25
     if model_name == 'LGBMClassifier':
-        lower_bound = -0.221
+        lower_bound = -0.23
     if model_name == 'XGBClassifier':
         lower_bound = -0.25

