Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Windows multiprocessing | Added pretty README plots | Issue with AttributeError: can't set attribute #53

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,20 @@ or clone the repo and just type the following on your shell:

python setup.py install

Usage examples
--------------

Recreating the sklearn [classifier comparison plot](http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html) with no prior hyper-parameter knowledge. The average fitness of the GA is shown below each plot for 50 generations in orange. The best fitness of the GA is shown in blue.

![img](https://github.com/flipdazed/sklearn-deap/blob/master/example/all_classifiers.png)

This example code can be found in `/example` and demonstrates that tuned hyper-parameters can easily be found for a variety of algorithms.

We can compare to the [original classifier comparison](http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html) and see that there is generally an improvement in accuracy.

![img](http://scikit-learn.org/stable/_images/sphx_glr_plot_classifier_comparison_001.png)


Usage examples
--------------

Example of usage:

Expand Down
54 changes: 32 additions & 22 deletions evolutionary_search/cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
import numpy as np
import random
from deap import base, creator, tools, algorithms
from deap import cma
from collections import defaultdict
from sklearn.base import clone, is_classifier
from sklearn.model_selection._validation import _fit_and_score
from sklearn.model_selection._search import BaseSearchCV, check_cv, _check_param_grid
from sklearn.metrics.scorer import check_scoring
from sklearn.utils.validation import _num_samples, indexable


def enum(**enums):
    """Build a lightweight enum-like class.

    Each keyword argument becomes a class attribute on a dynamically
    created ``Enum`` type, e.g. ``enum(RED=1, BLUE=2).RED == 1``.
    """
    members = dict(enums)
    return type('Enum', (), members)

Expand Down Expand Up @@ -98,10 +100,13 @@ def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
else:
for train, test in cv.split(X, y):
assert len(train) > 0 and len(test) > 0, "Training and/or testing not long enough for evaluation."
_score = _fit_and_score(estimator=individual.est, X=X, y=y, scorer=scorer,
train=train, test=test, verbose=verbose,
parameters=parameters, fit_params=fit_params,
error_score=error_score)[0]
try:
_score = _fit_and_score(estimator=individual.est, X=X, y=y, scorer=scorer,
train=train, test=test, verbose=verbose,
parameters=parameters, fit_params=fit_params,
error_score=error_score)[0]
except:
return (-np.inf,)

if iid:
score += _score * len(test)
Expand Down Expand Up @@ -278,6 +283,8 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
With the statistics of the evolution.

"""
best_score_ = None
best_params_ = None
def __init__(self, estimator, params, scoring=None, cv=4,
refit=True, verbose=False, population_size=50,
gene_mutation_prob=0.1, gene_crossover_prob=0.5,
Expand All @@ -298,11 +305,11 @@ def __init__(self, estimator, params, scoring=None, cv=4,
self.gene_type = gene_type
self.all_history_, self.all_logbooks_ = [], []
self._cv_results = None
self.best_score_ = None
self.best_params_ = None
self.score_cache = {}
self.n_jobs = n_jobs
# if "FitnessMax" not in creator.__dict__:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# if "Individual" not in creator.__dict__:
creator.create("Individual", list, est=clone(self.estimator), fitness=creator.FitnessMax)

@property
Expand Down Expand Up @@ -358,9 +365,11 @@ def fit(self, X, y=None):
self.best_estimator_ = clone(self.estimator)
self.best_estimator_.set_params(**self.best_mem_params_)
if self.fit_params is not None:

self.best_estimator_.fit(X, y, **self.fit_params)
else:
self.best_estimator_.fit(X, y)
return self

def _fit(self, X, y, parameter_dict):
self._cv_results = None # To indicate to the property the need to update
Expand Down Expand Up @@ -394,16 +403,16 @@ def _fit(self, X, y, parameter_dict):
# wrapper so that pools are not recursively created when the module is reloaded in each map
if isinstance(self.n_jobs, int):
if self.n_jobs > 1 or self.n_jobs < 0:
from multiprocessing import Pool # Only imports if needed
if os.name == 'nt': # Checks if we are on Windows
warnings.warn(("Windows requires Pools to be declared from within "
"an \'if __name__==\"__main__\":\' structure. In this "
"case, n_jobs will accept map functions as well to "
"facilitate custom parallelism. Please check to see "
"that all code is working as expected."))
pool = Pool(self.n_jobs)
toolbox.register("map", pool.map)

if __name__ == '__main__':
from multiprocessing import Pool # Only imports if needed
if os.name == 'nt': # Checks if we are on Windows
warnings.warn(("Windows requires Pools to be declared from within "
"an \'if __name__==\"__main__\":\' structure. In this "
"case, n_jobs will accept map functions as well to "
"facilitate custom parallelism. Please check to see "
"that all code is working as expected."))
pool = Pool(self.n_jobs)
toolbox.register("map", pool.map)
# If it's not an int, we are going to pass it as the map directly
else:
try:
Expand All @@ -427,10 +436,10 @@ def _fit(self, X, y, parameter_dict):

# Stats
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.nanmean)
stats.register("min", np.nanmin)
stats.register("max", np.nanmax)
stats.register("std", np.nanstd)
stats.register("avg", lambda x: np.ma.masked_invalid(x).mean())
stats.register("min", lambda x: np.ma.masked_invalid(x).min())
stats.register("max", lambda x: np.ma.masked_invalid(x).max())
stats.register("std", lambda x: np.ma.masked_invalid(x).std())

# History
hist = tools.History()
Expand Down Expand Up @@ -463,8 +472,9 @@ def _fit(self, X, y, parameter_dict):

# Close your pools if you made them
if isinstance(self.n_jobs, int) and (self.n_jobs > 1 or self.n_jobs < 0):
pool.close()
pool.join()
if __name__ == '__main__':
pool.close()
pool.join()

self.best_score_ = current_best_score_
self.best_params_ = current_best_params_
Binary file added example/Thumbs.db
Binary file not shown.
Binary file added example/all_classifiers.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading