Skip to content

Commit

Permalink
Merge pull request #4 from skylergrammer/issue3
Browse files Browse the repository at this point in the history
Issue3
  • Loading branch information
skylergrammer authored Jul 22, 2019
2 parents 193a318 + 7a2971e commit 60b6b78
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 42 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
*.pyc
py37*
*.egg-info
__pycache__
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ k = (log(`T_min`) - log(`T`)) / log(`alpha`)
Example
===
```python
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets
from sklearn.metrics import classification_report
from simulated_annealing.optimize import SimulatedAnneal
Expand All @@ -71,7 +71,7 @@ svc_params = {'C':np.logspace(-8, 10, 19, base=2),
clf = svm.LinearSVC()
# Initialize Simulated Annealing and fit
sa = SimulatedAnneal(clf, svc_params, T=10.0, T_min=0.001, alpha=0.75,
verbose=True, max_iter=0.25, n_trans=5, max_runtime=300,
verbose=True, max_iter=1, n_trans=5, max_runtime=300,
cv=3, scoring='f1_macro', refit=True)
sa.fit(X_train, y_train)
# Print the best score and the best params
Expand Down
17 changes: 7 additions & 10 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = "simulated_annealing",
packages = find_packages(),
version = "0.2.0",
version = "0.3.0",
author='Skyler Grammer, Andrew Nystrom',
author_email="[email protected], [email protected]",
description = "A Simulated Annealing implimentation with a scikit-learn style API backed by joblib for speed.",
Expand All @@ -12,13 +12,10 @@
license = "Apache 2.0",
install_requires = ['scikit-learn>=0.16.0', 'numpy>=1.9.0'],
classifiers = ["Programming Language :: Python",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.4",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Development Status :: 4 - Beta",
"Intended Audience :: Developers"
]
)
"Programming Language :: Python :: 3.4+",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Development Status :: 4 - Beta",
"Intended Audience :: Developers"
])
71 changes: 41 additions & 30 deletions simulated_annealing/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,35 @@
from sklearn.model_selection import KFold
from sklearn.model_selection._validation import _fit_and_score

class SimulatedAnneal(object):

def accept_prob(old, new, T):
    """Return the acceptance probability for moving from `old` to `new`.

    The search maximizes the score, so the result is
    ``exp((new - old) / (T + 0.01))``: greater than 1 when the new score
    improves on the old one, and between 0 and 1 when it is worse.

    Parameters
    ----------
    old : score of the currently accepted parameters.
    new : score of the candidate parameters.
    T : current annealing temperature.
    """
    # Pad the temperature slightly so a temperature of exactly zero
    # cannot cause a division by zero.
    T += 0.01
    score_gain = new - old
    return np.exp(score_gain / T)


def dt(t0, t1):
    """Return the elapsed time ``t1 - t0``, or 0 when `t0` is None.

    A start time of None means no start was recorded, so the elapsed
    time is reported as zero.
    """
    return 0 if t0 is None else t1 - t0


class SimulatedAnneal():
def __init__(self, estimator, param_grid, scoring='roc_auc',
T=10, T_min=0.0001, alpha=0.75, n_trans=10,
max_iter=300, max_runtime=300, cv=3,
verbose=False, refit=True, n_jobs=1, max_score=np.inf):

assert alpha <= 1.0
assert T > T_min
assert isinstance(param_grid, dict) or isinstance(param_grid, list)
assert isinstance(param_grid, (dict, list))
# If param_grid is a list of dicts, convert to a single dict
if isinstance(param_grid, list):
try:
param_grid_dict = {}
for each in param_grid:
k,v = each.items()[0]
k, v = each.items()[0]
param_grid_dict[k] = v
param_grid = param_grid_dict
except:
Expand Down Expand Up @@ -76,12 +90,12 @@ def __init__(self, estimator, param_grid, scoring='roc_auc',

def fit(self, X, y):
# If types of X, y are dataframe, convert to matrix
if isinstance(X,pd.DataFrame):
X=X.as_matrix()
if isinstance(y,pd.DataFrame):
y=y.as_matrix()
elif isinstance(y,list) or isinstance(y, pd.Series):
y=np.array(y)
if isinstance(X, pd.DataFrame):
X = X.as_matrix()
if isinstance(y, pd.DataFrame):
y = y.as_matrix()
elif isinstance(y, (list, pd.Series)):
y = np.array(y)
# Set up the initial params
T = self.__T
T_min = self.__T_min
Expand All @@ -94,12 +108,9 @@ def fit(self, X, y):
cv = self.__cv
new_score = -np.inf

# Computes the acceptance probability as a function of T; maximization
accept_prob = lambda old, new, T: np.exp((new-old)/T)

# Select random values for each parameter and convert to dict
old_params = dict((k, val.rvs() if hasattr(val, 'rvs')
else np.random.choice(val))
else np.random.choice(val))
for k, val in grid.items())

# Compute the initial score based off randomly selected params
Expand Down Expand Up @@ -128,7 +139,6 @@ def fit(self, X, y):
time_at_start = None
else:
time_at_start = time.time()
dt = lambda t0,t1: t1-t0 if t0 is not None else 0
t_elapsed = dt(time_at_start, time.time())

while T > T_min and total_iter < max_iter and t_elapsed < max_runtime and new_score < self.__max_score:
Expand All @@ -144,7 +154,7 @@ def fit(self, X, y):
else:
sampel_space = [v for v in grid[rand_key] if v != old_params[rand_key]]
if not sampel_space:
sampel_space = grid[rand_key]
sampel_space = grid[rand_key]
new_rand_key_val = np.random.choice(sampel_space)
new_params[rand_key] = new_rand_key_val
try:
Expand All @@ -169,10 +179,9 @@ def fit(self, X, y):
best_params = new_params

if self.__verbose:
print("%s T: %s, score: %s, std: %s, params: %s"
% (str(total_iter), '{:.5f}'.format(T),
'{:.6f}'.format(new_score), '{:.6f}'.format(new_std),
str({key: round(value, 3) for key, value in new_params.items()})))
print("{} T: {:.5f}, score: {:.6f}, std: {:.6f}, params: {}"
.format(total_iter, T, new_score, new_std,
{k: v for k, v in new_params.items()}))

# Decide whether to keep old params or move to new params
a = accept_prob(old_score, new_score, T)
Expand All @@ -184,6 +193,7 @@ def fit(self, X, y):
t_elapsed = dt(time_at_start, time.time())
iter_ += 1
if new_score >= self.__max_score:
print("Max score reached {}!".format(new_score))
break
T *= alpha

Expand All @@ -198,7 +208,8 @@ def fit(self, X, y):
self.best_score_ = best_score
self.best_params_ = best_params

class MultiProcCvFolds(object):

class MultiProcCvFolds():
def __init__(self, clf, metric, cv, n_jobs=1, verbose=0, pre_dispatch='2*n_jobs'):
try:
cv = int(cv)
Expand All @@ -217,22 +228,22 @@ def fit_score(self, X, Y):
n_folds = self.cv
self.cv = KFold(n_splits=n_folds).split(X)

out = Parallel(
n_jobs=self.n_jobs, verbose=self.verbose,
pre_dispatch=self.pre_dispatch
)(
delayed(_fit_and_score)(clone(self.clf), X, Y, self.metric,
train, test, self.verbose, {},
{}, return_parameters=False,
error_score='raise')
for train, test in self.cv)
# Formatting is kinda ugly but provides best debugging view
out = Parallel(n_jobs=self.n_jobs,
verbose=self.verbose,
pre_dispatch=self.pre_dispatch)\
(delayed(_fit_and_score)(clone(self.clf), X, Y, self.metric,
train, test, self.verbose, {},
{}, return_parameters=False,
error_score='raise')
for train, test in self.cv)

# Out is a list of triplet: score, estimator, n_test_samples
scores = list(zip(*out))[0]
return np.mean(scores), np.std(scores)


class CVFolds(object):
class CVFolds():
def __init__(self, estimator, scorer, cv=3):
try:
cv = int(cv)
Expand Down

0 comments on commit 60b6b78

Please sign in to comment.