Commit a82a45f
Added: Optional use of mlflow, only recording params and fitness btm
Caparrini committed Mar 7, 2024
1 parent 33e8f35 commit a82a45f
Showing 4 changed files with 45 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/CI.yml
@@ -14,6 +14,8 @@ jobs:
run: pip install -r requirements.txt
- name: Install pytest and pytest-cov
run: pip install pytest pytest-cov
- name: Install mlflow
run: pip install mlflow
- name: Run tests and collect coverage
run: pytest --cov mloptimizer
- name: Upload coverage to Codecov
26 changes: 24 additions & 2 deletions mloptimizer/aux/tracker.py
@@ -2,6 +2,7 @@
import os
import shutil
from datetime import datetime
import importlib


class Tracker:
@@ -17,9 +18,10 @@ class Tracker:
log_file : str
Name of the log file.
"""
def __init__(self, name, folder=os.curdir, log_file="mloptimizer.log"):

def __init__(self, name, folder=os.curdir, log_file="mloptimizer.log", use_mlflow=False):
self.name = name
self.bugs = []
self.gen = 0
# Main folder, current by default
self.folder = create_optimization_folder(folder)
# Log files
@@ -34,6 +36,12 @@ def __init__(self, name, folder=os.curdir, log_file="mloptimizer.log"):
self.results_path = None
self.graphics_path = None

# MLFlow
self.use_mlflow = use_mlflow

if self.use_mlflow:
self.mlflow = importlib.import_module("mlflow")

def start_optimization(self, opt_class):
"""
Start the optimization process.
@@ -63,6 +71,9 @@ def start_checkpoint(self, opt_run_folder_name):
datetime.now().strftime("%Y%m%d_%H%M%S"),
type(self).__name__)

if self.use_mlflow:
self.mlflow.set_experiment(opt_run_folder_name)

self.opt_run_folder = os.path.join(self.folder, opt_run_folder_name)
self.opt_run_checkpoint_path = os.path.join(self.opt_run_folder, "checkpoints")
self.results_path = os.path.join(self.opt_run_folder, "results")
@@ -81,8 +92,19 @@ def start_checkpoint(self, opt_run_folder_name):
)

def log_clfs(self, classifiers_list: list, generation: int, fitness_list: list[int]):
self.gen = generation
for i in range(len(classifiers_list)):
self.optimization_logger.info(f"Generation {generation} - Classifier TOP {i}")
self.optimization_logger.info(f"Classifier: {classifiers_list[i]}")
self.optimization_logger.info(f"Fitness: {fitness_list[i]}")
self.optimization_logger.info("Hyperparams: {}".format(str(classifiers_list[i].get_params())))
self.gen = generation + 1

def log_evaluation(self, classifier, metric):
self.optimization_logger.info(f"Adding to mlflow...\nClassifier: {classifier}\nFitness: {metric}")

if self.use_mlflow:
with self.mlflow.start_run():
self.mlflow.log_params(classifier.get_params())
# We use the generation as the step
self.mlflow.log_metric(key="fitness", value=metric, step=self.gen)
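
For reference, log_evaluation uses the standard MLflow tracking pattern: one run per evaluated classifier, hyperparameters recorded with log_params, and the fitness recorded as a metric whose step is the current generation. A minimal standalone sketch of that pattern; the experiment name, estimator, and metric value below are placeholders, not values from this commit:

import mlflow
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(max_depth=3)  # placeholder estimator

mlflow.set_experiment("demo_experiment")   # hypothetical experiment name
with mlflow.start_run():
    # One MLflow run per evaluation: params once, fitness keyed by generation.
    mlflow.log_params(clf.get_params())
    mlflow.log_metric(key="fitness", value=0.93, step=0)  # dummy value and step
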
20 changes: 14 additions & 6 deletions mloptimizer/genoptimizer/base.py
@@ -44,14 +44,16 @@ class BaseOptimizer(object):
seed for the random functions
use_parallel : bool
flag to use parallel processing
use_mlflow : bool
flag to use mlflow
"""
__metaclass__ = ABCMeta

def __init__(self, features: np.array, labels: np.array, folder=os.curdir, log_file="mloptimizer.log",
hyperparam_space: HyperparameterSpace = None,
eval_function=train_score,
score_function=accuracy_score, seed=random.randint(0, 1000000),
use_parallel=False):
use_parallel=False, use_mlflow=False):
"""
Creates object BaseOptimizer.
@@ -71,6 +73,10 @@ def __init__(self, features: np.array, labels: np.array, folder=os.curdir, log_f
function to evaluate the model from X, y, clf
score_function : func, optional (default=balanced_accuracy_score)
function to score from y, y_pred
use_parallel : bool, optional (default=False)
flag to use parallel processing
use_mlflow : bool, optional (default=False)
flag to use mlflow
seed : int, optional (default=0)
seed for the random functions (deap, models, and splits on evaluations)
"""
@@ -80,9 +86,6 @@ def __init__(self, features: np.array, labels: np.array, folder=os.curdir, log_f
# Input search space hyperparameters
self.hyperparam_space = hyperparam_space

# Tracker
self.tracker = Tracker(name="mloptimizer", folder=folder, log_file=log_file)

self.eval_function = eval_function
self.score_function = score_function

@@ -96,6 +99,12 @@ def __init__(self, features: np.array, labels: np.array, folder=os.curdir, log_f
# Parallel
self.use_parallel = use_parallel

# mlflow
self.use_mlflow = use_mlflow

# Tracker
self.tracker = Tracker(name="mloptimizer", folder=folder, log_file=log_file, use_mlflow=self.use_mlflow)

def set_mlopt_seed(self, seed):
"""
Method to set the seed for the random functions
@@ -207,7 +216,7 @@ def evaluate_clf(self, individual):
"""
mean = self.eval_function(self.features, self.labels, self.get_clf(individual),
score_function=self.score_function)
# TODO: Log parameters and metrics to MLFlow if it is active
self.tracker.log_evaluation(self.get_clf(individual), mean)
return (mean,)

def population_2_df(self):
Expand Down Expand Up @@ -309,7 +318,6 @@ def optimize_clf(self, population: int = 10, generations: int = 3,
stats.register("max", np.max)

start_gen = 0
# self.file_out.write("Optimizing accuracy:\n")
# Using deap, custom for decision tree
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
10 changes: 5 additions & 5 deletions mloptimizer/test/test_genoptimizer/test_meta.py
@@ -33,12 +33,12 @@ def test_sklearn_optimizer(clf_class):
assert mlopt is not None


def test_mloptimizer():
@pytest.mark.parametrize('use_mlflow', [True, False])
def test_mloptimizer(use_mlflow):
X, y = load_iris(return_X_y=True)

(Check notice on line 38, GitHub Actions / Qodana Community for Python: PEP 8 naming convention violation, variable in function should be lowercase.)
mlopt = SklearnOptimizer(clf_class=DecisionTreeClassifier,
hyperparam_space=HyperparameterSpace(fixed_hyperparams={},
evolvable_hyperparams=custom_evolvable_hyperparams),
features=X, labels=y)
mlopt = SklearnOptimizer(clf_class=XGBClassifier,
hyperparam_space=HyperparameterSpace.get_default_hyperparameter_space(XGBClassifier),
features=X, labels=y, use_mlflow=use_mlflow)
mlopt.optimize_clf(5, 5)
assert mlopt is not None
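
With use_mlflow=True and no tracking server configured, MLflow writes to its default local ./mlruns store. A hedged sketch for inspecting the recorded runs afterwards (assumes the default local store):

# Browse interactively with the MLflow CLI:  mlflow ui
import mlflow

# Experiment names are timestamped by start_checkpoint, so list them rather than guessing.
for exp in mlflow.search_experiments():
    print(exp.name, exp.experiment_id)
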

