Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-6779: Add custom metric to leaderboard [nocheck] #15568

Merged
merged 13 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions h2o-algos/src/main/java/hex/ensemble/Metalearner.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ protected void setCommonParams(P parms) {
parms._weights_column = _model._parms._weights_column;
parms._offset_column = _model._parms._offset_column;
parms._main_model_time_budget_factor = _model._parms._main_model_time_budget_factor;
parms._custom_metric_func = _model._parms._custom_metric_func;
}

protected void setCrossValidationParams(P parms) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public static final class StackedEnsembleParametersV99 extends ModelParametersSc
"max_runtime_secs",
"weights_column",
"offset_column",
"custom_metric_func",
"seed",
"score_training_samples",
"keep_levelone_frame",
Expand Down
4 changes: 3 additions & 1 deletion h2o-algos/src/main/java/hex/tree/Score.java
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,13 @@ protected boolean modifiesVolatileVecs() {
_mb.reduce(t._mb);
}

// We need to satsify MB invariant
// We need to satisfy MB invariant
/**
 * Runs after the global reduce. Once the superclass hook has run, finalises the
 * metric builder (when one exists) with the computed custom metric and, if a
 * custom function reference is set, records this task on the builder.
 */
@Override protected void postGlobal() {
  super.postGlobal();
  // Nothing to finalise when no metric builder was created.
  if (_mb == null) return;
  _mb.postGlobal(getComputedCustomMetric());
  // Only attach the scoring task when a custom metric function is configured.
  if (cFuncRef != null) {
    _mb._CMetricScoringTask = (CMetricScoringTask) this;
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public static final class AutoMLBuildControl extends Iced {
public double tweedie_power = 1.5;
public double quantile_alpha = 0.5;
public double huber_alpha = 0.9;
public String custom_metric_func;

public boolean keep_cross_validation_predictions = false;
public boolean keep_cross_validation_models = false;
Expand Down
3 changes: 2 additions & 1 deletion h2o-automl/src/main/java/ai/h2o/automl/ModelingStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -361,11 +361,12 @@ protected void setCommonModelBuilderParams(Model.Parameters params) {
setCrossValidationParams(params);
setWeightingParams(params);
setClassBalancingParams(params);
params._custom_metric_func = buildSpec.build_control.custom_metric_func;

params._keep_cross_validation_models = buildSpec.build_control.keep_cross_validation_models;
params._keep_cross_validation_fold_assignment = buildSpec.build_control.nfolds != 0 && buildSpec.build_control.keep_cross_validation_fold_assignment;
params._export_checkpoints_dir = buildSpec.build_control.export_checkpoints_dir;

/** Using _main_model_time_budget_factor to determine if and how we should restrict the time for the main model.
* Value 0 means do not use time constraint for the main model.
* More details in {@link ModelBuilder#setMaxRuntimeSecsForMainModel()}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ public static final class AutoMLBuildControlV99 extends SchemaV3<AutoMLBuildSpec
@API(direction = API.Direction.INPUT,
help = "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1).")
public double huber_alpha;


@API(help = "Reference to custom evaluation function, format: `language:keyName=funcName`", level = API.Level.secondary, direction=API.Direction.INOUT, gridable = false)
public String custom_metric_func;

@API(help = "Reference to custom distribution, format: `language:keyName=funcName`", direction=API.Direction.INOUT)
public String custom_distribution_func;
} // class AutoMLBuildControlV99
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public AutoMLKeyV3(Key<AutoML> key) {

@API(help="Metric used to sort leaderboard", direction=API.Direction.INPUT)
public String sort_metric;

@API(help="The list of modeling steps effectively used during the AutoML run", direction=API.Direction.OUTPUT)
public StepDefinitionV99[] modeling_steps;

Expand Down
15 changes: 11 additions & 4 deletions h2o-core/src/main/java/hex/CMetricScoringTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ protected final void customMetricPerRow(double preds[], float yact[],double weig
/**
 * Merges the state of another task into this one: first the superclass state,
 * then the custom-metric state via {@code reduceCustomMetric}.
 */
@Override
public void reduce(T t) {
  super.reduce(t);
  this.reduceCustomMetric(t);
}

public void reduceCustomMetric(T t) {
if (func != null) {
if (customMetricWs == null) {
customMetricWs = t.customMetricWs;
Expand All @@ -56,15 +60,18 @@ public void reduce(T t) {
/**
 * Runs after the global reduce: invokes the superclass hook, then stores the
 * value returned by {@code computeCustomMetric()} in {@code result}.
 */
@Override
protected void postGlobal() {
  super.postGlobal();
  this.result = this.computeCustomMetric();
}

public CustomMetric computeCustomMetric() {
if (func != null) {
result = CustomMetric.from(cFuncRef.getName(),
return CustomMetric.from(cFuncRef.getName(),
customMetricWs != null ? func.metric(customMetricWs)
: Double.NaN);
} else {
result = null;
}
return null;
}

/**
 * Returns the custom metric currently held in {@code result}.
 *
 * @return the stored custom metric; may be {@code null}
 */
public CustomMetric getComputedCustomMetric() {
  return this.result;
}
Expand Down
2 changes: 2 additions & 0 deletions h2o-core/src/main/java/hex/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -2288,6 +2288,8 @@ public void close() {
super.postGlobal();
if(_mb != null) {
_mb.postGlobal(getComputedCustomMetric());
if (null != cFuncRef)
_mb._CMetricScoringTask = (CMetricScoringTask) this;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion h2o-core/src/main/java/hex/ModelBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,7 @@ public ModelMetrics.MetricBuilder[] cv_scoreCVModels(int N, Vec[] weights, Model
|| _parms._keep_cross_validation_predictions
|| (cvModel.isDistributionHuber() /*need to compute quantiles on abs error of holdout predictions*/)) {
String predName = cvModelBuilders[i].getPredictionKey();
Model.PredictScoreResult result = cvModel.predictScoreImpl(cvValid, adaptFr, predName, _job, true, CFuncRef.NOP);
Model.PredictScoreResult result = cvModel.predictScoreImpl(cvValid, adaptFr, predName, _job, true, CFuncRef.from(_parms._custom_metric_func));
result.makeModelMetrics(cvValid, adaptFr);
mbs[i] = result.getMetricBuilder();
DKV.put(cvModel);
Expand Down
13 changes: 12 additions & 1 deletion h2o-core/src/main/java/hex/ModelMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ public static double getMetricFromModelMetric(ModelMetrics mm, String criterion)
Object obj = null;
criterion = criterion.toLowerCase();

if ("custom".equals(criterion)){
if (null == mm._custom_metric)
return Double.NaN;
return mm._custom_metric.value;
}

// Constructing confusion matrix based on criterion
ConfusionMatrix cm;
if(mm instanceof ModelMetricsBinomial) {
Expand Down Expand Up @@ -173,7 +179,7 @@ public static double getMetricFromModelMetric(ModelMetrics mm, String criterion)
}
}
if (null == method)
throw new H2OIllegalArgumentException("Failed to find ModelMetrics for criterion: " + criterion);
throw new H2OIllegalArgumentException("Failed to find ModelMetrics for criterion: " + criterion + " for model_id: " + mm._modelKey);

try {
return (double) method.invoke(obj);
Expand Down Expand Up @@ -417,6 +423,7 @@ public static abstract class MetricBuilder<T extends MetricBuilder<T>> extends I

// Custom metric holder
public CustomMetric _customMetric = null;
public CMetricScoringTask _CMetricScoringTask = null;

public double weightedSigma() {
// double sampleCorrection = _count/(_count-1); //sample variance -> depends on the number of ACTUAL ROWS (not the weighted count)
Expand All @@ -442,6 +449,10 @@ public void reduce(Object mb) {
}

/**
 * Cross-validation variant of {@code reduce}. When this builder carries a
 * custom-metric scoring task, the other builder's task is merged into it and
 * {@code _customMetric} is recomputed from the merged state; afterwards the
 * regular {@code reduce} is applied.
 *
 * NOTE(review): {@code mb._CMetricScoringTask} is passed on without a null
 * check - confirm that both builders always carry a task whenever this one does.
 *
 * @param mb the metric builder to merge into this one
 */
public void reduceForCV(T mb){
  final CMetricScoringTask ownTask = _CMetricScoringTask;
  if (ownTask != null) {
    // Merge the custom-metric state first, then refresh the cached metric value.
    ownTask.reduceCustomMetric(mb._CMetricScoringTask);
    _customMetric = ownTask.computeCustomMetric();
  }
  this.reduce(mb);
}

Expand Down
30 changes: 21 additions & 9 deletions h2o-core/src/main/java/hex/leaderboard/Leaderboard.java
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,19 @@ public void addModels(final Key<Model>[] modelKeys) {

if (_metrics == null) {
// lazily set to default for this model category
setDefaultMetrics(modelKeys[0].get());
Model model = null;
String cm = modelKeys[0].get()._parms._custom_metric_func;
String[] metricsFirst = defaultMetricsForModel(modelKeys[0].get());
for (Key<Model> k : modelKeys) {
final String[] metrics = defaultMetricsForModel(model = k.get());
if (metrics.length != metricsFirst.length || !Arrays.equals(metricsFirst, metrics))
throw new H2OIllegalArgumentException("Models don't have the same metrics (e.g. model \"" +
modelKeys[0].toString()+"\" and model \""+k+"\").");
if (!Objects.equals(cm, k.get()._parms._custom_metric_func))
throw new H2OIllegalArgumentException("Models don't have the same custom metrics (e.g. model \"" +
modelKeys[0].toString()+"\" and model \""+k+"\").");
}
setDefaultMetrics(model);
}

for (Key<Model> key : badKeys) {
Expand Down Expand Up @@ -634,11 +646,8 @@ private double getMetric(String metric, Model model) {
);
} else {
// otherwise use default model metrics
Key model_key = model._key;
long model_checksum = model.checksum();
ModelMetrics mm = getModelMetrics(model);
return ModelMetrics.getMetricFromModelMetric(
_leaderboard_model_metrics.get(ModelMetrics.buildKey(model_key, model_checksum, mm.frame()._key, mm.frame().checksum())),
getModelMetrics(model),
metric
);
}
Expand Down Expand Up @@ -670,14 +679,17 @@ protected Futures remove_impl(Futures fs, boolean cascade) {
}

private static String[] defaultMetricsForModel(Model m) {
ArrayList<String> result = new ArrayList<>();
if (m._output.isBinomialClassifier()) { //binomial
return new String[] {"auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse"};
Collections.addAll(result, "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse");
} else if (m._output.isMultinomialClassifier()) { // multinomial
return new String[] {"mean_per_class_error", "logloss", "rmse", "mse"};
Collections.addAll(result, "mean_per_class_error", "logloss", "rmse", "mse");
} else if (m._output.isSupervised()) { // regression
return new String[] {"rmse", "mse", "mae", "rmsle", "mean_residual_deviance"};
Collections.addAll(result, "rmse", "mse", "mae", "rmsle", "mean_residual_deviance");
}
return new String[0];
if (m._parms._custom_metric_func != null)
result.add("custom");
return result.toArray(new String[0]);
}

private double[] getModelMetricValues(int rank) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
``custom_metric_func``
----------------------

- Available in: GBM, DRF, Deeplearning
- Available in: GBM, DRF, Deeplearning, Stacked Ensembles
- Hyperparameter: no

Description
Expand Down
8 changes: 7 additions & 1 deletion h2o-py/h2o/automl/_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def __init__(self,
keep_cross_validation_models=False,
keep_cross_validation_fold_assignment=False,
sort_metric="AUTO",
custom_metric_func=None,
export_checkpoints_dir=None,
verbosity="warn",
**kwargs):
Expand Down Expand Up @@ -289,6 +290,9 @@ def __init__(self,
- ``"rmsle"``

Defaults to ``"AUTO"`` (This translates to ``"auc"`` for binomial classification, ``"mean_per_class_error"`` for multinomial classification, ``"deviance"`` for regression).
:param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
Defaults to ``None``.
:type custom_metric_func: str, optional
:param export_checkpoints_dir: Path to a directory where every model will be stored in binary form.
:param verbosity: Verbosity of the backend messages printed during training.
Available options are ``None`` (live log disabled), ``"debug"``, ``"info"``, ``"warn"`` or ``"error"``.
Expand Down Expand Up @@ -333,6 +337,7 @@ def __init__(self,
self.project_name = project_name
self.nfolds = nfolds
self.distribution = distribution
self.custom_metric_func = custom_metric_func
self.balance_classes = balance_classes
self.class_sampling_factors = class_sampling_factors
self.max_after_balance_size = max_after_balance_size
Expand Down Expand Up @@ -489,6 +494,7 @@ def __validate_distribution(self, distribution):
_huber_alpha = _aml_property('build_control.huber_alpha', types=(numeric,), freezable=True)
_tweedie_power = _aml_property('build_control.tweedie_power', types=(numeric,), freezable=True)
_quantile_alpha = _aml_property('build_control.quantile_alpha', types=(numeric,), freezable=True)
custom_metric_func = _aml_property('build_control.custom_metric_func', types=(str, None))
balance_classes = _aml_property('build_control.balance_classes', types=(bool,), freezable=True)
class_sampling_factors = _aml_property('build_control.class_sampling_factors', types=(None, [numeric]), freezable=True)
max_after_balance_size = _aml_property('build_control.max_after_balance_size', types=(None, numeric), freezable=True)
Expand Down Expand Up @@ -532,7 +538,7 @@ def __validate_distribution(self, distribution):
blending_frame = _aml_property('input_spec.blending_frame', set_input=False,
validate_fn=ft.partial(__validate_frame, name='blending_frame'))
response_column = _aml_property('input_spec.response_column', types=(str,))

#---------------------------------------------------------------------------
# Basic properties
#---------------------------------------------------------------------------
Expand Down
19 changes: 19 additions & 0 deletions h2o-py/h2o/estimators/stackedensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __init__(self,
max_runtime_secs=0.0, # type: float
weights_column=None, # type: Optional[str]
offset_column=None, # type: Optional[str]
custom_metric_func=None, # type: Optional[str]
seed=-1, # type: int
score_training_samples=10000, # type: int
keep_levelone_frame=False, # type: bool
Expand Down Expand Up @@ -151,6 +152,9 @@ def __init__(self,
function.
Defaults to ``None``.
:type offset_column: str, optional
:param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
Defaults to ``None``.
:type custom_metric_func: str, optional
:param seed: Seed for random numbers; passed through to the metalearner algorithm. Defaults to -1 (time-based
random number)
Defaults to ``-1``.
Expand Down Expand Up @@ -186,6 +190,7 @@ def __init__(self,
self.max_runtime_secs = max_runtime_secs
self.weights_column = weights_column
self.offset_column = offset_column
self.custom_metric_func = custom_metric_func
self.seed = seed
self.score_training_samples = score_training_samples
self.keep_levelone_frame = keep_levelone_frame
Expand Down Expand Up @@ -714,6 +719,20 @@ def offset_column(self, offset_column):
assert_is_type(offset_column, None, str)
self._parms["offset_column"] = offset_column

@property
def custom_metric_func(self):
"""
Reference to custom evaluation function, format: `language:keyName=funcName`

Type: ``str``, optional (the setter is type-checked against ``None`` and ``str``).
"""
return self._parms.get("custom_metric_func")

@custom_metric_func.setter
def custom_metric_func(self, custom_metric_func):
# Type-check the value against (None, str) before storing it in the parameter dict.
assert_is_type(custom_metric_func, None, str)
self._parms["custom_metric_func"] = custom_metric_func

@property
def seed(self):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import sys

sys.path.insert(1, os.path.join("..", "..", ".."))
import h2o
from h2o.automl import H2OAutoML
from tests import pyunit_utils as pu, dataset_prostate, CustomMaeFunc


def test_automl_custom_metric():
    """
    Train AutoML with an uploaded custom metric (``CustomMaeFunc`` registered as "mae"),
    sort the leaderboard by ``"custom"``, and check that the leaderboard's ``custom``
    column equals its ``mae`` column wherever ``custom`` is not NaN, with at least one
    non-NaN ``custom`` value present.
    """
    frame, holdout, _ = dataset_prostate()
    frame = frame.rbind(holdout)
    frame = h2o.H2OFrame(frame.as_data_frame(), "my_training_frame")

    # Upload the metric right before building AutoML, as the original call order does.
    metric_ref = h2o.upload_custom_metric(CustomMaeFunc, func_name="mae", func_file="mm_mae.py")
    aml = H2OAutoML(max_models=20, custom_metric_func=metric_ref, sort_metric="custom")
    aml.train(y="AGE", training_frame=frame)

    # NOTE(review): the loop label is only printed as a header; the leaderboard is always
    # built with scoring_data="xval". It looks like scoring_data=<label> was intended -
    # confirm, bearing in mind that no validation frame is passed to train().
    for label in ("train", "valid", "xval", "AUTO"):
        print(f"{label}\n{'=' * len(label)}")
        ldb = h2o.make_leaderboard(aml, scoring_data="xval").as_data_frame()
        # Rows agree when custom equals mae, or when custom is missing.
        agrees = (ldb.mae == ldb.custom) | ldb.custom.isna()
        print(f"MAE==Custom: {agrees.all()}")
        print(ldb)
        assert agrees.all() and (~ldb.custom.isna()).any()


pu.run_tests([
test_automl_custom_metric,
])
Loading