h2oai · tomasfryda · Jul 13, 2023 · Jun 9, 2023 · Jun 9, 2023 · Jun 9, 2023
diff --git a/h2o-algos/src/main/java/hex/ensemble/Metalearner.java b/h2o-algos/src/main/java/hex/ensemble/Metalearner.java
@@ -117,6 +117,7 @@ protected void setCommonParams(P parms) {
     parms._weights_column = _model._parms._weights_column;
     parms._offset_column = _model._parms._offset_column;
     parms._main_model_time_budget_factor = _model._parms._main_model_time_budget_factor;
+    parms._custom_metric_func = _model._parms._custom_metric_func;
   }
 
   protected void setCrossValidationParams(P parms) {

diff --git a/h2o-algos/src/main/java/hex/schemas/StackedEnsembleV99.java b/h2o-algos/src/main/java/hex/schemas/StackedEnsembleV99.java
@@ -51,6 +51,7 @@ public static final class StackedEnsembleParametersV99 extends ModelParametersSc
       "max_runtime_secs",
       "weights_column",
       "offset_column",
+      "custom_metric_func",
       "seed",
       "score_training_samples",
       "keep_levelone_frame",

diff --git a/h2o-algos/src/main/java/hex/tree/Score.java b/h2o-algos/src/main/java/hex/tree/Score.java
@@ -154,11 +154,13 @@ protected boolean modifiesVolatileVecs() {
     _mb.reduce(t._mb);
   }
 
-  // We need to satsify MB invariant
+  // We need to satisfy MB invariant
   @Override protected void postGlobal() {
     super.postGlobal();
     if(_mb != null) {
       _mb.postGlobal(getComputedCustomMetric());
+      if (null != cFuncRef)
+        _mb._CMetricScoringTask = (CMetricScoringTask) this; // kept on the builder so MetricBuilder.reduceForCV can merge custom-metric state
     }
   }
 

diff --git a/h2o-automl/src/main/java/ai/h2o/automl/AutoMLBuildSpec.java b/h2o-automl/src/main/java/ai/h2o/automl/AutoMLBuildSpec.java
@@ -51,6 +51,7 @@ public static final class AutoMLBuildControl extends Iced {
     public double tweedie_power = 1.5;
     public double quantile_alpha = 0.5;
     public double huber_alpha = 0.9;
+    public String custom_metric_func;
 
     public boolean keep_cross_validation_predictions = false;
     public boolean keep_cross_validation_models = false;

diff --git a/h2o-automl/src/main/java/ai/h2o/automl/ModelingStep.java b/h2o-automl/src/main/java/ai/h2o/automl/ModelingStep.java
@@ -361,11 +361,12 @@ protected void setCommonModelBuilderParams(Model.Parameters params) {
         setCrossValidationParams(params);
         setWeightingParams(params);
         setClassBalancingParams(params);
+        params._custom_metric_func = buildSpec.build_control.custom_metric_func;
 
         params._keep_cross_validation_models = buildSpec.build_control.keep_cross_validation_models;
         params._keep_cross_validation_fold_assignment = buildSpec.build_control.nfolds != 0 && buildSpec.build_control.keep_cross_validation_fold_assignment;
         params._export_checkpoints_dir = buildSpec.build_control.export_checkpoints_dir;
-
+
         /** Using _main_model_time_budget_factor to determine if and how we should restrict the time for the main model.
          *  Value 0 means do not use time constraint for the main model.
          *  More details in {@link ModelBuilder#setMaxRuntimeSecsForMainModel()}.

diff --git a/h2o-automl/src/main/java/water/automl/api/schemas3/AutoMLBuildSpecV99.java b/h2o-automl/src/main/java/water/automl/api/schemas3/AutoMLBuildSpecV99.java
@@ -86,7 +86,10 @@ public static final class AutoMLBuildControlV99 extends SchemaV3<AutoMLBuildSpec
     @API(direction = API.Direction.INPUT,
             help = "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1).")
     public double huber_alpha;
-
+
+    @API(help = "Reference to custom evaluation function, format: `language:keyName=funcName`", level = API.Level.secondary, direction=API.Direction.INOUT, gridable = false)
+    public String custom_metric_func;
+
     @API(help = "Reference to custom distribution, format: `language:keyName=funcName`", direction=API.Direction.INOUT)
     public String custom_distribution_func;
   } // class AutoMLBuildControlV99

diff --git a/h2o-automl/src/main/java/water/automl/api/schemas3/AutoMLV99.java b/h2o-automl/src/main/java/water/automl/api/schemas3/AutoMLV99.java
@@ -64,7 +64,7 @@ public AutoMLKeyV3(Key<AutoML> key) {
 
   @API(help="Metric used to sort leaderboard", direction=API.Direction.INPUT)
   public String sort_metric;
-
+
   @API(help="The list of modeling steps effectively used during the AutoML run", direction=API.Direction.OUTPUT)
   public StepDefinitionV99[] modeling_steps;
 

diff --git a/h2o-core/src/main/java/hex/CMetricScoringTask.java b/h2o-core/src/main/java/hex/CMetricScoringTask.java
@@ -42,6 +42,10 @@ protected final void customMetricPerRow(double preds[], float yact[],double weig
   @Override
   public void reduce(T t) {
     super.reduce(t);
+    reduceCustomMetric(t);
+  }
+
+  public void reduceCustomMetric(T t) {
     if (func != null) {
       if (customMetricWs == null) {
         customMetricWs = t.customMetricWs;
@@ -56,15 +60,18 @@ public void reduce(T t) {
   @Override
   protected void postGlobal() {
     super.postGlobal();
+    result = computeCustomMetric();
+  }
+
+  public CustomMetric computeCustomMetric() { // returns null when func is unset; the metric value is NaN when customMetricWs is null
     if (func != null) {
-      result = CustomMetric.from(cFuncRef.getName(),
+      return CustomMetric.from(cFuncRef.getName(),
                                  customMetricWs != null ? func.metric(customMetricWs)
                                                         : Double.NaN);
-    } else {
-      result = null;
     }
+    return null;
   }
-  
+
   public CustomMetric getComputedCustomMetric() {
     return result;
   }

diff --git a/h2o-core/src/main/java/hex/Model.java b/h2o-core/src/main/java/hex/Model.java
@@ -2288,6 +2288,8 @@ public void close() {
       super.postGlobal();
       if(_mb != null) {
         _mb.postGlobal(getComputedCustomMetric());
+        if (null != cFuncRef)
+          _mb._CMetricScoringTask = (CMetricScoringTask) this;
       }
     }
   }

diff --git a/h2o-core/src/main/java/hex/ModelBuilder.java b/h2o-core/src/main/java/hex/ModelBuilder.java
@@ -854,7 +854,7 @@ public ModelMetrics.MetricBuilder[] cv_scoreCVModels(int N, Vec[] weights, Model
                 || _parms._keep_cross_validation_predictions
                 || (cvModel.isDistributionHuber() /*need to compute quantiles on abs error of holdout predictions*/)) {
           String predName = cvModelBuilders[i].getPredictionKey();
-          Model.PredictScoreResult result = cvModel.predictScoreImpl(cvValid, adaptFr, predName, _job, true, CFuncRef.NOP);
+          Model.PredictScoreResult result = cvModel.predictScoreImpl(cvValid, adaptFr, predName, _job, true, CFuncRef.from(_parms._custom_metric_func));
           result.makeModelMetrics(cvValid, adaptFr);
           mbs[i] = result.getMetricBuilder();
           DKV.put(cvModel);

diff --git a/h2o-core/src/main/java/hex/ModelMetrics.java b/h2o-core/src/main/java/hex/ModelMetrics.java
@@ -140,6 +140,12 @@ public static double getMetricFromModelMetric(ModelMetrics mm, String criterion)
     Object obj = null;
     criterion = criterion.toLowerCase();
 
+    if ("custom".equals(criterion)){
+      if (null == mm._custom_metric) 
+        return Double.NaN;
+      return mm._custom_metric.value;
+    }
+
     // Constructing confusion matrix based on criterion
     ConfusionMatrix cm;
     if(mm instanceof ModelMetricsBinomial) {
@@ -173,7 +179,7 @@ public static double getMetricFromModelMetric(ModelMetrics mm, String criterion)
       }
     }
     if (null == method)
-      throw new H2OIllegalArgumentException("Failed to find ModelMetrics for criterion: " + criterion);
+      throw new H2OIllegalArgumentException("Failed to find ModelMetrics for criterion: " + criterion + " for model_id: " + mm._modelKey);
 
     try {
       return (double) method.invoke(obj);
@@ -417,6 +423,7 @@ public static abstract class MetricBuilder<T extends MetricBuilder<T>> extends I
 
     // Custom metric holder
     public CustomMetric _customMetric = null;
+    public CMetricScoringTask _CMetricScoringTask = null;
 
     public  double weightedSigma() {
 //      double sampleCorrection = _count/(_count-1); //sample variance -> depends on the number of ACTUAL ROWS (not the weighted count)
@@ -442,6 +449,10 @@ public void reduce(Object mb) {
     }
 
     public void reduceForCV(T mb){
+      if (null != _CMetricScoringTask) { // merge the custom-metric state first, then recompute the metric from the merged state
+        _CMetricScoringTask.reduceCustomMetric(mb._CMetricScoringTask); // NOTE(review): reduceCustomMetric dereferences its argument — confirm mb._CMetricScoringTask cannot be null here
+        _customMetric = _CMetricScoringTask.computeCustomMetric();
+      }
       this.reduce(mb);
     }
 

diff --git a/h2o-core/src/main/java/hex/leaderboard/Leaderboard.java b/h2o-core/src/main/java/hex/leaderboard/Leaderboard.java
@@ -476,7 +476,19 @@ public void addModels(final Key<Model>[] modelKeys) {
 
     if (_metrics == null) {
       // lazily set to default for this model category
-      setDefaultMetrics(modelKeys[0].get());
+      final Model first = modelKeys[0].get(); // resolved once and used as the reference for every comparison below
+      final String cm = first._parms._custom_metric_func;
+      final String[] metricsFirst = defaultMetricsForModel(first);
+      for (Key<Model> k : modelKeys) {
+        final Model other = k.get(); // single lookup per key
+        if (!Arrays.equals(metricsFirst, defaultMetricsForModel(other))) // Arrays.equals already compares lengths
+          throw new H2OIllegalArgumentException("Models don't have the same metrics (e.g. model \"" +
+                  modelKeys[0].toString()+"\" and model \""+k+"\").");
+        if (!Objects.equals(cm, other._parms._custom_metric_func))
+          throw new H2OIllegalArgumentException("Models don't have the same custom metrics (e.g. model \"" +
+                  modelKeys[0].toString()+"\" and model \""+k+"\").");
+      }
+      setDefaultMetrics(first); // every model was checked against the first one, so it is representative
     }
 
     for (Key<Model> key : badKeys) {
@@ -634,11 +646,8 @@ private double getMetric(String metric, Model model) {
       );
     } else {
       // otherwise use default model metrics
-      Key model_key = model._key;
-      long model_checksum = model.checksum();
-      ModelMetrics mm = getModelMetrics(model);
       return ModelMetrics.getMetricFromModelMetric(
-              _leaderboard_model_metrics.get(ModelMetrics.buildKey(model_key, model_checksum, mm.frame()._key, mm.frame().checksum())),
+              getModelMetrics(model),
               metric
       );
     }
@@ -670,14 +679,17 @@ protected Futures remove_impl(Futures fs, boolean cascade) {
   }
 
   private static String[] defaultMetricsForModel(Model m) {
+    ArrayList<String> result = new ArrayList<>(); // stays empty when none of the branches below match
     if (m._output.isBinomialClassifier()) { //binomial
-      return new String[] {"auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse"};
+      Collections.addAll(result, "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse");
     } else if (m._output.isMultinomialClassifier()) { // multinomial
-      return new String[] {"mean_per_class_error", "logloss", "rmse", "mse"};
+      Collections.addAll(result, "mean_per_class_error", "logloss", "rmse", "mse");
     } else if (m._output.isSupervised()) { // regression
-      return new String[] {"rmse", "mse", "mae", "rmsle", "mean_residual_deviance"};
+      Collections.addAll(result, "rmse", "mse", "mae", "rmsle", "mean_residual_deviance");
     }
-    return new String[0];
+    if (m._parms._custom_metric_func != null) // "custom" is the criterion name handled by ModelMetrics.getMetricFromModelMetric
+      result.add("custom");
+    return result.toArray(new String[0]);
   }
 
   private double[] getModelMetricValues(int rank) {

diff --git a/h2o-docs/src/product/data-science/algo-params/custom_metric_func.rst b/h2o-docs/src/product/data-science/algo-params/custom_metric_func.rst
@@ -3,7 +3,7 @@
 ``custom_metric_func``
 ----------------------
 
-- Available in: GBM, DRF, Deeplearning 
+- Available in: GBM, DRF, Deeplearning, Stacked Ensembles, AutoML
 - Hyperparameter: no
 
 Description

diff --git a/h2o-py/h2o/automl/_estimator.py b/h2o-py/h2o/automl/_estimator.py
@@ -153,6 +153,7 @@ def __init__(self,
                  keep_cross_validation_models=False,
                  keep_cross_validation_fold_assignment=False,
                  sort_metric="AUTO",
+                 custom_metric_func=None,
                  export_checkpoints_dir=None,
                  verbosity="warn",
                  **kwargs):
@@ -289,6 +290,9 @@ def __init__(self,
                 - ``"rmlse"``
 
             Defaults to ``"AUTO"`` (This translates to ``"auc"`` for binomial classification, ``"mean_per_class_error"`` for multinomial classification, ``"deviance"`` for regression).
+        :param custom_metric_func: Reference to custom evaluation function, format: ``language:keyName=funcName``.
+               Defaults to ``None``.
+        :type custom_metric_func: str, optional
         :param export_checkpoints_dir: Path to a directory where every model will be stored in binary form.
         :param verbosity: Verbosity of the backend messages printed during training.
             Available options are ``None`` (live log disabled), ``"debug"``, ``"info"``, ``"warn"`` or ``"error"``.
@@ -333,6 +337,7 @@ def __init__(self,
         self.project_name = project_name
         self.nfolds = nfolds
         self.distribution = distribution
+        self.custom_metric_func = custom_metric_func
         self.balance_classes = balance_classes
         self.class_sampling_factors = class_sampling_factors
         self.max_after_balance_size = max_after_balance_size
@@ -489,6 +494,7 @@ def __validate_distribution(self, distribution):
     _huber_alpha = _aml_property('build_control.huber_alpha', types=(numeric,), freezable=True)
     _tweedie_power = _aml_property('build_control.tweedie_power', types=(numeric,), freezable=True)
     _quantile_alpha = _aml_property('build_control.quantile_alpha', types=(numeric,), freezable=True)
+    custom_metric_func = _aml_property('build_control.custom_metric_func', types=(str, None))
     balance_classes = _aml_property('build_control.balance_classes', types=(bool,), freezable=True)
     class_sampling_factors = _aml_property('build_control.class_sampling_factors', types=(None, [numeric]), freezable=True)
     max_after_balance_size = _aml_property('build_control.max_after_balance_size', types=(None, numeric), freezable=True)
@@ -532,7 +538,7 @@ def __validate_distribution(self, distribution):
     blending_frame = _aml_property('input_spec.blending_frame', set_input=False,
                                    validate_fn=ft.partial(__validate_frame, name='blending_frame'))
     response_column = _aml_property('input_spec.response_column', types=(str,))
-
+
     #---------------------------------------------------------------------------
     # Basic properties
     #---------------------------------------------------------------------------

diff --git a/h2o-py/h2o/estimators/stackedensemble.py b/h2o-py/h2o/estimators/stackedensemble.py
@@ -81,6 +81,7 @@ def __init__(self,
                  max_runtime_secs=0.0,  # type: float
                  weights_column=None,  # type: Optional[str]
                  offset_column=None,  # type: Optional[str]
+                 custom_metric_func=None,  # type: Optional[str]
                  seed=-1,  # type: int
                  score_training_samples=10000,  # type: int
                  keep_levelone_frame=False,  # type: bool
@@ -151,6 +152,9 @@ def __init__(self,
                function.
                Defaults to ``None``.
         :type offset_column: str, optional
+        :param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
+               Defaults to ``None``.
+        :type custom_metric_func: str, optional
         :param seed: Seed for random numbers; passed through to the metalearner algorithm. Defaults to -1 (time-based
                random number)
                Defaults to ``-1``.
@@ -186,6 +190,7 @@ def __init__(self,
         self.max_runtime_secs = max_runtime_secs
         self.weights_column = weights_column
         self.offset_column = offset_column
+        self.custom_metric_func = custom_metric_func
         self.seed = seed
         self.score_training_samples = score_training_samples
         self.keep_levelone_frame = keep_levelone_frame
@@ -714,6 +719,20 @@ def offset_column(self, offset_column):
         assert_is_type(offset_column, None, str)
         self._parms["offset_column"] = offset_column
 
+    @property
+    def custom_metric_func(self):
+        """
+        Reference to custom evaluation function, format: `language:keyName=funcName`
+
+        Type: ``str``.
+        """
+        return self._parms.get("custom_metric_func")
+
+    @custom_metric_func.setter
+    def custom_metric_func(self, custom_metric_func):
+        assert_is_type(custom_metric_func, None, str)
+        self._parms["custom_metric_func"] = custom_metric_func
+
     @property
     def seed(self):
         """

diff --git a/h2o-py/tests/testdir_algos/automl/pyunit_automl_supports_custom_metric.py b/h2o-py/tests/testdir_algos/automl/pyunit_automl_supports_custom_metric.py
@@ -0,0 +1,30 @@
+import os
+import sys
+
+sys.path.insert(1, os.path.join("..", "..", ".."))
+import h2o
+from h2o.automl import H2OAutoML
+from tests import pyunit_utils as pu, dataset_prostate, CustomMaeFunc
+
+
+def test_automl_custom_metric():
+    def custom_mae_mm():
+        return h2o.upload_custom_metric(CustomMaeFunc, func_name="mae", func_file="mm_mae.py")
+
+    ftrain, fvalid, _ = dataset_prostate()
+    ftrain = ftrain.rbind(fvalid)  # no separate validation frame is passed to train(); both parts are merged
+    ftrain = h2o.H2OFrame(ftrain.as_data_frame(), "my_training_frame")
+    aml = H2OAutoML(max_models=20, custom_metric_func=custom_mae_mm(), sort_metric="custom")
+    aml.train(y="AGE", training_frame=ftrain)
+
+    for sd in ["train", "valid", "xval", "AUTO"]:
+        print(sd + "\n" + ("=" * len(sd)))
+        ldb = h2o.make_leaderboard(aml, scoring_data="xval").as_data_frame()  # NOTE(review): `sd` is never passed, every iteration scores "xval" — confirm whether scoring_data=sd was intended
+        print(f"MAE==Custom: {((ldb.mae == ldb.custom) | ldb.custom.isna()).all()}")
+        print(ldb)
+        assert ((ldb.mae == ldb.custom) | ldb.custom.isna()).all() and (~ldb.custom.isna()).any()  # custom must equal MAE wherever present, and be present at least once
+
+
+pu.run_tests([
+    test_automl_custom_metric,
+])