From edfcaca3f07257b5000ab97a34885c0bf3f0ee23 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Mon, 19 Jun 2023 18:36:52 +0200
Subject: [PATCH 01/12] Implement ATE, ATT, ATC metrics

---
 .../main/java/hex/tree/uplift/UpliftDRF.java  |  12 ++
 .../hex/util/EffectiveParametersUtils.java    |   1 -
 .../java/hex/ModelMetricsBinomialUplift.java  |  44 ++++-
 h2o-core/src/main/java/hex/ScoreKeeper.java   |   6 +
 .../ModelMetricsBinomialUpliftV3.java         |  12 ++
 .../src/product/data-science/upliftdrf.rst    |  22 ++-
 h2o-py/h2o/model/metrics/uplift.py            |  92 +++++++++-
 h2o-py/h2o/model/models/uplift.py             | 108 ++++++++++++
 .../uplift/pyunit_uplift_rf_api_test.py       |   3 +
 h2o-r/h2o-package/R/models.R                  | 166 +++++++++++++++++-
 .../runit_make_metrics_uplift_binomial.R      |  15 ++
 11 files changed, 469 insertions(+), 12 deletions(-)

diff --git a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
index ff2c744a081e..dc3891a1797b 100644
--- a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
+++ b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
@@ -404,6 +404,9 @@ static TwoDimTable createUpliftScoringHistoryTable(Model.Output _output,
         colHeaders.add("Timestamp"); colTypes.add("string"); colFormat.add("%s");
         colHeaders.add("Duration"); colTypes.add("string"); colFormat.add("%s");
         colHeaders.add("Number of Trees"); colTypes.add("long"); colFormat.add("%d");
+        colHeaders.add("Training ATE"); colTypes.add("double"); colFormat.add("%.5f"); // double column: "%d" is an integer-only conversion
+        colHeaders.add("Training ATT"); colTypes.add("double"); colFormat.add("%.5f");
+        colHeaders.add("Training ATC"); colTypes.add("double"); colFormat.add("%.5f");
         colHeaders.add("Training AUUC nbins"); colTypes.add("int"); colFormat.add("%d");
         colHeaders.add("Training AUUC"); colTypes.add("double"); colFormat.add("%.5f");
         colHeaders.add("Training AUUC normalized"); colTypes.add("double"); colFormat.add("%.5f");
@@ -413,6 +416,9 @@ static TwoDimTable createUpliftScoringHistoryTable(Model.Output _output,
         }
 
         if (_output._validation_metrics != null) {
+            colHeaders.add("Validation ATE"); colTypes.add("double"); colFormat.add("%.5f"); // double column: "%d" is an integer-only conversion
+            colHeaders.add("Validation ATT"); colTypes.add("double"); colFormat.add("%.5f");
+            colHeaders.add("Validation ATC"); colTypes.add("double"); colFormat.add("%.5f");
             colHeaders.add("Validation AUUC nbins"); colTypes.add("int"); colFormat.add("%d");
             colHeaders.add("Validation AUUC"); colTypes.add("double"); colFormat.add("%.5f");
             colHeaders.add("Validation AUUC normalized"); colTypes.add("double"); colFormat.add("%.5f");
@@ -443,6 +449,9 @@ static TwoDimTable createUpliftScoringHistoryTable(Model.Output _output,
             table.set(row, col++, PrettyPrint.msecs(_training_time_ms[i] - job.start_time(), true));
             table.set(row, col++, i);
             ScoreKeeper st = _scored_train[i];
+            table.set(row, col++, st._ate);
+            table.set(row, col++, st._att);
+            table.set(row, col++, st._atc);
             table.set(row, col++, st._auuc_nbins);
             table.set(row, col++, st._AUUC);
             table.set(row, col++, st._auuc_normalized);
@@ -451,6 +460,9 @@ static TwoDimTable createUpliftScoringHistoryTable(Model.Output _output,
 
             if (_output._validation_metrics != null) {
                 st = _scored_valid[i];
+                table.set(row, col++, st._ate);
+                table.set(row, col++, st._att);
+                table.set(row, col++, st._atc);
                 table.set(row, col++, st._auuc_nbins);
                 table.set(row, col++, st._AUUC);
                 table.set(row, col++, st._auuc_normalized);
diff --git a/h2o-algos/src/main/java/hex/util/EffectiveParametersUtils.java b/h2o-algos/src/main/java/hex/util/EffectiveParametersUtils.java
index e0413effda7e..55bbf2ad8aea 100644
--- a/h2o-algos/src/main/java/hex/util/EffectiveParametersUtils.java
+++ b/h2o-algos/src/main/java/hex/util/EffectiveParametersUtils.java
@@ -1,6 +1,5 @@
 package hex.util;
 
-import hex.AUUC;
 import hex.Model;
 import hex.ScoreKeeper;
 import hex.genmodel.utils.DistributionFamily;
diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
index de3b8b19e70f..30a57f808bf3 100644
--- a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
+++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
@@ -10,11 +10,17 @@
 
 public class ModelMetricsBinomialUplift extends ModelMetricsSupervised {
     public final AUUC _auuc;
+    public double _ate;
+    public double _att;
+    public double _atc;
 
-    public ModelMetricsBinomialUplift(Model model, Frame frame, long nobs, String[] domain,
-                                      double sigma, AUUC auuc,
+    public ModelMetricsBinomialUplift(Model model, Frame frame, long nobs, String[] domain, 
+                                      double ate, double att, double atc, double sigma, AUUC auuc,
                                       CustomMetric customMetric) {
         super(model, frame,  nobs, 0, domain, sigma, customMetric);
+        _ate = ate;
+        _att = att;
+        _atc = atc;
         _auuc = auuc;
     }
 
@@ -30,6 +36,9 @@ public static ModelMetricsBinomialUplift getFromDKV(Model model, Frame frame) {
     public String toString() {
         StringBuilder sb = new StringBuilder();
         sb.append(super.toString());
+        sb.append("ATE:" ).append((float) _ate).append("\n");
+        sb.append("ATT:" ).append((float) _att).append("\n");
+        sb.append("ATC:" ).append((float) _atc).append("\n");
         if(_auuc != null){
             sb.append("Default AUUC: ").append((float) _auuc.auuc()).append("\n");
             sb.append("Qini AUUC: ").append((float) _auuc.auucByType(AUUC.AUUCType.qini)).append("\n");
@@ -50,6 +59,12 @@ public String toString() {
     public double auucNormalized(){return _auuc.auucNormalized();}
     
     public int nbins(){return _auuc._nBins;}
+    
+    public double ate() {return _ate;}
+    
+    public double att() {return _att;}
+    
+    public double atc() {return _atc;}
 
     @Override
     protected StringBuilder appendToStringMetrics(StringBuilder sb) {
@@ -143,7 +158,10 @@ public UpliftBinomialMetrics(String[] domain, double[] thresholds) {
     public static class MetricBuilderBinomialUplift extends MetricBuilderSupervised<MetricBuilderBinomialUplift> {
 
         protected AUUC.AUUCBuilder _auuc;
-
+        public double _sumTE;
+        public double _sumTETreatment;
+        public long _treatmentCount;
+        
         public MetricBuilderBinomialUplift( String[] domain, double[] thresholds) { 
             super(2,domain); 
             if(thresholds != null) {
@@ -163,7 +181,6 @@ public MetricBuilderBinomialUplift( String[] domain) {
         public double[] perRow(double[] ds, float[] yact, double weight, double offset, Model m) {
             assert _auuc == null || yact.length == 2 : "Treatment must be included in `yact` when calculating AUUC";
             if(Float .isNaN(yact[0])) return ds; // No errors if   actual   is missing
-            if(ArrayUtils.hasNaNs(ds)) return ds;  // No errors if prediction has missing values (can happen for GLM)
             if(weight == 0 || Double.isNaN(weight)) return ds;
             int y = (int)yact[0];
             if (y != 0 && y != 1) return ds; // The actual is effectively a NaN
@@ -171,9 +188,13 @@ public double[] perRow(double[] ds, float[] yact, double weight, double offset,
             _wYY += weight * y * y;
             _count++;
             _wcount += weight;
+            float treatmentGroup = yact[1]; // treatment = 1, control = 0
+            double treatmentEffect = ds[0];
+            _sumTE += treatmentEffect; // result prediction
+            _sumTETreatment += treatmentGroup * treatmentEffect; 
+            _treatmentCount += treatmentGroup;
             if (_auuc != null) {
-                float treatment = yact[1];
-                _auuc.perRow(ds[0], weight, y, treatment);
+                _auuc.perRow(treatmentEffect, weight, y, treatmentGroup);
             }
             return ds;
         }
@@ -183,6 +204,9 @@ public double[] perRow(double[] ds, float[] yact, double weight, double offset,
             if(_auuc != null) {
                 _auuc.reduce(mb._auuc);
             }
+            _sumTE += mb._sumTE;
+            _sumTETreatment += mb._sumTETreatment;
+            _treatmentCount += mb._treatmentCount; // merge the other builder's count; adding our own would just double it
         }
 
         /**
@@ -231,15 +255,21 @@ private ModelMetrics makeModelMetrics(final Model m, final Frame f, final Frame
 
         private ModelMetrics makeModelMetrics(Model m, Frame f, AUUC auuc) {
             double sigma = Double.NaN;
+            double ate = Double.NaN;
+            double atc = Double.NaN;
+            double att = Double.NaN;
             if(_wcount > 0) {
                 if (auuc == null) {
                     sigma = weightedSigma();
                     auuc = new AUUC(_auuc, m._parms._auuc_type);
                 }
+                ate = _sumTE/_count; // perRow accumulates the sums without weights, so average over the row count
+                att = _sumTETreatment/_treatmentCount; // NaN when there are no treated rows (0.0/0)
+                atc = (_sumTE-_sumTETreatment)/(_count-_treatmentCount); // control rows = all rows - treated rows
             } else {
                 auuc = new AUUC();
             }
-            ModelMetricsBinomialUplift mm = new ModelMetricsBinomialUplift(m, f, _count, _domain, sigma, auuc, _customMetric);
+            ModelMetricsBinomialUplift mm = new ModelMetricsBinomialUplift(m, f, _count, _domain, ate, att, atc, sigma, auuc, _customMetric);
             if (m!=null) m.addModelMetrics(mm);
             return mm;
         }
diff --git a/h2o-core/src/main/java/hex/ScoreKeeper.java b/h2o-core/src/main/java/hex/ScoreKeeper.java
index 0ea29a319c0e..fb02c60f50ea 100644
--- a/h2o-core/src/main/java/hex/ScoreKeeper.java
+++ b/h2o-core/src/main/java/hex/ScoreKeeper.java
@@ -35,6 +35,9 @@ public class ScoreKeeper extends Iced {
   public double _auuc_normalized = Double.NaN;
   public double _qini = Double.NaN;
   public int _auuc_nbins = 0;
+  public double _ate = Double.NaN;
+  public double _att = Double.NaN;
+  public double _atc = Double.NaN;
 
   public ScoreKeeper() {}
 
@@ -125,6 +128,9 @@ else if (m instanceof ModelMetricsMultinomial) {
       _auuc_normalized = ((ModelMetricsBinomialUplift)m).auucNormalized();
       _qini = ((ModelMetricsBinomialUplift)m).qini();
       _auuc_nbins = ((ModelMetricsBinomialUplift)m).nbins();
+      _ate = ((ModelMetricsBinomialUplift)m).ate();
+      _att = ((ModelMetricsBinomialUplift)m).att();
+      _atc = ((ModelMetricsBinomialUplift)m).atc();
     }
     if (customMetric != null ) {
       _custom_metric = customMetric.value;
diff --git a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialUpliftV3.java b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialUpliftV3.java
index abd6a5b6b3f9..f1c102da6092 100644
--- a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialUpliftV3.java
+++ b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialUpliftV3.java
@@ -13,6 +13,15 @@
 public class ModelMetricsBinomialUpliftV3<I extends ModelMetricsBinomialUplift, S extends water.api.schemas3.ModelMetricsBinomialUpliftV3<I, S>>
             extends ModelMetricsBaseV3<I,S> {
 
+    @API(help="Average Treatment Effect.", direction=API.Direction.OUTPUT)
+    public double ate;
+
+    @API(help="Average Treatment Effect on the Treated.", direction=API.Direction.OUTPUT)
+    public double att;
+
+    @API(help="Average Treatment Effect on the Control.", direction=API.Direction.OUTPUT)
+    public double atc;
+
     @API(help="The default AUUC for this scoring run.", direction=API.Direction.OUTPUT)
     public double AUUC;
 
@@ -40,6 +49,9 @@ public S fillFromImpl(ModelMetricsBinomialUplift modelMetrics) {
 
         AUUC auuc = modelMetrics._auuc;
         if (null != auuc) {
+            ate = modelMetrics.ate();
+            att = modelMetrics.att();
+            atc = modelMetrics.atc();
             AUUC  = auuc.auuc();
             auuc_normalized = auuc.auucNormalized();
             qini = auuc.qini();
diff --git a/h2o-docs/src/product/data-science/upliftdrf.rst b/h2o-docs/src/product/data-science/upliftdrf.rst
index aca182592ed3..437f8b063d66 100644
--- a/h2o-docs/src/product/data-science/upliftdrf.rst
+++ b/h2o-docs/src/product/data-science/upliftdrf.rst
@@ -227,10 +227,10 @@ By default, the following output displays:
 -  **Scoring history** in tabular format
 -  **Training metrics** (model name, checksum name, frame name, frame
    checksum name, description, model category, duration in ms, scoring
-   time, predictions, AUUC, all AUUC types table, Thresholds and metric scores, table)
+   time, predictions, ATE, ATT, ATC, AUUC, all AUUC types table, Thresholds and metric scores table)
 -  **Validation metrics** (model name, checksum name, frame name, frame
    checksum name, description, model category, duration in ms, scoring
-   time, predictions, AUUC, all AUUC types table, Thresholds and metric scores table)
+   time, predictions, ATE, ATT, ATC, AUUC, all AUUC types table, Thresholds and metric scores table)
 -  **Default AUUC metric** calculated based on ``auuc_type`` parameter
 -  **Default normalized AUUC metric** calculated based on ``auuc_type`` parameter
 -  **AUUC table** which contains all computed AUUC types and normalized AUUC (qini, lift, gain)
@@ -240,6 +240,24 @@ By default, the following output displays:
 -  **Uplift Curve plot** for given metric type (qini, lift, gain)
 
 
+Treatment effect metrics (ATE, ATT, ATC)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Overall treatment effect metrics show how the uplift predictions look across the whole dataset (population). Scored data are used to calculate these metrics (``uplift_predict`` column = individual treatment effect).
+
+- **Average Treatment Effect (ATE)** Average expected uplift prediction (treatment effect) over all records in the dataset.
+- **Average Treatment Effect on the Treated (ATT)** Average expected uplift prediction (treatment effect) of all records in the dataset belonging to the treatment group.
+- **Average Treatment Effect on the Control (ATC)** Average expected uplift prediction (treatment effect) of all records in the dataset belonging to the control group.
+
+The interpretation depends on the meaning of the data. Currently, only the Bernoulli distribution is supported, so the metrics indicate whether or not the treatment impacts the target value y=1.
+
+For example, we analyze data to determine whether a medicine helps patients recover from a disease or not. We have patients in the treatment group and the control group. The target variable is whether the medicine (treatment) helped recovery (y=1) or not (y=0). In this case:
+
+- positive ATE means the medicine helps with recovery in general
+- negative ATE means the medicine does not help with recovery in general
+- ATE equal or close to zero means the medicine does not affect recovery in general
+- a similar interpretation applies to ATT and ATC; a positive ATT is usually what scientists look for, but ATC is also an interesting metric (in an ideal case, both ATT and ATC are positive, which says the treatment has a clear effect).
+
 Uplift Curve and Area Under Uplift Curve (AUUC) calculation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/h2o-py/h2o/model/metrics/uplift.py b/h2o-py/h2o/model/metrics/uplift.py
index ae0cdc13bdf5..508fe776f71a 100644
--- a/h2o-py/h2o/model/metrics/uplift.py
+++ b/h2o-py/h2o/model/metrics/uplift.py
@@ -11,6 +11,9 @@ class H2OBinomialUpliftModelMetrics(MetricsBase):
     
     def _str_items_custom(self):
         items = [
+            "ATE: {}".format(self.ate()),
+            "ATT: {}".format(self.att()),
+            "ATC: {}".format(self.atc()),
             "AUUC: {}".format(self.auuc()),
             "AUUC normalized: {}".format(self.auuc_normalized()),
         ]
@@ -20,7 +23,94 @@ def _str_items_custom(self):
         aecut = self.aecu_table()
         if aecut: items.append(aecut)
         return items
-    
+
+    def ate(self):
+        """
+        Retrieve Average Treatment Effect value.
+        
+        :returns: ATE value.
+
+        :examples:
+        
+        >>> from h2o.estimators import H2OUpliftRandomForestEstimator
+        >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+        >>> treatment_column = "treatment"
+        >>> response_column = "conversion"
+        >>> train[treatment_column] = train[treatment_column].asfactor()
+        >>> train[response_column] = train[response_column].asfactor()
+        >>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6"]
+        >>>
+        >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, 
+        ...                                               max_depth=5,
+        ...                                               treatment_column=treatment_column,
+        ...                                               uplift_metric="kl",
+        ...                                               distribution="bernoulli",
+        ...                                               min_rows=10,
+        ...                                               auuc_type="gain")
+        >>> uplift_model.train(y=response_column, x=predictors, training_frame=train)
+        >>> perf = uplift_model.model_performance()
+        >>> perf.ate()
+        """
+        return self._metric_json['ate']
+
+    def att(self):
+        """
+        Retrieve Average Treatment Effect on the Treated.
+        
+        :returns: ATT value.
+
+        :examples:
+        
+        >>> from h2o.estimators import H2OUpliftRandomForestEstimator
+        >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+        >>> treatment_column = "treatment"
+        >>> response_column = "conversion"
+        >>> train[treatment_column] = train[treatment_column].asfactor()
+        >>> train[response_column] = train[response_column].asfactor()
+        >>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6"]
+        >>>
+        >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, 
+        ...                                               max_depth=5,
+        ...                                               treatment_column=treatment_column,
+        ...                                               uplift_metric="kl",
+        ...                                               distribution="bernoulli",
+        ...                                               min_rows=10,
+        ...                                               auuc_type="gain")
+        >>> uplift_model.train(y=response_column, x=predictors, training_frame=train)
+        >>> perf = uplift_model.model_performance()
+        >>> perf.att()
+        """
+        return self._metric_json['att']
+
+    def atc(self):
+        """
+        Retrieve Average Treatment Effect on the Control.
+        
+        :returns: ATC value.
+
+        :examples:
+        
+        >>> from h2o.estimators import H2OUpliftRandomForestEstimator
+        >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+        >>> treatment_column = "treatment"
+        >>> response_column = "conversion"
+        >>> train[treatment_column] = train[treatment_column].asfactor()
+        >>> train[response_column] = train[response_column].asfactor()
+        >>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6"]
+        >>>
+        >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, 
+        ...                                               max_depth=5,
+        ...                                               treatment_column=treatment_column,
+        ...                                               uplift_metric="kl",
+        ...                                               distribution="bernoulli",
+        ...                                               min_rows=10,
+        ...                                               auuc_type="gain")
+        >>> uplift_model.train(y=response_column, x=predictors, training_frame=train)
+        >>> perf = uplift_model.model_performance()
+        >>> perf.atc()
+        """
+        return self._metric_json['atc']    
+
     def auuc(self, metric=None):
         """
         Retrieve area under cumulative uplift curve (AUUC) value.
diff --git a/h2o-py/h2o/model/models/uplift.py b/h2o-py/h2o/model/models/uplift.py
index 6f75a0536879..621464cb663f 100644
--- a/h2o-py/h2o/model/models/uplift.py
+++ b/h2o-py/h2o/model/models/uplift.py
@@ -369,6 +369,114 @@ def qini(self, train=False, valid=False):
         """
         return self._delegate_to_metrics(method='qini', train=train, valid=valid)
 
+    def ate(self, train=False, valid=False):
+        """
+        Retrieve Average Treatment Effect
+
+        If all are False (default), then return the training ATE metric.
+        If more than one option is set to True, then return a dictionary of metrics where the
+        keys are "train" and "valid".
+
+        :param bool train: If True, return the ATE value for the training data.
+        :param bool valid: If True, return the ATE value for the validation data.
+
+        :returns: the ATE value for the specified key(s).
+
+        :examples:
+
+        >>> from h2o.estimators import H2OUpliftRandomForestEstimator
+        >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+        >>> treatment_column = "treatment"
+        >>> response_column = "conversion"
+        >>> train[treatment_column] = train[treatment_column].asfactor()
+        >>> train[response_column] = train[response_column].asfactor()
+        >>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6"]
+        >>>
+        >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
+        ...                                               max_depth=5,
+        ...                                               treatment_column=treatment_column,
+        ...                                               uplift_metric="kl",
+        ...                                               distribution="bernoulli",
+        ...                                               min_rows=10,
+        ...                                               auuc_type="gain")
+        >>> uplift_model.train(y=response_column, x=predictors, training_frame=train)
+        >>> uplift_model.ate() # <- Default: return training metric value
+        >>> uplift_model.ate(train=True)
+        """
+        return self._delegate_to_metrics(method='ate', train=train, valid=valid)
+
+    def att(self, train=False, valid=False):
+        """
+        Retrieve Average Treatment Effect on the Treated
+
+        If all are False (default), then return the training ATT metric.
+        If more than one option is set to True, then return a dictionary of metrics where the
+        keys are "train" and "valid".
+
+        :param bool train: If True, return the ATT value for the training data.
+        :param bool valid: If True, return the ATT value for the validation data.
+
+        :returns: the ATT value for the specified key(s).
+
+        :examples:
+
+        >>> from h2o.estimators import H2OUpliftRandomForestEstimator
+        >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+        >>> treatment_column = "treatment"
+        >>> response_column = "conversion"
+        >>> train[treatment_column] = train[treatment_column].asfactor()
+        >>> train[response_column] = train[response_column].asfactor()
+        >>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6"]
+        >>>
+        >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
+        ...                                               max_depth=5,
+        ...                                               treatment_column=treatment_column,
+        ...                                               uplift_metric="kl",
+        ...                                               distribution="bernoulli",
+        ...                                               min_rows=10,
+        ...                                               auuc_type="gain")
+        >>> uplift_model.train(y=response_column, x=predictors, training_frame=train)
+        >>> uplift_model.att() # <- Default: return training metric value
+        >>> uplift_model.att(train=True)
+        """
+        return self._delegate_to_metrics(method='att', train=train, valid=valid)
+
+    def atc(self, train=False, valid=False):
+        """
+        Retrieve Average Treatment Effect on the Control
+
+        If all are False (default), then return the training ATC metric.
+        If more than one option is set to True, then return a dictionary of metrics where the
+        keys are "train" and "valid".
+
+        :param bool train: If True, return the ATC value for the training data.
+        :param bool valid: If True, return the ATC value for the validation data.
+
+        :returns: the ATC value for the specified key(s).
+
+        :examples:
+
+        >>> from h2o.estimators import H2OUpliftRandomForestEstimator
+        >>> train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+        >>> treatment_column = "treatment"
+        >>> response_column = "conversion"
+        >>> train[treatment_column] = train[treatment_column].asfactor()
+        >>> train[response_column] = train[response_column].asfactor()
+        >>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6"]
+        >>>
+        >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
+        ...                                               max_depth=5,
+        ...                                               treatment_column=treatment_column,
+        ...                                               uplift_metric="kl",
+        ...                                               distribution="bernoulli",
+        ...                                               min_rows=10,
+        ...                                               auuc_type="gain")
+        >>> uplift_model.train(y=response_column, x=predictors, training_frame=train)
+        >>> uplift_model.atc() # <- Default: return training metric value
+        >>> uplift_model.atc(train=True)
+        """
+        return self._delegate_to_metrics(method='atc', train=train, valid=valid)
+
     def _delegate_to_metrics(self, method, train=False, valid=False, **kwargs):
         tm = ModelBase._get_metrics(self, train, valid, xval=None)
         m = {}
diff --git a/h2o-py/tests/testdir_algos/uplift/pyunit_uplift_rf_api_test.py b/h2o-py/tests/testdir_algos/uplift/pyunit_uplift_rf_api_test.py
index 3362594c1dd2..1081064db3bf 100644
--- a/h2o-py/tests/testdir_algos/uplift/pyunit_uplift_rf_api_test.py
+++ b/h2o-py/tests/testdir_algos/uplift/pyunit_uplift_rf_api_test.py
@@ -46,6 +46,9 @@ def uplift_random_forest_api_smoke():
     assert_equals(perf.thresholds_and_metric_scores(), uplift_model.thresholds_and_metric_scores())
     assert_equals(perf.auuc_table(), uplift_model.auuc_table())
     assert_equals(perf.qini(), uplift_model.qini())
+    assert_equals(perf.ate(), uplift_model.ate())
+    assert_equals(perf.att(), uplift_model.att())
+    assert_equals(perf.atc(), uplift_model.atc())
 
     try:
         uplift_model.partial_plot(train_h2o, cols=['feature_8'])
diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
index 7e925b684264..9a224a6fe9f6 100755
--- a/h2o-r/h2o-package/R/models.R
+++ b/h2o-r/h2o-package/R/models.R
@@ -1149,7 +1149,6 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
     params$weights_frame <- h2o.getId(weights)
   }
   if (!is.null(treatment)) {
-    params$treatment_frame <- h2o.getId(treatment)
       if (!(auuc_type %in% c("qini", "lift", "gain", "AUTO"))) {
         stop("auuc_type argument must be gini, lift, gain or AUTO")
       }
@@ -1332,6 +1331,171 @@ h2o.auuc <- function(object, train=FALSE, valid=FALSE, metric=NULL) {
     invisible(NULL)
 }
 
+#' Retrieve Average Treatment Effect
+#'
+#' Retrieves ATE from an \linkS4class{H2OBinomialUpliftMetrics}.
+#' If "train" and "valid" parameters are FALSE (default), then the training ATE is returned. If more
+#' than one parameter is set to TRUE, then a named vector of ATE values is returned, where the names are "train", "valid".
+#'
+#' @param object An \linkS4class{H2OBinomialUpliftMetrics} or \linkS4class{H2OModel} object
+#' @param train Retrieve the training ATE value
+#' @param valid Retrieve the validation ATE value
+#' @examples
+#' \dontrun{
+#' library(h2o)
+#' h2o.init()
+#' f <- "https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv"
+#' train <- h2o.importFile(f)
+#' train$treatment <- as.factor(train$treatment)
+#' train$conversion <- as.factor(train$conversion)
+#' 
+#' model <- h2o.upliftRandomForest(training_frame=train, x=sprintf("f%s",seq(0:10)), y="conversion",
+#'                                 ntrees=10, max_depth=5, treatment_column="treatment",
+#'                                 auuc_type="AUTO")
+#' perf <- h2o.performance(model, train=TRUE) 
+#' h2o.ate(perf)
+#' }
+#' @export
+h2o.ate <- function(object, train=FALSE, valid=FALSE) {
+    if( is(object, "H2OModelMetrics") ) return( object@metrics$ate )
+    if( is(object, "H2OModel") ) {
+        model.parts <- .model.parts(object)
+        if ( !train && !valid ) {
+            metric <- model.parts$tm@metrics$ate
+            if ( !is.null(metric) ) return(metric)
+        }
+        v <- c()
+        v_names <- c()
+        if ( train ) {
+            v <- c(v,model.parts$tm@metrics$ate)
+            v_names <- c(v_names,"train")
+        }
+        if ( valid ) {
+            if( is.null(model.parts$vm) ) return(invisible(.warn.no.validation()))
+            else {
+                v <- c(v,model.parts$vm@metrics$ate)
+                v_names <- c(v_names,"valid")
+            }
+        }
+        if ( !is.null(v) ) {
+            names(v) <- v_names
+            if ( length(v)==1 ) { return( v[[1]] ) } else { return( v ) }
+        }
+    }
+    warning(paste0("No ATE value for ", class(object)))
+    invisible(NULL)
+}
+
+#' Retrieve Average Treatment Effect on the Treated
+#'
+#' Retrieves ATT from an \linkS4class{H2OBinomialUpliftMetrics}.
+#' If "train" and "valid" parameters are FALSE (default), then the training ATT is returned. If more
+#' than one parameter is set to TRUE, then a named vector of ATT values is returned, where the names are "train", "valid".
+#'
+#' @param object An \linkS4class{H2OBinomialUpliftMetrics} or \linkS4class{H2OModel} object
+#' @param train Retrieve the training ATT value
+#' @param valid Retrieve the validation ATT value
+#' @examples
+#' \dontrun{
+#' library(h2o)
+#' h2o.init()
+#' f <- "https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv"
+#' train <- h2o.importFile(f)
+#' train$treatment <- as.factor(train$treatment)
+#' train$conversion <- as.factor(train$conversion)
+#' 
+#' model <- h2o.upliftRandomForest(training_frame=train, x=sprintf("f%s",seq(0:10)), y="conversion",
+#'                                 ntrees=10, max_depth=5, treatment_column="treatment",
+#'                                 auuc_type="AUTO")
+#' perf <- h2o.performance(model, train=TRUE) 
+#' h2o.att(perf)
+#' }
+#' @export
+h2o.att <- function(object, train=FALSE, valid=FALSE) {
+    if( is(object, "H2OModelMetrics") ) return( object@metrics$att )
+    if( is(object, "H2OModel") ) {
+        model.parts <- .model.parts(object)
+        if ( !train && !valid ) {
+            metric <- model.parts$tm@metrics$att
+            if ( !is.null(metric) ) return(metric)
+        }
+        v <- c()
+        v_names <- c()
+        if ( train ) {
+            v <- c(v,model.parts$tm@metrics$att)
+            v_names <- c(v_names,"train")
+        }
+        if ( valid ) {
+            if( is.null(model.parts$vm) ) return(invisible(.warn.no.validation()))
+            else {
+                v <- c(v,model.parts$vm@metrics$att)
+                v_names <- c(v_names,"valid")
+            }
+        }
+        if ( !is.null(v) ) {
+            names(v) <- v_names
+            if ( length(v)==1 ) { return( v[[1]] ) } else { return( v ) }
+        }
+    }
+    warning(paste0("No ATT value for ", class(object)))
+    invisible(NULL)
+}
+
+#' Retrieve Average Treatment Effect on the Control
+#'
+#' Retrieves ATC from an \linkS4class{H2OBinomialUpliftMetrics}.
+#' If "train" and "valid" parameters are FALSE (default), then the training ATC is returned. If more
+#' than one parameter is set to TRUE, then a named vector of ATC values is returned, where the names are "train", "valid".
+#'
+#' @param object An \linkS4class{H2OBinomialUpliftMetrics} or \linkS4class{H2OModel} object
+#' @param train Retrieve the training ATC value
+#' @param valid Retrieve the validation ATC value
+#' @examples
+#' \dontrun{
+#' library(h2o)
+#' h2o.init()
+#' f <- "https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv"
+#' train <- h2o.importFile(f)
+#' train$treatment <- as.factor(train$treatment)
+#' train$conversion <- as.factor(train$conversion)
+#' 
+#' model <- h2o.upliftRandomForest(training_frame=train, x=sprintf("f%s",seq(0:10)), y="conversion",
+#'                                 ntrees=10, max_depth=5, treatment_column="treatment",
+#'                                 auuc_type="AUTO")
+#' perf <- h2o.performance(model, train=TRUE) 
+#' h2o.atc(perf)
+#' }
+#' @export
+h2o.atc <- function(object, train=FALSE, valid=FALSE) {
+    if( is(object, "H2OModelMetrics") ) return( object@metrics$atc )
+    if( is(object, "H2OModel") ) {
+        model.parts <- .model.parts(object)
+        if ( !train && !valid ) {
+            metric <- model.parts$tm@metrics$atc
+            if ( !is.null(metric) ) return(metric)
+        }
+        v <- c()
+        v_names <- c()
+        if ( train ) {
+            v <- c(v,model.parts$tm@metrics$atc)
+            v_names <- c(v_names,"train")
+        }
+        if ( valid ) {
+            if( is.null(model.parts$vm) ) return(invisible(.warn.no.validation()))
+            else {
+                v <- c(v,model.parts$vm@metrics$atc)
+                v_names <- c(v_names,"valid")
+            }
+        }
+        if ( !is.null(v) ) {
+            names(v) <- v_names
+            if ( length(v)==1 ) { return( v[[1]] ) } else { return( v ) }
+        }
+    }
+    warning(paste0("No ATC value for ", class(object)))
+    invisible(NULL)
+}
+
 #' Retrieve normalized AUUC
 #'
 #' Retrieves the AUUC value from an \linkS4class{H2OBinomialUpliftMetrics}. If the metric parameter is "AUTO", 
diff --git a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
index 54abe36f217b..1857820b55f9 100644
--- a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
+++ b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
@@ -60,6 +60,21 @@ test.make_metrics_uplift_binomial <- function() {
     expect_true(is.data.frame(aecu_table1))
  
     expect_equal(aecu_table0, aecu_table1)
+
+    ate0 <- h2o.ate(m0)
+    ate1 <- h2o.ate(m1)
+
+    expect_equal(ate0, ate1)
+
+    att0 <- h2o.att(m0)
+    att1 <- h2o.att(m1)
+
+    expect_equal(att0, att1)
+
+    atc0 <- h2o.atc(m0)
+    atc1 <- h2o.atc(m1)
+
+    expect_equal(atc0, atc1)
 }
 
 doSuite("Check making uplift binomial model metrics.", makeSuite(

From add9e8cd8e42cdb7b1b78aa8d572b1d360628aaa Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Thu, 22 Jun 2023 11:38:20 +0200
Subject: [PATCH 02/12] Fix score with treatment column

---
 h2o-core/src/main/java/hex/Model.java           | 10 +++++++++-
 .../tests/testdir_misc/pyunit_make_metrics.py   | 17 ++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/h2o-core/src/main/java/hex/Model.java b/h2o-core/src/main/java/hex/Model.java
index 3917a5007eaf..9875fb20f980 100755
--- a/h2o-core/src/main/java/hex/Model.java
+++ b/h2o-core/src/main/java/hex/Model.java
@@ -2222,10 +2222,15 @@ protected void setupLocal() {
       Chunk weightsChunk = _hasWeights && _computeMetrics ? chks[_output.weightsIdx()] : null;
       Chunk offsetChunk = _output.hasOffset() ? chks[_output.offsetIdx()] : null;
       Chunk responseChunk = null;
+      Chunk treatmentChunk = null;
       float [] actual = null;
       _mb = Model.this.makeMetricBuilder(_domain);
       if (_computeMetrics) {
-        if (_output.hasResponse()) {
+        if (_output.hasTreatment()){
+          actual = new float[2];
+          responseChunk = chks[_output.responseIdx()];
+          treatmentChunk = chks[_output.treatmentIdx()];
+        } else if (_output.hasResponse()) {
           actual = new float[1];
           responseChunk = chks[_output.responseIdx()];
         } else
@@ -2252,6 +2257,9 @@ protected void setupLocal() {
               for (int i = 0; i < actual.length; ++i)
                 actual[i] = (float) data(chks, row, i);
             }
+            if(treatmentChunk != null){
+              actual[1] = (float) treatmentChunk.atd(row);
+            }
             _mb.perRow(preds, actual, weight, offset, Model.this);
             // Handle custom metric
             customMetricPerRow(preds, actual, weight, offset, Model.this);
diff --git a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
index 0449221bba12..8cea0b9ab6c3 100644
--- a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
+++ b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
@@ -217,13 +217,20 @@ def pyunit_make_metrics_uplift():
     treatment = test[treatment_column]
     m1 = model.model_performance(test_data=test, auuc_type="AUTO", auuc_nbins=nbins)
     m2 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins)
+
+    err = 1e-5
     
-    print(m0.auuc())
-    print(m1.auuc())
-    print(m2.auuc())
+    assert abs(m0.auuc() - m1.auuc()) < err
+    assert abs(m1.auuc() - m2.auuc()) < err
     
-    assert abs(m0.auuc() - m1.auuc()) < 1e-5
-    assert abs(m1.auuc() - m2.auuc()) < 1e-5
+    assert abs(m0.ate() - m1.ate()) < err
+    assert abs(m1.ate() - m2.ate()) < err
+
+    assert abs(m0.att() - m1.att()) < err
+    assert abs(m1.att() - m2.att()) < err
+
+    assert abs(m0.atc() - m1.atc()) < err
+    assert abs(m1.atc() - m2.atc()) < err
 
 
 def suite_model_metrics():

From b33725552651cdf7dccb61743f12d83c2cff680d Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Thu, 22 Jun 2023 16:02:40 +0200
Subject: [PATCH 03/12] Enable custom metric for UpliftDRF

---
 .../main/java/hex/schemas/UpliftDRFV3.java    |  1 +
 .../main/java/hex/tree/uplift/UpliftDRF.java  |  2 -
 h2o-py/h2o/estimators/uplift_random_forest.py | 19 ++++++++
 .../tests/pyunit_utils/utils_model_metrics.py | 46 ++++++++++++++++++-
 .../uplift/pyunit_custom_metric_uplift.py     | 44 ++++++++++++++++++
 .../tests/testdir_misc/pyunit_make_metrics.py |  2 +-
 h2o-r/h2o-package/R/upliftrandomforest.R      |  7 +++
 7 files changed, 117 insertions(+), 4 deletions(-)
 create mode 100644 h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py

diff --git a/h2o-algos/src/main/java/hex/schemas/UpliftDRFV3.java b/h2o-algos/src/main/java/hex/schemas/UpliftDRFV3.java
index df4be83d79bb..a711e0c2e7c1 100644
--- a/h2o-algos/src/main/java/hex/schemas/UpliftDRFV3.java
+++ b/h2o-algos/src/main/java/hex/schemas/UpliftDRFV3.java
@@ -34,6 +34,7 @@ public static final class UpliftDRFParametersV3 extends SharedTreeV3.SharedTreeP
                 "categorical_encoding",
                 "distribution",
                 "check_constant_response",
+                "custom_metric_func",
                 "treatment_column",
                 "uplift_metric",
                 "auuc_type",
diff --git a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
index dc3891a1797b..eef583f66bfa 100644
--- a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
+++ b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
@@ -110,8 +110,6 @@ public boolean providesVarImp() {
             error("_treatment_column", "The treatment column has to be defined.");
         if (_parms._custom_distribution_func != null)
             error("_custom_distribution_func", "The custom distribution is not yet supported for Uplift DRF.");
-        if (_parms._custom_metric_func != null)
-            error("_custom_metric_func", "The custom metric is not yet supported for Uplift DRF.");
         if (_parms._stopping_metric != ScoreKeeper.StoppingMetric.AUTO)
             error("_stopping_metric", "The early stopping is not yet supported for Uplift DRF.");
         if (_parms._stopping_rounds != 0)
diff --git a/h2o-py/h2o/estimators/uplift_random_forest.py b/h2o-py/h2o/estimators/uplift_random_forest.py
index 199398ba28ed..4528b7afa233 100644
--- a/h2o-py/h2o/estimators/uplift_random_forest.py
+++ b/h2o-py/h2o/estimators/uplift_random_forest.py
@@ -46,6 +46,7 @@ def __init__(self,
                  categorical_encoding="auto",  # type: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"]
                  distribution="auto",  # type: Literal["auto", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"]
                  check_constant_response=True,  # type: bool
+                 custom_metric_func=None,  # type: Optional[str]
                  treatment_column="treatment",  # type: str
                  uplift_metric="auto",  # type: Literal["auto", "kl", "euclidean", "chi_squared"]
                  auuc_type="auto",  # type: Literal["auto", "qini", "lift", "gain"]
@@ -137,6 +138,9 @@ def __init__(self,
                column being a constant value or not.
                Defaults to ``True``.
         :type check_constant_response: bool
+        :param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
+               Defaults to ``None``.
+        :type custom_metric_func: str, optional
         :param treatment_column: Define the column which will be used for computing uplift gain to select best split for
                a tree. The column has to divide the dataset into treatment (value 1) and control (value 0) groups.
                Defaults to ``"treatment"``.
@@ -178,6 +182,7 @@ def __init__(self,
         self.categorical_encoding = categorical_encoding
         self.distribution = distribution
         self.check_constant_response = check_constant_response
+        self.custom_metric_func = custom_metric_func
         self.treatment_column = treatment_column
         self.uplift_metric = uplift_metric
         self.auuc_type = auuc_type
@@ -525,6 +530,20 @@ def check_constant_response(self, check_constant_response):
         assert_is_type(check_constant_response, None, bool)
         self._parms["check_constant_response"] = check_constant_response
 
+    @property
+    def custom_metric_func(self):
+        """
+        Reference to custom evaluation function, format: `language:keyName=funcName`
+
+        Type: ``str``.
+        """
+        return self._parms.get("custom_metric_func")
+
+    @custom_metric_func.setter
+    def custom_metric_func(self, custom_metric_func):
+        assert_is_type(custom_metric_func, None, str)
+        self._parms["custom_metric_func"] = custom_metric_func
+
     @property
     def treatment_column(self):
         """
diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index 1d973c79f34c..a76c8973928d 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -27,6 +27,7 @@ def metric(self, l):
         import java.lang.Math as math
         return math.sqrt(l[0] / l[1])
 
+
 class CustomLoglossFunc:
     def map(self, pred, act, w, o, model):
         import water.util.MathUtils as math
@@ -40,6 +41,30 @@ def reduce(self, l, r):
     def metric(self, l):
         return l[0] / l[1]
 
+
+class CustomAteFunc:
+    def map(self, pred, act, w, o, model):
+        return [pred[0], 1]
+
+    def reduce(self, l, r):
+        return [l[0] + r[0], l[1] + r[1]]
+
+    def metric(self, l):
+        return l[0] / l[1]
+
+
+class CustomAttFunc:
+    def map(self, pred, act, w, o, model):
+        treatment = pred[1]
+        return [pred[0], 1] if treatment == 1 else [0, 0]
+
+    def reduce(self, l, r):
+        return [l[0] + r[0], l[1] + r[1]]
+
+    def metric(self, l):
+        return l[0] / l[1]
+
+
 class CustomNullFunc:
     def map(self, pred, act, w, o, model):
         return []
@@ -137,6 +162,14 @@ def dataset_iris():
     return df.split_frame(ratios=[0.6, 0.3], seed=0)
 
 
+def dataset_uplift():
+    treatment_column = "treatment"
+    response_column = "outcome"
+    df = h2o.upload_file(path=locate("smalldata/uplift/upliftml_train.csv"))
+    df[treatment_column] = df[treatment_column].asfactor()
+    df[response_column] = df[response_column].asfactor()
+    return df.split_frame(ratios=[0.6, 0.3], seed=0)
+
 # Regression Model fixture
 def regression_model(ModelType, custom_metric_func, params={}):
     (ftrain, fvalid, ftest) = dataset_prostate()
@@ -147,7 +180,6 @@ def regression_model(ModelType, custom_metric_func, params={}):
     return model, ftest
 
 
-# Binomial model fixture
 def binomial_model(ModelType, custom_metric_func, params={}):
     (ftrain, fvalid, ftest) = dataset_prostate()
     model = ModelType(model_id="binomial",
@@ -164,3 +196,15 @@ def multinomial_model(ModelType, custom_metric_func, params={}):
                       custom_metric_func=custom_metric_func, **params)
     model.train(y="class", x=ftrain.names, training_frame=ftrain, validation_frame=fvalid)
     return model, ftest
+
+
+def uplift_binomial_model(ModelType, custom_metric_func):
+    (ftrain, fvalid, ftest) = dataset_uplift()
+    params = {"treatment_column": "treatment"}
+    response_column = "outcome"
+    model = ModelType(model_id="uplift_binomial", ntrees=3, max_depth=5,
+                      score_each_iteration=True,
+                      custom_metric_func=custom_metric_func,
+                      **params)
+    model.train(y=response_column, x=ftrain.names, training_frame=ftrain, validation_frame=fvalid)
+    return model, ftest
diff --git a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
new file mode 100644
index 000000000000..99e188c9018b
--- /dev/null
+++ b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
@@ -0,0 +1,44 @@
+import sys
+
+sys.path.insert(1, "../../../")
+import h2o
+from tests import pyunit_utils
+from tests.pyunit_utils import CustomAteFunc, CustomAttFunc, uplift_binomial_model, assert_correct_custom_metric
+from h2o.estimators.uplift_random_forest import H2OUpliftRandomForestEstimator
+
+
+# Custom model metrics fixture
+def custom_ate_mm():
+    return h2o.upload_custom_metric(CustomAteFunc, func_name="ate", func_file="mm_ate.py")
+
+
+def custom_att_mm():
+    return h2o.upload_custom_metric(CustomAttFunc, func_name="att", func_file="mm_att.py")
+
+
+# Test that the custom model metric is computed
+# and compare them with implicit custom metric
+def test_custom_metric_computation_binomial_ate():
+    (model, f_test) = uplift_binomial_model(H2OUpliftRandomForestEstimator, custom_ate_mm())
+    print(model)
+    assert_correct_custom_metric(model, f_test, "ate", "Binomial ATE on prostate")
+
+
+def test_custom_metric_computation_binomial_att():
+    (model, f_test) = uplift_binomial_model(H2OUpliftRandomForestEstimator, custom_att_mm())
+    print(model)
+    assert_correct_custom_metric(model, f_test, "att", "Binomial ATT on prostate")
+
+
+# Tests to invoke in this suite
+__TESTS__ = [
+    test_custom_metric_computation_binomial_ate,
+    test_custom_metric_computation_binomial_att
+]
+
+if __name__ == "__main__":
+    for func in __TESTS__:
+        pyunit_utils.standalone_test(func)
+else:
+    for func in __TESTS__:
+        func()
diff --git a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
index 8cea0b9ab6c3..0a92cb4f0e8c 100644
--- a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
+++ b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
@@ -167,7 +167,7 @@ def pyunit_make_metrics(weights_col=None):
     model.train(x=predictors, y=response, training_frame=fr)
     predicted = h2o.assign(model.predict(fr)[1:], "pred")
     actual = h2o.assign(fr[response].asfactor(), "act")
-    domain = fr[response].levels()[0]
+    domain = fr[response].levels()[0]               
 
     m0 = model.model_performance(train=True)
     m1 = h2o.make_metrics(predicted, actual, domain=domain, weights=weights, auc_type="MACRO_OVR")
diff --git a/h2o-r/h2o-package/R/upliftrandomforest.R b/h2o-r/h2o-package/R/upliftrandomforest.R
index a5ee99be70e3..9233ee00fae1 100644
--- a/h2o-r/h2o-package/R/upliftrandomforest.R
+++ b/h2o-r/h2o-package/R/upliftrandomforest.R
@@ -47,6 +47,7 @@
 #' @param check_constant_response \code{Logical}. Check if response column is constant. If enabled, then an exception is thrown if the response
 #'        column is a constant value.If disabled, then model will train regardless of the response column being a
 #'        constant value or not. Defaults to TRUE.
+#' @param custom_metric_func Reference to custom evaluation function, format: `language:keyName=funcName`
 #' @param uplift_metric Divergence metric used to find best split when building an uplift tree. Must be one of: "AUTO", "KL",
 #'        "Euclidean", "ChiSquared". Defaults to AUTO.
 #' @param auuc_type Metric used to calculate Area Under Uplift Curve. Must be one of: "AUTO", "qini", "lift", "gain". Defaults to
@@ -82,6 +83,7 @@ h2o.upliftRandomForest <- function(x,
                                    categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                                    distribution = c("AUTO", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"),
                                    check_constant_response = TRUE,
+                                   custom_metric_func = NULL,
                                    uplift_metric = c("AUTO", "KL", "Euclidean", "ChiSquared"),
                                    auuc_type = c("AUTO", "qini", "lift", "gain"),
                                    auuc_nbins = -1,
@@ -155,6 +157,8 @@ h2o.upliftRandomForest <- function(x,
     parms$categorical_encoding <- categorical_encoding
   if (!missing(check_constant_response))
     parms$check_constant_response <- check_constant_response
+  if (!missing(custom_metric_func))
+    parms$custom_metric_func <- custom_metric_func
   if (!missing(uplift_metric))
     parms$uplift_metric <- uplift_metric
   if (!missing(auuc_type))
@@ -196,6 +200,7 @@ h2o.upliftRandomForest <- function(x,
                                                    categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                                                    distribution = c("AUTO", "bernoulli", "multinomial", "gaussian", "poisson", "gamma", "tweedie", "laplace", "quantile", "huber"),
                                                    check_constant_response = TRUE,
+                                                   custom_metric_func = NULL,
                                                    uplift_metric = c("AUTO", "KL", "Euclidean", "ChiSquared"),
                                                    auuc_type = c("AUTO", "qini", "lift", "gain"),
                                                    auuc_nbins = -1,
@@ -273,6 +278,8 @@ h2o.upliftRandomForest <- function(x,
     parms$categorical_encoding <- categorical_encoding
   if (!missing(check_constant_response))
     parms$check_constant_response <- check_constant_response
+  if (!missing(custom_metric_func))
+    parms$custom_metric_func <- custom_metric_func
   if (!missing(uplift_metric))
     parms$uplift_metric <- uplift_metric
   if (!missing(auuc_type))

From 214879d2e2906b048943816a0c81429140f39dd9 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Mon, 26 Jun 2023 15:46:08 +0200
Subject: [PATCH 04/12] Fix python test

---
 h2o-py/tests/pyunit_utils/utils_model_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index a76c8973928d..8dabbd430a25 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -62,7 +62,7 @@ def reduce(self, l, r):
         return [l[0] + r[0], l[1] + r[1]]
 
     def metric(self, l):
-        return l[0] / l[1]
+        return l[0] / l[1] if [1] != 0 else 0
 
 
 class CustomNullFunc:

From 57ac2d71679b8495fe6fe32daeae42e940df9177 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Tue, 27 Jun 2023 15:12:25 +0200
Subject: [PATCH 05/12] fix custom att calculation, add example to doc

---
 .../algo-params/upload_custom_metric.rst      |  2 +-
 .../src/product/data-science/upliftdrf.rst    | 72 +++++++++++++++++++
 .../tests/pyunit_utils/utils_model_metrics.py |  2 +-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/h2o-docs/src/product/data-science/algo-params/upload_custom_metric.rst b/h2o-docs/src/product/data-science/algo-params/upload_custom_metric.rst
index 6d3ada8e12b0..9aee8aa39de0 100644
--- a/h2o-docs/src/product/data-science/algo-params/upload_custom_metric.rst
+++ b/h2o-docs/src/product/data-science/algo-params/upload_custom_metric.rst
@@ -3,7 +3,7 @@
 ``upload_custom_metric``
 ------------------------
 
-- Available in: GBM, DRF, Deeplearning
+- Available in: GBM, DRF, Deeplearning, UpliftDRF
 - Hyperparameter: no
 
 Description
diff --git a/h2o-docs/src/product/data-science/upliftdrf.rst b/h2o-docs/src/product/data-science/upliftdrf.rst
index 437f8b063d66..ed63b2722b1a 100644
--- a/h2o-docs/src/product/data-science/upliftdrf.rst
+++ b/h2o-docs/src/product/data-science/upliftdrf.rst
@@ -126,6 +126,8 @@ Shared-tree algorithm parameters
 
 -  `col_sample_rate_per_tree <algo-params/col_sample_rate_per_tree.html>`__: Specify the column sample rate per tree.  This method samples without replacement. This can be a value from 0.0 to 1.0 and defaults to ``1``.
 
+-  `custom_metric_func <algo-params/custom_metric_func.html>`__: Specify a custom evaluation function.
+
 -  `histogram_type <algo-params/histogram_type.html>`__: By default (``AUTO``) Uplift DRF bins from min...max in steps of :math:`\frac{(max-min)}{N}`. ``Random`` split points or quantile-based split points can be selected as well. ``RoundRobin`` can be specified to cycle through all histogram types (one per tree). Use one of these options to specify the type of histogram to use for finding optimal split points:
 
     - ``AUTO`` (default)
@@ -157,6 +159,8 @@ Shared-tree algorithm parameters
 
 -  `sample_rate_per_class <algo-params/sample_rate_per_class.html>`__: When building models from imbalanced datasets, this option specifies that each tree in the ensemble should sample from the full training dataset using a per-class-specific sampling rate rather than a global sample factor (as with ``sample_rate``). This method samples without replacement. The range for this option is 0.0 to 1.0.
 
+-  `upload_custom_metric <algo-params/upload_custom_metric.html>`__: Upload a custom metric into a running H2O cluster.
+
 Common parameters
 '''''''''''''''''
 
@@ -257,6 +261,74 @@ For example, we analyze data to determine if some medical help to recover from d
 - ATE equal or similar to zero means the medicine does not affect recovery in general
 - similar interpretation applies to ATT and ATC, the positive ATT is usually what scientists look for, but ATC is also an interesting metric (in an ideal case, positive both ATT and ATC say the treatment has an exact effect).
 
+Custom metric example for Uplift DRF
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. tabs::
+   .. code-tab:: python
+   
+    import h2o
+    from h2o.estimators import H2OUpliftRandomForestEstimator
+    h2o.init()
+
+    # Import the uplift dataset into H2O:
+    data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
+
+    # Set the predictors, response, and treatment column:
+    predictors = ["f1", "f2", "f3", "f4", "f5", "f6","f7", "f8"]
+    # set the response as a factor
+    response = "conversion"
+    data[response] = data[response].asfactor()
+    # set the treatment as a factor
+    treatment_column = "treatment"
+    data[treatment_column] = data[treatment_column].asfactor()
+
+    # Split the dataset into a train and valid set:
+    train, valid = data.split_frame(ratios=[.8], seed=1234)
+
+    # Define custom metric function
+    # ``pred`` is prediction array of length 3, where:
+    #   - pred[0]  = ``uplift_predict``: result uplift prediction score, which is calculated as ``p_y1_ct1 - p_y1_ct0``
+    #   - pred[1] = ``p_y1_ct1``: probability the response is 1 if the row is from the treatment group
+    #   - pred[2] = ``p_y1_ct0``: probability the response is 1 if the row is from the control group
+    # ``act`` is array with original data where
+    #   - act[0] = target variable
+    #   - act[1] = if the record belongs to the treatment or control group
+    # ``w`` (weight) and ``o`` (offset) are not supported in Uplift DRF yet
+    
+    class CustomAteFunc:
+        def map(self, pred, act, w, o, model):
+            return [pred[0], 1]
+
+        def reduce(self, l, r):
+            return [l[0] + r[0], l[1] + r[1]]
+
+        def metric(self, l):
+            return l[0] / l[1]
+
+    custom_metric = h2o.upload_custom_metric(CustomAteFunc, func_name="ate", func_file="mm_ate.py")
+
+    # Build and train the model:
+    uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
+                                                  max_depth=5,
+                                                  treatment_column=treatment_column,
+                                                  uplift_metric="KL",
+                                                  min_rows=10,
+                                                  seed=1234,
+                                                  auuc_type="qini",
+                                                  custom_metric_func=custom_metric)
+    uplift_model.train(x=predictors, 
+                       y=response, 
+                       training_frame=train, 
+                       validation_frame=valid)
+
+    # Eval performance:
+    perf = uplift_model.model_performance()
+    custom_ate = perf._metric_json["training_custom"]
+    print(custom_ate)
+    ate = perf.ate()
+    print(ate)
+
 
 Uplift Curve and Area Under Uplift Curve (AUUC) calculation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index 8dabbd430a25..fe81f82ed398 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -55,7 +55,7 @@ def metric(self, l):
 
 class CustomAttFunc:
     def map(self, pred, act, w, o, model):
-        treatment = pred[1]
+        treatment = act[1]
         return [pred[0], 1] if treatment == 1 else [0, 0]
 
     def reduce(self, l, r):

From a32fc84f0b401fcb4660cf604c04ef76e3e63f70 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Thu, 20 Jul 2023 15:00:45 +0200
Subject: [PATCH 06/12] GH-6783 fix custom function definition

---
 .../tests/pyunit_utils/utils_model_metrics.py | 25 +++++++++++++------
 .../uplift/pyunit_custom_metric_uplift.py     |  4 +--
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index fe81f82ed398..614858562305 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -44,6 +44,8 @@ def metric(self, l):
 
 class CustomAteFunc:
     def map(self, pred, act, w, o, model):
+        if w == 0:
+            return [0,0]
         return [pred[0], 1]
 
     def reduce(self, l, r):
@@ -55,6 +57,8 @@ def metric(self, l):
 
 class CustomAttFunc:
     def map(self, pred, act, w, o, model):
+        if w == 0:
+            return [0,0]
         treatment = act[1]
         return [pred[0], 1] if treatment == 1 else [0, 0]
 
@@ -62,7 +66,7 @@ def reduce(self, l, r):
         return [l[0] + r[0], l[1] + r[1]]
 
     def metric(self, l):
-        return l[0] / l[1] if [1] != 0 else 0
+        return l[0] / l[1] if l[1] != 0 else 0
 
 
 class CustomNullFunc:
@@ -98,12 +102,15 @@ def metric(self, l):
         return 1
 '''
 
-def assert_metrics_equal(metric, metric_name1, metric_name2, msg=None):
+
+def assert_metrics_equal(metric, metric_name1, metric_name2, msg=None, delta=1e-5):
     metric_name1 = metric_name1 if metric_name1 in metric._metric_json else metric_name1.upper()
     metric_name2 = metric_name2 if metric_name2 in metric._metric_json else metric_name2.upper()
-    metric_value1 = metric._metric_json[metric_name1]
-    metric_value2 = metric._metric_json[metric_name2]
-    assert metric_value1 == metric_value2, "{} {}={} {}={}".format(msg, metric_name1, metric_value1, metric_name2, metric_value2)
+    m1 = metric._metric_json[metric_name1]
+    m2 = metric._metric_json[metric_name2]
+    m1 = float(m1) if m1 != "NaN" else 0
+    m2 = float(m2) if m2 != "NaN" else 0
+    assert (m1-m2) < delta, "{}: {} != {}".format(msg, m1, m2)
 
 
 def assert_all_metrics_equal(model, f_test, metric_name, value):
@@ -125,8 +132,12 @@ def assert_scoring_history(model, metric_name1, metric_name2, msg=None):
     scoring_history = model.scoring_history()
     sh1 = scoring_history[metric_name1]
     sh2 = scoring_history[metric_name2]
-    assert (sh1.isnull() == sh2.isnull()).all(), msg
-    assert (sh1.dropna() == sh2.dropna()).all(), msg
+    isnull1 = sh1.isnull()
+    isnull2 = sh2.isnull()
+    assert (isnull1 == isnull2).all(), "{} isnull1: {} isnull2: {}".format(msg, isnull1, isnull2)
+    drop1 = sh1.dropna()
+    drop2 = sh2.dropna()
+    assert (drop1 == drop2).all(), "{} drop1: {} drop2: {}".format(msg, drop1, drop2)
 
 
 def assert_correct_custom_metric(model, f_test, metric_name, msg=None):
diff --git a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
index 99e188c9018b..1ee6417e9de7 100644
--- a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
+++ b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
@@ -9,11 +9,11 @@
 
 # Custom model metrics fixture
 def custom_ate_mm():
-    return h2o.upload_custom_metric(CustomAteFunc, func_name="ate", func_file="mm_ate.py")
+    return h2o.upload_custom_metric(CustomAteFunc, func_name="Custom ATE", func_file="mm_ate.py")
 
 
 def custom_att_mm():
-    return h2o.upload_custom_metric(CustomAttFunc, func_name="att", func_file="mm_att.py")
+    return h2o.upload_custom_metric(CustomAttFunc, func_name="Custom ATT", func_file="mm_att.py")
 
 
 # Test that the custom model metric is computed

From f72b93adb23e3d09fb8bbb944ceefd653a768500 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Mon, 24 Jul 2023 15:06:24 +0200
Subject: [PATCH 07/12] Add atc custom, test new metric in R

---
 .../tests/pyunit_utils/utils_model_metrics.py  | 14 ++++++++++++++
 .../uplift/pyunit_custom_metric_uplift.py      | 16 ++++++++++++++--
 .../testdir_algos/uplift/runit_uplift_smoke.R  | 18 ++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index 614858562305..18f2ff65d155 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -69,6 +69,20 @@ def metric(self, l):
         return l[0] / l[1] if l[1] != 0 else 0
 
 
+class CustomAtcFunc:
+    def map(self, pred, act, w, o, model):
+        if w == 0:
+            return [0,0]
+        treatment = act[1]
+        return [pred[0], 1] if treatment == 0 else [0, 0]
+
+    def reduce(self, l, r):
+        return [l[0] + r[0], l[1] + r[1]]
+
+    def metric(self, l):
+        return l[0] / l[1] if l[1] != 0 else 0
+
+
 class CustomNullFunc:
     def map(self, pred, act, w, o, model):
         return []
diff --git a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
index 1ee6417e9de7..5587f7f5f235 100644
--- a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
+++ b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
@@ -3,7 +3,8 @@
 sys.path.insert(1, "../../../")
 import h2o
 from tests import pyunit_utils
-from tests.pyunit_utils import CustomAteFunc, CustomAttFunc, uplift_binomial_model, assert_correct_custom_metric
+from tests.pyunit_utils import CustomAteFunc, CustomAttFunc, CustomAtcFunc, \
+    uplift_binomial_model, assert_correct_custom_metric
 from h2o.estimators.uplift_random_forest import H2OUpliftRandomForestEstimator
 
 
@@ -16,6 +17,10 @@ def custom_att_mm():
     return h2o.upload_custom_metric(CustomAttFunc, func_name="Custom ATT", func_file="mm_att.py")
 
 
+def custom_atc_mm():
+    return h2o.upload_custom_metric(CustomAttFunc, func_name="Custom ATC", func_file="mm_atc.py")
+
+
 # Test that the custom model metric is computed
 # and compare them with implicit custom metric
 def test_custom_metric_computation_binomial_ate():
@@ -30,10 +35,17 @@ def test_custom_metric_computation_binomial_att():
     assert_correct_custom_metric(model, f_test, "att", "Binomial ATT on prostate")
 
 
+def test_custom_metric_computation_binomial_atc():
+    (model, f_test) = uplift_binomial_model(H2OUpliftRandomForestEstimator, custom_atc_mm())
+    print(model)
+    assert_correct_custom_metric(model, f_test, "atc", "Binomial ATC on prostate")
+
+
 # Tests to invoke in this suite
 __TESTS__ = [
     test_custom_metric_computation_binomial_ate,
-    test_custom_metric_computation_binomial_att
+    test_custom_metric_computation_binomial_att,
+    test_custom_metric_computation_binomial_atc
 ]
 
 if __name__ == "__main__":
diff --git a/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R b/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R
index 4c6af677e29b..1bd30485a5ec 100644
--- a/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R
+++ b/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R
@@ -119,6 +119,24 @@ test.uplift <- function() {
     expect_equal(auuc_norm, expected_values_auuc_norm_qini[i], tolerance=tol)
     expect_equal(auuc_gain_norm, expected_values_auuc_norm_gain[i], tolerance=tol)
     expect_equal(auuc_lift_norm, expected_values_auuc_norm_lift[i], tolerance=tol)
+      
+    model_ate <- h2o.ate(model, train=TRUE, valid=TRUE)
+    print(model_ate)
+    perf_ate <- h2o.ate(perf)
+    print(perf_ate)
+    expect_equal(model_ate[["train"]], perf_ate, tolerance=tol)
+      
+    model_att <- h2o.att(model, train=TRUE, valid=TRUE)
+    print(model_att)  
+    perf_att <- h2o.att(perf)
+    print(perf_att)
+    expect_equal(model_att[["train"]], perf_att, tolerance=tol)
+
+    model_atc <- h2o.atc(model, train=TRUE, valid=TRUE)
+    print(model_atc)  
+    perf_atc <- h2o.atc(perf)
+    print(perf_atc)
+    expect_equal(model_atc[["train"]], perf_atc, tolerance=tol)
 
     plot(perf)
     plot(perf, normalize=TRUE)  

From 8a879f4a1fea39ef47993a6faa7ae615503c57c7 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Thu, 27 Jul 2023 15:17:20 +0200
Subject: [PATCH 08/12] fix test

---
 h2o-py/tests/pyunit_utils/utils_model_metrics.py         | 9 ++-------
 .../testdir_algos/uplift/pyunit_custom_metric_uplift.py  | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index 18f2ff65d155..ef22da4f1bd5 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -44,9 +44,7 @@ def metric(self, l):
 
 class CustomAteFunc:
     def map(self, pred, act, w, o, model):
-        if w == 0:
-            return [0,0]
-        return [pred[0], 1]
+        return [pred[0], 1] 
 
     def reduce(self, l, r):
         return [l[0] + r[0], l[1] + r[1]]
@@ -57,8 +55,6 @@ def metric(self, l):
 
 class CustomAttFunc:
     def map(self, pred, act, w, o, model):
-        if w == 0:
-            return [0,0]
         treatment = act[1]
         return [pred[0], 1] if treatment == 1 else [0, 0]
 
@@ -71,8 +67,6 @@ def metric(self, l):
 
 class CustomAtcFunc:
     def map(self, pred, act, w, o, model):
-        if w == 0:
-            return [0,0]
         treatment = act[1]
         return [pred[0], 1] if treatment == 0 else [0, 0]
 
@@ -124,6 +118,7 @@ def assert_metrics_equal(metric, metric_name1, metric_name2, msg=None, delta=1e-
     m2 = metric._metric_json[metric_name2]
     m1 = float(m1) if m1 != "NaN" else 0
     m2 = float(m2) if m2 != "NaN" else 0
+    print("{} == {}".format(m1, m2))
     assert (m1-m2) < delta, "{}: {} != {}".format(msg, m1, m2)
 
 
diff --git a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
index 5587f7f5f235..fe15aebf3e59 100644
--- a/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
+++ b/h2o-py/tests/testdir_algos/uplift/pyunit_custom_metric_uplift.py
@@ -18,7 +18,7 @@ def custom_att_mm():
 
 
 def custom_atc_mm():
-    return h2o.upload_custom_metric(CustomAttFunc, func_name="Custom ATC", func_file="mm_atc.py")
+    return h2o.upload_custom_metric(CustomAtcFunc, func_name="Custom ATC", func_file="mm_atc.py")
 
 
 # Test that the custom model metric is computed

From 0d1f73078eb6303adf9b0db65bec322689b1c13f Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Mon, 7 Aug 2023 17:43:57 +0200
Subject: [PATCH 09/12] fix att and atc  metric bug

---
 .../src/main/java/hex/tree/SharedTree.java    |  1 +
 h2o-core/src/main/java/hex/Model.java         |  2 +-
 .../java/hex/ModelMetricsBinomialUplift.java  | 12 +++++------
 .../tests/pyunit_utils/utils_model_metrics.py | 20 +++++++++----------
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/h2o-algos/src/main/java/hex/tree/SharedTree.java b/h2o-algos/src/main/java/hex/tree/SharedTree.java
index 1e22e53dad3b..4cf64ca9ad64 100755
--- a/h2o-algos/src/main/java/hex/tree/SharedTree.java
+++ b/h2o-algos/src/main/java/hex/tree/SharedTree.java
@@ -852,6 +852,7 @@ protected final boolean doScoringAndSaveModel(boolean finalScoring, boolean oob,
         ModelMetrics mmv = scv.scoreAndMakeModelMetrics(_model, _parms.valid(), v, build_tree_one_node);
         _lastScoredTree = _model._output._ntrees;
         out._validation_metrics = mmv;
+        out._validation_metrics._description = "Validation metrics";
         if (_model._output._ntrees>0 || scoreZeroTrees()) //don't score the 0-tree model - the error is too large
           out._scored_valid[out._ntrees].fillFrom(mmv);
       }
diff --git a/h2o-core/src/main/java/hex/Model.java b/h2o-core/src/main/java/hex/Model.java
index 9875fb20f980..0afdbbb8387e 100755
--- a/h2o-core/src/main/java/hex/Model.java
+++ b/h2o-core/src/main/java/hex/Model.java
@@ -2257,7 +2257,7 @@ protected void setupLocal() {
               for (int i = 0; i < actual.length; ++i)
                 actual[i] = (float) data(chks, row, i);
             }
-            if(treatmentChunk != null){
+            if (treatmentChunk != null) {
               actual[1] = (float) treatmentChunk.atd(row);
             }
             _mb.perRow(preds, actual, weight, offset, Model.this);
diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
index 30a57f808bf3..5031e53bff0a 100644
--- a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
+++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
@@ -142,13 +142,13 @@ public UpliftBinomialMetrics(String[] domain, double[] thresholds) {
             _mb = new MetricBuilderBinomialUplift(domain, thresholds);
             Chunk uplift = chks[0];
             Chunk actuals = chks[1];
-            Chunk treatment =chks[2];
+            Chunk treatment = chks[2];
             double[] ds = new double[1];
             float[] acts = new float[2];
             for (int i=0; i<chks[0]._len;++i) {
                 ds[0] = uplift.atd(i);
                 acts[0] = (float) actuals.atd(i);
-                acts[1] = (float )treatment.atd(i);
+                acts[1] = (float) treatment.atd(i);
                 _mb.perRow(ds, acts, 1, 0, null);
             }
         }
@@ -188,11 +188,11 @@ public double[] perRow(double[] ds, float[] yact, double weight, double offset,
             _wYY += weight * y * y;
             _count++;
             _wcount += weight;
-            float treatmentGroup = yact[1]; // treatment = 1, control = 0
-            double treatmentEffect = ds[0];
+            int treatmentGroup = (int)yact[1]; // treatment = 1, control = 0
+            double treatmentEffect = ds[0] *  weight;
             _sumTE += treatmentEffect; // result prediction
             _sumTETreatment += treatmentGroup * treatmentEffect; 
-            _treatmentCount += treatmentGroup;
+            _treatmentCount += treatmentGroup *  weight;
             if (_auuc != null) {
                 _auuc.perRow(treatmentEffect, weight, y, treatmentGroup);
             }
@@ -206,7 +206,7 @@ public double[] perRow(double[] ds, float[] yact, double weight, double offset,
             }
             _sumTE += mb._sumTE;
             _sumTETreatment += mb._sumTETreatment;
-            _treatmentCount += _treatmentCount;
+            _treatmentCount += mb._treatmentCount;
         }
 
         /**
diff --git a/h2o-py/tests/pyunit_utils/utils_model_metrics.py b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
index ef22da4f1bd5..37498dc92cce 100644
--- a/h2o-py/tests/pyunit_utils/utils_model_metrics.py
+++ b/h2o-py/tests/pyunit_utils/utils_model_metrics.py
@@ -55,8 +55,8 @@ def metric(self, l):
 
 class CustomAttFunc:
     def map(self, pred, act, w, o, model):
-        treatment = act[1]
-        return [pred[0], 1] if treatment == 1 else [0, 0]
+        treatment = act[1] * w
+        return [pred[0] * treatment, treatment]
 
     def reduce(self, l, r):
         return [l[0] + r[0], l[1] + r[1]]
@@ -67,8 +67,8 @@ def metric(self, l):
 
 class CustomAtcFunc:
     def map(self, pred, act, w, o, model):
-        treatment = act[1]
-        return [pred[0], 1] if treatment == 0 else [0, 0]
+        control = 1 * w if act[1] == 0 else 0
+        return [pred[0] * control, control]
 
     def reduce(self, l, r):
         return [l[0] + r[0], l[1] + r[1]]
@@ -119,7 +119,7 @@ def assert_metrics_equal(metric, metric_name1, metric_name2, msg=None, delta=1e-
     m1 = float(m1) if m1 != "NaN" else 0
     m2 = float(m2) if m2 != "NaN" else 0
     print("{} == {}".format(m1, m2))
-    assert (m1-m2) < delta, "{}: {} != {}".format(msg, m1, m2)
+    assert abs(m1-m2) <= delta, "{}: {} != {}".format(msg, m1, m2)
 
 
 def assert_all_metrics_equal(model, f_test, metric_name, value):
@@ -137,16 +137,16 @@ def assert_all_metrics_equal(model, f_test, metric_name, value):
         "{} metric on validation data should be {}".format(metric_name, value)
 
 
-def assert_scoring_history(model, metric_name1, metric_name2, msg=None):
+def assert_scoring_history(model, metric_name1, metric_name2, msg=None, delta=1e-5):
     scoring_history = model.scoring_history()
     sh1 = scoring_history[metric_name1]
     sh2 = scoring_history[metric_name2]
     isnull1 = sh1.isnull()
     isnull2 = sh2.isnull()
-    assert (isnull1 == isnull2).all(), "{} isnull1: {} isnull2: {}".format(msg, isnull1, isnull2)
-    drop1 = sh1.dropna()
-    drop2 = sh2.dropna()
-    assert (drop1 == drop2).all(), "{} drop1: {} drop2: {}".format(msg, drop1, drop2)
+    assert (isnull1 == isnull2).all(), "{} scoring 1: {} scoring 2: {}".format(msg, isnull1, isnull2)
+    drop1 = sh1.dropna().round(10)
+    drop2 = sh2.dropna().round(10)
+    assert (drop1 == drop2).all(skipna=True), "{} scoring 1: {} scoring 2: {}".format(msg, drop1, drop2)
 
 
 def assert_correct_custom_metric(model, f_test, metric_name, msg=None):

From 7787ab514b026b8ac5a38f768a8085a4d8da4e55 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Wed, 9 Aug 2023 13:54:13 +0200
Subject: [PATCH 10/12] fix make metrics runit

---
 h2o-r/h2o-package/R/models.R                                 | 5 +++--
 .../tests/testdir_misc/runit_make_metrics_uplift_binomial.R  | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
index 9a224a6fe9f6..efa202bece44 100755
--- a/h2o-r/h2o-package/R/models.R
+++ b/h2o-r/h2o-package/R/models.R
@@ -1137,7 +1137,7 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
   predicted <- .validate.H2OFrame(predicted, required=TRUE)
   actuals <- .validate.H2OFrame(actuals, required=TRUE)
   weights <- .validate.H2OFrame(weights, required=FALSE)
-  treatment <- .validate.H2OFrame(treatment, required=FALSE)
+  treatment <- .validate.H2OFrame(treatment, required=TRUE)
   if (!is.character(auc_type)) stop("auc_type argument must be of type character")
   if (!(auc_type %in% c("MACRO_OVO", "MACRO_OVR", "WEIGHTED_OVO", "WEIGHTED_OVR", "NONE", "AUTO"))) {
     stop("auc_type argument must be MACRO_OVO, MACRO_OVR, WEIGHTED_OVO, WEIGHTED_OVR, NONE, AUTO")
@@ -1149,6 +1149,7 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
     params$weights_frame <- h2o.getId(weights)
   }
   if (!is.null(treatment)) {
+      params$treatment_frame <- h2o.getId(treatment)
       if (!(auuc_type %in% c("qini", "lift", "gain", "AUTO"))) {
         stop("auuc_type argument must be gini, lift, gain or AUTO")
       }
@@ -1174,7 +1175,7 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
     params[["domain"]] <- out
   }
   params["auc_type"] <- auc_type  
-  url <- paste0("ModelMetrics/predictions_frame/",params$predictions_frame,"/actuals_frame/",params$actuals_frame)
+  url <- paste0("ModelMetrics/predictions_frame/",params$predictions_frame,"/actuals_frame/",params$actuals_frame, if (!is.null(treatment)) paste0("/treatment_frame/",params$treatment_frame))
   res <- .h2o.__remoteSend(method = "POST", url, .params = params)
   model_metrics <- res$model_metrics
   metrics <- model_metrics[!(names(model_metrics) %in% c("__meta", "names", "domains", "model_category"))]
diff --git a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
index 1857820b55f9..c56634b99833 100644
--- a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
+++ b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
@@ -25,8 +25,9 @@ test.make_metrics_uplift_binomial <- function() {
     pred <- h2o.assign(h2o.predict(model,train)[,1],"pred")
     actual <- h2o.assign(train[,response],"act")
     treat <- h2o.assign(train[,treatment],"treatment")
+    print(treat)
     
-    m0 <- h2o.make_metrics(pred, actual, treatment=treatment)
+    m0 <- h2o.make_metrics(pred, actual, treatment=treat)
     print(m0)
     m1 <- h2o.performance(model, train)
     print(m1)

From fa5b011181392ed8f872279baf2ee1024ec2b296 Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Fri, 11 Aug 2023 16:46:46 +0200
Subject: [PATCH 11/12] Fix make_metrics bug

---
 h2o-py/h2o/h2o.py            | 1 +
 h2o-r/h2o-package/R/models.R | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/h2o-py/h2o/h2o.py b/h2o-py/h2o/h2o.py
index 521cba050c24..94d4f664b322 100644
--- a/h2o-py/h2o/h2o.py
+++ b/h2o-py/h2o/h2o.py
@@ -2043,6 +2043,7 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None
     if weights is not None:
         params["weights_frame"] = weights.frame_id
     if treatment is not None:
+        assert treatment.ncol == 1, "`treatment` frame should have exactly 1 column"
         params["treatment_frame"] = treatment.frame_id
         allowed_auuc_types = ["qini", "lift", "gain", "AUTO"]
         assert auuc_type in allowed_auuc_types, "auuc_type should be "+(" ".join([str(type) for type in allowed_auuc_types]))
diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
index efa202bece44..4f836c14e3e2 100755
--- a/h2o-r/h2o-package/R/models.R
+++ b/h2o-r/h2o-package/R/models.R
@@ -1137,7 +1137,7 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
   predicted <- .validate.H2OFrame(predicted, required=TRUE)
   actuals <- .validate.H2OFrame(actuals, required=TRUE)
   weights <- .validate.H2OFrame(weights, required=FALSE)
-  treatment <- .validate.H2OFrame(treatment, required=TRUE)
+  treatment <- .validate.H2OFrame(treatment, required=FALSE)
   if (!is.character(auc_type)) stop("auc_type argument must be of type character")
   if (!(auc_type %in% c("MACRO_OVO", "MACRO_OVR", "WEIGHTED_OVO", "WEIGHTED_OVR", "NONE", "AUTO"))) {
     stop("auc_type argument must be MACRO_OVO, MACRO_OVR, WEIGHTED_OVO, WEIGHTED_OVR, NONE, AUTO")
@@ -1156,8 +1156,8 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
       if (auuc_nbins < -1 || auuc_nbins == 0) {
         stop("auuc_nbins must be -1 or higher than 0.")
       }
-      params$auuc_type = auuc_type
-      params$auuc_nbins = auuc_nbins
+      params$auuc_type <- auuc_type
+      params$auuc_nbins <- auuc_nbins
   }
   params$domain <- domain
   params$distribution <- distribution

From 6475d3233dce36d109b4b8c07c1710a76425c9ba Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Wed, 16 Aug 2023 18:28:20 +0200
Subject: [PATCH 12/12] implement review suggestions, fix R print model

---
 h2o-docs/src/product/data-science/upliftdrf.rst       | 4 ++--
 h2o-r/h2o-package/R/classes.R                         | 3 +++
 h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/h2o-docs/src/product/data-science/upliftdrf.rst b/h2o-docs/src/product/data-science/upliftdrf.rst
index ed63b2722b1a..ec2e3a5f000e 100644
--- a/h2o-docs/src/product/data-science/upliftdrf.rst
+++ b/h2o-docs/src/product/data-science/upliftdrf.rst
@@ -255,10 +255,10 @@ Overall treatment effect metrics show how the uplift predictions look across the
 
 The interpretation depends on concrete data meanings. We currently support only Bernoulli data distribution, so whether the treatment impacts the target value y=1 or not. 
 
-For example, we analyze data to determine if some medical help to recover from disease or not. We have patients in the treatment group and the control group. The target variable is if the medicine (treatment) helped recovery (y=1) or not (y=0). In this case:
+For example, suppose we want to analyze data to determine whether a medicine helps patients recover from a disease. We have patients in the treatment group and the control group. The treatment is the medicine, and the target variable indicates whether the patient recovered (y=1) or not (y=0). In this case:
 - positive ATE means the medicine helps with recovery in general
 - negative ATE means the medicine does not help with recovery in general
-- ATE equal or similar to zero means the medicine does not affect recovery in general
+- ATE equal to or close to zero means the medicine does not affect recovery in general
 - similar interpretation applies to ATT and ATC, the positive ATT is usually what scientists look for, but ATC is also an interesting metric (in an ideal case, positive both ATT and ATC say the treatment has an exact effect).
 
 Custom metric example for Uplift DRF
diff --git a/h2o-r/h2o-package/R/classes.R b/h2o-r/h2o-package/R/classes.R
index 78348c0ea5f8..f52ea4c5e0f8 100755
--- a/h2o-r/h2o-package/R/classes.R
+++ b/h2o-r/h2o-package/R/classes.R
@@ -672,6 +672,9 @@ setClass("H2OBinomialUpliftMetrics",    contains="H2OModelMetrics")
 #' @export
 setMethod("show", "H2OBinomialUpliftMetrics", function(object) {
     callNextMethod(object)  # call to the super
+    cat("ATE: ", object@metrics$ate, "\n", sep="" )
+    cat("ATT: ", object@metrics$att, "\n", sep="" )
+    cat("ATC: ", object@metrics$atc, "\n", sep="" )
     cat("Default AUUC:  ", object@metrics$AUUC, "\n", sep="")
     cat("All types of AUUC:  ", "\n", sep="")
     print(object@metrics$auuc_table)
diff --git a/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R b/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R
index 1bd30485a5ec..97519196601a 100644
--- a/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R
+++ b/h2o-r/tests/testdir_algos/uplift/runit_uplift_smoke.R
@@ -55,6 +55,8 @@ test.uplift <- function() {
         min_rows = 10,
         nbins = 100,
         seed = seed) 
+      
+    print(model)  
         
     # test model metrics
     print("Test model metrics")