From e0c17ac7f0258de63e4596cea3ac1a7ff10eeb8a Mon Sep 17 00:00:00 2001 From: Alex Shelkovnykov Date: Wed, 12 Feb 2020 13:22:31 -0800 Subject: [PATCH 1/2] Remove experimental SmoothedHingeLoss training task - In experiments comparing Smoothed Hinge Loss to Logistic Regression, results were substantially worse and training speed was significantly reduced due to slower convergence - Removed GLMLossFunction and its derived classes, since all training tasks in Photon ML are GLM tasks --- .../RandomEffectCoordinateIntegTest.scala | 7 +- .../estimators/GameEstimatorIntegTest.scala | 1 - .../EvaluatorFactoryIntegTest.scala | 1 - ...istributedObjectiveFunctionIntegTest.scala | 146 ++++++++++--- .../DistributedGLMLossFunctionIntegTest.scala | 129 ------------ ...edSmoothedHingeLossFunctionIntegTest.scala | 125 ----------- .../NormalizationContextIntegTest.scala | 11 +- ...tributedOptimizationProblemIntegTest.scala | 110 +--------- ...ngleNodeOptimizationProblemIntegTest.scala | 13 +- .../ml/supervised/BaseGLMIntegTest.scala | 11 +- .../linkedin/photon/ml/ModelTraining.scala | 21 +- .../photon/ml/SparkSessionConfiguration.scala | 3 +- .../photon/ml/estimators/GameEstimator.scala | 11 +- .../ml/evaluation/EvaluatorFactory.scala | 2 - .../SmoothedHingeLossEvaluator.scala | 40 ---- .../DistributedObjectiveFunction.scala | 168 ++++++++++++++- .../ml/function/ObjectiveFunctionHelper.scala | 37 +++- .../SingleNodeObjectiveFunction.scala | 168 ++++++++++++++- .../glm/DistributedGLMLossFunction.scala | 195 ------------------ .../ml/function/glm/GLMLossFunction.scala | 62 ------ .../ml/function/glm/PoissonLossFunction.scala | 1 + .../glm/SingleNodeGLMLossFunction.scala | 187 ----------------- ...DistributedSmoothedHingeLossFunction.scala | 132 ------------ .../SingleNodeSmoothedHingeLossFunction.scala | 120 ----------- .../svm/SmoothedHingeLossFunction.scala | 116 ----------- ...GeneralizedLinearOptimizationProblem.scala | 60 +----- .../ml/sampling/DownSamplerHelper.scala | 2 +- .../SmoothedHingeLossLinearSVMModel.scala | 154 -------------- .../DistributedObjectiveFunctionTest.scala | 34 ++- .../ObjectiveFunctionHelperTest.scala | 45 ++-- .../SingleNodeObjectiveFunctionTest.scala | 141 ++++++++++--- .../ml/function/glm/GLMLossFunctionTest.scala | 74 ------- .../glm/SingleNodeGLMLossFunctionTest.scala | 124 ----------- ...gleNodeSmoothedHingeLossFunctionTest.scala | 117 ----------- .../svm/SmoothedHingeLossFunctionTest.scala | 75 ------- .../DistributedOptimizationProblemTest.scala | 121 +++++++++-- ...ralizedLinearOptimizationProblemTest.scala | 101 +-------- .../SingleNodeOptimizationProblemTest.scala | 17 +- .../ml/sampling/DownSamplerHelperTest.scala | 5 +- .../photon/ml/util/GameTestUtils.scala | 15 +- .../ml/data/DataValidatorsIntegTest.scala | 28 --- .../scala/com/linkedin/photon/ml/Driver.scala | 5 +- .../photon/ml/data/DataValidators.scala | 2 - .../com/linkedin/photon/ml/util/Utils.scala | 1 - .../linkedin/photon/ml/util/UtilsTest.scala | 5 - .../ml/optimization/IntegTestObjective.scala | 5 +- .../com/linkedin/photon/ml/TaskType.scala | 2 +- .../photon/ml/evaluation/EvaluatorType.scala | 3 +- .../ml/function/ObjectiveFunction.scala | 3 +- .../ml/evaluation/EvaluatorTypeTest.scala | 5 - .../ml/function/L2RegularizationTest.scala | 5 +- .../ml/optimization/TestObjective.scala | 10 +- 52 files changed, 820 insertions(+), 2156 deletions(-) delete mode 100644 photon-api/src/integTest/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunctionIntegTest.scala delete mode 100644
photon-api/src/integTest/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunctionIntegTest.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/SmoothedHingeLossEvaluator.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunction.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunction.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala delete mode 100644 photon-api/src/main/scala/com/linkedin/photon/ml/supervised/classification/SmoothedHingeLossLinearSVMModel.scala delete mode 100644 photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala delete mode 100644 photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunctionTest.scala delete mode 100644 photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunctionTest.scala delete mode 100644 photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinateIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinateIntegTest.scala index fa14159a..a775a2a6 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinateIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/algorithm/RandomEffectCoordinateIntegTest.scala @@ -21,7 +21,7 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.constants.MathConst import com.linkedin.photon.ml.data.RandomEffectDataset -import com.linkedin.photon.ml.function.glm.SingleNodeGLMLossFunction +import com.linkedin.photon.ml.function.SingleNodeObjectiveFunction import com.linkedin.photon.ml.model.{Coefficients, RandomEffectModel} import com.linkedin.photon.ml.optimization.game.RandomEffectOptimizationProblem import com.linkedin.photon.ml.projector.{LinearSubspaceProjector, LinearSubspaceProjectorTest} @@ -36,7 +36,6 @@ class RandomEffectCoordinateIntegTest extends SparkTestUtils with GameTestUtils import RandomEffectCoordinateIntegTest._ - /** * */ @@ -64,7 +63,7 @@ class RandomEffectCoordinateIntegTest extends SparkTestUtils with GameTestUtils "someShard") val mockRandomEffectDataset = mock(classOf[RandomEffectDataset]) - val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeGLMLossFunction]]) + val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeObjectiveFunction]]) doReturn(linearSubspaceProjectors).when(mockRandomEffectDataset).projectors @@ -120,7 +119,7 @@ class RandomEffectCoordinateIntegTest extends SparkTestUtils with GameTestUtils "someShard") val mockRandomEffectDataset = mock(classOf[RandomEffectDataset]) - val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeGLMLossFunction]]) + val mockRandomEffectOptimizationProblem = 
mock(classOf[RandomEffectOptimizationProblem[SingleNodeObjectiveFunction]]) doReturn(linearSubspaceProjectors).when(mockRandomEffectDataset).projectors diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/estimators/GameEstimatorIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/estimators/GameEstimatorIntegTest.scala index a11e21c1..4a95fdba 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/estimators/GameEstimatorIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/estimators/GameEstimatorIntegTest.scala @@ -301,7 +301,6 @@ class GameEstimatorIntegTest extends SparkTestUtils with TestTemplateWithTmpDir Array( Array(TaskType.LINEAR_REGRESSION, RMSE), Array(TaskType.LOGISTIC_REGRESSION, AUC), - Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, AUC), Array(TaskType.POISSON_REGRESSION, PoissonLoss)) /** diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactoryIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactoryIntegTest.scala index ffea0965..a31e3f3c 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactoryIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactoryIntegTest.scala @@ -38,7 +38,6 @@ class EvaluatorFactoryIntegTest extends SparkTestUtils { Array(RMSE), Array(PoissonLoss), Array(LogisticLoss), - Array(SmoothedHingeLoss), Array(SquaredLoss), Array(MultiPrecisionAtK(1, ID_TAG)), Array(MultiPrecisionAtK(5, ID_TAG)), diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionIntegTest.scala index 78411191..9214795b 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionIntegTest.scala @@ -23,13 +23,12 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.TaskType import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.glm.{DistributedGLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} -import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction +import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} import com.linkedin.photon.ml.normalization.NoNormalization -import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration -import com.linkedin.photon.ml.optimization.{L2RegularizationContext, NoRegularizationContext} +import com.linkedin.photon.ml.optimization.game.{FixedEffectOptimizationConfiguration, GLMOptimizationConfiguration} +import com.linkedin.photon.ml.optimization.{ElasticNetRegularizationContext, L2RegularizationContext, NoRegularizationContext, OptimizerConfig} import com.linkedin.photon.ml.test.SparkTestUtils -import com.linkedin.photon.ml.util.PhotonBroadcast +import com.linkedin.photon.ml.util.{PhotonBroadcast, PhotonNonBroadcast} /** * Integration tests for [[DistributedObjectiveFunction]] to verify that the loss functions compute gradients & Hessians @@ -39,11 +38,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { import DistributedObjectiveFunctionIntegTest._ - private val twiceDiffTasks = Array( + private val tasks = Array( 
TaskType.LOGISTIC_REGRESSION, TaskType.LINEAR_REGRESSION, TaskType.POISSON_REGRESSION) - private val diffTasks = twiceDiffTasks ++ Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM) private val binaryClassificationDatasetGenerationFuncs = Array( generateBenignDatasetBinaryClassification _, generateWeightedBenignDatasetBinaryClassification _, @@ -67,15 +65,15 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { * @return Anonymous functions to generate the loss function and training data for the gradient tests */ @DataProvider(parallel = true) - def getDifferentiableFunctions: Array[Array[Object]] = diffTasks + def getDifferentiableFunctions: Array[Array[Object]] = tasks .flatMap { case TaskType.LOGISTIC_REGRESSION => treeAggregateDepths.flatMap { treeAggDepth => def lossFuncBuilder = - () => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) def lossFuncWithL2Builder = - () => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc => Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) @@ -85,10 +83,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { case TaskType.LINEAR_REGRESSION => treeAggregateDepths.flatMap { treeAggDepth => def lossFuncBuilder = - () => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) def lossFuncWithL2Builder = - () => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) linearRegressionDatasetGenerationFuncs.flatMap { dataGenFunc => Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) @@ -98,29 +96,16 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { case TaskType.POISSON_REGRESSION => treeAggregateDepths.flatMap { treeAggDepth => def lossFuncBuilder = - () => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) def lossFuncWithL2Builder = - () => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) poissonRegressionDatasetGenerationFuncs.flatMap { dataGenFunc => Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) } } - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => - treeAggregateDepths.flatMap { treeAggDepth => - def lossFuncBuilder = - () => DistributedSmoothedHingeLossFunction(NO_REG_CONFIGURATION_MOCK, treeAggDepth) - - def lossFuncWithL2Builder = - () => DistributedSmoothedHingeLossFunction(L2_REG_CONFIGURATION_MOCK, treeAggDepth) - - binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc => - Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) - } - } - case other => throw new IllegalArgumentException(s"Unrecognized task type: $other") } @@ -132,15 +117,15 @@ class 
DistributedObjectiveFunctionIntegTest extends SparkTestUtils { * @return Anonymous functions to generate the loss function and training data for the Hessian tests */ @DataProvider(parallel = true) - def getTwiceDifferentiableFunctions: Array[Array[Object]] = twiceDiffTasks + def getTwiceDifferentiableFunctions: Array[Array[Object]] = tasks .flatMap { case TaskType.LOGISTIC_REGRESSION => treeAggregateDepths.flatMap { treeAggDepth => def lossFuncBuilder = - () => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) def lossFuncWithL2Builder = - () => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth) binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc => Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) @@ -150,10 +135,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { case TaskType.LINEAR_REGRESSION => treeAggregateDepths.flatMap { treeAggDepth => def lossFuncBuilder = - () => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) def lossFuncWithL2Builder = - () => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth) linearRegressionDatasetGenerationFuncs.flatMap { dataGenFunc => Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) @@ -163,10 +148,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { case TaskType.POISSON_REGRESSION => treeAggregateDepths.flatMap { treeAggDepth => def lossFuncBuilder = - () => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) def lossFuncWithL2Builder = - () => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) + () => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth) poissonRegressionDatasetGenerationFuncs.flatMap { dataGenFunc => Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc)) @@ -573,10 +558,92 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils { normalizationContextBroadcast.bv.unpersist() } + + /** + * Verify the value of loss function without regularization. 
+ */ + @Test + def testValueNoRegularization(): Unit = sparkTest("testValueNoRegularization") { + + val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) + val coefficients = COEFFICIENT_VECTOR + + val fixedEffectRegularizationContext = NoRegularizationContext + val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( + FIXED_EFFECT_OPTIMIZER_CONFIG, + fixedEffectRegularizationContext) + val distributedObjectiveFunction = DistributedObjectiveFunction( + fixedEffectOptimizationConfiguration, + LogisticLossFunction, + TREE_AGGREGATE_DEPTH) + val value = distributedObjectiveFunction.value( + labeledPoints, + coefficients, + PhotonNonBroadcast(NORMALIZATION_CONTEXT)) + + // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) = 5.1755, since w = (-2, 3) gives z_1 = 3 for label 0 and z_2 = -2 for label 1 + assertEquals(value, 5.1755, EPSILON) + } + + /** + * Verify the value of loss function with L2 regularization. + */ + @Test + def testValueWithL2Regularization(): Unit = sparkTest("testValueWithL2Regularization") { + + val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) + val coefficients = COEFFICIENT_VECTOR + + val fixedEffectRegularizationContext = L2RegularizationContext + val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( + FIXED_EFFECT_OPTIMIZER_CONFIG, + fixedEffectRegularizationContext, + FIXED_EFFECT_REGULARIZATION_WEIGHT) + val distributedObjectiveFunction = DistributedObjectiveFunction( + fixedEffectOptimizationConfiguration, + LogisticLossFunction, + TREE_AGGREGATE_DEPTH) + val value = distributedObjectiveFunction.value( + labeledPoints, + coefficients, + PhotonNonBroadcast(NORMALIZATION_CONTEXT)) + + // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + 1 * ((-2)^2 + 3^2) / 2 = 11.6755 + assertEquals(value, 11.6755, EPSILON) + } + + /** + * Verify the value of loss function with elastic net regularization. + */ + @Test + def testValueWithElasticNetRegularization(): Unit = sparkTest("testValueWithElasticNetRegularization") { + + val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) + val coefficients = COEFFICIENT_VECTOR + + val fixedEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA) + val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( + FIXED_EFFECT_OPTIMIZER_CONFIG, + fixedEffectRegularizationContext, + FIXED_EFFECT_REGULARIZATION_WEIGHT) + val distributedObjectiveFunction = DistributedObjectiveFunction( + fixedEffectOptimizationConfiguration, + LogisticLossFunction, + TREE_AGGREGATE_DEPTH) + val value = distributedObjectiveFunction.value( + labeledPoints, + coefficients, + PhotonNonBroadcast(NORMALIZATION_CONTEXT)) + + // L1 is computed by the optimizer.
+ // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.0755 + assertEquals(value, 9.0755, EPSILON) + } } object DistributedObjectiveFunctionIntegTest { + // Gradient and Hessian test constants private val SPARK_CONSISTENCY_CHECK_SAMPLES = 5 private val NUM_PARTITIONS = 4 private val PROBLEM_DIMENSION = 5 @@ -593,6 +660,17 @@ object DistributedObjectiveFunctionIntegTest { private val WEIGHT_RANDOM_MAX = 10 private val TRAINING_SAMPLES = PROBLEM_DIMENSION * PROBLEM_DIMENSION + // Regularization test constants + private val FIXED_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig]) + private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0)) + private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0)) + private val COEFFICIENT_VECTOR = Vector(-2.0, 3.0) + private val NORMALIZATION_CONTEXT = NoNormalization() + private val FIXED_EFFECT_REGULARIZATION_WEIGHT = 1D + private val ALPHA = 0.4 + private val TREE_AGGREGATE_DEPTH = 2 + private val EPSILON = 1e-3 + doReturn(L2RegularizationContext).when(L2_REG_CONFIGURATION_MOCK).regularizationContext doReturn(REGULARIZATION_WEIGHT).when(L2_REG_CONFIGURATION_MOCK).regularizationWeight doReturn(NoRegularizationContext).when(NO_REG_CONFIGURATION_MOCK).regularizationContext diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunctionIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunctionIntegTest.scala deleted file mode 100644 index 075257fb..00000000 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunctionIntegTest.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2019 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.glm - -import breeze.linalg.{DenseVector, Vector} -import org.mockito.Mockito._ -import org.testng.Assert.assertEquals -import org.testng.annotations.Test - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.normalization.NoNormalization -import com.linkedin.photon.ml.optimization._ -import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration -import com.linkedin.photon.ml.test.SparkTestUtils -import com.linkedin.photon.ml.util.PhotonNonBroadcast - -/** - * Integration tests for [[DistributedGLMLossFunction]]. - */ -class DistributedGLMLossFunctionIntegTest extends SparkTestUtils { - - import DistributedGLMLossFunctionIntegTest._ - - /** - * Verify the value of loss function without regularization. 
- */ - @Test() - def testValueNoRegularization(): Unit = sparkTest("testValueNoRegularization") { - - val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) - val coefficients = COEFFICIENT_VECTOR - - val fixedEffectRegularizationContext = NoRegularizationContext - val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( - FIXED_EFFECT_OPTIMIZER_CONFIG, - fixedEffectRegularizationContext) - val distributedGLMLossFunction = DistributedGLMLossFunction( - fixedEffectOptimizationConfiguration, - LogisticLossFunction, - TREE_AGGREGATE_DEPTH) - val value = distributedGLMLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // expectValue = log(1 + exp(3)) + log(1 + exp(2)) = 5.1755 - assertEquals(value, 5.1755, EPSILON) - } - - /** - * Verify the value of loss function with L2 regularization. - */ - @Test() - def testValueWithL2Regularization(): Unit = sparkTest("testValueWithL2Regularization") { - - val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) - val coefficients = COEFFICIENT_VECTOR - - val fixedEffectRegularizationContext = L2RegularizationContext - val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( - FIXED_EFFECT_OPTIMIZER_CONFIG, - fixedEffectRegularizationContext, - FIXED_EFFECT_REGULARIZATION_WEIGHT) - val distributedGLMLossFunction = DistributedGLMLossFunction( - fixedEffectOptimizationConfiguration, - LogisticLossFunction, - TREE_AGGREGATE_DEPTH) - val value = distributedGLMLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + 1 * ((-2)^2 + 3^2) / 2 = 11.6755 - assertEquals(value, 11.6755, EPSILON) - } - - /** - * Verify the value of loss function with elastic net regularization. - */ - @Test() - def testValueWithElasticNetRegularization(): Unit = sparkTest("testValueWithElasticNetRegularization") { - - val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) - val coefficients = COEFFICIENT_VECTOR - - val fixedEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA) - val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( - FIXED_EFFECT_OPTIMIZER_CONFIG, - fixedEffectRegularizationContext, - FIXED_EFFECT_REGULARIZATION_WEIGHT) - val distributedGLMLossFunction = DistributedGLMLossFunction( - fixedEffectOptimizationConfiguration, - LogisticLossFunction, - TREE_AGGREGATE_DEPTH) - val value = distributedGLMLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // L1 is computed by the optimizer. 
- // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.0755 - assertEquals(value, 9.0755, EPSILON) - } -} - -object DistributedGLMLossFunctionIntegTest { - - private val FIXED_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig]) - private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0)) - private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0)) - private val COEFFICIENT_VECTOR = Vector(-2.0, 3.0) - private val NORMALIZATION_CONTEXT = NoNormalization() - private val FIXED_EFFECT_REGULARIZATION_WEIGHT = 1D - private val ALPHA = 0.4 - private val TREE_AGGREGATE_DEPTH = 2 - private val EPSILON = 1e-3 -} diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunctionIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunctionIntegTest.scala deleted file mode 100644 index 9a82a61e..00000000 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunctionIntegTest.scala +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright 2019 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.svm - -import breeze.linalg.{DenseVector, Vector} -import org.mockito.Mockito._ -import org.testng.Assert.assertEquals -import org.testng.annotations.Test - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.normalization.NoNormalization -import com.linkedin.photon.ml.optimization._ -import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration -import com.linkedin.photon.ml.test.SparkTestUtils -import com.linkedin.photon.ml.util.PhotonNonBroadcast - -/** - * Integration tests for [[DistributedSmoothedHingeLossFunction]]. - */ -class DistributedSmoothedHingeLossFunctionIntegTest extends SparkTestUtils { - - import DistributedSmoothedHingeLossFunctionIntegTest._ - - /** - * Verify the value of loss function without regularization. - */ - @Test() - def testValueNoRegularization(): Unit = sparkTest("testValueNoRegularization") { - - val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) - val coefficients = COEFFICIENT_VECTOR - - val fixedEffectRegularizationContext = NoRegularizationContext - val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( - FIXED_EFFECT_OPTIMIZER_CONFIG, - fixedEffectRegularizationContext) - val distributedSmoothedHingeLossFunction = DistributedSmoothedHingeLossFunction( - fixedEffectOptimizationConfiguration, - TREE_AGGREGATE_DEPTH) - val value = distributedSmoothedHingeLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - assertEquals(value, 6.0, EPSILON) - } - - /** - * Verify the value of loss function with L2 regularization. 
- */ - @Test() - def testValueWithL2Regularization(): Unit = sparkTest("testValueWithL2Regularization") { - - val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) - val coefficients = COEFFICIENT_VECTOR - - val fixedEffectRegularizationContext = L2RegularizationContext - val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( - FIXED_EFFECT_OPTIMIZER_CONFIG, - fixedEffectRegularizationContext, - FIXED_EFFECT_REGULARIZATION_WEIGHT) - val distributedSmoothedHingeLossFunction = DistributedSmoothedHingeLossFunction( - fixedEffectOptimizationConfiguration, - TREE_AGGREGATE_DEPTH) - val value = distributedSmoothedHingeLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // expectedValue = 6 + 1 * ((-2)^2 + 3^2) / 2 = 12.5 - assertEquals(value, 12.5, EPSILON) - } - - /** - * Verify the value of loss function with elastic net regularization. - */ - @Test() - def testValueWithElasticNetRegularization(): Unit = sparkTest("testValueWithElasticNetRegularization") { - - val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2)) - val coefficients = COEFFICIENT_VECTOR - - val fixedEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA) - val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration( - FIXED_EFFECT_OPTIMIZER_CONFIG, - fixedEffectRegularizationContext, - FIXED_EFFECT_REGULARIZATION_WEIGHT) - val distributedSmoothedHingeLossFunction = DistributedSmoothedHingeLossFunction( - fixedEffectOptimizationConfiguration, - TREE_AGGREGATE_DEPTH) - val value = distributedSmoothedHingeLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // L1 is computed by the optimizer. 
- // expectedValue = 6 + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.9 - assertEquals(value, 9.9, EPSILON) - } -} - -object DistributedSmoothedHingeLossFunctionIntegTest { - - private val FIXED_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig]) - private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0)) - private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0)) - private val COEFFICIENT_VECTOR = Vector(-2.0, 3.0) - private val NORMALIZATION_CONTEXT = NoNormalization() - private val FIXED_EFFECT_REGULARIZATION_WEIGHT = 1D - private val ALPHA = 0.4 - private val TREE_AGGREGATE_DEPTH = 2 - private val EPSILON = 1e-3 -} diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/normalization/NormalizationContextIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/normalization/NormalizationContextIntegTest.scala index 9c1d2360..3671325d 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/normalization/NormalizationContextIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/normalization/NormalizationContextIntegTest.scala @@ -28,7 +28,7 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.{ModelTraining, TaskType} import com.linkedin.photon.ml.data.LabeledPoint import com.linkedin.photon.ml.function.DistributedObjectiveFunction -import com.linkedin.photon.ml.function.glm.{DistributedGLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} +import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.NormalizationType.NormalizationType import com.linkedin.photon.ml.optimization._ @@ -202,18 +202,17 @@ class NormalizationContextIntegTest extends SparkTestUtils with GameTestUtils { val configuration = FixedEffectOptimizationConfiguration(generateOptimizerConfig()) - val testData = for (optimizerType <- OptimizerType.values; - taskType<- TaskType.values.filterNot(_ == TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM)) yield { + val testData = for (optimizerType <- OptimizerType.values; taskType <- TaskType.values) yield { val objectiveFunction = taskType match { case TaskType.LOGISTIC_REGRESSION => - DistributedGLMLossFunction(configuration, LogisticLossFunction, treeAggregateDepth = 1) + DistributedObjectiveFunction(configuration, LogisticLossFunction, treeAggregateDepth = 1) case TaskType.LINEAR_REGRESSION => - DistributedGLMLossFunction(configuration, SquaredLossFunction, treeAggregateDepth = 1) + DistributedObjectiveFunction(configuration, SquaredLossFunction, treeAggregateDepth = 1) case TaskType.POISSON_REGRESSION => - DistributedGLMLossFunction(configuration, PoissonLossFunction, treeAggregateDepth = 1) + DistributedObjectiveFunction(configuration, PoissonLossFunction, treeAggregateDepth = 1) } val optimizerNorm = optimizerType match { case OptimizerType.LBFGS => diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala index abf513c4..ab0d5c86 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemIntegTest.scala @@ -17,7 +17,6 @@ package 
com.linkedin.photon.ml.optimization import java.util.Random import breeze.linalg.{DenseMatrix, DenseVector, Vector, diag, pinv} -import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.mockito.Mockito._ import org.testng.Assert._ @@ -25,16 +24,13 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.constants.MathConst import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.L2RegularizationDiff +import com.linkedin.photon.ml.function.DistributedObjectiveFunction import com.linkedin.photon.ml.function.glm._ -import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction import com.linkedin.photon.ml.model.Coefficients -import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext} import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration -import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel import com.linkedin.photon.ml.test.{CommonTestUtils, SparkTestUtils} -import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} +import com.linkedin.photon.ml.util.VectorUtils /** * Integration tests for [[DistributedOptimizationProblem]]. @@ -42,7 +38,6 @@ import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} class DistributedOptimizationProblemIntegTest extends SparkTestUtils { import CommonTestUtils._ - import DistributedOptimizationProblemIntegTest._ /** * Function to generate a mock [[GeneralizedLinearModel]]. @@ -140,87 +135,6 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { } } - /** - * Test that regularization weights can be updated. - */ - @Test - def testUpdateRegularizationWeight(): Unit = sparkTest("testUpdateRegularizationWeight") { - - val normalization = NoNormalization() - val initL1Weight = 1D - val initL2Weight = 2D - val finalL1Weight = 3D - val finalL2Weight = 4D - val finalElasticWeight = 5D - val alpha = 0.75 - val elasticFinalL1Weight = finalElasticWeight * alpha - val elasticFinalL2Weight = finalElasticWeight * (1 - alpha) - - val normalizationMock = mock(classOf[BroadcastWrapper[NormalizationContext]]) - val optimizer = mock(classOf[Optimizer[DistributedSmoothedHingeLossFunction]]) - val statesTracker = mock(classOf[OptimizationStatesTracker]) - val objectiveFunction = mock(classOf[DistributedSmoothedHingeLossFunction]) - - doReturn(normalization).when(normalizationMock).value - doReturn(statesTracker).when(optimizer).getStateTracker - - val optimizerL1 = new OWLQN(initL1Weight, normalizationMock) - val objectiveFunctionL2 = new L2LossFunction(sc) - objectiveFunctionL2.l2RegularizationWeight = initL2Weight - - val l1Problem = new DistributedOptimizationProblem( - optimizerL1, - objectiveFunction, - samplerOption = None, - LogisticRegressionModel.apply, - L1RegularizationContext, - VarianceComputationType.NONE) - val l2Problem = new DistributedOptimizationProblem( - optimizer, - objectiveFunctionL2, - samplerOption = None, - LogisticRegressionModel.apply, - L2RegularizationContext, - VarianceComputationType.NONE) - val elasticProblem = new DistributedOptimizationProblem( - optimizerL1, - objectiveFunctionL2, - samplerOption = None, - LogisticRegressionModel.apply, - ElasticNetRegularizationContext(alpha), - VarianceComputationType.NONE) - - // Check update to L1/L2 weights individually - assertNotEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, 
CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - - l1Problem.updateRegularizationWeight(finalL1Weight) - l2Problem.updateRegularizationWeight(finalL2Weight) - - assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - - // Check updates to L1/L2 weights together - optimizerL1.l1RegularizationWeight = initL1Weight - objectiveFunctionL2.l2RegularizationWeight = initL2Weight - - assertNotEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - - elasticProblem.updateRegularizationWeight(finalElasticWeight) - - assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) - } - /** * Test simple coefficient variance computation for weighted data points, with regularization. 
* @@ -239,7 +153,7 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { val input = sc.parallelize(dataGenerationFunction()) val coefficients = generateDenseVector(OptimizationProblemIntegTestUtils.DIMENSIONS) - val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]]) + val optimizer = mock(classOf[Optimizer[DistributedObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val regContext = mock(classOf[RegularizationContext]) val optConfig = mock(classOf[FixedEffectOptimizationConfiguration]) @@ -250,7 +164,7 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { doReturn(RegularizationType.L2).when(regContext).regularizationType doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight) - val objective = DistributedGLMLossFunction(optConfig, lossFunction, treeAggregateDepth = 1) + val objective = DistributedObjectiveFunction(optConfig, lossFunction, treeAggregateDepth = 1) val optimizationProblem = new DistributedOptimizationProblem( optimizer, @@ -292,7 +206,7 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { val dimensions = OptimizationProblemIntegTestUtils.DIMENSIONS val coefficients = generateDenseVector(dimensions) - val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]]) + val optimizer = mock(classOf[Optimizer[DistributedObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val regContext = mock(classOf[RegularizationContext]) val optConfig = mock(classOf[FixedEffectOptimizationConfiguration]) @@ -303,7 +217,7 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { doReturn(RegularizationType.L2).when(regContext).regularizationType doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight) - val objective = DistributedGLMLossFunction(optConfig, lossFunction, treeAggregateDepth = 1) + val objective = DistributedObjectiveFunction(optConfig, lossFunction, treeAggregateDepth = 1) val optimizationProblem = new DistributedOptimizationProblem( optimizer, @@ -345,7 +259,7 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { } } - val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]]) + val optimizer = mock(classOf[Optimizer[DistributedObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val regContext = mock(classOf[RegularizationContext]) val optConfig = mock(classOf[FixedEffectOptimizationConfiguration]) @@ -354,7 +268,7 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { doReturn(regContext).when(optConfig).regularizationContext doReturn(RegularizationType.NONE).when(regContext).regularizationType - val objective = DistributedGLMLossFunction(optConfig, LogisticLossFunction, treeAggregateDepth = 1) + val objective = DistributedObjectiveFunction(optConfig, LogisticLossFunction, treeAggregateDepth = 1) val optimizationProblem = new DistributedOptimizationProblem( optimizer, @@ -402,11 +316,3 @@ class DistributedOptimizationProblemIntegTest extends SparkTestUtils { VectorUtils.areAlmostEqual(actual, expected) } } - -object DistributedOptimizationProblemIntegTest { - - // No way to pass Mixin class type to Mockito, need to define a concrete class - private class L2LossFunction(sc: SparkContext) - extends DistributedSmoothedHingeLossFunction(treeAggregateDepth = 1) - with L2RegularizationDiff -} diff --git 
a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala index b1b5e8b1..64f1301b 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemIntegTest.scala @@ -25,6 +25,7 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.constants.MathConst import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.SingleNodeObjectiveFunction import com.linkedin.photon.ml.function.glm._ import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration @@ -156,7 +157,7 @@ class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils { val coefficients = generateDenseVector(OptimizationProblemIntegTestUtils.DIMENSIONS) - val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]]) + val optimizer = mock(classOf[Optimizer[SingleNodeObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val regContext = mock(classOf[RegularizationContext]) val optConfig = mock(classOf[FixedEffectOptimizationConfiguration]) @@ -167,7 +168,7 @@ class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils { doReturn(RegularizationType.L2).when(regContext).regularizationType doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight) - val objective = SingleNodeGLMLossFunction(optConfig, lossFunction) + val objective = SingleNodeObjectiveFunction(optConfig, lossFunction) val optimizationProblem = new SingleNodeOptimizationProblem( optimizer, @@ -205,7 +206,7 @@ class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils { val dimensions = OptimizationProblemIntegTestUtils.DIMENSIONS val coefficients = generateDenseVector(dimensions) - val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]]) + val optimizer = mock(classOf[Optimizer[SingleNodeObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val regContext = mock(classOf[RegularizationContext]) val optConfig = mock(classOf[FixedEffectOptimizationConfiguration]) @@ -216,7 +217,7 @@ class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils { doReturn(RegularizationType.L2).when(regContext).regularizationType doReturn(regularizationWeight).when(regContext).getL2RegularizationWeight(regularizationWeight) - val objective = SingleNodeGLMLossFunction(optConfig, lossFunction) + val objective = SingleNodeObjectiveFunction(optConfig, lossFunction) val optimizationProblem = new SingleNodeOptimizationProblem( optimizer, @@ -253,7 +254,7 @@ class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils { new LabeledPoint(label, DenseVector(features)) } - val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]]) + val optimizer = mock(classOf[Optimizer[SingleNodeObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val regContext = mock(classOf[RegularizationContext]) val optConfig = mock(classOf[FixedEffectOptimizationConfiguration]) @@ -262,7 +263,7 @@ class SingleNodeOptimizationProblemIntegTest extends SparkTestUtils { doReturn(regContext).when(optConfig).regularizationContext doReturn(RegularizationType.NONE).when(regContext).regularizationType - 
val objective = SingleNodeGLMLossFunction(optConfig, LogisticLossFunction) + val objective = SingleNodeObjectiveFunction(optConfig, LogisticLossFunction) val optimizationProblem = new SingleNodeOptimizationProblem( optimizer, diff --git a/photon-api/src/integTest/scala/com/linkedin/photon/ml/supervised/BaseGLMIntegTest.scala b/photon-api/src/integTest/scala/com/linkedin/photon/ml/supervised/BaseGLMIntegTest.scala index 4c9cab34..76d7ca16 100644 --- a/photon-api/src/integTest/scala/com/linkedin/photon/ml/supervised/BaseGLMIntegTest.scala +++ b/photon-api/src/integTest/scala/com/linkedin/photon/ml/supervised/BaseGLMIntegTest.scala @@ -20,7 +20,8 @@ import org.testng.Assert.assertTrue import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.glm.{DistributedGLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} +import com.linkedin.photon.ml.function.DistributedObjectiveFunction +import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext} import com.linkedin.photon.ml.optimization._ import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration @@ -117,7 +118,7 @@ class BaseGLMIntegTest extends SparkTestUtils { (normalizationContext: BroadcastWrapper[NormalizationContext]) => DistributedOptimizationProblem( lbfgsConfig, - DistributedGLMLossFunction(lbfgsConfig, SquaredLossFunction, treeAggregateDepth = 1), + DistributedObjectiveFunction(lbfgsConfig, SquaredLossFunction, treeAggregateDepth = 1), None, LinearRegressionModel.apply, normalizationContext, @@ -132,7 +133,7 @@ class BaseGLMIntegTest extends SparkTestUtils { (normalizationContext: BroadcastWrapper[NormalizationContext]) => DistributedOptimizationProblem( lbfgsConfig, - DistributedGLMLossFunction(lbfgsConfig, PoissonLossFunction, treeAggregateDepth = 1), + DistributedObjectiveFunction(lbfgsConfig, PoissonLossFunction, treeAggregateDepth = 1), None, PoissonRegressionModel.apply, normalizationContext, @@ -149,7 +150,7 @@ class BaseGLMIntegTest extends SparkTestUtils { (normalizationContext: BroadcastWrapper[NormalizationContext]) => DistributedOptimizationProblem( lbfgsConfig, - DistributedGLMLossFunction(lbfgsConfig, LogisticLossFunction, treeAggregateDepth = 1), + DistributedObjectiveFunction(lbfgsConfig, LogisticLossFunction, treeAggregateDepth = 1), None, LogisticRegressionModel.apply, normalizationContext, @@ -171,7 +172,7 @@ class BaseGLMIntegTest extends SparkTestUtils { def runGeneralizedLinearOptimizationProblemScenario( desc: String, optimizationProblemBuilder: BroadcastWrapper[NormalizationContext] => - DistributedOptimizationProblem[DistributedGLMLossFunction], + DistributedOptimizationProblem[DistributedObjectiveFunction], data: Seq[LabeledPoint], validator: ModelValidator[GeneralizedLinearModel]): Unit = sparkTest(desc) { diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/ModelTraining.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/ModelTraining.scala index 81a8370f..b60f390a 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/ModelTraining.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/ModelTraining.scala @@ -16,14 +16,15 @@ package com.linkedin.photon.ml import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel + import com.linkedin.photon.ml.data.LabeledPoint -import 
com.linkedin.photon.ml.function.glm.{DistributedGLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} -import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction +import com.linkedin.photon.ml.function.DistributedObjectiveFunction +import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.optimization.OptimizerType.OptimizerType import com.linkedin.photon.ml.optimization._ import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration -import com.linkedin.photon.ml.supervised.classification.{LogisticRegressionModel, SmoothedHingeLossLinearSVMModel} +import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel import com.linkedin.photon.ml.supervised.regression.{LinearRegressionModel, PoissonRegressionModel} import com.linkedin.photon.ml.util.{Logging, PhotonBroadcast} @@ -121,7 +122,7 @@ object ModelTraining extends Logging { val (glmConstructor, objectiveFunction) = taskType match { case TaskType.LOGISTIC_REGRESSION => val constructor = LogisticRegressionModel.apply _ - val objective = DistributedGLMLossFunction( + val objective = DistributedObjectiveFunction( optimizationConfig, LogisticLossFunction, treeAggregateDepth) @@ -130,7 +131,7 @@ object ModelTraining extends Logging { case TaskType.LINEAR_REGRESSION => val constructor = LinearRegressionModel.apply _ - val objective = DistributedGLMLossFunction( + val objective = DistributedObjectiveFunction( optimizationConfig, SquaredLossFunction, treeAggregateDepth) @@ -139,21 +140,13 @@ object ModelTraining extends Logging { case TaskType.POISSON_REGRESSION => val constructor = PoissonRegressionModel.apply _ - val objective = DistributedGLMLossFunction( + val objective = DistributedObjectiveFunction( optimizationConfig, PoissonLossFunction, treeAggregateDepth) (constructor, objective) - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => - val constructor = SmoothedHingeLossLinearSVMModel.apply _ - val objective = DistributedSmoothedHingeLossFunction( - optimizationConfig, - treeAggregateDepth) - - (constructor, objective) - case _ => throw new Exception(s"Loss function for taskType $taskType is currently not supported.") } val optimizationProblem = DistributedOptimizationProblem( diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala index f1b3df63..b8f1be57 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala @@ -29,7 +29,7 @@ import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.optimization._ import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration import com.linkedin.photon.ml.projector.LinearSubspaceProjector -import com.linkedin.photon.ml.supervised.classification.{LogisticRegressionModel, SmoothedHingeLossLinearSVMModel} +import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel import com.linkedin.photon.ml.supervised.regression.{LinearRegressionModel, PoissonRegressionModel} @@ -70,7 +70,6 @@ object SparkSessionConfiguration { 
classOf[Set[Int]], classOf[SingleNodeObjectiveFunction], classOf[SingleNodeOptimizationProblem[_]], - classOf[SmoothedHingeLossLinearSVMModel], classOf[SparseVector[Double]], classOf[TRON], classOf[ValueAndGradientAggregator], diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala index 5adda9eb..ba50c6fd 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/estimators/GameEstimator.scala @@ -33,7 +33,7 @@ import com.linkedin.photon.ml.Types.{CoordinateId, FeatureShardId, UniqueSampleI import com.linkedin.photon.ml.algorithm._ import com.linkedin.photon.ml.data._ import com.linkedin.photon.ml.evaluation._ -import com.linkedin.photon.ml.function.ObjectiveFunctionHelper +import com.linkedin.photon.ml.function.{DistributedObjectiveFunction, ObjectiveFunctionHelper, SingleNodeObjectiveFunction} import com.linkedin.photon.ml.function.glm._ import com.linkedin.photon.ml.model.{FixedEffectModel, GameModel, RandomEffectModel} import com.linkedin.photon.ml.normalization._ @@ -42,7 +42,7 @@ import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceCompu import com.linkedin.photon.ml.optimization.game._ import com.linkedin.photon.ml.sampling.DownSamplerHelper import com.linkedin.photon.ml.spark.{BroadcastLike, RDDLike} -import com.linkedin.photon.ml.supervised.classification.{LogisticRegressionModel, SmoothedHingeLossLinearSVMModel} +import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import com.linkedin.photon.ml.supervised.regression.{LinearRegressionModel, PoissonRegressionModel} import com.linkedin.photon.ml.util._ @@ -57,8 +57,8 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P import GameEstimator._ // 2 types that make the code more readable - type SingleNodeLossFunctionConstructor = PointwiseLossFunction => SingleNodeGLMLossFunction - type DistributedLossFunctionConstructor = PointwiseLossFunction => DistributedGLMLossFunction + type SingleNodeLossFunctionConstructor = PointwiseLossFunction => SingleNodeObjectiveFunction + type DistributedLossFunctionConstructor = PointwiseLossFunction => DistributedObjectiveFunction private implicit val parent: Identifiable = this @@ -689,7 +689,7 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P // Get default evaluators given the task type val taskType = getRequiredParam(trainingTask) val defaultEvaluator = taskType match { - case TaskType.LOGISTIC_REGRESSION | TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => AreaUnderROCCurveEvaluator + case TaskType.LOGISTIC_REGRESSION => AreaUnderROCCurveEvaluator case TaskType.LINEAR_REGRESSION => RMSEEvaluator case TaskType.POISSON_REGRESSION => PoissonLossEvaluator case _ => throw new UnsupportedOperationException(s"$taskType is not a valid GAME training task") @@ -755,7 +755,6 @@ class GameEstimator(val sc: SparkContext, implicit val logger: Logger) extends P case TaskType.LOGISTIC_REGRESSION => LogisticRegressionModel.apply _ case TaskType.LINEAR_REGRESSION => LinearRegressionModel.apply _ case TaskType.POISSON_REGRESSION => PoissonRegressionModel.apply _ - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => SmoothedHingeLossLinearSVMModel.apply _ case _ => throw new Exception("Need to specify a valid loss function") } val downSamplerFactory = DownSamplerHelper.buildFactory(task) diff --git 
a/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactory.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactory.scala index 740efe79..35d82a0f 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactory.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorFactory.scala @@ -47,8 +47,6 @@ object EvaluatorFactory { case LogisticLoss => LogisticLossEvaluator - case SmoothedHingeLoss => SmoothedHingeLossEvaluator - case SquaredLoss => SquaredLossEvaluator case MultiPrecisionAtK(k, idTag) => diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/SmoothedHingeLossEvaluator.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/SmoothedHingeLossEvaluator.scala deleted file mode 100644 index dd3e36a4..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/evaluation/SmoothedHingeLossEvaluator.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.evaluation - -import org.apache.spark.rdd.RDD - -import com.linkedin.photon.ml.function.svm.SmoothedHingeLossFunction - -/** - * Evaluator for smoothed hinge loss. - */ -object SmoothedHingeLossEvaluator extends SingleEvaluator { - - val evaluatorType = EvaluatorType.SmoothedHingeLoss - - /** - * Compute smoothed hinge loss for the given data. 
- * - * @param scoresAndLabelsAndWeights A [[RDD]] of scored data - * @return The smoothed hinge loss - */ - override def evaluate(scoresAndLabelsAndWeights: RDD[(Double, Double, Double)]): Double = - scoresAndLabelsAndWeights - .map { case (score, label, weight) => - weight * SmoothedHingeLossFunction.lossAndDzLoss(score, label)._1 - } - .reduce(_ + _) -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala index 2574aea5..78b5bcf2 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala @@ -14,21 +14,183 @@ */ package com.linkedin.photon.ml.function +import breeze.linalg.{DenseMatrix, Vector} import org.apache.spark.rdd.RDD import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.{HessianDiagonalAggregator, HessianMatrixAggregator, HessianVectorAggregator, PointwiseLossFunction, ValueAndGradientAggregator} +import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients} +import com.linkedin.photon.ml.normalization.NormalizationContext +import com.linkedin.photon.ml.optimization.RegularizationType +import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration +import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel +import com.linkedin.photon.ml.util.BroadcastWrapper /** - * The base objective function used by DistributedOptimizationProblems. This function works with an RDD of data - * distributed across the cluster. + * This class is used to calculate the value, gradient, and Hessian of generalized linear models for distributed + * optimization problems. The loss functions of generalized linear models can all be expressed as: * + * L(w) = \sum_i l(z_i, y_i) + * + * with: + * + * z_i = w^T^ x_i. + * + * Different generalized linear models will have different l(z, y). The functionality of l(z, y) is provided by a + * [[PointwiseLossFunction]]. Since the loss function could change for different types of normalization, a normalization + * context object indicates which normalization strategy is used to evaluate the loss function. + * + * @param singlePointLossFunction A single loss function l(z, y) used for the generalized linear model * @param treeAggregateDepth The depth used by treeAggregate. Depth 1 indicates normal linear aggregate. Using * depth > 1 can reduce memory consumption in the Driver and may also speed up the * aggregation. */ -abstract class DistributedObjectiveFunction(treeAggregateDepth: Int) extends ObjectiveFunction { +class DistributedObjectiveFunction private ( + singlePointLossFunction: PointwiseLossFunction, + treeAggregateDepth: Int) + extends ObjectiveFunction(singlePointLossFunction) + with TwiceDiffFunction { type Data = RDD[LabeledPoint] require(treeAggregateDepth > 0, s"Tree aggregate depth must be greater than 0: $treeAggregateDepth") + + /** + * Compute the value of the function over the given data for the given model coefficients.
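As a concrete illustration of the pointwise decomposition described in the class doc above, the following minimal sketch shows how a single-point loss l(z, y) and its slope dl/dz roll up into L(w) and its gradient. It is a self-contained toy, not part of this patch: the logistic form of l(z, y) and every name in it (PointwiseLossSketch, logisticLossAndSlope, valueAndGradient) are assumptions for illustration, not Photon ML's actual PointwiseLossFunction API.

import breeze.linalg.{DenseVector, Vector}

object PointwiseLossSketch {

  // Logistic loss at one point: l(z, y) = log(1 + exp(-y'z)) with y' in {-1, +1}.
  // Returns (l(z, y), dl/dz), which is the role a PointwiseLossFunction plays above.
  def logisticLossAndSlope(margin: Double, label: Double): (Double, Double) = {
    val y = if (label > 0.5) 1.0 else -1.0
    val loss = math.log1p(math.exp(-y * margin))
    val slope = -y / (1.0 + math.exp(y * margin))
    (loss, slope)
  }

  // L(w) = \sum_i l(w^T x_i, y_i); by the chain rule, grad L(w) = \sum_i (dl/dz) x_i.
  def valueAndGradient(data: Seq[(Vector[Double], Double)], w: Vector[Double]): (Double, Vector[Double]) = {
    val zero: Vector[Double] = DenseVector.zeros[Double](w.length)
    data.foldLeft((0.0, zero)) { case ((loss, grad), (x, y)) =>
      val (l, dz) = logisticLossAndSlope(w dot x, y)
      (loss + l, grad + (x * dz))
    }
  }
}

In production, the local foldLeft is essentially replaced by a treeAggregate over an RDD[LabeledPoint], which is the job of the aggregator classes imported above.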
+ * + * @param input The given data over which to compute the objective value + * @param coefficients The model coefficients used to compute the function's value + * @param normalizationContext The normalization context + * @return The computed value of the function + */ + override protected[ml] def value( + input: RDD[LabeledPoint], + coefficients: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): Double = + calculate(input, coefficients, normalizationContext)._1 + + /** + * Compute the gradient of the function over the given data for the given model coefficients. + * + * @param input The given data over which to compute the gradient + * @param coefficients The model coefficients used to compute the function's gradient + * @param normalizationContext The normalization context + * @return The computed gradient of the function + */ + override protected[ml] def gradient( + input: RDD[LabeledPoint], + coefficients: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = + calculate(input, coefficients, normalizationContext)._2 + + /** + * Compute both the value and the gradient of the function for the given model coefficients (computing value and + * gradient at once is sometimes more efficient than computing them sequentially). + * + * @param input The given data over which to compute the value and gradient + * @param coefficients The model coefficients used to compute the function's value and gradient + * @param normalizationContext The normalization context + * @return The computed value and gradient of the function + */ + override protected[ml] def calculate( + input: RDD[LabeledPoint], + coefficients: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = + ValueAndGradientAggregator.calculateValueAndGradient( + input, + coefficients, + singlePointLossFunction, + normalizationContext, + treeAggregateDepth) + + /** + * Compute the Hessian matrix over the given data for the given model coefficients. + * + * @param input The given data over which to compute the Hessian matrix + * @param coefficients The model coefficients used to compute the Hessian matrix + * @return The computed Hessian matrix + */ + override protected[ml] def hessianMatrix(input: RDD[LabeledPoint], coefficients: Vector[Double]): DenseMatrix[Double] = + HessianMatrixAggregator.calcHessianMatrix(input, coefficients, singlePointLossFunction, treeAggregateDepth) + + /** + * Compute an approximation of the Hessian diagonal over the given data for the given model coefficients. + * + * @param input The given data over which to compute the diagonal of the Hessian matrix + * @param coefficients The model coefficients used to compute the diagonal of the Hessian matrix + * @return The computed diagonal of the Hessian matrix + */ + override protected[ml] def hessianDiagonal(input: RDD[LabeledPoint], coefficients: Vector[Double]): Vector[Double] = + HessianDiagonalAggregator.calcHessianDiagonal(input, coefficients, singlePointLossFunction, treeAggregateDepth) + + /** + * Compute the Hessian of the function over the given data for the given model coefficients. + * + * @param input The given data over which to compute the Hessian + * @param coefficients The model coefficients used to compute the function's Hessian, multiplied by a given vector + * @param multiplyVector The given vector to be dot-multiplied with the Hessian.
For example, in the conjugate + * gradient method this would correspond to the current search direction. + * @param normalizationContext The normalization context + * @return The computed Hessian multiplied by the given multiplyVector + */ + override protected[ml] def hessianVector( + input: RDD[LabeledPoint], + coefficients: Vector[Double], + multiplyVector: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = + HessianVectorAggregator.calcHessianVector( + input, + coefficients, + multiplyVector, + singlePointLossFunction, + normalizationContext, + treeAggregateDepth) +} + +object DistributedObjectiveFunction { + + /** + * Factory method to create a new DistributedObjectiveFunction, using the given pointwise loss function as the base loss function. + * + * @param configuration The optimization problem configuration + * @param singleLossFunction The PointwiseLossFunction providing functionality for l(z, y) + * @param treeAggregateDepth The tree aggregation depth + * @param priorModelOpt Optional prior model, required if this is an objective function for incremental training + * @param interceptIndexOpt The index of the intercept, if there is one + * @return A new DistributedObjectiveFunction + */ + def apply( + configuration: GLMOptimizationConfiguration, + singleLossFunction: PointwiseLossFunction, + treeAggregateDepth: Int, + priorModelOpt: Option[GeneralizedLinearModel] = None, + interceptIndexOpt: Option[Int] = None): DistributedObjectiveFunction = { + + val regularizationContext = configuration.regularizationContext + val regularizationWeight = configuration.regularizationWeight + + priorModelOpt match { + case None => + regularizationContext.regularizationType match { + case RegularizationType.L2 | RegularizationType.ELASTIC_NET => + new DistributedObjectiveFunction(singleLossFunction, treeAggregateDepth) with L2RegularizationTwiceDiff { + l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight) + + override def interceptOpt: Option[Int] = interceptIndexOpt + } + + case _ => new DistributedObjectiveFunction(singleLossFunction, treeAggregateDepth) + } + + case Some(priorModel) => + val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight) + val priorModelCoefficients = priorModel.coefficients + + new DistributedObjectiveFunction(singleLossFunction, treeAggregateDepth) with PriorDistributionTwiceDiff { + override val priorCoefficients: ModelCoefficients = priorModelCoefficients + l2RegWeight = l2Weight + incrementalWeight = configuration.incrementalWeight.getOrElse(1.0D) + } + } + } } diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala index 08537d6c..34c4b8a9 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelper.scala @@ -17,9 +17,8 @@ package com.linkedin.photon.ml.function import com.linkedin.photon.ml.TaskType import com.linkedin.photon.ml.TaskType.TaskType import com.linkedin.photon.ml.algorithm.Coordinate -import com.linkedin.photon.ml.function.glm.{GLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} -import com.linkedin.photon.ml.function.svm.SmoothedHingeLossFunction -import com.linkedin.photon.ml.optimization.game.CoordinateOptimizationConfiguration +import
com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PointwiseLossFunction, PoissonLossFunction, SquaredLossFunction} +import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration} import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel /** @@ -41,10 +40,34 @@ object ObjectiveFunctionHelper { */ def buildFactory(taskType: TaskType, treeAggregateDepth: Int): ObjectiveFunctionFactoryFactory = taskType match { - case TaskType.LOGISTIC_REGRESSION => GLMLossFunction.buildFactory(LogisticLossFunction, treeAggregateDepth) - case TaskType.LINEAR_REGRESSION => GLMLossFunction.buildFactory(SquaredLossFunction, treeAggregateDepth) - case TaskType.POISSON_REGRESSION => GLMLossFunction.buildFactory(PoissonLossFunction, treeAggregateDepth) - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => SmoothedHingeLossFunction.buildFactory(treeAggregateDepth) + case TaskType.LOGISTIC_REGRESSION => factoryHelper(LogisticLossFunction, treeAggregateDepth) + case TaskType.LINEAR_REGRESSION => factoryHelper(SquaredLossFunction, treeAggregateDepth) + case TaskType.POISSON_REGRESSION => factoryHelper(PoissonLossFunction, treeAggregateDepth) case _ => throw new IllegalArgumentException(s"Unknown optimization task type: $taskType") } + + /** + * Construct a factory function for building distributed and non-distributed generalized linear model loss functions. + * + * @param lossFunction A [[PointwiseLossFunction]] for training a generalized linear model + * @param treeAggregateDepth The tree-aggregate depth to use during aggregation + * @param config The coordinate optimization configuration + * @return A function which builds the appropriate type of [[ObjectiveFunction]] for a given [[Coordinate]] type and + * optimization settings.
+ */ + private def factoryHelper + (lossFunction: PointwiseLossFunction, treeAggregateDepth: Int) + (config: CoordinateOptimizationConfiguration): (Option[GeneralizedLinearModel], Option[Int]) => ObjectiveFunction = + config match { + case fEOptConfig: FixedEffectOptimizationConfiguration => + (priorModelOpt: Option[GeneralizedLinearModel], interceptIndexOpt: Option[Int]) => + DistributedObjectiveFunction(fEOptConfig, lossFunction, treeAggregateDepth, priorModelOpt, interceptIndexOpt) + + case rEOptConfig: RandomEffectOptimizationConfiguration => + (priorModelOpt: Option[GeneralizedLinearModel], interceptIndexOpt: Option[Int]) => + SingleNodeObjectiveFunction(rEOptConfig, lossFunction, priorModelOpt, interceptIndexOpt) + + case _ => + throw new UnsupportedOperationException( + s"Cannot create a GLM loss function from a coordinate configuration with class '${config.getClass.getName}'") + } } diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala index 28d1d18b..2433c2f8 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala @@ -14,13 +14,175 @@ */ package com.linkedin.photon.ml.function +import breeze.linalg.{DenseMatrix, Vector} + import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.{HessianDiagonalAggregator, HessianMatrixAggregator, HessianVectorAggregator, PointwiseLossFunction, ValueAndGradientAggregator} +import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients} +import com.linkedin.photon.ml.normalization.NormalizationContext +import com.linkedin.photon.ml.optimization.RegularizationType +import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration +import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel +import com.linkedin.photon.ml.util.BroadcastWrapper /** - * The base objective function used by SingleNodeOptimizationProblems. This function works with data locally as part of - * a single task (on a single executor). + * This class is used to calculate the value, gradient, and Hessian of generalized linear models for individual + * optimization problems. The loss functions of generalized linear models can all be expressed as: + * + * L(w) = \sum_i l(z_i, y_i) + * + * with: + * + * z_i = w^T^ x_i. + * + * Different generalized linear models will have different l(z, y). The functionality of l(z, y) is provided by a + * [[PointwiseLossFunction]]. Since the loss function could change for different types of normalization, a normalization + * context object indicates which normalization strategy is used to evaluate the loss function. + * + * @param singlePointLossFunction A single loss function l(z, y) used for the generalized linear model */ -abstract class SingleNodeObjectiveFunction extends ObjectiveFunction with Serializable { +protected[ml] class SingleNodeObjectiveFunction private (singlePointLossFunction: PointwiseLossFunction) + extends ObjectiveFunction(singlePointLossFunction) + with TwiceDiffFunction { type Data = Iterable[LabeledPoint] + + /** + * Compute the value of the function over the given data for the given model coefficients.
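Because factoryHelper above dispatches purely on the coordinate configuration type, callers never name the concrete objective class. A brief usage sketch of the factory defined in this patch; the vals someFixedEffectOptConfig and someRandomEffectOptConfig are hypothetical placeholders for real coordinate configurations:

// Hypothetical usage; the two configuration vals below are placeholders.
val lossFactory = ObjectiveFunctionHelper.buildFactory(TaskType.LOGISTIC_REGRESSION, treeAggregateDepth = 2)

// A fixed-effect coordinate configuration yields an RDD-based DistributedObjectiveFunction.
// The second argument list is (priorModelOpt, interceptIndexOpt).
val fixedEffectObjective = lossFactory(someFixedEffectOptConfig)(None, Some(0))

// A random-effect coordinate configuration yields an Iterable-based SingleNodeObjectiveFunction.
val randomEffectObjective = lossFactory(someRandomEffectOptConfig)(None, Some(0))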
+ * + * @param input The given data over which to compute the objective value + * @param coefficients The model coefficients used to compute the function's value + * @param normalizationContext The normalization context + * @return The computed value of the function + */ + override protected[ml] def value( + input: Iterable[LabeledPoint], + coefficients: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): Double = + calculate(input, coefficients, normalizationContext)._1 + + /** + * Compute the gradient of the function over the given data for the given model coefficients. + * + * @param input The given data over which to compute the gradient + * @param coefficients The model coefficients used to compute the function's gradient + * @param normalizationContext The normalization context + * @return The computed gradient of the function + */ + override protected[ml] def gradient( + input: Iterable[LabeledPoint], + coefficients: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = + calculate(input, coefficients, normalizationContext)._2 + + /** + * Compute both the value and the gradient of the function for the given model coefficients (computing value and + * gradient at once is sometimes more efficient than computing them sequentially). + * + * @param input The given data over which to compute the value and gradient + * @param coefficients The model coefficients used to compute the function's value and gradient + * @param normalizationContext The normalization context + * @return The computed value and gradient of the function + */ + override protected[ml] def calculate( + input: Iterable[LabeledPoint], + coefficients: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = + ValueAndGradientAggregator.calculateValueAndGradient( + input, + coefficients, + singlePointLossFunction, + normalizationContext) + + /** + * Compute the Hessian matrix over the given data for the given model coefficients. + * + * @param input The given data over which to compute the Hessian matrix + * @param coefficients The model coefficients used to compute the Hessian matrix + * @return The computed Hessian matrix + */ + override protected[ml] def hessianMatrix( + input: Iterable[LabeledPoint], + coefficients: Vector[Double]): DenseMatrix[Double] = + HessianMatrixAggregator.calcHessianMatrix(input, coefficients, singlePointLossFunction) + + /** + * Compute an approximation of the Hessian diagonal over the given data for the given model coefficients. + * + * @param input The given data over which to compute the diagonal of the Hessian matrix + * @param coefficients The model coefficients used to compute the diagonal of the Hessian matrix + * @return The computed diagonal of the Hessian matrix + */ + override protected[ml] def hessianDiagonal( + input: Iterable[LabeledPoint], + coefficients: Vector[Double]): Vector[Double] = + HessianDiagonalAggregator.calcHessianDiagonal(input, coefficients, singlePointLossFunction) + + /** + * Compute the Hessian of the function over the given data for the given model coefficients. + * + * @param input The given data over which to compute the Hessian + * @param coefficients The model coefficients used to compute the function's Hessian, multiplied by a given vector + * @param multiplyVector The given vector to be dot-multiplied with the Hessian.
For example, in the conjugate + * gradient method this would correspond to the current search direction. + * @param normalizationContext The normalization context + * @return The computed Hessian multiplied by the given multiplyVector + */ + override protected[ml] def hessianVector( + input: Iterable[LabeledPoint], + coefficients: Vector[Double], + multiplyVector: Vector[Double], + normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = + HessianVectorAggregator.calcHessianVector( + input, + coefficients, + multiplyVector, + singlePointLossFunction, + normalizationContext) +} + +object SingleNodeObjectiveFunction { + + /** + * Factory method to create a new SingleNodeObjectiveFunction, using the given pointwise loss function as the base loss function. + * + * @param configuration The optimization problem configuration + * @param singleLossFunction The PointwiseLossFunction providing functionality for l(z, y) + * @param priorModelOpt Optional prior model, required if this is an objective function for incremental training + * @param interceptIndexOpt The index of the intercept, if there is one + * @return A new SingleNodeObjectiveFunction + */ + def apply( + configuration: GLMOptimizationConfiguration, + singleLossFunction: PointwiseLossFunction, + priorModelOpt: Option[GeneralizedLinearModel] = None, + interceptIndexOpt: Option[Int] = None): SingleNodeObjectiveFunction = { + + val regularizationContext = configuration.regularizationContext + val regularizationWeight = configuration.regularizationWeight + + priorModelOpt match { + case None => + regularizationContext.regularizationType match { + case RegularizationType.L2 | RegularizationType.ELASTIC_NET => + new SingleNodeObjectiveFunction(singleLossFunction) with L2RegularizationTwiceDiff { + l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight) + + override def interceptOpt: Option[Int] = interceptIndexOpt + } + + case _ => new SingleNodeObjectiveFunction(singleLossFunction) + } + + case Some(priorModel) => + val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight) + val priorModelCoefficients = priorModel.coefficients + + new SingleNodeObjectiveFunction(singleLossFunction) with PriorDistributionTwiceDiff { + override val priorCoefficients: ModelCoefficients = priorModelCoefficients + l2RegWeight = l2Weight + incrementalWeight = configuration.incrementalWeight.getOrElse(1.0D) + } + } + } } diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala deleted file mode 100644 index 9a8f2a1b..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/DistributedGLMLossFunction.scala +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License.
- */ -package com.linkedin.photon.ml.function.glm - -import breeze.linalg._ -import org.apache.spark.rdd.RDD - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function._ -import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients} -import com.linkedin.photon.ml.normalization.NormalizationContext -import com.linkedin.photon.ml.optimization.RegularizationType -import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel -import com.linkedin.photon.ml.util.BroadcastWrapper - -/** - * This class is used to calculate the value, gradient, and Hessian of generalized linear models for distributed - * optimization problems. The loss function of a generalized linear model can all be expressed as: - * - * L(w) = \sum_i l(z_i, y_i) - * - * with: - * - * z_i = w^T^ x_i. - * - * Different generalized linear models will have different l(z, y). The functionality of l(z, y) is provided by a - * [[PointwiseLossFunction]]. Since the loss function could change for different types of normalization, a normalization - * context object indicates which normalization strategy is used to evaluate the loss function. - * - * @param singlePointLossFunction A single loss function l(z, y) used for the generalized linear model - * @param treeAggregateDepth The depth used by treeAggregate. Depth 1 indicates normal linear aggregate. Using - * depth > 1 can reduce memory consumption in the Driver and may also speed up the - * aggregation. It is experimental currently because treeAggregate is unstable in Spark - * versions 1.4 and 1.5. - */ -protected[ml] class DistributedGLMLossFunction private ( - singlePointLossFunction: PointwiseLossFunction, - treeAggregateDepth: Int) - extends DistributedObjectiveFunction(treeAggregateDepth) - with TwiceDiffFunction { - - /** - * Compute the value of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the objective value - * @param coefficients The model coefficients used to compute the function's value - * @param normalizationContext The normalization context - * @return The computed value of the function - */ - override protected[ml] def value( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Double = - calculate(input, coefficients, normalizationContext)._1 - - /** - * Compute the gradient of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the gradient - * @param coefficients The model coefficients used to compute the function's gradient - * @param normalizationContext The normalization context - * @return The computed gradient of the function - */ - override protected[ml] def gradient( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = - calculate(input, coefficients, normalizationContext)._2 - - /** - * Compute both the value and the gradient of the function for the given model coefficients (computing value and - * gradient at once is sometimes more efficient than computing them sequentially). 
- * - * @param input The given data over which to compute the value and gradient - * @param coefficients The model coefficients used to compute the function's value and gradient - * @param normalizationContext The normalization context - * @return The computed value and gradient of the function - */ - override protected[ml] def calculate( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = - ValueAndGradientAggregator.calculateValueAndGradient( - input, - coefficients, - singlePointLossFunction, - normalizationContext, - treeAggregateDepth) - - /** - * Compute the Hessian matrix over the given data for the given model coefficients. - * - * @param input The given data over which to compute the diagonal of the Hessian matrix - * @param coefficients The model coefficients used to compute the diagonal of the Hessian matrix - * @return The computed Hessian matrix - */ - override protected[ml] def hessianMatrix(input: RDD[LabeledPoint], coefficients: Vector[Double]): DenseMatrix[Double] = - HessianMatrixAggregator.calcHessianMatrix(input, coefficients, singlePointLossFunction, treeAggregateDepth) - - /** - * Compute an approximation of the Hessian diagonal over the given data for the given model coefficients. - * - * @param input The given data over which to compute the diagonal of the Hessian matrix - * @param coefficients The model coefficients used to compute the diagonal of the Hessian matrix - * @return The computed diagonal of the Hessian matrix - */ - override protected[ml] def hessianDiagonal(input: RDD[LabeledPoint], coefficients: Vector[Double]): Vector[Double] = - HessianDiagonalAggregator.calcHessianDiagonal(input, coefficients, singlePointLossFunction, treeAggregateDepth) - - /** - * Compute the Hessian of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the Hessian - * @param coefficients The model coefficients used to compute the function's hessian, multiplied by a given vector - * @param multiplyVector The given vector to be dot-multiplied with the Hessian. For example, in conjugate - * gradient method this would correspond to the gradient multiplyVector. - * @param normalizationContext The normalization context - * @return The computed Hessian multiplied by the given multiplyVector - */ - override protected[ml] def hessianVector( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - multiplyVector: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = - HessianVectorAggregator.calcHessianVector( - input, - coefficients, - multiplyVector, - singlePointLossFunction, - normalizationContext, - treeAggregateDepth) -} - -object DistributedGLMLossFunction { - - /** - * Factory method to create a new objective function with DistributedGLMLossFunctions as the base loss function. 
- * - * @param configuration The optimization problem configuration - * @param singleLossFunction The PointwiseLossFunction providing functionality for l(z, y) - * @param treeAggregateDepth The tree aggregation depth - * @param priorModelOpt Optional prior model, required if this is an objective function for incremental training - * @param interceptIndexOpt The index of the intercept, if there is one - * @return A new DistributedGLMLossFunction - */ - def apply( - configuration: GLMOptimizationConfiguration, - singleLossFunction: PointwiseLossFunction, - treeAggregateDepth: Int, - priorModelOpt: Option[GeneralizedLinearModel] = None, - interceptIndexOpt: Option[Int] = None): DistributedGLMLossFunction = { - - val regularizationContext = configuration.regularizationContext - val regularizationWeight = configuration.regularizationWeight - - priorModelOpt match { - case None => - regularizationContext.regularizationType match { - case RegularizationType.L2 | RegularizationType.ELASTIC_NET => - new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth) - with L2RegularizationTwiceDiff { - - l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight) - - override def interceptOpt: Option[Int] = interceptIndexOpt - } - - case _ => new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth) - } - - case Some(priorModel) => - val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight) - val priorModelCoefficients = priorModel.coefficients - - new DistributedGLMLossFunction(singleLossFunction, treeAggregateDepth) with PriorDistributionTwiceDiff { - override val priorCoefficients: ModelCoefficients = priorModelCoefficients - l2RegWeight = l2Weight - incrementalWeight = configuration.incrementalWeight.getOrElse(1.0D) - } - } - } -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala deleted file mode 100644 index 22c8702e..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/GLMLossFunction.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.glm - -import com.linkedin.photon.ml.algorithm.Coordinate -import com.linkedin.photon.ml.function.ObjectiveFunction -import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration} -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel - -/** - * Helper for generalized linear model loss function related tasks. - */ -object GLMLossFunction { - - /** - * Construct a factory function for building distributed and non-distributed generalized linear model loss functions. 
- * - * @param lossFunction A [[PointwiseLossFunction]] for training a generalized linear model - * @param treeAggregateDepth The tree-aggregate depth to use during aggregation - * @param config Optimization problem configuration - * @return A function which builds the appropriate type of [[ObjectiveFunction]] for a given [[Coordinate]] type and - * optimization settings. - */ - def buildFactory( - lossFunction: PointwiseLossFunction, - treeAggregateDepth: Int)( - config: CoordinateOptimizationConfiguration): (Option[GeneralizedLinearModel], Option[Int]) => ObjectiveFunction = - config match { - case fEOptConfig: FixedEffectOptimizationConfiguration => - (generalizedLinearModelOpt: Option[GeneralizedLinearModel], interceptIndexOpt: Option[Int]) => - DistributedGLMLossFunction( - fEOptConfig, - lossFunction, - treeAggregateDepth, - generalizedLinearModelOpt, - interceptIndexOpt) - - case rEOptConfig: RandomEffectOptimizationConfiguration => - (generalizedLinearModelOpt: Option[GeneralizedLinearModel], interceptIndexOpt: Option[Int]) => - SingleNodeGLMLossFunction( - rEOptConfig, - lossFunction, - generalizedLinearModelOpt, - interceptIndexOpt) - - case _ => - throw new UnsupportedOperationException( - s"Cannot create a GLM loss function from a coordinate configuration with class '${config.getClass.getName}'") - } -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/PoissonLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/PoissonLossFunction.scala index 278d0560..412d6e53 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/PoissonLossFunction.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/PoissonLossFunction.scala @@ -32,6 +32,7 @@ object PoissonLossFunction extends PointwiseLossFunction { /** * l(z, y) = exp(z) - y * z + * * dl/dz = exp(z) - y * * @param margin The margin, i.e. z in l(z, y) diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala deleted file mode 100644 index 23966c53..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunction.scala +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ -package com.linkedin.photon.ml.function.glm - -import breeze.linalg._ - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function._ -import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients} -import com.linkedin.photon.ml.normalization.NormalizationContext -import com.linkedin.photon.ml.optimization.RegularizationType -import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel -import com.linkedin.photon.ml.util.BroadcastWrapper - -/** - * This class is used to calculate the value, gradient, and Hessian of generalized linear models for individual - * optimization problems. The loss function of a generalized linear model can all be expressed as: - * - * L(w) = \sum_i l(z_i, y_i) - * - * with: - * - * z_i = w^T^ x_i. - * - * Different generalized linear models will have different l(z, y). The functionality of l(z, y) is provided by a - * [[PointwiseLossFunction]]. Since the loss function could change for different types of normalization, a normalization - * context object indicates which normalization strategy is used to evaluate the loss function. - * - * @param singlePointLossFunction A single loss function l(z, y) used for the generalized linear model - */ -protected[ml] class SingleNodeGLMLossFunction private (singlePointLossFunction: PointwiseLossFunction) - extends SingleNodeObjectiveFunction - with TwiceDiffFunction { - - /** - * Compute the value of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the objective value - * @param coefficients The model coefficients used to compute the function's value - * @param normalizationContext The normalization context - * @return The computed value of the function - */ - override protected[ml] def value( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Double = - calculate(input, coefficients, normalizationContext)._1 - - /** - * Compute the gradient of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the gradient - * @param coefficients The model coefficients used to compute the function's gradient - * @param normalizationContext The normalization context - * @return The computed gradient of the function - */ - override protected[ml] def gradient( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = - calculate(input, coefficients, normalizationContext)._2 - - /** - * Compute both the value and the gradient of the function for the given model coefficients (computing value and - * gradient at once is sometimes more efficient than computing them sequentially). 
- * - * @param input The given data over which to compute the value and gradient - * @param coefficients The model coefficients used to compute the function's value and gradient - * @param normalizationContext The normalization context - * @return The computed value and gradient of the function - */ - override protected[ml] def calculate( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = - ValueAndGradientAggregator.calculateValueAndGradient( - input, - coefficients, - singlePointLossFunction, - normalizationContext) - - /** - * Compute the Hessian matrix over the given data for the given model coefficients. - * - * @param input The given data over which to compute the diagonal of the Hessian matrix - * @param coefficients The model coefficients used to compute the diagonal of the Hessian matrix - * @return The computed Hessian matrix - */ - override protected[ml] def hessianMatrix( - input: Iterable[LabeledPoint], - coefficients: Vector[Double]): DenseMatrix[Double] = - HessianMatrixAggregator.calcHessianMatrix(input, coefficients, singlePointLossFunction) - - /** - * Compute an approximation of the Hessian diagonal over the given data for the given model coefficients. - * - * @param input The given data over which to compute the diagonal of the Hessian matrix - * @param coefficients The model coefficients used to compute the diagonal of the Hessian matrix - * @return The computed diagonal of the Hessian matrix - */ - override protected[ml] def hessianDiagonal( - input: Iterable[LabeledPoint], - coefficients: Vector[Double]): Vector[Double] = - HessianDiagonalAggregator.calcHessianDiagonal(input, coefficients, singlePointLossFunction) - - /** - * Compute the Hessian of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the Hessian - * @param coefficients The model coefficients used to compute the function's hessian, multiplied by a given vector - * @param multiplyVector The given vector to be dot-multiplied with the Hessian. For example, in conjugate - * gradient method this would correspond to the gradient multiplyVector. - * @param normalizationContext The normalization context - * @return The computed Hessian multiplied by the given multiplyVector - */ - override protected[ml] def hessianVector( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - multiplyVector: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = - HessianVectorAggregator.calcHessianVector( - input, - coefficients, - multiplyVector, - singlePointLossFunction, - normalizationContext) -} - -object SingleNodeGLMLossFunction { - - /** - * Factory method to create a new objective function with SingleNodeGLMLossFunctions as the base loss function. 
- * - * @param configuration The optimization problem configuration - * @param singleLossFunction The PointwiseLossFunction providing functionality for l(z, y) - * @param priorModelOpt Optional prior model, required if this is an objective function for incremental training - * @param interceptIndexOpt The index of the intercept, if there is one - * @return A new SingleNodeGLMLossFunction - */ - def apply( - configuration: GLMOptimizationConfiguration, - singleLossFunction: PointwiseLossFunction, - priorModelOpt: Option[GeneralizedLinearModel] = None, - interceptIndexOpt: Option[Int] = None): SingleNodeGLMLossFunction = { - - val regularizationContext = configuration.regularizationContext - val regularizationWeight = configuration.regularizationWeight - - priorModelOpt match { - case Some(priorModel) => - val l2Weight = regularizationContext.getL2RegularizationWeight(regularizationWeight) - val priorModelCoefficients = priorModel.coefficients - - new SingleNodeGLMLossFunction(singleLossFunction) with PriorDistributionTwiceDiff { - override val priorCoefficients: ModelCoefficients = priorModelCoefficients - l2RegWeight = l2Weight - incrementalWeight = configuration.incrementalWeight.getOrElse(1.0D) - } - - case None => - regularizationContext.regularizationType match { - case RegularizationType.L2 | RegularizationType.ELASTIC_NET => - new SingleNodeGLMLossFunction(singleLossFunction) with L2RegularizationTwiceDiff { - - l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight) - - override def interceptOpt: Option[Int] = interceptIndexOpt - } - - case _ => new SingleNodeGLMLossFunction(singleLossFunction) - } - } - } -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunction.scala deleted file mode 100644 index e29222c5..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/DistributedSmoothedHingeLossFunction.scala +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.svm - -import breeze.linalg.Vector -import org.apache.spark.rdd.RDD - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.{DiffFunction, DistributedObjectiveFunction, L2RegularizationDiff} -import com.linkedin.photon.ml.normalization.NormalizationContext -import com.linkedin.photon.ml.optimization.RegularizationType -import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration -import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} - -/** - * This class is used to calculate the value and gradient of Rennie's smoothed hinge loss function, as an approximation - * of a linear SVM, for distributed optimization problems. - * - * FAQ: Why use cumGradient (cumulative gradient)? 
- * A: Using cumGradient allows the functions to avoid memory allocation by modifying and returning cumGradient instead - * of creating a new gradient vector. - * - * @param treeAggregateDepth The depth used by treeAggregate. Depth 1 indicates normal linear aggregate. Using - * depth > 1 can reduce memory consumption in the Driver and may also speed up the - * aggregation. It is experimental currently because treeAggregate is unstable in Spark - * versions 1.4 and 1.5. - */ -protected[ml] class DistributedSmoothedHingeLossFunction(treeAggregateDepth: Int) - extends DistributedObjectiveFunction(treeAggregateDepth) - with DiffFunction { - - /** - * Compute the value of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the objective value - * @param coefficients The model coefficients used to compute the function's value - * @param normalizationContext The normalization context - * @return The computed value of the function - */ - override protected[ml] def value( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Double = - calculate(input, coefficients, normalizationContext)._1 - - /** - * Compute the gradient of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the gradient - * @param coefficients The model coefficients used to compute the function's gradient - * @param normalizationContext The normalization context - * @return The computed gradient of the function - */ - override protected[ml] def gradient( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = - calculate(input, coefficients, normalizationContext)._2 - - /** - * Compute both the value and the gradient of the function for the given model coefficients (computing value and - * gradient at once is sometimes more efficient than computing them sequentially). - * - * @param input The given data over which to compute the value and gradient - * @param coefficients The model coefficients used to compute the function's value and gradient - * @param normalizationContext The normalization context - * @return The computed value and gradient of the function - */ - override protected[ml] def calculate( - input: RDD[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = { - - val initialCumGradient = VectorUtils.zeroOfSameType(coefficients) - val result = input.treeAggregate((0.0, initialCumGradient))( - seqOp = { - case ((loss, cumGradient), datum) => - val v = SmoothedHingeLossFunction.calculateAt(datum, coefficients, cumGradient) - (loss + v, cumGradient) - }, - combOp = { - case ((loss1, grad1), (loss2, grad2)) => - (loss1 + loss2, grad1 += grad2) - }, - treeAggregateDepth) - - result - } -} - -object DistributedSmoothedHingeLossFunction { - - /** - * Factory method to create a new objective function with DistributedSmoothedHingeLossFunction as the base loss - * function. 
- * - * @param configuration The optimization problem configuration - * @param treeAggregateDepth The tree aggregation depth - * @param interceptIndexOpt The index of the intercept, if there is one - * @return A new DistributedSmoothedHingeLossFunction - */ - def apply( - configuration: GLMOptimizationConfiguration, - treeAggregateDepth: Int, - interceptIndexOpt: Option[Int] = None): DistributedSmoothedHingeLossFunction = { - - val regularizationContext = configuration.regularizationContext - - regularizationContext.regularizationType match { - case RegularizationType.L2 | RegularizationType.ELASTIC_NET => - new DistributedSmoothedHingeLossFunction(treeAggregateDepth) with L2RegularizationDiff { - l2RegWeight = regularizationContext.getL2RegularizationWeight(configuration.regularizationWeight) - - override def interceptOpt: Option[Int] = interceptIndexOpt - } - - case _ => new DistributedSmoothedHingeLossFunction(treeAggregateDepth) - } - } -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunction.scala deleted file mode 100644 index 75dee4bb..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunction.scala +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.svm - -import breeze.linalg.Vector - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.{DiffFunction, L2RegularizationDiff, SingleNodeObjectiveFunction} -import com.linkedin.photon.ml.normalization.NormalizationContext -import com.linkedin.photon.ml.optimization.RegularizationType -import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration -import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} - -/** - * This class is used to calculate the value and gradient of Rennie's smoothed hinge loss function, as an approximation - * of a linear SVM, for individual optimization problems. - * - * FAQ: Why use cumGradient (cumulative gradient)? - * A: Using cumGradient allows the functions to avoid memory allocation by modifying and returning cumGradient instead - * of creating a new gradient vector. - */ -protected[ml] class SingleNodeSmoothedHingeLossFunction extends SingleNodeObjectiveFunction with DiffFunction { - - /** - * Compute the value of the function over the given data for the given model coefficients. 
- * - * @param input The given data over which to compute the objective value - * @param coefficients The model coefficients used to compute the function's value - * @param normalizationContext The normalization context - * @return The computed value of the function - */ - override protected[ml] def value( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Double = - calculate(input, coefficients, normalizationContext)._1 - - /** - * Compute the gradient of the function over the given data for the given model coefficients. - * - * @param input The given data over which to compute the gradient - * @param coefficients The model coefficients used to compute the function's gradient - * @param normalizationContext The normalization context - * @return The computed gradient of the function - */ - override protected[ml] def gradient( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Vector[Double] = - calculate(input, coefficients, normalizationContext)._2 - - /** - * Compute both the value and the gradient of the function for the given model coefficients (computing value and - * gradient at once is sometimes more efficient than computing them sequentially). - * - * @param input The given data over which to compute the value and gradient - * @param coefficients The model coefficients used to compute the function's value and gradient - * @param normalizationContext The normalization context - * @return The computed value and gradient of the function - */ - override protected[ml] def calculate( - input: Iterable[LabeledPoint], - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): (Double, Vector[Double]) = { - val initialCumGradient = VectorUtils.zeroOfSameType(coefficients) - - input.aggregate((0.0, initialCumGradient))( - seqop = { - case ((loss, cumGradient), datum) => - val v = SmoothedHingeLossFunction.calculateAt(datum, coefficients, cumGradient) - (loss + v, cumGradient) - }, - combop = { - case ((loss1, grad1), (loss2, grad2)) => - (loss1 + loss2, grad1 += grad2) - }) - } -} - -object SingleNodeSmoothedHingeLossFunction { - - /** - * Factory method to create a new objective function with SingleNodeSmoothedHingeLossFunction as the base loss - * function. 
- * - * @param configuration The optimization problem configuration - * @param interceptIndexOpt The index of the intercept, if there is one - * @return A new SingleNodeSmoothedHingeLossFunction - */ - def apply( - configuration: GLMOptimizationConfiguration, - interceptIndexOpt: Option[Int] = None): SingleNodeSmoothedHingeLossFunction = { - - val regularizationContext = configuration.regularizationContext - val regularizationWeight = configuration.regularizationWeight - - regularizationContext.regularizationType match { - case RegularizationType.L2 | RegularizationType.ELASTIC_NET => - new SingleNodeSmoothedHingeLossFunction with L2RegularizationDiff { - l2RegWeight = regularizationContext.getL2RegularizationWeight(regularizationWeight) - - override def interceptOpt: Option[Int] = interceptIndexOpt - } - - case _ => new SingleNodeSmoothedHingeLossFunction - } - } -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala deleted file mode 100644 index ea22663a..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunction.scala +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.svm - -import breeze.linalg.Vector - -import com.linkedin.photon.ml.algorithm.Coordinate -import com.linkedin.photon.ml.constants.MathConst -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.ObjectiveFunction -import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration} -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel - -/** - * Implement Rennie's smoothed hinge loss function (http://qwone.com/~jason/writing/smoothHinge.pdf) as an - * optimizer-friendly approximation for linear SVMs. This Object is to the individual/distributed smoothed hinge loss - * functions as the PointwiseLossFunction is to the individual/distributed GLM loss functions. - * - * @note Function names follow the differentiation notation found here: - * [[http://www.wikiwand.com/en/Notation_for_differentiation#/Euler.27s_notation]] - */ -object SmoothedHingeLossFunction { - /** - * Compute the loss and derivative of the smoothed hinge loss function at a single point. - * - * Note that the derivative is multiplied element-wise by the label in advance. - * - * @param margin The margin, i.e. z in l(z, y) - * @param label The label, i.e. 
y in l(z, y) - * @return The value and the 1st derivative - */ - def lossAndDzLoss(margin: Double, label: Double): (Double, Double) = { - - val modifiedLabel = if (label < MathConst.POSITIVE_RESPONSE_THRESHOLD) -1D else 1D - val z = modifiedLabel * margin - - // Eq: 2, page 2 - val loss = if (z <= 0) { - 0.5 - z - } else if (z < 1) { - 0.5 * (1.0 - z) * (1.0 - z) - } else { - 0.0 - } - - // Eq. 3, page 2 - val deriv = if (z < 0) { - -1.0 - } else if (z < 1) { - z - 1.0 - } else { - 0.0 - } - - (loss, deriv * modifiedLabel) - } - - /** - * Compute the loss and derivative of the smoothed hinge loss function at a single point. - * - * @param datum A single data point - * @param coefficients The model coefficients - * @param cumGradient The cumulative Gradient vector for all points in the dataset - * @return The value at the given data point - */ - def calculateAt( - datum: LabeledPoint, - coefficients: Vector[Double], - cumGradient: Vector[Double]): Double = { - - val margin = datum.computeMargin(coefficients) - val (loss, deriv) = lossAndDzLoss(margin, datum.label) - - // Eq. 5, page 2 (derivative multiplied by label in lossAndDerivative method) - breeze.linalg.axpy(datum.weight * deriv, datum.features, cumGradient) - datum.weight * loss - } - - /** - * Construct a factory function for building distributed and non-distributed smoothed hinge loss functions. - * - * @param treeAggregateDepth The tree-aggregate depth to use during aggregation - * @param config Optimization problem configuration - * @return A function which builds the appropriate type of [[ObjectiveFunction]] for a given [[Coordinate]] type and - * optimization settings. - */ - def buildFactory( - treeAggregateDepth: Int)( - config: CoordinateOptimizationConfiguration): (Option[GeneralizedLinearModel], Option[Int]) => ObjectiveFunction = - config match { - case fEOptConfig: FixedEffectOptimizationConfiguration => - (_: Option[GeneralizedLinearModel], _: Option[Int]) => - DistributedSmoothedHingeLossFunction(fEOptConfig, treeAggregateDepth) - - case rEOptConfig: RandomEffectOptimizationConfiguration => - (_: Option[GeneralizedLinearModel], _: Option[Int]) => - SingleNodeSmoothedHingeLossFunction(rEOptConfig) - - case _ => - throw new UnsupportedOperationException( - s"Cannot create a smoothed hinge loss linear SVM loss function from a coordinate configuration with class " + - s"'${config.getClass.getName}'") - } -} diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala index 59a01aec..3933ee55 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblem.scala @@ -14,16 +14,13 @@ */ package com.linkedin.photon.ml.optimization -import scala.math.abs +import breeze.linalg.Vector -import breeze.linalg.{Vector, sum} - -import com.linkedin.photon.ml.function.{L2Regularization, ObjectiveFunction} +import com.linkedin.photon.ml.function.ObjectiveFunction import com.linkedin.photon.ml.model.Coefficients -import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel -import com.linkedin.photon.ml.util.{BroadcastWrapper, Logging} +import 
com.linkedin.photon.ml.util.Logging /** * An abstract base for the convex optimization problem which produce trained generalized linear models (GLMs) when @@ -92,55 +89,4 @@ protected[ml] abstract class GeneralizedLinearOptimizationProblem[Objective <: O * @return The learned GLM for the given optimization problem, data, regularization type, and regularization weight */ def run(input: objectiveFunction.Data, initialModel: GeneralizedLinearModel): (GeneralizedLinearModel, OptimizationStatesTracker) - - /** - * Compute the regularization term value - * - * @param model A trained GLM - * @return The regularization term value of this optimization problem for the given GLM - */ - def getRegularizationTermValue(model: GeneralizedLinearModel): Double = { - import GeneralizedLinearOptimizationProblem._ - - val l1RegValue = optimizer match { - case l1Optimizer: OWLQN => getL1RegularizationTermValue(model, l1Optimizer.l1RegularizationWeight) - case _ => 0D - } - val l2RegValue = objectiveFunction match { - case l2ObjFunc: L2Regularization => - getL2RegularizationTermValue(model, l2ObjFunc.l2RegularizationWeight) - case _ => 0D - } - - l1RegValue + l2RegValue - } -} - -object GeneralizedLinearOptimizationProblem { - /** - * Compute the L1 regularization term value - * - * @param model the model - * @param regularizationWeight the weight of the regularization value - * @return L1 regularization term value - */ - protected[ml] def getL1RegularizationTermValue( - model: GeneralizedLinearModel, - regularizationWeight: Double): Double = - sum(model.coefficients.means.map(abs)) * regularizationWeight - - /** - * Compute the L2 regularization term value - * - * @param model the model - * @param regularizationWeight the weight of the regularization value - * @return L2 regularization term value - */ - protected[ml] def getL2RegularizationTermValue( - model: GeneralizedLinearModel, - regularizationWeight: Double): Double = { - - val coefficients = model.coefficients.means - coefficients.dot(coefficients) * regularizationWeight / 2 - } } diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/sampling/DownSamplerHelper.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/sampling/DownSamplerHelper.scala index 80143589..a6d4ec93 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/sampling/DownSamplerHelper.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/sampling/DownSamplerHelper.scala @@ -32,7 +32,7 @@ object DownSamplerHelper { */ def buildFactory(trainingTask: TaskType): DownSamplerFactory = trainingTask match { - case TaskType.LOGISTIC_REGRESSION | TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => + case TaskType.LOGISTIC_REGRESSION => (downSamplingRate: Double) => new BinaryClassificationDownSampler(downSamplingRate) case TaskType.LINEAR_REGRESSION | TaskType.POISSON_REGRESSION => diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/supervised/classification/SmoothedHingeLossLinearSVMModel.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/supervised/classification/SmoothedHingeLossLinearSVMModel.scala deleted file mode 100644 index ad546926..00000000 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/supervised/classification/SmoothedHingeLossLinearSVMModel.scala +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.supervised.classification - -import breeze.linalg.Vector -import org.apache.spark.rdd.RDD - -import com.linkedin.photon.ml.TaskType._ -import com.linkedin.photon.ml.model.Coefficients -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel -import com.linkedin.photon.ml.supervised.regression.Regression - -/** - * Class for the classification model trained using soft hinge loss linear SVM. - * - * @param coefficients Weights estimated for every feature - */ -class SmoothedHingeLossLinearSVMModel(override val coefficients: Coefficients) - extends GeneralizedLinearModel(coefficients) - with BinaryClassifier - with Regression - with Serializable { - - /** - * Check the model type. - * - * @return The model type - */ - override val modelType: TaskType = SMOOTHED_HINGE_LOSS_LINEAR_SVM - - /** - * Compute the mean response of the smoothed hinge loss linear SVM model. - * - * @param features The input data point's feature - * @param offset The input data point's offset - * @return The mean for the passed features - */ - override protected[ml] def computeMean(features: Vector[Double], offset: Double): Double = - coefficients.computeScore(features) + offset - - /** - * Create a new model of the same type with updated coefficients. - * - * @param updatedCoefficients The new coefficients - * @return A new generalized linear model with the passed coefficients - */ - override def updateCoefficients(updatedCoefficients: Coefficients): SmoothedHingeLossLinearSVMModel = - new SmoothedHingeLossLinearSVMModel(updatedCoefficients) - - /** - * Compares two [[SmoothedHingeLossLinearSVMModel]] objects. - * - * @param other Some other object - * @return True if the both models conform to the equality contract and have the same model coefficients, false - * otherwise - */ - override def equals(other: Any): Boolean = other match { - case that: SmoothedHingeLossLinearSVMModel => super.equals(that) - case _ => false - } - - /** - * Build a human-readable summary for the object. - * - * @return A summary of the object in string representation - */ - override def toSummaryString: String = - s"Smoothed Hinge Loss Linear SVM Model with the following coefficients:\n${coefficients.toSummaryString}" - - /** - * Predict values for a single data point with offset. - * - * @param features Vector a single data point's features - * @param offset Offset of the data point - * @param threshold Threshold that separates positive predictions from negative predictions. An example with - * prediction score greater than or equal to this threshold is identified as positive, and negative - * otherwise. - * @return Predicted category from the trained model - */ - override def predictClassWithOffset(features: Vector[Double], offset: Double, threshold: Double = 0.5): Double = - classifyScore(predictWithOffset(features, offset), threshold) - - /** - * Predict values for the given data points with offsets of the form RDD[(feature, offset)]. 
- * - * @param featuresWithOffsets Data points of the form RDD[(feature, offset)] - * @param threshold Threshold that separates positive predictions from negative predictions. An example with - * prediction score greater than or equal to this threshold is identified as positive, and negative - * otherwise. - * @return An RDD[Double] where each entry contains the corresponding prediction - */ - override def predictClassAllWithOffsets( - featuresWithOffsets: RDD[(Vector[Double], Double)], - threshold: Double = 0.5): RDD[Double] = - predictAllWithOffsets(featuresWithOffsets).map(classifyScore(_, threshold)) - - /** - * Predict values for a single data point with offset. - * - * @param features Vector representing feature of a single data point's features - * @param offset Offset of the data point - * @return Double prediction from the trained model - */ - override def predictWithOffset(features: Vector[Double], offset: Double): Double = - computeMeanFunctionWithOffset(features, offset) - - /** - * Predict values for the given data points with offsets of the form RDD[(feature, offset)]. - * - * @param featuresWithOffsets Data points of the form RDD[(feature, offset)] - * @return RDD[Double] where each entry contains the corresponding prediction - */ - override def predictAllWithOffsets(featuresWithOffsets: RDD[(Vector[Double], Double)]): RDD[Double] = - GeneralizedLinearModel.computeMeanFunctionsWithOffsets(this, featuresWithOffsets) - - /** - * Predict the binary class of a score, given a threshold. - * - * @param score The score - * @param threshold The threshold for a positive class label - * @return A positive label if the score is greater than or equal to the threshold, a negative label otherwise - */ - private def classifyScore(score: Double, threshold: Double): Double = { - if (score < threshold) { - BinaryClassifier.negativeClassLabel - } else { - BinaryClassifier.positiveClassLabel - } - } -} - -object SmoothedHingeLossLinearSVMModel { - - /** - * Create a new smoothed hinge loss SVM model with the provided coefficients (means) and variances. 
- * - * @param coefficients The feature coefficient means and variances for the model - * @return A smoothed hinge loss SVM model - */ - def apply(coefficients: Coefficients): SmoothedHingeLossLinearSVMModel = - new SmoothedHingeLossLinearSVMModel(coefficients) -} diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionTest.scala index 20f78626..e7b6ea05 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunctionTest.scala @@ -14,11 +14,12 @@ */ package com.linkedin.photon.ml.function -import breeze.linalg.Vector +import org.mockito.Mockito._ import org.testng.annotations.{DataProvider, Test} -import com.linkedin.photon.ml.normalization.NormalizationContext -import com.linkedin.photon.ml.util.BroadcastWrapper +import com.linkedin.photon.ml.function.glm.LogisticLossFunction +import com.linkedin.photon.ml.optimization.NoRegularizationContext +import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration /** * Tests for [[DistributedObjectiveFunction]] @@ -37,17 +38,28 @@ class DistributedObjectiveFunctionTest { */ @Test(dataProvider = "invalidInput", expectedExceptions = Array(classOf[IllegalArgumentException])) def testSetupWithInvalidInput(treeAggregateDepth: Int): Unit = - new MockDistributedObjectiveFunctionFactory(treeAggregateDepth) + buildDistributedObjectiveFunction(treeAggregateDepth) } object DistributedObjectiveFunctionTest { - class MockDistributedObjectiveFunctionFactory(treeAggregateDepth: Int) - extends DistributedObjectiveFunction(treeAggregateDepth) { + val MOCK_REGULARIZATION_WEIGHT = 0D + val MOCK_REGULARIZATION_CONTEXT = NoRegularizationContext + val MOCK_COORDINATE_CONFIG = mock(classOf[FixedEffectOptimizationConfiguration]) - override protected[ml] def value( - input: Data, - coefficients: Vector[Double], - normalizationContext: BroadcastWrapper[NormalizationContext]): Double = 0D - } + doReturn(MOCK_REGULARIZATION_WEIGHT).when(MOCK_COORDINATE_CONFIG).regularizationWeight + doReturn(MOCK_REGULARIZATION_CONTEXT).when(MOCK_COORDINATE_CONFIG).regularizationContext + + /** + * Helper function to build a [[DistributedObjectiveFunction]] object. 
+ * + * @param treeAggregateDepth The tree aggregation depth (see [[DistributedObjectiveFunction]] for documentation) + * @return A new [[DistributedObjectiveFunction]] object + */ + def buildDistributedObjectiveFunction(treeAggregateDepth: Int): DistributedObjectiveFunction = + DistributedObjectiveFunction( + MOCK_COORDINATE_CONFIG, + LogisticLossFunction, + treeAggregateDepth, + interceptIndexOpt = None) } diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala index 2376d2bc..0eda7396 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/function/ObjectiveFunctionHelperTest.scala @@ -14,16 +14,13 @@ */ package com.linkedin.photon.ml.function +import org.mockito.Mockito._ import org.testng.Assert._ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.TaskType -import com.linkedin.photon.ml.TaskType.TaskType -import com.linkedin.photon.ml.function.glm.DistributedGLMLossFunction -import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction +import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, RandomEffectOptimizationConfiguration} import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration -import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType} -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel /** * Unit tests for [[ObjectiveFunctionHelper]]. @@ -35,42 +32,34 @@ class ObjectiveFunctionHelperTest { @DataProvider def trainingTaskProvider(): Array[Array[Any]] = Array( - Array(TaskType.LOGISTIC_REGRESSION), - Array(TaskType.LINEAR_REGRESSION), - Array(TaskType.POISSON_REGRESSION), - Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM)) + Array(FIXED_COORDINATE_OPT_CONFIG), + Array(RANDOM_COORDINATE_OPT_CONFIG)) /** * Test that the [[ObjectiveFunction]] generated by the factory function returned by the [[ObjectiveFunctionHelper]] - * is of the appropriate type for the given training task. + * is of the appropriate type for the given coordinate configuration. 
* - * @param trainingTask The training task + * @param config The coordinate configuration */ @Test(dataProvider = "trainingTaskProvider") - def testBuildFactory(trainingTask: TaskType): Unit = { + def testBuildFactory(config: CoordinateOptimizationConfiguration): Unit = { - val objectiveFunction = ObjectiveFunctionHelper.buildFactory( - trainingTask, - TREE_AGGREGATE_DEPTH)(COORDINATE_OPT_CONFIG) + val objectiveFunctionFactory = + ObjectiveFunctionHelper.buildFactory(TaskType.LOGISTIC_REGRESSION, TREE_AGGREGATE_DEPTH)(config) - trainingTask match { - case TaskType.LOGISTIC_REGRESSION | TaskType.LINEAR_REGRESSION | TaskType.POISSON_REGRESSION => - assertTrue( - objectiveFunction.isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedGLMLossFunction]) + config match { + case _: FixedEffectOptimizationConfiguration => + assertTrue(objectiveFunctionFactory.isInstanceOf[ObjectiveFunctionHelper.DistributedObjectiveFunctionFactory]) - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => - assertTrue( - objectiveFunction - .isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedSmoothedHingeLossFunction]) + case _: RandomEffectOptimizationConfiguration => + assertTrue(objectiveFunctionFactory.isInstanceOf[ObjectiveFunctionHelper.SingleNodeObjectiveFunctionFactory]) } } } object ObjectiveFunctionHelperTest { - val COORDINATE_OPT_CONFIG = FixedEffectOptimizationConfiguration(OptimizerConfig(OptimizerType.LBFGS, 1, 2e-2)) - val ENABLE_INCREMENTAL_TRAINING = false - val MAXIMUM_ITERATIONS = 1 - val TOLERANCE = 2e-2 - val TREE_AGGREGATE_DEPTH = 3 + private val FIXED_COORDINATE_OPT_CONFIG = mock(classOf[FixedEffectOptimizationConfiguration]) + private val RANDOM_COORDINATE_OPT_CONFIG = mock(classOf[RandomEffectOptimizationConfiguration]) + private val TREE_AGGREGATE_DEPTH = 3 } diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunctionTest.scala index 53c55dbc..1c18ec2f 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunctionTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunctionTest.scala @@ -23,13 +23,12 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.TaskType import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SingleNodeGLMLossFunction, SquaredLossFunction} -import com.linkedin.photon.ml.function.svm.SingleNodeSmoothedHingeLossFunction +import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SquaredLossFunction} import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext} -import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration -import com.linkedin.photon.ml.optimization.{L2RegularizationContext, NoRegularizationContext} +import com.linkedin.photon.ml.optimization.game.{GLMOptimizationConfiguration, RandomEffectOptimizationConfiguration} +import com.linkedin.photon.ml.optimization.{ElasticNetRegularizationContext, L2RegularizationContext, NoRegularizationContext, OptimizerConfig} import com.linkedin.photon.ml.test.SparkTestUtils -import com.linkedin.photon.ml.util.BroadcastWrapper +import com.linkedin.photon.ml.util.{BroadcastWrapper, PhotonNonBroadcast} /** * Unit tests to verify that the loss functions compute gradients & Hessians accurately. 
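// [Editor's sketch] The gradient/Hessian consistency checks referenced below reduce to
// comparing analytic derivatives against central finite differences. This is a minimal,
// self-contained illustration of that technique on a toy objective; `loss` and `gradient`
// are hypothetical stand-ins for SingleNodeObjectiveFunction.value and .gradient, not the
// repository's actual test helpers.
object GradientCheckSketch {

  // Toy objective f(x) = x . x, whose exact gradient is 2x.
  def loss(x: Array[Double]): Double = x.map(v => v * v).sum
  def gradient(x: Array[Double]): Array[Double] = x.map(2.0 * _)

  def main(args: Array[String]): Unit = {
    val x = Array(0.5, -1.5, 2.0)
    val h = 1e-6

    // Central difference: df/dx_i ~= (f(x + h * e_i) - f(x - h * e_i)) / (2 * h)
    val numeric = x.indices.map { i =>
      val xPlus = x.clone
      val xMinus = x.clone
      xPlus(i) += h
      xMinus(i) -= h
      (loss(xPlus) - loss(xMinus)) / (2 * h)
    }

    val maxError = gradient(x).zip(numeric).map { case (a, n) => math.abs(a - n) }.max
    assert(maxError < 1e-4, s"Gradient inconsistent with objective (max error = $maxError)")
  }
}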
@@ -38,9 +37,7 @@ class SingleNodeObjectiveFunctionTest extends SparkTestUtils { import SingleNodeObjectiveFunctionTest._ - private val twiceDiffTasks = - Array(TaskType.LOGISTIC_REGRESSION, TaskType.LINEAR_REGRESSION, TaskType.POISSON_REGRESSION) - private val diffTasks = twiceDiffTasks ++ Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM) + private val tasks = Array(TaskType.LOGISTIC_REGRESSION, TaskType.LINEAR_REGRESSION, TaskType.POISSON_REGRESSION) private val binaryClassificationDatasetGenerationFuncs = Array( generateBenignDatasetBinaryClassification _, generateWeightedBenignDatasetBinaryClassification _, @@ -63,39 +60,31 @@ class SingleNodeObjectiveFunctionTest extends SparkTestUtils { * @return Anonymous functions to generate the loss function and training data for the gradient tests */ @DataProvider(parallel = true) - def getDifferentiableFunctions: Array[Array[Object]] = diffTasks.flatMap { + def getDifferentiableFunctions: Array[Array[Object]] = tasks.flatMap { case TaskType.LOGISTIC_REGRESSION => - val lossFunc = SingleNodeGLMLossFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction) - val lossFuncWithL2 = SingleNodeGLMLossFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction) + val lossFunc = SingleNodeObjectiveFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction) + val lossFuncWithL2 = SingleNodeObjectiveFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction) binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc => Seq[(SingleNodeObjectiveFunction, _)]((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc)) } case TaskType.LINEAR_REGRESSION => - val lossFunc = SingleNodeGLMLossFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction) - val lossFuncWithL2 = SingleNodeGLMLossFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction) + val lossFunc = SingleNodeObjectiveFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction) + val lossFuncWithL2 = SingleNodeObjectiveFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction) linearRegressionDatasetGenerationFuncs.flatMap { dataGenFunc => Seq[(SingleNodeObjectiveFunction, _)]((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc)) } case TaskType.POISSON_REGRESSION => - val lossFunc = SingleNodeGLMLossFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction) - val lossFuncWithL2 = SingleNodeGLMLossFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction) + val lossFunc = SingleNodeObjectiveFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction) + val lossFuncWithL2 = SingleNodeObjectiveFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction) poissonRegressionDatasetGenerationFuncs.flatMap { dataGenFunc => Seq[(SingleNodeObjectiveFunction, _)]((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc)) } - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => - val lossFunc = SingleNodeSmoothedHingeLossFunction(NO_REG_CONFIGURATION_MOCK) - val lossFuncWithL2 = SingleNodeSmoothedHingeLossFunction(L2_REG_CONFIGURATION_MOCK) - - binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc => - Seq[(SingleNodeObjectiveFunction, _)]((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc)) - } - case other => throw new IllegalArgumentException(s"Unrecognized task type: $other") } @@ -107,26 +96,26 @@ class SingleNodeObjectiveFunctionTest extends SparkTestUtils { * @return Anonymous functions to generate the loss function and training data for the Hessian tests */ @DataProvider(parallel = true) - def getTwiceDifferentiableFunctions: Array[Array[Object]] = twiceDiffTasks.flatMap { + def getTwiceDifferentiableFunctions: 
Array[Array[Object]] = tasks.flatMap {
     case TaskType.LOGISTIC_REGRESSION =>
-      val lossFunc = SingleNodeGLMLossFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction)
-      val lossFuncWithL2 = SingleNodeGLMLossFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction)
+      val lossFunc = SingleNodeObjectiveFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction)
+      val lossFuncWithL2 = SingleNodeObjectiveFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction)
 
       binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc =>
         Seq((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc))
       }
 
     case TaskType.LINEAR_REGRESSION =>
-      val lossFunc = SingleNodeGLMLossFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction)
-      val lossFuncWithL2 = SingleNodeGLMLossFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction)
+      val lossFunc = SingleNodeObjectiveFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction)
+      val lossFuncWithL2 = SingleNodeObjectiveFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction)
 
       linearRegressionDatasetGenerationFuncs.flatMap { dataGenFunc =>
         Seq((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc))
       }
 
     case TaskType.POISSON_REGRESSION =>
-      val lossFunc = SingleNodeGLMLossFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction)
-      val lossFuncWithL2 = SingleNodeGLMLossFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction)
+      val lossFunc = SingleNodeObjectiveFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction)
+      val lossFuncWithL2 = SingleNodeObjectiveFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction)
 
       poissonRegressionDatasetGenerationFuncs.flatMap { dataGenFunc =>
         Seq((lossFunc, dataGenFunc), (lossFuncWithL2, dataGenFunc))
@@ -463,7 +452,7 @@ class SingleNodeObjectiveFunctionTest extends SparkTestUtils {
     dependsOnMethods = Array("checkGradientConsistentWithObjectiveLocal"),
     groups = Array[String]("ObjectiveFunctionTests", "testCore"))
   def checkHessianConsistentWithObjectiveLocal(
-      function: SingleNodeGLMLossFunction with TwiceDiffFunction,
+      function: SingleNodeObjectiveFunction with TwiceDiffFunction,
       dataGenerationFunction: () => List[LabeledPoint]): Unit = {
 
     val data = dataGenerationFunction()
@@ -510,9 +499,89 @@ class SingleNodeObjectiveFunctionTest extends SparkTestUtils {
       }
     }
   }
+
+  /**
+   * Verify the value of the loss function without regularization.
+   */
+  @Test
+  def testValueNoRegularization(): Unit = {
+
+    val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2)
+    val coefficients = COEFFICIENT_VECTOR
+
+    val randomEffectRegularizationContext = NoRegularizationContext
+    val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration(
+      RANDOM_EFFECT_OPTIMIZER_CONFIG,
+      randomEffectRegularizationContext)
+    val singleNodeObjectiveFunction = SingleNodeObjectiveFunction(
+      randomEffectOptimizationConfiguration,
+      LogisticLossFunction)
+    val value = singleNodeObjectiveFunction.value(
+      labeledPoints,
+      coefficients,
+      PhotonNonBroadcast(NORMALIZATION_CONTEXT))
+
+    // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) = 5.1755
+    assertEquals(value, 5.1755, EPSILON)
+  }
+
+  /**
+   * Verify the value of the loss function with L2 regularization.
+   */
+  @Test
+  def testValueWithL2Regularization(): Unit = {
+
+    val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2)
+    val coefficients = COEFFICIENT_VECTOR
+
+    val randomEffectRegularizationContext = L2RegularizationContext
+    val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration(
+      RANDOM_EFFECT_OPTIMIZER_CONFIG,
+      randomEffectRegularizationContext,
+      RANDOM_EFFECT_REGULARIZATION_WEIGHT)
+    val singleNodeObjectiveFunction = SingleNodeObjectiveFunction(
+      randomEffectOptimizationConfiguration,
+      LogisticLossFunction)
+    val value = singleNodeObjectiveFunction.value(
+      labeledPoints,
+      coefficients,
+      PhotonNonBroadcast(NORMALIZATION_CONTEXT))
+
+    // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + 1 * ((-2)^2 + 3^2) / 2 = 11.6755
+    assertEquals(value, 11.6755, EPSILON)
+  }
+
+  /**
+   * Verify the value of the loss function with elastic net regularization.
+   */
+  @Test
+  def testValueWithElasticNetRegularization(): Unit = {
+
+    val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2)
+    val coefficients = COEFFICIENT_VECTOR
+
+    val randomEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA)
+    val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration(
+      RANDOM_EFFECT_OPTIMIZER_CONFIG,
+      randomEffectRegularizationContext,
+      RANDOM_EFFECT_REGULARIZATION_WEIGHT)
+    val singleNodeObjectiveFunction = SingleNodeObjectiveFunction(
+      randomEffectOptimizationConfiguration,
+      LogisticLossFunction)
+    val value = singleNodeObjectiveFunction.value(
+      labeledPoints,
+      coefficients,
+      PhotonNonBroadcast(NORMALIZATION_CONTEXT))
+
+    // L1 is computed by the optimizer.
+    // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.0755
+    assertEquals(value, 9.0755, EPSILON)
+  }
 }
 
 object SingleNodeObjectiveFunctionTest {
+
+  // Gradient and Hessian test constants
   private val LOCAL_CONSISTENCY_CHECK_SAMPLES = 25
   private val PROBLEM_DIMENSION = 5
   private val NORMALIZATION = NoNormalization()
@@ -529,6 +598,16 @@ object SingleNodeObjectiveFunctionTest {
   private val WEIGHT_RANDOM_MAX = 10
   private val TRAINING_SAMPLES = PROBLEM_DIMENSION * PROBLEM_DIMENSION
 
+  // Regularization test constants
+  private val RANDOM_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig])
+  private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0))
+  private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0))
+  private val COEFFICIENT_VECTOR = DenseVector(-2.0, 3.0)
+  private val NORMALIZATION_CONTEXT = NoNormalization()
+  private val RANDOM_EFFECT_REGULARIZATION_WEIGHT = 1D
+  private val ALPHA = 0.4
+  private val EPSILON = 1e-3
+
   doReturn(NORMALIZATION).when(NORMALIZATION_MOCK).value
   doReturn(L2RegularizationContext).when(L2_REG_CONFIGURATION_MOCK).regularizationContext
   doReturn(REGULARIZATION_WEIGHT).when(L2_REG_CONFIGURATION_MOCK).regularizationWeight
diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala
deleted file mode 100644
index 4905b355..00000000
--- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/GLMLossFunctionTest.scala
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2017 LinkedIn Corp. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.glm - -import org.testng.Assert._ -import org.testng.annotations.{DataProvider, Test} - -import com.linkedin.photon.ml.function.ObjectiveFunction -import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType} -import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration} -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel - -/** - * Unit tests for [[GLMLossFunction]]. - */ -class GLMLossFunctionTest { - - import GLMLossFunctionTest._ - - @DataProvider - def coordinateOptimizationProblemProvider(): Array[Array[Any]] = { - - val optConfig = OptimizerConfig(OptimizerType.LBFGS, MAXIMUM_ITERATIONS, TOLERANCE) - - Array( - Array(FixedEffectOptimizationConfiguration(optConfig)), - Array(RandomEffectOptimizationConfiguration(optConfig))) - } - - /** - * Test that the [[ObjectiveFunction]] generated by the factory function returned by the [[GLMLossFunction]] - * is of the appropriate type for the given coordinate optimization task. - * - * @param coordinateOptConfig The coordinate optimization task - */ - @Test(dataProvider = "coordinateOptimizationProblemProvider") - def testBuildFactory(coordinateOptConfig: CoordinateOptimizationConfiguration): Unit = { - - val objectiveFunction = GLMLossFunction.buildFactory(LOSS_FUNCTION, TREE_AGGREGATE_DEPTH)(coordinateOptConfig) - - coordinateOptConfig match { - case _: FixedEffectOptimizationConfiguration => - assertTrue( - objectiveFunction.isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedGLMLossFunction]) - - case _: RandomEffectOptimizationConfiguration => - assertTrue( - objectiveFunction.isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => SingleNodeGLMLossFunction]) - - case _ => - assertTrue(false) - } - } -} - -object GLMLossFunctionTest { - - val LOSS_FUNCTION = LogisticLossFunction - val MAXIMUM_ITERATIONS = 1 - val TOLERANCE = 2e-2 - val TREE_AGGREGATE_DEPTH = 3 -} diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunctionTest.scala deleted file mode 100644 index 9edee17f..00000000 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/glm/SingleNodeGLMLossFunctionTest.scala +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2019 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ -package com.linkedin.photon.ml.function.glm - -import breeze.linalg.DenseVector -import org.mockito.Mockito._ -import org.testng.Assert.assertEquals -import org.testng.annotations.Test - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.normalization.NoNormalization -import com.linkedin.photon.ml.optimization._ -import com.linkedin.photon.ml.optimization.game.RandomEffectOptimizationConfiguration -import com.linkedin.photon.ml.util.PhotonNonBroadcast - -/** - * Unit tests for [[SingleNodeGLMLossFunction]]. - */ -class SingleNodeGLMLossFunctionTest { - - import SingleNodeGLMLossFunctionTest._ - - /** - * Verify the value of loss function without regularization. - */ - @Test() - def testValueNoRegularization(): Unit = { - - val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2) - val coefficients = COEFFICIENT_VECTOR - - val randomEffectRegularizationContext = NoRegularizationContext - val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration( - RANDOM_EFFECT_OPTIMIZER_CONFIG, - randomEffectRegularizationContext) - val singleNodeGLMLossFunction = SingleNodeGLMLossFunction( - randomEffectOptimizationConfiguration, - LogisticLossFunction) - val value = singleNodeGLMLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // expectValue = log(1 + exp(3)) + log(1 + exp(2)) = 5.1755 - assertEquals(value, 5.1755, EPSILON) - } - - /** - * Verify the value of loss function with L2 regularization. - */ - @Test() - def testValueWithL2Regularization(): Unit = { - - val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2) - val coefficients = COEFFICIENT_VECTOR - - val randomEffectRegularizationContext = L2RegularizationContext - val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration( - RANDOM_EFFECT_OPTIMIZER_CONFIG, - randomEffectRegularizationContext, - RANDOM_EFFECT_REGULARIZATION_WEIGHT) - val singleNodeGLMLossFunction = SingleNodeGLMLossFunction( - randomEffectOptimizationConfiguration, - LogisticLossFunction) - val value = singleNodeGLMLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + 1 * ((-2)^2 + 3^2) / 2 = 11.6755 - assertEquals(value, 11.6755, EPSILON) - } - - /** - * Verify the value of loss function with elastic net regularization. - */ - @Test() - def testValueWithElasticNetRegularization(): Unit = { - - val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2) - val coefficients = COEFFICIENT_VECTOR - - val randomEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA) - val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration( - RANDOM_EFFECT_OPTIMIZER_CONFIG, - randomEffectRegularizationContext, - RANDOM_EFFECT_REGULARIZATION_WEIGHT) - val singleNodeGLMLossFunction = SingleNodeGLMLossFunction( - randomEffectOptimizationConfiguration, - LogisticLossFunction) - val value = singleNodeGLMLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // L1 is computed by the optimizer. 
- // expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.0755 - assertEquals(value, 9.0755, EPSILON) - } -} - -object SingleNodeGLMLossFunctionTest { - - private val RANDOM_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig]) - private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0)) - private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0)) - private val COEFFICIENT_VECTOR = DenseVector(-2.0, 3.0) - private val NORMALIZATION_CONTEXT = NoNormalization() - private val RANDOM_EFFECT_REGULARIZATION_WEIGHT = 1D - private val ALPHA = 0.4 - private val EPSILON = 1e-3 -} diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunctionTest.scala deleted file mode 100644 index fb9b1349..00000000 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SingleNodeSmoothedHingeLossFunctionTest.scala +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2019 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.linkedin.photon.ml.function.svm - -import breeze.linalg.DenseVector -import org.mockito.Mockito._ -import org.testng.Assert.assertEquals -import org.testng.annotations.Test - -import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.normalization.NoNormalization -import com.linkedin.photon.ml.optimization._ -import com.linkedin.photon.ml.optimization.game.RandomEffectOptimizationConfiguration -import com.linkedin.photon.ml.util.PhotonNonBroadcast - -/** - * Unit tests for [[SingleNodeSmoothedHingeLossFunction]]. - */ -class SingleNodeSmoothedHingeLossFunctionTest { - - import SingleNodeSmoothedHingeLossFunctionTest._ - - /** - * Verify the value of loss function without regularization. - */ - @Test() - def testValueNoRegularization(): Unit = { - - val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2) - val coefficients = COEFFICIENT_VECTOR - - val randomEffectRegularizationContext = NoRegularizationContext - val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration( - RANDOM_EFFECT_OPTIMIZER_CONFIG, - randomEffectRegularizationContext) - val singleNodeSmoothedHingeLossFunction = SingleNodeSmoothedHingeLossFunction(randomEffectOptimizationConfiguration) - val value = singleNodeSmoothedHingeLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - assertEquals(value, 6.0, EPSILON) - } - - /** - * Verify the value of loss function with L2 regularization. 
- */ - @Test() - def testValueWithL2Regularization(): Unit = { - - val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2) - val coefficients = COEFFICIENT_VECTOR - - val randomEffectRegularizationContext = L2RegularizationContext - val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration( - RANDOM_EFFECT_OPTIMIZER_CONFIG, - randomEffectRegularizationContext, - RANDOM_EFFECT_REGULARIZATION_WEIGHT) - val singleNodeSmoothedHingeLossFunction = SingleNodeSmoothedHingeLossFunction(randomEffectOptimizationConfiguration) - val value = singleNodeSmoothedHingeLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // expectedValue = 6 + 1 * ((-2)^2 + 3^2) / 2 = 12.5 - assertEquals(value, 12.5, EPSILON) - } - - /** - * Verify the value of loss function with elastic net regularization. - */ - @Test() - def testValueWithElasticNetRegularization(): Unit = { - - val labeledPoints = Iterable(LABELED_POINT_1, LABELED_POINT_2) - val coefficients = COEFFICIENT_VECTOR - - val randomEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA) - val randomEffectOptimizationConfiguration = RandomEffectOptimizationConfiguration( - RANDOM_EFFECT_OPTIMIZER_CONFIG, - randomEffectRegularizationContext, - RANDOM_EFFECT_REGULARIZATION_WEIGHT) - val singleNodeSmoothedHingeLossFunction = SingleNodeSmoothedHingeLossFunction(randomEffectOptimizationConfiguration) - val value = singleNodeSmoothedHingeLossFunction.value( - labeledPoints, - coefficients, - PhotonNonBroadcast(NORMALIZATION_CONTEXT)) - - // L1 is computed by the optimizer. - // expectedValue = 6 + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.9 - assertEquals(value, 9.9, EPSILON) - } -} - -object SingleNodeSmoothedHingeLossFunctionTest { - - private val RANDOM_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig]) - private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0)) - private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0)) - private val COEFFICIENT_VECTOR = DenseVector(-2.0, 3.0) - private val NORMALIZATION_CONTEXT = NoNormalization() - private val RANDOM_EFFECT_REGULARIZATION_WEIGHT = 1D - private val ALPHA = 0.4 - private val EPSILON = 1e-3 -} diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala deleted file mode 100644 index 9d4eda16..00000000 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/function/svm/SmoothedHingeLossFunctionTest.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2017 LinkedIn Corp. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- */ -package com.linkedin.photon.ml.function.svm - -import org.testng.Assert._ -import org.testng.annotations.{DataProvider, Test} - -import com.linkedin.photon.ml.function.ObjectiveFunction -import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType} -import com.linkedin.photon.ml.optimization.game.{CoordinateOptimizationConfiguration, FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration} -import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel - -/** - * Unit tests for [[SmoothedHingeLossFunction]]. - */ -class SmoothedHingeLossFunctionTest { - - import SmoothedHingeLossFunctionTest._ - - @DataProvider - def coordinateOptimizationProblemProvider(): Array[Array[Any]] = { - - val optConfig = OptimizerConfig(OptimizerType.LBFGS, MAXIMUM_ITERATIONS, TOLERANCE) - - Array( - Array(FixedEffectOptimizationConfiguration(optConfig)), - Array(RandomEffectOptimizationConfiguration(optConfig))) - } - - /** - * Test that the [[ObjectiveFunction]] generated by the factory function returned by the [[SmoothedHingeLossFunction]] - * is of the appropriate type for the given coordinate optimization task. - * - * @param coordinateOptConfig The coordinate optimization task - */ - @Test(dataProvider = "coordinateOptimizationProblemProvider") - def testBuildFactory(coordinateOptConfig: CoordinateOptimizationConfiguration): Unit = { - - val objectiveFunctionFactory = SmoothedHingeLossFunction.buildFactory(TREE_AGGREGATE_DEPTH)(coordinateOptConfig) - - coordinateOptConfig match { - case _: FixedEffectOptimizationConfiguration => - assertTrue( - objectiveFunctionFactory - .isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => DistributedSmoothedHingeLossFunction]) - - case _: RandomEffectOptimizationConfiguration => - assertTrue( - objectiveFunctionFactory - .isInstanceOf[(Option[GeneralizedLinearModel], Option[Int]) => SingleNodeSmoothedHingeLossFunction]) - - case _ => - assertTrue(false) - } - } -} - -object SmoothedHingeLossFunctionTest { - - val MAXIMUM_ITERATIONS = 1 - val TOLERANCE = 2e-2 - val TREE_AGGREGATE_DEPTH = 3 -} diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala index ef4ab17b..a26cb435 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/DistributedOptimizationProblemTest.scala @@ -25,14 +25,15 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.constants.MathConst import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.DistributedObjectiveFunction -import com.linkedin.photon.ml.function.glm.DistributedGLMLossFunction -import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction +import com.linkedin.photon.ml.function.glm.LogisticLossFunction +import com.linkedin.photon.ml.function.{DistributedObjectiveFunction, L2RegularizationTwiceDiff} import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext} import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType +import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import 
com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel +import com.linkedin.photon.ml.test.CommonTestUtils import com.linkedin.photon.ml.util.BroadcastWrapper /** @@ -40,6 +41,8 @@ import com.linkedin.photon.ml.util.BroadcastWrapper */ class DistributedOptimizationProblemTest { + import DistributedOptimizationProblemTest._ + private val DIMENSIONS: Int = 5 /** @@ -48,16 +51,13 @@ class DistributedOptimizationProblemTest { @DataProvider def varianceInput(): Array[Array[Any]] = { - val mockDiffFunction = mock(classOf[DistributedSmoothedHingeLossFunction]) - val mockTwiceDiffFunction = mock(classOf[DistributedGLMLossFunction]) - val mockOptimizerDiff = mock(classOf[Optimizer[DistributedSmoothedHingeLossFunction]]) - val mockOptimizerTwiceDiff = mock(classOf[Optimizer[DistributedGLMLossFunction]]) + val mockTwiceDiffFunction = mock(classOf[DistributedObjectiveFunction]) + val mockOptimizerTwiceDiff = mock(classOf[Optimizer[DistributedObjectiveFunction]]) val mockStatesTracker = mock(classOf[OptimizationStatesTracker]) val hessianDiagonal = DenseVector(Array(1D, 0D, 2D)) val hessianMatrix = DenseMatrix.eye[Double](DIMENSIONS) - doReturn(mockStatesTracker).when(mockOptimizerDiff).getStateTracker doReturn(mockStatesTracker).when(mockOptimizerTwiceDiff).getStateTracker doReturn(hessianDiagonal) .when(mockTwiceDiffFunction) @@ -70,12 +70,8 @@ class DistributedOptimizationProblemTest { val matrixVariance = DenseVector(Array.fill(DIMENSIONS)(1D)) Array( - // var type, function, expected result - Array(VarianceComputationType.NONE, mockOptimizerDiff, mockDiffFunction, None), Array(VarianceComputationType.NONE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, None), - Array(VarianceComputationType.SIMPLE, mockOptimizerDiff, mockDiffFunction, None), Array(VarianceComputationType.SIMPLE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(diagonalVariance)), - Array(VarianceComputationType.FULL, mockOptimizerDiff, mockDiffFunction, None), Array(VarianceComputationType.FULL, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(matrixVariance))) } @@ -125,8 +121,8 @@ class DistributedOptimizationProblemTest { val sparkContext = mock(classOf[SparkContext]) val trainingData = mock(classOf[RDD[LabeledPoint]]) - val objectiveFunction = mock(classOf[DistributedGLMLossFunction]) - val optimizer = mock(classOf[Optimizer[DistributedGLMLossFunction]]) + val objectiveFunction = mock(classOf[DistributedObjectiveFunction]) + val optimizer = mock(classOf[Optimizer[DistributedObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val state = mock(classOf[OptimizerState]) val broadcastCoefficients = mock(classOf[Broadcast[Vector[Double]]]) @@ -157,4 +153,101 @@ class DistributedOptimizationProblemTest { assertTrue(means.eq(model.coefficients.means)) } + + /** + * Test that regularization weights can be updated. 
+ */ + @Test + def testUpdateRegularizationWeight(): Unit = { + + val normalization = NoNormalization() + val initL1Weight = 1D + val initL2Weight = 2D + val finalL1Weight = 3D + val finalL2Weight = 4D + val finalElasticWeight = 5D + val alpha = 0.75 + val elasticFinalL1Weight = finalElasticWeight * alpha + val elasticFinalL2Weight = finalElasticWeight * (1 - alpha) + + val normalizationMock = mock(classOf[BroadcastWrapper[NormalizationContext]]) + val optimizer = mock(classOf[Optimizer[DistributedObjectiveFunction]]) + val statesTracker = mock(classOf[OptimizationStatesTracker]) + val objectiveFunction = mock(classOf[DistributedObjectiveFunction]) + + doReturn(normalization).when(normalizationMock).value + doReturn(statesTracker).when(optimizer).getStateTracker + + val optimizerL1 = new OWLQN(initL1Weight, normalizationMock) + val objectiveFunctionL2 = MOCK_DISTRIBUTED_OBJECTIVE_FUNCTION + objectiveFunctionL2.l2RegularizationWeight = initL2Weight + + val l1Problem = new DistributedOptimizationProblem( + optimizerL1, + objectiveFunction, + samplerOption = None, + LogisticRegressionModel.apply, + L1RegularizationContext, + VarianceComputationType.NONE) + val l2Problem = new DistributedOptimizationProblem( + optimizer, + objectiveFunctionL2, + samplerOption = None, + LogisticRegressionModel.apply, + L2RegularizationContext, + VarianceComputationType.NONE) + val elasticProblem = new DistributedOptimizationProblem( + optimizerL1, + objectiveFunctionL2, + samplerOption = None, + LogisticRegressionModel.apply, + ElasticNetRegularizationContext(alpha), + VarianceComputationType.NONE) + + // Check update to L1/L2 weights individually + assertNotEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + + l1Problem.updateRegularizationWeight(finalL1Weight) + l2Problem.updateRegularizationWeight(finalL2Weight) + + assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(optimizerL1.l1RegularizationWeight, finalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(objectiveFunctionL2.l2RegularizationWeight, finalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + + // Check updates to L1/L2 weights together + optimizerL1.l1RegularizationWeight = initL1Weight + objectiveFunctionL2.l2RegularizationWeight = initL2Weight + + assertNotEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + + elasticProblem.updateRegularizationWeight(finalElasticWeight) + + assertNotEquals(optimizerL1.l1RegularizationWeight, initL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertNotEquals(objectiveFunctionL2.l2RegularizationWeight, initL2Weight, 
CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(optimizerL1.l1RegularizationWeight, elasticFinalL1Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + assertEquals(objectiveFunctionL2.l2RegularizationWeight, elasticFinalL2Weight, CommonTestUtils.HIGH_PRECISION_TOLERANCE) + } +} + +object DistributedOptimizationProblemTest { + + type L2DistributedObjectiveFunction = DistributedObjectiveFunction with L2RegularizationTwiceDiff + + private val MOCK_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig]) + private val MOCK_COORDINATE_CONFIG = FixedEffectOptimizationConfiguration( + MOCK_OPTIMIZER_CONFIG, + L2RegularizationContext) + private val MOCK_DISTRIBUTED_OBJECTIVE_FUNCTION = DistributedObjectiveFunction( + MOCK_COORDINATE_CONFIG, + LogisticLossFunction, + treeAggregateDepth = 1, + interceptIndexOpt = None) + .asInstanceOf[DistributedObjectiveFunction with L2RegularizationTwiceDiff] } diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala index cde504b9..ad973775 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/GeneralizedLinearOptimizationProblemTest.scala @@ -14,21 +14,17 @@ */ package com.linkedin.photon.ml.optimization -import scala.math.abs - -import breeze.linalg.{Vector, sum} +import breeze.linalg.Vector import org.mockito.Mockito._ import org.testng.Assert._ import org.testng.annotations.Test import com.linkedin.photon.ml.data.LabeledPoint import com.linkedin.photon.ml.function._ -import com.linkedin.photon.ml.function.svm.SingleNodeSmoothedHingeLossFunction import com.linkedin.photon.ml.model.Coefficients -import com.linkedin.photon.ml.supervised.classification.{LogisticRegressionModel, SmoothedHingeLossLinearSVMModel} +import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel import com.linkedin.photon.ml.supervised.regression.{LinearRegressionModel, PoissonRegressionModel} -import com.linkedin.photon.ml.test.CommonTestUtils import com.linkedin.photon.ml.test.CommonTestUtils.generateDenseVector /** @@ -42,7 +38,7 @@ class GeneralizedLinearOptimizationProblemTest { def testInitializeZeroModel(): Unit = { val optimizer = mock(classOf[Optimizer[ObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) - val objective = mock(classOf[SingleNodeSmoothedHingeLossFunction]) + val objective = mock(classOf[SingleNodeObjectiveFunction]) val regularization = NoRegularizationContext doReturn(statesTracker).when(optimizer).getStateTracker @@ -62,16 +58,10 @@ class GeneralizedLinearOptimizationProblemTest { objective, PoissonRegressionModel.apply, regularization) - val hingeProblem = new MockOptimizationProblem( - optimizer, - objective, - SmoothedHingeLossLinearSVMModel.apply, - regularization) val logisticModel = logisticProblem.publicInitializeZeroModel(DIMENSION) val linearModel = linearProblem.publicInitializeZeroModel(DIMENSION) val poissonModel = poissonProblem.publicInitializeZeroModel(DIMENSION) - val hingeModel = hingeProblem.publicInitializeZeroModel(DIMENSION) assertTrue(logisticModel.isInstanceOf[LogisticRegressionModel]) assertEquals(logisticModel.coefficients.means.length, DIMENSION) @@ -81,16 +71,14 @@ class GeneralizedLinearOptimizationProblemTest 
{ assertTrue(poissonModel.isInstanceOf[PoissonRegressionModel]) assertEquals(poissonModel.coefficients.means.length, DIMENSION) - - assertTrue(hingeModel.isInstanceOf[SmoothedHingeLossLinearSVMModel]) - assertEquals(hingeModel.coefficients.means.length, DIMENSION) } @Test def testCreateModel(): Unit = { + val optimizer = mock(classOf[Optimizer[ObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) - val objective = mock(classOf[SingleNodeSmoothedHingeLossFunction]) + val objective = mock(classOf[SingleNodeObjectiveFunction]) val regularization = NoRegularizationContext doReturn(statesTracker).when(optimizer).getStateTracker @@ -110,17 +98,11 @@ class GeneralizedLinearOptimizationProblemTest { objective, PoissonRegressionModel.apply, regularization) - val hingeProblem = new MockOptimizationProblem( - optimizer, - objective, - SmoothedHingeLossLinearSVMModel.apply, - regularization) val coefficients = generateDenseVector(DIMENSION) val logisticModel = logisticProblem.publicCreateModel(coefficients, None) val linearModel = linearProblem.publicCreateModel(coefficients, None) val poissonModel = poissonProblem.publicCreateModel(coefficients, None) - val hingeModel = hingeProblem.publicCreateModel(coefficients, None) assertTrue(logisticModel.isInstanceOf[LogisticRegressionModel]) assertEquals(coefficients, logisticModel.coefficients.means) @@ -130,70 +112,6 @@ class GeneralizedLinearOptimizationProblemTest { assertTrue(poissonModel.isInstanceOf[PoissonRegressionModel]) assertEquals(coefficients, poissonModel.coefficients.means) - - assertTrue(hingeModel.isInstanceOf[SmoothedHingeLossLinearSVMModel]) - assertEquals(coefficients, hingeModel.coefficients.means) - } - - @Test - def testGetRegularizationTermValue(): Unit = { - val coefficients = new Coefficients(generateDenseVector(DIMENSION)) - val regWeight = 10D - val alpha = 0.25 - val l1RegWeight = alpha * regWeight - val l2RegWeight = (1 - alpha) * regWeight - val expectedL1Term = sum(coefficients.means.map(abs)) * l1RegWeight - val expectedL2Term = coefficients.means.dot(coefficients.means) * l2RegWeight / 2.0 - val expectedElasticNetTerm = expectedL1Term + expectedL2Term - - val optimizerNoReg = mock(classOf[LBFGS]) - val optimizerL1Reg = mock(classOf[OWLQN]) - val objectiveNoReg = mock(classOf[SingleNodeSmoothedHingeLossFunction]) - val objectiveL2Reg = mock(classOf[L2LossFunction]) - val statesTracker = mock(classOf[OptimizationStatesTracker]) - val initialModel = mock(classOf[GeneralizedLinearModel]) - - doReturn(statesTracker).when(optimizerNoReg).getStateTracker - doReturn(statesTracker).when(optimizerL1Reg).getStateTracker - - val problemNone = new MockOptimizationProblem( - optimizerNoReg, - objectiveNoReg, - LogisticRegressionModel.apply, - NoRegularizationContext) - val problemL1 = new MockOptimizationProblem( - optimizerL1Reg, - objectiveNoReg, - LogisticRegressionModel.apply, - L1RegularizationContext) - val problemL2 = new MockOptimizationProblem( - optimizerNoReg, - objectiveL2Reg, - LogisticRegressionModel.apply, - L2RegularizationContext) - val problemElasticNet = new MockOptimizationProblem( - optimizerL1Reg, - objectiveL2Reg, - LogisticRegressionModel.apply, - ElasticNetRegularizationContext(alpha)) - - doReturn(l1RegWeight).when(optimizerL1Reg).l1RegularizationWeight - doReturn(l2RegWeight).when(objectiveL2Reg).l2RegularizationWeight - doReturn(coefficients).when(initialModel).coefficients - - assertEquals(0.0, problemNone.getRegularizationTermValue(initialModel), 
CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals( - expectedL1Term, - problemL1.getRegularizationTermValue(initialModel), - CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals( - expectedL2Term, - problemL2.getRegularizationTermValue(initialModel), - CommonTestUtils.HIGH_PRECISION_TOLERANCE) - assertEquals( - expectedElasticNetTerm, - problemElasticNet.getRegularizationTermValue(initialModel), - CommonTestUtils.HIGH_PRECISION_TOLERANCE) } } @@ -202,11 +120,11 @@ object GeneralizedLinearOptimizationProblemTest { private val DIMENSION = 10 private class MockOptimizationProblem( - optimizer: Optimizer[SingleNodeSmoothedHingeLossFunction], - objectiveFunction: SingleNodeSmoothedHingeLossFunction, + optimizer: Optimizer[SingleNodeObjectiveFunction], + objectiveFunction: SingleNodeObjectiveFunction, glmConstructor: Coefficients => GeneralizedLinearModel, regularizationContext: RegularizationContext) - extends GeneralizedLinearOptimizationProblem[SingleNodeSmoothedHingeLossFunction]( + extends GeneralizedLinearOptimizationProblem[SingleNodeObjectiveFunction]( optimizer, objectiveFunction, glmConstructor, @@ -251,7 +169,4 @@ object GeneralizedLinearOptimizationProblemTest { override def run(input: Iterable[LabeledPoint], initialModel: GeneralizedLinearModel): (GeneralizedLinearModel, OptimizationStatesTracker) = (mockGLM, mockStateTracker) } - - // No way to pass Mixin class type to Mockito, need to define a concrete class - private class L2LossFunction extends SingleNodeSmoothedHingeLossFunction with L2RegularizationDiff } diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala index 54459051..bd157484 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/optimization/SingleNodeOptimizationProblemTest.scala @@ -23,8 +23,6 @@ import org.testng.annotations.{DataProvider, Test} import com.linkedin.photon.ml.constants.MathConst import com.linkedin.photon.ml.data.LabeledPoint import com.linkedin.photon.ml.function.SingleNodeObjectiveFunction -import com.linkedin.photon.ml.function.glm.SingleNodeGLMLossFunction -import com.linkedin.photon.ml.function.svm.SingleNodeSmoothedHingeLossFunction import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext} import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType @@ -45,16 +43,13 @@ class SingleNodeOptimizationProblemTest { @DataProvider def varianceInput(): Array[Array[Any]] = { - val mockDiffFunction = mock(classOf[SingleNodeSmoothedHingeLossFunction]) - val mockTwiceDiffFunction = mock(classOf[SingleNodeGLMLossFunction]) - val mockOptimizerDiff = mock(classOf[Optimizer[SingleNodeSmoothedHingeLossFunction]]) - val mockOptimizerTwiceDiff = mock(classOf[Optimizer[SingleNodeGLMLossFunction]]) + val mockTwiceDiffFunction = mock(classOf[SingleNodeObjectiveFunction]) + val mockOptimizerTwiceDiff = mock(classOf[Optimizer[SingleNodeObjectiveFunction]]) val mockStatesTracker = mock(classOf[OptimizationStatesTracker]) val hessianDiagonal = DenseVector(Array(1D, 0D, 2D)) val hessianMatrix = DenseMatrix.eye[Double](DIMENSIONS) - doReturn(mockStatesTracker).when(mockOptimizerDiff).getStateTracker doReturn(mockStatesTracker).when(mockOptimizerTwiceDiff).getStateTracker 
doReturn(hessianDiagonal) .when(mockTwiceDiffFunction) @@ -67,12 +62,8 @@ class SingleNodeOptimizationProblemTest { val matrixVariance = DenseVector(Array.fill(DIMENSIONS)(1D)) Array( - // var type, function, expected result - Array(VarianceComputationType.NONE, mockOptimizerDiff, mockDiffFunction, None), Array(VarianceComputationType.NONE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, None), - Array(VarianceComputationType.SIMPLE, mockOptimizerDiff, mockDiffFunction, None), Array(VarianceComputationType.SIMPLE, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(diagonalVariance)), - Array(VarianceComputationType.FULL, mockOptimizerDiff, mockDiffFunction, None), Array(VarianceComputationType.FULL, mockOptimizerTwiceDiff, mockTwiceDiffFunction, Some(matrixVariance))) } @@ -112,8 +103,8 @@ class SingleNodeOptimizationProblemTest { val normalization = NoNormalization() val trainingData = mock(classOf[Iterable[LabeledPoint]]) - val objectiveFunction = mock(classOf[SingleNodeGLMLossFunction]) - val optimizer = mock(classOf[Optimizer[SingleNodeGLMLossFunction]]) + val objectiveFunction = mock(classOf[SingleNodeObjectiveFunction]) + val optimizer = mock(classOf[Optimizer[SingleNodeObjectiveFunction]]) val statesTracker = mock(classOf[OptimizationStatesTracker]) val state = mock(classOf[OptimizerState]) val broadcastNormalization = mock(classOf[BroadcastWrapper[NormalizationContext]]) diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/sampling/DownSamplerHelperTest.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/sampling/DownSamplerHelperTest.scala index 4881c7c4..376df696 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/sampling/DownSamplerHelperTest.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/sampling/DownSamplerHelperTest.scala @@ -32,8 +32,7 @@ class DownSamplerHelperTest { Array( Array(TaskType.LOGISTIC_REGRESSION), Array(TaskType.LINEAR_REGRESSION), - Array(TaskType.POISSON_REGRESSION), - Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM)) + Array(TaskType.POISSON_REGRESSION)) /** * Test that the [[DownSampler]] generated by the factory function returned by the [[DownSamplerHelper]] is of the @@ -52,7 +51,7 @@ class DownSamplerHelperTest { case TaskType.LINEAR_REGRESSION | TaskType.POISSON_REGRESSION => assertTrue(downSampler.isInstanceOf[DefaultDownSampler]) - case TaskType.LOGISTIC_REGRESSION | TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => + case TaskType.LOGISTIC_REGRESSION => assertTrue(downSampler.isInstanceOf[BinaryClassificationDownSampler]) } } diff --git a/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala b/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala index 2317b86f..a6a7e7cc 100644 --- a/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala +++ b/photon-api/src/test/scala/com/linkedin/photon/ml/util/GameTestUtils.scala @@ -25,7 +25,8 @@ import com.linkedin.photon.ml.{SparkSessionConfiguration, TaskType} import com.linkedin.photon.ml.Types.{FeatureShardId, REId, REType, UniqueSampleId} import com.linkedin.photon.ml.algorithm.{FixedEffectCoordinate, RandomEffectCoordinate} import com.linkedin.photon.ml.data._ -import com.linkedin.photon.ml.function.glm.{DistributedGLMLossFunction, LogisticLossFunction, SingleNodeGLMLossFunction} +import com.linkedin.photon.ml.function.{DistributedObjectiveFunction, SingleNodeObjectiveFunction} +import com.linkedin.photon.ml.function.glm.LogisticLossFunction import com.linkedin.photon.ml.model.{Coefficients, FixedEffectModel, 
RandomEffectModel} import com.linkedin.photon.ml.normalization.NoNormalization import com.linkedin.photon.ml.optimization.OptimizerType.OptimizerType @@ -144,13 +145,13 @@ trait GameTestUtils extends SparkTestUtils { * * @return A newly generated fixed effect optimization problem */ - def generateFixedEffectOptimizationProblem: DistributedOptimizationProblem[DistributedGLMLossFunction] = { + def generateFixedEffectOptimizationProblem: DistributedOptimizationProblem[DistributedObjectiveFunction] = { val configuration = FixedEffectOptimizationConfiguration(generateOptimizerConfig()) DistributedOptimizationProblem( configuration, - DistributedGLMLossFunction(configuration, LogisticLossFunction, treeAggregateDepth = 1), + DistributedObjectiveFunction(configuration, LogisticLossFunction, treeAggregateDepth = 1), None, LogisticRegressionModel.apply, PhotonBroadcast(sc.broadcast(NoNormalization())), @@ -182,7 +183,7 @@ trait GameTestUtils extends SparkTestUtils { featureShardId: FeatureShardId, size: Int, dimensions: Int, - seed: Int = DefaultSeed): (FixedEffectCoordinate[DistributedGLMLossFunction], FixedEffectModel) = { + seed: Int = DefaultSeed): (FixedEffectCoordinate[DistributedObjectiveFunction], FixedEffectModel) = { val dataset = generateFixedEffectDataset(featureShardId, size, dimensions, seed) val optimizationProblem = generateFixedEffectOptimizationProblem @@ -266,7 +267,7 @@ trait GameTestUtils extends SparkTestUtils { * @return A newly generated random effect optimization problem */ def generateRandomEffectOptimizationProblem( - dataset: RandomEffectDataset): RandomEffectOptimizationProblem[SingleNodeGLMLossFunction] = { + dataset: RandomEffectDataset): RandomEffectOptimizationProblem[SingleNodeObjectiveFunction] = { val configuration = RandomEffectOptimizationConfiguration(generateOptimizerConfig()) val normalizationBroadcast = sc.broadcast(NoNormalization()) @@ -275,7 +276,7 @@ trait GameTestUtils extends SparkTestUtils { .mapValues { _ => SingleNodeOptimizationProblem( configuration, - SingleNodeGLMLossFunction(configuration, LogisticLossFunction), + SingleNodeObjectiveFunction(configuration, LogisticLossFunction), LogisticRegressionModel.apply, PhotonBroadcast(normalizationBroadcast), VarianceComputationType.NONE) @@ -301,7 +302,7 @@ trait GameTestUtils extends SparkTestUtils { numEntities: Int, size: Int, dimensions: Int, - seed: Int = DefaultSeed): (RandomEffectCoordinate[SingleNodeGLMLossFunction], RandomEffectModel) = { + seed: Int = DefaultSeed): (RandomEffectCoordinate[SingleNodeObjectiveFunction], RandomEffectModel) = { val randomEffectIds = (1 to numEntities).map("re" + _) diff --git a/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/DataValidatorsIntegTest.scala b/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/DataValidatorsIntegTest.scala index 4587e6ee..7f8b3c23 100644 --- a/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/DataValidatorsIntegTest.scala +++ b/photon-client/src/integTest/scala/com/linkedin/photon/ml/data/DataValidatorsIntegTest.scala @@ -97,16 +97,6 @@ class DataValidatorsIntegTest extends SparkTestUtils { Array(Seq(negativeLabel), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(nanLabel), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - // Test smoothed hinge loss checks for binary label - Array( - Seq(binaryLabel, zeroLabel), - TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, - DataValidationType.VALIDATE_FULL, - true), - Array(Seq(positiveLabel), 
TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - Array(Seq(negativeLabel), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - Array(Seq(nanLabel), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - // Test Poisson regression checks for non-negative label Array( Seq(positiveLabel, binaryLabel, zeroLabel), @@ -119,19 +109,16 @@ class DataValidatorsIntegTest extends SparkTestUtils { // Test all task types require finite features Array(Seq(badFeatures), TaskType.LINEAR_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeatures), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badFeatures), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeatures), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), // Test all task types require finite offset Array(Seq(badOffset), TaskType.LINEAR_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badOffset), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badOffset), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), Array(Seq(badOffset), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), // Test all task types require valid weight Array(Seq(badWeight), TaskType.LINEAR_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badWeight), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badWeight), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), Array(Seq(badWeight), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), // Test that even one bad sample causes failure @@ -195,16 +182,6 @@ class DataValidatorsIntegTest extends SparkTestUtils { Array(Seq(negativeLabel), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(nanLabel), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - // Test smoothed hinge loss checks for binary label - Array( - Seq(binaryLabel, zeroLabel), - TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, - DataValidationType.VALIDATE_FULL, - true), - Array(Seq(positiveLabel), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - Array(Seq(negativeLabel), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - Array(Seq(nanLabel), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - // Test Poisson regression checks for non-negative label Array( Seq(positiveLabel, binaryLabel, zeroLabel), @@ -221,9 +198,6 @@ class DataValidatorsIntegTest extends SparkTestUtils { Array(Seq(badFeatures1), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeatures2), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeaturesBoth), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badFeatures1), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badFeatures2), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badFeaturesBoth), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeatures1), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeatures2), 
TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badFeaturesBoth), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), @@ -231,13 +205,11 @@ class DataValidatorsIntegTest extends SparkTestUtils { // Test all task types require finite offset Array(Seq(badOffset), TaskType.LINEAR_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badOffset), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badOffset), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), Array(Seq(badOffset), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), // Test all task types require valid weight Array(Seq(badWeight), TaskType.LINEAR_REGRESSION, DataValidationType.VALIDATE_FULL, false), Array(Seq(badWeight), TaskType.LOGISTIC_REGRESSION, DataValidationType.VALIDATE_FULL, false), - Array(Seq(badWeight), TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, DataValidationType.VALIDATE_FULL, false), Array(Seq(badWeight), TaskType.POISSON_REGRESSION, DataValidationType.VALIDATE_FULL, false), // Test that even one bad sample causes failure diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/Driver.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/Driver.scala index 0ca94694..a16ac8d2 100644 --- a/photon-client/src/main/scala/com/linkedin/photon/ml/Driver.scala +++ b/photon-client/src/main/scala/com/linkedin/photon/ml/Driver.scala @@ -33,7 +33,7 @@ import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.{NoNormalization, NormalizationContext, NormalizationType} import com.linkedin.photon.ml.optimization.{OptimizationStatesTracker, RegularizationContext} import com.linkedin.photon.ml.stat.FeatureDataStatistics -import com.linkedin.photon.ml.supervised.classification.{LogisticRegressionModel, SmoothedHingeLossLinearSVMModel} +import com.linkedin.photon.ml.supervised.classification.LogisticRegressionModel import com.linkedin.photon.ml.supervised.model.GeneralizedLinearModel import com.linkedin.photon.ml.supervised.regression.{LinearRegressionModel, PoissonRegressionModel} import com.linkedin.photon.ml.util.{IOUtils, PhotonLogger, Utils} @@ -428,9 +428,6 @@ protected[ml] class Driver( case TaskType.LOGISTIC_REGRESSION => val models = lambdaModelAndTrackers.map(x => (x._1, x._2.asInstanceOf[LogisticRegressionModel])) ModelSelection.selectBestLinearClassifier(models, perModelMetrics) - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => - val models = lambdaModelAndTrackers.map(x => (x._1, x._2.asInstanceOf[SmoothedHingeLossLinearSVMModel])) - ModelSelection.selectBestLinearClassifier(models, perModelMetrics) } val bestModelDir = new Path(params.outputDir, BEST_MODEL_TEXT).toString diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/data/DataValidators.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/data/DataValidators.scala index a51dca40..fc90c26b 100644 --- a/photon-client/src/main/scala/com/linkedin/photon/ml/data/DataValidators.scala +++ b/photon-client/src/main/scala/com/linkedin/photon/ml/data/DataValidators.scala @@ -243,7 +243,6 @@ object DataValidators extends Logging { case TaskType.LINEAR_REGRESSION => linearRegressionValidators case TaskType.LOGISTIC_REGRESSION => logisticRegressionValidators case TaskType.POISSON_REGRESSION => poissonRegressionValidators - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => logisticRegressionValidators } // Check the data properties @@ -328,7 +327,6 @@ object DataValidators 
extends Logging { case TaskType.LINEAR_REGRESSION => dataFrameLinearRegressionValidators case TaskType.LOGISTIC_REGRESSION => dataFrameLogisticRegressionValidators case TaskType.POISSON_REGRESSION => dataFramePoissonRegressionValidators - case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM => dataFrameLogisticRegressionValidators } // Check the data properties diff --git a/photon-client/src/main/scala/com/linkedin/photon/ml/util/Utils.scala b/photon-client/src/main/scala/com/linkedin/photon/ml/util/Utils.scala index 76f92a23..179a7337 100644 --- a/photon-client/src/main/scala/com/linkedin/photon/ml/util/Utils.scala +++ b/photon-client/src/main/scala/com/linkedin/photon/ml/util/Utils.scala @@ -312,7 +312,6 @@ object Utils { case RMSE.name => RMSE case LogisticLoss.name | "LOGISTICLOSS" => LogisticLoss case PoissonLoss.name | "POISSONLOSS" => PoissonLoss - case SmoothedHingeLoss.name | "SMOOTHEDHINGELOSS" => SmoothedHingeLoss case SquaredLoss.name | "SQUAREDLOSS" => SquaredLoss case MultiPrecisionAtK.batchPrecisionAtKPattern(k, _) => val MultiPrecisionAtK.batchPrecisionAtKPattern(_, idName) = name.trim diff --git a/photon-client/src/test/scala/com/linkedin/photon/ml/util/UtilsTest.scala b/photon-client/src/test/scala/com/linkedin/photon/ml/util/UtilsTest.scala index 37efac64..e4f83e06 100644 --- a/photon-client/src/test/scala/com/linkedin/photon/ml/util/UtilsTest.scala +++ b/photon-client/src/test/scala/com/linkedin/photon/ml/util/UtilsTest.scala @@ -417,11 +417,6 @@ class UtilsTest extends TestTemplateWithTmpDir { val poissonLoss2 = "pOISson_lOSS" assertEquals(PoissonLoss, Utils.evaluatorWithName(poissonLoss2)) - val smoothedHingeLoss1 = " sMooThEDHingELoss" - assertEquals(SmoothedHingeLoss, Utils.evaluatorWithName(smoothedHingeLoss1)) - val smoothedHingeLoss2 = "SmOOTheD_Hinge_LOSS" - assertEquals(SmoothedHingeLoss, Utils.evaluatorWithName(smoothedHingeLoss2)) - val squareLoss1 = "sQUAREDlosS " assertEquals(SquaredLoss, Utils.evaluatorWithName(squareLoss1)) val squareLoss2 = "SquAREd_LOss" diff --git a/photon-lib/src/integTest/scala/com/linkedin/photon/ml/optimization/IntegTestObjective.scala b/photon-lib/src/integTest/scala/com/linkedin/photon/ml/optimization/IntegTestObjective.scala index 8fd6d3f4..91a9206f 100644 --- a/photon-lib/src/integTest/scala/com/linkedin/photon/ml/optimization/IntegTestObjective.scala +++ b/photon-lib/src/integTest/scala/com/linkedin/photon/ml/optimization/IntegTestObjective.scala @@ -22,6 +22,7 @@ import com.linkedin.photon.ml.data.LabeledPoint import com.linkedin.photon.ml.function.{ObjectiveFunction, TwiceDiffFunction} import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.NormalizationContext +import com.linkedin.photon.ml.optimization.TestObjective.MockPointwiseLossFunction import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} /** @@ -29,7 +30,9 @@ import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} * * This function has known minimum at [[IntegTestObjective.CENTROID]]. 
*/ -class IntegTestObjective(sc: SparkContext, treeAggregateDepth: Int) extends ObjectiveFunction with TwiceDiffFunction { +class IntegTestObjective(sc: SparkContext, treeAggregateDepth: Int) + extends ObjectiveFunction(new MockPointwiseLossFunction) + with TwiceDiffFunction { type Data = RDD[LabeledPoint] diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/TaskType.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/TaskType.scala index 9f9dbc2a..cf86b0e5 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/TaskType.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/TaskType.scala @@ -21,5 +21,5 @@ object TaskType extends Enumeration { type TaskType = Value - val LINEAR_REGRESSION, POISSON_REGRESSION, LOGISTIC_REGRESSION, SMOOTHED_HINGE_LOSS_LINEAR_SVM = Value + val LINEAR_REGRESSION, POISSON_REGRESSION, LOGISTIC_REGRESSION = Value } diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorType.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorType.scala index 99c7d09e..b5e299fd 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorType.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/evaluation/EvaluatorType.scala @@ -54,13 +54,12 @@ trait EvaluatorType { object EvaluatorType { // Comparable to the valueSet, if this were an enumeration - val all: Seq[EvaluatorType] = Seq(AUC, AUPR, RMSE, LogisticLoss, PoissonLoss, SmoothedHingeLoss, SquaredLoss) + val all: Seq[EvaluatorType] = Seq(AUC, AUPR, RMSE, LogisticLoss, PoissonLoss, SquaredLoss) case object AUC extends EvaluatorType { val name = "AUC"; val op = MathUtils.greaterThan _ } case object AUPR extends EvaluatorType { val name = "AUPR"; val op = MathUtils.greaterThan _ } case object RMSE extends EvaluatorType { val name = "RMSE"; val op = MathUtils.lessThan _ } case object LogisticLoss extends EvaluatorType { val name = "LOGISTIC_LOSS"; val op = MathUtils.lessThan _ } case object PoissonLoss extends EvaluatorType { val name = "POISSON_LOSS"; val op = MathUtils.lessThan _ } - case object SmoothedHingeLoss extends EvaluatorType { val name = "SMOOTHED_HINGE_LOSS"; val op = MathUtils.lessThan _ } case object SquaredLoss extends EvaluatorType { val name = "SQUARED_LOSS"; val op = MathUtils.lessThan _ } } diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunction.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunction.scala index 24561cde..68a3c9c1 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunction.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/function/ObjectiveFunction.scala @@ -16,13 +16,14 @@ package com.linkedin.photon.ml.function import breeze.linalg.Vector +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.util.BroadcastWrapper /** * The base objective function class for an optimization problem. 
*/ -abstract class ObjectiveFunction { +abstract class ObjectiveFunction(singlePointLossFunction: PointwiseLossFunction) { type Data diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/evaluation/EvaluatorTypeTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/evaluation/EvaluatorTypeTest.scala index 7e54adc1..e9088718 100644 --- a/photon-lib/src/test/scala/com/linkedin/photon/ml/evaluation/EvaluatorTypeTest.scala +++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/evaluation/EvaluatorTypeTest.scala @@ -53,11 +53,6 @@ class EvaluatorTypeTest { assertFalse(EvaluatorType.PoissonLoss.betterThan(1D, 1D)) assertFalse(EvaluatorType.PoissonLoss.betterThan(1D, 0D)) - // Smoothed Hinge Loss - assertTrue(EvaluatorType.SmoothedHingeLoss.betterThan(0D, 1D)) - assertFalse(EvaluatorType.SmoothedHingeLoss.betterThan(1D, 1D)) - assertFalse(EvaluatorType.SmoothedHingeLoss.betterThan(1D, 0D)) - // Squared Loss assertTrue(EvaluatorType.SquaredLoss.betterThan(0D, 1D)) assertFalse(EvaluatorType.SquaredLoss.betterThan(1D, 1D)) diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/L2RegularizationTest.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/L2RegularizationTest.scala index c70556a1..fc2d5c85 100644 --- a/photon-lib/src/test/scala/com/linkedin/photon/ml/function/L2RegularizationTest.scala +++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/function/L2RegularizationTest.scala @@ -20,6 +20,7 @@ import org.testng.annotations.Test import org.testng.Assert.assertEquals import com.linkedin.photon.ml.normalization.NormalizationContext +import com.linkedin.photon.ml.optimization.TestObjective import com.linkedin.photon.ml.util.BroadcastWrapper /** @@ -109,7 +110,9 @@ object L2RegularizationTest { /** * Mock [[ObjectiveFunction]] for testing [[L2Regularization]]. */ - class MockObjectiveFunction extends ObjectiveFunction with TwiceDiffFunction { + class MockObjectiveFunction + extends ObjectiveFunction(new TestObjective.MockPointwiseLossFunction) + with TwiceDiffFunction { import MockObjectiveFunction._ diff --git a/photon-lib/src/test/scala/com/linkedin/photon/ml/optimization/TestObjective.scala b/photon-lib/src/test/scala/com/linkedin/photon/ml/optimization/TestObjective.scala index e64fed87..0e26d55f 100644 --- a/photon-lib/src/test/scala/com/linkedin/photon/ml/optimization/TestObjective.scala +++ b/photon-lib/src/test/scala/com/linkedin/photon/ml/optimization/TestObjective.scala @@ -17,6 +17,7 @@ package com.linkedin.photon.ml.optimization import breeze.linalg.{DenseMatrix, Vector, sum} import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.function.{ObjectiveFunction, TwiceDiffFunction} import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.NormalizationContext @@ -25,7 +26,7 @@ import com.linkedin.photon.ml.util.{BroadcastWrapper, VectorUtils} /** * Test objective function used solely to exercise the optimizers. 
*/ -class TestObjective extends ObjectiveFunction with TwiceDiffFunction { +class TestObjective extends ObjectiveFunction(new TestObjective.MockPointwiseLossFunction) with TwiceDiffFunction { type Data = Iterable[LabeledPoint] @@ -131,6 +132,17 @@ object TestObjective { val CENTROID = 4.0 + /** + * Stub pointwise loss with zero value and derivatives: the test objectives compute their value, gradient, and + * Hessian directly, so this loss exists only to satisfy the [[ObjectiveFunction]] constructor. + */ + class MockPointwiseLossFunction extends PointwiseLossFunction { + + def lossAndDzLoss(margin: Double, label: Double): (Double, Double) = (0, 0) + + def DzzLoss(margin: Double, label: Double): Double = 0 + } + /** * Compute the value and gradient at a single data point. Since the function has known minimum, the input data is * ignored. From ca7636632482900042df57b1c80c851eb0c75a04 Mon Sep 17 00:00:00 2001 From: Alex Shelkovnykov Date: Thu, 7 May 2020 12:13:20 -0400 Subject: [PATCH 2/2] Refactor package location of aggregators --- .../com/linkedin/photon/ml/SparkSessionConfiguration.scala | 4 +++- .../photon/ml/function/DistributedObjectiveFunction.scala | 3 ++- .../photon/ml/function/SingleNodeObjectiveFunction.scala | 3 ++- .../glm => aggregators}/HessianDiagonalAggregator.scala | 3 ++- .../glm => aggregators}/HessianMatrixAggregator.scala | 3 ++- .../glm => aggregators}/HessianVectorAggregator.scala | 3 ++- .../glm => aggregators}/ValueAndGradientAggregator.scala | 3 ++- 7 files changed, 15 insertions(+), 7 deletions(-) rename photon-lib/src/main/scala/com/linkedin/photon/ml/{function/glm => aggregators}/HessianDiagonalAggregator.scala (98%) rename photon-lib/src/main/scala/com/linkedin/photon/ml/{function/glm => aggregators}/HessianMatrixAggregator.scala (98%) rename photon-lib/src/main/scala/com/linkedin/photon/ml/{function/glm => aggregators}/HessianVectorAggregator.scala (98%) rename photon-lib/src/main/scala/com/linkedin/photon/ml/{function/glm => aggregators}/ValueAndGradientAggregator.scala (99%) diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala index b8f1be57..d51a8d19 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/SparkSessionConfiguration.scala @@ -21,9 +21,9 @@ import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.SparkSession import org.apache.spark.SparkConf +import com.linkedin.photon.ml.aggregators._ import com.linkedin.photon.ml.data.{GameDatum, LabeledPoint, LocalDataset} import com.linkedin.photon.ml.function._ -import com.linkedin.photon.ml.function.glm.{HessianVectorAggregator, ValueAndGradientAggregator} import com.linkedin.photon.ml.model.Coefficients import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.optimization._ @@ -52,6 +52,8 @@ object SparkSessionConfiguration { classOf[GeneralizedLinearModel], classOf[GeneralizedLinearOptimizationProblem[_]], classOf[GLMOptimizationConfiguration], + classOf[HessianDiagonalAggregator], + classOf[HessianMatrixAggregator], classOf[HessianVectorAggregator], classOf[LinearSubspaceProjector], classOf[LabeledPoint], diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala index 78b5bcf2..42fe3ec5 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/DistributedObjectiveFunction.scala @@ -17,8 +17,9 @@ 
package com.linkedin.photon.ml.function import breeze.linalg.{DenseMatrix, Vector} import org.apache.spark.rdd.RDD +import com.linkedin.photon.ml.aggregators._ import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.glm.{HessianDiagonalAggregator, HessianMatrixAggregator, HessianVectorAggregator, PointwiseLossFunction, ValueAndGradientAggregator} +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients} import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.optimization.RegularizationType diff --git a/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala b/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala index 2433c2f8..5f88c992 100644 --- a/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala +++ b/photon-api/src/main/scala/com/linkedin/photon/ml/function/SingleNodeObjectiveFunction.scala @@ -16,8 +16,9 @@ package com.linkedin.photon.ml.function import breeze.linalg.{DenseMatrix, Vector} +import com.linkedin.photon.ml.aggregators._ import com.linkedin.photon.ml.data.LabeledPoint -import com.linkedin.photon.ml.function.glm.{HessianDiagonalAggregator, HessianMatrixAggregator, HessianVectorAggregator, PointwiseLossFunction, ValueAndGradientAggregator} +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.model.{Coefficients => ModelCoefficients} import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.optimization.RegularizationType diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianDiagonalAggregator.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianDiagonalAggregator.scala similarity index 98% rename from photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianDiagonalAggregator.scala rename to photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianDiagonalAggregator.scala index 0d7767f9..71225864 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianDiagonalAggregator.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianDiagonalAggregator.scala @@ -12,12 +12,13 @@ * License for the specific language governing permissions and limitations * under the License. 
*/ -package com.linkedin.photon.ml.function.glm +package com.linkedin.photon.ml.aggregators import breeze.linalg._ import org.apache.spark.rdd.RDD import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.util.{BroadcastWrapper, PhotonBroadcast, PhotonNonBroadcast} /** diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianMatrixAggregator.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianMatrixAggregator.scala similarity index 98% rename from photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianMatrixAggregator.scala rename to photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianMatrixAggregator.scala index 16eb1f49..0f9ac06c 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianMatrixAggregator.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianMatrixAggregator.scala @@ -12,12 +12,13 @@ * License for the specific language governing permissions and limitations * under the License. */ -package com.linkedin.photon.ml.function.glm +package com.linkedin.photon.ml.aggregators import breeze.linalg._ import org.apache.spark.rdd.RDD import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.util.{BroadcastWrapper, PhotonBroadcast, PhotonNonBroadcast} /** diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianVectorAggregator.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianVectorAggregator.scala similarity index 98% rename from photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianVectorAggregator.scala rename to photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianVectorAggregator.scala index 4038541d..2464ad5f 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/HessianVectorAggregator.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/HessianVectorAggregator.scala @@ -12,12 +12,13 @@ * License for the specific language governing permissions and limitations * under the License. */ -package com.linkedin.photon.ml.function.glm +package com.linkedin.photon.ml.aggregators import breeze.linalg.{Vector, axpy} import org.apache.spark.rdd.RDD import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.util.{BroadcastWrapper, PhotonBroadcast, PhotonNonBroadcast} diff --git a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/ValueAndGradientAggregator.scala b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/ValueAndGradientAggregator.scala similarity index 99% rename from photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/ValueAndGradientAggregator.scala rename to photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/ValueAndGradientAggregator.scala index b8344d26..4166f44e 100644 --- a/photon-lib/src/main/scala/com/linkedin/photon/ml/function/glm/ValueAndGradientAggregator.scala +++ b/photon-lib/src/main/scala/com/linkedin/photon/ml/aggregators/ValueAndGradientAggregator.scala @@ -12,12 +12,13 @@ * License for the specific language governing permissions and limitations * under the License. 
*/ -package com.linkedin.photon.ml.function.glm +package com.linkedin.photon.ml.aggregators import breeze.linalg.{DenseVector, Vector, axpy} import org.apache.spark.rdd.RDD import com.linkedin.photon.ml.data.LabeledPoint +import com.linkedin.photon.ml.function.glm.PointwiseLossFunction import com.linkedin.photon.ml.normalization.NormalizationContext import com.linkedin.photon.ml.util.{BroadcastWrapper, PhotonBroadcast, PhotonNonBroadcast}
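
Usage sketch: taken together, these two patches route all objective construction through the DistributedObjectiveFunction / SingleNodeObjectiveFunction factories, with the per-example loss supplied as a pluggable PointwiseLossFunction and the aggregators relocated to com.linkedin.photon.ml.aggregators. The sketch below is not part of the patch; it is assembled only from signatures visible in the hunks above, the package paths are assumed from the imports shown, the mocked OptimizerConfig mirrors the updated tests, and ZeroLoss / ObjectiveConstructionSketch are hypothetical names:

    import org.mockito.Mockito.mock

    import com.linkedin.photon.ml.function.DistributedObjectiveFunction
    import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PointwiseLossFunction}
    import com.linkedin.photon.ml.optimization.{L2RegularizationContext, OptimizerConfig}
    import com.linkedin.photon.ml.optimization.game.FixedEffectOptimizationConfiguration

    object ObjectiveConstructionSketch {

      // A pointwise loss supplies, for a single (margin, label) pair, the loss and its first
      // derivative w.r.t. the margin, plus the second derivative.
      class ZeroLoss extends PointwiseLossFunction {

        def lossAndDzLoss(margin: Double, label: Double): (Double, Double) = (0.0, 0.0)

        def DzzLoss(margin: Double, label: Double): Double = 0.0
      }

      // Objective construction replaces the removed DistributedGLMLossFunction: the pointwise
      // loss is passed to the factory directly, alongside the coordinate configuration.
      val configuration = FixedEffectOptimizationConfiguration(
        mock(classOf[OptimizerConfig]),
        L2RegularizationContext)
      val objective = DistributedObjectiveFunction(
        configuration,
        LogisticLossFunction,
        treeAggregateDepth = 1,
        interceptIndexOpt = None)
    }

Swapping in a different PointwiseLossFunction changes the training task without touching the aggregation or optimization code.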