Remove experimental SmoothedHingeLoss training task #455

Open · wants to merge 2 commits into base: master
@@ -21,7 +21,7 @@ import org.testng.annotations.{DataProvider, Test}

import com.linkedin.photon.ml.constants.MathConst
import com.linkedin.photon.ml.data.RandomEffectDataset
-import com.linkedin.photon.ml.function.glm.SingleNodeGLMLossFunction
+import com.linkedin.photon.ml.function.SingleNodeObjectiveFunction
import com.linkedin.photon.ml.model.{Coefficients, RandomEffectModel}
import com.linkedin.photon.ml.optimization.game.RandomEffectOptimizationProblem
import com.linkedin.photon.ml.projector.{LinearSubspaceProjector, LinearSubspaceProjectorTest}
@@ -36,7 +36,6 @@ class RandomEffectCoordinateIntegTest extends SparkTestUtils with GameTestUtils

import RandomEffectCoordinateIntegTest._

-
/**
*
*/
@@ -64,7 +63,7 @@ class RandomEffectCoordinateIntegTest extends SparkTestUtils with GameTestUtils
"someShard")

val mockRandomEffectDataset = mock(classOf[RandomEffectDataset])
-val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeGLMLossFunction]])
+val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeObjectiveFunction]])

doReturn(linearSubspaceProjectors).when(mockRandomEffectDataset).projectors

@@ -120,7 +119,7 @@ class RandomEffectCoordinateIntegTest extends SparkTestUtils with GameTestUtils
"someShard")

val mockRandomEffectDataset = mock(classOf[RandomEffectDataset])
-val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeGLMLossFunction]])
+val mockRandomEffectOptimizationProblem = mock(classOf[RandomEffectOptimizationProblem[SingleNodeObjectiveFunction]])

doReturn(linearSubspaceProjectors).when(mockRandomEffectDataset).projectors

@@ -301,7 +301,6 @@ class GameEstimatorIntegTest extends SparkTestUtils with TestTemplateWithTmpDir
Array(
Array(TaskType.LINEAR_REGRESSION, RMSE),
Array(TaskType.LOGISTIC_REGRESSION, AUC),
-Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM, AUC),
Array(TaskType.POISSON_REGRESSION, PoissonLoss))

/**
@@ -38,7 +38,6 @@ class EvaluatorFactoryIntegTest extends SparkTestUtils {
Array(RMSE),
Array(PoissonLoss),
Array(LogisticLoss),
-Array(SmoothedHingeLoss),
Array(SquaredLoss),
Array(MultiPrecisionAtK(1, ID_TAG)),
Array(MultiPrecisionAtK(5, ID_TAG)),
@@ -23,13 +23,12 @@ import org.testng.annotations.{DataProvider, Test}

import com.linkedin.photon.ml.TaskType
import com.linkedin.photon.ml.data.LabeledPoint
-import com.linkedin.photon.ml.function.glm.{DistributedGLMLossFunction, LogisticLossFunction, PoissonLossFunction, SquaredLossFunction}
-import com.linkedin.photon.ml.function.svm.DistributedSmoothedHingeLossFunction
+import com.linkedin.photon.ml.function.glm.{LogisticLossFunction, PoissonLossFunction, SquaredLossFunction}
import com.linkedin.photon.ml.normalization.NoNormalization
-import com.linkedin.photon.ml.optimization.game.GLMOptimizationConfiguration
-import com.linkedin.photon.ml.optimization.{L2RegularizationContext, NoRegularizationContext}
+import com.linkedin.photon.ml.optimization.game.{FixedEffectOptimizationConfiguration, GLMOptimizationConfiguration}
+import com.linkedin.photon.ml.optimization.{ElasticNetRegularizationContext, L2RegularizationContext, NoRegularizationContext, OptimizerConfig}
import com.linkedin.photon.ml.test.SparkTestUtils
-import com.linkedin.photon.ml.util.PhotonBroadcast
+import com.linkedin.photon.ml.util.{PhotonBroadcast, PhotonNonBroadcast}

/**
* Integration tests for [[DistributedObjectiveFunction]] to verify that the loss functions compute gradients & Hessians
@@ -39,11 +38,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {

import DistributedObjectiveFunctionIntegTest._

-private val twiceDiffTasks = Array(
+private val tasks = Array(
TaskType.LOGISTIC_REGRESSION,
TaskType.LINEAR_REGRESSION,
TaskType.POISSON_REGRESSION)
-private val diffTasks = twiceDiffTasks ++ Array(TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM)
private val binaryClassificationDatasetGenerationFuncs = Array(
generateBenignDatasetBinaryClassification _,
generateWeightedBenignDatasetBinaryClassification _,
@@ -67,15 +65,15 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {
* @return Anonymous functions to generate the loss function and training data for the gradient tests
*/
@DataProvider(parallel = true)
-def getDifferentiableFunctions: Array[Array[Object]] = diffTasks
+def getDifferentiableFunctions: Array[Array[Object]] = tasks
.flatMap {
case TaskType.LOGISTIC_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder =
-() => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)

def lossFuncWithL2Builder =
-() => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)

binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -85,10 +83,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {
case TaskType.LINEAR_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder =
-() => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)

def lossFuncWithL2Builder =
-() => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)

linearRegressionDatasetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -98,29 +96,16 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {
case TaskType.POISSON_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder =
-() => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)

def lossFuncWithL2Builder =
-() => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)

poissonRegressionDatasetGenerationFuncs.flatMap { dataGenFunc =>
Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
}
}

-case TaskType.SMOOTHED_HINGE_LOSS_LINEAR_SVM =>
-treeAggregateDepths.flatMap { treeAggDepth =>
-def lossFuncBuilder =
-() => DistributedSmoothedHingeLossFunction(NO_REG_CONFIGURATION_MOCK, treeAggDepth)
-
-def lossFuncWithL2Builder =
-() => DistributedSmoothedHingeLossFunction(L2_REG_CONFIGURATION_MOCK, treeAggDepth)
-
-binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc =>
-Seq[(Object, Object)]((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
-}
-}
-
case other =>
throw new IllegalArgumentException(s"Unrecognized task type: $other")
}
@@ -132,15 +117,15 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {
* @return Anonymous functions to generate the loss function and training data for the Hessian tests
*/
@DataProvider(parallel = true)
-def getTwiceDifferentiableFunctions: Array[Array[Object]] = twiceDiffTasks
+def getTwiceDifferentiableFunctions: Array[Array[Object]] = tasks
.flatMap {
case TaskType.LOGISTIC_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder =
-() => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)

def lossFuncWithL2Builder =
-() => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, LogisticLossFunction, treeAggDepth)

binaryClassificationDatasetGenerationFuncs.flatMap { dataGenFunc =>
Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -150,10 +135,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {
case TaskType.LINEAR_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder =
-() => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)

def lossFuncWithL2Builder =
-() => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, SquaredLossFunction, treeAggDepth)

linearRegressionDatasetGenerationFuncs.flatMap { dataGenFunc =>
Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -163,10 +148,10 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {
case TaskType.POISSON_REGRESSION =>
treeAggregateDepths.flatMap { treeAggDepth =>
def lossFuncBuilder =
-() => DistributedGLMLossFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(NO_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)

def lossFuncWithL2Builder =
-() => DistributedGLMLossFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)
+() => DistributedObjectiveFunction(L2_REG_CONFIGURATION_MOCK, PoissonLossFunction, treeAggDepth)

poissonRegressionDatasetGenerationFuncs.flatMap { dataGenFunc =>
Seq((lossFuncBuilder, dataGenFunc), (lossFuncWithL2Builder, dataGenFunc))
@@ -573,10 +558,92 @@ class DistributedObjectiveFunctionIntegTest extends SparkTestUtils {

normalizationContextBroadcast.bv.unpersist()
}
+
+/**
+* Verify the value of the loss function without regularization.
+*/
+@Test
+def testValueNoRegularization(): Unit = sparkTest("testValueNoRegularization") {
+
+val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2))
+val coefficients = COEFFICIENT_VECTOR
+
+val fixedEffectRegularizationContext = NoRegularizationContext
+val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration(
+FIXED_EFFECT_OPTIMIZER_CONFIG,
+fixedEffectRegularizationContext)
+val distributedGLMLossFunction = DistributedObjectiveFunction(
+fixedEffectOptimizationConfiguration,
+LogisticLossFunction,
+TREE_AGGREGATE_DEPTH)
+val value = distributedGLMLossFunction.value(
+labeledPoints,
+coefficients,
+PhotonNonBroadcast(NORMALIZATION_CONTEXT))
+
+// expectedValue = log(1 + exp(3)) + log(1 + exp(2)) = 5.1755
+assertEquals(value, 5.1755, EPSILON)
+}
+
+/**
+* Verify the value of the loss function with L2 regularization.
+*/
+@Test
+def testValueWithL2Regularization(): Unit = sparkTest("testValueWithL2Regularization") {
+
+val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2))
+val coefficients = COEFFICIENT_VECTOR
+
+val fixedEffectRegularizationContext = L2RegularizationContext
+val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration(
+FIXED_EFFECT_OPTIMIZER_CONFIG,
+fixedEffectRegularizationContext,
+FIXED_EFFECT_REGULARIZATION_WEIGHT)
+val distributedGLMLossFunction = DistributedObjectiveFunction(
+fixedEffectOptimizationConfiguration,
+LogisticLossFunction,
+TREE_AGGREGATE_DEPTH)
+val value = distributedGLMLossFunction.value(
+labeledPoints,
+coefficients,
+PhotonNonBroadcast(NORMALIZATION_CONTEXT))
+
+// expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + 1 * ((-2)^2 + 3^2) / 2 = 11.6755
+assertEquals(value, 11.6755, EPSILON)
+}
+
+/**
+* Verify the value of the loss function with elastic net regularization.
+*/
+@Test
+def testValueWithElasticNetRegularization(): Unit = sparkTest("testValueWithElasticNetRegularization") {
+
+val labeledPoints = sc.parallelize(Array(LABELED_POINT_1, LABELED_POINT_2))
+val coefficients = COEFFICIENT_VECTOR
+
+val fixedEffectRegularizationContext = ElasticNetRegularizationContext(ALPHA)
+val fixedEffectOptimizationConfiguration = FixedEffectOptimizationConfiguration(
+FIXED_EFFECT_OPTIMIZER_CONFIG,
+fixedEffectRegularizationContext,
+FIXED_EFFECT_REGULARIZATION_WEIGHT)
+val distributedGLMLossFunction = DistributedObjectiveFunction(
+fixedEffectOptimizationConfiguration,
+LogisticLossFunction,
+TREE_AGGREGATE_DEPTH)
+val value = distributedGLMLossFunction.value(
+labeledPoints,
+coefficients,
+PhotonNonBroadcast(NORMALIZATION_CONTEXT))
+
+// L1 is computed by the optimizer.
+// expectedValue = log(1 + exp(3)) + log(1 + exp(2)) + (1 - 0.4) * 1 * ((-2)^2 + 3^2) / 2 = 9.0755
+assertEquals(value, 9.0755, EPSILON)
+}
}

object DistributedObjectiveFunctionIntegTest {

+// Gradient and Hessian test constants
private val SPARK_CONSISTENCY_CHECK_SAMPLES = 5
private val NUM_PARTITIONS = 4
private val PROBLEM_DIMENSION = 5
@@ -593,6 +660,17 @@ object DistributedObjectiveFunctionIntegTest {
private val WEIGHT_RANDOM_MAX = 10
private val TRAINING_SAMPLES = PROBLEM_DIMENSION * PROBLEM_DIMENSION

+// Regularization test constants
+private val FIXED_EFFECT_OPTIMIZER_CONFIG = mock(classOf[OptimizerConfig])
+private val LABELED_POINT_1 = new LabeledPoint(0, DenseVector(0.0, 1.0))
+private val LABELED_POINT_2 = new LabeledPoint(1, DenseVector(1.0, 0.0))
+private val COEFFICIENT_VECTOR = Vector(-2.0, 3.0)
+private val NORMALIZATION_CONTEXT = NoNormalization()
+private val FIXED_EFFECT_REGULARIZATION_WEIGHT = 1D
+private val ALPHA = 0.4
+private val TREE_AGGREGATE_DEPTH = 2
+private val EPSILON = 1e-3
+
doReturn(L2RegularizationContext).when(L2_REG_CONFIGURATION_MOCK).regularizationContext
doReturn(REGULARIZATION_WEIGHT).when(L2_REG_CONFIGURATION_MOCK).regularizationWeight
doReturn(NoRegularizationContext).when(NO_REG_CONFIGURATION_MOCK).regularizationContext
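
Note: the expected values asserted in the three new regularization tests can be reproduced with a short standalone check. The sketch below is plain Scala with no Photon ML dependencies; the object name and the formulas in it are assumptions read off the test comments, namely the standard logistic loss log(1 + exp(-y * score)) with labels mapped from {0, 1} to {-1, +1}, an L2 penalty of lambda * ||w||^2 / 2, and, for elastic net, only the (1 - alpha) L2 portion of the penalty, since the L1 part is applied by the optimizer rather than included in the objective value.

// Standalone sanity check for the expected values in the three new tests.
// Plain Scala; names and formulas here are assumptions, not Photon ML APIs.
object ExpectedValueCheck extends App {

  // Logistic loss for one example: log(1 + exp(-y * score)),
  // with labels mapped from {0, 1} to {-1, +1}.
  def logisticLoss(label: Double, score: Double): Double = {
    val y = if (label > 0.5) 1.0 else -1.0
    math.log(1.0 + math.exp(-y * score))
  }

  // Coefficients w = (-2, 3); scores are w dot x for the two labeled points:
  //   point 1: label 0, features (0, 1) => score  3
  //   point 2: label 1, features (1, 0) => score -2
  val base = logisticLoss(0.0, 3.0) + logisticLoss(1.0, -2.0)

  // L2 penalty with regularization weight lambda = 1: lambda * ||w||^2 / 2
  val l2 = 1.0 * ((-2.0) * (-2.0) + 3.0 * 3.0) / 2.0

  println(f"no regularization:       $base%.4f")                    // 5.1755
  println(f"L2 regularization:       ${base + l2}%.4f")             // 11.6755
  println(f"elastic net (alpha 0.4): ${base + (1 - 0.4) * l2}%.4f") // 9.0755
}

Running it prints 5.1755, 11.6755, and 9.0755, matching the assertions above.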