From d665e2b8286923f091469e2b57a40daa598510c1 Mon Sep 17 00:00:00 2001
From: wendycwong
Date: Fri, 9 Aug 2024 10:23:24 -0700
Subject: [PATCH] GH-16312 constrained GLM issues [nocheck] (#16317)

* Continue to double-check the algo.
* fix bug in gradient update.
* implemented various versions of IRLSM
* Found that the original GLM with the gradient-magnitude change works best
* GH-16312: fix wrong error raised by duplicated/conflicting constraints.
* force beta constraints to be satisfied at the end if they are not.
* GH-16312: add assert check to test per Veronika's suggestion.
* GH-16312: fix tests after fixing constrained GLM bugs.
* GH-16312: fixed NPE error in checkCoeffsBounds
* GH-16312: fix test failure.
* remove conflicting-constraint tests as we do not currently have the capability to detect conflicts.
* change dataset path from AWS to local
---
 .../main/java/hex/glm/ComputationState.java   |   5 +-
 .../java/hex/glm/ConstrainedGLMUtils.java     |  75 ++-
 h2o-algos/src/main/java/hex/glm/GLM.java      | 550 ++++++++++++++++--
 h2o-algos/src/main/java/hex/glm/GLMTask.java  |   9 -
 .../hex/optimization/OptimizationUtils.java   |  11 +-
 .../test/java/hex/glm/GLMConstrainedTest.java |  63 +-
 ...12_contrained_GLM_bad_constraints_large.py | 156 +++++
 ...ontrained_GLM_beta_constraint_NPE_large.py | 163 ++++++
 ...unit_GH_16312_contrained_GLM_test_large.py | 108 ++++
 ...se_lessthan_linear_constraints_binomial.py |   6 +-
 ...6722_equality_constraints_only_binomial.py |   5 +-
 ...se_lessthan_linear_constraints_binomial.py |   6 +-
 ..._equality_lessthan_constraints_binomial.py |   2 +-
 ..._tight_linear_constraints_only_binomial.py |   2 +-
 .../pyunit_GH_6722_redundant_constraints.py   |   2 +-
 ...ht_equality_linear_constraints_binomial.py |   2 +-
 ..._tight_linear_constraints_only_binomial.py |   2 +-
 .../glm/runit_GH_6722_redundant_constraints.R |   2 +-
 18 files changed, 1015 insertions(+), 154 deletions(-)
 create mode 100644 h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_bad_constraints_large.py
 create mode 100644 h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_beta_constraint_NPE_large.py
 create mode 100644 h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_test_large.py

diff --git a/h2o-algos/src/main/java/hex/glm/ComputationState.java b/h2o-algos/src/main/java/hex/glm/ComputationState.java
index 23c343202e0d..1aaaf697e3bc 100644
--- a/h2o-algos/src/main/java/hex/glm/ComputationState.java
+++ b/h2o-algos/src/main/java/hex/glm/ComputationState.java
@@ -91,7 +91,6 @@ public final class ComputationState {
   int _totalBetaLength; // actual coefficient length without taking into account active columns only
   int _betaLengthPerClass;
   public boolean _noReg;
-  public boolean _hasConstraints;
   public ConstrainedGLMUtils.ConstraintGLMStates _csGLMState;

   public ComputationState(Job job, GLMParameters parms, DataInfo dinfo, BetaConstraint bc, GLM.BetaInfo bi){
@@ -1414,7 +1413,7 @@ protected GramXY computeNewGram(DataInfo activeData, double [] beta, GLMParamete
     double obj_reg = _parms._obj_reg;
     if(_glmw == null) _glmw = new GLMModel.GLMWeightsFun(_parms);
     GLMTask.GLMIterationTask gt = new GLMTask.GLMIterationTask(_job._key, activeData, _glmw, beta,
-            _activeClass, _hasConstraints).doAll(activeData._adaptedFrame);
+            _activeClass).doAll(activeData._adaptedFrame);
     gt._gram.mul(obj_reg);
     if (_parms._glmType.equals(GLMParameters.GLMType.gam)) { // add contribution from GAM smoothness factor
       Integer[] activeCols=null;
@@ -1463,7 +1462,7 @@ protected GramGrad computeGram(double [] beta, GLMGradientInfo gradientInfo){
     double obj_reg =
_parms._obj_reg; if(_glmw == null) _glmw = new GLMModel.GLMWeightsFun(_parms); GLMTask.GLMIterationTask gt = new GLMTask.GLMIterationTask(_job._key, activeData, _glmw, beta, - _activeClass, _hasConstraints).doAll(activeData._adaptedFrame); + _activeClass).doAll(activeData._adaptedFrame); double[][] fullGram = gt._gram.getXX(); // only extract gram matrix mult(fullGram, obj_reg); if (_gramEqual != null) diff --git a/h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java b/h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java index ac198226e727..476ba8392f92 100644 --- a/h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java +++ b/h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java @@ -84,7 +84,7 @@ public static class ConstraintGLMStates { double _ckCS; double _ckCSHalf; // = ck/2 double _epsilonkCS; - double _epsilonkCSSquare; + public double _epsilonkCSSquare; double _etakCS; double _etakCSSquare; double _epsilon0; @@ -137,30 +137,35 @@ public static int[] extractBetaConstraints(ComputationState state, String[] coef List equalityC = new ArrayList<>(); List lessThanEqualToC = new ArrayList<>(); List betaIndexOnOff = new ArrayList<>(); - if (betaC._betaLB != null) { - int numCons = betaC._betaLB.length-1; - for (int index=0; indexx).toArray(); + return betaIndexOnOff.size() == 0 ? null : betaIndexOnOff.stream().mapToInt(x -> x).toArray(); } /*** @@ -506,11 +511,10 @@ public static boolean extractCoeffNames(List coeffList, LinearConstraint public static List foundRedundantConstraints(ComputationState state, final double[][] initConstraintMatrix) { Matrix constMatrix = new Matrix(initConstraintMatrix); - Matrix constMatrixLessConstant = constMatrix.getMatrix(0, constMatrix.getRowDimension() -1, 1, constMatrix.getColumnDimension()-1); - Matrix constMatrixTConstMatrix = constMatrixLessConstant.times(constMatrixLessConstant.transpose()); - int rank = constMatrixLessConstant.rank(); + Matrix matrixSquare = constMatrix.times(constMatrix.transpose()); + int rank = matrixSquare.rank(); if (rank < constMatrix.getRowDimension()) { // redundant constraints are specified - double[][] rMatVal = constMatrixTConstMatrix.qr().getR().getArray(); + double[][] rMatVal = matrixSquare.qr().getR().getArray(); List diag = IntStream.range(0, rMatVal.length).mapToDouble(x->Math.abs(rMatVal[x][x])).boxed().collect(Collectors.toList()); int[] sortedIndices = IntStream.range(0, diag.size()).boxed().sorted((i, j) -> diag.get(i).compareTo(diag.get(j))).mapToInt(ele->ele).toArray(); List duplicatedEleIndice = IntStream.range(0, diag.size()-rank).map(x -> sortedIndices[x]).boxed().collect(Collectors.toList()); @@ -645,6 +649,16 @@ public static void genInitialLambda(Random randObj, LinearConstraints[] constrai } } + public static void adjustLambda(LinearConstraints[] constraints, double[] lambda) { + int numC = constraints.length; + LinearConstraints oneC; + for (int index=0; index coefNam if (equalityConstraints != null) // equality constraints Arrays.stream(equalityConstraints).forEach(constraint -> { evalOneConstraint(constraint, betaCnd, coefNames); - constraint._active = (Math.abs(constraint._constraintsVal) > EPS2); + // constraint._active = (Math.abs(constraint._constraintsVal) > EPS2); + constraint._active = true; }); if (lessThanEqualToConstraints != null) // less than or equal to constraints Arrays.stream(lessThanEqualToConstraints).forEach(constraint -> { evalOneConstraint(constraint, betaCnd, coefNames); - constraint._active = constraint._constraintsVal > 0; + constraint._active = 
constraint._constraintsVal >= 0;
        });
  }

diff --git a/h2o-algos/src/main/java/hex/glm/GLM.java b/h2o-algos/src/main/java/hex/glm/GLM.java
index 8e13acb10215..29b51f5db4ff 100644
--- a/h2o-algos/src/main/java/hex/glm/GLM.java
+++ b/h2o-algos/src/main/java/hex/glm/GLM.java
@@ -1403,7 +1403,8 @@ void checkInitLinearConstraints() {
    }
    // no regularization for constrained GLM except during testing
    if ((notZeroLambdas(_parms._lambda) || _parms._lambda_search) && !_parms._testCSZeroGram) {
-      error("lambda or lambda_search", "Regularization is not allowed for constrained GLM.");
+      error("lambda or lambda_search", "Regularization is not allowed for constrained GLM. Set" +
+              " lambda to 0.0.");
      return;
    }
    if ("multinomial".equals(_parms._solver) || "ordinal".equals(_parms._solver)) {
@@ -1439,16 +1440,14 @@ void checkAssignLinearConstraints() {
    String[] constraintCoefficientNames = constraintNames.toArray(new String[0]);
    if (countNumConst(_state) > coefNames.length)
      warn("number of constraints", " exceeds the number of coefficients. The system is" +
-              " over-constraints, and probably may not yield a valid solution due to possible conflicting " +
-              "constraints. Consider reducing the number of constraints.");
+              " over-constrained with duplicated constraints. Consider reducing the number of constraints.");
    List redundantConstraints = foundRedundantConstraints(_state, initConstraintMatrix);
    if (redundantConstraints != null) {
      int numRedundant = redundantConstraints.size();
      for (int index = 0; index < numRedundant; index++)
-        error("redundant and possibly conflicting linear constraints", redundantConstraints.get(index));
+        error("redundant linear constraints", redundantConstraints.get(index));
    } else {
      _state._csGLMState = new ConstraintGLMStates(constraintCoefficientNames, initConstraintMatrix, _parms);
-      _state._hasConstraints = true;
    }
  }

@@ -2352,9 +2351,9 @@ private void fitIRLSM(Solver s) {
   * This method fits the constrained GLM for IRLSM. We implemented the algorithm depicted in the document (H2O
   * Constrained GLM Implementation.pdf) attached to this issue: https://github.com/h2oai/h2o-3/issues/6722. We will
   * hereafter use the term "the doc" to refer to this document. In particular, we follow the algorithm described in
-   * Section VII (and table titled Algorithm 19.1) of the doc.
+   * Section VII (and the table titled Algorithm 19.1) of the doc. This variant is not as good as the one that considers the magnitude of the gradient.
   */
-  private void fitIRLSMCS() {
+  private void fitIRLSMCS9() {
    double[] betaCnd = _checkPointFirstIter ? _model._betaCndCheckpoint : _state.beta();
    double[] tempBeta = _parms._separate_linear_beta ?
new double[betaCnd.length] : null; List coefNames = Arrays.stream(_state.activeData()._coefNames).collect(Collectors.toList()); @@ -2389,7 +2388,153 @@ private void fitIRLSMCS() { _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, _dinfo, 0, _state.activeBC(), _betaInfo); GLMGradientInfo gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan, - equalityConstraints, lessThanEqualToConstraints); + equalityConstraints, lessThanEqualToConstraints); // add dpenalty/dx to gradient from penalty term + _state.setConstraintInfo(gradientInfo, equalityConstraints, lessThanEqualToConstraints, lambdaEqual, lambdaLessThan); // update state ginfo with contributions from GLMGradientInfo + boolean predictorSizeChange; + boolean applyBetaConstraints = _parms._separate_linear_beta && _betaConstraintsOn; + // short circuit check here: if gradient magnitude is small and all constraints are satisfied, quit right away + if (constraintsStop(gradientInfo, _state)) { + Log.info(LogMsg("GLM with constraints model building completed successfully!!")); + return; + } + double gradMagSquare = ArrayUtils.innerProduct(gradientInfo._gradient, gradientInfo._gradient); + boolean done; + boolean gradSmallEnough = (gradMagSquare <= _state._csGLMState._epsilonkCSSquare); + int origIter = iterCnt+1; + boolean lineSearchSuccess; + try { + while (true) { + do { // implement Algorithm 11.8 of the doc to find coefficients with epsilon k as the precision + iterCnt++; + long t1 = System.currentTimeMillis(); + ComputationState.GramGrad gram = _state.computeGram(betaCnd, gradientInfo); // calculate gram (hessian), xy, objective values + if (iterCnt == origIter) { + Matrix gramMatrix = new Matrix(gram._gram); + if (gramMatrix.cond() >= BAD_CONDITION_NUMBER) + if (_parms._init_optimal_glm) { + warn("init_optimal_glm", " should be disabled. This lead to gram matrix being close to" + + " singular. Please re-run with init_optimal_glm set to false."); + } + } + predictorSizeChange = !coefNames.equals(Arrays.asList(_state.activeData().coefNames())); + if (predictorSizeChange) { // reset if predictors changed + coefNames = changeCoeffBetainfo(_state.activeData()._coefNames); + _state.resizeConstraintInfo(equalityConstraints, lessThanEqualToConstraints); + ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _state.activeData(), 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _state.activeData(), 0, _state.activeBC(), _betaInfo); + tempBeta = new double[coefNames.size()]; + } + // solve for GLM coefficients + betaCnd = constraintGLM_solve(gram); // beta_k+1 = beta_k+dk where dk = beta_k+1-beta_k + predictorSizeChange = !coefNames.equals(Arrays.asList(_state.activeData().coefNames())); + if (predictorSizeChange) { // reset if predictors changed + coefNames = changeCoeffBetainfo(_state.activeData()._coefNames); + _state.resizeConstraintInfo(equalityConstraints, lessThanEqualToConstraints); + ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _state.activeData(), 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _state.activeData(), 0, _state.activeBC(), _betaInfo); + tempBeta = new double[betaCnd.length]; + } + // add exact line search for GLM coefficients. 
Refer to the doc, Algorithm 11.5 + if (ls == null) + ls = new ExactLineSearch(betaCnd, _state, coefNames); + else + ls.reset(betaCnd, _state, coefNames); + + // line search can fail when the gradient is close to zero. In this case, we need to update the + // constraint parameters. + lineSearchSuccess = ls.findAlpha(lambdaEqual, lambdaLessThan, _state, equalityConstraints, + lessThanEqualToConstraints, ginfo); + gradMagSquare = ArrayUtils.innerProduct(ls._ginfoOriginal._gradient, ls._ginfoOriginal._gradient); + gradSmallEnough = gradMagSquare <= _state._csGLMState._epsilonkCSSquare; + if (lineSearchSuccess) { + betaCnd = ls._newBeta; + gradientInfo = ls._ginfoOriginal; + } else { // ls failed, reset to + if (applyBetaConstraints) // separate beta and linear constraints + bc.applyAllBounds(_state.beta()); + ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints, lessThanEqualToConstraints, + ginfo, _state.beta()); + Log.info(LogMsg("Line search failed " + ls)); + return; + } + + if (applyBetaConstraints) { // if beta constraints are applied, may need to update constraints, derivatives, gradientInfo + System.arraycopy(betaCnd, 0, tempBeta, 0, betaCnd.length); + bc.applyAllBounds(betaCnd); + ArrayUtils.subtract(betaCnd, tempBeta, tempBeta); + ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints, + lessThanEqualToConstraints, ginfo, betaCnd); + gradientInfo = ls._ginfoOriginal; + } + + // check for stopping conditions which also updates the variables in state. + // stopping condition is to stop us getting stuck in improvements that are too insignificant. + // However, we will only exit the while loop when the gradMagSquare is still too high. There is no hope + // for improvement here anymore since the beta values and gradient values are not changing much anymore. + done = stop_requested() || (_state._iter >= _parms._max_iterations) || _earlyStop; // time to go + if (!progress(betaCnd, gradientInfo)) { + checkKKTConditions(betaCnd, gradientInfo, iterCnt); + return; + } + + Log.info(LogMsg("computed in " + (System.currentTimeMillis() - t1) + "ms, step = " + iterCnt + + ((_lslvr != null) ? ", l1solver " + _lslvr : ""))); + } while (!gradSmallEnough); + // update constraint parameters, ck, lambdas and others + updateConstraintParameters(_state, lambdaEqual, lambdaLessThan, equalityConstraints, + lessThanEqualToConstraints, _parms); + // update gradient calculation with new value (lambda and/or ck). + gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan, + equalityConstraints, lessThanEqualToConstraints); + _state.updateState(betaCnd, gradientInfo); // update computation state with new info + } + } catch (NonSPDMatrixException e) { + Log.warn(LogMsg("Got Non SPD matrix, stopped.")); + } + } + + // original algo, set lambda = 0 for inactive constraints, no good effect. + private void fitIRLSMCS8() { + double[] betaCnd = _checkPointFirstIter ? _model._betaCndCheckpoint : _state.beta(); + double[] tempBeta = _parms._separate_linear_beta ? 
new double[betaCnd.length] : null; + List coefNames = Arrays.stream(_state.activeData()._coefNames).collect(Collectors.toList()); + LinearConstraints[] equalityConstraints; + LinearConstraints[] lessThanEqualToConstraints; + final BetaConstraint bc = _state.activeBC(); + if (_parms._separate_linear_beta) { // keeping linear and beta constraints separate in this case + equalityConstraints = _state._equalityConstraintsLinear; + lessThanEqualToConstraints = _state._lessThanEqualToConstraintsLinear; + } else { // combine beta and linear constraints together + equalityConstraints = combineConstraints(_state._equalityConstraintsBeta, _state._equalityConstraintsLinear); + lessThanEqualToConstraints = combineConstraints(_state._lessThanEqualToConstraintsBeta, + _state._lessThanEqualToConstraintsLinear); + } + boolean hasEqualityConstraints = equalityConstraints != null; + boolean hasLessConstraints = lessThanEqualToConstraints != null; + double[] lambdaEqual = hasEqualityConstraints ? new double[equalityConstraints.length] : null; + double[] lambdaLessThan = hasLessConstraints ? new double[lessThanEqualToConstraints.length] : null; + Long startSeed = _parms._seed == -1 ? new Random().nextLong() : _parms._seed; + Random randObj = new Random(startSeed); + updateConstraintValues(betaCnd, coefNames, equalityConstraints, lessThanEqualToConstraints); + if (hasEqualityConstraints) // set lambda values for constraints + genInitialLambda(randObj, equalityConstraints, lambdaEqual); + if (hasLessConstraints) { + genInitialLambda(randObj, lessThanEqualToConstraints, lambdaLessThan); + adjustLambda(lessThanEqualToConstraints, lambdaLessThan); + } + + ExactLineSearch ls = null; + int iterCnt = (_checkPointFirstIter ? _state._iter : 0)+_initIter; + // contribution to gradient and hessian from constraints + _state.initConstraintDerivatives(equalityConstraints, lessThanEqualToConstraints, coefNames); + + GLMGradientSolver ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _dinfo, 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _dinfo, 0, _state.activeBC(), _betaInfo); + GLMGradientInfo gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan, + equalityConstraints, lessThanEqualToConstraints); // add dpenalty/dx to gradient from penalty term _state.setConstraintInfo(gradientInfo, equalityConstraints, lessThanEqualToConstraints, lambdaEqual, lambdaLessThan); // update state ginfo with contributions from GLMGradientInfo boolean predictorSizeChange; boolean applyBetaConstraints = _parms._separate_linear_beta && _betaConstraintsOn; @@ -2398,8 +2543,11 @@ private void fitIRLSMCS() { Log.info(LogMsg("GLM with constraints model building completed successfully!!")); return; } - double gradMagSquare; + double gradMagSquare = ArrayUtils.innerProduct(gradientInfo._gradient, gradientInfo._gradient); + boolean done; + boolean gradSmallEnough = (gradMagSquare <= _state._csGLMState._epsilonkCSSquare); int origIter = iterCnt+1; + boolean lineSearchSuccess; try { while (true) { do { // implement Algorithm 11.8 of the doc to find coefficients with epsilon k as the precision @@ -2440,11 +2588,16 @@ private void fitIRLSMCS() { else ls.reset(betaCnd, _state, coefNames); - if (ls.findAlpha(lambdaEqual, lambdaLessThan, _state, equalityConstraints, lessThanEqualToConstraints, ginfo)) { - gradMagSquare = ArrayUtils.innerProduct(gradientInfo._gradient, gradientInfo._gradient); + // line search can fail when the gradient is close to zero. 
In this case, we need to update the
+          // constraint parameters.
+          lineSearchSuccess = ls.findAlpha(lambdaEqual, lambdaLessThan, _state, equalityConstraints,
+                  lessThanEqualToConstraints, ginfo);
+          gradMagSquare = ArrayUtils.innerProduct(ls._ginfoOriginal._gradient, ls._ginfoOriginal._gradient);
+          gradSmallEnough = gradMagSquare <= _state._csGLMState._epsilonkCSSquare;
+          if (lineSearchSuccess) {
            betaCnd = ls._newBeta;
            gradientInfo = ls._ginfoOriginal;
-          } else {  // ls failed, reset to
+          } else { // ls failed, reset to the previous beta
            if (applyBetaConstraints) // separate beta and linear constraints
              bc.applyAllBounds(_state.beta());
            ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints, lessThanEqualToConstraints,
@@ -2457,19 +2610,329 @@ private void fitIRLSMCS() {
            System.arraycopy(betaCnd, 0, tempBeta, 0, betaCnd.length);
            bc.applyAllBounds(betaCnd);
            ArrayUtils.subtract(betaCnd, tempBeta, tempBeta);
-            ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints,
+            ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints,
                    lessThanEqualToConstraints, ginfo, betaCnd);
            gradientInfo = ls._ginfoOriginal;
          }
-
-          // check for stopping conditions
-          if (checkIterationDone(betaCnd, gradientInfo, iterCnt)) // ratio of objective drops.
+
+          // check for stopping conditions; progress(...) also updates the variables in state.
+          // The stopping condition keeps us from getting stuck on improvements that are too insignificant.
+          // Note that we may exit the while loop here even though gradMagSquare is still too high: there is
+          // no hope of further improvement since the beta and gradient values are barely changing anymore.
+          done = stop_requested() || (_state._iter >= _parms._max_iterations) || _earlyStop; // time to go
+          if (!progress(betaCnd, gradientInfo)) {
+            checkKKTConditions(betaCnd, gradientInfo, iterCnt);
            return;
-          Log.info(LogMsg("computed in " + (System.currentTimeMillis() - t1) + "ms, step = " + iterCnt +
+          }
+
+          Log.info(LogMsg("computed in " + (System.currentTimeMillis() - t1) + "ms, step = " + iterCnt +
                  ((_lslvr != null) ? ", l1solver " + _lslvr : "")));
-        } while (gradMagSquare > _state._csGLMState._epsilonkCSSquare);
+        } while (!gradSmallEnough);
        // update constraint parameters, ck, lambdas and others
-        updateConstraintParameters(_state, lambdaEqual, lambdaLessThan, equalityConstraints, lessThanEqualToConstraints, _parms);
+        updateConstraintParameters(_state, lambdaEqual, lambdaLessThan, equalityConstraints,
+                lessThanEqualToConstraints, _parms);
+        if (hasLessConstraints)
+          adjustLambda(lessThanEqualToConstraints, lambdaLessThan);
+        // update gradient calculation with new value (lambda and/or ck).
+        gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan,
+                equalityConstraints, lessThanEqualToConstraints);
+        _state.updateState(betaCnd, gradientInfo); // update computation state with new info
+      }
+    } catch (NonSPDMatrixException e) {
+      Log.warn(LogMsg("Got Non SPD matrix, stopped."));
+    }
+  }
+
+  // original implementation, but it will not quit when the magnitude of the gradient is small. If the exit
+  // condition is triggered (either ls failed or no progress is made) while the magnitude of the gradient is
+  // small, we will exit the while loop but still arrive at the part that updates the constraint parameters.
+  // This seems to help.
+  private void fitIRLSMCS() {
+    double[] betaCnd = _checkPointFirstIter ? _model._betaCndCheckpoint : _state.beta();
+    double[] tempBeta = _parms._separate_linear_beta ?
new double[betaCnd.length] : null; + List coefNames = Arrays.stream(_state.activeData()._coefNames).collect(Collectors.toList()); + LinearConstraints[] equalityConstraints; + LinearConstraints[] lessThanEqualToConstraints; + final BetaConstraint bc = _state.activeBC(); + if (_parms._separate_linear_beta) { // keeping linear and beta constraints separate in this case + equalityConstraints = _state._equalityConstraintsLinear; + lessThanEqualToConstraints = _state._lessThanEqualToConstraintsLinear; + } else { // combine beta and linear constraints together + equalityConstraints = combineConstraints(_state._equalityConstraintsBeta, _state._equalityConstraintsLinear); + lessThanEqualToConstraints = combineConstraints(_state._lessThanEqualToConstraintsBeta, + _state._lessThanEqualToConstraintsLinear); + } + boolean hasEqualityConstraints = equalityConstraints != null; + boolean hasLessConstraints = lessThanEqualToConstraints != null; + double[] lambdaEqual = hasEqualityConstraints ? new double[equalityConstraints.length] : null; + double[] lambdaLessThan = hasLessConstraints ? new double[lessThanEqualToConstraints.length] : null; + Long startSeed = _parms._seed == -1 ? new Random().nextLong() : _parms._seed; + Random randObj = new Random(startSeed); + updateConstraintValues(betaCnd, coefNames, equalityConstraints, lessThanEqualToConstraints); + if (hasEqualityConstraints) // set lambda values for constraints + genInitialLambda(randObj, equalityConstraints, lambdaEqual); + if (hasLessConstraints) + genInitialLambda(randObj, lessThanEqualToConstraints, lambdaLessThan); + ExactLineSearch ls = null; + int iterCnt = (_checkPointFirstIter ? _state._iter : 0)+_initIter; + // contribution to gradient and hessian from constraints + _state.initConstraintDerivatives(equalityConstraints, lessThanEqualToConstraints, coefNames); + + GLMGradientSolver ginfo = gam.equals(_parms._glmType) ? 
new GLMGradientSolver(_job, _parms, _dinfo, 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _dinfo, 0, _state.activeBC(), _betaInfo); + GLMGradientInfo gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan, + equalityConstraints, lessThanEqualToConstraints); // add dpenalty/dx to gradient from penalty term + _state.setConstraintInfo(gradientInfo, equalityConstraints, lessThanEqualToConstraints, lambdaEqual, lambdaLessThan); // update state ginfo with contributions from GLMGradientInfo + boolean predictorSizeChange; + boolean applyBetaConstraints = _parms._separate_linear_beta && _betaConstraintsOn; + // short circuit check here: if gradient magnitude is small and all constraints are satisfied, quit right away + if (constraintsStop(gradientInfo, _state)) { + Log.info(LogMsg("GLM with constraints model building completed successfully!!")); + return; + } + double gradMagSquare = ArrayUtils.innerProduct(gradientInfo._gradient, gradientInfo._gradient); + boolean done; + boolean gradSmallEnough = (gradMagSquare <= _state._csGLMState._epsilonkCSSquare); + int origIter = iterCnt+1; + boolean lineSearchSuccess; + try { + while (true) { + do { // implement Algorithm 11.8 of the doc to find coefficients with epsilon k as the precision + iterCnt++; + long t1 = System.currentTimeMillis(); + ComputationState.GramGrad gram = _state.computeGram(betaCnd, gradientInfo); // calculate gram (hessian), xy, objective values + if (iterCnt == origIter) { + Matrix gramMatrix = new Matrix(gram._gram); + if (gramMatrix.cond() >= BAD_CONDITION_NUMBER) + if (_parms._init_optimal_glm) { + warn("init_optimal_glm", " should be disabled. This lead to gram matrix being close to" + + " singular. Please re-run with init_optimal_glm set to false."); + } + } + predictorSizeChange = !coefNames.equals(Arrays.asList(_state.activeData().coefNames())); + if (predictorSizeChange) { // reset if predictors changed + coefNames = changeCoeffBetainfo(_state.activeData()._coefNames); + _state.resizeConstraintInfo(equalityConstraints, lessThanEqualToConstraints); + ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _state.activeData(), 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _state.activeData(), 0, _state.activeBC(), _betaInfo); + tempBeta = new double[coefNames.size()]; + } + // solve for GLM coefficients + betaCnd = constraintGLM_solve(gram); // beta_k+1 = beta_k+dk where dk = beta_k+1-beta_k + predictorSizeChange = !coefNames.equals(Arrays.asList(_state.activeData().coefNames())); + if (predictorSizeChange) { // reset if predictors changed + coefNames = changeCoeffBetainfo(_state.activeData()._coefNames); + _state.resizeConstraintInfo(equalityConstraints, lessThanEqualToConstraints); + ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _state.activeData(), 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _state.activeData(), 0, _state.activeBC(), _betaInfo); + tempBeta = new double[betaCnd.length]; + } + // add exact line search for GLM coefficients. Refer to the doc, Algorithm 11.5 + if (ls == null) + ls = new ExactLineSearch(betaCnd, _state, coefNames); + else + ls.reset(betaCnd, _state, coefNames); + + // line search can fail when the gradient is close to zero. In this case, we need to update the + // constraint parameters. 
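+          // A minimal sketch of what findAlpha enforces at each trial step (c1 < c2 in (0,1) are
+          // illustrative constants here, not the exact fields used by ExactLineSearch): with search
+          // direction d, step size alpha and penalized objective f,
+          //   first Wolfe (sufficient decrease): f(beta + alpha*d) <= f(beta) + c1*alpha*grad_f'*d
+          //   second Wolfe (curvature):          grad_f(beta + alpha*d)'*d >= c2*grad_f'*d
+          // alpha is accepted only if both hold; otherwise the bracket shrinks until alphar drops
+          // below EPS_CS_SQUARE and findAlpha reports failure.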
+ lineSearchSuccess = ls.findAlpha(lambdaEqual, lambdaLessThan, _state, equalityConstraints, + lessThanEqualToConstraints, ginfo); + gradMagSquare = ArrayUtils.innerProduct(ls._ginfoOriginal._gradient, ls._ginfoOriginal._gradient); + gradSmallEnough = gradMagSquare <= _state._csGLMState._epsilonkCSSquare; + if (lineSearchSuccess ||gradSmallEnough) { + betaCnd = ls._newBeta; + gradientInfo = ls._ginfoOriginal; + } else { // ls failed, reset to + if (_betaConstraintsOn) // separate beta and linear constraints + bc.applyAllBounds(_state.beta()); + ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints, lessThanEqualToConstraints, + ginfo, _state.beta()); + Log.info(LogMsg("Line search failed " + ls)); + return; + } + + if (applyBetaConstraints) { // if beta constraints are applied separately, may need to update constraints, derivatives, gradientInfo + System.arraycopy(betaCnd, 0, tempBeta, 0, betaCnd.length); + bc.applyAllBounds(betaCnd); + ArrayUtils.subtract(betaCnd, tempBeta, tempBeta); + ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints, + lessThanEqualToConstraints, ginfo, betaCnd); + gradientInfo = ls._ginfoOriginal; + } + + // check for stopping conditions which also updates the variables in state. + // stopping condition is to stop us getting stuck in improvements that are too insignificant. + // However, we will only exit the while loop when the gradMagSquare is still too high. There is no hope + // for improvement here anymore since the beta values and gradient values are not changing much anymore. + done = stop_requested() || (_state._iter >= _parms._max_iterations) || _earlyStop; // time to go + if ((!progress(betaCnd, gradientInfo) && !gradSmallEnough) || done) { + checkKKTConditions(betaCnd, gradientInfo, iterCnt); + if (_betaConstraintsOn) + bc.applyAllBounds(_state.beta()); + return; + } + + Log.info(LogMsg("computed in " + (System.currentTimeMillis() - t1) + "ms, step = " + iterCnt + + ((_lslvr != null) ? ", l1solver " + _lslvr : ""))); + } while (!gradSmallEnough); + // update constraint parameters, ck, lambdas and others + updateConstraintParameters(_state, lambdaEqual, lambdaLessThan, equalityConstraints, + lessThanEqualToConstraints, _parms); + // update gradient calculation with new value (lambda and/or ck). + gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan, + equalityConstraints, lessThanEqualToConstraints); + _state.updateState(betaCnd, gradientInfo); // update computation state with new info + } + } catch (NonSPDMatrixException e) { + Log.warn(LogMsg("Got Non SPD matrix, stopped.")); + } + } + + // only has penalty and no constrained multipliers, original algorithm + private void fitIRLSMCS2() { + double[] betaCnd = _checkPointFirstIter ? _model._betaCndCheckpoint : _state.beta(); + double[] tempBeta = _parms._separate_linear_beta ? 
new double[betaCnd.length] : null; + List coefNames = Arrays.stream(_state.activeData()._coefNames).collect(Collectors.toList()); + LinearConstraints[] equalityConstraints; + LinearConstraints[] lessThanEqualToConstraints; + final BetaConstraint bc = _state.activeBC(); + if (_parms._separate_linear_beta) { // keeping linear and beta constraints separate in this case + equalityConstraints = _state._equalityConstraintsLinear; + lessThanEqualToConstraints = _state._lessThanEqualToConstraintsLinear; + } else { // combine beta and linear constraints together + equalityConstraints = combineConstraints(_state._equalityConstraintsBeta, _state._equalityConstraintsLinear); + lessThanEqualToConstraints = combineConstraints(_state._lessThanEqualToConstraintsBeta, + _state._lessThanEqualToConstraintsLinear); + } + boolean hasEqualityConstraints = equalityConstraints != null; + boolean hasLessConstraints = lessThanEqualToConstraints != null; + double[] lambdaEqual = hasEqualityConstraints ? new double[equalityConstraints.length] : null; + double[] lambdaLessThan = hasLessConstraints ? new double[lessThanEqualToConstraints.length] : null; + Long startSeed = _parms._seed == -1 ? new Random().nextLong() : _parms._seed; + Random randObj = new Random(startSeed); + updateConstraintValues(betaCnd, coefNames, equalityConstraints, lessThanEqualToConstraints); +/* + if (hasEqualityConstraints) // set lambda values for constraints + genInitialLambda(randObj, equalityConstraints, lambdaEqual); + if (hasLessConstraints) + genInitialLambda(randObj, lessThanEqualToConstraints, lambdaLessThan); +*/ + ExactLineSearch ls = null; + int iterCnt = (_checkPointFirstIter ? _state._iter : 0)+_initIter; + // contribution to gradient and hessian from constraints + _state.initConstraintDerivatives(equalityConstraints, lessThanEqualToConstraints, coefNames); + + GLMGradientSolver ginfo = gam.equals(_parms._glmType) ? 
new GLMGradientSolver(_job, _parms, _dinfo, 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _dinfo, 0, _state.activeBC(), _betaInfo); + GLMGradientInfo gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan, + equalityConstraints, lessThanEqualToConstraints); // add dpenalty/dx to gradient from penalty term + _state.setConstraintInfo(gradientInfo, equalityConstraints, lessThanEqualToConstraints, lambdaEqual, lambdaLessThan); // update state ginfo with contributions from GLMGradientInfo + boolean predictorSizeChange; + boolean applyBetaConstraints = _parms._separate_linear_beta && _betaConstraintsOn; + // short circuit check here: if gradient magnitude is small and all constraints are satisfied, quit right away + if (constraintsStop(gradientInfo, _state)) { + Log.info(LogMsg("GLM with constraints model building completed successfully!!")); + return; + } + double gradMagSquare = ArrayUtils.innerProduct(gradientInfo._gradient, gradientInfo._gradient); + boolean done; + boolean gradSmallEnough = (gradMagSquare <= _state._csGLMState._epsilonkCSSquare); + int origIter = iterCnt+1; + boolean lineSearchSuccess; + try { + while (true) { + do { // implement Algorithm 11.8 of the doc to find coefficients with epsilon k as the precision + iterCnt++; + long t1 = System.currentTimeMillis(); + ComputationState.GramGrad gram = _state.computeGram(betaCnd, gradientInfo); // calculate gram (hessian), xy, objective values + if (iterCnt == origIter) { + Matrix gramMatrix = new Matrix(gram._gram); + if (gramMatrix.cond() >= BAD_CONDITION_NUMBER) + if (_parms._init_optimal_glm) { + warn("init_optimal_glm", " should be disabled. This lead to gram matrix being close to" + + " singular. Please re-run with init_optimal_glm set to false."); + } + } + predictorSizeChange = !coefNames.equals(Arrays.asList(_state.activeData().coefNames())); + if (predictorSizeChange) { // reset if predictors changed + coefNames = changeCoeffBetainfo(_state.activeData()._coefNames); + _state.resizeConstraintInfo(equalityConstraints, lessThanEqualToConstraints); + ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _state.activeData(), 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _state.activeData(), 0, _state.activeBC(), _betaInfo); + tempBeta = new double[coefNames.size()]; + } + // solve for GLM coefficients + betaCnd = constraintGLM_solve(gram); // beta_k+1 = beta_k+dk where dk = beta_k+1-beta_k + predictorSizeChange = !coefNames.equals(Arrays.asList(_state.activeData().coefNames())); + if (predictorSizeChange) { // reset if predictors changed + coefNames = changeCoeffBetainfo(_state.activeData()._coefNames); + _state.resizeConstraintInfo(equalityConstraints, lessThanEqualToConstraints); + ginfo = gam.equals(_parms._glmType) ? new GLMGradientSolver(_job, _parms, _state.activeData(), 0, + _state.activeBC(), _betaInfo, _penaltyMatrix, _gamColIndices) : new GLMGradientSolver(_job, _parms, + _state.activeData(), 0, _state.activeBC(), _betaInfo); + tempBeta = new double[betaCnd.length]; + } + // add exact line search for GLM coefficients. Refer to the doc, Algorithm 11.5 + if (ls == null) + ls = new ExactLineSearch(betaCnd, _state, coefNames); + else + ls.reset(betaCnd, _state, coefNames); + + // line search can fail when the gradient is close to zero. In this case, we need to update the + // constraint parameters. 
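+          // For reference, the penalized objective minimized here has the standard augmented
+          // Lagrangian form (a sketch; symbol names follow common usage, not necessarily the doc):
+          // with constraints g_i(beta) (= 0 for equality, <= 0 when a less-than constraint is active),
+          //   L(beta, lambda) = f(beta) + sum_i lambda_i*g_i(beta) + (ck/2)*sum_i g_i(beta)^2
+          // whose gradient adds sum_i (lambda_i + ck*g_i(beta))*grad_g_i to the GLM gradient;
+          // this is the "dpenalty/dx" contribution calGradient folds in above.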
+          lineSearchSuccess = ls.findAlpha(lambdaEqual, lambdaLessThan, _state, equalityConstraints,
+                  lessThanEqualToConstraints, ginfo);
+          if (lineSearchSuccess) {
+            betaCnd = ls._newBeta;
+            gradientInfo = ls._ginfoOriginal;
+            gradMagSquare = ArrayUtils.innerProduct(ls._ginfoOriginal._gradient, ls._ginfoOriginal._gradient);
+            gradSmallEnough = gradMagSquare <= _state._csGLMState._epsilonkCSSquare;
+          } else { // ls failed, reset to the previous beta
+            if (applyBetaConstraints) // separate beta and linear constraints
+              bc.applyAllBounds(_state.beta());
+            ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints, lessThanEqualToConstraints,
+                    ginfo, _state.beta());
+            Log.info(LogMsg("Line search failed " + ls));
+            return;
+          }
+
+          if (applyBetaConstraints) { // if beta constraints are applied, may need to update constraints, derivatives, gradientInfo
+            System.arraycopy(betaCnd, 0, tempBeta, 0, betaCnd.length);
+            bc.applyAllBounds(betaCnd);
+            ArrayUtils.subtract(betaCnd, tempBeta, tempBeta);
+            ls.setBetaConstraintsDeriv(lambdaEqual, lambdaLessThan, _state, equalityConstraints,
+                    lessThanEqualToConstraints, ginfo, betaCnd);
+            gradientInfo = ls._ginfoOriginal;
+          }
+
+          // check for stopping conditions; progress(...) also updates the variables in state.
+          // The stopping condition keeps us from getting stuck on improvements that are too insignificant.
+          // Note that we may exit the while loop here even though gradMagSquare is still too high: there is
+          // no hope of further improvement since the beta and gradient values are barely changing anymore.
+          done = stop_requested() || (_state._iter >= _parms._max_iterations) || _earlyStop; // time to go
+          if (!progress(betaCnd, gradientInfo)) {
+            checkKKTConditions(betaCnd, gradientInfo, iterCnt);
+            return;
+          }
+
+          Log.info(LogMsg("computed in " + (System.currentTimeMillis() - t1) + "ms, step = " + iterCnt +
+                  ((_lslvr != null) ? ", l1solver " + _lslvr : "")));
+        } while (!gradSmallEnough);
+        // update constraint parameters, ck, lambdas and others
+        updateConstraintParameters(_state, lambdaEqual, lambdaLessThan, equalityConstraints,
+                lessThanEqualToConstraints, _parms);
+        // update gradient calculation with new value (lambda and/or ck).
+        // set lambdas to all zeros (this variant uses only the quadratic penalty)
+        lambdaEqual = hasEqualityConstraints ? new double[lambdaEqual.length] : null;
+        lambdaLessThan = hasLessConstraints ? new double[lambdaLessThan.length] : null;
+
+        gradientInfo = calGradient(betaCnd, _state, ginfo, lambdaEqual, lambdaLessThan,
+                equalityConstraints, lessThanEqualToConstraints);
+        _state.updateState(betaCnd, gradientInfo); // update computation state with new info
      }
    } catch (NonSPDMatrixException e) {
      Log.warn(LogMsg("Got Non SPD matrix, stopped."));
@@ -2477,25 +2940,17 @@ private void fitIRLSMCS() {
  }

  /***
-   * This method will first check if enough progress has been made with progress method.
-   * If no more progress is made, we will check it the constraint stopping conditions are met.
-   * The model building process will stop if no more progress is made regardless of whether the constraint stopping
-   * conditions are met or not.
+   * We will check whether the constraint stopping conditions are met.
*/
-  public boolean checkIterationDone(double[] betaCnd, GLMGradientInfo gradientInfo, int iterCnt) {
+  public void checkKKTConditions(double[] betaCnd, GLMGradientInfo gradientInfo, int iterCnt) {
    // check for stopping conditions
-    boolean done = !progress(betaCnd, gradientInfo); // no good change in coeff, time-out or max_iteration reached
-    if (done) {
-      _model._betaCndCheckpoint = betaCnd;
-      boolean kktAchieved = constraintsStop(gradientInfo, _state);
-      if (kktAchieved)
-        Log.info("KKT Conditions achieved after " + iterCnt + " iterations ");
-      else
-        Log.warn("KKT Conditions not achieved but no further progress made due to time out or no changes" +
-                " to coefficients after " + iterCnt + " iterations");
-      return true;
-    }
-    return false;
+    _model._betaCndCheckpoint = betaCnd;
+    boolean kktAchieved = constraintsStop(gradientInfo, _state);
+    if (kktAchieved)
+      Log.info("KKT Conditions achieved after " + iterCnt + " iterations ");
+    else
+      Log.warn("KKT Conditions not achieved but no further progress made due to time out or no changes" +
+              " to coefficients after " + iterCnt + " iterations");
  }

  public List changeCoeffBetainfo(String[] coefNames) {
@@ -4151,14 +4606,27 @@ private void checkCoeffsBounds() {
    BetaConstraint bc = _parms._beta_constraints != null ? new BetaConstraint(_parms._beta_constraints.get())
            : new BetaConstraint(); // bounds for columns _dinfo.fullN()+1 only
    double[] coeffs = _parms._standardize ? _model._output.getNormBeta() :_model._output.beta();
-    if (bc._betaLB == null || bc._betaUB == null || coeffs == null)
+    if (coeffs == null)
+      return;
+    if (bc._betaLB == null && bc._betaUB == null)
      return;
-    int coeffsLen = bc._betaLB.length;
+    int coeffsLen = bc._betaLB != null ? bc._betaLB.length : bc._betaUB.length;
    StringBuffer errorMessage = new StringBuffer();
+    boolean lowerBoundNull = bc._betaLB == null;
+    boolean upperBoundNull = bc._betaUB == null;
    for (int index=0; index < coeffsLen; index++) {
-      if (!(coeffs[index] == 0 || (coeffs[index] >= bc._betaLB[index] && coeffs[index] <= bc._betaUB[index])))
-        errorMessage.append("GLM model coefficient " + coeffs[index]+" exceeds beta constraint bounds. Lower: "
-                +bc._betaLB[index]+", upper: "+bc._betaUB[index]+"\n");
+      if (coeffs[index] != 0) {
+        if (lowerBoundNull && !Double.isInfinite(bc._betaUB[index]) && (coeffs[index] > bc._betaUB[index])) {
+          errorMessage.append("GLM model coefficient " + coeffs[index]+" exceeds beta constraint upper bounds: " +
+                  "upper: "+bc._betaUB[index]+"\n");
+        } else if (upperBoundNull && !Double.isInfinite(bc._betaLB[index]) && (coeffs[index] < bc._betaLB[index])) {
+          errorMessage.append("GLM model coefficient " + coeffs[index]+" falls below beta constraint lower bounds: " +
+                  "lower: "+bc._betaLB[index]+"\n");
+        } else if (!lowerBoundNull && !upperBoundNull && (coeffs[index] < bc._betaLB[index] || coeffs[index] > bc._betaUB[index])) {
+          errorMessage.append("GLM model coefficient " + coeffs[index]+" exceeds beta constraint bounds.
Lower: " + +bc._betaLB[index]+", upper: "+bc._betaUB[index]+"\n"); + } + } } if (errorMessage.length() > 0) throw new H2OFailException("\n"+errorMessage.toString()); diff --git a/h2o-algos/src/main/java/hex/glm/GLMTask.java b/h2o-algos/src/main/java/hex/glm/GLMTask.java index 75fac2d5c4c2..4449014c186d 100644 --- a/h2o-algos/src/main/java/hex/glm/GLMTask.java +++ b/h2o-algos/src/main/java/hex/glm/GLMTask.java @@ -1547,15 +1547,6 @@ public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double _c = c; } - public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double [] beta, int c, boolean hasConst) { - super(null,dinfo,jobKey); - _beta = beta; - _ymu = null; - _glmf = glmw; - _c = c; - _hasConstraints = hasConst; - } - @Override public boolean handlesSparseData(){return true;} transient private double _sparseOffset; diff --git a/h2o-algos/src/main/java/hex/optimization/OptimizationUtils.java b/h2o-algos/src/main/java/hex/optimization/OptimizationUtils.java index 221afa9779cc..d75ebcaa5dc5 100644 --- a/h2o-algos/src/main/java/hex/optimization/OptimizationUtils.java +++ b/h2o-algos/src/main/java/hex/optimization/OptimizationUtils.java @@ -554,6 +554,7 @@ public boolean findAlpha(double[] lambdaEqual, double[] lambdaLessThan, Computat ConstrainedGLMUtils.LinearConstraints[] lessThanEqualToConstraints, GLM.GLMGradientSolver gradientSolver) { if (_currGradDirIP > 0) { + _newBeta = _originalBeta; return false; } GLM.GLMGradientInfo newGrad; @@ -563,6 +564,8 @@ public boolean findAlpha(double[] lambdaEqual, double[] lambdaLessThan, Computat boolean firstWolfe; boolean secondWolfe; boolean alphaiChange; + double gradMagSquare; + boolean gradSmallEnough; for (int index=0; index<_maxIteration; index++) { ArrayUtils.mult(_direction, tempDirection, _alphai); // tempCoef=alpha_i*direction newCoef = ArrayUtils.add(tempDirection, _originalBeta); // newCoef = coef + alpha_i*direction @@ -575,6 +578,8 @@ public boolean findAlpha(double[] lambdaEqual, double[] lambdaLessThan, Computat // calculate new gradient and objective function for new coefficients newCoef newGrad = calGradient(newCoef, state, gradientSolver, lambdaEqual, lambdaLessThan, equalityConstraints, lessThanEqualToConstraints); + gradMagSquare = ArrayUtils.innerProduct(newGrad._gradient, newGrad._gradient); + gradSmallEnough = gradMagSquare <= state._csGLMState._epsilonkCSSquare; // evaluate if first Wolfe condition is satisfied; firstWolfe = evaluateFirstWolfe(newGrad); // evaluate if second Wolfe condition is satisfied; @@ -589,7 +594,11 @@ public boolean findAlpha(double[] lambdaEqual, double[] lambdaLessThan, Computat // set alphai if first Wolfe condition is not satisfied, set alpha i if second Wolfe condition is not satisfied; alphaiChange = setAlphai(firstWolfe, secondWolfe); if (!alphaiChange || _alphar < EPS_CS_SQUARE) { // if alphai, alphar value are not changed and alphar is too small, quit - return false; + if (gradSmallEnough) { + _newBeta = newCoef; + _ginfoOriginal = newGrad; + } + return false; } } return false; diff --git a/h2o-algos/src/test/java/hex/glm/GLMConstrainedTest.java b/h2o-algos/src/test/java/hex/glm/GLMConstrainedTest.java index 77aa7031195f..a4ae87ea0b03 100644 --- a/h2o-algos/src/test/java/hex/glm/GLMConstrainedTest.java +++ b/h2o-algos/src/test/java/hex/glm/GLMConstrainedTest.java @@ -77,8 +77,6 @@ public class GLMConstrainedTest extends TestUtil { double[] _lessGradContr; double _ck = 10; double[] _beta; - double[] _equalGradPenalty; - double[] _lessGradPenalty; Random _obj = 
new Random(123); @@ -324,15 +322,16 @@ public void generateConstraint4FrameNAnswer() { _coeffNames1.get(19), _coeffNames1.get(20), _coeffNames1.get(21), _coeffNames1.get(22), "constant", _coeffNames1.get(4), _coeffNames1.get(5), _coeffNames1.get(6), "constant", _coeffNames1.get(6), _coeffNames1.get(33), _coeffNames1.get(7), _coeffNames1.get(24), _coeffNames1.get(25), "constant", - _coeffNames1.get(1), _coeffNames1.get(coefLen-3), "constant"}) + _coeffNames1.get(1), _coeffNames1.get(coefLen-3), "constant", _coeffNames1.get(0), _coeffNames1.get(1), "constant"}) .withDataForCol(1, new double [] {-0.3, 0.5, 1.0, -3.0, 3, -4, 0.5, 0.1, -0.2, 2.0, -0.1, -0.4, - 0.8, 0.1, -0.5, 0.7, -1.1, 2.0, 0.5, -0.3, 0.5, -1.5, -0.3, -1.0, 1.0, -9.0}) + 0.8, 0.1, -0.5, 0.7, -1.1, 2.0, 0.5, -0.3, 0.5, -1.5, -0.3, -1.0, 1.0, -9.0,-1, -1, 0}) .withDataForCol(2, new String[] {"lessthanequal", "lessthanequal", "lessthanequal", "lessthanequal", "lessthanequal", "lessthanequal", "lessthanequal", "equal", "equal", "lessthanequal", "lessthanequal", "lessthanequal", "lessthanequal", "equal", "equal", "equal", "equal", "equal", - "equal", "equal", "equal", "equal", "equal", "lessthanequal", "lessthanequal", "lessthanequal"}) + "equal", "equal", "equal", "equal", "equal", "lessthanequal", "lessthanequal", "lessthanequal", + "lessthanequal", "lessthanequal", "lessthanequal"}) .withDataForCol(3, new int[]{0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, - 6, 7, 7 ,7}).build(); + 6, 7, 7 ,7, 8, 8, 8}).build(); Scope.track(_linearConstraint4); } @@ -595,54 +594,6 @@ public void assertCorrectGramMaps(IcedHashMap= 2, beta1 >= 2 - Frame betaConstraint = - new TestFrameBuilder() - .withColNames("names", "lower_bounds", "upper_bounds") - .withVecTypes(T_STR, T_NUM, T_NUM) - .withDataForCol(0, new String[] {_coeffNames1.get(30), _coeffNames1.get(31)}) - .withDataForCol(1, new double [] {2, 2}) - .withDataForCol(2, new double[] {Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}).build(); - Scope.track(betaConstraint); - - // linear constraint: beta0 + beta1 <= 2, contradicts with beta0 >= 2 and beta1 >= 2 - Frame linearConstraint = new TestFrameBuilder() - .withColNames("names", "values", "types", "constraint_numbers") - .withVecTypes(T_STR, T_NUM, T_STR, T_NUM) - .withDataForCol(0, new String[] {_coeffNames1.get(30), _coeffNames1.get(31), "constant"}) - .withDataForCol(1, new double [] {1,1,-2}) - .withDataForCol(2, new String[] {"lessthanequal", "lessthanequal", "lessthanequal"}) - .withDataForCol(3, new int[]{0,0,0}).build(); - Scope.track(linearConstraint); - - Frame train = parseAndTrackTestFile("smalldata/glm_test/gaussian_20cols_10000Rows.csv"); - transformFrameCreateCoefNames(train); - GLMModel.GLMParameters params = new GLMModel.GLMParameters(gaussian); - params._standardize = false; - params._response_column = "C21"; - params._solver = IRLSM; - params._train = train._key; - params._beta_constraints = betaConstraint._key; - params._max_iterations = 1; - params._expose_constraints = true; - params._linear_constraints = linearConstraint._key; - params._lambda = new double[]{0}; - GLMModel glm2 = new GLM(params).trainModel().get(); - Scope.track_generic(glm2); - assertTrue("Should have thrown an error due to duplicated constraints.", 1==2); - } catch(IllegalArgumentException ex) { - assert ex.getMessage().contains("redundant and possibly conflicting linear constraints") : "Wrong error message. 
Error should be about" + - " redundant linear constraints"; - } finally { - Scope.exit(); - } - } - // linear constraints with two duplicated constraints @Test public void testDuplicateLinearConstraints() { @@ -663,7 +614,7 @@ public void testDuplicateLinearConstraints() { Scope.track_generic(glm2); assert 1==2 : "Should have thrown an error due to duplicated constraints."; } catch(IllegalArgumentException ex) { - assert ex.getMessage().contains("redundant and possibly conflicting linear constraints") : "Wrong error message. Error should be about" + + assert ex.getMessage().contains("redundant linear constraints:") : "Wrong error message. Error should be about" + " redundant linear constraints"; } finally { Scope.exit(); @@ -691,7 +642,7 @@ public void testDuplicateBetaLinearConstraints() { Scope.track_generic(glm2); assert 1==2 : "Should have thrown an error due to duplicated constraints."; } catch(IllegalArgumentException ex) { - assert ex.getMessage().contains("redundant and possibly conflicting linear constraints") : "Wrong error message. Error should be about" + + assert ex.getMessage().contains("redundant linear constraints") : "Wrong error message. Error should be about" + " redundant linear constraints"; } finally { Scope.exit(); diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_bad_constraints_large.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_bad_constraints_large.py new file mode 100644 index 000000000000..c9a81a79a75f --- /dev/null +++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_bad_constraints_large.py @@ -0,0 +1,156 @@ +import h2o +from h2o.estimators.glm import H2OGeneralizedLinearEstimator as glm +import numpy as np +import pandas as pd +from tests import pyunit_utils + +# this test needs to run into completion duplicating/conflicting constraints +def data_prep(seed): + np.random.seed(seed) + x1 = np.random.normal(0, 10, 100000) + x2 = np.random.normal(10, 100 , 100000) + x3 = np.random.normal(20, 200, 100000) + x4 = np.random.normal(30, 3000, 100000) + x5 = np.random.normal(400, 4000, 100000) + + y_raw = np.sin(x1)*100 + np.sin(x2)*100 + x3/20 + x3/30 + x5/400 + y = np.random.normal(y_raw, 20) + + data = { + 'x1': x1, + 'x2': x2, + 'x3': x3, + 'x4': x4, + 'x5': x5, + 'y': y, + } + return h2o.H2OFrame(pd.DataFrame(data)) + +def test_duplicate_conflicting_constraints(): + train_data = data_prep(123) + family = 'gaussian' + link = 'identity' + nfolds = 0 + lambda_ = 0.0 + seed = 1234 + calc_like = True + compute_p_values = True + solver = 'irlsm' + predictors = ['x1', 'x2', 'x3', 'x4', 'x5'] + response = "y" + + linear_constraints2 = [] + + name = "x2" + values = 1 + types = "LessThanEqual" + contraint_numbers = 0 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "x3" + values = -1 + types = "LessThanEqual" + contraint_numbers = 0 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "constant" + values = 0 + types = "LessThanEqual" + contraint_numbers = 0 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "x3" + values = 1 + types = "LessThanEqual" + contraint_numbers = 1 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "x4" + values = -1 + types = "LessThanEqual" + contraint_numbers = 1 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "constant" + values = 0 + types = "LessThanEqual" + contraint_numbers = 1 + linear_constraints2.append([name, values, 
types, contraint_numbers]) + + name = "x2" + values = 1 + types = "LessThanEqual" + contraint_numbers = 2 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "x3" + values = 1 + types = "LessThanEqual" + contraint_numbers = 2 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "x4" + values = 1 + types = "LessThanEqual" + contraint_numbers = 2 + linear_constraints2.append([name, values, types, contraint_numbers]) + + name = "constant" + values = 0 + types = "LessThanEqual" + contraint_numbers = 2 + linear_constraints2.append([name, values, types, contraint_numbers]) + + linear_constraints = h2o.H2OFrame(linear_constraints2) + linear_constraints.set_names(["names", "values", "types", "constraint_numbers"]) + + params = { + "family" : family, + "link": link, + "lambda_" : lambda_, + "seed" : seed, + "nfolds" : nfolds, + "compute_p_values" : compute_p_values, + "calc_like" : calc_like, + "solver" : solver, + "linear_constraints": linear_constraints + } + + model = glm(**params) + model.train(x = predictors, y = response, training_frame = train_data) + print(model.coef()) + coef_constrained = model.coef() + print(glm.getConstraintsInfo(model)) + + params = { + "family" : family, + "link": link, + "lambda_" : lambda_, + "seed" : seed, + "nfolds" : nfolds, + "compute_p_values" : compute_p_values, + "calc_like" : calc_like, + "solver" : solver, + } + + model_no_constraints = glm(**params) + model_no_constraints.train(x = predictors, y = response, training_frame = train_data) + coef_no_constraints = model_no_constraints.coef() + print("model built without constraints") + print(coef_no_constraints) + print("x2-x3: {0}".format(coef_no_constraints['x2']-coef_no_constraints['x3'])) + print("x3-x4: {0}".format(coef_no_constraints['x3']-coef_no_constraints['x4'])) + print("x2+x3+x4: {0}".format(coef_no_constraints['x2']+coef_no_constraints['x3']+coef_no_constraints['x4'])) + # assert that model with linear constraints does a better job than model without constraints + assert (coef_constrained['x2']-coef_constrained['x3']) < (coef_no_constraints['x2']-coef_no_constraints['x3']), \ + "Model built with constraints should be closer to the constraint x2-x3 <= 0" + assert (coef_constrained['x3']-coef_constrained['x4']) < (coef_no_constraints['x3']-coef_no_constraints['x4']), \ + "Model built with constraints should be closer to the constraint x3-x4 <= 0" + assert (coef_constrained['x2']+coef_constrained['x3']+coef_constrained['x4']) < \ + (coef_no_constraints['x2']+coef_no_constraints['x3']+coef_no_constraints['x4']), \ + "Model built with constraints should be closer to the constraint x2+x3+x4 <= 0" + +if __name__ == "__main__": + pyunit_utils.standalone_test(test_duplicate_conflicting_constraints) +else: + test_duplicate_conflicting_constraints() diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_beta_constraint_NPE_large.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_beta_constraint_NPE_large.py new file mode 100644 index 000000000000..7c7b18ef72c7 --- /dev/null +++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_beta_constraint_NPE_large.py @@ -0,0 +1,163 @@ +import h2o +from h2o.estimators.glm import H2OGeneralizedLinearEstimator as glm +from tests import pyunit_utils +import numpy as np +import pandas as pd + +# For beta constraints, if only upper_bounds are specified, there are NPE errors because the code expects both upper +# and lower bounds to be specified. 
+def data_prep(seed):
+    np.random.seed(seed)
+    x1 = np.random.normal(0, 10, 100000)
+    x2 = np.random.normal(10, 100, 100000)
+    x3 = np.random.normal(20, 200, 100000)
+    x4 = np.random.normal(30, 3000, 100000)
+    x5 = np.random.normal(400, 4000, 100000)
+
+    y_raw = np.sin(x1)*100 + np.sin(x2)*100 + x3/20 + x3/30 + x5/400
+    y = np.random.normal(y_raw, 20)
+
+    data = {
+        'x1': x1,
+        'x2': x2,
+        'x3': x3,
+        'x4': x4,
+        'x5': x5,
+        'y': y,
+    }
+    return h2o.H2OFrame(pd.DataFrame(data))
+
+def test_beta_constraint_npe():
+    train_data = data_prep(123)
+    family = 'gaussian'
+    link = 'identity'
+    nfolds = 0
+    lambda_ = 0.0
+    seed = 1234
+    calc_like = True
+    compute_p_values = True
+    solver = 'irlsm'
+    predictors = ['x1', 'x2', 'x3', 'x4', 'x5']
+    response = "y"
+
+    # beta constraints
+    bc = []
+    name = 'x1'
+    lower_bound = 0.03
+    bc.append([name, lower_bound])
+
+    beta_constraints = h2o.H2OFrame(bc)
+    beta_constraints.set_names(["names", "lower_bounds"])
+
+    linear_constraints2 = []
+
+    name = "x2"
+    values = 1
+    types = "LessThanEqual"
+    constraint_numbers = 0
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x3"
+    values = -1
+    types = "LessThanEqual"
+    constraint_numbers = 0
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "constant"
+    values = 0
+    types = "LessThanEqual"
+    constraint_numbers = 0
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x3"
+    values = 1
+    types = "LessThanEqual"
+    constraint_numbers = 1
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x4"
+    values = -1
+    types = "LessThanEqual"
+    constraint_numbers = 1
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "constant"
+    values = 0
+    types = "LessThanEqual"
+    constraint_numbers = 1
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x2"
+    values = 1
+    types = "LessThanEqual"
+    constraint_numbers = 2
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x3"
+    values = 1
+    types = "LessThanEqual"
+    constraint_numbers = 2
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x4"
+    values = 1
+    types = "LessThanEqual"
+    constraint_numbers = 2
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "constant"
+    values = 0
+    types = "LessThanEqual"
+    constraint_numbers = 2
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    linear_constraints = h2o.H2OFrame(linear_constraints2)
+    linear_constraints.set_names(["names", "values", "types", "constraint_numbers"])
+
+    # Specifying only the lower bound of a beta constraint should not generate an error.
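+    # The three linear constraints above mirror the bad-constraints test (x2 - x3 <= 0, x3 - x4 <= 0 and
+    # x2 + x3 + x4 <= 0); here they are combined with a one-sided beta constraint, the setup that used to NPE.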
+    params = {
+        "family" : family,
+        "link": link,
+        "lambda_" : lambda_,
+        "seed" : seed,
+        "nfolds" : nfolds,
+        "compute_p_values" : compute_p_values,
+        "calc_like" : calc_like,
+        "solver" : solver,
+        "linear_constraints": linear_constraints,
+        "beta_constraints": beta_constraints
+    }
+
+    model = glm(**params)
+    model.train(x = predictors, y = response, training_frame = train_data)
+    coefs = model.coef()
+    print(coefs)
+    print(glm.getConstraintsInfo(model))
+    # beta constraints should be satisfied
+    assert coefs["x1"] >= 0.03 or abs(coefs["x1"]-0.03) < 1e-6, "beta constraint x1 ({0}) >= 0.03 is violated!".format(coefs["x1"])
+
+    # beta constraints
+    bc = []
+    name = 'x1'
+    upper_bound = 1.5
+    bc.append([name, upper_bound])
+
+    beta_constraints2 = h2o.H2OFrame(bc)
+    beta_constraints2.set_names(["names", "upper_bounds"])
+
+    params['beta_constraints'] = beta_constraints2
+    model = glm(**params)
+    model.train(x = predictors, y = response, training_frame = train_data)
+    coefs = model.coef()
+    print(coefs)
+    print(glm.getConstraintsInfo(model))
+    # beta constraints should always be satisfied
+    assert coefs["x1"] <= 1.5 or abs(1.5-coefs["x1"]) < 1e-6, "beta constraint x1 ({0}) <= 1.5 is violated.".format(coefs["x1"])
+
+if __name__ == "__main__":
+    pyunit_utils.standalone_test(test_beta_constraint_npe)
+else:
+    test_beta_constraint_npe()
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_test_large.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_test_large.py
new file mode 100644
index 000000000000..dc337ae33cf6
--- /dev/null
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_16312_contrained_GLM_test_large.py
@@ -0,0 +1,108 @@
+import h2o
+from h2o.estimators.glm import H2OGeneralizedLinearEstimator as glm
+from tests import pyunit_utils
+import numpy as np
+import pandas as pd
+
+def data_prep(seed):
+    np.random.seed(seed)
+    x1 = np.random.normal(0, 10, 100000)
+    x2 = np.random.normal(10, 100, 100000)
+    x3 = np.random.normal(20, 200, 100000)
+    x4 = np.random.normal(30, 3000, 100000)
+    x5 = np.random.normal(400, 4000, 100000)
+
+    y_raw = np.sin(x1)*100 + np.sin(x2)*100 + x3/20 + x3/30 + x5/400
+    y = np.random.normal(y_raw, 20)
+
+    data = {
+        'x1': x1,
+        'x2': x2,
+        'x3': x3,
+        'x4': x4,
+        'x5': x5,
+        'y': y,
+    }
+    return h2o.H2OFrame(pd.DataFrame(data))
+
+def test_bad_linear_constraints():
+    train_data = data_prep(123)
+    family = 'gaussian'
+    link = 'identity'
+    nfolds = 0
+    lambda_ = 0
+    seed = 1234
+    calc_like = True
+    compute_p_values = True
+    solver = 'irlsm'
+    predictors = ['x1', 'x2', 'x3', 'x4', 'x5']
+    response = "y"
+
+    linear_constraints2 = []
+
+    name = "x2"
+    values = 1
+    types = "Equal"
+    constraint_numbers = 0
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "x3"
+    values = 1
+    types = "Equal"
+    constraint_numbers = 0
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    name = "constant"
+    values = 0
+    types = "Equal"
+    constraint_numbers = 0
+    linear_constraints2.append([name, values, types, constraint_numbers])
+
+    linear_constraints = h2o.H2OFrame(linear_constraints2)
+    linear_constraints.set_names(["names", "values", "types", "constraint_numbers"])
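+
+    # The three rows above encode a single equality constraint, x2 + x3 == 0.  The first model below trains with
+    # this constraint (and with standardization enabled); a second model without constraints serves as the baseline.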
+
+    params3 = {
+        "family" : family,
+        "link": link,
+        "lambda_" : lambda_,
+        "seed" : seed,
+        "nfolds" : nfolds,
+        "compute_p_values" : compute_p_values,
+        "calc_like" : calc_like,
+        "solver" : solver,
+        "linear_constraints": linear_constraints,
+        "standardize": True,
+    }
+
+    glm3 = glm(**params3)
+    glm3.train(x = predictors, y = response, training_frame = train_data)
+    print(glm.getConstraintsInfo(glm3))
+    coef3 = glm3.coef()
+    print(glm3.coef())
+
+    params2 = {
+        "family" : family,
+        "link": link,
+        "lambda_" : lambda_,
+        "seed" : seed,
+        "nfolds" : nfolds,
+        "compute_p_values" : compute_p_values,
+        "calc_like" : calc_like,
+        "solver" : solver
+    }
+    glm2 = glm(**params2)
+    glm2.train(x = predictors, y = response, training_frame = train_data)
+    print("Models built without linear constraints")
+    coef2 = glm2.coef()
+    print(coef2)
+    print("x2 + x3: {0}".format(coef2["x2"]+coef2["x3"]))
+
+    # check that the model with constraints comes closer to satisfying the constraint than the model without it
+    assert (coef3["x2"]+coef3["x3"]) < (coef2["x2"]+coef2["x3"]), \
+        "the model built with constraints should be closer to satisfying x2 + x3 == 0 but is not."
+
+if __name__ == "__main__":
+    pyunit_utils.standalone_test(test_bad_linear_constraints)
+else:
+    test_bad_linear_constraints()
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_beta_equality_loose_lessthan_linear_constraints_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_beta_equality_loose_lessthan_linear_constraints_binomial.py
index 4c01ed1f67d7..569a4304268b 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_beta_equality_loose_lessthan_linear_constraints_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_beta_equality_loose_lessthan_linear_constraints_binomial.py
@@ -195,13 +195,13 @@ def test_constraints_binomial():
     print(glm.getConstraintsInfo(h2o_glm_default_init))
 
-    assert abs(logloss-init_logloss)<2e-6, "logloss from optimal GLM {0} and logloss from GLM with loose constraints " \
+    assert abs(logloss-init_logloss)<1e-6, "logloss from optimal GLM {0} and logloss from GLM with loose constraints " \
                                            "and initialized with optimal GLM {1} should equal but is not." \
                                            "".format(logloss, init_logloss)
-    assert logloss <= init_random_logloss, "logloss from optimal GLM {0} should be less than GLM with constraints " \
+    assert abs(logloss-init_random_logloss)<1e-6, "logloss from optimal GLM {0} should be close to GLM with constraints " \
                                            "and with random initial coefficients {1} but is" \
                                            " not.".format(logloss, init_random_logloss)
-    assert logloss <= default_init_logloss, "logloss from optimal GLM {0} should be less than GLM with constraints " \
+    assert abs(logloss-default_init_logloss)<1e-6, "logloss from optimal GLM {0} should be close to GLM with constraints " \
                                            "and with default initial coefficients {1} but is" \
                                            " not.".format(logloss, default_init_logloss)
 
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_constraints_only_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_constraints_only_binomial.py
index d9d2c737ddcd..8c29822f0b87 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_constraints_only_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_constraints_only_binomial.py
@@ -124,8 +124,9 @@ def test_equality_constraints_only_binomial():
                                   "".format(default_init_logloss, h2o_glm_default_init._model_json["output"]["model_summary"].cell_values[0][6]))
     print(glm.getConstraintsInfo(h2o_glm_default_init))
 
-    assert init_random_logloss >= logloss, "Random initialization logloss with constraints should be worst than GLM " \
-                                           "without constraints but is not."
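+    # With the constrained-GLM fixes in this PR, a randomly initialized constrained fit is expected to reach
+    # essentially the same optimum as the unconstrained fit, so the loglosses are compared within a tolerance.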
+    assert abs(init_random_logloss - logloss) < 1e-6, \
+        "Random initialization logloss {0} with constraints should be similar to GLM without constraints {1} but" \
+        " is not.".format(init_random_logloss, logloss)
 
 
 if __name__ == "__main__":
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_loose_lessthan_linear_constraints_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_loose_lessthan_linear_constraints_binomial.py
index 439192204ef2..dd884aaea48b 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_loose_lessthan_linear_constraints_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_equality_loose_lessthan_linear_constraints_binomial.py
@@ -156,13 +156,13 @@ def test_equality_linear_constraints_binomial():
                                     " taken to build the model: {1}".format(default_init_logloss,
                                                                             utils_for_glm_tests.find_glm_iterations(h2o_glm_default_init)))
     print(glm.getConstraintsInfo(h2o_glm_default_init))
 
-    assert abs(logloss-init_logloss)<2e-6, "logloss from optimal GLM {0} and logloss from GLM with loose constraints " \
+    assert abs(logloss-init_logloss)<1e-6, "logloss from optimal GLM {0} and logloss from GLM with loose constraints " \
                                            "and initialized with optimal GLM {1} should equal but is not." \
                                            "".format(logloss, init_logloss)
-    assert logloss<=init_random_logloss, "logloss from optimal GLM {0} should be lower than GLM with constraints " \
+    assert abs(logloss-init_random_logloss)<1e-6, "logloss from optimal GLM {0} should be close to GLM with constraints " \
                                            "and with random initial coefficients {1} but is" \
                                            " not.".format(logloss, init_random_logloss)
-    assert logloss<=default_init_logloss, "logloss from optimal GLM {0} should be less than GLM with constraints " \
+    assert abs(logloss-default_init_logloss)<1e-6, "logloss from optimal GLM {0} should be close to GLM with constraints " \
                                            "and with default initial coefficients {1} but is" \
                                            " not.".format(logloss, default_init_logloss)
 
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_equality_lessthan_constraints_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_equality_lessthan_constraints_binomial.py
index ce9d2facb414..8e50603e9f15 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_equality_lessthan_constraints_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_equality_lessthan_constraints_binomial.py
@@ -173,7 +173,7 @@ def test_light_tight_linear_constraints_only_binomial():
                                     "{1}".format(random_init_logloss,
                                                  utils_for_glm_tests.find_glm_iterations(h2o_glm_random_init)))
     print(glm.getConstraintsInfo(h2o_glm_random_init))
 
-    assert logloss <= optimal_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
+    assert abs(logloss - optimal_init_logloss)<1e-6, "logloss from optimal GLM {0} should be close to logloss from GLM with light tight" \
                                             " constraints and initialized with optimal GLM {1} but is not.".format(logloss, optimal_init_logloss)
 
     assert logloss <= default_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_linear_constraints_only_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_linear_constraints_only_binomial.py
index ea7802c5cdb5..d59c80fd99a5 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_linear_constraints_only_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_light_tight_linear_constraints_only_binomial.py
@@ -189,7 +189,7 @@ def test_light_tight_linear_constraints_only_binomial():
     print(glm.getConstraintsInfo(h2o_glm_random_init))
     print("All constraints satisfied: {0}".format(glm.allConstraintsPassed(h2o_glm_random_init)))
 
-    assert logloss <= optimal_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
+    assert abs(logloss - optimal_init_logloss)<1e-6, "logloss from optimal GLM {0} should be close to logloss from GLM with light tight" \
                                             " constraints and initialized with optimal GLM {1} but is not.".format(logloss, optimal_init_logloss)
 
     assert logloss <= default_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_redundant_constraints.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_redundant_constraints.py
index 56a9c052625b..81fd336e2603 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_redundant_constraints.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_redundant_constraints.py
@@ -198,7 +198,7 @@ def test_redundant_constraints():
     except Exception as ex:
         print(ex)
         temp = str(ex)
-        assert ("redundant and possibly conflicting linear constraints" in temp), "Wrong exception was received."
+        assert ("redundant linear constraints:" in temp), "Wrong exception was received."
     print("redundant constraint test passed!")
 
 if __name__ == "__main__":
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_equality_linear_constraints_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_equality_linear_constraints_binomial.py
index a16ebc336507..94ac1155c494 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_equality_linear_constraints_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_equality_linear_constraints_binomial.py
@@ -225,7 +225,7 @@ def test_tight_equality_linear_constraints_binomial():
     print(glm.getConstraintsInfo(h2o_glm_random_init))
     print("All constraints satisfied: {0}".format(glm.allConstraintsPassed(h2o_glm_random_init)))
 
-    assert logloss <= optimal_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
+    assert abs(logloss - optimal_init_logloss)<1e-6, "logloss from optimal GLM {0} should be close to logloss from GLM with light tight" \
                                             " constraints and initialized with optimal GLM {1} but is not.".format(logloss, optimal_init_logloss)
 
     assert logloss <= default_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_linear_constraints_only_binomial.py b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_linear_constraints_only_binomial.py
index d5fd4df1cbf6..cc5b5385c8d3 100644
--- a/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_linear_constraints_only_binomial.py
+++ b/h2o-py/tests/testdir_algos/glm/pyunit_GH_6722_tight_linear_constraints_only_binomial.py
@@ -189,7 +189,7 @@ def test_tight_linear_constraints_binomial():
     print(glm.getConstraintsInfo(h2o_glm_random_init))
     print("All constraints satisfied: {0}".format(glm.allConstraintsPassed(h2o_glm_random_init)))
 
-    assert logloss <= optimal_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
+    assert abs(logloss - optimal_init_logloss)<1e-6, "logloss from optimal GLM {0} should be close to logloss from GLM with light tight" \
                                             " constraints and initialized with optimal GLM {1} but is not.".format(logloss, optimal_init_logloss)
 
     assert logloss <= default_init_logloss, "logloss from optimal GLM {0} should be lower than logloss from GLM with light tight" \
diff --git a/h2o-r/tests/testdir_algos/glm/runit_GH_6722_redundant_constraints.R b/h2o-r/tests/testdir_algos/glm/runit_GH_6722_redundant_constraints.R
index d41fd01ded8c..59d0ca896cbe 100644
--- a/h2o-r/tests/testdir_algos/glm/runit_GH_6722_redundant_constraints.R
+++ b/h2o-r/tests/testdir_algos/glm/runit_GH_6722_redundant_constraints.R
@@ -24,7 +24,7 @@ test_constraints_redundant <- function() {
   }, error = function(e) {
     print("***")
     print(e)
-    expect_true(grepl("redundant and possibly conflicting linear constraints:", e))
+    expect_true(grepl("redundant linear constraints:", e))
   })
 }