diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 3165cee..6d4a48f 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -6,7 +6,7 @@ labels: enhancement --- diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e4e12ab..44f9a49 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,8 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks -# ci: when open add this -# autoupdate_schedule: monthly +ci: + autoupdate_schedule: monthly repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -54,23 +54,24 @@ repos: - --expand-star-imports - --ignore-init-module-imports -- repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 - hooks: - - id: flake8 - files: ^smol/ - language_version: python3 - additional_dependencies: - - flake8-typing-imports==1.10.1 +# flake8 cannot be configured in pyproject.toml, so line lengths will cause many failures +#- repo: https://github.com/PyCQA/flake8 +# rev: 5.0.4 +# hooks: +# - id: flake8 +# files: ^sparselm/ +# language_version: python3 +# additional_dependencies: +# - flake8-typing-imports==1.10.1 # - flake8-docstrings==1.6.0 # fix these - # - flake8-rst-docstrings==0.2.3 - - flake8-rst==0.8.0 +# - flake8-rst-docstrings==0.2.3 +# - flake8-rst==0.8.0 - repo: https://github.com/pycqa/pydocstyle rev: 6.1.1 # pick a git hash / tag to point to hooks: - id: pydocstyle - files: ^smol/ + files: ^sparselm/ - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.9.0 diff --git a/README.md b/README.md index 3b3317e..5713052 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,9 @@ Sparse Linear Regression Models =============================== +[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/CederGroupHub/sparse-lm/main.svg)](https://results.pre-commit.ci/latest/github/CederGroupHub/sparse-lm/main) +[![pypi version](https://img.shields.io/pypi/v/sparse-lm?color=blue)](https://pypi.org/project/sparse-lm) + > :warning: this package is currently largely lacking in unit-tests. > Use at your own risk!
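The new badges advertise a PyPI distribution. A minimal install-and-fit sketch for orientation; it assumes the distribution name `sparse-lm` from the badge URL, the import package `sparselm`, and the public re-exports from `sparselm.model` introduced in the `__init__.py` changes below:

```python
# pip install sparse-lm
import numpy as np

from sparselm.model import Lasso

rng = np.random.default_rng(0)
X = rng.random((50, 10))   # toy feature matrix
y = rng.random(50)         # toy targets

model = Lasso(alpha=1e-3)  # cvxpy-backed estimator with an sklearn-style API
model.fit(X, y)
print(model.coef_)         # fitted coefficients, following sklearn conventions
```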
diff --git a/sparselm/model/__init__.py b/sparselm/model/__init__.py index e3c7bc2..f2ff86b 100644 --- a/sparselm/model/__init__.py +++ b/sparselm/model/__init__.py @@ -1,33 +1,31 @@ """Classes implementing generalized linear regression estimators.""" -from sparselm.model.adaptive_lasso import ( +from sparselm.model._adaptive_lasso import ( AdaptiveGroupLasso, AdaptiveLasso, AdaptiveOverlapGroupLasso, AdaptiveRidgedGroupLasso, AdaptiveSparseGroupLasso, ) -from sparselm.model.lasso import ( +from sparselm.model._lasso import ( GroupLasso, Lasso, OverlapGroupLasso, RidgedGroupLasso, SparseGroupLasso, ) -from sparselm.model.miqp.best_subset import ( - BestGroupSelection, - BestSubsetSelection, - RidgedBestGroupSelection, - RidgedBestSubsetSelection, -) -from sparselm.model.miqp.regularized_l0 import ( +from sparselm.model._ols import OrdinaryLeastSquares +from sparselm.model.miqp import ( L1L0, L2L0, + BestGroupSelection, + BestSubsetSelection, GroupedL0, GroupedL2L0, RegularizedL0, + RidgedBestGroupSelection, + RidgedBestSubsetSelection, ) -from sparselm.model.ols import OrdinaryLeastSquares __all__ = [ "OrdinaryLeastSquares", diff --git a/sparselm/model/adaptive_lasso.py b/sparselm/model/_adaptive_lasso.py similarity index 84% rename from sparselm/model/adaptive_lasso.py rename to sparselm/model/_adaptive_lasso.py index f1bc74a..7a240d4 100644 --- a/sparselm/model/adaptive_lasso.py +++ b/sparselm/model/_adaptive_lasso.py @@ -1,4 +1,5 @@ """A set of generalized adaptive lasso estimators. + * Adaptive Lasso * Adaptive Group Lasso * Adaptive Overlap Group Lasso @@ -22,7 +23,7 @@ import cvxpy as cp import numpy as np -from sparselm.model.lasso import ( +from sparselm.model._lasso import ( GroupLasso, Lasso, OverlapGroupLasso, @@ -48,10 +49,10 @@ def __init__( tol=1e-10, update_function=None, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): """Initialize estimator. @@ -62,26 +63,16 @@ def __init__( max_iter (int): Maximum number of re-weighting iteration steps. eps (float): - Value to add to denominatar of weights. + Value to add to denominator of weights. tol (float): Absolute convergence tolerance for difference between weights at successive steps. update_function (Callable): optional A function with signature f(beta, eps) used to update the weights at each iteration. Default is 1/(|beta| + eps) - standardize (bool): optional - Whether to standardize the group regularization penalty using - the feature matrix. See the following for reference: - http://faculty.washington.edu/nrsimon/standGL.pdf fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -92,17 +83,17 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. 
+ See docs in CVXEstimator for more information. """ super().__init__( alpha=alpha, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) self.tol = tol @@ -130,14 +121,14 @@ def _update_weights(self, beta): def _weights_converged(self): return np.linalg.norm(self._weights.value - self._previous_weights) <= self.tol - def _solve(self, X, y): + def _solve(self, X, y, *args, **kwargs): problem = self._get_problem(X, y) problem.solve( - solver=self.solver, warm_start=self.warm_start, **self.solver_opts + solver=self.solver, warm_start=self.warm_start, **self.solver_options ) for _ in range(self.max_iter - 1): self._update_weights(self._beta.value) - problem.solve(solver=self.solver, warm_start=True, **self.solver_opts) + problem.solve(solver=self.solver, warm_start=True, **self.solver_options) if self._weights_converged(): break return self._beta.value @@ -163,10 +154,10 @@ def __init__( update_function=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): """Initialize estimator. @@ -187,7 +178,7 @@ def __init__( max_iter (int): Maximum number of re-weighting iteration steps. eps (float): - Value to add to denominatar of weights. + Value to add to denominator of weights. tol (float): Absolute convergence tolerance for difference between weights at successive steps. @@ -203,12 +194,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -219,9 +204,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ # call with keywords to avoid MRO issues super().__init__( @@ -234,10 +219,10 @@ def __init__( update_function=update_function, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) @@ -277,11 +262,10 @@ def __init__( update_function=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs, + solver_options=None, ): """Initialize estimator. @@ -306,7 +290,7 @@ def __init__( max_iter (int): Maximum number of re-weighting iteration steps. eps (float): - Value to add to denominatar of weights. + Value to add to denominator of weights. tol (float): Absolute convergence tolerance for difference between weights at successive steps. @@ -322,12 +306,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. 
- If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -338,9 +316,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ # call with keywords to avoid MRO issues super().__init__( @@ -353,11 +331,10 @@ def __init__( update_function=update_function, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs, + solver_options=solver_options, ) def _gen_objective(self, X, y): @@ -394,11 +371,10 @@ def __init__( update_function=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs, + solver_options=None, ): """Initialize estimator. @@ -420,7 +396,7 @@ def __init__( max_iter (int): Maximum number of re-weighting iteration steps. eps (float): - Value to add to denominatar of weights. + Value to add to denominator of weights. tol (float): Absolute convergence tolerance for difference between weights at successive steps. @@ -436,12 +412,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -452,9 +422,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ # call with keywords to avoid MRO issues super().__init__( @@ -468,11 +438,10 @@ def __init__( update_function=update_function, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs, + solver_options=solver_options, ) def _gen_regularization(self, X): @@ -484,7 +453,7 @@ def _gen_regularization(self, X): value=self._lambda1.value * np.ones(X.shape[1]), ), cp.Parameter( - shape=len(self.group_masks), + shape=(len(self.group_masks),), nonneg=True, value=self._lambda2.value * self.group_weights, ), @@ -540,11 +509,10 @@ def __init__( update_function=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs, + solver_options=None, ): """Initialize estimator. @@ -569,7 +537,7 @@ def __init__( max_iter (int): Maximum number of re-weighting iteration steps. eps (float): - Value to add to denominatar of weights. 
+ Value to add to denominator of weights. tol (float): Absolute convergence tolerance for difference between weights at successive steps. @@ -578,12 +546,6 @@ def __init__( weights at each iteration. Where group_norms are the norms of the coefficients Beta for each group. Default is 1/(group_norms + eps) - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -594,9 +556,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ super().__init__( groups=groups, @@ -609,11 +571,10 @@ def __init__( group_weights=group_weights, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs, + solver_options=solver_options, ) def _gen_group_norms(self, X): diff --git a/sparselm/model/base.py b/sparselm/model/_base.py similarity index 74% rename from sparselm/model/base.py rename to sparselm/model/_base.py index 0087795..612bc14 100644 --- a/sparselm/model/base.py +++ b/sparselm/model/_base.py @@ -13,14 +13,14 @@ from sklearn.linear_model._base import ( LinearModel, _check_sample_weight, - _deprecate_normalize, _preprocess_data, _rescale_data, ) -class Estimator(LinearModel, RegressorMixin, metaclass=ABCMeta): - """ +class Estimator(RegressorMixin, LinearModel, metaclass=ABCMeta): + """Abstract estimator base class. + Simple abstract estimator class based on sklearn linear model api to use different 'in-house' solvers to fit a linear model. This should be used to create specific estimator classes by inheriting. New classes simply need to @@ -29,30 +29,17 @@ class Estimator(LinearModel, RegressorMixin, metaclass=ABCMeta): Keyword arguments are the same as those found in sklearn linear models. """ - def __init__( - self, fit_intercept: bool = False, normalize: bool = False, copy_X: bool = True - ): - """ - fit_intercept : bool, default=True + def __init__(self, fit_intercept: bool = False, copy_X: bool = True): + """Initialize estimator. - If you wish to standardize, please use - :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` - on an estimator with ``normalize=False``. Args: fit_intercept (bool): Whether the intercept should be estimated or not. If ``False``, - the data is assumed to be already centered. normalize : bool - default=False. - normalize (bool): - This parameter is ignored when ``fit_intercept`` is set to - False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. + the data is assumed to be already centered. copy_X (bool): If ``True``, X will be copied; else, it may be overwritten. 
""" self.fit_intercept = fit_intercept - self.normalize = normalize self.copy_X = copy_X def fit(self, X, y, sample_weight=None, *args, **kwargs): @@ -101,15 +88,11 @@ def _preprocess_data(self, X, y, copy=True, sample_weight=None): In the future, may add additional functionalities beyond sklearn basics. """ - _normalize = _deprecate_normalize( - self.normalize, default=False, estimator_name=self.__class__.__name__ - ) return _preprocess_data( X, y, copy=copy, fit_intercept=self.fit_intercept, - normalize=_normalize, sample_weight=sample_weight, ) @@ -124,33 +107,28 @@ class CVXEstimator(Estimator, metaclass=ABCMeta): Base class for estimators using cvxpy with a sklearn interface. Note cvxpy can use one of many 3rd party solvers, default is most often - CVXOPT. The solver can be specified by providing arguments to the cvxpy - problem.solve function. And can be set by passing those arguments to the - constructur of this class - See documentation for more: - https://ajfriendcvxpy.readthedocs.io/en/latest/tutorial/advanced/index.html#solve-method-options + CVXOPT. The solver can be specified by setting the solver keyword argument. + And can solver specific settings can be set by passing a dictionary of + solver_options. + + See "Setting solver options" in documentation for details of available options: + https://www.cvxpy.org/tutorial/advanced/index.html#advanced """ def __init__( self, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs + solver_options=None, ): - """ + """Initialize estimator. + Args: fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -161,15 +139,20 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. See docs linked above for more information. """ self.warm_start = warm_start self.solver = solver - self.solver_opts = kwargs + + if solver_options is None: + self.solver_options = {} + else: + self.solver_options = solver_options + self._problem, self._beta, self._X, self._y = None, None, None, None - super().__init__(fit_intercept, normalize, copy_X) + super().__init__(fit_intercept, copy_X) @abstractmethod def _gen_objective(self, X, y): @@ -203,7 +186,7 @@ def _gen_constraints(self, X, y): return None def _initialize_problem(self, X, y): - """Initialize cvxpy problem from the generated objective function + """Initialize cvxpy problem from the generated objective function. 
Args: X (ndarray): @@ -219,7 +202,7 @@ def _initialize_problem(self, X, y): self._problem = cp.Problem(cp.Minimize(objective), constraints) def _get_problem(self, X, y): - """Define and create cvxpy optimization problem""" + """Define and create cvxpy optimization problem.""" if self._problem is None: self._initialize_problem(X, y) elif not np.array_equal(X, self._X) or not np.array_equal(y, self._y): @@ -230,6 +213,6 @@ def _solve(self, X, y, *args, **kwargs): """Solve the cvxpy problem.""" problem = self._get_problem(X, y) problem.solve( - solver=self.solver, warm_start=self.warm_start, **self.solver_opts + solver=self.solver, warm_start=self.warm_start, **self.solver_options ) return self._beta.value diff --git a/sparselm/model/lasso.py b/sparselm/model/_lasso.py similarity index 85% rename from sparselm/model/lasso.py rename to sparselm/model/_lasso.py index 4ae6a00..dc53bed 100644 --- a/sparselm/model/lasso.py +++ b/sparselm/model/_lasso.py @@ -1,4 +1,5 @@ """A set of generalized lasso estimators. + * Lasso * Group Lasso * Overlap Group Lasso @@ -17,7 +18,7 @@ import numpy as np from scipy.linalg import sqrtm -from sparselm.model.base import CVXEstimator +from sparselm.model._base import CVXEstimator class Lasso(CVXEstimator): @@ -33,25 +34,19 @@ def __init__( self, alpha=1.0, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs, + solver_options=None, ): - """ + """Initialize a Lasso estimator. + Args: alpha (float): Regularization hyper-parameter. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -62,26 +57,27 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ self._alpha = cp.Parameter(value=alpha, nonneg=True) super().__init__( fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs, + solver_options=solver_options, ) @property def alpha(self): + """Get alpha hyperparameter value.""" return self._alpha.value @alpha.setter def alpha(self, val): + """Set alpha hyperparameter value.""" self._alpha.value = val def _gen_regularization(self, X): @@ -109,10 +105,10 @@ def __init__( group_weights=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): """Initialize estimator. @@ -137,12 +133,6 @@ def __init__( Whether to standardize the group regularization penalty using the feature matrix. See the following for reference: http://faculty.washington.edu/nrsimon/standGL.pdf - normalize (bool): optional - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. 
- If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -153,9 +143,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ self.groups = np.asarray(groups) self.group_masks = [self.groups == i for i in np.unique(groups)] @@ -177,10 +167,10 @@ def __init__( super().__init__( alpha=alpha, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) @@ -216,10 +206,10 @@ def __init__( group_weights=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): """Initialize estimator. @@ -249,12 +239,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -265,9 +249,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ self.group_list = group_list self.group_ids = np.unique([gid for grp in group_list for gid in grp]) @@ -293,10 +277,10 @@ def __init__( group_weights=group_weights, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) @@ -304,7 +288,7 @@ def _solve(self, X, y, *args, **kwargs): """Solve the cvxpy problem.""" problem = self._get_problem(X[:, self.beta_indices], y) problem.solve( - solver=self.solver, warm_start=self.warm_start, **self.solver_opts + solver=self.solver, warm_start=self.warm_start, **self.solver_options ) beta = np.array( [sum(self._beta.value[self.beta_indices == i]) for i in range(X.shape[1])] @@ -330,10 +314,10 @@ def __init__( group_weights=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): """Initialize estimator. @@ -360,12 +344,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. 
- If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -376,9 +354,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ super().__init__( groups=groups, @@ -386,10 +364,10 @@ def __init__( group_weights=group_weights, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) @@ -415,16 +393,19 @@ def __init__( @Lasso.alpha.setter def alpha(self, val): + """Set hyperparameter values.""" self._alpha.value = val self._lambda1.value = self.l1_ratio * val self._lambda2.value = (1 - self.l1_ratio) * val @property def l1_ratio(self): + """Get l1 ratio.""" return self._l1_ratio @l1_ratio.setter def l1_ratio(self, val): + """Set hyper-parameter values.""" if not 0 <= val <= 1: raise ValueError("l1_ratio must be between 0 and 1.") self._l1_ratio = val @@ -458,10 +439,10 @@ def __init__( group_weights=None, standardize=False, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): """Initialize estimator. @@ -488,12 +469,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -504,9 +479,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked in CVXEstimator base class for more info. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ super().__init__( groups=groups, @@ -514,10 +489,10 @@ def __init__( group_weights=group_weights, standardize=standardize, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) diff --git a/sparselm/model/ols.py b/sparselm/model/_ols.py similarity index 71% rename from sparselm/model/ols.py rename to sparselm/model/_ols.py index b1fdee3..b41f13a 100644 --- a/sparselm/model/ols.py +++ b/sparselm/model/_ols.py @@ -4,13 +4,11 @@ import cvxpy as cp -from .base import CVXEstimator +from ._base import CVXEstimator class OrdinaryLeastSquares(CVXEstimator): - """ - OLS Linear Regression Estimator implemented with cvxpy. 
- """ + """OLS Linear Regression Estimator implemented with cvxpy.""" def _gen_objective(self, X, y): return 1 / (2 * X.shape[0]) * cp.sum_squares(X @ self._beta - y) diff --git a/sparselm/model/miqp/__init__.py b/sparselm/model/miqp/__init__.py index e69de29..7f53fd4 100644 --- a/sparselm/model/miqp/__init__.py +++ b/sparselm/model/miqp/__init__.py @@ -0,0 +1,28 @@ +"""MIQP based regression estimators.""" + + +from sparselm.model.miqp._best_subset import ( + BestGroupSelection, + BestSubsetSelection, + RidgedBestGroupSelection, + RidgedBestSubsetSelection, +) +from sparselm.model.miqp._regularized_l0 import ( + L1L0, + L2L0, + GroupedL0, + GroupedL2L0, + RegularizedL0, +) + +__all__ = [ + "BestSubsetSelection", + "BestGroupSelection", + "RidgedBestSubsetSelection", + "RidgedBestGroupSelection", + "RegularizedL0", + "L1L0", + "L2L0", + "GroupedL0", + "GroupedL2L0", +] diff --git a/sparselm/model/miqp/best_subset.py b/sparselm/model/miqp/_best_subset.py similarity index 81% rename from sparselm/model/miqp/best_subset.py rename to sparselm/model/miqp/_best_subset.py index a2dde03..2151db9 100644 --- a/sparselm/model/miqp/best_subset.py +++ b/sparselm/model/miqp/_best_subset.py @@ -9,14 +9,14 @@ import numpy as np from cvxpy.atoms.affine.wraps import psd_wrap -from sparselm.model.base import CVXEstimator +from sparselm.model._base import CVXEstimator class BestSubsetSelection(CVXEstimator): - """MIQP Best Subset Selection estimator + """MIQP Best Subset Selection estimator. WARNING: Even with gurobi solver, this can take a very long time to - converge for large problems and underdetermined problems. + converge for large problems and under-determined problems. """ def __init__( @@ -26,13 +26,13 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): - """ + """Initialize estimator. Args: sparse_bound (int): @@ -50,18 +50,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -72,17 +66,16 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. 
""" super().__init__( fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs, + solver_options=solver_options, ) self._bound = cp.Parameter(nonneg=True, value=sparse_bound) @@ -93,24 +86,28 @@ def __init__( @property def sparse_bound(self): + """Get sparse bound value.""" return self._bound.value @sparse_bound.setter def sparse_bound(self, val): + """Set sparse bound value.""" if val <= 0: raise ValueError(f"sparse_bound must be > 0") self._bound.value = val @property def big_M(self): + """Get MIQP big M value.""" return self._big_M.value @big_M.setter def big_M(self, val): + """Set MIQP big M value.""" self._big_M.value = val def _gen_objective(self, X, y): - """Generate the quadratic form portion of objective""" + """Generate the quadratic form portion of objective.""" # psd_wrap will ignore cvxpy PSD checks, without it errors will # likely be raised since correlation matrices are usually very # poorly conditioned @@ -120,7 +117,7 @@ def _gen_objective(self, X, y): return objective def _gen_constraints(self, X, y): - """Generate the constraints used to solve l0 regularization""" + """Generate the constraints used to solve l0 regularization.""" self._z0 = cp.Variable(X.shape[1], boolean=True) constraints = [ self._big_M * self._z0 >= self._beta, @@ -133,7 +130,7 @@ def _gen_constraints(self, X, y): return constraints def _gen_hierarchy_constraints(self): - """Generate single feature hierarchy constraints""" + """Generate single feature hierarchy constraints.""" return [ self._z0[high_id] <= self._z0[sub_id] for high_id, sub_ids in enumerate(self.hierarchy) @@ -152,13 +149,14 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): - """ + """Initialize estimator. + Args: sparse_bound (int): Upper bound on sparsity. The upper bound on total number of @@ -181,12 +179,6 @@ def __init__( fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -197,9 +189,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. 
""" super().__init__( sparse_bound=sparse_bound, @@ -207,24 +199,26 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) self._alpha = cp.Parameter(nonneg=True, value=alpha) @property def alpha(self): + """Get alpha hyper-parameter value.""" return self._alpha.value @alpha.setter def alpha(self, val): + """Set alpha hyper-parameter value.""" self._alpha.value = val def _gen_objective(self, X, y): - """Generate the objective function used in l2l0 regression model""" + """Generate the objective function used in l2l0 regression model.""" c0 = 2 * X.shape[0] # keeps hyperparameter scale independent objective = super()._gen_objective(X, y) + c0 * self._alpha * cp.sum_squares( self._beta @@ -243,13 +237,14 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs, ): - """ + """Initialize a Lasso estimator. + Args: groups (list or ndarray): array-like of integers specifying groups. Length should be the @@ -270,18 +265,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -292,9 +281,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ super().__init__( sparse_bound=sparse_bound, @@ -302,10 +291,10 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs, ) self.groups = np.asarray(groups) @@ -313,7 +302,7 @@ def __init__( self._z0 = cp.Variable(len(self._group_masks), boolean=True) def _gen_constraints(self, X, y): - """Generate the constraints used to solve l0 regularization""" + """Generate the constraints used to solve l0 regularization.""" constraints = [] for i, mask in enumerate(self._group_masks): constraints += [ @@ -338,13 +327,13 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs, + solver_options=None, ): - """ + """Initialize estimator. + Args: groups (list or ndarray): array-like of integers specifying groups. 
Length should be the @@ -367,18 +356,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -389,9 +372,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ # need to call super for sklearn clone function super().__init__( @@ -402,11 +385,10 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs, + solver_options=solver_options, ) def _gen_objective(self, X, y): diff --git a/sparselm/model/miqp/regularized_l0.py b/sparselm/model/miqp/_regularized_l0.py similarity index 82% rename from sparselm/model/miqp/regularized_l0.py rename to sparselm/model/miqp/_regularized_l0.py index 2f0b5d5..c9dae3b 100644 --- a/sparselm/model/miqp/regularized_l0.py +++ b/sparselm/model/miqp/_regularized_l0.py @@ -1,8 +1,10 @@ -"""MIQP based solvers for sparse solutions with hierarchical constraints +"""MIQP based solvers for sparse solutions with hierarchical constraints. Mixed L1L0 and L2L0 solvers. -L1L0 proposed by Wenxuan Huang: https://arxiv.org/abs/1807.10753 -L2L0 proposed by Peichen Zhong +L1L0 proposed by Wenxuan Huang: + https://arxiv.org/abs/1807.10753 +L2L0 proposed by Peichen Zhong: + https://journals.aps.org/prb/abstract/10.1103/PhysRevB.106.024203 Estimators allow optional inclusion of hierarchical at the single feature single coefficient level. @@ -18,7 +20,7 @@ import numpy as np from cvxpy.atoms.affine.wraps import psd_wrap -from sparselm.model.base import CVXEstimator +from sparselm.model._base import CVXEstimator class RegularizedL0(CVXEstimator): @@ -31,13 +33,13 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs + solver_options=None, ): - """ + """Initialize estimator. + Args: alpha (float): Regularization hyper-parameter. @@ -53,18 +55,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. 
If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -75,17 +71,16 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ super().__init__( fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs + solver_options=solver_options, ) self.hierarchy = hierarchy @@ -97,23 +92,27 @@ def __init__( @property def alpha(self): + """Get alpha hyperparameter value.""" return self._alpha @alpha.setter def alpha(self, val): + """Set alpha hyperparameter value.""" self._alpha = val self._lambda0.value = val @property def big_M(self): + """Get MIQP big M value.""" return self._big_M.value @big_M.setter def big_M(self, val): + """Set MIQP big M value.""" self._big_M.value = val def _gen_objective(self, X, y): - """Generate the quadratic form portion of objective""" + """Generate the quadratic form portion of objective.""" # psd_wrap will ignore cvxpy PSD checks, without it errors will # likely be raised since correlation matrices are usually very # poorly conditioned @@ -129,7 +128,7 @@ def _gen_objective(self, X, y): return objective def _gen_constraints(self, X, y): - """Generate the constraints used to solve l0 regularization""" + """Generate the constraints used to solve l0 regularization.""" constraints = [ self._big_M * self._z0 >= self._beta, self._big_M * self._z0 >= -self._beta, @@ -140,7 +139,7 @@ def _gen_constraints(self, X, y): return constraints def _gen_hierarchy_constraints(self): - """Generate single feature hierarchy constraints""" + """Generate single feature hierarchy constraints.""" return [ self._z0[high_id] <= self._z0[sub_id] for high_id, sub_ids in enumerate(self.hierarchy) @@ -159,13 +158,14 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs ): - """ + """Initialize estimator. + Args: alpha (float): Regularization hyper-parameter. @@ -183,18 +183,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. 
- If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -205,9 +199,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. """ super().__init__( alpha=alpha, @@ -215,10 +209,10 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs ) @@ -237,16 +231,19 @@ def __init__( @RegularizedL0.alpha.setter def alpha(self, val): + """Set hyperparameter values.""" self._alpha = val self._lambda0.value = self.l0_ratio * val self._lambda1.value = (1 - self.l0_ratio) * val @property def l0_ratio(self): + """Get l0 ratio.""" return self._l0_ratio @l0_ratio.setter def l0_ratio(self, val): + """Set l0 ratio.""" if not 0 <= val <= 1: raise ValueError("l0_ratio must be between 0 and 1.") self._l0_ratio = val @@ -260,7 +257,8 @@ def _gen_objective(self, X, y): class L1L0(MixedL0): - """ + """L1L0 regularized estimator. + Estimator with L1L0 regularization solved with mixed integer programming as discussed in: https://arxiv.org/abs/1807.10753 @@ -288,13 +286,13 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs + solver_options=None, ): - """ + """Initialize estimator. + Args: alpha (float): Regularization hyper-parameter. @@ -312,18 +310,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -334,9 +326,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. 
""" super().__init__( alpha=alpha, @@ -345,16 +337,15 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs + solver_options=solver_options, ) self._z1 = None def _gen_constraints(self, X, y): - """Generate the constraints used to solve l1l0 regularization""" + """Generate the constraints used to solve l1l0 regularization.""" constraints = super()._gen_constraints(X, y) # L1 constraints (why not do an l1 norm in the objective instead?) constraints += [self._z1 >= self._beta, self._z1 >= -1.0 * self._beta] @@ -362,7 +353,7 @@ def _gen_constraints(self, X, y): return constraints def _gen_objective(self, X, y): - """Generate the objective function used in l1l0 regression model""" + """Generate the objective function used in l1l0 regression model.""" self._z1 = cp.Variable(X.shape[1]) c0 = 2 * X.shape[0] # keeps hyperparameter scale independent objective = super()._gen_objective(X, y) + c0 * self._lambda1 * cp.sum(self._z1) @@ -371,7 +362,8 @@ def _gen_objective(self, X, y): class L2L0(MixedL0): - """ + """L2L0 regularized estimator. + Estimator with L2L0 regularization solved with mixed integer programming proposed by Peichen Zhong. @@ -383,7 +375,7 @@ class L2L0(MixedL0): """ def _gen_objective(self, X, y): - """Generate the objective function used in l2l0 regression model""" + """Generate the objective function used in l2l0 regression model.""" c0 = 2 * X.shape[0] # keeps hyperparameter scale independent objective = super()._gen_objective(X, y) + c0 * self._lambda1 * cp.sum_squares( self._beta @@ -403,13 +395,14 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, + solver_options=None, **kwargs ): - """ + """Initialize estimator. + Args: groups (list or ndarray): array-like of integers specifying groups. Length should be the @@ -429,18 +422,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -451,9 +438,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. 
""" super().__init__( alpha=alpha, @@ -461,10 +448,10 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, + solver_options=solver_options, **kwargs ) @@ -473,8 +460,7 @@ def __init__( self._z0 = cp.Variable(len(self._group_masks), boolean=True) def _gen_objective(self, X, y): - """Generate the quadratic form portion of objective""" - print("called GL0!") + """Generate the quadratic form portion of objective.""" c0 = 2 * X.shape[0] # keeps hyperparameter scale independent XTX = psd_wrap(X.T @ X) if self.ignore_psd_check else X.T @ X objective = ( @@ -485,7 +471,7 @@ def _gen_objective(self, X, y): return objective def _gen_constraints(self, X, y): - """Generate the constraints used to solve l0 regularization""" + """Generate the constraints used to solve l0 regularization.""" constraints = [] for i, mask in enumerate(self._group_masks): constraints += [ @@ -499,9 +485,7 @@ def _gen_constraints(self, X, y): class GroupedL2L0(GroupedL0, MixedL0): - """ - Estimator with grouped L2L0 regularization solved with mixed integer programming - """ + """Estimator with grouped L2L0 regularization solved with MIQP.""" def __init__( self, @@ -512,13 +496,13 @@ def __init__( hierarchy=None, ignore_psd_check=True, fit_intercept=False, - normalize=False, copy_X=True, warm_start=False, solver=None, - **kwargs + solver_options=None, ): - """ + """Initialize estimator. + Args: groups (list or ndarray): array-like of integers specifying groups. Length should be the @@ -540,18 +524,12 @@ def __init__( coefficient 0 depends on 1, and 2; 1 depends on 0, and 2 has no dependence. ignore_psd_check (bool): - Wether to ignore cvxpy's PSD checks of matrix used in quadratic + Whether to ignore cvxpy's PSD checks of matrix used in quadratic form. Default is True to avoid raising errors for poorly conditioned matrices. But if you want to be strict set to False. fit_intercept (bool): Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. - normalize (bool): - This parameter is ignored when fit_intercept is set to False. - If True, the regressors X will be normalized before regression - by subtracting the mean and dividing by the l2-norm. - If you wish to standardize, please use StandardScaler before - calling fit on an estimator with normalize=False copy_X (bool): If True, X will be copied; else, it may be overwritten. warm_start (bool): @@ -562,9 +540,9 @@ def __init__( cvxpy backend solver to use. Supported solvers are: ECOS, ECOS_BB, CVXOPT, SCS, GUROBI, Elemental. GLPK and GLPK_MI (via CVXOPT GLPK interface) - **kwargs: - Kewyard arguments passed to cvxpy solve. - See docs linked above for more information. + solver_options: + dictionary of keyword arguments passed to cvxpy solve. + See docs in CVXEstimator for more information. 
""" # need to call super for sklearn clone function super().__init__( @@ -575,15 +553,14 @@ def __init__( hierarchy=hierarchy, ignore_psd_check=ignore_psd_check, fit_intercept=fit_intercept, - normalize=normalize, copy_X=copy_X, warm_start=warm_start, solver=solver, - **kwargs + solver_options=solver_options, ) def _gen_objective(self, X, y): - """Generate the objective function used in l2l0 regression model""" + """Generate the objective function used in l2l0 regression model.""" c0 = 2 * X.shape[0] # keeps hyperparameter scale independent objective = super()._gen_objective(X, y) + c0 * self._lambda1 * cp.sum_squares( self._beta diff --git a/sparselm/optimizer.py b/sparselm/optimizer.py index 9073171..4906018 100644 --- a/sparselm/optimizer.py +++ b/sparselm/optimizer.py @@ -56,7 +56,8 @@ def __init__( error_score=np.nan, return_train_score=False, ): - """ + """Initialize CVSearch tool. + Args: estimator(Estimator): A object of that type is instantiated for each grid point. @@ -423,7 +424,8 @@ def __init__( error_score=np.nan, return_train_score=False, ): - """ + """Initialize a LineSearch. + Args: estimator(Estimator): A object of that type is instantiated for each grid point. diff --git a/sparselm/tools.py b/sparselm/tools.py index 3d93676..43285fd 100644 --- a/sparselm/tools.py +++ b/sparselm/tools.py @@ -29,7 +29,6 @@ def your_fit_method(X, y): low (float or array): lower bounds for indices """ - indices = np.array(indices) if high is not None: high = ( diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py index d4d712d..1cab19f 100644 --- a/tests/test_optimizers.py +++ b/tests/test_optimizers.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from sparselm.model.miqp.regularized_l0 import L1L0, L2L0 +from sparselm.model.miqp._regularized_l0 import L1L0, L2L0 from sparselm.optimizer import GridSearch, LineSearch ALL_CRITERION = ["max_r2", "one_std_r2"]