diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..eb07bbd
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,34 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "mambaforge-22.9"
+  # You can also specify other tool versions:
+  # nodejs: "19"
+  # rust: "1.64"
+  # golang: "1.19"
+
+conda:
+  environment: environment.yml
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+
+# Optionally build your docs in additional formats such as PDF and ePub
+# formats:
+#   - pdf
+#   - epub
+
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+# python:
+#   install:
+#     - requirements: docs/requirements.txt
\ No newline at end of file
diff --git a/mlguess/keras/models.py b/mlguess/keras/models.py
index bbf0d26..b46a7ab 100644
--- a/mlguess/keras/models.py
+++ b/mlguess/keras/models.py
@@ -2,14 +2,15 @@
 import sys
 import glob
 import keras
+import keras.ops as ops
 import numpy as np
 import pandas as pd
-import tensorflow as tf
-from tensorflow.keras import Input, Model
-from tensorflow.python.keras import backend as K
-from tensorflow.keras.regularizers import L1, L2, L1L2
-from tensorflow.keras.layers import Dense, LeakyReLU, GaussianNoise, Dropout
-from tensorflow.keras.optimizers import Adam, SGD
+# import tensorflow as tf
+from keras import Input, Model
+# from tensorflow.python.keras import backend as K
+from keras.regularizers import L1, L2, L1L2
+from keras.layers import Dense, LeakyReLU, GaussianNoise, Dropout
+from keras.optimizers import Adam, SGD
 from mlguess.keras.layers import DenseNormalGamma, DenseNormal
 from mlguess.keras.losses import EvidentialRegressionLoss, EvidentialRegressionCoupledLoss, gaussian_nll
 from mlguess.keras.losses import DirichletEvidentialLoss
@@ -248,7 +249,7 @@ def save_model(self):
         if not os.path.exists(self.save_path):
             os.makedirs(self.save_path)
         model_path = os.path.join(self.save_path, self.model_name)
-        tf.keras.models.save_model(
+        keras.models.save_model(
             self.model, model_path, save_format="h5"
         )
         logging.info(f"Saved model to {model_path}")
@@ -311,9 +312,9 @@ def mae(self, y_true, y_pred):
         """ Compute the MAE """
         num_splits = y_pred.shape[-1]
         if num_splits == 4:
-            mu, _, _, _ = tf.split(y_pred, num_splits, axis=-1)
+            mu, _, _, _ = ops.split(y_pred, num_splits, axis=-1)
         elif num_splits == 2:
-            mu, _ = tf.split(y_pred, num_splits, axis=-1)
+            mu, _ = ops.split(y_pred, num_splits, axis=-1)
         else:
             mu = y_pred  # Assuming num_splits is 1
         return keras.metrics.mean_absolute_error(y_true, mu)
@@ -322,9 +323,9 @@ def mse(self, y_true, y_pred):
         """ Compute the MSE """
         num_splits = y_pred.shape[-1]
         if num_splits == 4:
-            mu, _, _, _ = tf.split(y_pred, num_splits, axis=-1)
+            mu, _, _, _ = ops.split(y_pred, num_splits, axis=-1)
         elif num_splits == 2:
-            mu, _ = tf.split(y_pred, num_splits, axis=-1)
+            mu, _ = ops.split(y_pred, num_splits, axis=-1)
         else:
             mu = y_pred  # Assuming num_splits is 1
 
@@ -1041,7 +1042,7 @@ def build_neural_network(self, inputs, outputs):
         else:
             self.kernel_reg = None
 
-        self.model = tf.keras.models.Sequential()
+        self.model = keras.models.Sequential()
         self.model.add(
             Dense(
                 inputs,
@@ -1116,7 +1117,7 @@ def fit(self, x_train, y_train,
            validation_data=None):
         outputs = y_train.shape[-1]
         if self.loss == "evidential":
-            this_epoch_num = K.variable(value=0)
+            this_epoch_num = keras.variable(value=0)
             report_epoch_callback = ReportEpoch(self.annealing_coeff, this_epoch_num)
             self.callbacks.insert(0, report_epoch_callback)
             self.loss = DirichletEvidentialLoss(
@@ -1210,7 +1211,7 @@ def load_model(cls, conf):
         return model_class
 
     def save_model(self, model_path):
-        tf.keras.models.save_model(self.model, model_path, save_format="h5")
+        keras.models.save_model(self.model, model_path, save_format="h5")
         return
 
     def predict(self, x, batch_size=None):
@@ -1229,7 +1230,7 @@ def predict_dropout(self, x, mc_forward_passes=10, batch_size=None):
             [
                 np.vstack(
                     [
-                        self.model(tf.expand_dims(lx, axis=-1), training=True)
+                        self.model(ops.expand_dims(lx, axis=-1), training=True)
                         for lx in np.array_split(x, x.shape[0] // _batch_size)
                     ]
                 )
@@ -1278,9 +1279,9 @@ def predict_ensemble(self, x, batch_size=None):
     def predict_uncertainty(self, x):
         num_classes = self.model.output_shape[-1]
         y_pred = self.predict(x)
-        evidence = tf.nn.relu(y_pred)
+        evidence = ops.relu(y_pred)
         alpha = evidence + 1
-        S = tf.keras.backend.sum(alpha, axis=1, keepdims=True)
+        S = ops.sum(alpha, axis=1, keepdims=True)
         u = num_classes / S
         prob = alpha / S
         epistemic = prob * (1 - prob) / (S + 1)
@@ -1471,7 +1472,7 @@ def load_model(cls, conf):
         return model_class
 
     def save_model(self, model_path):
-        tf.keras.models.save_model(self.model, model_path, save_format="h5")
+        keras.models.save_model(self.model, model_path, save_format="h5")
         return
 
 
@@ -1486,7 +1487,7 @@ def predict_dropout(self, x, mc_forward_passes=10, batch_size=None):
             [
                 np.vstack(
                     [
-                        self.model(tf.expand_dims(lx, axis=-1), training=True)
+                        self.model(ops.expand_dims(lx, axis=-1), training=True)
                         for lx in np.array_split(x, x.shape[0] // _batch_size)
                     ]
                 )
@@ -1535,11 +1536,215 @@ def predict_ensemble(self, x, batch_size=None):
     def predict_uncertainty(self, x):
         num_classes = self.model.output_shape[-1]
         y_pred = self.predict(x)
-        evidence = tf.nn.relu(y_pred)
+        evidence = ops.relu(y_pred)
         alpha = evidence + 1
-        S = tf.keras.backend.sum(alpha, axis=1, keepdims=True)
+        S = ops.sum(alpha, axis=1, keepdims=True)
         u = num_classes / S
         prob = alpha / S
         epistemic = prob * (1 - prob) / (S + 1)
         aleatoric = prob - prob**2 - epistemic
         return prob, u, aleatoric, epistemic
+
+class EvidentialRegressorDNN_keras3(keras.models.Model):
+    """
+    A Dense Neural Network Model that can support arbitrary numbers of hidden layers
+    and provides evidential uncertainty estimation.
+    Inherits from keras.models.Model.
+
+    Attributes:
+        hidden_layers: Number of hidden layers.
+        hidden_neurons: Number of neurons in each hidden layer.
+        activation: Type of activation function.
+        optimizer: Name of optimizer or optimizer object.
+        loss: Name of loss function or loss object.
+        use_noise: Whether additive Gaussian noise layers are included in the network.
+        noise_sd: The standard deviation of the Gaussian noise layers.
+        use_dropout: Whether Dropout layers are added to the network.
+        dropout_alpha: Proportion of neurons randomly set to 0.
+        batch_size: Number of examples per batch.
+        epochs: Number of epochs to train.
+        verbose: Level of detail to provide during training.
+        model: Keras Model object.
+        evidential_coef: Evidential regularization coefficient.
+        metrics: Optional list of metrics to monitor during training.
+ """ + def __init__( + self, + hidden_layers=1, + hidden_neurons=4, + activation="relu", + loss="evidentialReg", + coupling_coef=1.0, # right now we have alpha = ... v.. so alpha will be coupled in new loss + evidential_coef=0.05, + output_activation='linear', + optimizer="adam", + loss_weights=None, + use_noise=False, + noise_sd=0.01, + lr=0.001, + use_dropout=False, + dropout_alpha=0.1, + batch_size=128, + epochs=2, + kernel_reg="l2", + l1_weight=0.01, + l2_weight=0.01, + sgd_momentum=0.9, + adam_beta_1=0.9, + adam_beta_2=0.999, + verbose=0, + save_path=".", + model_name="model.h5", + metrics=None, + eps=1e-7, + **kwargs): + + self.hidden_layers = hidden_layers + self.hidden_neurons = hidden_neurons + self.activation = activation + self.output_activation = output_activation + self.optimizer = optimizer + self.optimizer_obj = None + self.sgd_momentum = sgd_momentum + self.adam_beta_1 = adam_beta_1 + self.adam_beta_2 = adam_beta_2 + self.loss = loss + self.loss_weights = loss_weights + self.lr = lr + self.kernel_reg = kernel_reg + self.l1_weight = l1_weight + self.l2_weight = l2_weight + self.batch_size = batch_size + self.use_noise = use_noise + self.noise_sd = noise_sd + self.use_dropout = use_dropout + self.dropout_alpha = dropout_alpha + self.epochs = epochs + self.verbose = verbose + self.save_path = save_path + self.model_name = model_name + self.model = None + self.optimizer_obj = None + self.training_std = None + self.training_var = [] + self.metrics = metrics + self.eps = eps + self.ensemble_member_files = [] + self.n_output_params = 4 + super().__init__(**kwargs) + + if self.activation == "leaky": + self.activation = LeakyReLU() + if self.kernel_reg == "l1": + self.kernel_reg = L1(self.l1_weight) + elif self.kernel_reg == "l2": + self.kernel_reg = L2(self.l2_weight) + elif self.kernel_reg == "l1_l2": + self.kernel_reg = L1L2(self.l1_weight, self.l2_weight) + else: + self.kernel_reg = None + self.model_layers = [] + for h in range(self.hidden_layers): + self.model_layers.append(Dense(self.hidden_neurons, + activation=self.activation, + kernel_regularizer=self.kernel_reg, + name=f"dense_{h:02d}")) + if self.use_dropout: + self.model_layers.append(Dropout(self.dropout_alpha, name=f"dropout_{h:02d}")) + if self.use_noise: + self.model_layers.append(GaussianNoise(self.noise_sd, name=f"noise_{h:02d}")) + + self.model_layers.append(Dense(self.n_output_params, activation=self.output_activation, name="dense_output")) + + def call(self, inputs): + + layer_output = self.model_layers[0](inputs) + + for l in range(1, len(self.model_layers)): + layer_output = self.model_layers[l](layer_output) + + return layer_output + + def get_config(self): + base_config = super().get_config() + # parameter_config = {hp: getattr(self, hp) for hp in self.hyperparameters} + return base_config + + + + # self.coupling_coef = coupling_coef + # self.evidential_coef = evidential_coef + # self.eps = eps + # + # if ( + # loss == "evidentialReg" + # ): # retains backwards compatibility since default without loss arg is original loss + # self.loss = EvidentialRegressionLoss(coeff=self.evidential_coef) + # elif ( + # loss == "evidentialFix" + # ): # by default we do not regularize this loss as per meinert and lavin + # self.loss = EvidentialRegressionCoupledLoss( + # coeff=self.evidential_coef, r=self.coupling_coef + # ) + # else: + # raise ValueError("loss needs to be one of 'evidentialReg' or 'evidentialFix'") + + # logging.info(f"Using loss: {loss}") + + def calc_uncertainties(self, preds, y_scaler=None): + mu, v, 
+
+        if isinstance(self.loss, EvidentialRegressionCoupledLoss):
+            v = (
+                2 * (alpha - 1) / self.coupling_coef
+            )  # need to couple this way otherwise alpha could be negative
+        aleatoric = beta / (alpha - 1)
+        epistemic = beta / (v * (alpha - 1))
+
+        if len(mu.shape) == 1:
+            mu = np.expand_dims(mu, 1)
+            aleatoric = np.expand_dims(aleatoric, 1)
+            epistemic = np.expand_dims(epistemic, 1)
+
+        if y_scaler:
+            mu = y_scaler.inverse_transform(mu)
+
+        for i in range(mu.shape[-1]):
+            aleatoric[:, i] *= self.training_var[i]
+            epistemic[:, i] *= self.training_var[i]
+
+        return mu, aleatoric, epistemic
+
+    def predict_uncertainty(self, x, scaler=None, batch_size=None):
+        _batch_size = self.batch_size if batch_size is None else batch_size
+        y_out = self.model.predict(x, batch_size=_batch_size)
+        y_out = self.calc_uncertainties(
+            y_out, scaler
+        )  # todo calc uncertainty for coupled params
+        return y_out
+
+    def predict_dist_params(self, x, y_scaler=None, batch_size=None):
+        _batch_size = self.batch_size if batch_size is None else batch_size
+        preds = self.model.predict(x, batch_size=_batch_size)
+        mu, v, alpha, beta = np.split(preds, 4, axis=-1)
+        if isinstance(self.loss, EvidentialRegressionCoupledLoss):
+            v = (
+                2 * (alpha - 1) / self.coupling_coef
+            )  # need to couple this way otherwise alpha could be negative
+
+        if mu.shape[-1] == 1:
+            mu = np.expand_dims(mu, 1)
+        if y_scaler is not None:
+            mu = y_scaler.inverse_transform(mu)
+
+        return mu, v, alpha, beta
+
+    def predict_ensemble(
+        self, x_test, scaler=None, batch_size=None
+    ):
+        return super().predict_ensemble(x_test, scaler=scaler, batch_size=batch_size, num_outputs=3)
+
+    def predict_monte_carlo(
+        self, x_test, forward_passes, scaler=None, batch_size=None
+    ):
+        return super().predict_monte_carlo(x_test, forward_passes, scaler=scaler, batch_size=batch_size, num_outputs=3)
diff --git a/pyproject.toml b/pyproject.toml
index 9b65b63..7008802 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "setuptools-scm"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "mlguess"
+name = "miles-guess"
 authors = [{name = "John Schreck, David John Gagne, Charlie Becker, Gabrielle Gantos", email = "miles@ucar.edu"}]
 readme = "README.md"
 license = {file = "LICENSE"}
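
The sketch below is not part of the patch; it is a minimal, backend-agnostic restatement of the two uncertainty computations this diff migrates, showing the keras.ops replacements for the removed TensorFlow calls (tf.nn.relu -> ops.relu, tf.keras.backend.sum -> ops.sum). The helper names and the toy input are illustrative assumptions, not identifiers from the repository.

    import keras.ops as ops
    import numpy as np

    def dirichlet_uncertainty(y_pred, num_classes):
        # Evidential classification head, mirroring predict_uncertainty above:
        # ReLU evidence -> Dirichlet concentration parameters alpha.
        evidence = ops.relu(y_pred)                # was tf.nn.relu
        alpha = evidence + 1
        S = ops.sum(alpha, axis=1, keepdims=True)  # was tf.keras.backend.sum
        u = num_classes / S                        # vacuity (total uncertainty)
        prob = alpha / S                           # expected class probabilities
        epistemic = prob * (1 - prob) / (S + 1)
        aleatoric = prob - prob**2 - epistemic
        return prob, u, aleatoric, epistemic

    def evidential_regression_uncertainty(preds):
        # Normal-Inverse-Gamma head (mu, v, alpha, beta), mirroring
        # calc_uncertainties in the new EvidentialRegressorDNN_keras3 class.
        mu, v, alpha, beta = np.split(preds, 4, axis=-1)
        aleatoric = beta / (alpha - 1)             # expected data noise
        epistemic = beta / (v * (alpha - 1))       # uncertainty in the mean
        return mu, aleatoric, epistemic

    # Toy logits for a three-class problem; because only keras.ops is used,
    # this runs unchanged on the TensorFlow, JAX, or PyTorch backend.
    logits = np.array([[2.0, 0.5, -1.0]], dtype="float32")
    prob, u, aleatoric, epistemic = dirichlet_uncertainty(logits, num_classes=3)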