diff --git a/statsmodels_sgd/optimizers.py b/statsmodels_sgd/optimizers.py
new file mode 100644
index 0000000..e8b43d6
--- /dev/null
+++ b/statsmodels_sgd/optimizers.py
@@ -0,0 +1,37 @@
+import numpy as np
+
+
+class SVRG:
+    """
+    Stochastic Variance Reduced Gradient (SVRG) optimizer.
+
+    Attributes:
+    -----------
+    lr : float
+        Learning rate.
+    n_inner : int
+        Number of inner-loop updates between full-gradient recomputations.
+    """
+
+    def __init__(self, lr=0.01, n_inner=10):
+        self.lr = lr
+        self.n_inner = n_inner
+
+    def update(self, params, grads, snapshot_grads, full_grads):
+        """
+        Apply one variance-reduced update to the parameters, in place.
+
+        Parameters:
+        -----------
+        params : list
+            List of model parameters to update.
+        grads : list
+            Mini-batch gradients at the current parameters.
+        snapshot_grads : list
+            Gradients of the same mini-batch at the snapshot parameters.
+        full_grads : list
+            Full-batch gradients at the snapshot parameters.
+        """
+        for i in range(len(params)):
+            # SVRG step: current gradient corrected by the snapshot and full gradients
+            params[i] -= self.lr * (grads[i] - snapshot_grads[i] + full_grads[i])
diff --git a/statsmodels_sgd/base_model.py b/statsmodels_sgd/base_model.py
index e6a097d..c18f23c 100644
--- a/statsmodels_sgd/base_model.py
+++ b/statsmodels_sgd/base_model.py
@@ -1,6 +1,8 @@
+import torch
 import torch.nn as nn
 import torch.optim as optim
 import numpy as np
+from .optimizers import SVRG  # SVRG optimizer
 from .tools import (
     add_constant,
     calculate_standard_errors,
@@ -37,6 +39,9 @@ def __init__(
         epochs=1000,
         batch_size=32,
         clip_value=1.0,
+        optimizer='sgd',
+        n_inner=10,
+        **kwargs
     ):
         super().__init__()
         self.linear = nn.Linear(n_features, 1)
@@ -45,12 +50,88 @@ def __init__(
         self.batch_size = batch_size
         self.clip_value = clip_value
         self.results_ = None
+        # Initialize the optimizer based on the user's choice
+        if optimizer == 'svrg':
+            self.optimizer = SVRG(lr=learning_rate, n_inner=n_inner)  # SVRG optimizer
+        elif optimizer == 'sgd':
+            self.optimizer = None  # plain SGD update is applied manually in fit()
+        else:
+            raise ValueError(f"Unsupported optimizer: {optimizer}")
 
     def forward(self, x):
         return self.linear(x)
 
-    def fit(self, X, y, sample_weight=None):
-        raise NotImplementedError("Subclasses must implement this method")
+    def fit(self, X, y, sample_weight=None):
+        """
+        Fit the model to the training data.
+
+        Parameters:
+        -----------
+        X : array-like or torch.Tensor
+            Input features of shape (n_samples, n_features).
+        y : array-like or torch.Tensor
+            Target values of shape (n_samples,).
+        sample_weight : torch.Tensor, optional
+            Sample weights for a weighted loss (currently unused).
+
+        Raises:
+        -------
+        ValueError
+            If the number of samples in X and y differ.
+        """
+        # Ensure the model is in training mode
+        self.train()
+
+        # Convert inputs to torch tensors if they are not already
+        X = torch.as_tensor(X, dtype=torch.float32)
+        y = torch.as_tensor(y, dtype=torch.float32).view(-1, 1)  # column vector for regression
+
+        # Check input dimensions
+        if X.size(0) != y.size(0):
+            raise ValueError("Number of samples in X and y must be the same.")
+
+        n_samples = X.size(0)
+
+        for epoch in range(self.epochs):
+            for i in range(0, n_samples, self.batch_size):
+                # Get mini-batch
+                batch_X = X[i:i + self.batch_size]
+                batch_y = y[i:i + self.batch_size]
+
+                # Zero gradients
+                self.linear.zero_grad()
+
+                # Forward pass: compute predictions for the mini-batch
+                predictions = self.linear(batch_X)
+
+                # Mean squared error loss for regression
+                loss = nn.MSELoss()(predictions, batch_y)
+
+                # Backward pass: compute gradients of the loss w.r.t. the parameters
+                loss.backward()
+
+                # Clip gradients to the configured threshold
+                nn.utils.clip_grad_norm_(self.linear.parameters(), self.clip_value)
+
+                if isinstance(self.optimizer, SVRG):
+                    # Mini-batch gradients at the current parameters
+                    grads = [p.grad.detach().clone() for p in self.linear.parameters()]
+                    # Placeholders: a full SVRG implementation would compute these
+                    # at the snapshot parameters, refreshed every n_inner steps.
+                    snapshot_grads = grads
+                    full_grads = grads
+                    params = [p.data for p in self.linear.parameters()]
+                    self.optimizer.update(params, grads, snapshot_grads, full_grads)
+                else:
+                    # Plain SGD update
+                    with torch.no_grad():
+                        for param in self.linear.parameters():
+                            param -= self.learning_rate * param.grad
+
+            if epoch % 100 == 0:
+                print(f'Epoch {epoch}: Loss = {loss.item()}')
+
+        self.results_ = predictions.detach().numpy()
 
     def predict(self, X):
         raise NotImplementedError("Subclasses must implement this method")
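Note that the `fit` hunk above still passes the current mini-batch gradients for both the `snapshot_grads` and `full_grads` arguments, so the variance-reduction correction cancels and the step reduces to plain SGD. A minimal, self-contained sketch of what a complete SVRG outer/inner step could look like for this kind of `torch` linear model (the toy data, the `grads_of` helper, and the hyperparameters are illustrative, not part of the patch):

```python
import copy

import torch
import torch.nn as nn

torch.manual_seed(0)
X = torch.randn(64, 3)
y = torch.randn(64, 1)
model = nn.Linear(3, 1)
loss_fn = nn.MSELoss()
lr, batch_size = 0.01, 16


def grads_of(module, xb, yb):
    """Detached gradients of the MSE loss for `module` on the batch (xb, yb)."""
    module.zero_grad()
    loss_fn(module(xb), yb).backward()
    return [p.grad.detach().clone() for p in module.parameters()]


# Outer step: freeze a snapshot of the weights and its full-batch gradient.
snapshot = copy.deepcopy(model)
full_grads = grads_of(snapshot, X, y)

# Inner loop: variance-reduced updates on mini-batches.
for i in range(0, X.size(0), batch_size):
    xb, yb = X[i:i + batch_size], y[i:i + batch_size]
    batch_grads = grads_of(model, xb, yb)        # gradient at the current weights
    snapshot_grads = grads_of(snapshot, xb, yb)  # same batch at the snapshot weights
    with torch.no_grad():
        for p, g, sg, fg in zip(model.parameters(), batch_grads, snapshot_grads, full_grads):
            p -= lr * (g - sg + fg)
```

Wiring this into `fit` would mean refreshing `snapshot` and `full_grads` every `n_inner` mini-batches rather than reusing the current batch gradients as placeholders.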
+ """ + try: + # Ensure the model is in training mode + self.train() + + # Convert inputs to torch tensors if they are not already + X = torch.tensor(X, dtype=torch.float32) + y = torch.tensor(y, dtype=torch.float32).view(-1, 1) # Reshape y for linear regression + + # Check input dimensions + if X.size(0) != y.size(0): + raise ValueError("Number of samples in X and y must be the same.") + + # Number of samples + n_samples = X.size(0) + + # Initialize a placeholder for gradients + grads = None + + for epoch in range(self.epochs): + for i in range(0, n_samples, self.batch_size): + # Get mini-batch + batch_X = X[i:i + self.batch_size] + batch_y = y[i:i + self.batch_size] + + # Zero gradients + self.linear.zero_grad() + + # Forward pass: Compute predicted y by passing batch_X to the model + predictions = self.linear(batch_X) + + # Calculate loss (using mean squared error for regression) + loss = nn.MSELoss()(predictions, batch_y) + + # Backward pass: Compute gradient of the loss with respect to model parameters + loss.backward() + + # Compute the full gradients if using SVRG + if isinstance(self.optimizer, SVRG): + # Collect full gradients if needed (can implement here) + # For now, use the gradients calculated from the backward pass + grads = [param.grad for param in self.linear.parameters()] + + # Update weights using the optimizer + if isinstance(self.optimizer, SVRG): + full_grads = grads # Placeholder for full gradients + self.optimizer.update(self.linear.parameters(), [param.grad for param in self.linear.parameters()], full_grads) + else: + # If using standard SGD or another optimizer, implement its update method + for param in self.linear.parameters(): + param.data -= self.learning_rate * param.grad.data + if epoch % 100 == 0: + print(f'Epoch {epoch}: Loss = {loss.item()}') + + self.results_ = predictions.detach().numpy() + + except Exception as e: + print(f"An error occurred during training: {e}") def predict(self, X): raise NotImplementedError("Subclasses must implement this method") diff --git a/statsmodels_sgd/docs/svrg_optimizer.md b/statsmodels_sgd/docs/svrg_optimizer.md new file mode 100644 index 0000000..afef615 --- /dev/null +++ b/statsmodels_sgd/docs/svrg_optimizer.md @@ -0,0 +1,38 @@ +# Stochastic Variance Reduced Gradient (SVRG) Optimizer + +## Overview + +The **Stochastic Variance Reduced Gradient (SVRG)** optimizer is an advanced optimization technique used for minimizing the variance of gradient estimates in stochastic gradient descent. By periodically computing full-batch gradients and using them to adjust mini-batch gradients, SVRG provides a more stable and efficient convergence, especially useful for large datasets and deep learning models. + +## Key Features +- **Variance Reduction**: SVRG reduces gradient variance, leading to more stable parameter updates. +- **Improved Convergence**: It converges faster than standard SGD for many large-scale learning tasks. +- **Inner Loop Updates**: The algorithm performs a series of "inner loop" updates based on mini-batches, followed by an update using a "full gradient." + +## Parameters + +| Parameter | Type | Description | +|---------------|---------|-------------------------------------------------------------------------------------------------| +| `lr` | `float` | Learning rate for gradient updates. Default is `0.01`. | +| `n_inner` | `int` | Number of inner loop updates before recalculating the full gradient. Default is `10`. | + +## Usage Example + +The SVRG optimizer is defined in `optimizers.py`. 
diff --git a/statsmodels_sgd/tests/test_optimizers.py b/statsmodels_sgd/tests/test_optimizers.py
new file mode 100644
index 0000000..f9aeb7b
--- /dev/null
+++ b/statsmodels_sgd/tests/test_optimizers.py
@@ -0,0 +1,77 @@
+import unittest
+
+import numpy as np
+
+from statsmodels_sgd.optimizers import SVRG
+
+
+class TestSVRGOptimizer(unittest.TestCase):
+
+    def setUp(self):
+        """
+        Set up the parameters for SVRG optimizer tests.
+        """
+        self.lr = 0.01
+        self.n_inner = 10
+        self.svrg = SVRG(lr=self.lr, n_inner=self.n_inner)
+        self.params = [np.array([1.0, 2.0])]
+        self.initial_params = self.params[0].copy()  # keep a copy for later comparison
+
+    def test_single_update(self):
+        """
+        Test a single update of the SVRG optimizer.
+        """
+        grads = [np.array([0.5, 0.5])]
+        snapshot_grads = [np.array([0.4, 0.4])]
+        full_grads = [np.array([0.3, 0.3])]
+
+        # Update
+        self.svrg.update(self.params, grads, snapshot_grads, full_grads)
+
+        # Expected output after one update (hand-calculated)
+        expected_params = self.initial_params - self.lr * (grads[0] - snapshot_grads[0] + full_grads[0])
+
+        # Assertions
+        self.assertTrue(np.allclose(self.params[0], expected_params),
+                        msg="Single update failed: expected parameters do not match.")
+
+    def test_multiple_updates(self):
+        """
+        Test multiple consecutive updates of the SVRG optimizer.
+        """
+        updates = [
+            (np.array([0.5, 0.5]), np.array([0.4, 0.4]), np.array([0.3, 0.3])),
+            (np.array([0.1, 0.1]), np.array([0.1, 0.1]), np.array([0.1, 0.1])),
+            (np.array([0.4, 0.4]), np.array([0.3, 0.3]), np.array([0.2, 0.2])),
+        ]
+
+        for grads, snapshot_grads, full_grads in updates:
+            self.svrg.update(self.params, [grads], [snapshot_grads], [full_grads])
+
+        # Calculate expected params after the same sequence of updates
+        expected_params = self.initial_params.copy()
+        for grads, snapshot_grads, full_grads in updates:
+            expected_params -= self.lr * (grads - snapshot_grads + full_grads)
+
+        # Assertions
+        self.assertTrue(np.allclose(self.params[0], expected_params),
+                        msg="Multiple updates failed: expected parameters do not match.")
+
+    def test_zero_gradients(self):
+        """
+        Test the SVRG optimizer behavior with zero gradients.
+        """
+        grads = [np.zeros(2)]
+        snapshot_grads = [np.zeros(2)]
+        full_grads = [np.zeros(2)]
+
+        # Update with zero gradients
+        self.svrg.update(self.params, grads, snapshot_grads, full_grads)
+
+        # Parameters should remain unchanged
+        self.assertTrue(np.array_equal(self.params[0], self.initial_params),
+                        msg="Update with zero gradients should not change parameters.")
+
+
+if __name__ == '__main__':
+    unittest.main()
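One case the test file above does not exercise is that the correction term actually changes the step. A possible follow-up test (a sketch, not part of the patch) could assert that the SVRG update differs from a plain SGD step whenever the snapshot gradient differs from the mini-batch gradient:

```python
import unittest

import numpy as np

from statsmodels_sgd.optimizers import SVRG


class TestVarianceReductionTerm(unittest.TestCase):
    def test_correction_shifts_update_away_from_plain_sgd(self):
        opt = SVRG(lr=0.1)
        params = [np.array([1.0, 1.0])]
        grads = [np.array([0.5, 0.5])]
        snapshot_grads = [np.array([0.2, 0.2])]
        full_grads = [np.array([0.4, 0.4])]

        opt.update(params, grads, snapshot_grads, full_grads)

        # Plain SGD would subtract only lr * grads; SVRG adds the correction term.
        plain_sgd = np.array([1.0, 1.0]) - 0.1 * grads[0]
        expected = np.array([1.0, 1.0]) - 0.1 * (grads[0] - snapshot_grads[0] + full_grads[0])
        self.assertFalse(np.allclose(params[0], plain_sgd))
        self.assertTrue(np.allclose(params[0], expected))


if __name__ == '__main__':
    unittest.main()
```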