-
-
Notifications
You must be signed in to change notification settings - Fork 46.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
added ridge_regression.py #12553
base: master
Are you sure you want to change the base?
added ridge_regression.py #12553
Changes from 8 commits
6991fb0
a4f585c
fb1b7a7
b7f49ae
2722754
2b4bf7d
106b6c7
c110a12
d564698
c8c1d9a
ffd18e0
351e83b
a99a729
3547b71
0c01bb7
7e0bb71
5f56976
452dbd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import numpy as np | ||
from matplotlib import pyplot as plt | ||
from sklearn import datasets | ||
|
||
|
||
# Ridge Regression cost
# reference : https://en.wikipedia.org/wiki/Ridge_regression
def ridge_cost_function(
    x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
) -> float:
    """
    Compute the Ridge regression cost function with L2 regularization.

    J(θ) = (1/2m) * Σ (hθ(x_i) - y_i)^2 + (α/2m) * Σ θ_j^2   (for j = 1 to n)

    Where:
        - J(θ) is the cost function we aim to minimize
        - m is the number of training examples
        - hθ(x) = X * θ (the model prediction)
        - y_i is the actual target value for example i
        - α is the regularization parameter

    The bias term theta[0] is excluded from the penalty, and the penalty is
    scaled by 1/(2m) so that this cost is the exact objective whose gradient
    is used in ``ridge_gradient_descent`` (which applies (alpha/m) * theta).

    @param x: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The parameters (weights) of the model (n,)
    @param alpha: The regularization parameter

    @returns: The computed cost value
    """
    m = len(y)
    predictions = np.dot(x, theta)
    # Mean-squared-error term plus the bias-free L2 penalty.
    cost: float = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (
        alpha / (2 * m)
    ) * np.sum(theta[1:] ** 2)

    return cost
|
||
|
||
def ridge_gradient_descent(
    x: np.ndarray,
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    """
    Perform gradient descent to minimize the Ridge regression cost function.

    The update follows the gradient of
    J(θ) = (1/2m) Σ (hθ(x_i) - y_i)^2 + (α/2m) Σ_{j>=1} θ_j^2,
    i.e. the bias term theta[0] is not regularized.

    @param x: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The initial parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @param learning_rate: The learning rate for gradient descent
    @param max_iterations: The number of iterations for gradient descent

    @returns: The optimized parameters (weights) of the model (n,)
    """
    m = len(y)
    # Work on a float copy so the caller's array is never mutated in place
    # (the in-place `-=` below would otherwise alter — or, for integer
    # dtypes, fail on — the array the caller passed in).
    theta = np.array(theta, dtype=float, copy=True)

    for iteration in range(max_iterations):
        predictions = np.dot(x, theta)
        error = predictions - y

        # Data-term gradient, then add the L2 penalty (bias excluded).
        gradient = (1 / m) * np.dot(x.T, error)
        gradient[1:] += (alpha / m) * theta[1:]
        theta -= learning_rate * gradient

        # Periodic progress log of the current cost.
        if iteration % 100 == 0:
            cost = ridge_cost_function(x, y, theta, alpha)
            print(f"Iteration {iteration}, Cost: {cost}")

    return theta
|
||
|
||
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Load the California Housing dataset (downloaded/cached by sklearn).
    california_housing = datasets.fetch_california_housing()
    x = california_housing.data[:, :2]  # use only 2 features for simplicity
    y = california_housing.target
    # Standardize features to zero mean / unit variance so gradient
    # descent converges with a single learning rate.
    x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)

    # Add a bias column (intercept) to X
    x = np.c_[np.ones(x.shape[0]), x]

    # Initialize parameters (theta)
    theta_initial = np.zeros(x.shape[1])

    # Set hyperparameters
    alpha = 0.1
    learning_rate = 0.01
    max_iterations = 1000

    optimized_theta = ridge_gradient_descent(
        x, y, theta_initial, alpha, learning_rate, max_iterations
    )
    print(f"Optimized theta: {optimized_theta}")

    # Prediction helper: linear model output X @ theta.
    def predict(data: np.ndarray, theta: np.ndarray) -> np.ndarray:
        """Return the model predictions for feature matrix *data*."""
        return np.dot(data, theta)

    y_pred = predict(x, optimized_theta)

    # Plotting the results (here we visualize predicted vs actual values)
    plt.figure(figsize=(10, 6))
    plt.scatter(y, y_pred, color="b", label="Predictions vs Actual")
    plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit")
    plt.xlabel("Actual values")
    plt.ylabel("Predicted values")
    plt.title("Ridge Regression: Actual vs Predicted Values")
    plt.legend()
    plt.show()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An error occurred while parsing the file:
machine_learning/ridge_regression.py