From 6991fb0dbf8be5263207287a7677c7ac1b8ddd05 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Mon, 3 Feb 2025 23:13:09 +0530 Subject: [PATCH 01/14] added ridge_regression.py --- machine_learning/ridge_regression.py | 100 +++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 machine_learning/ridge_regression.py diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py new file mode 100644 index 000000000000..179f5381a887 --- /dev/null +++ b/machine_learning/ridge_regression.py @@ -0,0 +1,100 @@ +import numpy as np +from matplotlib import pyplot as plt +from sklearn import datasets + +# Ridge Regression function +# reference : https://en.wikipedia.org/wiki/Ridge_regression +def ridge_cost_function(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: + """ + Compute the Ridge regression cost function with L2 regularization. + + J(θ) = (1/2m) * Σ (y_i - hθ(x))^2 + (α/2) * Σ θ_j^2 (for j=1 to n) + + Where: + - J(θ) is the cost function we aim to minimize + - m is the number of training examples + - hθ(x) = X * θ (prediction) + - y_i is the actual target value for example i + - α is the regularization parameter + + @param X: The feature matrix (m x n) + @param y: The target vector (m,) + @param theta: The parameters (weights) of the model (n,) + @param alpha: The regularization parameter + + @returns: The computed cost value + """ + m = len(y) + predictions = np.dot(X, theta) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum(theta[1:] ** 2) + return cost + +def ridge_gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: + """ + Perform gradient descent to minimize the cost function and fit the Ridge regression model. 
+ + @param X: The feature matrix (m x n) + @param y: The target vector (m,) + @param theta: The initial parameters (weights) of the model (n,) + @param alpha: The regularization parameter + @param learning_rate: The learning rate for gradient descent + @param max_iterations: The number of iterations for gradient descent + + @returns: The optimized parameters (weights) of the model (n,) + """ + m = len(y) + + for iteration in range(max_iterations): + predictions = np.dot(X, theta) + error = predictions - y + + # calculate the gradient + gradient = (1 / m) * np.dot(X.T, error) + gradient[1:] += (alpha / m) * theta[1:] + theta -= learning_rate * gradient + + if iteration % 100 == 0: + cost = ridge_cost_function(X, y, theta, alpha) + print(f"Iteration {iteration}, Cost: {cost}") + + return theta + + + +if __name__ == "__main__": + import doctest + + # Load California Housing dataset + california_housing = datasets.fetch_california_housing() + X = california_housing.data[:, :2] # 2 features for simplicity + y = california_housing.target + X = (X - np.mean(X, axis=0)) / np.std(X, axis=0) + + # Add a bias column (intercept) to X + X = np.c_[np.ones(X.shape[0]), X] + + # Initialize parameters (theta) + theta_initial = np.zeros(X.shape[1]) + + # Set hyperparameters + alpha = 0.1 + learning_rate = 0.01 + max_iterations = 1000 + + optimized_theta = ridge_gradient_descent(X, y, theta_initial, alpha, learning_rate, max_iterations) + print(f"Optimized theta: {optimized_theta}") + + # Prediction + def predict(X, theta): + return np.dot(X, theta) + y_pred = predict(X, optimized_theta) + + # Plotting the results (here we visualize predicted vs actual values) + plt.figure(figsize=(10, 6)) + plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') + plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') + plt.xlabel("Actual values") + plt.ylabel("Predicted values") + plt.title("Ridge Regression: Actual vs Predicted Values") + plt.legend() + plt.show() From a4f585c89d4426f2ddace3ead610ff1742922713 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 17:50:15 +0000 Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression.py | 29 +++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 179f5381a887..7d0598edb38a 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -2,9 +2,12 @@ from matplotlib import pyplot as plt from sklearn import datasets + # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: +def ridge_cost_function( + X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float +) -> float: """ Compute the Ridge regression cost function with L2 regularization. 
@@ -26,10 +29,20 @@ def ridge_cost_function(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: """ m = len(y) predictions = np.dot(X, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum(theta[1:] ** 2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( + theta[1:] ** 2 + ) return cost -def ridge_gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: + +def ridge_gradient_descent( + X: np.ndarray, + y: np.ndarray, + theta: np.ndarray, + alpha: float, + learning_rate: float, + max_iterations: int, +) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. @@ -60,7 +73,6 @@ def ridge_gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alph return theta - if __name__ == "__main__": import doctest @@ -81,18 +93,21 @@ def ridge_gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alph learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent(X, y, theta_initial, alpha, learning_rate, max_iterations) + optimized_theta = ridge_gradient_descent( + X, y, theta_initial, alpha, learning_rate, max_iterations + ) print(f"Optimized theta: {optimized_theta}") # Prediction def predict(X, theta): return np.dot(X, theta) + y_pred = predict(X, optimized_theta) # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') - plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') + plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") + plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") From fb1b7a72d0dc9e68948d536dfba3973db044c37d Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Mon, 3 Feb 2025 23:53:17 +0530 Subject: [PATCH 03/14] Updated code as per PR feedback --- machine_learning/ridge_regression.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 179f5381a887..0dbe42419738 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -4,18 +4,18 @@ # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: +def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: """ Compute the Ridge regression cost function with L2 regularization. 
- J(θ) = (1/2m) * Σ (y_i - hθ(x))^2 + (α/2) * Σ θ_j^2 (for j=1 to n) + J(θ) = (1/2m) * Σ (y_i - hθ(x))^2 + (a/2) * Σ θ_j^2 (for j=1 to n) Where: - J(θ) is the cost function we aim to minimize - m is the number of training examples - hθ(x) = X * θ (prediction) - y_i is the actual target value for example i - - α is the regularization parameter + - a is the regularization parameter @param X: The feature matrix (m x n) @param y: The target vector (m,) @@ -26,10 +26,11 @@ def ridge_cost_function(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: """ m = len(y) predictions = np.dot(X, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum(theta[1:] ** 2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + cost += (alpha / 2) * np.sum(theta[1:] ** 2) return cost -def ridge_gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: +def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. @@ -63,6 +64,7 @@ def ridge_gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alph if __name__ == "__main__": import doctest + doctest.testmod() # Load California Housing dataset california_housing = datasets.fetch_california_housing() @@ -97,4 +99,5 @@ def predict(X, theta): plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") plt.legend() + #plots on a graph plt.show() From 2722754fea454382301fe5a20d7c6319cb54ba54 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:12:06 +0530 Subject: [PATCH 04/14] Updated code as per PR feedback 2 --- machine_learning/ridge_regression.py | 35 ++++------------------------ 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index e95a2ba7c843..d58bd4dbb83b 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -2,16 +2,9 @@ from matplotlib import pyplot as plt from sklearn import datasets - # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -<<<<<<< HEAD def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: -======= -def ridge_cost_function( - X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float -) -> float: ->>>>>>> a4f585c89d4426f2ddace3ead610ff1742922713 """ Compute the Ridge regression cost function with L2 regularization. @@ -33,28 +26,11 @@ def ridge_cost_function( """ m = len(y) predictions = np.dot(X, theta) -<<<<<<< HEAD cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) cost += (alpha / 2) * np.sum(theta[1:] ** 2) return cost def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: -======= - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( - theta[1:] ** 2 - ) - return cost - - -def ridge_gradient_descent( - X: np.ndarray, - y: np.ndarray, - theta: np.ndarray, - alpha: float, - learning_rate: float, - max_iterations: int, -) -> np.ndarray: ->>>>>>> a4f585c89d4426f2ddace3ead610ff1742922713 """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. 
@@ -85,6 +61,7 @@ def ridge_gradient_descent( return theta + if __name__ == "__main__": import doctest doctest.testmod() @@ -106,24 +83,20 @@ def ridge_gradient_descent( learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent( - X, y, theta_initial, alpha, learning_rate, max_iterations - ) + optimized_theta = ridge_gradient_descent(X, y, theta_initial, alpha, learning_rate, max_iterations) print(f"Optimized theta: {optimized_theta}") # Prediction def predict(X, theta): return np.dot(X, theta) - y_pred = predict(X, optimized_theta) # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") - plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") + plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') + plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") plt.legend() - #plots on a graph plt.show() From 2b4bf7dba7715b721dc9597852100a44acf47566 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 18:42:37 +0000 Subject: [PATCH 05/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index d58bd4dbb83b..d99bf906f1a2 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -2,9 +2,12 @@ from matplotlib import pyplot as plt from sklearn import datasets + # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: +def ridge_cost_function( + x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float +) -> float: """ Compute the Ridge regression cost function with L2 regularization. @@ -30,7 +33,15 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: cost += (alpha / 2) * np.sum(theta[1:] ** 2) return cost -def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: + +def ridge_gradient_descent( + x: np.ndarray, + y: np.ndarray, + theta: np.ndarray, + alpha: float, + learning_rate: float, + max_iterations: int, +) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. 
@@ -61,9 +72,9 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph return theta - if __name__ == "__main__": import doctest + doctest.testmod() # Load California Housing dataset @@ -83,18 +94,21 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent(X, y, theta_initial, alpha, learning_rate, max_iterations) + optimized_theta = ridge_gradient_descent( + X, y, theta_initial, alpha, learning_rate, max_iterations + ) print(f"Optimized theta: {optimized_theta}") # Prediction def predict(X, theta): return np.dot(X, theta) + y_pred = predict(X, optimized_theta) # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') - plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') + plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") + plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") From 106b6c7c571247a2c5f9654aa2034fcdbcc9f7c5 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:23:15 +0530 Subject: [PATCH 06/14] Updated code as per PR feedback 3 --- machine_learning/ridge_regression.py | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index d58bd4dbb83b..81c46e8cc267 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -25,9 +25,10 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: @returns: The computed cost value """ m = len(y) - predictions = np.dot(X, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) - cost += (alpha / 2) * np.sum(theta[1:] ** 2) + predictions = np.dot(x, theta) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ + (alpha / 2) * np.sum(theta[1:] ** 2) + return cost def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: @@ -46,16 +47,16 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph m = len(y) for iteration in range(max_iterations): - predictions = np.dot(X, theta) + predictions = np.dot(x, theta) error = predictions - y # calculate the gradient - gradient = (1 / m) * np.dot(X.T, error) + gradient = (1 / m) * np.dot(x.T, error) gradient[1:] += (alpha / m) * theta[1:] theta -= learning_rate * gradient if iteration % 100 == 0: - cost = ridge_cost_function(X, y, theta, alpha) + cost = ridge_cost_function(x, y, theta, alpha) print(f"Iteration {iteration}, Cost: {cost}") return theta @@ -68,28 +69,28 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph # Load California Housing dataset california_housing = datasets.fetch_california_housing() - X = california_housing.data[:, :2] # 2 features for simplicity + x = california_housing.data[:, :2] # 2 features for simplicity y = california_housing.target - X = (X - np.mean(X, axis=0)) / np.std(X, axis=0) + x = (x - np.mean(x, axis=0)) / np.std(x, axis=0) # Add a bias column (intercept) to X - X = np.c_[np.ones(X.shape[0]), X] + x = np.c_[np.ones(x.shape[0]), x] # Initialize parameters (theta) - theta_initial = 
np.zeros(X.shape[1]) + theta_initial = np.zeros(x.shape[1]) # Set hyperparameters alpha = 0.1 learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent(X, y, theta_initial, alpha, learning_rate, max_iterations) + optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) print(f"Optimized theta: {optimized_theta}") # Prediction - def predict(X, theta): - return np.dot(X, theta) - y_pred = predict(X, optimized_theta) + def predict(x, theta): + return np.dot(x, theta) + y_pred = predict(x, optimized_theta) # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) From d5646985a2ac9adeec16fc024c817383c88a6291 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:25:52 +0530 Subject: [PATCH 07/14] Updated code as per PR feedback 5 --- machine_learning/ridge_regression.py | 34 ++++------------------------ 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 9ede10ecb45a..81c46e8cc267 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -2,12 +2,9 @@ from matplotlib import pyplot as plt from sklearn import datasets - # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function( - x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float -) -> float: +def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: """ Compute the Ridge regression cost function with L2 regularization. @@ -34,15 +31,7 @@ def ridge_cost_function( return cost - -def ridge_gradient_descent( - x: np.ndarray, - y: np.ndarray, - theta: np.ndarray, - alpha: float, - learning_rate: float, - max_iterations: int, -) -> np.ndarray: +def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. 
@@ -73,9 +62,9 @@ def ridge_gradient_descent( return theta + if __name__ == "__main__": import doctest - doctest.testmod() # Load California Housing dataset @@ -95,7 +84,6 @@ def ridge_gradient_descent( learning_rate = 0.01 max_iterations = 1000 -<<<<<<< HEAD optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) print(f"Optimized theta: {optimized_theta}") @@ -103,23 +91,11 @@ def ridge_gradient_descent( def predict(x, theta): return np.dot(x, theta) y_pred = predict(x, optimized_theta) -======= - optimized_theta = ridge_gradient_descent( - X, y, theta_initial, alpha, learning_rate, max_iterations - ) - print(f"Optimized theta: {optimized_theta}") - - # Prediction - def predict(X, theta): - return np.dot(X, theta) - - y_pred = predict(X, optimized_theta) ->>>>>>> 2b4bf7dba7715b721dc9597852100a44acf47566 # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") - plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") + plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') + plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") From c8c1d9a5896ed6f64a71a2f9392eb4ecc7ffff12 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 18:56:22 +0000 Subject: [PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression.py | 31 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 81c46e8cc267..dcc17e035113 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -2,9 +2,12 @@ from matplotlib import pyplot as plt from sklearn import datasets + # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: +def ridge_cost_function( + x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float +) -> float: """ Compute the Ridge regression cost function with L2 regularization. @@ -26,12 +29,21 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: """ m = len(y) predictions = np.dot(x, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ - (alpha / 2) * np.sum(theta[1:] ** 2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( + theta[1:] ** 2 + ) return cost -def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: + +def ridge_gradient_descent( + x: np.ndarray, + y: np.ndarray, + theta: np.ndarray, + alpha: float, + learning_rate: float, + max_iterations: int, +) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. 
@@ -62,9 +74,9 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph return theta - if __name__ == "__main__": import doctest + doctest.testmod() # Load California Housing dataset @@ -84,18 +96,21 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) + optimized_theta = ridge_gradient_descent( + x, y, theta_initial, alpha, learning_rate, max_iterations + ) print(f"Optimized theta: {optimized_theta}") # Prediction def predict(x, theta): return np.dot(x, theta) + y_pred = predict(x, optimized_theta) # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') - plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') + plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") + plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") From ffd18e02af436542d098107ed0220df24bbe7126 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:33:50 +0530 Subject: [PATCH 09/14] Updated code as per PR feedback 6 --- machine_learning/ridge_regression.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 81c46e8cc267..f629b723ffd1 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -1,3 +1,4 @@ + import numpy as np from matplotlib import pyplot as plt from sklearn import datasets @@ -27,7 +28,7 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: m = len(y) predictions = np.dot(x, theta) cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ - (alpha / 2) * np.sum(theta[1:] ** 2) + (alpha / 2) * np.sum(theta[1:] ** 2) return cost @@ -61,8 +62,6 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph return theta - - if __name__ == "__main__": import doctest doctest.testmod() @@ -90,6 +89,7 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph # Prediction def predict(x, theta): return np.dot(x, theta) + y_pred = predict(x, optimized_theta) # Plotting the results (here we visualize predicted vs actual values) @@ -101,3 +101,4 @@ def predict(x, theta): plt.title("Ridge Regression: Actual vs Predicted Values") plt.legend() plt.show() + From a99a729c6b62980a5d2dfaa4960f6d067de381f6 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:34:52 +0530 Subject: [PATCH 10/14] Updated code as per PR feedback 6 --- machine_learning/ridge_regression.py | 34 ++++------------------------ 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 0aba9eb26095..f629b723ffd1 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -3,12 +3,9 @@ from matplotlib import pyplot as plt from sklearn import datasets - # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function( - x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float -) -> float: +def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: 
float) -> float: """ Compute the Ridge regression cost function with L2 regularization. @@ -30,26 +27,12 @@ def ridge_cost_function( """ m = len(y) predictions = np.dot(x, theta) -<<<<<<< HEAD cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ (alpha / 2) * np.sum(theta[1:] ** 2) -======= - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( - theta[1:] ** 2 - ) ->>>>>>> c8c1d9a5896ed6f64a71a2f9392eb4ecc7ffff12 return cost - -def ridge_gradient_descent( - x: np.ndarray, - y: np.ndarray, - theta: np.ndarray, - alpha: float, - learning_rate: float, - max_iterations: int, -) -> np.ndarray: +def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. @@ -79,13 +62,8 @@ def ridge_gradient_descent( return theta -<<<<<<< HEAD -======= - ->>>>>>> c8c1d9a5896ed6f64a71a2f9392eb4ecc7ffff12 if __name__ == "__main__": import doctest - doctest.testmod() # Load California Housing dataset @@ -105,9 +83,7 @@ def ridge_gradient_descent( learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent( - x, y, theta_initial, alpha, learning_rate, max_iterations - ) + optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) print(f"Optimized theta: {optimized_theta}") # Prediction @@ -118,8 +94,8 @@ def predict(x, theta): # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") - plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") + plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') + plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") From 3547b71bfa3ba45e37894564930a87bb1a8ee97b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 19:05:19 +0000 Subject: [PATCH 11/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression.py | 32 ++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index f629b723ffd1..dcc17e035113 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -1,11 +1,13 @@ - import numpy as np from matplotlib import pyplot as plt from sklearn import datasets + # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: +def ridge_cost_function( + x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float +) -> float: """ Compute the Ridge regression cost function with L2 regularization. 
@@ -27,12 +29,21 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: """ m = len(y) predictions = np.dot(x, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ - (alpha / 2) * np.sum(theta[1:] ** 2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( + theta[1:] ** 2 + ) return cost -def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: + +def ridge_gradient_descent( + x: np.ndarray, + y: np.ndarray, + theta: np.ndarray, + alpha: float, + learning_rate: float, + max_iterations: int, +) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. @@ -62,8 +73,10 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph return theta + if __name__ == "__main__": import doctest + doctest.testmod() # Load California Housing dataset @@ -83,7 +96,9 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) + optimized_theta = ridge_gradient_descent( + x, y, theta_initial, alpha, learning_rate, max_iterations + ) print(f"Optimized theta: {optimized_theta}") # Prediction @@ -94,11 +109,10 @@ def predict(x, theta): # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') - plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') + plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") + plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") plt.legend() plt.show() - From 0c01bb7391fc6408bb46d13dc9dadc8b62abf2b5 Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:40:45 +0530 Subject: [PATCH 12/14] Updated code as per PR feedback 7 --- machine_learning/ridge_regression.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index f629b723ffd1..85cb403de156 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -34,7 +34,8 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: """ - Perform gradient descent to minimize the cost function and fit the Ridge regression model. + Perform gradient descent to minimize the + cost function and fit the Ridge regression model. 
@param X: The feature matrix (m x n) @param y: The target vector (m,) From 5f56976fcefbf7d2e1decdff1b4f70a73fb9ff8c Mon Sep 17 00:00:00 2001 From: prajwal-38 Date: Tue, 4 Feb 2025 00:42:27 +0530 Subject: [PATCH 13/14] Updated code as per PR feedback 8 --- machine_learning/ridge_regression.py | 32 ++++++++-------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 9c8a53df3f93..85cb403de156 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -1,13 +1,11 @@ + import numpy as np from matplotlib import pyplot as plt from sklearn import datasets - # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function( - x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float -) -> float: +def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: """ Compute the Ridge regression cost function with L2 regularization. @@ -29,21 +27,12 @@ def ridge_cost_function( """ m = len(y) predictions = np.dot(x, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( - theta[1:] ** 2 - ) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ + (alpha / 2) * np.sum(theta[1:] ** 2) return cost - -def ridge_gradient_descent( - x: np.ndarray, - y: np.ndarray, - theta: np.ndarray, - alpha: float, - learning_rate: float, - max_iterations: int, -) -> np.ndarray: +def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. @@ -74,10 +63,8 @@ def ridge_gradient_descent( return theta - if __name__ == "__main__": import doctest - doctest.testmod() # Load California Housing dataset @@ -97,9 +84,7 @@ def ridge_gradient_descent( learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent( - x, y, theta_initial, alpha, learning_rate, max_iterations - ) + optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) print(f"Optimized theta: {optimized_theta}") # Prediction @@ -110,10 +95,11 @@ def predict(x, theta): # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") - plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") + plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') + plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") plt.legend() plt.show() + From 452dbd6bdeb2aad0c67ba682375da4d3e99c751f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 19:12:56 +0000 Subject: [PATCH 14/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/ridge_regression.py | 32 ++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py index 85cb403de156..9c8a53df3f93 100644 --- a/machine_learning/ridge_regression.py +++ b/machine_learning/ridge_regression.py @@ -1,11 +1,13 @@ - import numpy as 
np from matplotlib import pyplot as plt from sklearn import datasets + # Ridge Regression function # reference : https://en.wikipedia.org/wiki/Ridge_regression -def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> float: +def ridge_cost_function( + x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float +) -> float: """ Compute the Ridge regression cost function with L2 regularization. @@ -27,12 +29,21 @@ def ridge_cost_function(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: """ m = len(y) predictions = np.dot(x, theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + \ - (alpha / 2) * np.sum(theta[1:] ** 2) + cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum( + theta[1:] ** 2 + ) return cost -def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, learning_rate: float, max_iterations: int) -> np.ndarray: + +def ridge_gradient_descent( + x: np.ndarray, + y: np.ndarray, + theta: np.ndarray, + alpha: float, + learning_rate: float, + max_iterations: int, +) -> np.ndarray: """ Perform gradient descent to minimize the cost function and fit the Ridge regression model. @@ -63,8 +74,10 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph return theta + if __name__ == "__main__": import doctest + doctest.testmod() # Load California Housing dataset @@ -84,7 +97,9 @@ def ridge_gradient_descent(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alph learning_rate = 0.01 max_iterations = 1000 - optimized_theta = ridge_gradient_descent(x, y, theta_initial, alpha, learning_rate, max_iterations) + optimized_theta = ridge_gradient_descent( + x, y, theta_initial, alpha, learning_rate, max_iterations + ) print(f"Optimized theta: {optimized_theta}") # Prediction @@ -95,11 +110,10 @@ def predict(x, theta): # Plotting the results (here we visualize predicted vs actual values) plt.figure(figsize=(10, 6)) - plt.scatter(y, y_pred, color='b', label='Predictions vs Actual') - plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit') + plt.scatter(y, y_pred, color="b", label="Predictions vs Actual") + plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit") plt.xlabel("Actual values") plt.ylabel("Predicted values") plt.title("Ridge Regression: Actual vs Predicted Values") plt.legend() plt.show() -
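
Note (not part of the patch series): a quick way to sanity-check the final state of machine_learning/ridge_regression.py (PATCH 14/14) is to compare its gradient-descent update against the closed-form ridge solution on synthetic data. The sketch below restates the cost and gradient exactly as they appear in the final hunks; the synthetic data, random seed, iteration count, and tolerance are illustrative assumptions, and the module's plotting and per-100-iteration cost printing are deliberately omitted. One detail visible in the diffs: the cost penalizes the weights with (alpha / 2) * sum(theta[1:] ** 2) while the gradient adds (alpha / m) * theta[1:], so gradient descent effectively minimizes a penalty scaled by alpha/m rather than alpha; the closed form below follows the gradient.

import numpy as np


def ridge_cost_function(
    x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
) -> float:
    # Cost as in the final patch: squared error over 2m plus an L2 penalty
    # on every weight except the bias term theta[0].
    m = len(y)
    predictions = np.dot(x, theta)
    return (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum(
        theta[1:] ** 2
    )


def ridge_gradient_descent(
    x: np.ndarray,
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    # Update rule as in the final patch, minus the diagnostic printing.
    m = len(y)
    for _ in range(max_iterations):
        error = np.dot(x, theta) - y
        gradient = (1 / m) * np.dot(x.T, error)
        gradient[1:] += (alpha / m) * theta[1:]  # bias term is not regularized
        theta -= learning_rate * gradient
    return theta


# Assumed synthetic stand-in for the two standardized California Housing features.
rng = np.random.default_rng(seed=0)
m, n = 200, 2
x = np.c_[np.ones(m), rng.standard_normal((m, n))]  # bias column + features
theta_true = np.array([1.0, 2.0, -3.0])
y = x @ theta_true + 0.1 * rng.standard_normal(m)

alpha = 0.1
theta_gd = ridge_gradient_descent(
    x, y, np.zeros(n + 1), alpha, learning_rate=0.01, max_iterations=10_000
)

# Stationary point of the gradient above: (x.T x + alpha * r) theta = x.T y,
# where r is the identity with r[0, 0] = 0 so the bias stays unpenalized.
r = np.eye(n + 1)
r[0, 0] = 0.0
theta_cf = np.linalg.solve(x.T @ x + alpha * r, x.T @ y)

print("gradient descent:", theta_gd)
print("closed form:     ", theta_cf)
print("final cost:", ridge_cost_function(x, y, theta_gd, alpha))
assert np.allclose(theta_gd, theta_cf, atol=1e-4)

With standardized features the Hessian eigenvalues are of order one, so learning_rate = 0.01 (the value the patches use) converges comfortably within the 10,000 iterations assumed here, and the two estimates agree to well inside the stated tolerance.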