Add l2 regularization to logistic reg
sewade committed on Nov 25, 2019
1 parent c597288 · commit 3af9ca4
Showing 6 changed files with 1,213 additions and 26 deletions.
@@ -0,0 +1,300 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Logistic Regression using Gradient Descent"
   ]
  },
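  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, the L2-regularized objective minimized by the class below is\n",
    "\n",
    "$$J(\\theta) = -\\sum_{i=1}^{m}\\Big[y^{(i)}\\log h_\\theta(x^{(i)}) + \\big(1-y^{(i)}\\big)\\log\\big(1-h_\\theta(x^{(i)})\\big)\\Big] + \\lambda\\sum_{j=1}^{n}\\theta_j^2,\n",
    "\n",
    "where $h_\\theta(x) = \\sigma(\\theta^T x)$ and $\\sigma(z) = 1/(1+e^{-z})$; the bias $\\theta_0$ is excluded from the penalty. The corresponding batch gradient descent update (note the factor 2, since the penalty is $\\lambda\\theta_j^2$ rather than $\\tfrac{\\lambda}{2}\\theta_j^2$) is\n",
    "\n",
    "$$\\theta \\leftarrow \\theta - \\alpha\\Big(X^T\\big(h_\\theta(X) - y\\big) + 2\\lambda\\,\\theta_{1:}\\Big).$$"
   ]
  },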
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['setosa', 'versicolor', 'virginica']\n"
     ]
    }
   ],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "dataset = load_iris()\n",
    "X = dataset.data\n",
    "y = dataset.target\n",
    "\n",
    "target_names = list(dataset.target_names)\n",
    "print(target_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Change to binary class\n",
    "y = (y > 0).astype(int)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
"# Model: Linear Regression\n", | ||
"class LogReg:\n", | ||
" \"\"\"\n", | ||
" This implementation of Logistic Regression uses batch gradient descent with regularization.\n", | ||
" \"\"\"\n", | ||
" def __init__(self, num_iters=2800, tolerance = 1e-10, alpha=0.00001, lambd=10, threshold=0.5, verbose=False):\n", | ||
" self.num_iters = num_iters\n", | ||
" self.alpha = alpha # Learning rate\n", | ||
" self.lambd = lambd # Regularization parameter\n", | ||
" self.tolerance = tolerance\n", | ||
" self.threshold = threshold\n", | ||
" self.verbose = verbose\n", | ||
" \n", | ||
" def add_ones(self, X):\n", | ||
" return np.concatenate((np.ones((len(X),1)), X), axis = 1)\n", | ||
" \n", | ||
" def sigmoid(self, X, theta):\n", | ||
" return 1/(1 + np.exp(X@theta))\n", | ||
" \n", | ||
" def cost(self, X, y_true):\n", | ||
" m = X.shape[0]\n", | ||
" y_hat = self.sigmoid(X, self.theta)\n", | ||
" temp_theta = self.theta[1:].copy()\n", | ||
" \n", | ||
" Cost = np.sum(-1*y_true*np.log(y_hat)-(1-y_true)*np.log(1-y_hat)) + + self.lambd * np.sum(temp_theta**2)\n", | ||
" \n", | ||
" return Cost\n", | ||
" \n", | ||
" def fit(self, X, y):\n", | ||
" X = X.copy()\n", | ||
" X = self.add_ones(X)\n", | ||
" y = y.reshape(-1, 1)\n", | ||
" \n", | ||
" self.theta = np.zeros((len(X[0]), 1))\n", | ||
" \n", | ||
" current_iter = 1\n", | ||
" norm = 1\n", | ||
" while (norm >= self.tolerance and current_iter < self.num_iters):\n", | ||
" old_theta = self.theta.copy()\n", | ||
" #grad = np.dot(np.transpose(y_hat-self.y), self.X)\n", | ||
" temp_theta = self.theta[1:].copy()\n", | ||
" grad = X.T@(y - self.sigmoid(X, self.theta)) + self.lambd * np.sum(temp_theta)\n", | ||
" grad= grad.reshape(-1, 1)\n", | ||
" \n", | ||
" self.theta = self.theta - self.alpha*grad\n", | ||
" \n", | ||
" if self.verbose and (current_iter%100 == 0):\n", | ||
" print(f'cost for {current_iter} iteration : {self.cost(X, y)}')\n", | ||
" norm = np.linalg.norm(old_theta - self.theta)\n", | ||
" current_iter += 1\n", | ||
" \n", | ||
" return self.theta\n", | ||
" \n", | ||
" def evaluate(self, X, y):\n", | ||
" \"\"\"\n", | ||
" Returns mse loss for a dataset evaluated on the hypothesis\n", | ||
" \"\"\"\n", | ||
" X = self.add_ones(X)\n", | ||
" return self.cost(X, y)\n", | ||
" \n", | ||
" def predict(self, X):\n", | ||
" prob = self.predict_proba(X)\n", | ||
" return (prob > self.threshold).astype(int)\n", | ||
" \n", | ||
" def predict_proba(self, X):\n", | ||
" \"\"\"\n", | ||
" Returns probability of predictions.\n", | ||
" \"\"\"\n", | ||
" X = self.add_ones(X) \n", | ||
" return self.sigmoid(X, self.theta)" | ||
   ]
  },
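  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check, scikit-learn's `LogisticRegression` minimizes the same L2-penalized log-loss; under the cost above its `C` corresponds to $1/(2\\lambda)$, so `C = 0.05` for `lambd=10` and the two models should broadly agree. A minimal sketch:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch: compare against scikit-learn's solver for the same penalized objective.\n",
    "# C = 1/(2*lambd) is the assumed mapping between the two parameterizations.\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "sk_logreg = LogisticRegression(C=1/(2*10), solver='lbfgs', max_iter=1000)\n",
    "sk_logreg.fit(X, y)\n",
    "print('sklearn training accuracy:', sk_logreg.score(X, y))"
   ]
  },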
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "logreg = LogReg(verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cost for 100 iteration : 78.72554244539407\n",
      "cost for 200 iteration : 71.7637559135921\n",
      "cost for 300 iteration : 66.07842247755053\n",
      "cost for 400 iteration : 61.249885580691895\n",
      "cost for 500 iteration : 57.14508887349538\n",
      "cost for 600 iteration : 53.654471784734895\n",
      "cost for 700 iteration : 50.68400001067733\n",
      "cost for 800 iteration : 48.153754835764694\n",
      "cost for 900 iteration : 45.99638378599468\n",
      "cost for 1000 iteration : 44.1554145494372\n",
      "cost for 1100 iteration : 42.5836123943238\n",
      "cost for 1200 iteration : 41.241486686288404\n",
      "cost for 1300 iteration : 40.09598899186002\n",
      "cost for 1400 iteration : 39.11940756500792\n",
      "cost for 1500 iteration : 38.288443875685104\n",
      "cost for 1600 iteration : 37.58344894655533\n",
      "cost for 1700 iteration : 36.98779553479041\n",
      "cost for 1800 iteration : 36.48736352452237\n",
      "cost for 1900 iteration : 36.070118517386234\n",
      "cost for 2000 iteration : 35.7257665818431\n",
      "cost for 2100 iteration : 35.44547098210861\n",
      "cost for 2200 iteration : 35.22161925432031\n",
      "cost for 2300 iteration : 35.04763117022852\n",
      "cost for 2400 iteration : 34.91779993503667\n",
      "cost for 2500 iteration : 34.827160443924576\n",
      "cost for 2600 iteration : 34.771379618925636\n",
      "cost for 2700 iteration : 34.74666481164581\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[ 0.21239942],\n",
       "       [ 0.1232903 ],\n",
       "       [ 0.64963602],\n",
       "       [-0.99385055],\n",
       "       [-0.36693142]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logreg.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = logreg.predict(X)\n",
    "predictions = predictions.squeeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(y == predictions) / len(y)"
   ]
  },
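  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The accuracy above is measured on the same data the model was trained on. A minimal held-out evaluation sketch, assuming scikit-learn's `train_test_split`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch: fit on a training split and score on the held-out remainder.\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=0)\n",
    "\n",
    "holdout_model = LogReg()\n",
    "holdout_model.fit(X_train, y_train)\n",
    "test_preds = holdout_model.predict(X_test).squeeze()\n",
    "print('held-out accuracy:', np.mean(test_preds == y_test))"
   ]
  },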
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}