Add softmax regression
sewade committed Nov 28, 2019
1 parent 6b856dd commit 816a8a0
Showing 2 changed files with 1,047 additions and 384 deletions.
145 changes: 80 additions & 65 deletions Logistic Regression L2Regularization.ipynb
@@ -70,21 +70,30 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class LogReg:\n",
" \"\"\"\n",
" This implementation of Logistic Regression uses batch gradient descent with regularization.\n",
" This implementation of Logistic Regression uses mini-batch gradient descent with l2-regularization.\n",
" \"\"\"\n",
" def __init__(self, num_iters=2800, tolerance = 1e-10, alpha=0.00001, lambd=10, threshold=0.5, verbose=False):\n",
" self.num_iters = num_iters\n",
" def __init__(self, \n",
" epochs=100, \n",
" tolerance = 1e-10, \n",
" alpha=0.001, \n",
" lambd=0, \n",
" threshold=0.5, \n",
" verbose=False,\n",
" minibatch_size=30,\n",
" ):\n",
" self.epochs = epochs\n",
" self.alpha = alpha # Learning rate\n",
" self.lambd = lambd # Regularization parameter\n",
" self.tolerance = tolerance\n",
" self.threshold = threshold\n",
" self.verbose = verbose\n",
" self.minibatch_size = minibatch_size\n",
" \n",
" def add_ones(self, X):\n",
" return np.concatenate((np.ones((len(X),1)), X), axis = 1)\n",
@@ -95,35 +104,67 @@
" def cost(self, X, y_true):\n",
" m = X.shape[0]\n",
" y_hat = self.sigmoid(X, self.theta)\n",
" temp_theta = self.theta[1:].copy()\n",
" temp_theta = self.theta[:, 1:].copy()\n",
" \n",
" Cost = np.sum(-1*y_true*np.log(y_hat)-(1-y_true)*np.log(1-y_hat)) + self.lambd * np.sum(temp_theta**2)\n",
" \n",
" return Cost\n",
" \n",
" def get_minibatch(self, X, y, minibatch):\n",
" X_mb = X[minibatch*self.minibatch_size: (minibatch+1)*self.minibatch_size]\n",
" y_mb = y[minibatch*self.minibatch_size: (minibatch+1)*self.minibatch_size]\n",
" return X_mb, y_mb\n",
" \n",
" def fit(self, X, y):\n",
" X = X.copy()\n",
" X = self.add_ones(X)\n",
" \n",
" n, d = X.shape\n",
" y = y.reshape(-1, 1)\n",
" \n",
" self.theta = np.zeros((d, 1))\n",
" self.classes = np.unique(y)\n",
" self.no_classes = len(self.classes)\n",
" \n",
" # Turn y into one-hot-labels if number of classes is greater than 2\n",
" if self.no_classes > 2:\n",
" y_encode = np.zeros((n, self.no_classes))\n",
" y_encode[range(n), y] = 1 #numpy advanced indexing\n",
" y = y_encode\n",
" else:\n",
" y = y.reshape(-1, 1) \n",
" \n",
" if self.no_classes > 2:\n",
" self.theta = np.zeros((d, self.no_classes))\n",
" else:\n",
" self.theta = np.zeros((d, 1))\n",
" \n",
" current_iter = 1\n",
" current_epoch = 1\n",
" norm = 1\n",
" while (norm >= self.tolerance and current_iter < self.num_iters):\n",
" \n",
" no_of_minibatch = int(n/self.minibatch_size)\n",
" \n",
" while (norm >= self.tolerance and current_epoch < self.epochs):\n",
" # Shuffle X for minibatch gradient descent\n",
" shuffled_index = np.random.permutation(n)\n",
" X_shuffled = X[shuffled_index]\n",
" y_shuffled = y[shuffled_index]\n",
" \n",
" old_theta = self.theta.copy()\n",
" theta_wo_bias = self.theta[:, 1:].copy()\n",
" \n",
" temp_theta = self.theta[1:].copy()\n",
" grad = X.T@(y - self.sigmoid(X, self.theta)) + self.lambd * np.sum(temp_theta)\n",
" grad= grad.reshape(-1, 1)\n",
" for mb in range(no_of_minibatch):\n",
" X_mb, y_mb = self.get_minibatch(X, y, mb)\n",
" \n",
" grad = X_mb.T@(y_mb - self.sigmoid(X_mb, self.theta)) + self.lambd * np.sum(theta_wo_bias)\n",
" \n",
" if self.no_classes <= 2:\n",
" grad= grad.reshape(-1, 1)\n",
" \n",
" self.theta = self.theta - self.alpha*grad\n",
" self.theta = self.theta - self.alpha*grad\n",
" \n",
" if self.verbose and (current_iter%100 == 0):\n",
" print(f'cost for {current_iter} iteration : {self.cost(X, y)}')\n",
" if self.verbose and (current_epoch%100 == 0):\n",
" print(f'cost for {current_epoch} epoch : {self.cost(X, y)}')\n",
" norm = np.linalg.norm(old_theta - self.theta)\n",
" current_iter += 1\n",
" current_epoch += 1\n",
" \n",
" return self.theta\n",
" \n",
@@ -135,20 +176,27 @@
" return self.cost(X, y)\n",
" \n",
" def predict(self, X):\n",
" prob = self.predict_proba(X)\n",
" return (prob >= self.threshold).astype(int)\n",
" proba = self.predict_proba(X)\n",
" if self.no_classes > 2:\n",
" # Multiclass classification\n",
" y_hat = np.argmax(proba, axis=1)\n",
" elif self.no_classes == 2:\n",
" # Binary classification\n",
" y_hat = (proba >= self.threshold).astype(int)\n",
" return y_hat\n",
" \n",
" def predict_proba(self, X):\n",
" \"\"\"\n",
" Returns probability of predictions.\n",
" \"\"\"\n",
" X = self.add_ones(X) \n",
" X = self.add_ones(X)\n",
" \n",
" return self.sigmoid(X, self.theta)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -157,53 +205,20 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cost for 100 iteration : 78.72554244539407\n",
"cost for 200 iteration : 71.7637559135921\n",
"cost for 300 iteration : 66.07842247755053\n",
"cost for 400 iteration : 61.249885580691895\n",
"cost for 500 iteration : 57.14508887349538\n",
"cost for 600 iteration : 53.654471784734895\n",
"cost for 700 iteration : 50.68400001067733\n",
"cost for 800 iteration : 48.153754835764694\n",
"cost for 900 iteration : 45.99638378599468\n",
"cost for 1000 iteration : 44.1554145494372\n",
"cost for 1100 iteration : 42.5836123943238\n",
"cost for 1200 iteration : 41.241486686288404\n",
"cost for 1300 iteration : 40.09598899186002\n",
"cost for 1400 iteration : 39.11940756500792\n",
"cost for 1500 iteration : 38.288443875685104\n",
"cost for 1600 iteration : 37.58344894655533\n",
"cost for 1700 iteration : 36.98779553479041\n",
"cost for 1800 iteration : 36.48736352452237\n",
"cost for 1900 iteration : 36.070118517386234\n",
"cost for 2000 iteration : 35.7257665818431\n",
"cost for 2100 iteration : 35.44547098210861\n",
"cost for 2200 iteration : 35.22161925432031\n",
"cost for 2300 iteration : 35.04763117022852\n",
"cost for 2400 iteration : 34.91779993503667\n",
"cost for 2500 iteration : 34.827160443924576\n",
"cost for 2600 iteration : 34.771379618925636\n",
"cost for 2700 iteration : 34.74666481164581\n"
]
},
{
"data": {
"text/plain": [
"array([[ 0.21239942],\n",
" [ 0.1232903 ],\n",
" [ 0.64963602],\n",
" [-0.99385055],\n",
" [-0.36693142]])"
"array([[ 0.200352 ],\n",
" [ 0.30345859],\n",
" [ 1.08722447],\n",
" [-1.71817827],\n",
" [-0.76690365]])"
]
},
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -214,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -224,7 +239,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -233,7 +248,7 @@
"1.0"
]
},
"execution_count": 17,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -244,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -259,7 +274,7 @@
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])"
]
},
"execution_count": 18,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
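
For context, a minimal usage sketch of the updated LogReg class on a small multiclass problem. This is not part of the commit: make_blobs and train_test_split come from scikit-learn, the hyperparameter values are illustrative, and integer labels 0..k-1 are assumed, as the one-hot step in fit requires.

```python
# Hypothetical usage sketch -- assumes the LogReg class above is defined,
# along with numpy as np; scikit-learn is used only to fabricate data.
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Three separable clusters -> integer labels 0, 1, 2, as fit() expects.
X, y = make_blobs(n_samples=300, centers=3, n_features=4, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = LogReg(epochs=200, alpha=0.001, lambd=0.1, minibatch_size=30)
clf.fit(X_train, y_train)        # returns the learned theta

y_pred = clf.predict(X_test)     # argmax over per-class scores
print('test accuracy:', np.mean(y_pred == y_test))
```

Note that the argmax in predict returns positional class indices, so the comparison with y_test is only meaningful when the labels are already 0..k-1.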