Add softmax regression
sewade committed Nov 28, 2019
1 parent 6b856dd commit 816a8a0
Showing 2 changed files with 1,047 additions and 384 deletions.
145 changes: 80 additions & 65 deletions Logistic Regression L2Regularization.ipynb
@@ -70,21 +70,30 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class LogReg:\n",
" \"\"\"\n",
" This implementation of Logistic Regression uses batch gradient descent with regularization.\n",
" This implementation of Logistic Regression uses mini-batch gradient descent with l2-regularization.\n",
" \"\"\"\n",
" def __init__(self, num_iters=2800, tolerance = 1e-10, alpha=0.00001, lambd=10, threshold=0.5, verbose=False):\n",
" self.num_iters = num_iters\n",
" def __init__(self, \n",
" epochs=100, \n",
" tolerance = 1e-10, \n",
" alpha=0.001, \n",
" lambd=0, \n",
" threshold=0.5, \n",
" verbose=False,\n",
" minibatch_size=30,\n",
" ):\n",
" self.epochs = epochs\n",
" self.alpha = alpha # Learning rate\n",
" self.lambd = lambd # Regularization parameter\n",
" self.tolerance = tolerance\n",
" self.threshold = threshold\n",
" self.verbose = verbose\n",
" self.minibatch_size = minibatch_size\n",
" \n",
" def add_ones(self, X):\n",
" return np.concatenate((np.ones((len(X),1)), X), axis = 1)\n",
@@ -95,35 +104,67 @@
" def cost(self, X, y_true):\n",
" m = X.shape[0]\n",
" y_hat = self.sigmoid(X, self.theta)\n",
" temp_theta = self.theta[1:].copy()\n",
" temp_theta = self.theta[:, 1:].copy()\n",
" \n",
" Cost = np.sum(-1*y_true*np.log(y_hat)-(1-y_true)*np.log(1-y_hat)) + self.lambd * np.sum(temp_theta**2)\n",
" \n",
" return Cost\n",
" \n",
" def get_minibatch(self, X, y, minibatch):\n",
" X_mb = X[minibatch*self.minibatch_size: (minibatch+1)*self.minibatch_size]\n",
" y_mb = y[minibatch*self.minibatch_size: (minibatch+1)*self.minibatch_size]\n",
" return X_mb, y_mb\n",
" \n",
" def fit(self, X, y):\n",
" X = X.copy()\n",
" X = self.add_ones(X)\n",
" \n",
" n, d = X.shape\n",
" y = y.reshape(-1, 1)\n",
" \n",
" self.theta = np.zeros((d, 1))\n",
" self.classes = np.unique(y)\n",
" self.no_classes = len(self.classes)\n",
" \n",
" # Turn y into one-hot-labels if number of classes is greater than 2\n",
" if self.no_classes > 2:\n",
" y_encode = np.zeros((n, self.no_classes))\n",
" y_encode[range(n), y] = 1 #numpy advanced indexing\n",
" y = y_encode\n",
" else:\n",
" y = y.reshape(-1, 1) \n",
" \n",
" if self.no_classes > 2:\n",
" self.theta = np.zeros((d, self.no_classes))\n",
" else:\n",
" self.theta = np.zeros((d, 1))\n",
" \n",
" current_iter = 1\n",
" current_epoch = 1\n",
" norm = 1\n",
" while (norm >= self.tolerance and current_iter < self.num_iters):\n",
" \n",
" no_of_minibatch = int(n/self.minibatch_size)\n",
" \n",
" while (norm >= self.tolerance and current_epoch < self.epochs):\n",
" # Shuffle X for minibatch gradient descent\n",
" shuffled_index = np.random.permutation(n)\n",
" X_shuffled = X[shuffled_index]\n",
" y_shuffled = y[shuffled_index]\n",
" \n",
" old_theta = self.theta.copy()\n",
" theta_wo_bias = self.theta[:, 1:].copy()\n",
" \n",
" temp_theta = self.theta[1:].copy()\n",
" grad = X.T@(y - self.sigmoid(X, self.theta)) + self.lambd * np.sum(temp_theta)\n",
" grad= grad.reshape(-1, 1)\n",
" for mb in range(no_of_minibatch):\n",
" X_mb, y_mb = self.get_minibatch(X, y, mb)\n",
" \n",
" grad = X_mb.T@(y_mb - self.sigmoid(X_mb, self.theta)) + self.lambd * np.sum(theta_wo_bias)\n",
" \n",
" if self.no_classes <= 2:\n",
" grad= grad.reshape(-1, 1)\n",
" \n",
" self.theta = self.theta - self.alpha*grad\n",
" self.theta = self.theta - self.alpha*grad\n",
" \n",
" if self.verbose and (current_iter%100 == 0):\n",
" print(f'cost for {current_iter} iteration : {self.cost(X, y)}')\n",
" if self.verbose and (current_epoch%100 == 0):\n",
" print(f'cost for {current_epoch} epoch : {self.cost(X, y)}')\n",
" norm = np.linalg.norm(old_theta - self.theta)\n",
" current_iter += 1\n",
" current_epoch += 1\n",
" \n",
" return self.theta\n",
" \n",
@@ -135,20 +176,27 @@
" return self.cost(X, y)\n",
" \n",
" def predict(self, X):\n",
" prob = self.predict_proba(X)\n",
" return (prob >= self.threshold).astype(int)\n",
" proba = self.predict_proba(X)\n",
" if self.no_classes > 2:\n",
" # Multiclass classification\n",
" y_hat = np.argmax(proba, axis=1)\n",
" elif self.no_classes == 2:\n",
" # Binary classification\n",
" y_hat = (proba >= self.threshold).astype(int)\n",
" return y_hat\n",
" \n",
" def predict_proba(self, X):\n",
" \"\"\"\n",
" Returns probability of predictions.\n",
" \"\"\"\n",
" X = self.add_ones(X) \n",
" X = self.add_ones(X)\n",
" \n",
" return self.sigmoid(X, self.theta)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -157,53 +205,20 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cost for 100 iteration : 78.72554244539407\n",
"cost for 200 iteration : 71.7637559135921\n",
"cost for 300 iteration : 66.07842247755053\n",
"cost for 400 iteration : 61.249885580691895\n",
"cost for 500 iteration : 57.14508887349538\n",
"cost for 600 iteration : 53.654471784734895\n",
"cost for 700 iteration : 50.68400001067733\n",
"cost for 800 iteration : 48.153754835764694\n",
"cost for 900 iteration : 45.99638378599468\n",
"cost for 1000 iteration : 44.1554145494372\n",
"cost for 1100 iteration : 42.5836123943238\n",
"cost for 1200 iteration : 41.241486686288404\n",
"cost for 1300 iteration : 40.09598899186002\n",
"cost for 1400 iteration : 39.11940756500792\n",
"cost for 1500 iteration : 38.288443875685104\n",
"cost for 1600 iteration : 37.58344894655533\n",
"cost for 1700 iteration : 36.98779553479041\n",
"cost for 1800 iteration : 36.48736352452237\n",
"cost for 1900 iteration : 36.070118517386234\n",
"cost for 2000 iteration : 35.7257665818431\n",
"cost for 2100 iteration : 35.44547098210861\n",
"cost for 2200 iteration : 35.22161925432031\n",
"cost for 2300 iteration : 35.04763117022852\n",
"cost for 2400 iteration : 34.91779993503667\n",
"cost for 2500 iteration : 34.827160443924576\n",
"cost for 2600 iteration : 34.771379618925636\n",
"cost for 2700 iteration : 34.74666481164581\n"
]
},
{
"data": {
"text/plain": [
"array([[ 0.21239942],\n",
" [ 0.1232903 ],\n",
" [ 0.64963602],\n",
" [-0.99385055],\n",
" [-0.36693142]])"
"array([[ 0.200352 ],\n",
" [ 0.30345859],\n",
" [ 1.08722447],\n",
" [-1.71817827],\n",
" [-0.76690365]])"
]
},
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -214,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -224,7 +239,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -233,7 +248,7 @@
"1.0"
]
},
"execution_count": 17,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -244,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -259,7 +274,7 @@
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])"
]
},
"execution_count": 18,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
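
For context, a minimal usage sketch of the updated LogReg class on a small multiclass problem. This is not part of the commit: make_blobs and train_test_split come from scikit-learn, the hyperparameter values are illustrative, and integer labels 0..k-1 are assumed, as the one-hot step in fit requires.

```python
# Hypothetical usage sketch -- assumes the LogReg class above is defined,
# along with numpy as np; scikit-learn is used only to fabricate data.
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Three separable clusters -> integer labels 0, 1, 2, as fit() expects.
X, y = make_blobs(n_samples=300, centers=3, n_features=4, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = LogReg(epochs=200, alpha=0.001, lambd=0.1, minibatch_size=30)
clf.fit(X_train, y_train)        # returns the learned theta

y_pred = clf.predict(X_test)     # argmax over per-class scores
print('test accuracy:', np.mean(y_pred == y_test))
```

Note that the argmax in predict returns positional class indices, so the comparison with y_test is only meaningful when the labels are already 0..k-1.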