From a57cac980cd4f155b4c80841b8c2c65cab43cd32 Mon Sep 17 00:00:00 2001 From: ChunkeySun <112787606+ChunkeySun@users.noreply.github.com> Date: Thu, 27 Oct 2022 17:23:35 +0800 Subject: [PATCH] Update Chapter 4.ipynb --- Notebooks/Chapter 4.ipynb | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/Notebooks/Chapter 4.ipynb b/Notebooks/Chapter 4.ipynb index 3432898..5f633cf 100644 --- a/Notebooks/Chapter 4.ipynb +++ b/Notebooks/Chapter 4.ipynb @@ -178,11 +178,11 @@ "df_no = df[df.default2 == 0].sample(frac=0.15)\n", "# Take all samples where target value is 'yes'\n", "df_yes = df[df.default2 == 1]\n", - "df_ = df_no.append(df_yes)\n", + "df_ = pd.concat([df_no, df_yes])\n", "\n", "ax1.scatter(df_[df_.default == 'Yes'].balance, df_[df_.default == 'Yes'].income, s=40, c='orange', marker='+',\n", " linewidths=1)\n", - "ax1.scatter(df_[df_.default == 'No'].balance, df_[df_.default == 'No'].income, s=40, marker='o', linewidths='1',\n", + "ax1.scatter(df_[df_.default == 'No'].balance, df_[df_.default == 'No'].income, s=40, marker='o', linewidths=1,\n", " edgecolors='lightblue', facecolors='white', alpha=.6)\n", "\n", "ax1.set_ylim(ymin=0)\n", @@ -228,7 +228,7 @@ "# and predicted classification.\n", "X_test = np.arange(df.balance.min(), df.balance.max()).reshape(-1,1)\n", "\n", - "clf = skl_lm.LogisticRegression(solver='newton-cg')\n", + "clf = skl_lm.LogisticRegression(solver='lbfgs')\n", "clf.fit(X_train,y)\n", "prob = clf.predict_proba(X_test)\n", "\n", @@ -297,7 +297,7 @@ "source": [ "# Using newton-cg solver, the coefficients are equal/closest to the ones in the book. 
\n", "# I do not know the details on the differences between the solvers.\n", - "clf = skl_lm.LogisticRegression(solver='newton-cg')\n", + "clf = skl_lm.LogisticRegression(solver='lbfgs')\n", "X_train = df.balance.values.reshape(-1,1)\n", "clf.fit(X_train,y)\n", "print(clf)\n", @@ -391,8 +391,7 @@ } ], "source": [ - "X_train = sm.add_constant(df.balance)\n", - "est = smf.Logit(y.ravel(), X_train).fit()\n", + "est = smf.logit(formula='default2 ~ 1 + balance', data=df).fit()\n", "est.summary2().tables[1]" ] }, @@ -481,10 +480,8 @@ } ], "source": [ - "X_train = sm.add_constant(df.student2)\n", - "y = df.default2\n", "\n", - "est = smf.Logit(y, X_train).fit()\n", + "est = smf.logit(formula='default2 ~ 1 + student2', data=df).fit()\n", "est.summary2().tables[1]" ] }, @@ -593,8 +590,7 @@ } ], "source": [ - "X_train = sm.add_constant(df[['balance', 'income', 'student2']])\n", - "est = smf.Logit(y, X_train).fit()\n", + "est = smf.logit(formula='default2 ~ 1 + balance + income + student2', data=df).fit()\n", "est.summary2().tables[1]" ] }, @@ -622,8 +618,8 @@ "# Vector with balance values for plotting\n", "X_test = np.arange(df.balance.min(), df.balance.max()).reshape(-1,1)\n", "\n", - "clf = skl_lm.LogisticRegression(solver='newton-cg')\n", - "clf2 = skl_lm.LogisticRegression(solver='newton-cg')\n", + "clf = skl_lm.LogisticRegression(solver='lbfgs')\n", + "clf2 = skl_lm.LogisticRegression(solver='lbfgs')\n", "\n", "clf.fit(X_train,y)\n", "clf2.fit(X_train2,y2)\n", @@ -734,7 +730,7 @@ "ax1.legend(loc=2)\n", "\n", "# Right plot\n", - "sns.boxplot('student', 'balance', data=df, orient='v', ax=ax2, palette=c_palette);" + "sns.boxplot(x='student', y='balance', data=df, orient='v', ax=ax2, palette=c_palette);" ] }, { @@ -808,8 +804,8 @@ } ], "source": [ - "X = df[['balance', 'income', 'student2']].as_matrix()\n", - "y = df.default2.as_matrix()\n", + "X = df[['balance', 'income', 'student2']].values\n", + "y = df.default2.values\n", "\n", "lda = LinearDiscriminantAnalysis(solver='svd')\n", "y_pred 
= lda.fit(X, y).predict(X)\n", @@ -1310,8 +1306,8 @@ "X_test = X_scaled[:1000,:]\n", "y_test = y[:1000]\n", "\n", - "def KNN(n_neighbors=1, weights='uniform'):\n", - " clf = neighbors.KNeighborsClassifier(n_neighbors, weights)\n", + "def KNN(n_neighbors=1,):\n", + " clf = neighbors.KNeighborsClassifier(n_neighbors, weights = 'uniform')\n", " clf.fit(X_train, y_train)\n", " pred = clf.predict(X_test)\n", " score = clf.score(X_test, y_test)\n",