# logistic_regression.py
#
# Binary logistic regression trained with batch gradient descent, plus a small
# demo that fits it on scikit-learn's breast-cancer dataset.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    ``lr`` and ``n_iter`` are set by ``__init__``; ``weights`` and ``bias`` are
    created by ``fit`` and read by ``predict``.
    """

    def __init__(self, lr=0.01, n_iter=1000):
        """Store the training hyperparameters.

        Args:
            lr: Step size applied to every gradient update.
            n_iter: Number of full-batch gradient steps performed by ``fit``.
        """
        self.lr = lr
        self.n_iter = n_iter

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of *x*, element-wise.

        Args:
            x: A scalar or array-like of real numbers.

        Returns:
            A NumPy float scalar for scalar input, otherwise an array with the
            same shape as *x*.
        """
        x = np.asarray(x, dtype=float)
        # exp(-|x|) always lies in (0, 1], so neither branch below can overflow,
        # unlike the naive exp(-x), which blows up for large negative x.
        decay = np.exp(-np.abs(x))
        out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
        return out if out.ndim else out[()]

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Both parameters start at zero and are updated ``n_iter`` times using
        the gradient computed over the whole training set.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` binary targets (0 or 1).

        Raises:
            ValueError: If *X* is not two-dimensional or contains no samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("fit() requires at least one training sample")

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        inv_n = 1 / n_samples  # loop-invariant averaging factor

        for _ in range(self.n_iter):
            # Difference between predicted probabilities and the targets.
            error = self.sigmoid(np.dot(X, self.weights) + self.bias) - y
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X, thresh=0.5):
        """Predict a 0/1 label for every row of *X*.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            thresh: A sample is labelled 1 when its predicted probability is
                strictly greater than this value.

        Returns:
            A list of ``int`` labels (0 or 1), one per row of *X*.
        """
        scores = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return (self.sigmoid(scores) > thresh).astype(int).tolist()

    def accuracy_score(self, y, preds):
        """Return the fraction of positions where *y* and *preds* agree.

        Args:
            y: Array-like of reference labels.
            preds: Array-like of predicted labels with the same shape as *y*.

        Returns:
            The share of matching labels, between 0 and 1.

        Raises:
            ValueError: If the inputs are empty or their shapes differ.
        """
        # Convert first: comparing two plain lists with == gives a single bool,
        # not an element-wise result.
        y = np.asarray(y)
        preds = np.asarray(preds)
        if y.shape != preds.shape:
            raise ValueError(
                f"shape mismatch: y has shape {y.shape}, preds has shape {preds.shape}"
            )
        if y.size == 0:
            raise ValueError("accuracy_score() requires at least one label")
        return np.mean(y == preds)
def run():
    """Train and evaluate ``LogisticRegression`` on the breast-cancer dataset.

    Loads scikit-learn's breast-cancer data, holds out 20% of the samples as a
    test set (``random_state=1``), fits the model on the remainder with a
    learning rate of 0.001, and prints the accuracy on the held-out samples.
    """
    dataset = datasets.load_breast_cancer()
    X, y = dataset.data, dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1
    )

    # Called ``model`` so it is not mistaken for the ``lr`` learning-rate argument.
    model = LogisticRegression(lr=0.001)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # accuracy_score is declared as (y, preds): reference labels go first.
    print(model.accuracy_score(y_test, preds))
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    run()