Commit 90c97e2

lint

1 parent 8eab466

11 files changed (+130, -89 lines)

k_means_clustering.py (+21, -14)

@@ -4,6 +4,7 @@
 from sklearn.datasets import make_blobs
 from utils import euclidean_distance
 
+
 class KMeans:
     def __init__(self, k: int, iter_nums=100):
         self.k = k
@@ -12,28 +13,30 @@ def __init__(self, k: int, iter_nums=100):
     def fit(self, X: np.array):
         self.X = X
         self.sample_nums, _ = self.X.shape
-
+
         # initialise centroids
         random_idx = np.random.choice(self.sample_nums, self.k, replace=False)
         centroids = [self.X[idx] for idx in random_idx]
-
+
         for _ in range(self.iter_nums):
             clusters = self._create_clusters(centroids)
             centroids_before_updates = centroids
-
+
             self.plot(clusters, centroids)
-
+
             centroids = self._update_centroids(clusters)
-
+
             if self._has_converged(centroids_before_updates, centroids):
                 break
 
         self.plot(clusters, centroids)
-
+
     def _create_clusters(self, centroids):
         clusters = [[] for _ in range(self.k)]
         for sample_idx in range(self.sample_nums):
-            distance_to_centroids = [euclidean_distance(self.X[sample_idx], c) for c in centroids]
+            distance_to_centroids = [
+                euclidean_distance(self.X[sample_idx], c) for c in centroids
+            ]
             clusters[np.argmin(distance_to_centroids)].append(sample_idx)
         return clusters
 
@@ -43,27 +46,31 @@ def _update_centroids(self, clusters):
             centroids.append(np.mean(self.X[point_idx], axis=0))
         print(centroids)
         return centroids
-
+
     def plot(self, clusters, centroids):
         _, ax = plt.subplots()
 
         for _, idx in enumerate(clusters):
             points = self.X[idx].T
             ax.scatter(*points)
-
+
         for c in centroids:
-            ax.scatter(*c, marker='x', color='black', linewidth=3)
+            ax.scatter(*c, marker="x", color="black", linewidth=3)
 
         plt.show()
-
+
     def _has_converged(self, old_centroids, centroids):
-        distances = [euclidean_distance(old_centroids[i], centroids[i]) for i in range(self.k)]
+        distances = [
+            euclidean_distance(old_centroids[i], centroids[i]) for i in range(self.k)
+        ]
         return sum(distances) == 0
 
 
-if __name__=="__main__":
+if __name__ == "__main__":
     np.random.seed(42)
-    X, y = make_blobs(centers=3, n_samples=200, n_features=2, shuffle=True, random_state=40)
+    X, y = make_blobs(
+        centers=3, n_samples=200, n_features=2, shuffle=True, random_state=40
+    )
 
     k = KMeans(len(np.unique(y)))
     k.fit(X)
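
Note: the paired blank -/+ lines throughout this commit are Black stripping trailing whitespace from otherwise-empty lines. Both this file and knn.py import euclidean_distance from utils, which this commit does not touch; a minimal sketch of what that helper presumably looks like (the actual utils.py is not shown here):

    import numpy as np

    def euclidean_distance(a, b):
        # L2 distance between two equal-length vectors: sqrt(sum_i (a_i - b_i)^2)
        return np.sqrt(np.sum((np.asarray(a) - np.asarray(b)) ** 2))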

knn.py (+9, -4)

@@ -6,10 +6,11 @@
 
 from utils import euclidean_distance
 
+
 class KNN:
     def __init__(self, k=5):
         self.k = k
-
+
     def fit(self, X, Y):
         self.X_train = X
         self.Y_train = Y
@@ -19,14 +20,17 @@ def predict(self, X):
 
     def find_nearest_neighbors(self, x):
         distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
-        nearest_neighbors_idx = np.argsort(distances)[:self.k]
+        nearest_neighbors_idx = np.argsort(distances)[: self.k]
         nearest_neighbors = [self.Y_train[i] for i in nearest_neighbors_idx]
         return Counter(nearest_neighbors).most_common()[0][0]
 
+
 def run():
     dataset = datasets.load_iris()
     X, y = dataset.data, dataset.target
-    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=.2, random_state=1)
+    X_train, X_test, Y_train, Y_test = train_test_split(
+        X, y, test_size=0.2, random_state=1
+    )
 
     knn = KNN()
     knn.fit(X_train, Y_train)
@@ -35,5 +39,6 @@ def run():
     accuracy = sum(preds == Y_test) / len(preds)
     print(accuracy)
 
+
 if __name__ == "__main__":
-    run()
\ No newline at end of file
+    run()
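
Note: the [:self.k] to [: self.k] change is purely Black's slice-spacing style; behaviour is identical. As a quick illustration of the argsort-based neighbour selection (toy values, not from the repo):

    import numpy as np

    distances = np.array([0.9, 0.1, 0.5, 0.3])  # hypothetical distances to 4 training points
    k = 2
    print(np.argsort(distances)[:k])  # [1 3], the indices of the 2 nearest points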

linear_regression.py (+14, -9)

@@ -4,22 +4,23 @@
 from sklearn.model_selection import train_test_split
 from sklearn import datasets
 
+
 class LinearRegression:
-    def __init__(self, lr=.01, n_iter=1000):
+    def __init__(self, lr=0.01, n_iter=1000):
         self.lr = lr
         self.n_iter = n_iter
-
+
     def fit(self, X, y):
         n_samples, n_features = X.shape
         self.weights = np.zeros(n_features)
         self.bias = 0
         for _ in range(self.n_iter):
             preds = np.dot(X, self.weights) + self.bias
-            dw = (2 / n_samples ) * np.dot(X.T, (preds - y))
+            dw = (2 / n_samples) * np.dot(X.T, (preds - y))
             db = (2 / n_samples) * sum(preds - y)
             self.weights -= self.lr * dw
-            self.bias -= self.lr * db
-
+            self.bias -= self.lr * db
+
     def predict(self, X):
         return np.dot(X, self.weights) + self.bias
 
@@ -30,13 +31,17 @@ def run():
     The mean square error is calculated and printed, the scatter plot of the
     test data points along with the fitted line are also plotted.
     """
-    X, y = datasets.make_regression(n_samples=1000, n_features=1, noise=10, random_state=1)
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=1)
-
+    X, y = datasets.make_regression(
+        n_samples=1000, n_features=1, noise=10, random_state=1
+    )
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=1
+    )
+
     lr = LinearRegression()
     lr.fit(X_train, y_train)
     preds = lr.predict(X_test)
-
+
     mse = np.mean((preds - y_test) ** 2)
     print(mse)
 
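
Note: the dw line reformatted here is the usual MSE gradient, dw = (2 / n) * X^T (preds - y). A small self-contained check of the shapes and values (hypothetical toy data, not from the repo):

    import numpy as np

    X = np.array([[1.0], [2.0], [3.0]])  # 3 samples, 1 feature
    y = np.array([2.0, 4.0, 6.0])
    weights, bias = np.zeros(1), 0.0

    preds = np.dot(X, weights) + bias            # shape (3,)
    dw = (2 / len(X)) * np.dot(X.T, preds - y)   # shape (1,), matches weights
    db = (2 / len(X)) * np.sum(preds - y)        # scalar
    print(dw, db)                                # [-18.66666667] -8.0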

logistic_regression.py (+11, -9)

@@ -5,10 +5,10 @@
 
 
 class LogisticRegression:
-    def __init__(self, lr=.01, n_iter=1000):
+    def __init__(self, lr=0.01, n_iter=1000):
         self.lr = lr
         self.n_iter = n_iter
-
+
     def sigmoid(self, x):
         return 1 / (1 + np.exp(-x))
 
@@ -21,19 +21,20 @@ def fit(self, X, y):
             preds = self.sigmoid(linear_preds)
 
             dw = (1 / n_samples) * np.dot(X.T, (preds - y))
-            db = (1 / n_samples) * np.sum(preds - y)
+            db = (1 / n_samples) * np.sum(preds - y)
 
             self.weights -= self.lr * dw
             self.bias -= self.lr * db
-
-    def predict(self, X, thresh=.5):
+
+    def predict(self, X, thresh=0.5):
         linear_preds = np.dot(X, self.weights) + self.bias
         preds = [1 if self.sigmoid(pred) > thresh else 0 for pred in linear_preds]
         return preds
-
+
     def accuracy_score(self, y, preds):
         return sum(y == preds) / len(y)
 
+
 def run():
     """
     Creates a dataset, splits into train and test, fits LR and tests it.
@@ -42,12 +43,13 @@ def run():
     """
     dataset = datasets.load_breast_cancer()
     X, y = dataset.data, dataset.target
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=1)
-    lr = LogisticRegression(lr=.001)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=1
+    )
+    lr = LogisticRegression(lr=0.001)
     lr.fit(X_train, y_train)
     preds = lr.predict(X_test)
     print(lr.accuracy_score(preds, y_test))
-
 
 
 if __name__ == "__main__":
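
Note: thresh=.5 becoming thresh=0.5 is cosmetic; predict still thresholds the sigmoid output at 0.5. A sketch of that decision rule with toy scores (not from the repo):

    import numpy as np

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    linear_preds = np.array([-2.0, 0.1, 3.0])  # hypothetical raw scores
    print([1 if sigmoid(p) > 0.5 else 0 for p in linear_preds])  # [0, 1, 1]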

naive_bayes.py (+7, -5)

@@ -3,10 +3,12 @@
 from sklearn.model_selection import train_test_split
 from sklearn import datasets
 
+
 def accuracy(y_true, y_pred):
     accuracy = np.sum(y_true == y_pred) / len(y_true)
     return accuracy
 
+
 class NaiveBayes:
     def fit(self, X, y):
         self.unique_classes = np.unique(y)
@@ -18,7 +20,7 @@ def fit(self, X, y):
         self._priors = np.zeros(classes_num, dtype=np.float64)
 
         for idx, c in enumerate(self.unique_classes):
-            X_c = X[y == c] # gives features of class c
+            X_c = X[y == c]  # gives features of class c
             self._mean[idx, :] = np.mean(X_c, axis=0)
             self._var[idx, :] = np.var(X_c, axis=0)
             self._priors[idx] = len(X_c) / samples_num
@@ -35,11 +37,11 @@ def _predict(self, x):
             posterior = prior + likelihood
             posteriors.append(posterior)
         return np.argmax(posteriors)
-
+
     def _compute_likelihood(self, idx, x):
-        nominator = np.exp(-(x - self._mean[idx]) ** 2 / (2 * self._var[idx]))
+        nominator = np.exp(-((x - self._mean[idx]) ** 2) / (2 * self._var[idx]))
         denominator = np.sqrt(2 * np.pi * self._var[idx])
-        return np.sum(np.log(nominator / denominator))
+        return np.sum(np.log(nominator / denominator))
 
 
 if __name__ == "__main__":
@@ -55,4 +57,4 @@ def _compute_likelihood(self, idx, x):
     nb.fit(X_train, y_train)
     predictions = nb.predict(X_test)
 
-    print("Naive Bayes classification accuracy", accuracy(y_test, predictions))
\ No newline at end of file
+    print("Naive Bayes classification accuracy", accuracy(y_test, predictions))

neural_net/activations.py (+5, -3)

@@ -2,11 +2,12 @@
 
 from layers import Activation
 
+
 class ReLU(Activation):
     def __init__(self):
         def relu(X):
             return np.maximum(X, 0)
-
+
         def relu_prime(X):
             return X > 0
 
@@ -23,12 +24,13 @@ def tanh_prime(X):
 
         super().__init__(tanh, tanh_prime)
 
+
 class Sigmoid(Activation):
     def __init__(self):
         def sigmoid(X):
             return 1 / (1 + np.exp(-X))
-
+
         def sigmoid_prime(X):
             return sigmoid(X) * (1 - sigmoid(X))
 
-        super().__init__(sigmoid, sigmoid_prime)
\ No newline at end of file
+        super().__init__(sigmoid, sigmoid_prime)
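
Note: the ReLU, Tanh, and Sigmoid classes assume an Activation base class in layers.py, which this commit does not touch. A minimal sketch of what that base class presumably provides (an assumption, not the repo's actual code):

    import numpy as np

    class Activation:
        # Assumed interface: store the function and its derivative;
        # forward applies the function, backward applies the chain rule.
        def __init__(self, activation, activation_prime):
            self.activation = activation
            self.activation_prime = activation_prime

        def forward(self, X):
            self.X = X
            return self.activation(X)

        def backward(self, output_gradient):
            return output_gradient * self.activation_prime(self.X)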
