-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy path: Algo.py
55 lines (46 loc) · 1.53 KB
/
Algo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import random
from sklearn.datasets.samples_generator import make_regression
from scipy import stats
import pandas as pd
def gradient_descent_2(alpha, x, y, numIterations):
    """Fit a 2-parameter linear regression by batch gradient descent.

    Parameters
    ----------
    alpha : float
        Learning rate for each update step.
    x : ndarray, shape (m, 2)
        Design matrix; callers prepend a bias column of ones.
    y : array-like, shape (m,)
        Target values.
    numIterations : int
        Number of full-batch gradient steps to run.

    Returns
    -------
    ndarray, shape (2,)
        The fitted parameter vector theta ([intercept, slope]).
    """
    m = x.shape[0]  # number of samples
    theta = np.ones(2)
    x_transpose = x.transpose()
    # `step` (was `iter`) avoids shadowing the builtin iter().
    for step in range(numIterations):
        hypothesis = np.dot(x, theta)
        loss = hypothesis - y
        # Half mean-squared-error cost; the 1/2 cancels in the gradient.
        J = np.sum(loss ** 2) / (2 * m)
        # print() call form works on Python 2 and 3 (was a Py2-only statement).
        print("iter %s | J: %.3f" % (step, J))
        gradient = np.dot(x_transpose, loss) / m
        theta = theta - alpha * gradient  # gradient-descent update
    return theta
if __name__ == '__main__':
    # x, y = make_regression(n_samples=100, n_features=1, n_informative=1,
    #                        random_state=0, noise=35)
    # Build the training set from the combined training CSV:
    # single feature = temperature column 'T', target = 'PM 2.5'.
    x = []
    y = []
    for chunk in pd.read_csv('Train/Train_Combine.csv', chunksize=656):
        df = pd.DataFrame(data=chunk)
        for index, row in df.iterrows():
            x.append([row['T']])
            y.append(row['PM 2.5'])
    m, n = np.shape(x)
    x = np.c_[np.ones(m), x]  # prepend bias column of ones
    alpha = 0.01  # learning rate
    theta = gradient_descent_2(alpha, x, y, 1000)

    # Predict PM 2.5 for the test set. Each prediction must mirror the
    # training design: theta[0]*1 + theta[1]*T. (The original code did
    # np.dot(X2[i], theta) with scalar X2[i], which broadcast and printed
    # a 2-element vector instead of the scalar prediction.)
    temps = []
    for chunk in pd.read_csv('Test/Test_Combine.csv', chunksize=656):
        df = pd.DataFrame(data=chunk)
        for index, row in df.iterrows():
            temps.append(row['T'])
    X2 = np.array(temps)
    # range() replaces Py2-only xrange(); print() replaces the statement form.
    for i in range(X2.shape[0]):
        print(theta[0] + theta[1] * X2[i])