-
Notifications
You must be signed in to change notification settings - Fork 0
/
mlr.py
46 lines (35 loc) · 1.23 KB
/
mlr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import math
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

# Train a multiple linear regression on taxi.csv, evaluate it, persist the
# fitted model to taxi.pkl and print one sample prediction.

# Load the dataset: every column except the last is treated as a feature and
# the last column as the regression target.
data = pd.read_csv('taxi.csv')
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
print(data.describe())

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit ordinary least squares on the training split.
lreg = LinearRegression()
lreg.fit(X_train, y_train)
# R^2 on the training data (kept in a name instead of being discarded).
train_score = lreg.score(X_train, y_train)

# Predict on the held-out rows.
y_pred = lreg.predict(X_test)

# Hold-out metrics: R^2, mean squared error and its square root.
score = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

# 3-fold cross-validation over the full dataset, scoring both R^2 and
# negated MSE and also recording the train-fold scores.
scores = cross_validate(
    lreg,
    X,
    y,
    cv=3,
    scoring=('r2', 'neg_mean_squared_error'),
    return_train_score=True,
)
sc1 = scores['test_neg_mean_squared_error']  # per-fold test scores (negated MSE)
sc2 = scores['train_r2'].mean()  # mean R^2 across the train folds

# Persist the fitted model. The context manager closes (and therefore
# flushes) the file before it is reopened for reading below.
with open('taxi.pkl', 'wb') as model_file:
    pickle.dump(lreg, model_file)

# Reload the model from disk. NOTE: pickle.load must only be used on files
# from a trusted source; here it reads the file written just above.
with open('taxi.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Sample prediction for a single row of four feature values
# (assumes taxi.csv has exactly four feature columns -- TODO confirm).
print(model.predict([[80, 1770000, 6000, 85]]))