-
Notifications
You must be signed in to change notification settings - Fork 0
/
rf_gridsearch (copy).txt
61 lines (47 loc) · 2.01 KB
/
rf_gridsearch (copy).txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
# Load the training table and shuffle its rows. The shuffle is unseeded, so the
# row order differs from run to run; the index is rebuilt as 0..n-1 afterwards.
df = pd.read_csv('train_upd.csv')
df = df.sample(frac=1).reset_index(drop=True)

# Replace each categorical column with integer codes. Values are cast to str
# before encoding, and a fresh encoder is fitted per column. After the loop
# `le` is the encoder fitted on 'Congestion_Type' (the last column processed).
for column in ('ran_vendor', 'Congestion_Type'):
    le = preprocessing.LabelEncoder()
    df[column] = le.fit_transform(df[column].astype(str))
import datetime
# Derive the day of the week (Monday=0 ... Sunday=6) from the three date-part
# columns. pd.to_datetime assembles dates from columns named year/month/day,
# so the parts are renamed on a temporary copy first. This is vectorized and,
# unlike a per-row df[col][i] lookup, does not depend on the index being 0..n-1.
date_parts = df[['par_year', 'par_month', 'par_day']].rename(
    columns={'par_year': 'year', 'par_month': 'month', 'par_day': 'day'}
)
df["week_day"] = pd.to_datetime(date_parts).dt.weekday

# The identifier column and the raw date parts are not used as features.
df = df.drop(['cell_name', 'par_year', 'par_month', 'par_day'], axis=1)
# Feature matrix: every remaining column except the target, coerced column by
# column to a numeric dtype.
features = df.drop(columns=['Congestion_Type'])
features = features.apply(pd.to_numeric)

# Z-score each column using its own mean and (pandas default) standard deviation.
column_mean = features.mean()
column_std = features.std()
x = (features - column_mean) / column_std
from sklearn.decomposition import PCA
# Target vector: the encoded 'Congestion_Type' column.
y = df['Congestion_Type']

# Project the standardized features onto 15 principal components and wrap the
# result in a DataFrame (default integer column labels).
# NOTE(review): the PCA is fitted on all rows of x at once, i.e. before any
# cross-validation split is made further down.
pca = PCA(n_components=15)
principalDf = pd.DataFrame(pca.fit_transform(x))
# Disabled experiment: hold-out split instead of cross-validation.
# train_x,test_x,train_y,test_y=train_test_split(principalDf,y,test_size=0.15,random_state=1)

# Base model: a 600-tree random forest using all available cores.
clf = RandomForestClassifier(n_estimators=600, n_jobs=-1)
# Disabled experiment: direct fit on a ranked column subset.
# clf.fit(train_x[ranking_columns[:i]],train_y)

# Search space: 5 depths x 4 feature counts = 20 candidate settings.
param_grid = {
    "max_depth": [10, 15, 20, 25, 30],
    "max_features": [2, 5, 7, 10],
}

# Disabled experiment: AdaBoost wrapped around the forest.
# ada = AdaBoostClassifier(n_estimators=10, base_estimator=clf,learning_rate=0.1)
# ada.fit(train_x,train_y)

# Candidates are ranked by Matthews correlation coefficient over 4-fold CV.
my_scorer = make_scorer(matthews_corrcoef)
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring=my_scorer,
    cv=4,
    n_jobs=-1,
    verbose=10,
)
grid_search.fit(principalDf, y)

# Disabled experiment: plain cross-validation of a single configuration.
# scores = cross_val_score(clf, train_x, train_y, cv=4, verbose=1, scoring=my_scorer)
# print(scores)
# print(scores.mean())