-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlearntest.py
34 lines (23 loc) · 1.01 KB
/
learntest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from nlpio import *
from nlplearn import *
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import train_test_split
if __name__ == '__main__':
    # Script entry point: grid-search the headline pipeline on a training
    # split of the documents and report its score on a held-out split.
    documents = loadDocumentsFromFile('testset.txt')

    # Three-step pipeline; the step names ('clean', 'sentence', 'head') are
    # the prefixes to use for any keys added to `parameters` below.
    pipeline = Pipeline([
        ('clean', SimpleTextCleaner()),
        ('sentence', SentenceSplitter()),
        ('head', HeadlineEstimator()),
    ])

    # Parameters for the cross-validation grid search go here.
    parameters = {
    }

    scorer = RougeScorer()

    # With an empty grid there is nothing to tune, so 2 folds are used just
    # to obtain a score; a non-empty grid gets 5 folds.
    grid_search = GridSearchCV(
        pipeline,
        parameters,
        scoring=scorer,
        cv=(5 if parameters else 2),
        n_jobs=1,
        refit=True,
        verbose=3,
    )

    # Hold out 20% of the documents. The search must only ever see the
    # training part; previously it was fitted on *all* documents, which left
    # the split unused and made a held-out evaluation impossible.
    trainDocs, testDocs = train_test_split(documents, test_size=0.2)
    grid_search.fit(trainDocs)

    print("Best score: %0.3f" % grid_search.best_score_)
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))

    # refit=True means best_estimator_ has been retrained on all of
    # trainDocs; score it on the unseen documents with the same scorer.
    # NOTE(review): assumes the scorer is callable as scorer(estimator, X)
    # when no targets are supplied -- confirm against RougeScorer.
    held_out_score = scorer(grid_search.best_estimator_, testDocs)
    print("Held-out score: %0.3f" % held_out_score)