-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
98 lines (80 loc) · 4.36 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from parser import get_parser
from pipeline import TrainPipeline, PredPipeline, TestPipeline
from utils import check_path
import random
import argparse
import sys
# Fix the seed of the global `random` generator at import time.
random.seed(1337)

# Oldest interpreter release this script states it supports.
MIN_PYTHON_VERSION = (3, 4, 3)

# Compare (major, minor, micro) as one tuple. Checking each component on its
# own wrongly flags newer releases such as 3.5.0, whose micro version (0) is
# smaller than 3 even though the release itself is newer than 3.4.3.
if sys.version_info[:3] < MIN_PYTHON_VERSION:
    print("This script requires at least Python version 3.4.3")
    # Intentionally only a warning: execution continues on older interpreters.
def _close_and_get_name(handle):
    """
    Return the path of a file opened by argparse and close the handle.
    :param handle: file object produced by argparse.FileType, or None
    :return: the handle's name, or None if no handle was given
    """
    if handle is None:
        return None
    # argparse maps the argument '-' to sys.stdin; never close that stream.
    if handle is not sys.stdin:
        handle.close()
    return handle.name


def main():
    """
    Parses the command line and dispatches to train, test or predict.
    Positional arguments, in order: action, input_file, file_format, and the optional
    output_folder (default './output/', passed through check_path) and model_file.
    :raises AttributeError: if action is 'test' or 'predict' and no model_file was given
    :return: None
    """
    parser = argparse.ArgumentParser(description='Run Neural Semantic Role Labeling')
    parser.add_argument('action', choices=['train', 'test', 'predict'])
    # FileType is used so argparse rejects unreadable paths up front.
    parser.add_argument('input_file', type=argparse.FileType('r'))
    # NOTE: this positional has no nargs='?', so it is required and the default is never applied.
    parser.add_argument('file_format', default='conll2009', choices=['conll2008', 'conll2009'])
    parser.add_argument('output_folder', nargs='?', default='./output/', type=check_path)
    parser.add_argument('model_file', nargs='?', type=argparse.FileType('r'))
    args = parser.parse_args()

    # Only the file names are forwarded, so release the handles argparse opened
    # instead of leaking them for the lifetime of the process.
    input_file = _close_and_get_name(args.input_file)
    model_file = _close_and_get_name(args.model_file)

    action = args.action
    if action != 'train' and model_file is None:
        raise AttributeError('Need a trained model to predict or test')

    file_format = args.file_format
    output_folder = args.output_folder

    if action == 'train':
        train(input_file, file_format, output_folder)
    elif action == 'test':
        test(input_file, file_format, output_folder, model_file)
    elif action == 'predict':
        predict(input_file, file_format, output_folder, model_file)
def train(input_file_gold, input_file_format, output_path):
    """
    Parses the gold-annotated input file and hands the result to the training pipeline.
    The training pipeline learns a model from the gold labels of the input file.
    :param input_file_gold: CoNLL annotated input file with gold labels
    :param input_file_format: type of CoNLL annotation: 'conll2008' or 'conll2009'
    :param output_path: folder the trained model will be written into
    :return: writes trained model into output_path
    """
    # Build a gold-aware parser for the requested format and read the file in one go.
    gold_text = get_parser(input_file_format, gold=True, language='eng').get_parsed_text(input_file_gold)
    TrainPipeline(gold_text, output_path).start()
def test(input_file, input_file_format, output_path, trained_model_file):
    """
    Parses the input file twice (with and without gold labels) and runs the testing pipeline.
    The testing pipeline uses the trained model to predict predicates and arguments in input_file
    and evaluates them with the CoNLL-Scorer against the gold labels of the same file.
    :param input_file: CoNLL annotated input file with gold labels
    :param input_file_format: type of CoNLL annotation: 'conll2008' or 'conll2009'
    :param output_path: folder the *.RESULTS, *.PRED and *.GOLD files will be written into
    :param trained_model_file: trained model from training step (TrainPipeline)
    :return: writes output of CoNLL-Scorer (*.RESULTS), *.PRED, *.GOLD to output_path
    """
    # Reference reading of the file, keeping the gold annotations.
    gold_text = get_parser(input_file_format, gold=True, language='eng').get_parsed_text(input_file)
    # Second reading of the same file with gold=False, used as the prediction input.
    unlabeled_text = get_parser(input_file_format, gold=False, language='eng').get_parsed_text(input_file)
    TestPipeline(unlabeled_text, gold_text, trained_model_file, output_path).start()
def predict(input_file, input_file_format, output_path, trained_model_file):
    """
    Reads the input_file into a parsed_text object to forward this to the predicting pipeline.
    The predicting pipeline uses the trained model to predict predicates and arguments in input_file and writes
    them in a *.PRED file to output_path.
    :param input_file: CoNLL annotated input file (no gold labels needed)
    :param input_file_format: type of CoNLL annotation: 'conll2008' or 'conll2009'
    :param output_path: folder the *.PRED file will be written into
    :param trained_model_file: trained model from training step (TrainPipeline)
    :return: writes predicted predicates and arguments (*.PRED) to output_path
    """
    # The input is documented as not needing gold labels, so parse it with
    # gold=False, the same way test() parses the text it predicts on.
    parser = get_parser(input_file_format, gold=False, language='eng')
    parsed_text = parser.get_parsed_text(input_file)
    pipeline = PredPipeline(parsed_text, trained_model_file, output_path)
    pipeline.start()
# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()