-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtagger.py
35 lines (28 loc) · 1.07 KB
/
tagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import time
# in-house libs
import string_util
class Tagger(object):
    """Emit one position tag per character of whitespace-separated words.

    Tags produced for each word:
      * 's' -- the only character of a one-character word
      * 'b' -- the first character of a longer word
      * 'm' -- every interior character of a longer word
      * 'e' -- the last character of a longer word
    """

    def __init__(self):
        # In-house helper; this class only reads its NEWLINE attribute
        # (presumably the line terminator to write -- confirm in string_util).
        self.su = string_util.StringUtil()

    @staticmethod
    def tag_for_sentence(sentence):
        """Return the list of position tags for every character in *sentence*.

        Args:
            sentence: Text whose words are separated by whitespace. May be a
                UTF-8 encoded byte string or an already-decoded text string.

        Returns:
            A flat list of one-character tags ('b', 'm', 'e', 's'), in word
            order, one tag per non-whitespace character. Empty or
            whitespace-only input yields an empty list.

        Raises:
            UnicodeDecodeError: If *sentence* is bytes that are not valid UTF-8.
        """
        # Decode only raw bytes. Calling .decode() on text that is already
        # decoded makes Python 2 ASCII-encode it first, which fails on any
        # non-ASCII character.
        if isinstance(sentence, bytes):
            sentence = sentence.decode('utf-8')

        tags = []
        for word in sentence.strip().split():
            if len(word) > 1:
                tags.append('b')
                # One 'm' per interior character (none for two-char words).
                tags.extend('m' * (len(word) - 2))
                tags.append('e')
            else:
                tags.append('s')
        return tags

    def tag_for_file(self, input_path, output_path):
        """Tag every line of *input_path* and write one tag per output line.

        Args:
            input_path: Path of the UTF-8 text file to read; each line is
                tagged independently with ``tag_for_sentence``.
            output_path: Path of the file to create or overwrite. Each tag is
                written followed by ``self.su.NEWLINE``.

        Progress and elapsed time are printed to standard output.
        """
        print('Tagging for %s and output to %s...' % (input_path, output_path))
        start = time.time()
        newline = self.su.NEWLINE
        # Read raw bytes so decoding happens exactly once, inside
        # tag_for_sentence. The input is opened first so that a missing input
        # file does not truncate an existing output file.
        with open(input_path, "rb") as input_file:
            with open(output_path, "w") as output_file:
                for line in input_file:
                    output_file.writelines(
                        tag + newline for tag in self.tag_for_sentence(line))
        print('Done. Total time taken %d seconds' % (time.time() - start))