-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser_commands.py
307 lines (266 loc) · 9.77 KB
/
user_commands.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# -*- coding: utf-8 -*-
# Katja Konermann
# Matrikelnummer: 802658
"""
A class that handles the different modes of the string matching program.
"""
import os
from string_matching import NaiveMatching, AhoCorasickMatching
class Search:
    """
    Represent one command line invocation of the string matching program.

    An instance bundles the patterns, the input and the mode flags, builds
    the matching object once, and prints the match indices when run.

    Attributes:
        pattern (list):
            List of strings, the strings that should be searched for.
            Replaced by a lowercased copy when insensitive is True.
        input (str):
            A string, can be the name of a file or a dir where the text
            should be read from. To indicate a directory, use \\ or /.
            To indicate a file, use a valid file extension (from INPUT_EXT).
            Anything else is treated as the text to search in.
        insensitive (bool):
            Indicates case insensitive mode, where text from input and
            strings in pattern will be treated as lowercase. Default is False.
        naive (bool):
            Indicates use of the naive matching algorithm when searching
            in input. Default is False.
        verbose (bool):
            Indicates verbose output when matching in files. Default is
            False.

    Properties:
        input_from_file (bool):
            True if input ends with valid file extension,
            False otherwise.
        input_from_dir (bool):
            True if input ends with \\ or /, False otherwise.

    Methods:
        run:
            Print indices of patterns in input
            according to instance's mode.
        demo:
            Get a demo of different modes that can be used with
            examples and output.
    """

    # File extensions that mark the input as a readable text file.
    INPUT_EXT = (".txt",)
    # Trailing characters that mark the input as a directory.
    DIRS = ("\\", "/")
    # Keyword arguments for the instances shown by demo().
    DEMOS = (
        {"pattern": ["he"],
         "input_text": "She saw her."},
        {"pattern": ["she"],
         "input_text": "She saw her."},
        {"pattern": ["she"],
         "input_text": "She saw her.",
         "insensitive": True},
        {"pattern": ["her", "he"],
         "input_text": "She saw her."},
        {"pattern": ["she", "he", "his", "her"],
         "input_text": "She saw her.",
         "naive": True},
        {"pattern": ["she", "he", "his", "her"],
         "input_text": "demo/demo1.txt"},
        {"pattern": ["she", "he", "his", "her"],
         "input_text": "demo/demo1.txt",
         "verbose": True},
        {"pattern": ["she", "he", "his", "her"],
         "input_text": "demo/"},
    )

    def __init__(self,
                 pattern,
                 input_text,
                 insensitive=False,
                 verbose=False,
                 naive=False):
        """Store the search settings and build the matching object.

        Args:
            pattern (list): Strings to search for.
            input_text (str): Text, file name or directory name.
            insensitive (bool): Case insensitive mode.
            verbose (bool): Per-line output when matching in files.
            naive (bool): Use the naive matching algorithm.
        """
        self.pattern = pattern
        self.input = input_text
        self.insensitive = insensitive
        self.verbose = verbose
        self.naive = naive
        # Built last: _create_match reads the flags set above.
        self._match = self._create_match()

    def __str__(self):
        """String representation of instance is command line input.

        Flags come first (-i, -v, -n), then the input (quoted unless it is
        a file or directory name), then every pattern in quotes. The result
        ends with a single trailing space.
        """
        parts = ["search"]
        if self.insensitive:
            parts.append("-i")
        if self.verbose:
            parts.append("-v")
        if self.naive:
            parts.append("-n")
        if self.input_from_dir or self.input_from_file:
            parts.append(self.input)
        else:
            parts.append('"{}"'.format(self.input))
        parts.extend('"{}"'.format(pattern) for pattern in self.pattern)
        return " ".join(parts) + " "

    def _create_match(self):
        """Creates a string matching object according to instance's attributes.

        If self.insensitive is True, strings in self.pattern will be converted
        to lowercase (self.pattern is rebound to the lowercased list) before
        the matching object is created.

        Returns:
            NaiveMatching object if self.naive is True,
            otherwise returns AhoCorasickMatching object.
        """
        if self.insensitive:
            self.pattern = [pattern.lower() for pattern in self.pattern]
        if self.naive:
            return NaiveMatching(self.pattern)
        return AhoCorasickMatching(self.pattern)

    @property
    def input_from_file(self):
        """Determines whether input text should be read from file.

        Returns:
            bool:
                True if self.input ends with one of the extensions in
                INPUT_EXT. False otherwise.
        """
        return self.input.endswith(self.INPUT_EXT)

    @property
    def input_from_dir(self):
        """Determines whether input text should be read from directory.

        Returns:
            bool:
                True if input ends with / or \\.
                False otherwise.
        """
        return self.input.endswith(self.DIRS)

    @staticmethod
    def _print_matches(match_dict):
        """Prints found matches, one "key: i,j,k" line per key.

        Prints "No matches found." if match_dict is empty.

        Args:
            match_dict (dict):
                keys are strings, values are iterables of integers.

        Returns:
            None.
        """
        if not match_dict:
            print("No matches found.")
            return
        for match, indices in match_dict.items():
            print("{}: {}".format(match, ",".join(map(str, indices))))

    @staticmethod
    def _update_matches(matches1, matches2):
        """Merge matches2 into matches1 by extending the lists of indices.

        Indices of matches2 are appended after those already present in
        matches1 for the same key. For example, for
        {"she": [1], "me": [5]} and
        {"she": [9], "his": [15]}
        returns {"she": [1, 9], "me": [5], "his": [15]}

        matches1 is extended in place and returned. matches2 and the lists
        it holds are left untouched; keys new to matches1 receive a copy,
        so the two dictionaries never share a list object.

        Args:
            matches1 (dict):
                A match dictionary with words as keys and lists of
                integers as values. Acts as the accumulator.
            matches2 (dict):
                A match dictionary with words as keys and lists of
                integers as values.

        Returns:
            dict: matches1, now containing the indices of both arguments.
        """
        for match, indices in matches2.items():
            matches1.setdefault(match, []).extend(indices)
        return matches1

    def _match_in_file(self, file=None):
        """Dispatch a file to the reader that fits its extension.

        Only files ending in ".txt" are handled; any other name is
        silently ignored.

        Args:
            file (str):
                Name of a file. If None (default), self.input is used.

        Returns:
            None.
        """
        if file is None:
            file = self.input
        if file.endswith(".txt"):
            self._match_txt_file(file)

    def _match_txt_file(self, file):
        """Finds matches for pattern in text file and prints them.

        If verbose is True, prints index of matches in each line. Otherwise
        prints index in file content, counting EOL characters.

        Args:
            file (str):
                Name of the text file to read (opened as UTF-8).

        Returns:
            None.
        """
        matches = dict()
        # Running offset of the current line within the file content.
        index = 0
        with open(file, encoding="utf-8") as file_in:
            for count, line in enumerate(file_in, start=1):
                # Case insensitive mode.
                if self.insensitive:
                    line = line.lower()
                if self.verbose:
                    # Verbose mode matches each line individually and
                    # stores the result under its line number.
                    line_match = self._match.match_pattern(line)
                    if line_match:
                        matches[count] = line_match
                else:
                    # Hand the running offset to match_pattern via its
                    # start parameter to keep track of the current index.
                    new_matches = self._match.match_pattern(line,
                                                            start=index)
                    # Accumulator first, so indices of earlier lines stay
                    # in front of those of later lines.
                    matches = self._update_matches(matches, new_matches)
                    index += len(line)
        # Print no matches or matches without lines.
        if not matches or not self.verbose:
            self._print_matches(matches)
        else:
            # Print matches for each line.
            for line_number, line_matches in matches.items():
                line_str = "Line {}".format(line_number)
                print("{:-^30}".format(line_str))
                self._print_matches(line_matches)

    def _match_in_dir(self):
        """Finds matches for pattern in directory and prints them.

        Will only try to match in regular files that end with an extension
        defined in class variable INPUT_EXT. Files are processed in sorted
        name order so the output is the same on every run.

        Returns:
            None
        """
        files = sorted(
            name for name in os.listdir(self.input)
            if name.endswith(self.INPUT_EXT)
            and os.path.isfile(os.path.join(self.input, name))
        )
        if not files:
            print("No files with valid file extension found. "
                  "Valid file extensions: {}".format(",".join(self.INPUT_EXT)))
            return
        for name in files:
            print("{:=^30}".format(name))
            self._match_in_file(os.path.join(self.input, name))

    def _match_in_str(self):
        """Matches pattern in the input string and prints the matches.

        In case insensitive mode a lowercased copy of the input is
        searched; self.input itself is not modified.

        Returns:
            None
        """
        text = self.input.lower() if self.insensitive else self.input
        self._print_matches(self._match.match_pattern(text))

    def run(self):
        """
        Matches pattern in input (str, file or dir) and prints indices.

        The file check comes first, then the directory check; any other
        input is searched as plain text.

        Returns:
            None
        """
        if self.input_from_file:
            self._match_in_file()
        elif self.input_from_dir:
            self._match_in_dir()
        else:
            self._match_in_str()

    @classmethod
    def demo(cls):
        """Demo of different functionalities of Search class.

        For every entry in DEMOS, prints the equivalent command line call
        followed by the output of running it.
        """
        for demo in cls.DEMOS:
            search = cls(**demo)
            # Command Line input.
            print("\tCall:")
            print(search)
            # Command line output.
            print("\tOutput:")
            search.run()
            print()
if __name__ == "__main__":
    # Running the module directly prints the demo of every search mode.
    Search.demo()