-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfind_words.py
64 lines (50 loc) · 2.06 KB
/
find_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
from util import utils
from util import polish_slang_crawler
import crawler
import argparse
import logging
logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)
def includes_word(string, word):
    """Check whether *word* occurs in *string* as a subsequence.

    Scans *string* left to right, greedily matching the letters of *word*
    in order (the letters need not be adjacent in *string*).

    Sample usage: includes_word(default_string, "walewiadro")

    :param string: text to search in
    :param word: letters to find, in order
    :return: ``(word, letter_index)`` on a match, where ``letter_index`` is a
        list of ``(position, letter)`` pairs for each matched letter of
        *string*; ``None`` when *word* is not a subsequence of *string*.
        An empty *word* trivially matches and yields ``(word, [])``.
    """
    letter_index = []
    current_letter = 0
    for i, ch in enumerate(string):
        # Guard first: an empty word (or a fully-matched one) must not
        # index past the end of `word`.
        if current_letter >= len(word):
            break
        if ch == word[current_letter]:
            current_letter += 1
            letter_index.append((i, ch))
    if current_letter == len(word):
        return word, letter_index
    return None  # explicit miss instead of an implicit fall-through
if __name__ == '__main__':
    # Default haystack: a concatenated Polish notice used as demo input.
    default_string = "wałpzwpowodzoadinisttojewódzkizarządmelioracjiiurządzeńwodnywroc" \
        "ławulmatejki5aazrozkopywaniawbijanisszkadzaadarninyiinnychnpodstaapaez"
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default=default_string, help="input string")
    parser.add_argument("--dict", default="resources/slangs.txt", help="dictionary file path (optional)")
    # if slangs not present, then crawl
    parser.add_argument("--out", default="resources/output.txt", help="output file path (optional)")
    args = parser.parse_args()
    # Unpack directly instead of round-tripping through a throwaway list.
    input_string, dictionary_path, output_path = args.input, args.dict, args.out
    LOG.debug("Parsed args: %s", [input_string, dictionary_path, output_path])
    # Build the dictionary on first run by crawling.
    if not os.path.isfile(dictionary_path):
        polish_slang_crawler.crawl_and_save(dictionary_path)
    lines = utils.read_array(dictionary_path)
    # Context manager guarantees the output file is flushed and closed
    # (the original left the "w+" handle open).
    with open(output_path, "w+") as output_file:
        for line in lines:
            # Normalize each dictionary entry before the subsequence search.
            output = includes_word(input_string, line.strip().lower().replace(" ", ""))
            if output:
                word, indices = output
                output_file.write(line.strip() + " " + str(indices) + "\n")
    # START CRAWLING AGAIN
    # all_slangs = crawler.crawl_slangs()
    # utils.save_array(all_slangs, 'resources/slangs.txt')