-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_spellchecker.py
40 lines (35 loc) · 1.68 KB
/
file_spellchecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from dictionary import Dictionary
from string import punctuation
class FileSpellchecker:
    """Collect spelling suggestions for the unknown words of a text file.

    Word membership tests and suggestion lookups are delegated to the
    project's ``Dictionary`` object; this class only reads the file, splits
    each line into words and fans the lookups out over a thread pool.
    """

    # Translation table that deletes ASCII punctuation. It is the same for
    # every line, so it is built once instead of once per line.
    _PUNCTUATION_TABLE = str.maketrans('', '', punctuation)

    def __init__(self, path):
        """Store the file location and create the dictionary.

        Args:
            path: File to check. It is joined onto the current working
                directory when the file is opened, so a relative path is
                resolved against the cwd and an absolute path is used as is.
        """
        self.dictionary = Dictionary()
        self.path = path

    def get_options(self):
        """Return suggestions for every word the dictionary does not know.

        The file is read line by line as UTF-8. ASCII punctuation is removed
        from each line and the rest is split on whitespace.

        Returns:
            dict: Maps each word for which ``_get_options`` produced a
            non-``None`` result to that result. A word that occurs several
            times appears once (the last lookup wins).
        """
        options = {}
        with open(Path.cwd() / self.path, 'r', encoding='utf-8') as f, \
                ThreadPoolExecutor() as executor:
            for text in f:
                words = text.translate(self._PUNCTUATION_TABLE).split()
                # executor.map already spreads the calls over the pool and
                # yields results in input order, so every word is submitted
                # exactly once. The previous hand-rolled chunking advanced
                # its slice bounds incorrectly and silently skipped words.
                for word, word_options in zip(
                        words, executor.map(self._get_options, words)):
                    if word_options is not None:
                        options[word] = word_options
        return options

    def _get_options(self, word):
        """Return the dictionary's candidates for ``word``, or ``None``.

        ``None`` is returned for numeric strings and for words that
        ``Dictionary.check_word_in_dictionary`` reports as known; otherwise
        the result of ``Dictionary.get_candidates(word)`` is passed through
        unchanged.
        """
        if word.isnumeric():
            return None
        if self.dictionary.check_word_in_dictionary(word):
            return None
        return self.dictionary.get_candidates(word)