-
Notifications
You must be signed in to change notification settings - Fork 1
/
typo.py
executable file
·48 lines (36 loc) · 1017 Bytes
/
typo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Generates misspelled words where misspellings can be:
* Upper case letters
* Repeated letters
* Wrong vowel
Usage:
./typo.py
./typo.py | ./spellcheck.py | grep "NO SUGGESTION"
Requirements:
Python 2.7
Uses /usr/share/dict/words for word list
@author: kristi
"""
from random import randint, choice, random
# Location of the word list that misspellings are generated from.
wordfile = "/usr/share/dict/words"

# Read the list line by line, trimming surrounding whitespace and
# lower-casing each entry.  The ``with`` block closes the file handle as
# soon as the list is built instead of leaving it open for the lifetime of
# the process.
with open(wordfile) as _word_fh:
    words = [line.strip().lower() for line in _word_fh]

# Number of entries loaded from the word list.
num_words = len(words)

# Letters that misspell() treats as interchangeable vowels ('y' included).
VOWEL = 'aeiouy'
def misspell(letter):
    """Return a randomly garbled version of a single letter.

    Three independent random distortions are applied, in this order:

    * a letter found in VOWEL is, half of the time, swapped for a vowel
      drawn at random from VOWEL (the draw may return the same vowel);
    * the letter is upper-cased with probability 0.2;
    * the result is repeated between 1 and 4 times (inclusive), so a
      repeat count of 1 leaves it as a single character.
    """
    is_vowel = letter in VOWEL
    # random() is only consulted for vowels, exactly like a short-circuit
    # ``and``, so the sequence of random draws is unchanged.
    if is_vowel and random() < 0.5:
        letter = choice(VOWEL)

    shout = random() < 0.2
    garbled = letter.upper() if shout else letter

    return garbled * randint(1, 4)
def make_typo(word, rate=0.3):
    """Return ``word`` with some of its letters passed through misspell().

    Each character is considered independently: with probability ``rate``
    it is replaced by ``misspell(character)``, otherwise it is kept as is.
    The default rate of 0.3 is the value the script has always used.

    An empty ``word`` yields an empty string.
    """
    # The condition is evaluated before misspell() is called, so the order
    # of random draws matches a plain "if random() < rate: misspell" loop.
    return ''.join(
        misspell(char) if random() < rate else char
        for char in word
    )


if __name__ == "__main__":
    # Emit 5000 samples.  For every sample the garbled spelling is printed
    # first, followed by the original dictionary word on its own line.
    # Single-argument print(...) and range() behave identically on
    # Python 2.7 and also run on Python 3.
    for _ in range(5000):
        word = choice(words)
        print(make_typo(word))
        print(word)