-
Notifications
You must be signed in to change notification settings - Fork 1
/
n_grams.py
12 lines (11 loc) · 917 Bytes
/
n_grams.py
1
2
3
4
5
6
7
8
9
10
11
12
# two_grams = ["th", "he", "in", "er", "an", "re", "on", "at", "en", "nd", "ti", "es", "or", "te", "of", "ed"]
# three_grams = ["ing", "ion", "tio", "ent", "ati", "for", "her", "ter", "hat", "tha", "ere", "ate", "his", "con", "res", "ver", "all", "ons", "nce", "men", "ith"]
# four_grams = ["tion", "atio", "that", "ther", "with", "ment", "ions", "this", "here", "from", "ould", "ting", "hich", "whic", "ctio", "ence", "have", "othe", "ight", "sion", "ever", "ical", "they", "inte", "ough", "ance"]
# five_grams = ["ation", "ratio", "ction", "ement", "inter", "ition", "ectio"]
# words = ["make", "how", "one", "water", "write", "number", "many", "people", "have", "about", "into", "because", "were", "under", "state", "count", "these", "thing", "there", "their", "which", "other", "the", "and"]
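# Keyed by list name (as a string): the lists themselves are unhashable and cannot be dict keys.
# gram_word_frequencies = {
#     "two_grams": {},
#     "three_grams": {},
#     "four_grams": {},
#     "five_grams": {},
# }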