-
Notifications
You must be signed in to change notification settings - Fork 5
/
plot_wordlength_frequencies.py
79 lines (62 loc) · 1.96 KB
/
plot_wordlength_frequencies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import matplotlib
# matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import pickle
from reading_common import get_stimulus_text_from_file
def all_indices(value, qlist):
    """Return every position at which ``value`` occurs in ``qlist``.

    ``qlist`` must provide ``index(value, start)`` (as lists and tuples do).
    Positions are returned in ascending order; the result is an empty list
    when ``value`` does not occur.
    """
    found = []
    start = 0
    while True:
        try:
            position = qlist.index(value, start)
        except ValueError:
            # No further occurrence at or after ``start``.
            return found
        found.append(position)
        # Resume the search just past the match that was found.
        start = position + 1
def draw_boxplot(classes_array, values_array, ax):
    """Draw one box per distinct class from the values that belong to it.

    Args:
        classes_array: Class label of each observation (list or array).
        values_array: Numeric observations; ``values_array[i]`` belongs to
            the class ``classes_array[i]``.
        ax: Axes-like object to draw on. ``None`` falls back to the current
            pyplot axes.

    Boxes are ordered by sorted class label and each tick is labelled with
    its class. The y-range runs from 80% of the smallest value (never below
    zero) to 125% of the largest value. Nothing is drawn for empty input.
    """
    classes = np.asarray(classes_array)
    values = np.asarray(values_array)
    if values.size == 0:
        # min()/max() of an empty sequence would raise; there is nothing to plot.
        return
    if ax is None:
        ax = plt.gca()

    # Group by the classes that were passed in (the previous code read an
    # unrelated, undefined global here).
    unique_classes = np.unique(classes)
    grouped_values = [values[classes == label] for label in unique_classes]

    ax.boxplot(grouped_values)
    # boxplot places the boxes at x = 1..n, so the ticks go there and carry
    # the class labels; using the class values as tick positions would only
    # line up when the classes happen to be exactly 1..n.
    ax.set_xticks(list(range(1, len(grouped_values) + 1)))
    ax.set_xticklabels(unique_classes.tolist())

    lower = max(float(values.min()) * 0.8, 0)
    upper = float(values.max()) * 1.25
    ax.set_ylim(lower, upper)
# Histogram layout shared by both panels.
minx = 0
maxx = 18
binwidth = 3
bin_edges = np.arange(0, 20, binwidth)


def _word_lengths(text):
    """Return the length of every whitespace-separated word in ``text``."""
    # split() without an argument splits on any run of whitespace and never
    # yields empty strings, so words separated by a newline or tab are no
    # longer counted as one long word (split(" ") left them glued together).
    # NOTE(review): assumes the loader returns a plain str - confirm.
    return [len(word) for word in text.split()]


def _plot_length_histogram(ax, lengths, title):
    """Draw a normalised word-length histogram on ``ax`` with fixed axis limits."""
    ax.set_title(title)
    # ``normed`` was removed from hist() in Matplotlib 3.1; ``density=True``
    # is the replacement and likewise normalises the histogram to unit area.
    ax.hist(lengths, bins=bin_edges, density=True)
    ax.axis([minx, maxx, 0, 0.25])


# Load both stimulus texts and compute their word lengths.
input_text_filename = "texts/POS.txt"
textfile = get_stimulus_text_from_file(input_text_filename)
input_text_filename = "texts/descartes.txt"
textfile2 = get_stimulus_text_from_file(input_text_filename)

word_lengths = _word_lengths(textfile)
word_lengths2 = _word_lengths(textfile2)

# Side-by-side panels, one per text.
_plot_length_histogram(plt.subplot(121), word_lengths, "english word length frequencies")
_plot_length_histogram(plt.subplot(122), word_lengths2, "dutch word length frequencies")
plt.show()