-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
52 lines (37 loc) · 1.41 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from Utilities import Utilities
from BetaGeometric import BetaGeometric
from DPGMM import DPGMM
import matplotlib.pyplot as plt
# Experiment configuration.
# The number of restaurants (R) must equal the number of splits, so both the
# model and the split generator are driven by this single constant instead of
# two independent literals that could drift apart.
NUM_RESTAURANTS = 2
# Number of iterations passed to model.fit().
NUM_ITERATIONS = 5

# Base distribution handed to the model.
base_distribution = BetaGeometric(alpha=1.0, beta=2.0)
model = DPGMM(A=0.5, R=NUM_RESTAURANTS, base_distribution=base_distribution)

# Tokenize the raw text and divide the tokens into one split per restaurant.
utils = Utilities()
data_file = "data/raw_text.txt"
tokens = utils.tokenize(data_file)
splits = utils.generate_splits(no_of_splits=NUM_RESTAURANTS, tokens=tokens)

model.fit(splits, iterations=NUM_ITERATIONS)
# Report, for every restaurant, the estimated number of clusters and the
# estimated mixture proportions rounded to three decimals.
es_proportions = model.get_estimated_mixture_proportions()
print('Estimated Proportions')
print('----------------------')
for r in range(model.R):
    # print() appends its own newline, so this emits two newline characters
    # to visually separate consecutive restaurants.
    print('\n')
    print('For Restaurant', r)
    print('----------------------')
    print('Estimated no. of clusters:', model.K[r])
    print('Proportions:', [round(k, 3) for k in es_proportions[r]])
# Draw the recorded performance trace of each restaurant in its own wide
# figure and display it.
# NOTE(review): plot() called with a single sequence uses x = 0..len-1, while
# the ticks below are placed at 1..model.iterations -- confirm that the
# resulting tick/data alignment is the intended one.
for restaurant in range(model.R):
    plt.figure(figsize=(20, 5))
    trace = model.performance[restaurant]
    plt.plot(trace)
    plt.title(f"Log-Likelihood of Restaurant {restaurant}")
    plt.xlabel("Number of Iteration")
    plt.ylabel("log p(x_i,z_i|X_-i, Z_-i, theta)")
    iteration_ticks = list(range(1, model.iterations + 1))
    plt.xticks(iteration_ticks)
    plt.show()
# Draw the per-iteration cluster-size series of each restaurant in its own
# wide figure and display it.
# NOTE(review): plot() called with a single sequence uses x = 0..len-1, while
# the ticks below are placed at 1..model.iterations -- confirm that the
# resulting tick/data alignment is the intended one.
for r in range(model.R):
    plt.figure(figsize=(20, 5))
    plt.plot(model.cluster_size_per_iter[r])
    plt.title("Cluster size per iteration for restaurant " + str(r))
    plt.xlabel("Number of Iteration")
    plt.ylabel("Cluster Size")
    plt.xticks(list(range(1, model.iterations + 1)))
    plt.show()