-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprint_topics.py
35 lines (25 loc) · 1009 Bytes
/
print_topics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import numpy
import re
import gensim
import sys
# The saved model's path is taken from the first command-line argument.
# Exit with a usage message instead of an IndexError traceback when it is
# missing.
if len(sys.argv) < 2:
    sys.exit('usage: {} MODEL_PATH'.format(sys.argv[0]))
path = sys.argv[1]
# Alternative loader, kept for models saved through gensim's LdaModel class:
# m = gensim.models.ldamodel.LdaModel.load(path)
m = gensim.models.wrappers.LdaMallet.load(path)
def get_topic_terms(model, topicid, topn, id2token):
    """
    Return the tokens of the `topn` highest-weighted words in topic `topicid`.

    The topic's row is read from `model.state.get_lambda()`, its word ids are
    ranked from highest to lowest weight, and each of the first `topn` ids is
    mapped to its token through `id2token`.

    Parameters:
        model: object exposing `state.get_lambda()`, whose result is indexed
            by topic id (presumably a gensim LdaModel -- confirm with caller).
        topicid: index of the topic row to rank.
        topn: number of top-ranked words to keep.
        id2token: mapping from word id to token, read as `id2token[word_id]`.

    Returns:
        A list of tokens (not `(word_id, probability)` tuples), ordered from
        the highest-weighted word downwards.
    """
    weights = model.state.get_lambda()[topicid]
    # Dividing by a positive row total does not change the ranking, so the
    # raw weights are sorted directly instead of being normalized first.
    bestn = numpy.argsort(weights)[::-1][:topn]
    return [id2token[word_id] for word_id in bestn]
# for i in xrange(m.num_topics):
# print(' '.join(get_topic_terms(m, i, 10, m.id2word)))
# Print one line per topic: its position in the returned list, then its words.
# NOTE(review): the unpacking below assumes each entry returned by
# show_topics(formatted=False) is a sequence of 2-item pairs whose second
# item is the word string -- confirm against the installed gensim version.
for i, lst in enumerate(m.show_topics(num_topics=-1, formatted=False)):
    # print() with a single parenthesized argument behaves identically on
    # Python 2 and Python 3, unlike the print statement.
    print('{}: {}'.format(i, ' '.join(w for _, w in lst)))
# s = '\n'.join([' '.join([w for _, w in lst])
# for lst in m.show_topics(num_topics=-1, formatted=False)]
# )
# print s