Skip to content

Commit

Permalink
Example codes for GibbsLDA #4
Browse files Browse the repository at this point in the history
  • Loading branch information
dongwookim-ml committed Feb 9, 2016
1 parent 5956326 commit 7c15e06
Show file tree
Hide file tree
Showing 6 changed files with 436 additions and 23 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@ result/

# test folder
test/
logs/
notebook/.ipynb_checkpoints
388 changes: 388 additions & 0 deletions notebook/GibbsLDA_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,388 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Example of GibbsLDA"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This example requires three NLTK corpora to be installed: `nltk.corpus.reuters`, `nltk.corpus.words`, and `nltk.corpus.stopwords`.\n",
"\n",
"You can download the corpora via `nltk.download()`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"from ptm import GibbsLDA\n",
"from ptm.nltk_corpus import get_reuters_cnt_ids\n",
"from ptm.utils import convert_cnt_to_list, get_top_words"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading the Reuters corpus from NLTK"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load 1000 documents from the NLTK Reuters corpus, with a maximum vocabulary size of 10000."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Vocabulary size:4654\n"
]
}
],
"source": [
"n_doc = 1000\n",
"voca, doc_ids, doc_cnt = get_reuters_cnt_ids(num_doc=n_doc, max_voca=10000)\n",
"docs = convert_cnt_to_list(doc_ids, doc_cnt)\n",
"print('Vocabulary size:%d' % len(voca))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Inference through Gibbs sampling"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2016-02-10 10:39:05 INFO:GibbsLDA:[ITER] 0, 0.95, -492103.33\n",
"INFO:GibbsLDA:[ITER] 0, 0.95, -492103.33\n",
"2016-02-10 10:39:06 INFO:GibbsLDA:[ITER] 1, 0.97, -449060.17\n",
"INFO:GibbsLDA:[ITER] 1, 0.97, -449060.17\n",
"2016-02-10 10:39:07 INFO:GibbsLDA:[ITER] 2, 0.99, -425090.80\n",
"INFO:GibbsLDA:[ITER] 2, 0.99, -425090.80\n",
"2016-02-10 10:39:08 INFO:GibbsLDA:[ITER] 3, 1.04, -409972.35\n",
"INFO:GibbsLDA:[ITER] 3, 1.04, -409972.35\n",
"2016-02-10 10:39:09 INFO:GibbsLDA:[ITER] 4, 0.98, -399907.10\n",
"INFO:GibbsLDA:[ITER] 4, 0.98, -399907.10\n",
"2016-02-10 10:39:10 INFO:GibbsLDA:[ITER] 5, 0.96, -392280.97\n",
"INFO:GibbsLDA:[ITER] 5, 0.96, -392280.97\n",
"2016-02-10 10:39:11 INFO:GibbsLDA:[ITER] 6, 0.97, -387048.46\n",
"INFO:GibbsLDA:[ITER] 6, 0.97, -387048.46\n",
"2016-02-10 10:39:12 INFO:GibbsLDA:[ITER] 7, 1.03, -383034.02\n",
"INFO:GibbsLDA:[ITER] 7, 1.03, -383034.02\n",
"2016-02-10 10:39:13 INFO:GibbsLDA:[ITER] 8, 1.01, -378981.12\n",
"INFO:GibbsLDA:[ITER] 8, 1.01, -378981.12\n",
"2016-02-10 10:39:14 INFO:GibbsLDA:[ITER] 9, 0.97, -376289.35\n",
"INFO:GibbsLDA:[ITER] 9, 0.97, -376289.35\n",
"2016-02-10 10:39:15 INFO:GibbsLDA:[ITER] 10, 0.99, -374223.91\n",
"INFO:GibbsLDA:[ITER] 10, 0.99, -374223.91\n",
"2016-02-10 10:39:16 INFO:GibbsLDA:[ITER] 11, 0.96, -371534.23\n",
"INFO:GibbsLDA:[ITER] 11, 0.96, -371534.23\n",
"2016-02-10 10:39:17 INFO:GibbsLDA:[ITER] 12, 0.95, -369412.53\n",
"INFO:GibbsLDA:[ITER] 12, 0.95, -369412.53\n",
"2016-02-10 10:39:18 INFO:GibbsLDA:[ITER] 13, 0.98, -368282.84\n",
"INFO:GibbsLDA:[ITER] 13, 0.98, -368282.84\n",
"2016-02-10 10:39:19 INFO:GibbsLDA:[ITER] 14, 0.96, -367202.90\n",
"INFO:GibbsLDA:[ITER] 14, 0.96, -367202.90\n",
"2016-02-10 10:39:20 INFO:GibbsLDA:[ITER] 15, 0.96, -365800.45\n",
"INFO:GibbsLDA:[ITER] 15, 0.96, -365800.45\n",
"2016-02-10 10:39:21 INFO:GibbsLDA:[ITER] 16, 0.95, -364384.21\n",
"INFO:GibbsLDA:[ITER] 16, 0.95, -364384.21\n",
"2016-02-10 10:39:22 INFO:GibbsLDA:[ITER] 17, 0.98, -363365.58\n",
"INFO:GibbsLDA:[ITER] 17, 0.98, -363365.58\n",
"2016-02-10 10:39:23 INFO:GibbsLDA:[ITER] 18, 0.96, -362391.93\n",
"INFO:GibbsLDA:[ITER] 18, 0.96, -362391.93\n",
"2016-02-10 10:39:24 INFO:GibbsLDA:[ITER] 19, 1.00, -361615.87\n",
"INFO:GibbsLDA:[ITER] 19, 1.00, -361615.87\n",
"2016-02-10 10:39:25 INFO:GibbsLDA:[ITER] 20, 1.05, -360838.68\n",
"INFO:GibbsLDA:[ITER] 20, 1.05, -360838.68\n",
"2016-02-10 10:39:26 INFO:GibbsLDA:[ITER] 21, 1.02, -360048.97\n",
"INFO:GibbsLDA:[ITER] 21, 1.02, -360048.97\n",
"2016-02-10 10:39:27 INFO:GibbsLDA:[ITER] 22, 1.01, -359899.43\n",
"INFO:GibbsLDA:[ITER] 22, 1.01, -359899.43\n",
"2016-02-10 10:39:28 INFO:GibbsLDA:[ITER] 23, 0.99, -359558.00\n",
"INFO:GibbsLDA:[ITER] 23, 0.99, -359558.00\n",
"2016-02-10 10:39:29 INFO:GibbsLDA:[ITER] 24, 0.98, -359132.38\n",
"INFO:GibbsLDA:[ITER] 24, 0.98, -359132.38\n",
"2016-02-10 10:39:30 INFO:GibbsLDA:[ITER] 25, 1.06, -358597.54\n",
"INFO:GibbsLDA:[ITER] 25, 1.06, -358597.54\n",
"2016-02-10 10:39:31 INFO:GibbsLDA:[ITER] 26, 1.05, -358172.53\n",
"INFO:GibbsLDA:[ITER] 26, 1.05, -358172.53\n",
"2016-02-10 10:39:32 INFO:GibbsLDA:[ITER] 27, 0.97, -357632.83\n",
"INFO:GibbsLDA:[ITER] 27, 0.97, -357632.83\n",
"2016-02-10 10:39:33 INFO:GibbsLDA:[ITER] 28, 0.97, -357059.67\n",
"INFO:GibbsLDA:[ITER] 28, 0.97, -357059.67\n",
"2016-02-10 10:39:34 INFO:GibbsLDA:[ITER] 29, 1.00, -356616.19\n",
"INFO:GibbsLDA:[ITER] 29, 1.00, -356616.19\n",
"2016-02-10 10:39:35 INFO:GibbsLDA:[ITER] 30, 1.03, -356614.06\n",
"INFO:GibbsLDA:[ITER] 30, 1.03, -356614.06\n",
"2016-02-10 10:39:36 INFO:GibbsLDA:[ITER] 31, 0.96, -356382.31\n",
"INFO:GibbsLDA:[ITER] 31, 0.96, -356382.31\n",
"2016-02-10 10:39:37 INFO:GibbsLDA:[ITER] 32, 1.01, -355719.01\n",
"INFO:GibbsLDA:[ITER] 32, 1.01, -355719.01\n",
"2016-02-10 10:39:38 INFO:GibbsLDA:[ITER] 33, 1.04, -355607.53\n",
"INFO:GibbsLDA:[ITER] 33, 1.04, -355607.53\n",
"2016-02-10 10:39:39 INFO:GibbsLDA:[ITER] 34, 1.02, -355128.47\n",
"INFO:GibbsLDA:[ITER] 34, 1.02, -355128.47\n",
"2016-02-10 10:39:40 INFO:GibbsLDA:[ITER] 35, 0.98, -354985.22\n",
"INFO:GibbsLDA:[ITER] 35, 0.98, -354985.22\n",
"2016-02-10 10:39:41 INFO:GibbsLDA:[ITER] 36, 1.02, -354992.00\n",
"INFO:GibbsLDA:[ITER] 36, 1.02, -354992.00\n",
"2016-02-10 10:39:42 INFO:GibbsLDA:[ITER] 37, 0.97, -354565.29\n",
"INFO:GibbsLDA:[ITER] 37, 0.97, -354565.29\n",
"2016-02-10 10:39:43 INFO:GibbsLDA:[ITER] 38, 1.00, -354011.48\n",
"INFO:GibbsLDA:[ITER] 38, 1.00, -354011.48\n",
"2016-02-10 10:39:44 INFO:GibbsLDA:[ITER] 39, 1.00, -353808.40\n",
"INFO:GibbsLDA:[ITER] 39, 1.00, -353808.40\n",
"2016-02-10 10:39:46 INFO:GibbsLDA:[ITER] 40, 1.01, -353243.78\n",
"INFO:GibbsLDA:[ITER] 40, 1.01, -353243.78\n",
"2016-02-10 10:39:47 INFO:GibbsLDA:[ITER] 41, 1.03, -353491.12\n",
"INFO:GibbsLDA:[ITER] 41, 1.03, -353491.12\n",
"2016-02-10 10:39:48 INFO:GibbsLDA:[ITER] 42, 1.00, -353579.48\n",
"INFO:GibbsLDA:[ITER] 42, 1.00, -353579.48\n",
"2016-02-10 10:39:49 INFO:GibbsLDA:[ITER] 43, 0.97, -353030.64\n",
"INFO:GibbsLDA:[ITER] 43, 0.97, -353030.64\n",
"2016-02-10 10:39:50 INFO:GibbsLDA:[ITER] 44, 0.96, -352786.36\n",
"INFO:GibbsLDA:[ITER] 44, 0.96, -352786.36\n",
"2016-02-10 10:39:51 INFO:GibbsLDA:[ITER] 45, 0.99, -352842.34\n",
"INFO:GibbsLDA:[ITER] 45, 0.99, -352842.34\n",
"2016-02-10 10:39:52 INFO:GibbsLDA:[ITER] 46, 1.02, -352905.42\n",
"INFO:GibbsLDA:[ITER] 46, 1.02, -352905.42\n",
"2016-02-10 10:39:53 INFO:GibbsLDA:[ITER] 47, 1.05, -352675.95\n",
"INFO:GibbsLDA:[ITER] 47, 1.05, -352675.95\n",
"2016-02-10 10:39:54 INFO:GibbsLDA:[ITER] 48, 1.01, -352287.79\n",
"INFO:GibbsLDA:[ITER] 48, 1.01, -352287.79\n",
"2016-02-10 10:39:55 INFO:GibbsLDA:[ITER] 49, 1.01, -352322.47\n",
"INFO:GibbsLDA:[ITER] 49, 1.01, -352322.47\n",
"2016-02-10 10:39:56 INFO:GibbsLDA:[ITER] 50, 0.99, -352085.49\n",
"INFO:GibbsLDA:[ITER] 50, 0.99, -352085.49\n",
"2016-02-10 10:39:57 INFO:GibbsLDA:[ITER] 51, 1.01, -352525.78\n",
"INFO:GibbsLDA:[ITER] 51, 1.01, -352525.78\n",
"2016-02-10 10:39:58 INFO:GibbsLDA:[ITER] 52, 0.98, -352468.89\n",
"INFO:GibbsLDA:[ITER] 52, 0.98, -352468.89\n",
"2016-02-10 10:39:59 INFO:GibbsLDA:[ITER] 53, 0.97, -351834.87\n",
"INFO:GibbsLDA:[ITER] 53, 0.97, -351834.87\n",
"2016-02-10 10:40:00 INFO:GibbsLDA:[ITER] 54, 1.01, -351916.33\n",
"INFO:GibbsLDA:[ITER] 54, 1.01, -351916.33\n",
"2016-02-10 10:40:01 INFO:GibbsLDA:[ITER] 55, 1.04, -352065.68\n",
"INFO:GibbsLDA:[ITER] 55, 1.04, -352065.68\n",
"2016-02-10 10:40:02 INFO:GibbsLDA:[ITER] 56, 1.03, -351816.31\n",
"INFO:GibbsLDA:[ITER] 56, 1.03, -351816.31\n",
"2016-02-10 10:40:03 INFO:GibbsLDA:[ITER] 57, 1.09, -351507.51\n",
"INFO:GibbsLDA:[ITER] 57, 1.09, -351507.51\n",
"2016-02-10 10:40:04 INFO:GibbsLDA:[ITER] 58, 1.04, -351347.31\n",
"INFO:GibbsLDA:[ITER] 58, 1.04, -351347.31\n",
"2016-02-10 10:40:05 INFO:GibbsLDA:[ITER] 59, 0.96, -351306.13\n",
"INFO:GibbsLDA:[ITER] 59, 0.96, -351306.13\n",
"2016-02-10 10:40:06 INFO:GibbsLDA:[ITER] 60, 1.00, -351042.19\n",
"INFO:GibbsLDA:[ITER] 60, 1.00, -351042.19\n",
"2016-02-10 10:40:07 INFO:GibbsLDA:[ITER] 61, 1.06, -351417.68\n",
"INFO:GibbsLDA:[ITER] 61, 1.06, -351417.68\n",
"2016-02-10 10:40:08 INFO:GibbsLDA:[ITER] 62, 0.96, -351318.16\n",
"INFO:GibbsLDA:[ITER] 62, 0.96, -351318.16\n",
"2016-02-10 10:40:09 INFO:GibbsLDA:[ITER] 63, 0.97, -351301.51\n",
"INFO:GibbsLDA:[ITER] 63, 0.97, -351301.51\n",
"2016-02-10 10:40:10 INFO:GibbsLDA:[ITER] 64, 0.97, -350956.96\n",
"INFO:GibbsLDA:[ITER] 64, 0.97, -350956.96\n",
"2016-02-10 10:40:11 INFO:GibbsLDA:[ITER] 65, 0.98, -350700.36\n",
"INFO:GibbsLDA:[ITER] 65, 0.98, -350700.36\n",
"2016-02-10 10:40:12 INFO:GibbsLDA:[ITER] 66, 1.05, -350579.33\n",
"INFO:GibbsLDA:[ITER] 66, 1.05, -350579.33\n",
"2016-02-10 10:40:13 INFO:GibbsLDA:[ITER] 67, 0.99, -350598.48\n",
"INFO:GibbsLDA:[ITER] 67, 0.99, -350598.48\n",
"2016-02-10 10:40:14 INFO:GibbsLDA:[ITER] 68, 0.99, -350757.37\n",
"INFO:GibbsLDA:[ITER] 68, 0.99, -350757.37\n",
"2016-02-10 10:40:15 INFO:GibbsLDA:[ITER] 69, 1.09, -350525.69\n",
"INFO:GibbsLDA:[ITER] 69, 1.09, -350525.69\n",
"2016-02-10 10:40:16 INFO:GibbsLDA:[ITER] 70, 1.11, -350516.66\n",
"INFO:GibbsLDA:[ITER] 70, 1.11, -350516.66\n",
"2016-02-10 10:40:17 INFO:GibbsLDA:[ITER] 71, 0.99, -350455.19\n",
"INFO:GibbsLDA:[ITER] 71, 0.99, -350455.19\n",
"2016-02-10 10:40:19 INFO:GibbsLDA:[ITER] 72, 1.03, -350499.05\n",
"INFO:GibbsLDA:[ITER] 72, 1.03, -350499.05\n",
"2016-02-10 10:40:20 INFO:GibbsLDA:[ITER] 73, 1.08, -349934.15\n",
"INFO:GibbsLDA:[ITER] 73, 1.08, -349934.15\n",
"2016-02-10 10:40:21 INFO:GibbsLDA:[ITER] 74, 1.03, -349892.53\n",
"INFO:GibbsLDA:[ITER] 74, 1.03, -349892.53\n",
"2016-02-10 10:40:22 INFO:GibbsLDA:[ITER] 75, 1.09, -350195.13\n",
"INFO:GibbsLDA:[ITER] 75, 1.09, -350195.13\n",
"2016-02-10 10:40:23 INFO:GibbsLDA:[ITER] 76, 1.04, -350010.16\n",
"INFO:GibbsLDA:[ITER] 76, 1.04, -350010.16\n",
"2016-02-10 10:40:24 INFO:GibbsLDA:[ITER] 77, 0.97, -349705.84\n",
"INFO:GibbsLDA:[ITER] 77, 0.97, -349705.84\n",
"2016-02-10 10:40:25 INFO:GibbsLDA:[ITER] 78, 1.01, -349988.55\n",
"INFO:GibbsLDA:[ITER] 78, 1.01, -349988.55\n",
"2016-02-10 10:40:26 INFO:GibbsLDA:[ITER] 79, 1.07, -349738.13\n",
"INFO:GibbsLDA:[ITER] 79, 1.07, -349738.13\n",
"2016-02-10 10:40:27 INFO:GibbsLDA:[ITER] 80, 1.10, -349867.56\n",
"INFO:GibbsLDA:[ITER] 80, 1.10, -349867.56\n",
"2016-02-10 10:40:28 INFO:GibbsLDA:[ITER] 81, 1.04, -350055.30\n",
"INFO:GibbsLDA:[ITER] 81, 1.04, -350055.30\n",
"2016-02-10 10:40:29 INFO:GibbsLDA:[ITER] 82, 1.05, -349537.14\n",
"INFO:GibbsLDA:[ITER] 82, 1.05, -349537.14\n",
"2016-02-10 10:40:30 INFO:GibbsLDA:[ITER] 83, 1.07, -349465.11\n",
"INFO:GibbsLDA:[ITER] 83, 1.07, -349465.11\n",
"2016-02-10 10:40:31 INFO:GibbsLDA:[ITER] 84, 1.05, -349325.52\n",
"INFO:GibbsLDA:[ITER] 84, 1.05, -349325.52\n",
"2016-02-10 10:40:32 INFO:GibbsLDA:[ITER] 85, 1.08, -349304.29\n",
"INFO:GibbsLDA:[ITER] 85, 1.08, -349304.29\n",
"2016-02-10 10:40:34 INFO:GibbsLDA:[ITER] 86, 1.08, -349455.13\n",
"INFO:GibbsLDA:[ITER] 86, 1.08, -349455.13\n",
"2016-02-10 10:40:35 INFO:GibbsLDA:[ITER] 87, 1.07, -349244.57\n",
"INFO:GibbsLDA:[ITER] 87, 1.07, -349244.57\n",
"2016-02-10 10:40:36 INFO:GibbsLDA:[ITER] 88, 1.10, -349169.23\n",
"INFO:GibbsLDA:[ITER] 88, 1.10, -349169.23\n",
"2016-02-10 10:40:37 INFO:GibbsLDA:[ITER] 89, 1.05, -349327.64\n",
"INFO:GibbsLDA:[ITER] 89, 1.05, -349327.64\n",
"2016-02-10 10:40:38 INFO:GibbsLDA:[ITER] 90, 1.06, -349342.96\n",
"INFO:GibbsLDA:[ITER] 90, 1.06, -349342.96\n",
"2016-02-10 10:40:39 INFO:GibbsLDA:[ITER] 91, 1.02, -348635.82\n",
"INFO:GibbsLDA:[ITER] 91, 1.02, -348635.82\n",
"2016-02-10 10:40:40 INFO:GibbsLDA:[ITER] 92, 1.06, -349333.52\n",
"INFO:GibbsLDA:[ITER] 92, 1.06, -349333.52\n",
"2016-02-10 10:40:41 INFO:GibbsLDA:[ITER] 93, 1.02, -349200.81\n",
"INFO:GibbsLDA:[ITER] 93, 1.02, -349200.81\n",
"2016-02-10 10:40:42 INFO:GibbsLDA:[ITER] 94, 1.00, -348994.61\n",
"INFO:GibbsLDA:[ITER] 94, 1.00, -348994.61\n",
"2016-02-10 10:40:43 INFO:GibbsLDA:[ITER] 95, 1.05, -348928.84\n",
"INFO:GibbsLDA:[ITER] 95, 1.05, -348928.84\n",
"2016-02-10 10:40:44 INFO:GibbsLDA:[ITER] 96, 1.02, -348479.73\n",
"INFO:GibbsLDA:[ITER] 96, 1.02, -348479.73\n",
"2016-02-10 10:40:45 INFO:GibbsLDA:[ITER] 97, 1.02, -348653.66\n",
"INFO:GibbsLDA:[ITER] 97, 1.02, -348653.66\n",
"2016-02-10 10:40:46 INFO:GibbsLDA:[ITER] 98, 1.05, -348446.77\n",
"INFO:GibbsLDA:[ITER] 98, 1.05, -348446.77\n",
"2016-02-10 10:40:47 INFO:GibbsLDA:[ITER] 99, 1.03, -348462.25\n",
"INFO:GibbsLDA:[ITER] 99, 1.03, -348462.25\n"
]
}
],
"source": [
"max_iter=100\n",
"n_topic=30\n",
"model = GibbsLDA(n_doc, len(voca), n_topic)\n",
"model.fit(docs, max_iter=max_iter)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Print the 10 most probable words for each topic"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Topic 0 :\t chemical,group,company,also,total,general,rubber,capital,sold,used\n",
"Topic 1 :\t dollar,yen,japan,bank,central,g,west,exchange,currency,policy\n",
"Topic 2 :\t april,record,one,may,prior,pay,div,split,dividend,note\n",
"Topic 3 :\t deficit,government,major,finance,economic,trade,cut,current,also,industrial\n",
"Topic 4 :\t stocks,production,use,start,end,supply,x,demand,total,cotton\n",
"Topic 5 :\t oil,dome,gas,debt,days,term,energy,plan,new,natural\n",
"Topic 6 :\t quarter,first,earnings,company,share,per,ago,period,fiscal,strong\n",
"Topic 7 :\t would,told,price,house,committee,de,government,official,consumer,meat\n",
"Topic 8 :\t fed,reserve,federal,two,market,repurchase,system,spokesman,wednesday,one\n",
"Topic 9 :\t week,march,february,april,last,fell,average,report,previous,ended\n",
"Topic 10 :\t nil,o,e,c,n,f,p,b,total,d\n",
"Topic 11 :\t fund,free,mine,april,yesterday,port,grain,franklin,management,stockpile\n",
"Topic 12 :\t national,new,coffee,sale,international,american,york,business,sell,unit\n",
"Topic 13 :\t stock,company,share,corp,common,group,merger,outstanding,financial,acquire\n",
"Topic 14 :\t united,world,foreign,would,european,international,currency,decision,community,trade\n",
"Topic 15 :\t two,would,economic,five,major,last,three,report,tax,growth\n",
"Topic 16 :\t last,wheat,month,crop,department,grain,agriculture,corn,soviet,report\n",
"Topic 17 :\t bank,market,around,money,three,day,today,four,two,k\n",
"Topic 18 :\t could,may,would,meeting,minister,think,however,made,business,still\n",
"Topic 19 :\t loss,profit,note,tax,gain,corp,nine,december,six,ended\n",
"Topic 20 :\t loan,gold,non,brazil,income,bond,corp,interest,trust,end\n",
"Topic 21 :\t would,company,canadian,exchange,stake,statement,chairman,commission,canada,told\n",
"Topic 22 :\t share,stock,company,board,dividend,issue,per,buy,assets,plan\n",
"Topic 23 :\t trade,japan,japanese,surplus,ministry,last,washington,open,official,new\n",
"Topic 24 :\t market,one,industry,also,trading,time,steel,say,economy,early\n",
"Topic 25 :\t corp,acquisition,agreement,unit,assets,purchase,cash,subject,company,buy\n",
"Topic 26 :\t offer,sugar,tender,per,export,may,warner,price,june,french\n",
"Topic 27 :\t rate,interest,rise,growth,would,money,rose,inflation,economic,australia\n",
"Topic 28 :\t price,per,borg,contract,china,production,output,crude,unchanged,high\n",
"Topic 29 :\t new,increase,lower,due,last,end,higher,one,since,president\n"
]
}
],
"source": [
"for ti in range(n_topic):\n",
" top_words = get_top_words(model.TW, voca, ti, n_words=10)\n",
" print('Topic', ti ,':\\t', ','.join(top_words))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit 7c15e06

Please sign in to comment.