-
Notifications
You must be signed in to change notification settings - Fork 171
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5956326
commit 7c15e06
Showing
6 changed files
with
436 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,3 +57,5 @@ result/ | |
|
||
# test folder | ||
test/ | ||
logs/ | ||
notebook/.ipynb_checkpoints |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,388 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Example of GibbsLDA" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"This example requires to install three nltk corpora:nltk.corpus.reuters, nltk.corpus.words, nltk.corpus.stopwords.\n", | ||
"\n", | ||
"You can download the corpora via `nltk.download()`" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"\n", | ||
"from ptm import GibbsLDA\n", | ||
"from ptm.nltk_corpus import get_reuters_cnt_ids\n", | ||
"from ptm.utils import convert_cnt_to_list, get_top_words" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Loading Reuter corpus from NLTK" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Load reuter corpus including 1000 documents with maximum vocabulary size of 10000 from NLTK corpus" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Vocabulary size:4654\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"n_doc = 1000\n", | ||
"voca, doc_ids, doc_cnt = get_reuters_cnt_ids(num_doc=n_doc, max_voca=10000)\n", | ||
"docs = convert_cnt_to_list(doc_ids, doc_cnt)\n", | ||
"print('Vocabulary size:%d' % len(voca))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Inferencen through the Gibbs sampling" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"2016-02-10 10:39:05 INFO:GibbsLDA:[ITER] 0, 0.95, -492103.33\n", | ||
"INFO:GibbsLDA:[ITER] 0, 0.95, -492103.33\n", | ||
"2016-02-10 10:39:06 INFO:GibbsLDA:[ITER] 1, 0.97, -449060.17\n", | ||
"INFO:GibbsLDA:[ITER] 1, 0.97, -449060.17\n", | ||
"2016-02-10 10:39:07 INFO:GibbsLDA:[ITER] 2, 0.99, -425090.80\n", | ||
"INFO:GibbsLDA:[ITER] 2, 0.99, -425090.80\n", | ||
"2016-02-10 10:39:08 INFO:GibbsLDA:[ITER] 3, 1.04, -409972.35\n", | ||
"INFO:GibbsLDA:[ITER] 3, 1.04, -409972.35\n", | ||
"2016-02-10 10:39:09 INFO:GibbsLDA:[ITER] 4, 0.98, -399907.10\n", | ||
"INFO:GibbsLDA:[ITER] 4, 0.98, -399907.10\n", | ||
"2016-02-10 10:39:10 INFO:GibbsLDA:[ITER] 5, 0.96, -392280.97\n", | ||
"INFO:GibbsLDA:[ITER] 5, 0.96, -392280.97\n", | ||
"2016-02-10 10:39:11 INFO:GibbsLDA:[ITER] 6, 0.97, -387048.46\n", | ||
"INFO:GibbsLDA:[ITER] 6, 0.97, -387048.46\n", | ||
"2016-02-10 10:39:12 INFO:GibbsLDA:[ITER] 7, 1.03, -383034.02\n", | ||
"INFO:GibbsLDA:[ITER] 7, 1.03, -383034.02\n", | ||
"2016-02-10 10:39:13 INFO:GibbsLDA:[ITER] 8, 1.01, -378981.12\n", | ||
"INFO:GibbsLDA:[ITER] 8, 1.01, -378981.12\n", | ||
"2016-02-10 10:39:14 INFO:GibbsLDA:[ITER] 9, 0.97, -376289.35\n", | ||
"INFO:GibbsLDA:[ITER] 9, 0.97, -376289.35\n", | ||
"2016-02-10 10:39:15 INFO:GibbsLDA:[ITER] 10, 0.99, -374223.91\n", | ||
"INFO:GibbsLDA:[ITER] 10, 0.99, -374223.91\n", | ||
"2016-02-10 10:39:16 INFO:GibbsLDA:[ITER] 11, 0.96, -371534.23\n", | ||
"INFO:GibbsLDA:[ITER] 11, 0.96, -371534.23\n", | ||
"2016-02-10 10:39:17 INFO:GibbsLDA:[ITER] 12, 0.95, -369412.53\n", | ||
"INFO:GibbsLDA:[ITER] 12, 0.95, -369412.53\n", | ||
"2016-02-10 10:39:18 INFO:GibbsLDA:[ITER] 13, 0.98, -368282.84\n", | ||
"INFO:GibbsLDA:[ITER] 13, 0.98, -368282.84\n", | ||
"2016-02-10 10:39:19 INFO:GibbsLDA:[ITER] 14, 0.96, -367202.90\n", | ||
"INFO:GibbsLDA:[ITER] 14, 0.96, -367202.90\n", | ||
"2016-02-10 10:39:20 INFO:GibbsLDA:[ITER] 15, 0.96, -365800.45\n", | ||
"INFO:GibbsLDA:[ITER] 15, 0.96, -365800.45\n", | ||
"2016-02-10 10:39:21 INFO:GibbsLDA:[ITER] 16, 0.95, -364384.21\n", | ||
"INFO:GibbsLDA:[ITER] 16, 0.95, -364384.21\n", | ||
"2016-02-10 10:39:22 INFO:GibbsLDA:[ITER] 17, 0.98, -363365.58\n", | ||
"INFO:GibbsLDA:[ITER] 17, 0.98, -363365.58\n", | ||
"2016-02-10 10:39:23 INFO:GibbsLDA:[ITER] 18, 0.96, -362391.93\n", | ||
"INFO:GibbsLDA:[ITER] 18, 0.96, -362391.93\n", | ||
"2016-02-10 10:39:24 INFO:GibbsLDA:[ITER] 19, 1.00, -361615.87\n", | ||
"INFO:GibbsLDA:[ITER] 19, 1.00, -361615.87\n", | ||
"2016-02-10 10:39:25 INFO:GibbsLDA:[ITER] 20, 1.05, -360838.68\n", | ||
"INFO:GibbsLDA:[ITER] 20, 1.05, -360838.68\n", | ||
"2016-02-10 10:39:26 INFO:GibbsLDA:[ITER] 21, 1.02, -360048.97\n", | ||
"INFO:GibbsLDA:[ITER] 21, 1.02, -360048.97\n", | ||
"2016-02-10 10:39:27 INFO:GibbsLDA:[ITER] 22, 1.01, -359899.43\n", | ||
"INFO:GibbsLDA:[ITER] 22, 1.01, -359899.43\n", | ||
"2016-02-10 10:39:28 INFO:GibbsLDA:[ITER] 23, 0.99, -359558.00\n", | ||
"INFO:GibbsLDA:[ITER] 23, 0.99, -359558.00\n", | ||
"2016-02-10 10:39:29 INFO:GibbsLDA:[ITER] 24, 0.98, -359132.38\n", | ||
"INFO:GibbsLDA:[ITER] 24, 0.98, -359132.38\n", | ||
"2016-02-10 10:39:30 INFO:GibbsLDA:[ITER] 25, 1.06, -358597.54\n", | ||
"INFO:GibbsLDA:[ITER] 25, 1.06, -358597.54\n", | ||
"2016-02-10 10:39:31 INFO:GibbsLDA:[ITER] 26, 1.05, -358172.53\n", | ||
"INFO:GibbsLDA:[ITER] 26, 1.05, -358172.53\n", | ||
"2016-02-10 10:39:32 INFO:GibbsLDA:[ITER] 27, 0.97, -357632.83\n", | ||
"INFO:GibbsLDA:[ITER] 27, 0.97, -357632.83\n", | ||
"2016-02-10 10:39:33 INFO:GibbsLDA:[ITER] 28, 0.97, -357059.67\n", | ||
"INFO:GibbsLDA:[ITER] 28, 0.97, -357059.67\n", | ||
"2016-02-10 10:39:34 INFO:GibbsLDA:[ITER] 29, 1.00, -356616.19\n", | ||
"INFO:GibbsLDA:[ITER] 29, 1.00, -356616.19\n", | ||
"2016-02-10 10:39:35 INFO:GibbsLDA:[ITER] 30, 1.03, -356614.06\n", | ||
"INFO:GibbsLDA:[ITER] 30, 1.03, -356614.06\n", | ||
"2016-02-10 10:39:36 INFO:GibbsLDA:[ITER] 31, 0.96, -356382.31\n", | ||
"INFO:GibbsLDA:[ITER] 31, 0.96, -356382.31\n", | ||
"2016-02-10 10:39:37 INFO:GibbsLDA:[ITER] 32, 1.01, -355719.01\n", | ||
"INFO:GibbsLDA:[ITER] 32, 1.01, -355719.01\n", | ||
"2016-02-10 10:39:38 INFO:GibbsLDA:[ITER] 33, 1.04, -355607.53\n", | ||
"INFO:GibbsLDA:[ITER] 33, 1.04, -355607.53\n", | ||
"2016-02-10 10:39:39 INFO:GibbsLDA:[ITER] 34, 1.02, -355128.47\n", | ||
"INFO:GibbsLDA:[ITER] 34, 1.02, -355128.47\n", | ||
"2016-02-10 10:39:40 INFO:GibbsLDA:[ITER] 35, 0.98, -354985.22\n", | ||
"INFO:GibbsLDA:[ITER] 35, 0.98, -354985.22\n", | ||
"2016-02-10 10:39:41 INFO:GibbsLDA:[ITER] 36, 1.02, -354992.00\n", | ||
"INFO:GibbsLDA:[ITER] 36, 1.02, -354992.00\n", | ||
"2016-02-10 10:39:42 INFO:GibbsLDA:[ITER] 37, 0.97, -354565.29\n", | ||
"INFO:GibbsLDA:[ITER] 37, 0.97, -354565.29\n", | ||
"2016-02-10 10:39:43 INFO:GibbsLDA:[ITER] 38, 1.00, -354011.48\n", | ||
"INFO:GibbsLDA:[ITER] 38, 1.00, -354011.48\n", | ||
"2016-02-10 10:39:44 INFO:GibbsLDA:[ITER] 39, 1.00, -353808.40\n", | ||
"INFO:GibbsLDA:[ITER] 39, 1.00, -353808.40\n", | ||
"2016-02-10 10:39:46 INFO:GibbsLDA:[ITER] 40, 1.01, -353243.78\n", | ||
"INFO:GibbsLDA:[ITER] 40, 1.01, -353243.78\n", | ||
"2016-02-10 10:39:47 INFO:GibbsLDA:[ITER] 41, 1.03, -353491.12\n", | ||
"INFO:GibbsLDA:[ITER] 41, 1.03, -353491.12\n", | ||
"2016-02-10 10:39:48 INFO:GibbsLDA:[ITER] 42, 1.00, -353579.48\n", | ||
"INFO:GibbsLDA:[ITER] 42, 1.00, -353579.48\n", | ||
"2016-02-10 10:39:49 INFO:GibbsLDA:[ITER] 43, 0.97, -353030.64\n", | ||
"INFO:GibbsLDA:[ITER] 43, 0.97, -353030.64\n", | ||
"2016-02-10 10:39:50 INFO:GibbsLDA:[ITER] 44, 0.96, -352786.36\n", | ||
"INFO:GibbsLDA:[ITER] 44, 0.96, -352786.36\n", | ||
"2016-02-10 10:39:51 INFO:GibbsLDA:[ITER] 45, 0.99, -352842.34\n", | ||
"INFO:GibbsLDA:[ITER] 45, 0.99, -352842.34\n", | ||
"2016-02-10 10:39:52 INFO:GibbsLDA:[ITER] 46, 1.02, -352905.42\n", | ||
"INFO:GibbsLDA:[ITER] 46, 1.02, -352905.42\n", | ||
"2016-02-10 10:39:53 INFO:GibbsLDA:[ITER] 47, 1.05, -352675.95\n", | ||
"INFO:GibbsLDA:[ITER] 47, 1.05, -352675.95\n", | ||
"2016-02-10 10:39:54 INFO:GibbsLDA:[ITER] 48, 1.01, -352287.79\n", | ||
"INFO:GibbsLDA:[ITER] 48, 1.01, -352287.79\n", | ||
"2016-02-10 10:39:55 INFO:GibbsLDA:[ITER] 49, 1.01, -352322.47\n", | ||
"INFO:GibbsLDA:[ITER] 49, 1.01, -352322.47\n", | ||
"2016-02-10 10:39:56 INFO:GibbsLDA:[ITER] 50, 0.99, -352085.49\n", | ||
"INFO:GibbsLDA:[ITER] 50, 0.99, -352085.49\n", | ||
"2016-02-10 10:39:57 INFO:GibbsLDA:[ITER] 51, 1.01, -352525.78\n", | ||
"INFO:GibbsLDA:[ITER] 51, 1.01, -352525.78\n", | ||
"2016-02-10 10:39:58 INFO:GibbsLDA:[ITER] 52, 0.98, -352468.89\n", | ||
"INFO:GibbsLDA:[ITER] 52, 0.98, -352468.89\n", | ||
"2016-02-10 10:39:59 INFO:GibbsLDA:[ITER] 53, 0.97, -351834.87\n", | ||
"INFO:GibbsLDA:[ITER] 53, 0.97, -351834.87\n", | ||
"2016-02-10 10:40:00 INFO:GibbsLDA:[ITER] 54, 1.01, -351916.33\n", | ||
"INFO:GibbsLDA:[ITER] 54, 1.01, -351916.33\n", | ||
"2016-02-10 10:40:01 INFO:GibbsLDA:[ITER] 55, 1.04, -352065.68\n", | ||
"INFO:GibbsLDA:[ITER] 55, 1.04, -352065.68\n", | ||
"2016-02-10 10:40:02 INFO:GibbsLDA:[ITER] 56, 1.03, -351816.31\n", | ||
"INFO:GibbsLDA:[ITER] 56, 1.03, -351816.31\n", | ||
"2016-02-10 10:40:03 INFO:GibbsLDA:[ITER] 57, 1.09, -351507.51\n", | ||
"INFO:GibbsLDA:[ITER] 57, 1.09, -351507.51\n", | ||
"2016-02-10 10:40:04 INFO:GibbsLDA:[ITER] 58, 1.04, -351347.31\n", | ||
"INFO:GibbsLDA:[ITER] 58, 1.04, -351347.31\n", | ||
"2016-02-10 10:40:05 INFO:GibbsLDA:[ITER] 59, 0.96, -351306.13\n", | ||
"INFO:GibbsLDA:[ITER] 59, 0.96, -351306.13\n", | ||
"2016-02-10 10:40:06 INFO:GibbsLDA:[ITER] 60, 1.00, -351042.19\n", | ||
"INFO:GibbsLDA:[ITER] 60, 1.00, -351042.19\n", | ||
"2016-02-10 10:40:07 INFO:GibbsLDA:[ITER] 61, 1.06, -351417.68\n", | ||
"INFO:GibbsLDA:[ITER] 61, 1.06, -351417.68\n", | ||
"2016-02-10 10:40:08 INFO:GibbsLDA:[ITER] 62, 0.96, -351318.16\n", | ||
"INFO:GibbsLDA:[ITER] 62, 0.96, -351318.16\n", | ||
"2016-02-10 10:40:09 INFO:GibbsLDA:[ITER] 63, 0.97, -351301.51\n", | ||
"INFO:GibbsLDA:[ITER] 63, 0.97, -351301.51\n", | ||
"2016-02-10 10:40:10 INFO:GibbsLDA:[ITER] 64, 0.97, -350956.96\n", | ||
"INFO:GibbsLDA:[ITER] 64, 0.97, -350956.96\n", | ||
"2016-02-10 10:40:11 INFO:GibbsLDA:[ITER] 65, 0.98, -350700.36\n", | ||
"INFO:GibbsLDA:[ITER] 65, 0.98, -350700.36\n", | ||
"2016-02-10 10:40:12 INFO:GibbsLDA:[ITER] 66, 1.05, -350579.33\n", | ||
"INFO:GibbsLDA:[ITER] 66, 1.05, -350579.33\n", | ||
"2016-02-10 10:40:13 INFO:GibbsLDA:[ITER] 67, 0.99, -350598.48\n", | ||
"INFO:GibbsLDA:[ITER] 67, 0.99, -350598.48\n", | ||
"2016-02-10 10:40:14 INFO:GibbsLDA:[ITER] 68, 0.99, -350757.37\n", | ||
"INFO:GibbsLDA:[ITER] 68, 0.99, -350757.37\n", | ||
"2016-02-10 10:40:15 INFO:GibbsLDA:[ITER] 69, 1.09, -350525.69\n", | ||
"INFO:GibbsLDA:[ITER] 69, 1.09, -350525.69\n", | ||
"2016-02-10 10:40:16 INFO:GibbsLDA:[ITER] 70, 1.11, -350516.66\n", | ||
"INFO:GibbsLDA:[ITER] 70, 1.11, -350516.66\n", | ||
"2016-02-10 10:40:17 INFO:GibbsLDA:[ITER] 71, 0.99, -350455.19\n", | ||
"INFO:GibbsLDA:[ITER] 71, 0.99, -350455.19\n", | ||
"2016-02-10 10:40:19 INFO:GibbsLDA:[ITER] 72, 1.03, -350499.05\n", | ||
"INFO:GibbsLDA:[ITER] 72, 1.03, -350499.05\n", | ||
"2016-02-10 10:40:20 INFO:GibbsLDA:[ITER] 73, 1.08, -349934.15\n", | ||
"INFO:GibbsLDA:[ITER] 73, 1.08, -349934.15\n", | ||
"2016-02-10 10:40:21 INFO:GibbsLDA:[ITER] 74, 1.03, -349892.53\n", | ||
"INFO:GibbsLDA:[ITER] 74, 1.03, -349892.53\n", | ||
"2016-02-10 10:40:22 INFO:GibbsLDA:[ITER] 75, 1.09, -350195.13\n", | ||
"INFO:GibbsLDA:[ITER] 75, 1.09, -350195.13\n", | ||
"2016-02-10 10:40:23 INFO:GibbsLDA:[ITER] 76, 1.04, -350010.16\n", | ||
"INFO:GibbsLDA:[ITER] 76, 1.04, -350010.16\n", | ||
"2016-02-10 10:40:24 INFO:GibbsLDA:[ITER] 77, 0.97, -349705.84\n", | ||
"INFO:GibbsLDA:[ITER] 77, 0.97, -349705.84\n", | ||
"2016-02-10 10:40:25 INFO:GibbsLDA:[ITER] 78, 1.01, -349988.55\n", | ||
"INFO:GibbsLDA:[ITER] 78, 1.01, -349988.55\n", | ||
"2016-02-10 10:40:26 INFO:GibbsLDA:[ITER] 79, 1.07, -349738.13\n", | ||
"INFO:GibbsLDA:[ITER] 79, 1.07, -349738.13\n", | ||
"2016-02-10 10:40:27 INFO:GibbsLDA:[ITER] 80, 1.10, -349867.56\n", | ||
"INFO:GibbsLDA:[ITER] 80, 1.10, -349867.56\n", | ||
"2016-02-10 10:40:28 INFO:GibbsLDA:[ITER] 81, 1.04, -350055.30\n", | ||
"INFO:GibbsLDA:[ITER] 81, 1.04, -350055.30\n", | ||
"2016-02-10 10:40:29 INFO:GibbsLDA:[ITER] 82, 1.05, -349537.14\n", | ||
"INFO:GibbsLDA:[ITER] 82, 1.05, -349537.14\n", | ||
"2016-02-10 10:40:30 INFO:GibbsLDA:[ITER] 83, 1.07, -349465.11\n", | ||
"INFO:GibbsLDA:[ITER] 83, 1.07, -349465.11\n", | ||
"2016-02-10 10:40:31 INFO:GibbsLDA:[ITER] 84, 1.05, -349325.52\n", | ||
"INFO:GibbsLDA:[ITER] 84, 1.05, -349325.52\n", | ||
"2016-02-10 10:40:32 INFO:GibbsLDA:[ITER] 85, 1.08, -349304.29\n", | ||
"INFO:GibbsLDA:[ITER] 85, 1.08, -349304.29\n", | ||
"2016-02-10 10:40:34 INFO:GibbsLDA:[ITER] 86, 1.08, -349455.13\n", | ||
"INFO:GibbsLDA:[ITER] 86, 1.08, -349455.13\n", | ||
"2016-02-10 10:40:35 INFO:GibbsLDA:[ITER] 87, 1.07, -349244.57\n", | ||
"INFO:GibbsLDA:[ITER] 87, 1.07, -349244.57\n", | ||
"2016-02-10 10:40:36 INFO:GibbsLDA:[ITER] 88, 1.10, -349169.23\n", | ||
"INFO:GibbsLDA:[ITER] 88, 1.10, -349169.23\n", | ||
"2016-02-10 10:40:37 INFO:GibbsLDA:[ITER] 89, 1.05, -349327.64\n", | ||
"INFO:GibbsLDA:[ITER] 89, 1.05, -349327.64\n", | ||
"2016-02-10 10:40:38 INFO:GibbsLDA:[ITER] 90, 1.06, -349342.96\n", | ||
"INFO:GibbsLDA:[ITER] 90, 1.06, -349342.96\n", | ||
"2016-02-10 10:40:39 INFO:GibbsLDA:[ITER] 91, 1.02, -348635.82\n", | ||
"INFO:GibbsLDA:[ITER] 91, 1.02, -348635.82\n", | ||
"2016-02-10 10:40:40 INFO:GibbsLDA:[ITER] 92, 1.06, -349333.52\n", | ||
"INFO:GibbsLDA:[ITER] 92, 1.06, -349333.52\n", | ||
"2016-02-10 10:40:41 INFO:GibbsLDA:[ITER] 93, 1.02, -349200.81\n", | ||
"INFO:GibbsLDA:[ITER] 93, 1.02, -349200.81\n", | ||
"2016-02-10 10:40:42 INFO:GibbsLDA:[ITER] 94, 1.00, -348994.61\n", | ||
"INFO:GibbsLDA:[ITER] 94, 1.00, -348994.61\n", | ||
"2016-02-10 10:40:43 INFO:GibbsLDA:[ITER] 95, 1.05, -348928.84\n", | ||
"INFO:GibbsLDA:[ITER] 95, 1.05, -348928.84\n", | ||
"2016-02-10 10:40:44 INFO:GibbsLDA:[ITER] 96, 1.02, -348479.73\n", | ||
"INFO:GibbsLDA:[ITER] 96, 1.02, -348479.73\n", | ||
"2016-02-10 10:40:45 INFO:GibbsLDA:[ITER] 97, 1.02, -348653.66\n", | ||
"INFO:GibbsLDA:[ITER] 97, 1.02, -348653.66\n", | ||
"2016-02-10 10:40:46 INFO:GibbsLDA:[ITER] 98, 1.05, -348446.77\n", | ||
"INFO:GibbsLDA:[ITER] 98, 1.05, -348446.77\n", | ||
"2016-02-10 10:40:47 INFO:GibbsLDA:[ITER] 99, 1.03, -348462.25\n", | ||
"INFO:GibbsLDA:[ITER] 99, 1.03, -348462.25\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"max_iter=100\n", | ||
"n_topic=30\n", | ||
"model = GibbsLDA(n_doc, len(voca), n_topic)\n", | ||
"model.fit(docs, max_iter=max_iter)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"source": [ | ||
"### Print top 10 probability words for each topic" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Topic 0 :\t chemical,group,company,also,total,general,rubber,capital,sold,used\n", | ||
"Topic 1 :\t dollar,yen,japan,bank,central,g,west,exchange,currency,policy\n", | ||
"Topic 2 :\t april,record,one,may,prior,pay,div,split,dividend,note\n", | ||
"Topic 3 :\t deficit,government,major,finance,economic,trade,cut,current,also,industrial\n", | ||
"Topic 4 :\t stocks,production,use,start,end,supply,x,demand,total,cotton\n", | ||
"Topic 5 :\t oil,dome,gas,debt,days,term,energy,plan,new,natural\n", | ||
"Topic 6 :\t quarter,first,earnings,company,share,per,ago,period,fiscal,strong\n", | ||
"Topic 7 :\t would,told,price,house,committee,de,government,official,consumer,meat\n", | ||
"Topic 8 :\t fed,reserve,federal,two,market,repurchase,system,spokesman,wednesday,one\n", | ||
"Topic 9 :\t week,march,february,april,last,fell,average,report,previous,ended\n", | ||
"Topic 10 :\t nil,o,e,c,n,f,p,b,total,d\n", | ||
"Topic 11 :\t fund,free,mine,april,yesterday,port,grain,franklin,management,stockpile\n", | ||
"Topic 12 :\t national,new,coffee,sale,international,american,york,business,sell,unit\n", | ||
"Topic 13 :\t stock,company,share,corp,common,group,merger,outstanding,financial,acquire\n", | ||
"Topic 14 :\t united,world,foreign,would,european,international,currency,decision,community,trade\n", | ||
"Topic 15 :\t two,would,economic,five,major,last,three,report,tax,growth\n", | ||
"Topic 16 :\t last,wheat,month,crop,department,grain,agriculture,corn,soviet,report\n", | ||
"Topic 17 :\t bank,market,around,money,three,day,today,four,two,k\n", | ||
"Topic 18 :\t could,may,would,meeting,minister,think,however,made,business,still\n", | ||
"Topic 19 :\t loss,profit,note,tax,gain,corp,nine,december,six,ended\n", | ||
"Topic 20 :\t loan,gold,non,brazil,income,bond,corp,interest,trust,end\n", | ||
"Topic 21 :\t would,company,canadian,exchange,stake,statement,chairman,commission,canada,told\n", | ||
"Topic 22 :\t share,stock,company,board,dividend,issue,per,buy,assets,plan\n", | ||
"Topic 23 :\t trade,japan,japanese,surplus,ministry,last,washington,open,official,new\n", | ||
"Topic 24 :\t market,one,industry,also,trading,time,steel,say,economy,early\n", | ||
"Topic 25 :\t corp,acquisition,agreement,unit,assets,purchase,cash,subject,company,buy\n", | ||
"Topic 26 :\t offer,sugar,tender,per,export,may,warner,price,june,french\n", | ||
"Topic 27 :\t rate,interest,rise,growth,would,money,rose,inflation,economic,australia\n", | ||
"Topic 28 :\t price,per,borg,contract,china,production,output,crude,unchanged,high\n", | ||
"Topic 29 :\t new,increase,lower,due,last,end,higher,one,since,president\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for ti in range(n_topic):\n", | ||
" top_words = get_top_words(model.TW, voca, ti, n_words=10)\n", | ||
" print('Topic', ti ,':\\t', ','.join(top_words))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.4.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
Oops, something went wrong.