-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathexample_01_frequency_analysis.php
58 lines (44 loc) · 1.35 KB
/
example_01_frequency_analysis.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
<?php
/**
 * Dan Cardin (yooper)
 *
 * Pulls the top 10 words from Tom Sawyer and produces a bar chart page.
 *
 * All paths below are relative, so the script must be run from the
 * directory that contains vendor/, utils/, data/ and pub/.
 */

// Require the Composer autoloader.
require_once 'vendor/autoload.php';
// Used to generate a chart from the output of PHP Text Analysis.
require_once 'utils/BarPageBuilder.php';

/**
 * The text being analysed is Tom Sawyer from the Gutenberg project:
 * http://www.gutenberg.org/cache/epub/74/pg74.txt
 * A local copy is read from data/books/pg74.txt.
 */
$bookPath = 'data/books/pg74.txt';

/**
 * @var string|false $book
 */
$book = file_get_contents($bookPath);
if ($book === false) {
    // Fail loudly instead of handing `false` to the tokenizer.
    throw new \RuntimeException("Unable to read the book from {$bookPath}");
}

/**
 * Create a tokenizer object to parse the book into a set of tokens.
 */
$tokenizer = new \TextAnalysis\Tokenizers\GeneralTokenizer();

/**
 * Get the set of tokens generated by the tokenizer and build a
 * frequency distribution from them.
 */
$tokens = $tokenizer->tokenize($book);
$freqDist = new \TextAnalysis\Analysis\FreqDist($tokens);

/**
 * Get the top 10 most used words in Tom Sawyer.
 *
 * array_slice() is used rather than array_splice(): array_splice() takes its
 * first argument by reference, so passing a method's return value directly
 * raises "Only variables should be passed by reference". The extracted
 * elements are the same either way.
 */
$top10 = array_slice($freqDist->getKeyValuesByFrequency(), 0, 10);

/**
 * Use High Charts to visualize the data.
 */
$pageBuilder = new BarPageBuilder($top10);
$html = $pageBuilder->getHtmlPage();

$outputPath = "pub/pages/example_01_frequency_analysis.html";
if (file_put_contents($outputPath, $html) === false) {
    // Do not print the success message when the page was not written.
    throw new \RuntimeException("Unable to write the chart page to {$outputPath}");
}

// Tell the user where the generated page is; open it with a web browser.
echo 'go to the directory pub/pages/example_01_frequency_analysis.html and open the file with your web browser'.PHP_EOL;