Skip to content

Commit

Permalink
Add Logger on Tika wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
Garfield-fr committed Nov 14, 2011
1 parent 2b52564 commit dd18d25
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 12 deletions.
14 changes: 14 additions & 0 deletions DependencyInjection/Configuration.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@
*/
class Configuration implements ConfigurationInterface
{
private $debug;

/**
 * Constructor
 *
 * @param boolean $debug Whether to use the debug mode; stored and later used
 *                       as the default value of the "logging" node
 */
public function __construct($debug)
{
    // Use the canonical (bool) cast; the (Boolean) alias is a non-standard spelling.
    $this->debug = (bool) $debug;
}

/**
* {@inheritDoc}
*/
Expand All @@ -21,6 +33,7 @@ public function getConfigTreeBuilder()
$rootNode = $treeBuilder->root('funstaff_tika');

$rootNode
->addDefaultsIfNotSet()
->children()
->scalarNode('tika_path')->isRequired()->end()
->scalarNode('output_format')->defaultValue('xml')
Expand All @@ -29,6 +42,7 @@ public function getConfigTreeBuilder()
->thenInvalid('Not authorized value for output (only xml, html and text)')
->end()
->end()
->booleanNode('logging')->defaultValue($this->debug)->end()
->end();

return $treeBuilder;
Expand Down
2 changes: 1 addition & 1 deletion DependencyInjection/FunstaffTikaExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class FunstaffTikaExtension extends Extension
{
public function load(array $configs, ContainerBuilder $container)
{
$configuration = new Configuration();
$configuration = new Configuration($container->getParameter('kernel.debug'));
$config = $this->processConfiguration($configuration, $configs);

$loader = new XmlFileLoader($container, new FileLocator(__DIR__.'/../Resources/config'));
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ File config.yml
funstaff_tika:
tika_path: /path/to/tika-app-1.0.jar
output_format: ~
logging: ~ # Optional. Defaults to the Symfony2 kernel.debug value; set true or false to force logging on or off.


Examples
Expand Down
1 change: 1 addition & 0 deletions Resources/config/services.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<argument>%funstaff.tika.config%</argument>
<argument>%funstaff.document.class%</argument>
<argument>%funstaff.metadata.class%</argument>
<argument type="service" id="logger" on-invalid="null" />
</service>
</services>

Expand Down
53 changes: 43 additions & 10 deletions Tests/Wrapper/TikaTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

use Symfony\Bundle\FrameworkBundle\Tests\TestCase;
use Funstaff\TikaBundle\Wrapper\Tika;
use Symfony\Bridge\Monolog\Logger;
use Monolog\Handler\NullHandler;

/**
* TikaTest
Expand All @@ -14,14 +16,18 @@ class TikaTest extends TestCase
{
/**
 * Build the configuration array handed to every Tika instance in this test case.
 *
 * All three keys read by the wrapper are provided: the jar path, the default
 * output format and the logging flag.
 */
public function setup()
{
    $this->config = array(
        'tika_path'     => '/www/bin/tika-app-1.0.jar',
        'output_format' => 'xml',
        'logging'       => true,
    );
}

public function testWithFailedSetOutputFormat()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
try {
$tika->setOutputFormat('foo');
} catch (\InvalidArgumentException $e) {
Expand All @@ -34,7 +40,8 @@ public function testSetOutputFormat()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$tika->setOutputFormat('xml');
$this->assertEquals('xml', $tika->getOutputFormat());
}
Expand All @@ -43,19 +50,32 @@ public function testAddGetDocument()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$this->assertTrue(is_array($tika->getDocuments()));
$this->assertEquals(0, count($tika->getDocuments()));
$tika->addDocument('test', __DIR__.'/../documents/test.pdf');
$this->assertEquals(1, count($tika->getDocuments()));
$this->assertInstanceOf('Funstaff\TikaBundle\Content\Document', $tika->getDocument('test'));
}

/**
 * The logging flag starts out as configured (true) and can be switched off
 * through the setter.
 */
public function testLogging()
{
    $wrapper = new Tika(
        $this->config,
        'Funstaff\TikaBundle\Content\Document',
        'Funstaff\TikaBundle\Content\Metadata',
        $this->getLogger()
    );

    // Value taken from the 'logging' entry of the configuration.
    $initialFlag = $wrapper->getLogging();
    $this->assertTrue($initialFlag);

    $wrapper->setLogging(false);
    $flagAfterDisable = $wrapper->getLogging();
    $this->assertFalse($flagAfterDisable);
}

public function testWithNoAddedDocumentExtractContent()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
try {
$tika->extractContent();
} catch (\InvalidArgumentException $e) {
Expand All @@ -68,7 +88,8 @@ public function testExtractOutputXml()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$tika->addDocument('test', __DIR__.'/../documents/test.pdf');
$tika->extractContent();
$content = $tika->getDocument('test')->getContent();
Expand All @@ -86,7 +107,8 @@ public function testExtractOutputHtml()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$tika->addDocument('test', __DIR__.'/../documents/test.pdf');
$tika->setOutputFormat('html');
$tika->extractContent();
Expand All @@ -99,7 +121,8 @@ public function testExtractOutputText()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$tika->addDocument('test', __DIR__.'/../documents/test.pdf');
$tika->setOutputFormat('text');
$tika->extractContent();
Expand All @@ -112,7 +135,8 @@ public function testExtractMetadata()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$tika->addDocument('test', __DIR__.'/../documents/test.pdf');
$tika->extractMetadata();
$metadata = $tika->getDocument('test')->getMetadata();
Expand All @@ -124,12 +148,21 @@ public function testExtractAll()
{
$tika = new Tika($this->config,
'Funstaff\TikaBundle\Content\Document',
'Funstaff\TikaBundle\Content\Metadata');
'Funstaff\TikaBundle\Content\Metadata',
$this->getLogger());
$tika->addDocument('test', __DIR__.'/../documents/test.pdf');
$tika->extractAll();
$content = $tika->getDocument('test')->getContent();
$this->assertTrue(preg_match('/^<\?xml.*/', $content) > 0);
$metadata = $tika->getDocument('test')->getMetadata();
$this->assertEquals('application/pdf', $metadata->get('Content-Type'));
}

/**
 * Create the logger passed to the Tika wrapper in the tests.
 *
 * @return Logger Logger on the "test" channel with a NullHandler pushed onto it
 */
protected function getLogger()
{
    $testLogger = new Logger('test');
    $handler = new NullHandler();
    $testLogger->pushHandler($handler);

    return $testLogger;
}
}
35 changes: 34 additions & 1 deletion Wrapper/Tika.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Funstaff\TikaBundle\Wrapper;

use Funstaff\TikaBundle\Wrapper\TikaInterface;
use Symfony\Bridge\Monolog\Logger;

/**
* Tika
Expand All @@ -21,16 +22,22 @@ class Tika implements TikaInterface

protected $metadataClass;

protected $logger;

protected $logging;

/**
 * Construct
 *
 * @param array       $configuration Bundle configuration; the 'logging' and
 *                                   'output_format' entries are read here
 * @param string      $documentClass Document class name
 * @param string      $metadataClass Metadata class name, instantiated with the
 *                                   metadata output of each document
 * @param Logger|null $logger        Optional logger; the service definition
 *                                   injects null when no logger is available
 */
public function __construct(array $configuration, $documentClass, $metadataClass, Logger $logger = null)
{
    $this->configuration = $configuration;
    $this->documentClass = $documentClass;
    $this->metadataClass = $metadataClass;
    // May be null: the extract methods check the logger before using it.
    $this->logger = $logger;
    // Both keys are optional; fall back to defaults instead of raising an
    // undefined-index notice when the array does not contain them.
    $this->logging = isset($configuration['logging']) ? $configuration['logging'] : false;
    $this->outputFormat = !empty($configuration['output_format']) ? $configuration['output_format'] : 'xml';
}

Expand Down Expand Up @@ -99,6 +106,26 @@ public function getDocuments()
return $this->document;
}

/**
 * Enable or disable logging of the extract operations.
 *
 * The extract methods only write to the logger when this flag is truthy
 * and a logger has been provided.
 *
 * @param boolean $logging New value of the logging flag
 */
public function setLogging($logging)
{
    $this->logging = $logging;
}

/**
 * Tell whether logging is currently enabled.
 *
 * @return boolean Current value of the logging flag, as set by the
 *                 constructor or by setLogging()
 */
public function getLogging()
{
    return $this->logging;
}

/**
* Extract Content
*
Expand All @@ -109,6 +136,9 @@ public function extractContent()
ob_start();
$command = $this->generateTikaCommand($this->outputFormat);
foreach ($this->document as $doc) {
if ($this->logger && $this->logging) {
$this->logger->debug(sprintf('Tika extract content: %s', $doc->getPath()));
}
passthru(sprintf("$command %s", $doc->getPath()));
$output = ob_get_clean();
$doc->setContent($output);
Expand All @@ -125,6 +155,9 @@ public function extractMetadata()
ob_start();
$command = $this->generateTikaCommand('meta');
foreach ($this->document as $doc) {
if ($this->logger && $this->logging) {
$this->logger->debug(sprintf('Tika extract metadata: %s', $doc->getPath()));
}
passthru(sprintf("$command %s", $doc->getPath()));
$output = ob_get_clean();
$doc->setMetadata(new $this->metadataClass($output));
Expand Down

0 comments on commit dd18d25

Please sign in to comment.