From dd18d2553f0da6610e9351d57b25817d6c2afd82 Mon Sep 17 00:00:00 2001 From: Bertrand Zuchuat Date: Mon, 14 Nov 2011 17:42:18 +0100 Subject: [PATCH] Add Logger on Tika wrapper --- DependencyInjection/Configuration.php | 14 +++++ DependencyInjection/FunstaffTikaExtension.php | 2 +- README.md | 1 + Resources/config/services.xml | 1 + Tests/Wrapper/TikaTest.php | 53 +++++++++++++++---- Wrapper/Tika.php | 35 +++++++++++- 6 files changed, 94 insertions(+), 12 deletions(-) diff --git a/DependencyInjection/Configuration.php b/DependencyInjection/Configuration.php index f9c7116..173a8eb 100644 --- a/DependencyInjection/Configuration.php +++ b/DependencyInjection/Configuration.php @@ -12,6 +12,18 @@ */ class Configuration implements ConfigurationInterface { + private $debug; + + /** + * Constructor + * + * @param Boolean $debug Whether to use the debug mode + */ + public function __construct($debug) + { + $this->debug = (Boolean) $debug; + } + /** * {@inheritDoc} */ @@ -21,6 +33,7 @@ public function getConfigTreeBuilder() $rootNode = $treeBuilder->root('funstaff_tika'); $rootNode + ->addDefaultsIfNotSet() ->children() ->scalarNode('tika_path')->isRequired()->end() ->scalarNode('output_format')->defaultValue('xml') @@ -29,6 +42,7 @@ public function getConfigTreeBuilder() ->thenInvalid('Not authorized value for output (only xml, html and text)') ->end() ->end() + ->booleanNode('logging')->defaultValue($this->debug)->end() ->end(); return $treeBuilder; diff --git a/DependencyInjection/FunstaffTikaExtension.php b/DependencyInjection/FunstaffTikaExtension.php index 4296917..3df4df6 100644 --- a/DependencyInjection/FunstaffTikaExtension.php +++ b/DependencyInjection/FunstaffTikaExtension.php @@ -16,7 +16,7 @@ class FunstaffTikaExtension extends Extension { public function load(array $configs, ContainerBuilder $container) { - $configuration = new Configuration(); + $configuration = new Configuration($container->getParameter('kernel.debug')); $config = $this->processConfiguration($configuration, $configs); $loader = new XmlFileLoader($container, new FileLocator(__DIR__.'/../Resources/config')); diff --git a/README.md b/README.md index e94d0b0..1c5d34d 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ File config.yml funstaff_tika: tika_path: /path/to/tika-app-1.0.jar output_format: ~ + logging: ~ # Use the Symfony2 default. Force the logging with this param. Examples diff --git a/Resources/config/services.xml b/Resources/config/services.xml index d16f3f6..c365222 100644 --- a/Resources/config/services.xml +++ b/Resources/config/services.xml @@ -15,6 +15,7 @@ %funstaff.tika.config% %funstaff.document.class% %funstaff.metadata.class% + diff --git a/Tests/Wrapper/TikaTest.php b/Tests/Wrapper/TikaTest.php index d797622..e1c0236 100644 --- a/Tests/Wrapper/TikaTest.php +++ b/Tests/Wrapper/TikaTest.php @@ -4,6 +4,8 @@ use Symfony\Bundle\FrameworkBundle\Tests\TestCase; use Funstaff\TikaBundle\Wrapper\Tika; +use Symfony\Bridge\Monolog\Logger; +use Monolog\Handler\NullHandler; /** * TikaTest @@ -14,14 +16,18 @@ class TikaTest extends TestCase { public function setup() { - $this->config = array('tika_path' => '/www/bin/tika-app-1.0.jar'); + $this->config = array( + 'tika_path' => '/www/bin/tika-app-1.0.jar', + 'output_format' => 'xml', + 'logging' => true); } public function testWithFailedSetOutputFormat() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); try { $tika->setOutputFormat('foo'); } catch (\InvalidArgumentException $e) { @@ -34,7 +40,8 @@ public function testSetOutputFormat() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $tika->setOutputFormat('xml'); $this->assertEquals('xml', $tika->getOutputFormat()); } @@ -43,7 +50,8 @@ public function testAddGetDocument() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $this->assertTrue(is_array($tika->getDocuments())); $this->assertEquals(0, count($tika->getDocuments())); $tika->addDocument('test', __DIR__.'/../documents/test.pdf'); @@ -51,11 +59,23 @@ public function testAddGetDocument() $this->assertInstanceOf('Funstaff\TikaBundle\Content\Document', $tika->getDocument('test')); } + public function testLogging() + { + $tika = new Tika($this->config, + 'Funstaff\TikaBundle\Content\Document', + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); + $this->assertTrue($tika->getLogging()); + $tika->setLogging(false); + $this->assertFalse($tika->getLogging()); + } + public function testWithNoAddedDocumentExtractContent() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); try { $tika->extractContent(); } catch (\InvalidArgumentException $e) { @@ -68,7 +88,8 @@ public function testExtractOutputXml() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $tika->addDocument('test', __DIR__.'/../documents/test.pdf'); $tika->extractContent(); $content = $tika->getDocument('test')->getContent(); @@ -86,7 +107,8 @@ public function testExtractOutputHtml() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $tika->addDocument('test', __DIR__.'/../documents/test.pdf'); $tika->setOutputFormat('html'); $tika->extractContent(); @@ -99,7 +121,8 @@ public function testExtractOutputText() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $tika->addDocument('test', __DIR__.'/../documents/test.pdf'); $tika->setOutputFormat('text'); $tika->extractContent(); @@ -112,7 +135,8 @@ public function testExtractMetadata() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $tika->addDocument('test', __DIR__.'/../documents/test.pdf'); $tika->extractMetadata(); $metadata = $tika->getDocument('test')->getMetadata(); @@ -124,7 +148,8 @@ public function testExtractAll() { $tika = new Tika($this->config, 'Funstaff\TikaBundle\Content\Document', - 'Funstaff\TikaBundle\Content\Metadata'); + 'Funstaff\TikaBundle\Content\Metadata', + $this->getLogger()); $tika->addDocument('test', __DIR__.'/../documents/test.pdf'); $tika->extractAll(); $content = $tika->getDocument('test')->getContent(); @@ -132,4 +157,12 @@ public function testExtractAll() $metadata = $tika->getDocument('test')->getMetadata(); $this->assertEquals('application/pdf', $metadata->get('Content-Type')); } + + protected function getLogger() + { + $logger = new Logger('test'); + $logger->pushHandler(new NullHandler()); + + return $logger; + } } \ No newline at end of file diff --git a/Wrapper/Tika.php b/Wrapper/Tika.php index 4325906..3687450 100644 --- a/Wrapper/Tika.php +++ b/Wrapper/Tika.php @@ -3,6 +3,7 @@ namespace Funstaff\TikaBundle\Wrapper; use Funstaff\TikaBundle\Wrapper\TikaInterface; +use Symfony\Bridge\Monolog\Logger; /** * Tika @@ -21,16 +22,22 @@ class Tika implements TikaInterface protected $metadataClass; + protected $logger; + + protected $logging; + /** * Construct * * @param Array $configuration */ - public function __construct(array $configuration, $documentClass, $metadataClass) + public function __construct(array $configuration, $documentClass, $metadataClass, Logger $logger) { $this->configuration = $configuration; $this->documentClass = $documentClass; $this->metadataClass = $metadataClass; + $this->logger = $logger; + $this->logging = $configuration['logging']; $this->outputFormat = ($configuration['output_format']) ? : 'xml'; } @@ -99,6 +106,26 @@ public function getDocuments() return $this->document; } + /** + * Set Logging + * + * @param boolean $logging + */ + public function setLogging($logging) + { + $this->logging = $logging; + } + + /** + * Get Logging + * + * @return $logging + */ + public function getLogging() + { + return $this->logging; + } + /** * Extract Content * @@ -109,6 +136,9 @@ public function extractContent() ob_start(); $command = $this->generateTikaCommand($this->outputFormat); foreach ($this->document as $doc) { + if ($this->logger && $this->logging) { + $this->logger->debug(sprintf('Tika extract content: %s', $doc->getPath())); + } passthru(sprintf("$command %s", $doc->getPath())); $output = ob_get_clean(); $doc->setContent($output); @@ -125,6 +155,9 @@ public function extractMetadata() ob_start(); $command = $this->generateTikaCommand('meta'); foreach ($this->document as $doc) { + if ($this->logger && $this->logging) { + $this->logger->debug(sprintf('Tika extract metadata: %s', $doc->getPath())); + } passthru(sprintf("$command %s", $doc->getPath())); $output = ob_get_clean(); $doc->setMetadata(new $this->metadataClass($output));