From ebd4dd1cdfd34d475162210e53c432405eaccfd8 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Thu, 10 Nov 2016 16:58:28 -0600 Subject: [PATCH 01/16] added dynamic support for AcroForm objects and reused form fields; added helper method PdfDocument::appendPagesFrom() --- .../ZendPdf/InternalType/AcroFormObject.php | 261 ++++++++++++++++++ .../AcroFormObject/AcroFormFieldWorker.php | 131 +++++++++ .../ZendPdf/InternalType/DictionaryObject.php | 9 + .../ZendPdf/InternalType/IndirectObject.php | 11 +- .../InternalType/IndirectObjectReference.php | 7 +- library/ZendPdf/ObjectFactory.php | 52 +++- library/ZendPdf/PdfDocument.php | 76 ++++- 7 files changed, 538 insertions(+), 9 deletions(-) create mode 100644 library/ZendPdf/InternalType/AcroFormObject.php create mode 100644 library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php new file mode 100644 index 0000000..8150923 --- /dev/null +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -0,0 +1,261 @@ + + * @license http://framework.zend.com/license/new-bsd New BSD License + * @package Zend_Pdf + */ + +namespace ZendPdf\InternalType; + +use ZendPdf as Pdf; +use ZendPdf\Exception; +use ZendPdf\Page; +use ZendPdf\ObjectFactory; +use ZendPdf\InternalType\DictionaryObject; +use ZendPdf\InternalType\IndirectObjectReference; +use ZendPdf\InternalType\IndirectObject; +use ZendPdf\InternalType\ArrayObject; + +/** + * PDF file 'AcroForm' element implementation + * + * @category Zend + * @package Zend_PDF + * @subpackage Zend_PDF_Internal + */ +class AcroFormObject +{ + + /** + * Associative array of form fields in this document. + * @var array of DictionaryObject representing each form field + */ + protected $_fields = array(); + + /** + * PDF objects factory. + * + * @var \ZendPdf\ObjectFactory + */ + protected $_objFactory = null; + + /** + * Array of object factories already processed by this form + * @var array where the key is the ObjectFactory->getId() and the value is the ObjectFactory itself + */ +// protected $objFactories = array(); + + /** + * Reference to the primary form DictionaryObject (wrapped in an IndirectObject) + * @var IndirectObject + */ + protected $_primaryFormDict = null; + + /** + * The original form object supplied to the constructor + * @var AbstractTypeObject + */ + protected $_sourceForm = null; + + /** + * Reference to the context extracted from the primary form + * @var IndirectObjectReference\Context + */ + protected $_primaryContext = null; + + /** + * Array of IndirectObjectReference that each point to an AcroForm DictionaryObject + * @var array of IndirectObjectReference objects + */ + public $_formObjReferences = null; + + /** + * A log of events related to processing the form + * @var array + */ + protected $log = array(); + + /** + * The parent form when merging pages from multiple PDFs + * @var AcroFormObject + */ + protected $_parentForm = null; + + /** + * Object constructor + * + * @param IndirectObjectReference $val that points to an IndirectObject, and in turn a DictionaryObject + * @param ObjectFactory $objFactory + * @throws \ZendPdf\Exception\ExceptionInterface + */ + public function __construct($val, ObjectFactory $objFactory) + { + $this->_sourceForm = $val; + $this->_objFactory = $objFactory; + + $this->_formObjReferences = []; + + // find the IndirectObject that contains the DictionaryObject + if ($val !== null) { + $formDict = $val->getObject(); + } else { + $formDict = null; + } + $this->createForm($formDict, $objFactory); + } + + /** + * Create a shared form field object for each source form field in the source form. Note that this + * should only be called in context of the parent AcroFormObject when there are multiple forms/pages + * being merged. + * @param AcroFormObject $formObject or null if it should use $this + */ + public function processFormFields($formObject = null) + { + if ($formObject === null) { + $formObject = $this; + } + + $this->processFormFieldsInFactory($formObject, $this->_objFactory); + + } + + /** + * + * @param \ZendPdf\InternalType\AcroFormObject $formObject + * @param ObjectFactory $factory + */ + private function processFormFieldsInFactory(AcroFormObject $formObject, ObjectFactory $factory) + { + $fieldItems = $factory->getModifiedObjects(); + + // catalog the form fields + foreach ($fieldItems as $io) { + if ($io instanceof IndirectObject) { + /* @var $io IndirectObject */ + if ($io->getType() == IndirectObject::TYPE_DICTIONARY && $this->isFormField($io)) { + $formObject->createFormField($io, $formObject->getObjFactory()); + } + } + } + + foreach ($factory->getAttachedFactories() as $subFactory) { + $this->processFormFieldsInFactory($formObject, $subFactory); + } + } + + /** + * + * @param IndirectObject $obj + * @return boolean + */ + private function isFormField(IndirectObject $obj) + { + if ($obj->Type !== null && $obj->Type->value === "Annot" && $obj->Subtype !== null && $obj->Subtype->value === "Widget") { + return true; + } else { + return false; + } + } + + public function getObjFactory() + { + return $this->_objFactory; + } + + /** + * Adds an AcroForm parameter to the Root object, if this form contains any defined fields + * @param AbstractTypeObject the Root object + * @return IndirectObjectReference + */ + public function createFormReference(AbstractTypeObject $root) + { + /* @var $fields ArrayObject */ + $fields = $this->_primaryFormDict->Fields; + if (count($fields->items) > 0) { + $ref = new IndirectObjectReference($this->_primaryFormDict->getObjNum(), $this->_primaryFormDict->getGenNum(), null, $this->_objFactory); + $root->AcroForm = $ref; + } + } + + public function merge(AcroFormObject $otherForm) + { + foreach ($otherForm->_formObjReferences as $ref) + { + if (!in_array($ref, $this->_formObjReferences, true)) { + /* @var $ref IndirectObjectReference */ + $this->_formObjReferences[] = $ref; + } + } + } + + /** + * + * @param IndirectObject $sourceForm + * @param ObjectFactory $factory + */ + protected function createForm($sourceForm, ObjectFactory $factory) + { + // create a new field object + $dict = new DictionaryObject(); + $dict->Fields = new ArrayObject(); + + // copy font configuration + if ($sourceForm !== null && $sourceForm instanceof IndirectObject) { +// if ($sourceForm->DA !== null) { +// $dict->DA = clone $sourceForm->DA; +// } +// if ($sourceForm->DR !== null) { +// $dict->DR = clone $sourceForm->DR; +// } +// if ($sourceForm->Font !== null) { +// $dict->Font = clone $sourceForm->Font; +// } + } + + // create a shared field object + $objRef = $factory->newObject($dict); + + $this->_primaryFormDict = $objRef; + } + + /** + * Create a new form field OR locate an existing one by the same name. + * @param IndirectObject $widget + * @param ObjectFactory $factory the object factory in which to create any NEW objects (NOTE: this is NOT necessarily the object factory that contains $widget) + * @return IndirectObjectReference a reference to the shared form field + */ + protected function createFormField(IndirectObject $widget, ObjectFactory $factory) + { + $worker = $widget->getFactory()->getAcroFormFieldWorker(); + + if (!$worker->shouldProcessField($factory, $widget)) { + return; + } + + $title = $worker->getTitle($factory, $widget); + + if (array_key_exists($title, $this->_fields)) { + // reuse the existing field + $objRef = $this->_fields[$title]; + + } else { + // create a new dictionary and indirect object + $objRef = $worker->createNewSharedField($factory, $widget, $title, $this->_primaryFormDict); + + $this->_fields[$title] = $objRef; + } + + $worker->linkPageFieldToSharedField($factory, $widget, $objRef); + } + + protected function mergeAndDestroyForm(ObjectFactory $factory, $key, DictionaryObject $dict) + { + throw new \Exception("TODO: merge AcroForm dictionaries"); + } + +} \ No newline at end of file diff --git a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php new file mode 100644 index 0000000..b8a6acd --- /dev/null +++ b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php @@ -0,0 +1,131 @@ +FT !== null && $widget->T !== null) { + return true; + } elseif ($widget->Parent !== null && $widget->Parent->T !== null) { + return true; + } else { + return false; + } + } + + /** + * Generate the title for this form field. This method allows you to deduplicate or merge + * form fields, for example while combining multiple PDF files. + * @param ObjectFactory $targetFactory + * @param IndirectObject $widget + * @return string + */ + public function getTitle(ObjectFactory $targetFactory, IndirectObject $widget) + { + if ($widget->FT !== null && $widget->T !== null) { + $title = $widget->T->value; + } elseif ($widget->Parent !== null && $widget->Parent->T !== null) { + $title = $widget->Parent->T->value; + } else { + $title = null; // this shouldn't ever be called, unless we're sub-classed... if you subclass this, and change the processField() method, this is YOUR responsibility! + } + return $title; + } + + /** + * Create a new DictionaryObject representing the new shared field. + * @param ObjectFactory $targetFactory + * @param IndirectObject $widget + * @param string $title + * @param IndirectObject $formDictionary + * @return IndirectObject + */ + public function createNewSharedField(ObjectFactory $targetFactory, IndirectObject $widget, $title, IndirectObject $formDictionary) + { + $dict = $this->createNewFieldDictionary($widget, $title); + $objRef = $this->createNewFieldIndirectObject($targetFactory, $dict); + + $this->addNewFieldToForm($targetFactory, $objRef, $formDictionary); + + return $objRef; + } + + /** + * @param ObjectFactory $targetFactory + * @param IndirectObject $widget + * @param string $title + * @return DictionaryObject + */ + protected function createNewFieldDictionary(IndirectObject $widget, $title) + { + // create a new field object + $dict = new DictionaryObject(); + $dict->DA = clone $widget->DA; + $dict->FT = clone $widget->FT; + $dict->Kids = new ArrayObject(); + $dict->T = new StringObject($title); + + return $dict; + } + + /** + * @param ObjectFactory $targetFactory + * @param DictionaryObject $dict + * @return IndirectObject + */ + protected function createNewFieldIndirectObject(ObjectFactory $targetFactory, DictionaryObject $dict) + { + $objRef = $targetFactory->newObject($dict); + return $objRef; + } + + /** + * @param ObjectFactory $targetFactory + * @param IndirectObject $objRef + * @param IndirectObject $formDictionary + */ + protected function addNewFieldToForm(ObjectFactory $targetFactory, IndirectObject $objRef, IndirectObject $formDictionary) + { + // add to the form + $ref = new IndirectObjectReference($objRef->getObjNum(), $objRef->getGenNum(), null, $targetFactory); + $formDictionary->Fields->items[] = $ref; + } + + /** + * Update the page-specific field object to point to the new shared field object. + * @param ObjectFactory $targetFactory + * @param IndirectObject $pageField + * @param IndirectObject $sharedField + */ + public function linkPageFieldToSharedField(ObjectFactory $targetFactory, IndirectObject $pageField, IndirectObject $sharedField) + { + // hack up the supplied widget to point to the new shared field + unset($pageField->FT); + unset($pageField->T); + unset($pageField->P); // TODO: link back to Page object + + // create a new reference for the original embedded field + $ior = new IndirectObjectReference($sharedField->getObjNum(), $sharedField->getGenNum(), null, $targetFactory); // as long as this IOR references an object in its own factory, the context can be null + $pageField->Parent = $ior; + $pageField->getFactory()->markAsModified($pageField); + + // add new field usage to the field's Kids array + $sharedField->Kids->items[] = new IndirectObjectReference($pageField->getObjNum(), $pageField->getGenNum(), null, $pageField->getFactory()); + } + +} \ No newline at end of file diff --git a/library/ZendPdf/InternalType/DictionaryObject.php b/library/ZendPdf/InternalType/DictionaryObject.php index 39734f7..0587820 100644 --- a/library/ZendPdf/InternalType/DictionaryObject.php +++ b/library/ZendPdf/InternalType/DictionaryObject.php @@ -108,6 +108,15 @@ public function __set($item, $value) $this->_items[$item] = $value; } } + + /** + * Delete the specified key + * + * @param string $item + */ + public function __unset($item) { + unset($this->_items[$item]); + } /** * Return type of the element. diff --git a/library/ZendPdf/InternalType/IndirectObject.php b/library/ZendPdf/InternalType/IndirectObject.php index 05f1661..065a9cb 100644 --- a/library/ZendPdf/InternalType/IndirectObject.php +++ b/library/ZendPdf/InternalType/IndirectObject.php @@ -50,7 +50,7 @@ class IndirectObject extends AbstractTypeObject * @var \ZendPdf\ObjectFactory */ protected $_factory; - + /** * Object constructor * @@ -187,6 +187,15 @@ public function __set($property, $value) { $this->_value->$property = $value; } + + /** + * Delete the specified key + * + * @param string $property + */ + public function __unset($property) { + unset($this->_value->$property); + } /** * Call handler diff --git a/library/ZendPdf/InternalType/IndirectObjectReference.php b/library/ZendPdf/InternalType/IndirectObjectReference.php index 5c7358a..b9b9e8b 100644 --- a/library/ZendPdf/InternalType/IndirectObjectReference.php +++ b/library/ZendPdf/InternalType/IndirectObjectReference.php @@ -75,7 +75,7 @@ class IndirectObjectReference extends AbstractTypeObject */ public function __construct($objNum, $genNum = 0, - IndirectObjectReference\Context $context, + $context, Pdf\ObjectFactory $factory) { if ( !(is_integer($objNum) && $objNum > 0) ) { @@ -84,6 +84,9 @@ public function __construct($objNum, if ( !(is_integer($genNum) && $genNum >= 0) ) { throw new Exception\RuntimeException('Generation number must be non-negative integer'); } + if ($context !== null && !($context instanceof IndirectObjectReference\Context)) { + throw new Exception\RuntimeException('Supplied context must be of type IndirectObjectReference\Context'); + } $this->_objNum = $objNum; $this->_genNum = $genNum; @@ -91,7 +94,7 @@ public function __construct($objNum, $this->_context = $context; $this->_factory = $factory; } - + /** * Check, that object is generated by specified factory * diff --git a/library/ZendPdf/ObjectFactory.php b/library/ZendPdf/ObjectFactory.php index fc6026b..1920976 100644 --- a/library/ZendPdf/ObjectFactory.php +++ b/library/ZendPdf/ObjectFactory.php @@ -14,6 +14,7 @@ use ZendPdf\Exception; use ZendPdf\InternalType; use ZendPdf\ObjectFactory\UpdateInfoContainer; +use ZendPdf\InternalType\AcroFormObject\AcroFormFieldWorker; /** * PDF element factory. @@ -92,8 +93,14 @@ class ObjectFactory * @var array */ private $_shiftCalculationCache = array(); + + /** + * Suffix to append to any AcroForm fields found in this ObjectFactory + * @var AcroFormFieldWorker + */ + private $_acroFormFieldWorker = null; - + /** * Object constructor * @@ -134,6 +141,25 @@ public function close() } $this->_registeredObjects = null; } + + /** + * @param AcroFormFieldWorker $worker + */ + public function setAcroFormFieldWorker(AcroFormFieldWorker $worker) + { + $this->_acroFormFieldWorker = $worker; + } + + /** + * @return AcroFormFieldWorker + */ + public function getAcroFormFieldWorker() + { + if ($this->_acroFormFieldWorker === null) { + $this->_acroFormFieldWorker = new AcroFormFieldWorker(); + } + return $this->_acroFormFieldWorker; + } /** * Get factory ID @@ -188,9 +214,10 @@ public function attach(ObjectFactory $factory) */ return; } - + $this->_attachedFactories[$factory->getId()] = $factory; } + /** @@ -208,7 +235,8 @@ public function calculateShift(ObjectFactory $factory) if (isset($this->_shiftCalculationCache[$factory->_factoryId])) { return $this->_shiftCalculationCache[$factory->_factoryId]; } - + + // determine our shift based on attached sub-factories $shift = $this->_objectCount - 1; foreach ($this->_attachedFactories as $subFactory) { @@ -385,7 +413,23 @@ public function fetchObject($refString) } return $this->_registeredObjects[$refString]; } - + + /** + * Fetch all the modified objects in an associative array + * @return array + */ + public function getModifiedObjects() + { + return $this->_modifiedObjects; + } + + /** + * Return the attached ObjectFactory objects + */ + public function getAttachedFactories() + { + return $this->_attachedFactories; + } /** * Check if PDF file was modified diff --git a/library/ZendPdf/PdfDocument.php b/library/ZendPdf/PdfDocument.php index 139dcaa..e27a609 100644 --- a/library/ZendPdf/PdfDocument.php +++ b/library/ZendPdf/PdfDocument.php @@ -12,6 +12,8 @@ use Zend\Memory; use ZendPdf\Exception; +use ZendPdf\InternalType\AcroFormObject; +use ZendPdf\Page; /** * General entity which describes PDF document. @@ -145,14 +147,29 @@ class PdfDocument * @var \ZendPdf\PdfParser\StructureParser */ protected $_parser; - - + + /** + * Container and helper class for Acrobat forms and form fields + * @var AcroFormObject + */ + protected $_form; + /** * List of inheritable attributesfor pages tree * * @var array */ protected static $_inheritableAttributes = array('Resources', 'MediaBox', 'CropBox', 'Rotate'); + + /** + * An object that represents the AcroForm + * + * @return AcroFormObject + */ + public function getForm() + { + return $this->_form; + } /** * Request used memory manager @@ -258,6 +275,9 @@ public function __construct($source = null, $revision = null, $load = false) } else { $this->_loadPages($this->_trailer->Root->Pages); } + + // parse any existing form and fields + $this->_form = new AcroFormObject($this->_trailer->Root->AcroForm, $this->_objFactory); $this->_loadNamedDestinations($this->_trailer->Root, $this->_parser->getPDFVersion()); $this->_loadOutlines($this->_trailer->Root); @@ -307,6 +327,9 @@ public function __construct($source = null, $revision = null, $load = false) $trailerDictionary->Size = new InternalType\NumericObject(0); $this->_trailer = new Trailer\Generated($trailerDictionary); + + // create an empty form + $this->_form = new AcroFormObject(null, $this->_objFactory); /** * Document catalog indirect object. @@ -493,6 +516,53 @@ protected function _loadOutlines(InternalType\IndirectObjectReference $root) $this->_originalOpenOutlinesCount = $root->Outlines->Count->value; } } + + /** + * Appends pages from the supplied PDF to the current document + * @param \ZendPdf\PdfDocument $pdf another PDF whose pages and form should be appended to this document + * @param AcroFormObject\AcroFormFieldWorker $formFieldWorker a custom form field worker to be used when generating new shared fields + */ + public function appendPagesFrom(PdfDocument $pdf, $formFieldWorker = null) + { + if ($formFieldWorker !== null && !($formFieldWorker instanceof AcroFormObject\AcroFormFieldWorker)) { + throw new \Exception("Invalid value specified for \$formFieldWorker when calling PdfDocument::appendPagesFrom()"); + } + + // create a new ObjectFactory for these cloned pages + $objFactory = ObjectFactory::createFactory($pdf->_objFactory->getObjectCount()); + if ($formFieldWorker !== null) { + $objFactory->setAcroFormFieldWorker($formFieldWorker); + } + $this->_objFactory->attach($objFactory); + $processed = []; + + /* @var $page \ZendPdf\Page */ + foreach ($pdf->pages as $page) { + /* + * Either of these clone methods will make a page available for use in another doc however + * $page->clonePage() will let us reduce the total number of object factories in play. + */ + $newpage = $page->clonePage($objFactory, $processed); + $this->pages[] = $newpage; + } + } + + /** + * Find any directly placed form fields, and move them to the AcroForm object. Change the DictionaryObject into + * a pointer to that shared field. + */ + protected function _deduplicateFormFields() + { + $this->_form->processFormFields(); + } + + /** + * Add the AcroForm if it has been defined. + */ + protected function _dumpForm() + { + $this->_form->createFormReference($this->_trailer->Root); + } /** * Organize pages to the pages tree structure. @@ -1161,6 +1231,8 @@ public function render($newSegmentOnly = false, $outputStream = null) $this->_trailer->Info = $docInfo; } + $this->_deduplicateFormFields(); + $this->_dumpForm(); $this->_dumpPages(); $this->_dumpNamedDestinations(); $this->_dumpOutlines(); From 370f6fa325e80eab46ae375101ea86b388ff517e Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Thu, 10 Nov 2016 18:26:11 -0600 Subject: [PATCH 02/16] Added missing copyright info for the other new file --- .../InternalType/AcroFormObject/AcroFormFieldWorker.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php index b8a6acd..57cf31c 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php +++ b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php @@ -1,4 +1,13 @@ + * @license http://framework.zend.com/license/new-bsd New BSD License + * @package Zend_Pdf + */ namespace ZendPdf\InternalType\AcroFormObject; From 6926a196c09a9c8fb8fd2e0552865783c64fe72d Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Tue, 15 Nov 2016 15:31:28 -0600 Subject: [PATCH 03/16] properly handle missing but expected dictionary keys in field objects; inherit form-level keys --- .../ZendPdf/InternalType/AcroFormObject.php | 18 +++++++++--------- .../AcroFormObject/AcroFormFieldWorker.php | 8 ++++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index 8150923..ff654f9 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -206,15 +206,15 @@ protected function createForm($sourceForm, ObjectFactory $factory) // copy font configuration if ($sourceForm !== null && $sourceForm instanceof IndirectObject) { -// if ($sourceForm->DA !== null) { -// $dict->DA = clone $sourceForm->DA; -// } -// if ($sourceForm->DR !== null) { -// $dict->DR = clone $sourceForm->DR; -// } -// if ($sourceForm->Font !== null) { -// $dict->Font = clone $sourceForm->Font; -// } + if ($sourceForm->DA !== null) { + $dict->DA = clone $sourceForm->DA; + } + if ($sourceForm->DR !== null) { + $dict->DR = clone $sourceForm->DR; + } + if ($sourceForm->Font !== null) { + $dict->Font = clone $sourceForm->Font; + } } // create a shared field object diff --git a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php index 57cf31c..181cef3 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php +++ b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php @@ -84,8 +84,12 @@ protected function createNewFieldDictionary(IndirectObject $widget, $title) { // create a new field object $dict = new DictionaryObject(); - $dict->DA = clone $widget->DA; - $dict->FT = clone $widget->FT; + if ($widget->DA !== null) { + $dict->DA = clone $widget->DA; + } + if ($widget->FT !== null) { + $dict->FT = clone $widget->FT; + } $dict->Kids = new ArrayObject(); $dict->T = new StringObject($title); From f6ba79bddfc947ab9d11651e75ba30722704f246 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Sat, 19 Nov 2016 16:01:29 -0600 Subject: [PATCH 04/16] new helper method to access the PDF version --- library/ZendPdf/PdfDocument.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/library/ZendPdf/PdfDocument.php b/library/ZendPdf/PdfDocument.php index e27a609..8b2ab64 100644 --- a/library/ZendPdf/PdfDocument.php +++ b/library/ZendPdf/PdfDocument.php @@ -170,6 +170,15 @@ public function getForm() { return $this->_form; } + + /** + * Returns the PDF version + * @return string + */ + public function getPdfVersion() + { + return $this->_pdfHeaderVersion; + } /** * Request used memory manager From 720e58c4deb2b7fe3aa08d616127c12bc38afc27 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Thu, 29 Dec 2016 14:52:20 -0600 Subject: [PATCH 05/16] WIP - new FormToken and form field data feature (INCOMPLETE) --- .../ZendPdf/InternalType/AcroFormObject.php | 63 +++++++++++++- .../AcroFormObject/AcroFormFieldWorker.php | 14 ++++ .../InternalType/AcroFormObject/FormToken.php | 82 +++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) create mode 100644 library/ZendPdf/InternalType/AcroFormObject/FormToken.php diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index ff654f9..e998bbc 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -19,6 +19,7 @@ use ZendPdf\InternalType\IndirectObjectReference; use ZendPdf\InternalType\IndirectObject; use ZendPdf\InternalType\ArrayObject; +use ZendPdf\InternalType\AcroFormObject\FormToken; /** * PDF file 'AcroForm' element implementation @@ -35,6 +36,12 @@ class AcroFormObject * @var array of DictionaryObject representing each form field */ protected $_fields = array(); + + /** + * Associative array of form tokens to be used when rendering. + * @var array of FormToken objects + */ + protected $_tokens = array(); /** * PDF objects factory. @@ -193,6 +200,42 @@ public function merge(AcroFormObject $otherForm) } } + /** + * Add (or replace) a token. + * @param FormToken $token + */ + public function addToken(FormToken $token) + { + $this->_tokens[$token->getFieldName()] = $token; + } + + /** + * Remove an existing token from the array of tokens. + * @param string $tokenFieldName + */ + public function removeToken($tokenFieldName) + { + unset($this->_tokens[$tokenFieldName]); + } + + /** + * Override any current tokens and set all the tokens supplied by the array. Can be an indexed or associative array, as long as each value is a FormToken object. + * @param array $tokens array of FormToken objects + */ + public function setTokens($tokens) + { + // start with a blank array + $this->_tokens = array(); + + // add each supplied token + foreach ($tokens as $token) + { + if ($token instanceof FormToken) { + $this->addToken($token); + } + } + } + /** * * @param IndirectObject $sourceForm @@ -231,14 +274,23 @@ protected function createForm($sourceForm, ObjectFactory $factory) */ protected function createFormField(IndirectObject $widget, ObjectFactory $factory) { + /* @var $token FormToken */ $worker = $widget->getFactory()->getAcroFormFieldWorker(); + $title = $worker->getTitle($factory, $widget); + $token = (array_key_exists($title, $this->_tokens)) ? $this->_tokens[$title] : null; - if (!$worker->shouldProcessField($factory, $widget)) { + // see if we're replacing this field with read-only text + if ($token !== null && $token->getMode() == FormToken::MODE_REPLACE) { + $worker->replaceField($factory, $widget, $token); return; } - $title = $worker->getTitle($factory, $widget); + // if this field has already been converted to a shared field, leave it be + if (!$worker->shouldProcessField($factory, $widget)) { + return; + } + // set up the shared form field object if (array_key_exists($title, $this->_fields)) { // reuse the existing field $objRef = $this->_fields[$title]; @@ -250,6 +302,13 @@ protected function createFormField(IndirectObject $widget, ObjectFactory $factor $this->_fields[$title] = $objRef; } + // populate the default value + if ($token !== null && $token->getMode() == FormToken::MODE_FILL) { + // apply the value to both the original field and the shared field + $widget->V = new StringObject($token->getValue()); + $objRef->V = new StringObject($token->getValue()); + } + $worker->linkPageFieldToSharedField($factory, $widget, $objRef); } diff --git a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php index 181cef3..d69cca4 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php +++ b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php @@ -17,6 +17,7 @@ use ZendPdf\InternalType\DictionaryObject; use ZendPdf\InternalType\ArrayObject; use ZendPdf\InternalType\StringObject; +use ZendPdf\InternalType\AcroFormObject\FormToken; class AcroFormFieldWorker { @@ -37,6 +38,17 @@ public function shouldProcessField(ObjectFactory $targetFactory, IndirectObject } } + /** + * Replace the existing form field with a read-only text block, using the same text formatting and positioning. + * @param ObjectFactory $targetFactory + * @param IndirectObject $widget + * @param FormToken $token + */ + public function replaceField(ObjectFactory $targetFactory, IndirectObject $widget, FormToken $token) + { + //TODO: + } + /** * Generate the title for this form field. This method allows you to deduplicate or merge * form fields, for example while combining multiple PDF files. @@ -82,6 +94,8 @@ public function createNewSharedField(ObjectFactory $targetFactory, IndirectObjec */ protected function createNewFieldDictionary(IndirectObject $widget, $title) { + // NOTE: do not move the value (V) attribute into a shared field dictionary + // create a new field object $dict = new DictionaryObject(); if ($widget->DA !== null) { diff --git a/library/ZendPdf/InternalType/AcroFormObject/FormToken.php b/library/ZendPdf/InternalType/AcroFormObject/FormToken.php new file mode 100644 index 0000000..770e881 --- /dev/null +++ b/library/ZendPdf/InternalType/AcroFormObject/FormToken.php @@ -0,0 +1,82 @@ + + * @license http://framework.zend.com/license/new-bsd New BSD License + * @package Zend_Pdf + */ + +namespace ZendPdf\InternalType\AcroFormObject; + +use ZendPdf\ObjectFactory; +use ZendPdf\InternalType\IndirectObject; +use ZendPdf\InternalType\IndirectObjectReference; +use ZendPdf\InternalType\DictionaryObject; +use ZendPdf\InternalType\ArrayObject; +use ZendPdf\InternalType\StringObject; + +class FormToken { + + /** + * The FILL mode pre-populates the form field with the requested value. + */ + const MODE_FILL = "fill"; + + /** + * The REPLACE mode replaces the form field with text using the same position, font, and sizing. + */ + const MODE_REPLACE = "replace"; + + private $fieldName; + + private $value; + + private $mode; + + /** + * Create a new FormToken object, representing a value to be used in this AcroForm. + * @param string $fieldName the name of the form field that should be affected by this token + * @param string $value the value to use + * @param constant $mode one of FormToken::MODE_FILL or FormToken::MODE_REPLACE + */ + public function __construct($fieldName, $value, $mode) { + $this->fieldName = $fieldName; + $this->value = $value; + if ($mode == self::MODE_FILL || $mode == self::MODE_REPLACE) { + $this->mode = $mode; + } else { + throw new \ZendPdf\Exception\NotImplementedException("Unknown mode supplied: " . $mode); + } + } + + /** + * Returns the supplied field name. + * @return string + */ + public function getFieldName() + { + return $this->fieldName; + } + + /** + * Returns the supplied value. + * @return string + */ + public function getValue() + { + return $this->value; + } + + /** + * Returns the supplied mode constant - one of FormToken::MODE_FILL or FormToken::MODE_REPLACE + * @return constant + */ + public function getMode() + { + return $this->mode; + } + +} From 624b9f0d29f014b3472cf67a23dbbe76b321baaa Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Sat, 31 Dec 2016 14:11:00 -0600 Subject: [PATCH 06/16] finished the FormToken::MODE_REPLACE feature; removed MODE_FILL for the time being (difficulties with V attributes vs storing the value in the associated stream value); --- .../ZendPdf/InternalType/AcroFormObject.php | 101 +++++++++++++++--- .../InternalType/AcroFormObject/FormToken.php | 35 +++++- .../ZendPdf/InternalType/IndirectObject.php | 10 ++ library/ZendPdf/Page.php | 72 +++++++++++++ library/ZendPdf/PdfDocument.php | 9 ++ 5 files changed, 210 insertions(+), 17 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index e998bbc..66b036e 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -33,7 +33,7 @@ class AcroFormObject /** * Associative array of form fields in this document. - * @var array of DictionaryObject representing each form field + * @var array of IndirectObject representing each form field */ protected $_fields = array(); @@ -155,6 +155,88 @@ private function processFormFieldsInFactory(AcroFormObject $formObject, ObjectFa } } + /** + * Process the supplied FormToken objects to replace form fields with read-only values. + * @param array $pages array of Page objects in the current document + * @param AcroFormObject $formObject or null if it should use $this + */ + public function replaceTokens($pages) + { + // loop through supplied tokens, find existing form fields, find and replace the field's instances with text blocks, delete the field references and any pointers in the ObjectFactory + /* @var $token FormToken */ + /* @var $field IndirectObject */ + foreach ($this->_tokens as $token) { + $fieldName = $token->getFieldName(); + if (array_key_exists($fieldName, $this->_fields)) { + $field = $this->_fields[$fieldName]; + + // the Kids property contains references to field instances, and each field instance's Parent property refers to the shared field + if ($field->Kids instanceof ArrayObject) { + /* @var $idr IndirectObjectReference */ + $i=0; + /* @var $items \ArrayObject */ + $items = $field->Kids->items; + foreach ($items as $idr) { + $io = $idr->getObject(); + /* + * Source properties that will be needed: + * DA = text style + * Rect = positioning + * P = page (note it's not always available - why?) + * Options for text block: + * - get the page, call drawText()? + * - repliace what happens in drawText()? + */ + $da = $idr->DA; // example: "/TiRo 8 Tf 0 g" + $rect = $idr->Rect; + $p = $idr->P; + $this->log[] = "processReplaceTokens(): Retrieved the field instance data"; + + if ($p === null) { + // we gotta go find the page now... + /* @var $page Page */ + foreach ($pages as $page) { + if ($page->findAnnotation($io)) { + $p = $page; + break; + } + } + } + if ($p !== null) { + /* @var $p Page */ + // draw some text! + + // parse font information from DA + $reg = '/([0-9]+) Tf/'; + $matches = []; + $reg_result = preg_match($reg, $da->toString(), $matches); + if ($reg_result == 1) { + // get the font size + $size = $matches[1]; + // TODO: also parse font name + $p->setFont(new \ZendPdf\Resource\Font\Simple\Standard\TimesRoman(), intval($size)); + } elseif ($p->getFont() === null) { + // default to Times-Roman 10 + $p->setFont(new \ZendPdf\Resource\Font\Simple\Standard\TimesRoman(), 10); + } + + $p->drawTextAt($token->getValue(), $io, $token->getOffsetX(), $token->getOffsetY()); + } + + $io->getFactory()->remove($io); + } + + // remove all the field instances - empty the array + $field->Kids->items = new \ArrayObject(); + } + + // remove the field from its factory + $field->getFactory()->remove($field); + + } + } + } + /** * * @param IndirectObject $obj @@ -279,12 +361,6 @@ protected function createFormField(IndirectObject $widget, ObjectFactory $factor $title = $worker->getTitle($factory, $widget); $token = (array_key_exists($title, $this->_tokens)) ? $this->_tokens[$title] : null; - // see if we're replacing this field with read-only text - if ($token !== null && $token->getMode() == FormToken::MODE_REPLACE) { - $worker->replaceField($factory, $widget, $token); - return; - } - // if this field has already been converted to a shared field, leave it be if (!$worker->shouldProcessField($factory, $widget)) { return; @@ -303,11 +379,12 @@ protected function createFormField(IndirectObject $widget, ObjectFactory $factor } // populate the default value - if ($token !== null && $token->getMode() == FormToken::MODE_FILL) { - // apply the value to both the original field and the shared field - $widget->V = new StringObject($token->getValue()); - $objRef->V = new StringObject($token->getValue()); - } + // note: FormToken:MODE_REPLACE is handled separately, after the form fields are merged. @see replaceTokens() +// if ($token !== null && $token->getMode() == FormToken::MODE_FILL) { +// // apply the value to both the original field and the shared field +// $widget->V = new StringObject($token->getValue()); +// $objRef->V = new StringObject($token->getValue()); +// } $worker->linkPageFieldToSharedField($factory, $widget, $objRef); } diff --git a/library/ZendPdf/InternalType/AcroFormObject/FormToken.php b/library/ZendPdf/InternalType/AcroFormObject/FormToken.php index 770e881..d3fcbfb 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/FormToken.php +++ b/library/ZendPdf/InternalType/AcroFormObject/FormToken.php @@ -23,7 +23,7 @@ class FormToken { /** * The FILL mode pre-populates the form field with the requested value. */ - const MODE_FILL = "fill"; +// const MODE_FILL = "fill"; /** * The REPLACE mode replaces the form field with text using the same position, font, and sizing. @@ -36,16 +36,23 @@ class FormToken { private $mode; + private $offsetX = 0; + + private $offsetY = 0; + /** * Create a new FormToken object, representing a value to be used in this AcroForm. * @param string $fieldName the name of the form field that should be affected by this token * @param string $value the value to use - * @param constant $mode one of FormToken::MODE_FILL or FormToken::MODE_REPLACE + * @param constant $mode one of FormToken::MODE_* constants */ - public function __construct($fieldName, $value, $mode) { + public function __construct($fieldName, $value, $mode, $offsetX=0, $offsetY=0) { $this->fieldName = $fieldName; $this->value = $value; - if ($mode == self::MODE_FILL || $mode == self::MODE_REPLACE) { + $this->offsetX = $offsetX; + $this->offsetY = $offsetY; + + if ($mode == self::MODE_REPLACE) { // $mode == self::MODE_FILL || $this->mode = $mode; } else { throw new \ZendPdf\Exception\NotImplementedException("Unknown mode supplied: " . $mode); @@ -71,7 +78,7 @@ public function getValue() } /** - * Returns the supplied mode constant - one of FormToken::MODE_FILL or FormToken::MODE_REPLACE + * Returns the supplied mode constant - one of FormToken::MODE_* constants * @return constant */ public function getMode() @@ -79,4 +86,22 @@ public function getMode() return $this->mode; } + /** + * When replacing the form field with read-only text, use this offset for positioning the new text + * @return integer + */ + public function getOffsetX() + { + return $this->offsetX; + } + + /** + * When replacing the form field with read-only text, use this offset for positioning the new text + * @return integer + */ + public function getOffsetY() + { + return $this->offsetY; + } + } diff --git a/library/ZendPdf/InternalType/IndirectObject.php b/library/ZendPdf/InternalType/IndirectObject.php index 065a9cb..3ec9716 100644 --- a/library/ZendPdf/InternalType/IndirectObject.php +++ b/library/ZendPdf/InternalType/IndirectObject.php @@ -97,6 +97,16 @@ public function getFactory() { return $this->_factory; } + + /** + * Check if the supplied object factory is our same object factory + * @param ObjectFactory $fact + * @return boolean + */ + public function compareFactory(ObjectFactory $fact) + { + return $this->_factory === $fact; + } /** * Return type of the element. diff --git a/library/ZendPdf/Page.php b/library/ZendPdf/Page.php index cc5473a..d506909 100644 --- a/library/ZendPdf/Page.php +++ b/library/ZendPdf/Page.php @@ -12,6 +12,7 @@ use ZendPdf\Exception; use ZendPdf\InternalType; +use ZendPdf\InternalType\IndirectObject; /** * PDF Page @@ -1591,6 +1592,46 @@ public function drawText($text, $x, $y, $charEncoding = '') return $this; } + + /** + * Draw a line of text at the location of the supplied object + * @param string $text the text to draw + * @param IndirectObject $locationObj the object to use for positioning the text + * @param integer $offsetX the X offset for placement + * @param integer $offsetY the Y offset for placement + * @return \ZendPdf\Page + * @throws Exception\LogicException + */ + public function drawTextAt($text, IndirectObject $locationObj, $offsetX=0, $offsetY=0) + { + $da = $locationObj->DA; + if ($da === null && $this->_font === null) { + throw new Exception\LogicException('Font has not been set and was not found in location object'); + } + if ($this->_font === null) { + throw new Exception\LogicException('Font has not been set'); + } + + /* @var $rect \ZendPdf\InternalType\ArrayObject */ + $rect = $locationObj->Rect; + if ($rect === null) { + throw new Exception\LogicException('Location Rect not available in location object'); + } + + $this->_addProcSet('Text'); + + $charEncoding = ''; + $textObj = new InternalType\StringObject($this->_font->encodeString($text, $charEncoding)); + $xObj = intval($rect->items[0]->toString()) + $offsetX; + $yObj = intval($rect->items[1]->toString()) + $offsetY; + + $this->_contents .= "BT\n" + . $xObj . ' ' . $yObj . " Td\n" + . $textObj->toString() . " Tj\n" + . "ET\n"; + + return $this; + } /** * @@ -1619,6 +1660,37 @@ public function attachAnnotation(Annotation\AbstractAnnotation $annotation) return $this; } + + /** + * @return array of annotations (including form field references) in this Page + */ + public function getAnnotations() + { + if ($this->_pageDictionary->Annots === null) { + return []; + } else { + return $this->_pageDictionary->Annots->items; + } + } + + /** + * Find an annotation by object reference number + * @return object the annotation + */ + public function findAnnotation(IndirectObject $object) + { + // short circuit for no annotations + if ($this->_pageDictionary->Annots === null) { + return false; + } + // lets find a match + foreach ($this->_pageDictionary->Annots->items as $annot) { + if ($annot === $object) { + return true; + } + } + return false; + } /** * Return the height of this page in points. diff --git a/library/ZendPdf/PdfDocument.php b/library/ZendPdf/PdfDocument.php index 8b2ab64..7ac60d3 100644 --- a/library/ZendPdf/PdfDocument.php +++ b/library/ZendPdf/PdfDocument.php @@ -565,6 +565,14 @@ protected function _deduplicateFormFields() $this->_form->processFormFields(); } + /** + * Find any matching form fields and replace them with read-only text blocks. + */ + protected function _replaceTokens() + { + $this->_form->replaceTokens($this->pages); + } + /** * Add the AcroForm if it has been defined. */ @@ -1241,6 +1249,7 @@ public function render($newSegmentOnly = false, $outputStream = null) } $this->_deduplicateFormFields(); + $this->_replaceTokens(); $this->_dumpForm(); $this->_dumpPages(); $this->_dumpNamedDestinations(); From 41a8c67fe7d5ec602d64c9d3fc540fdfed2d0bd0 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Sat, 31 Dec 2016 15:55:18 -0600 Subject: [PATCH 07/16] bug fix for possible null DA value --- library/ZendPdf/InternalType/AcroFormObject.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index 66b036e..b709a1b 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -209,7 +209,7 @@ public function replaceTokens($pages) // parse font information from DA $reg = '/([0-9]+) Tf/'; $matches = []; - $reg_result = preg_match($reg, $da->toString(), $matches); + $reg_result = ($da === null) ? 0 : preg_match($reg, $da->toString(), $matches); if ($reg_result == 1) { // get the font size $size = $matches[1]; From 2150ac4cf26fe8c21a11327f497f2828fd46c641 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Mon, 2 Jan 2017 15:19:53 -0600 Subject: [PATCH 08/16] added center and right alignment support for auto-fill form fields; started prepping for finding and using the font specified in the form field; --- .../ZendPdf/InternalType/AcroFormObject.php | 62 ++++++++++++++----- library/ZendPdf/Page.php | 54 +++++++++++++++- library/ZendPdf/PdfDocument.php | 4 +- 3 files changed, 99 insertions(+), 21 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index b709a1b..cf773c7 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -14,6 +14,7 @@ use ZendPdf as Pdf; use ZendPdf\Exception; use ZendPdf\Page; +use ZendPdf\Font; use ZendPdf\ObjectFactory; use ZendPdf\InternalType\DictionaryObject; use ZendPdf\InternalType\IndirectObjectReference; @@ -31,6 +32,12 @@ class AcroFormObject { + /** + * The owning PDF Document. + * @var Pdf\PdfDocument + */ + protected $_pdf; + /** * Associative array of form fields in this document. * @var array of IndirectObject representing each form field @@ -99,8 +106,9 @@ class AcroFormObject * @param ObjectFactory $objFactory * @throws \ZendPdf\Exception\ExceptionInterface */ - public function __construct($val, ObjectFactory $objFactory) + public function __construct(Pdf\PdfDocument $pdf, $val, ObjectFactory $objFactory) { + $this->_pdf = $pdf; $this->_sourceForm = $val; $this->_objFactory = $objFactory; @@ -188,7 +196,6 @@ public function replaceTokens($pages) * - repliace what happens in drawText()? */ $da = $idr->DA; // example: "/TiRo 8 Tf 0 g" - $rect = $idr->Rect; $p = $idr->P; $this->log[] = "processReplaceTokens(): Retrieved the field instance data"; @@ -205,21 +212,9 @@ public function replaceTokens($pages) if ($p !== null) { /* @var $p Page */ // draw some text! + list($font, $size) = $this->getFontAndSize($da); - // parse font information from DA - $reg = '/([0-9]+) Tf/'; - $matches = []; - $reg_result = ($da === null) ? 0 : preg_match($reg, $da->toString(), $matches); - if ($reg_result == 1) { - // get the font size - $size = $matches[1]; - // TODO: also parse font name - $p->setFont(new \ZendPdf\Resource\Font\Simple\Standard\TimesRoman(), intval($size)); - } elseif ($p->getFont() === null) { - // default to Times-Roman 10 - $p->setFont(new \ZendPdf\Resource\Font\Simple\Standard\TimesRoman(), 10); - } - + $p->setFont($font, $size); $p->drawTextAt($token->getValue(), $io, $token->getOffsetX(), $token->getOffsetY()); } @@ -237,6 +232,41 @@ public function replaceTokens($pages) } } + /** + * Extract the font styling from the supplied string. + * @param string $da Font styling string (e.g. Helv 12 Tf 0 g) typically found in the DA attribute on a PDF element. + * @return list($font, $size, $g) + */ + private function getFontAndSize($da) + { +// $fonts = $this->_pdf->extractFonts(); + + $font = null; + // parse font information from DA + $reg = '/^\(\\/(.*?) ([0-9]+) Tf ([0-9]+) g\)$/'; + $matches = []; + + $da_str = ($da === null) ? "" : $da->toString(); + $reg_result = preg_match($reg, $da_str, $matches); + if ($reg_result == 1) { + // get the font size + $fontName = $matches[1]; + // TODO: properly look up font names. E.g. $fontName might be "TiRo", and there is an + // xref SOMEWHERE that we can use that looks like this: <> + $font = $this->_pdf->extractFont($fontName); + $size = intval($matches[2]); + $g = intval($matches[3]); + } else { + // defaults + $size = 10; + $g = 0; + } + if ($font === null) { + $font = new \ZendPdf\Resource\Font\Simple\Standard\TimesRoman(); + } + return [$font, $size, $g]; + } + /** * * @param IndirectObject $obj diff --git a/library/ZendPdf/Page.php b/library/ZendPdf/Page.php index d506909..107a0d3 100644 --- a/library/ZendPdf/Page.php +++ b/library/ZendPdf/Page.php @@ -13,6 +13,7 @@ use ZendPdf\Exception; use ZendPdf\InternalType; use ZendPdf\InternalType\IndirectObject; +use ZendPdf\InternalType\NumericObject; /** * PDF Page @@ -1622,9 +1623,36 @@ public function drawTextAt($text, IndirectObject $locationObj, $offsetX=0, $offs $charEncoding = ''; $textObj = new InternalType\StringObject($this->_font->encodeString($text, $charEncoding)); - $xObj = intval($rect->items[0]->toString()) + $offsetX; - $yObj = intval($rect->items[1]->toString()) + $offsetY; - + + // read the Rect object and get actual numbers we can use + $loc = []; + foreach ($rect->items as $idx => $item) { + $loc[$idx] = intval($item->toString()); + } + + // determine horizontal alignment + /* @var $align NumericObject */ + $align = $locationObj->Q; + $aligned = false; + if ($align !== null) { + // measure the text we're about to draw + $width = $this->getTextWidth($text); + // calculate the position based on the horizontal alignment specified + if ($align->value == "1") { // centered + $xObj = $loc[0] + ((($loc[2]-$loc[0])/2) - ($width/2)); // ignore the offset since that is considered padding and we're centering + $aligned = true; + } elseif ($align->value == "2") { // right + $xObj = $loc[2] - $width - $offsetX; + $aligned = true; + } + } + if (!$aligned) { // left + $xObj = $loc[0] + $offsetX; + } + + // Y is always the same regardless of horizontal alignment + $yObj = $loc[1] + $offsetY; + $this->_contents .= "BT\n" . $xObj . ' ' . $yObj . " Td\n" . $textObj->toString() . " Tj\n" @@ -1632,6 +1660,26 @@ public function drawTextAt($text, IndirectObject $locationObj, $offsetX=0, $offs return $this; } + + /** + * Calculates the width of the supplied line of text based on the current $_font and $_fontSize. + * @param string $text + */ + public function getTextWidth($text) + { + if ($this->_font === null) { + throw new Exception\LogicException('Font has not been set'); + } + $drawing_text = iconv('', 'UTF-8', $text); + $characters = array(); + for ($i = 0; $i < strlen($drawing_text); $i++) { + $characters[] = ord($drawing_text[$i]); + } + $glyphs = $this->_font->glyphNumbersForCharacters($characters); + $widths = $this->_font->widthsForGlyphs($glyphs); + $text_width = (array_sum($widths) / $this->_font->getUnitsPerEm()) * $this->_fontSize; + return $text_width; + } /** * diff --git a/library/ZendPdf/PdfDocument.php b/library/ZendPdf/PdfDocument.php index 7ac60d3..cc01b58 100644 --- a/library/ZendPdf/PdfDocument.php +++ b/library/ZendPdf/PdfDocument.php @@ -286,7 +286,7 @@ public function __construct($source = null, $revision = null, $load = false) } // parse any existing form and fields - $this->_form = new AcroFormObject($this->_trailer->Root->AcroForm, $this->_objFactory); + $this->_form = new AcroFormObject($this, $this->_trailer->Root->AcroForm, $this->_objFactory); $this->_loadNamedDestinations($this->_trailer->Root, $this->_parser->getPDFVersion()); $this->_loadOutlines($this->_trailer->Root); @@ -338,7 +338,7 @@ public function __construct($source = null, $revision = null, $load = false) $this->_trailer = new Trailer\Generated($trailerDictionary); // create an empty form - $this->_form = new AcroFormObject(null, $this->_objFactory); + $this->_form = new AcroFormObject($this, null, $this->_objFactory); /** * Document catalog indirect object. From e512d098bde078230afa18f263fdbb3459aa0c5e Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Thu, 5 Jan 2017 17:22:00 -0600 Subject: [PATCH 09/16] bug fix for PDFs that couldn't be printed via Acrobat - was outputting references to objects that weren't included in the PDF --- .../ZendPdf/InternalType/AcroFormObject.php | 57 +++++++++++++++++-- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index cf773c7..2741a9e 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -175,15 +175,19 @@ public function replaceTokens($pages) /* @var $field IndirectObject */ foreach ($this->_tokens as $token) { $fieldName = $token->getFieldName(); + if (array_key_exists($fieldName, $this->_fields)) { $field = $this->_fields[$fieldName]; + $kids_num = 0; + $kids_removed = 0; // the Kids property contains references to field instances, and each field instance's Parent property refers to the shared field if ($field->Kids instanceof ArrayObject) { /* @var $idr IndirectObjectReference */ - $i=0; + $i=0; /* @var $items \ArrayObject */ $items = $field->Kids->items; + $kids_num = count($items);// TODO: count properly foreach ($items as $idr) { $io = $idr->getObject(); /* @@ -216,20 +220,61 @@ public function replaceTokens($pages) $p->setFont($font, $size); $p->drawTextAt($token->getValue(), $io, $token->getOffsetX(), $token->getOffsetY()); + + // remove the existing field + $io->getFactory()->remove($io); + + // remove the field annotation from the page + try { + /* @var $annots \ArrayObject */ + $annots = $p->getPageDictionary()->Annots->items; + $this->spliceArrayObject($annots, $io); + } catch (\Exception $ex) { + // continue with a warning + $this->log[] = "WARNING: replaceTokens() error while locating Page Annots for field instance: " . $ex->getMessage(); + } + + $kids_removed++; } - - $io->getFactory()->remove($io); } // remove all the field instances - empty the array $field->Kids->items = new \ArrayObject(); } - // remove the field from its factory - $field->getFactory()->remove($field); - + if ($kids_removed == $kids_num) { + // remove the field from its factory + $field->getFactory()->remove($field); + + // remove the field from our lookup array + unset($this->_fields[$fieldName]); + + try { + // remove the field from the form dictionary + $fields = $this->_primaryFormDict->Fields->items; + $this->spliceArrayObject($fields, $field); + } catch (\Exception $ex) { + // continue with a warning + $this->log[] = "WARNING: replaceTokens() error while locating AcroForm Fields for field instance: " . $ex->getMessage(); + } + } + } + } + } + + private function spliceArrayObject(\ArrayObject $array, IndirectObject $remove) + { + $keep = array(); + foreach ($array as $item) { + if ($item === $remove) { + // skip + } elseif ($item instanceof IndirectObjectReference && $item->getObject() === $remove) { + // skip + } else { + $keep[] = $item; } } + $array->exchangeArray($keep); } /** From 97ebbfcb1466b74abc7f1c0696de54db93c36482 Mon Sep 17 00:00:00 2001 From: Crysta McKenney Date: Tue, 21 Mar 2017 13:56:19 -0500 Subject: [PATCH 10/16] added the ability for multiline tokens. drawTextAt() was not recognizing line breaks for tokens, so exploding on "\n" to create an array of lines then calling drawTextAt() for each line --- .../ZendPdf/InternalType/AcroFormObject.php | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index 2741a9e..3e0cadf 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -219,7 +219,24 @@ public function replaceTokens($pages) list($font, $size) = $this->getFontAndSize($da); $p->setFont($font, $size); - $p->drawTextAt($token->getValue(), $io, $token->getOffsetX(), $token->getOffsetY()); + + //line breaks are not recognized when drawing the text, explode into array on \n and draw each line separately + $text = $token->getValue(); + $lines = array(); + foreach (explode("\n", $text) as $line) { + $lines[] = $line; + } + + $offsetY = $token->getOffsetY(); + //draws from the bottom up so reverse the array to start with the last line + $reverse_lines = array_reverse($lines); + + foreach ( $reverse_lines as $line ) { + $p->drawTextAt($line, $io, $token->getOffsetX(), $offsetY); + $offsetY = $offsetY + $size;//go up to next line based on font size + } + //original line calling draw only once +// $p->drawTextAt($token->getValue(), $io, $token->getOffsetX(), $token->getOffsetY()); // remove the existing field $io->getFactory()->remove($io); From ab013710d0a6f1b9fab9949afcfddbbc6da2e50a Mon Sep 17 00:00:00 2001 From: Crysta McKenney Date: Wed, 29 Mar 2017 17:46:55 -0500 Subject: [PATCH 11/16] added ability to wrap text based either on the width of the token field or line breaks inserted into the text new constant of MODE_REPLACE_WRAP moved getting the location array into a helper function --- .../ZendPdf/InternalType/AcroFormObject.php | 60 +++++++++++++++++-- .../InternalType/AcroFormObject/FormToken.php | 7 ++- library/ZendPdf/Page.php | 28 +++++---- 3 files changed, 78 insertions(+), 17 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index 3e0cadf..495a39c 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -163,6 +163,52 @@ private function processFormFieldsInFactory(AcroFormObject $formObject, ObjectFa } } + /* + * @param int $width - width of bounding box + * @param obj $p - page object + * @param string $text - text to be wrapped + * @return array $lines + */ + public function wrapText($width, $p, $text){ + //start the array + $lines = []; + + // $lineText is the line of text that we will ultimately write out - we may write more than one line + $lineText = ''; + // Preserve leading spaces (otherwise we'll lose them at the next step) + for( $i = 0, $m = strlen( $text ); $i < $m && $text[$i] == ' '; $i++ ){ + $lineText .= ' '; + } + // Break up paragraph into individual words using space as the delimiter + preg_match_all( '/([^\s]*\s*)/i', $text, $matches ); + $words = $matches[1]; + //get keys + $wordKeys = array_keys($words); + //get the last word + $lastWordKey = array_pop($wordKeys); + $lineWidth = $p->getTextWidth($lineText); + + foreach( $words as $key => $word ){ + // there may be some stray carriage returns in there, which we will strip out. + $word = str_replace( "\x0a", ' ', $word ); + $wordWidth = $p->getTextWidth( $word ); + //see if we are continuing on the same line or need to go down one + if ( ($lineWidth + $wordWidth < $width) && $word != '\n' && $key != $lastWordKey){ + //stay on this line + $lineText .= $word; + $lineWidth += $wordWidth; + }else{ + //finish the line + $lines[] = $lineText; + // start the next line + $lineText = $word; + $lineWidth = $wordWidth; + } + } + + return $lines; + } + /** * Process the supplied FormToken objects to replace form fields with read-only values. * @param array $pages array of Page objects in the current document @@ -219,12 +265,18 @@ public function replaceTokens($pages) list($font, $size) = $this->getFontAndSize($da); $p->setFont($font, $size); - - //line breaks are not recognized when drawing the text, explode into array on \n and draw each line separately $text = $token->getValue(); $lines = array(); - foreach (explode("\n", $text) as $line) { - $lines[] = $line; + $mode = $token->getMode(); + if($mode == FormToken::MODE_REPLACE){ + //explode into array on \n and draw each line separately + foreach (explode("\n", $text) as $line) { + $lines[] = $line; + } + } else if($mode == FormToken::MODE_REPLACE_WRAP){ + // get location array + $loc = $p->getLocationArray($io); + $lines = $this->wrapText($loc[2], $p, $text); } $offsetY = $token->getOffsetY(); diff --git a/library/ZendPdf/InternalType/AcroFormObject/FormToken.php b/library/ZendPdf/InternalType/AcroFormObject/FormToken.php index d3fcbfb..09ccd5f 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/FormToken.php +++ b/library/ZendPdf/InternalType/AcroFormObject/FormToken.php @@ -28,7 +28,8 @@ class FormToken { /** * The REPLACE mode replaces the form field with text using the same position, font, and sizing. */ - const MODE_REPLACE = "replace"; + const MODE_REPLACE = "replace";//won't wrap text but will go down a line if \n is provided in the string + const MODE_REPLACE_WRAP = "replace_wrap";//will wrap text either at edge of token field or at any \n provided private $fieldName; @@ -45,6 +46,8 @@ class FormToken { * @param string $fieldName the name of the form field that should be affected by this token * @param string $value the value to use * @param constant $mode one of FormToken::MODE_* constants + * @param int $offsetX + * @param int $offsetY */ public function __construct($fieldName, $value, $mode, $offsetX=0, $offsetY=0) { $this->fieldName = $fieldName; @@ -52,7 +55,7 @@ public function __construct($fieldName, $value, $mode, $offsetX=0, $offsetY=0) { $this->offsetX = $offsetX; $this->offsetY = $offsetY; - if ($mode == self::MODE_REPLACE) { // $mode == self::MODE_FILL || + if ($mode == self::MODE_REPLACE || $mode == self::MODE_REPLACE_WRAP) { // $mode == self::MODE_FILL || $this->mode = $mode; } else { throw new \ZendPdf\Exception\NotImplementedException("Unknown mode supplied: " . $mode); diff --git a/library/ZendPdf/Page.php b/library/ZendPdf/Page.php index 107a0d3..44cdac8 100644 --- a/library/ZendPdf/Page.php +++ b/library/ZendPdf/Page.php @@ -1594,6 +1594,21 @@ public function drawText($text, $x, $y, $charEncoding = '') return $this; } + public function getLocationArray(IndirectObject $locationObj){ + /* @var $rect \ZendPdf\InternalType\ArrayObject */ + $rect = $locationObj->Rect; + if ($rect === null) { + throw new Exception\LogicException('Location Rect not available in location object'); + } + // read the Rect object and get actual numbers we can use + $loc = []; + foreach ($rect->items as $idx => $item) { + $loc[$idx] = intval($item->toString()); + } + + return $loc; + } + /** * Draw a line of text at the location of the supplied object * @param string $text the text to draw @@ -1613,22 +1628,13 @@ public function drawTextAt($text, IndirectObject $locationObj, $offsetX=0, $offs throw new Exception\LogicException('Font has not been set'); } - /* @var $rect \ZendPdf\InternalType\ArrayObject */ - $rect = $locationObj->Rect; - if ($rect === null) { - throw new Exception\LogicException('Location Rect not available in location object'); - } - $this->_addProcSet('Text'); $charEncoding = ''; $textObj = new InternalType\StringObject($this->_font->encodeString($text, $charEncoding)); - // read the Rect object and get actual numbers we can use - $loc = []; - foreach ($rect->items as $idx => $item) { - $loc[$idx] = intval($item->toString()); - } + // get location array + $loc = $this->getLocationArray($locationObj); // determine horizontal alignment /* @var $align NumericObject */ From 4f23a3b84106e10fad63705e8c0c241c57a62213 Mon Sep 17 00:00:00 2001 From: Crysta McKenney Date: Thu, 20 Apr 2017 14:56:21 -0500 Subject: [PATCH 12/16] quick fix to catch cases of tokens not displaying in forms because the size = 0 --- library/ZendPdf/InternalType/AcroFormObject.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject.php b/library/ZendPdf/InternalType/AcroFormObject.php index 495a39c..3eae7fd 100644 --- a/library/ZendPdf/InternalType/AcroFormObject.php +++ b/library/ZendPdf/InternalType/AcroFormObject.php @@ -263,7 +263,11 @@ public function replaceTokens($pages) /* @var $p Page */ // draw some text! list($font, $size) = $this->getFontAndSize($da); - + //ideally use the size provided, but if none is available default to size 10 + //stop gap fix for tokens not consistently displaying on forms - will likely need a better long term solution - 2017-04-20 - CM + if($size == 0){ + $size = 10; + } $p->setFont($font, $size); $text = $token->getValue(); $lines = array(); From ffaaedac8f7f5635276fa43017a99f1863ddcd1c Mon Sep 17 00:00:00 2001 From: Will Strootman Date: Wed, 1 Nov 2017 15:14:26 -0500 Subject: [PATCH 13/16] enabled the constructor to accept an image file as well as an image name --- library/ZendPdf/Resource/Image/Png.php | 32 +++++++++++--------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/library/ZendPdf/Resource/Image/Png.php b/library/ZendPdf/Resource/Image/Png.php index 1bc4cee..fdfb586 100644 --- a/library/ZendPdf/Resource/Image/Png.php +++ b/library/ZendPdf/Resource/Image/Png.php @@ -47,26 +47,20 @@ class Png extends AbstractImage protected $_height; protected $_imageProperties; - /** - * Object constructor - * - * @param string $imageFileName - * @throws \ZendPdf\Exception\ExceptionInterface - * @todo Add compression conversions to support compression strategys other than PNG_COMPRESSION_DEFAULT_STRATEGY. - * @todo Add pre-compression filtering. - * @todo Add interlaced image handling. - * @todo Add support for 16-bit images. Requires PDF version bump to 1.5 at least. - * @todo Add processing for all PNG chunks defined in the spec. gAMA etc. - * @todo Fix tRNS chunk support for Indexed Images to a SMask. - */ - public function __construct($imageFileName) - { - if (($imageFile = @fopen($imageFileName, 'rb')) === false ) { - throw new Exception\IOException("Can not open '$imageFileName' file for reading."); - } - + public function __construct($imageFileName, $imageFile = null) { parent::__construct(); + if ($imageFileName) { + if (($imageFile = @fopen($imageFileName, 'rb')) === false ) { + throw new Exception\IOException("Can not open '$imageFileName' file for reading."); + } + $this->process($imageFile); + } elseif ($imageFile) { + $this->process($imageFile); + } + } + protected function process($imageFile) { + //Check if the file is a PNG fseek($imageFile, 1, SEEK_CUR); //First signature byte (%) if ('PNG' != fread($imageFile, 3)) { @@ -322,7 +316,7 @@ public function __construct($imageFileName) $this->_resource->skipFilters(); } } - + /** * Image width */ From 801443b91569b00b96267f62df436fa5784f1239 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Mon, 12 Mar 2018 16:14:55 -0500 Subject: [PATCH 14/16] added support for read-only form fields --- .../AcroFormObject/AcroFormFieldWorker.php | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php index d69cca4..f69f13b 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php +++ b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php @@ -95,18 +95,24 @@ public function createNewSharedField(ObjectFactory $targetFactory, IndirectObjec protected function createNewFieldDictionary(IndirectObject $widget, $title) { // NOTE: do not move the value (V) attribute into a shared field dictionary + // NOTE: isset and property_exists appear to not work very well on the IndirectObject, probably due to + // the class using a "magic" getter method for the various attributes. + // NOTE: also make sure you clone the object here, or else it may be retained and reused elsewhere, and + // not actually end up in the desired shared form field. // create a new field object $dict = new DictionaryObject(); if ($widget->DA !== null) { - $dict->DA = clone $widget->DA; + $dict->DA = clone $widget->DA; // font } if ($widget->FT !== null) { - $dict->FT = clone $widget->FT; + $dict->FT = clone $widget->FT; // field type } $dict->Kids = new ArrayObject(); - $dict->T = new StringObject($title); + $dict->T = new StringObject($title); // title + $dict->Ff = clone $widget->Ff; // "read-only" setting + return $dict; } @@ -144,6 +150,7 @@ public function linkPageFieldToSharedField(ObjectFactory $targetFactory, Indirec // hack up the supplied widget to point to the new shared field unset($pageField->FT); unset($pageField->T); + unset($pageField->Ff); // remove the read-only flag unset($pageField->P); // TODO: link back to Page object // create a new reference for the original embedded field From 18e429d1079bf89ff61202649d3b59124ee10ce5 Mon Sep 17 00:00:00 2001 From: Nate Chrysler Date: Thu, 5 Apr 2018 21:07:49 -0500 Subject: [PATCH 15/16] only copy the Ff node if it exists --- .../InternalType/AcroFormObject/AcroFormFieldWorker.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php index f69f13b..10ed5ca 100644 --- a/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php +++ b/library/ZendPdf/InternalType/AcroFormObject/AcroFormFieldWorker.php @@ -111,8 +111,10 @@ protected function createNewFieldDictionary(IndirectObject $widget, $title) $dict->Kids = new ArrayObject(); $dict->T = new StringObject($title); // title - $dict->Ff = clone $widget->Ff; // "read-only" setting - + if ($widget->Ff !== null) { + $dict->Ff = clone $widget->Ff; // "read-only" setting + } + return $dict; } From 09abf48306fc326d6c99e075670d4748f4bcea22 Mon Sep 17 00:00:00 2001 From: Phil Dowson Date: Tue, 18 Sep 2018 11:37:45 -0500 Subject: [PATCH 16/16] Updated composer name for packagist --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index dbb6fdf..3dee710 100644 --- a/composer.json +++ b/composer.json @@ -1,5 +1,5 @@ { - "name": "zendframework/zendpdf", + "name": "rightsourcecompliance/zendpdf", "description": "Zend Pdf Component", "type": "library", "license": "BSD-3-Clause",