-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
Improve delete properties performance by replacing DOMDocument with xml_parse (#432)
* Add some test cases for XmlPropsRemover
1 parent
f7b286f
commit 1b5d994
Showing
6 changed files
with
344 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
164 changes: 164 additions & 0 deletions
164
src/Jackalope/Transport/DoctrineDBAL/XmlPropsRemover/XmlPropsRemover.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
<?php | ||
|
||
namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover; | ||
|
||
/**
 * Streams a serialized node XML document through PHP's event based XML parser
 * and re-emits it without the `sv:property` elements whose `sv:name` is in the
 * list of property names to remove.
 *
 * While removing, the names of removed properties of type `reference` and
 * `weakreference` are collected so the caller can clean them up as well.
 *
 * @internal
 */
class XmlPropsRemover
{
    /**
     * @var string
     */
    private $xml;

    /**
     * @var string[]
     */
    private $propertyNames;

    /**
     * Property names to remove, stored as array keys for exact O(1) lookups.
     *
     * @var array<string|int, true>
     */
    private $removableNames = [];

    /**
     * True while the parser is inside a property that is being removed.
     *
     * @var bool
     */
    private $skipCurrentTag = false;

    /**
     * @var string
     */
    private $newXml = '';

    /**
     * Start tag which was parsed but not yet written to $newXml. It is held
     * back so it can be rendered as a self-closing tag when the element turns
     * out to have no content.
     *
     * @var string
     */
    private $newStartTag = '';

    /**
     * @var string[]
     */
    private $weakReferences = [];

    /**
     * @var string[]
     */
    private $references = [];

    /**
     * @param string   $xml           the XML document to filter
     * @param string[] $propertyNames the `sv:name` values of the properties to remove
     */
    public function __construct(string $xml, array $propertyNames)
    {
        $this->xml = $xml;
        $this->propertyNames = $propertyNames;

        // A loose in_array() treats numeric strings as numbers ("1e1" == "10"),
        // which would remove properties that were never requested. Array keys
        // only match on the exact name.
        foreach ($propertyNames as $propertyName) {
            $this->removableNames[(string) $propertyName] = true;
        }
    }

    /**
     * @example [$xml, $references] = (new XmlPropsRemover($xml, $propertiesToDelete))->removeProps();
     *
     * @return array{
     *     0: string,
     *     1: array{
     *         reference: string[],
     *         weakreference: string[],
     *     },
     * } An array with the new xml (0) and the names of the removed reference properties (1)
     */
    public function removeProps(): array
    {
        // reset the state so the method can be called more than once
        $this->newXml = '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL;
        $this->references = [];
        $this->weakReferences = [];
        $this->newStartTag = '';
        $this->skipCurrentTag = false;

        $parser = \xml_parser_create();

        \xml_set_element_handler(
            $parser,
            [$this, 'startHandler'],
            [$this, 'endHandler']
        );

        \xml_set_character_data_handler($parser, [$this, 'dataHandler']);

        // NOTE(review): the result of xml_parse() is not checked, so for XML which is
        // not well-formed everything emitted up to the error position is returned.
        \xml_parse($parser, $this->xml, true);
        \xml_parser_free($parser);
        // avoid memory leaks and unset the parser see: https://www.php.net/manual/en/function.xml-parser-free.php
        unset($parser);

        return [
            $this->newXml . PHP_EOL,
            [
                'reference' => $this->references,
                'weakreference' => $this->weakReferences,
            ],
        ];
    }

    /**
     * Handles an opening tag: starts skipping when the tag is a property to
     * remove, otherwise buffers the re-built start tag.
     *
     * Names are compared in upper case and lowercased again on output because
     * the parser is created with its default options, which fold element and
     * attribute names to upper case.
     *
     * @param \XMLParser|resource $parser
     * @param string              $name
     * @param mixed[]             $attrs
     */
    private function startHandler($parser, $name, $attrs): void
    {
        if ($this->skipCurrentTag) {
            return;
        }

        if ('SV:PROPERTY' === $name) {
            // a property without sv:name can never match and is kept as is
            $svName = $attrs['SV:NAME'] ?? null;

            if (null !== $svName && isset($this->removableNames[$svName])) {
                $this->skipCurrentTag = true;
                $svType = $attrs['SV:TYPE'] ?? null;

                if ('reference' === $svType) {
                    $this->references[] = $svName;
                } elseif ('weakreference' === $svType) {
                    $this->weakReferences[] = $svName;
                }

                return;
            }
        }

        $tag = '<' . \strtolower($name);
        foreach ($attrs as $key => $value) {
            // attribute names are not escaped, only the attribute values are
            $tag .= ' ' . \strtolower($key)
                . '="'
                . \htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
                . '"';
        }
        $tag .= '>';

        // the previously buffered start tag has a child element, so it can not be self-closing
        $this->newXml .= $this->newStartTag;
        $this->newStartTag = $tag; // handling self closing tags in endHandler
    }

    /**
     * Handles a closing tag: stops skipping at the end of a removed property,
     * otherwise writes either a self-closing tag or the closing tag.
     *
     * @param \XMLParser|resource $parser
     * @param string              $name
     */
    private function endHandler($parser, $name): void
    {
        if ('SV:PROPERTY' === $name && $this->skipCurrentTag) {
            $this->skipCurrentTag = false;

            return;
        }

        if ($this->skipCurrentTag) {
            return;
        }

        if ('' !== $this->newStartTag) {
            // if the tag is not rendered to newXml it can be a self-closing tag
            $this->newXml .= \substr($this->newStartTag, 0, -1) . '/>';
            $this->newStartTag = '';

            return;
        }

        $this->newXml .= '</' . \strtolower($name) . '>';
    }

    /**
     * Handles character data: writes it escaped to the new XML unless it
     * belongs to a property which is being removed.
     *
     * @param \XMLParser|resource $parser
     * @param string              $data
     */
    private function dataHandler($parser, $data): void
    {
        if ($this->skipCurrentTag) {
            return;
        }

        if ('' !== $data) {
            $this->newXml .= $this->newStartTag; // non-empty data means no self closing tag so render tag now
            $this->newStartTag = '';
            $this->newXml .= \htmlspecialchars($data, ENT_XML1, 'UTF-8');
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
98 changes: 98 additions & 0 deletions
98
tests/Jackalope/Transport/DoctrineDBAL/XmlPropsRemover/XmlPropsRemoverTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
<?php | ||
|
||
namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover; | ||
|
||
use Jackalope\Factory; | ||
use Jackalope\Test\TestCase; | ||
use Jackalope\Transport\DoctrineDBAL\XmlParser\XmlToPropsParser; | ||
use PHPCR\Util\ValueConverter; | ||
|
||
/**
 * Verifies that XmlPropsRemover drops the requested properties, keeps special
 * characters of the remaining properties escaped and reports the removed
 * reference properties.
 */
class XmlPropsRemoverTest extends TestCase
{
    public function testRemoveProps(): void
    {
        // Special characters must be written as entities here, otherwise the fixture is not well-formed XML.
        $xml = <<<EOT
<?xml version="1.0" encoding="UTF-8"?>
<sv:node xmlns:mix="http://www.jcp.org/jcr/mix/1.0" xmlns:nt="http://www.jcp.org/jcr/nt/1.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:jcr="http://www.jcp.org/jcr/1.0" xmlns:sv="http://www.jcp.org/jcr/sv/1.0" xmlns:rep="internal">
    <sv:property sv:name="jcr:primaryType" sv:type="Name" sv:multi-valued="0">
        <sv:value length="15">nt:unstructured</sv:value>
    </sv:property>
    <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multi-valued="1">
        <sv:value length="9">sulu:page</sv:value>
    </sv:property>
    <sv:property sv:name="jcr:uuid" sv:type="String" sv:multi-valued="0">
        <sv:value length="36">0804f0c3-5250-4c2f-9d7e-7d0c99103026</sv:value>
    </sv:property>
    <sv:property sv:name="i18n:en-title" sv:type="String" sv:multi-valued="0">
        <sv:value length="8">My Title</sv:value>
    </sv:property>
    <sv:property sv:name="ampersand" sv:type="String" sv:multi-valued="0"><sv:value length="13">foo &amp; bar&amp;baz</sv:value></sv:property>
    <sv:property sv:name="äüö?ß&lt;&gt;''&quot;=&quot;test" sv:type="String" sv:multi-valued="0"><sv:value length="15">&lt;&gt;:&amp;|öäü"?"ß'='</sv:value></sv:property>
    <sv:property sv:name="block_1_ref" sv:type="reference" sv:multi-valued="0">1922ec03-b5ed-40cf-856c-ecfb8eac12e2</sv:property>
    <sv:property sv:name="block_2_ref" sv:type="reference" sv:multi-valued="0">94c9aefe-faaa-4896-816b-5bfc575681f0</sv:property>
    <sv:property sv:name="block_3_ref" sv:type="weakreference" sv:multi-valued="0">a8ae4420-095b-4045-8775-b731cbae2fe1</sv:property>
    <sv:property sv:name="external_reference" sv:type="reference" sv:multi-valued="0">
        <sv:value length="36">842e61c0-09ab-42a9-87c0-308ccc90e6f6</sv:value>
    </sv:property>
</sv:node>
EOT;

        $xmlPropsRemover = $this->createXmlPropsRemover($xml, [
            'i18n:en-title',
            'block_2_ref',
            'block_3_ref',
            'external_reference',
        ]);
        [$xml, $references] = $xmlPropsRemover->removeProps();

        // The raw output has to contain the escaped form: attribute values escape <, >, & and double quotes,
        // character data escapes <, > and & only.
        $this->assertStringContainsString('äüö?ß&lt;&gt;\'\'&quot;=&quot;test', $xml, 'Not correctly escaped special chars property name, after removing props.');
        $this->assertStringContainsString('&lt;&gt;:&amp;|öäü"?"ß\'=\'', $xml, 'Not correctly escaped special chars property value, after removing props.');

        // Parsing the result again must yield the decoded values of the properties which were kept.
        $xmlParser = $this->createXmlToPropsParser($xml);
        $data = $xmlParser->parse();
        $this->assertSame([
            'jcr:primaryType' => 'nt:unstructured',
            ':jcr:primaryType' => 7,
            'jcr:mixinTypes' => ['sulu:page'],
            ':jcr:mixinTypes' => 7,
            'jcr:uuid' => '0804f0c3-5250-4c2f-9d7e-7d0c99103026',
            ':jcr:uuid' => 1,
            'ampersand' => 'foo & bar&baz',
            ':ampersand' => 1,
            'äüö?ß<>\'\'"="test' => '<>:&|öäü"?"ß\'=\'',
            ':äüö?ß<>\'\'"="test' => 1,
            'block_1_ref' => '1922ec03-b5ed-40cf-856c-ecfb8eac12e2',
            ':block_1_ref' => 9,
        ], (array) $data);
        $this->assertSame([
            'reference' => [
                'block_2_ref',
                'external_reference',
            ],
            'weakreference' => [
                'block_3_ref',
            ],
        ], $references);
    }

    /**
     * @param string[] $propNames names of the properties to remove; defaults to none because the
     *                            XmlPropsRemover constructor does not accept null
     */
    private function createXmlPropsRemover(string $xml, array $propNames = []): XmlPropsRemover
    {
        return new XmlPropsRemover(
            $xml,
            $propNames
        );
    }

    /**
     * @param string[]|null $propNames passed through unchanged to XmlToPropsParser
     */
    private function createXmlToPropsParser(string $xml, ?array $propNames = null): XmlToPropsParser
    {
        $factory = new Factory();

        $valueConverter = $factory->get(ValueConverter::class);

        return new XmlToPropsParser(
            $xml,
            $valueConverter,
            $propNames
        );
    }
}