Skip to content

Commit

Permalink
Improve delete properties performance by replace DOMDocument with xml…
Browse files Browse the repository at this point in the history
…_parse (#432)

* Add some test case for XmlPropsRemover
alexander-schranz authored Jan 6, 2024
1 parent f7b286f commit 1b5d994
Showing 6 changed files with 344 additions and 42 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -4,6 +4,11 @@ Changelog
1.x
===

1.11.0 (unreleased)
-------------------

* Improve delete properties performance by replace DOMDocument with xml_parse

1.10.1
------

80 changes: 43 additions & 37 deletions src/Jackalope/Transport/DoctrineDBAL/Client.php
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@
use Jackalope\Transport\BaseTransport;
use Jackalope\Transport\DoctrineDBAL\Query\QOMWalker;
use Jackalope\Transport\DoctrineDBAL\XmlParser\XmlToPropsParser;
use Jackalope\Transport\DoctrineDBAL\XmlPropsRemover\XmlPropsRemover;
use Jackalope\Transport\MoveNodeOperation;
use Jackalope\Transport\NodeTypeManagementInterface;
use Jackalope\Transport\QueryInterface as QueryTransport;
@@ -1408,6 +1409,7 @@ public function getNode($path)

$nestedNodes = $this->getNodesData($rows);
$node = array_shift($nestedNodes);

foreach ($nestedNodes as $nestedPath => $nested) {
$relativePath = PathHelper::relativizePath($nestedPath, $path);
$this->nestNode($node, $nested, explode('/', $relativePath));
@@ -1798,10 +1800,20 @@ private function cleanIdentifierCache($root)
*/
public function deleteProperties(array $operations)
{
$this->assertLoggedIn();

$nodesByPath = [];
foreach ($operations as $op) {
$this->deleteProperty($op->srcPath);
$nodePath = PathHelper::getParentPath($op->srcPath);
$propertyName = PathHelper::getNodeName($op->srcPath);
if (!array_key_exists($nodePath, $nodesByPath)) {
$nodesByPath[$nodePath] = [];
}
$nodesByPath[$nodePath][$propertyName] = $propertyName;
}

// Doing the actual removal
$this->assertLoggedIn();
foreach ($nodesByPath as $nodePath => $propertiesToDelete) {
$this->removePropertiesFromNode($nodePath, $propertiesToDelete);
}

return true;
@@ -1828,60 +1840,54 @@ public function deletePropertyImmediately($path)
protected function deleteProperty($path)
{
$this->assertLoggedIn();

$nodePath = PathHelper::getParentPath($path);
$propertyName = PathHelper::getNodeName($path);
$this->removePropertiesFromNode($nodePath, [$propertyName]);
}

/**
* Removes a list of properties from a given node.
*
* @param string $nodePath
* @param array<string> $propertiesToDelete Path belonging to that node that should be deleted
*/
private function removePropertiesFromNode($nodePath, array $propertiesToDelete): void
{
$nodeId = $this->getSystemIdForNode($nodePath);
if (!$nodeId) {
// no we really don't know that path
throw new ItemNotFoundException("No item found at " . $path);
throw new ItemNotFoundException('No item found at ' . $nodePath);
}

$query = 'SELECT props FROM phpcr_nodes WHERE id = ?';
$xml = $this->getConnection()->fetchOne($query, [$nodeId]);

$dom = new DOMDocument('1.0', 'UTF-8');
$dom->loadXML($xml);
$xpath = new DomXpath($dom);
$xmlPropsRemover = new XmlPropsRemover($xml, $propertiesToDelete);
[$xml, $references] = $xmlPropsRemover->removeProps();

$found = false;
$propertyName = PathHelper::getNodeName($path);
foreach ($xpath->query(sprintf('//*[@sv:name="%s"]', $propertyName)) as $propertyNode) {
$found = true;
// would be nice to have the property object to ask for type
// but its in state deleted, would mean lots of refactoring
if ($propertyNode->hasAttribute('sv:type')) {
$type = strtolower($propertyNode->getAttribute('sv:type'));
if (in_array($type, ['reference', 'weakreference'])) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeUpdate($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
foreach ($references as $type => $propertyNames) {
$table = $this->referenceTables['reference' === $type ? PropertyType::REFERENCE : PropertyType::WEAKREFERENCE];
foreach ($propertyNames as $propertyName) {
try {
$query = "DELETE FROM $table WHERE source_id = ? AND source_property_name = ?";
$this->getConnection()->executeUpdate($query, [$nodeId, $propertyName]);
} catch (DBALException $e) {
throw new RepositoryException(
'Unexpected exception while cleaning up deleted nodes',
$e->getCode(),
$e
);
}
}

$propertyNode->parentNode->removeChild($propertyNode);
}

if (!$found) {
throw new ItemNotFoundException("Node $nodePath has no property $propertyName");
}

$xml = $dom->saveXML();

$query = 'UPDATE phpcr_nodes SET props = ? WHERE id = ?';
$params = [$xml, $nodeId];

try {
$this->getConnection()->executeUpdate($query, $params);
} catch (DBALException $e) {
throw new RepositoryException("Unexpected exception while updating properties of $path", $e->getCode(), $e);
throw new RepositoryException("Unexpected exception while updating properties of $nodePath", $e->getCode(), $e);
}
}

Original file line number Diff line number Diff line change
@@ -86,18 +86,18 @@ public function parse(): \stdClass
{
$this->data = new \stdClass();

$parser = xml_parser_create();
$parser = \xml_parser_create();

xml_set_element_handler(
\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

xml_set_character_data_handler($parser, [$this, 'dataHandler']);
\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

xml_parse($parser, $this->xml, true);
xml_parser_free($parser);
\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
<?php

namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover;

/**
* @internal
*/
class XmlPropsRemover
{
/**
* @var string
*/
private $xml;

/**
* @var string[]
*/
private $propertyNames;

/**
* @var bool
*/
private $skipCurrentTag = false;

/**
* @var string
*/
private $newXml = '';

/**
* @var string
*/
private $newStartTag = '';

private $weakReferences = [];

private $references = [];

public function __construct(string $xml, array $propertyNames)
{
$this->xml = $xml;
$this->propertyNames = $propertyNames;
}

/**
* @example [$xml, $references] = $xmlPropsRemover->removeProps($xml, $propertiesToDelete);
*
* @return array{
* 0: string,
* 1: array{
* reference: string[],
* weakreference: string[],
* },
* } An array with the new xml (0) and the references (1) which requires to be removed.
*/
public function removeProps(): array
{
$this->newXml = '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL;
$this->references = [];
$this->weakReferences = [];
$this->newStartTag = '';
$this->skipCurrentTag = false;

$parser = \xml_parser_create();

\xml_set_element_handler(
$parser,
[$this, 'startHandler'],
[$this, 'endHandler']
);

\xml_set_character_data_handler($parser, [$this, 'dataHandler']);

\xml_parse($parser, $this->xml, true);
\xml_parser_free($parser);
// avoid memory leaks and unset the parser see: https://www.php.net/manual/de/function.xml-parser-free.php
unset($parser);

return [
$this->newXml . PHP_EOL,
[
'reference' => $this->references,
'weakreference' => $this->weakReferences,
],
];
}

/**
* @param \XmlParser $parser
* @param string $name
* @param mixed[] $attrs
*/
private function startHandler($parser, $name, $attrs): void
{
if ($this->skipCurrentTag) {
return;
}

if ($name === 'SV:PROPERTY') {
$svName = $attrs['SV:NAME'];

if (\in_array($svName, $this->propertyNames)) {
$this->skipCurrentTag = true;
$svType = $attrs['SV:TYPE'];

if ($svType === 'reference') {
$this->references[] = $svName;
} elseif ($svType === 'weakreference') {
$this->weakReferences[] = $svName;
}

return;
}
}

$tag = '<' . \strtolower($name);
foreach ($attrs as $key => $value) {
$tag .= ' ' . \strtolower($key) // there is no case key which requires escaping for performance reasons we avoid it so
. '="'
. \htmlspecialchars($value, ENT_COMPAT, 'UTF-8')
. '"';
}
$tag .= '>';

$this->newXml .= $this->newStartTag;
$this->newStartTag = $tag; // handling self closing tags in endHandler
}

private function endHandler($parser, $name): void
{
if ($name === 'SV:PROPERTY' && $this->skipCurrentTag) {
$this->skipCurrentTag = false;

return;
}

if ($this->skipCurrentTag) {
return;
}

if ($this->newStartTag) {
// if the tag is not rendered to newXml it can be a self-closing tag
$this->newXml .= \substr($this->newStartTag, 0.0, -1) . '/>';
$this->newStartTag = '';

return;
}

$this->newXml .= '</' . \strtolower($name) . '>';
}

private function dataHandler($parser, $data): void
{
if ($this->skipCurrentTag) {
return;
}

if ($data !== '') {
$this->newXml .= $this->newStartTag; // non-empty data means no self closing tag so render tag now
$this->newStartTag = '';
$this->newXml .= \htmlspecialchars($data, ENT_XML1, 'UTF-8');
}
}
}
29 changes: 29 additions & 0 deletions tests/Jackalope/Transport/DoctrineDBAL/ClientTest.php
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
use PHPCR\PropertyType;
use PHPCR\Query\QOM\QueryObjectModelConstantsInterface;
use PHPCR\Query\QueryInterface;
use PHPCR\SimpleCredentials;
use PHPCR\Util\NodeHelper;
use PHPCR\Util\PathHelper;
use PHPCR\Util\QOM\QueryBuilder;
@@ -240,6 +241,34 @@ public function testDeleteMoreThanOneThousandNodes()
$this->addToAssertionCount(1);
}

public function testDeleteProperties()
{
$root = $this->session->getNode('/');
$node = $root->addNode('delete-properties');
for ($i = 0; $i <= 1000; $i++) {
$node->setProperty('property-'.$i, 'value-'.$i);
}

$this->session->save();
$this->assertSame(1002, \count($node->getProperties()));

for ($i = 501; $i <= 1000; $i++) {
$node->setProperty('property-'.$i, null);
}

$this->session->save();
$this->session->refresh(false);
$node = $this->session->getNode('/delete-properties');

for ($i = 0; $i <= 1000; $i++) {
$this->assertSame(
$i < 501,
$node->hasProperty('property-'.$i),
'Unexpected result for property "property-'.$i.'"'
);
}
}

public function testPropertyLengthAttribute()
{
$rootNode = $this->session->getRootNode();
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<?php

namespace Jackalope\Transport\DoctrineDBAL\XmlPropsRemover;

use Jackalope\Factory;
use Jackalope\Test\TestCase;
use Jackalope\Transport\DoctrineDBAL\XmlParser\XmlToPropsParser;
use PHPCR\Util\ValueConverter;

class XmlPropsRemoverTest extends TestCase
{
public function testRemoveProps(): void
{
$xml = <<<EOT
<?xml version="1.0" encoding="UTF-8"?>
<sv:node xmlns:mix="http://www.jcp.org/jcr/mix/1.0" xmlns:nt="http://www.jcp.org/jcr/nt/1.0" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:jcr="http://www.jcp.org/jcr/1.0" xmlns:sv="http://www.jcp.org/jcr/sv/1.0" xmlns:rep="internal">
<sv:property sv:name="jcr:primaryType" sv:type="Name" sv:multi-valued="0">
<sv:value length="15">nt:unstructured</sv:value>
</sv:property>
<sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multi-valued="1">
<sv:value length="9">sulu:page</sv:value>
</sv:property>
<sv:property sv:name="jcr:uuid" sv:type="String" sv:multi-valued="0">
<sv:value length="36">0804f0c3-5250-4c2f-9d7e-7d0c99103026</sv:value>
</sv:property>
<sv:property sv:name="i18n:en-title" sv:type="String" sv:multi-valued="0">
<sv:value length="8">My Title</sv:value>
</sv:property>
<sv:property sv:name="ampersand" sv:type="String" sv:multi-valued="0"><sv:value length="13">foo &amp; bar&amp;baz</sv:value></sv:property>
<sv:property sv:name="äüö?ß&lt;&gt;''&quot;=&quot;test" sv:type="String" sv:multi-valued="0"><sv:value length="15">&lt;&gt;:&amp;|öäü"?"ß'='</sv:value></sv:property>
<sv:property sv:name="block_1_ref" sv:type="reference" sv:multi-valued="0">1922ec03-b5ed-40cf-856c-ecfb8eac12e2</sv:property>
<sv:property sv:name="block_2_ref" sv:type="reference" sv:multi-valued="0">94c9aefe-faaa-4896-816b-5bfc575681f0</sv:property>
<sv:property sv:name="block_3_ref" sv:type="weakreference" sv:multi-valued="0">a8ae4420-095b-4045-8775-b731cbae2fe1</sv:property>
<sv:property sv:name="external_reference" sv:type="reference" sv:multi-valued="0">
<sv:value length="36">842e61c0-09ab-42a9-87c0-308ccc90e6f6</sv:value>
</sv:property>
</sv:node>
EOT;

$xmlPropsRemover = $this->createXmlPropsRemover($xml, [
'i18n:en-title',
'block_2_ref',
'block_3_ref',
'external_reference',
]);
[$xml, $references] = $xmlPropsRemover->removeProps();

$this->assertStringContainsString('äüö?ß&lt;&gt;\'\'&quot;=&quot;test', $xml, 'Not correctly escaped special chars property name, after removing props.');
$this->assertStringContainsString('&lt;&gt;:&amp;|öäü"?"ß\'=\'', $xml, 'Not correctly escaped special chars property value, after removing props.');

$xmlParser = $this->createXmlToPropsParser($xml);
$data = $xmlParser->parse();
$this->assertSame([
'jcr:primaryType' => 'nt:unstructured',
':jcr:primaryType' => 7,
'jcr:mixinTypes' => ['sulu:page'],
':jcr:mixinTypes' => 7,
'jcr:uuid' => '0804f0c3-5250-4c2f-9d7e-7d0c99103026',
':jcr:uuid' => 1,
'ampersand' => 'foo & bar&baz',
':ampersand' => 1,
'äüö?ß<>\'\'"="test' => '<>:&|öäü"?"ß\'=\'',
':äüö?ß<>\'\'"="test' => 1,
'block_1_ref' => '1922ec03-b5ed-40cf-856c-ecfb8eac12e2',
':block_1_ref' => 9,
], (array) $data);
$this->assertSame([
'reference' => [
'block_2_ref',
'external_reference',
],
'weakreference' => [
'block_3_ref',
],
], $references);
}

private function createXmlPropsRemover(string $xml, array $propNames = null): XmlPropsRemover
{
return new XmlPropsRemover(
$xml,
$propNames
);
}

private function createXmlToPropsParser(string $xml, array $propNames = null): XmlToPropsParser
{
$factory = new Factory();

$valueConverter = $factory->get(ValueConverter::class);

return new XmlToPropsParser(
$xml,
$valueConverter,
$propNames
);
}
}

0 comments on commit 1b5d994

Please sign in to comment.