Skip to content

Commit

Permalink
Encode html entities in HtmlHelper to prevent parse errors
Browse files Browse the repository at this point in the history
  • Loading branch information
MortalFlesh committed Oct 25, 2023
1 parent 12a3f28 commit a954571
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

<!-- There is always Unreleased section on the top. Subsections (Added, Changed, Fixed, Removed) should be added as needed. -->
## Unreleased
- Encode html entities in `HtmlHelper` to prevent parse errors

## 7.3.0 - 2023-05-23
- Support `figure` html tag in `HtmlHelper::xpathHtmlDocument` method
Expand Down
4 changes: 2 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@
},
"config": {
"allow-plugins": {
"phpstan/extension-installer": true,
"ergebnis/composer-normalize": true
"ergebnis/composer-normalize": true,
"phpstan/extension-installer": true
},
"secure-http": false,
"sort-packages": true
Expand Down
15 changes: 15 additions & 0 deletions src/Service/HtmlHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public function xpathHtmlDocument(string $content, string $xpathQuery): ISeq
{
return Seq::init(function () use ($xpathQuery, $content) {
$htmlContent = $this->transformUnsupportedHtml($content);
$htmlContent = $this->encodeHtml($htmlContent);

// @see https://www.php.net/manual/en/domdocument.loadhtml.php#95251
$dom = new \DOMDocument();
Expand All @@ -40,6 +41,20 @@ public function xpathHtmlDocument(string $content, string $xpathQuery): ISeq
});
}

/**
 * Encodes characters that would otherwise break the DOMDocument::loadHTML() method, while keeping the markup intact.
 *
 * - Bare ampersands (e.g. in `?a=1&b=2`) and characters with a named entity are converted to entities.
 * - Entities already present in the content are not encoded a second time.
 * - Literal `<` and `>` are kept, so tags stay tags.
 * - Already escaped `&lt;` / `&gt;` stay escaped (as the equivalent `&#60;` / `&#62;`), so escaped text
 *   is never turned into markup.
 * - Invalid UTF-8 sequences are substituted by U+FFFD instead of emptying the whole content.
 *
 * @see https://stackoverflow.com/questions/1685277/warning-domdocumentloadhtml-htmlparseentityref-expecting-in-entity
 */
private function encodeHtml(string $content): string
{
    // Entities which are already in the content represent text, not tag delimiters. They are rewritten to their
    // numeric form first, so the restoring str_replace() at the end only hits entities created by htmlentities().
    $protectedContent = str_replace(['&lt;', '&gt;'], ['&#60;', '&#62;'], $content);

    // ENT_NOQUOTES keeps attribute quotes untouched, ENT_SUBSTITUTE prevents an empty result on invalid UTF-8
    // and the last argument (double_encode = false) keeps the existing entities as they are.
    $encodedContent = htmlentities($protectedContent, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

    // Restore the tag delimiters, which were encoded by htmlentities() above.
    return str_replace(['&gt;', '&lt;'], ['>', '<'], $encodedContent);
}

private function transformUnsupportedHtml(string $originalContent): string
{
$unsupportedTags = Map::from([
Expand Down
18 changes: 18 additions & 0 deletions tests/Service/HtmlHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,24 @@ public function linksProvider(): array
]),
],
],
'with one link containing not encoded entities' => [
'<div>content<a href="https://www.vysokeskoly.cz?param1=1&param2=2" class="active"></div>',
[
' href="https://www.vysokeskoly.cz?param1=1&param2=2" class="active"' => new Link([
'href' => 'https://www.vysokeskoly.cz?param1=1&param2=2',
'class' => 'active',
]),
],
],
'with one link containing both encoded and not encoded entities' => [
'<div>content<a href="https://www.vysokeskoly.cz?param1=1&param2=2&amp;param3=3" class="active"></div>',
[
' href="https://www.vysokeskoly.cz?param1=1&param2=2&amp;param3=3" class="active"' => new Link([
'href' => 'https://www.vysokeskoly.cz?param1=1&param2=2&param3=3',
'class' => 'active',
]),
],
],
'with multi line link' => [
'<figure class="wp-block-image size-large is-resized">
content
Expand Down

0 comments on commit a954571

Please sign in to comment.