Skip to content

Commit fd18b0f

Browse files
committed
Merge branch '5.4' into 6.4
* 5.4: fix syntax for PHP 7.2 [Security] Fix Danish translations [DomCrawler] Encode html entities only if necessary [Serializer] Ignore when using #[Ignore] on a non-accessor [Filesystem] Strengthen the check of file permissions in `dumpFile` [Serializer] Fix XML scalar to object denormalization [HttpClient][EventSourceHttpClient] Fix consuming SSEs with \r\n separator
2 parents f0e7ec3 + 000634e commit fd18b0f

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

Crawler.php

+20-2
Original file line number | Diff line number | Diff line change
@@ -1090,12 +1090,30 @@ protected function sibling(\DOMNode $node, string $siblingDir = 'nextSibling'):
10901090

10911091
/**
 * Parses HTML content with $this->html5Parser and returns the resulting document.
 *
 * NOTE: this span is a rendered diff. Bare lines such as "1093-" / "1093+" are
 * the diff gutter (old/new line numbers plus a removed/added marker), not PHP.
 *
 * After the change, the content is only passed through convertToHtmlEntities()
 * when supportsEncoding() rejects $charset; otherwise it is handed to the
 * parser untouched together with an explicit 'encoding' option.
 *
 * @param string $htmlContent Markup to parse
 * @param string $charset     Declared charset of $htmlContent; defaults to 'UTF-8'
 *
 * @return \DOMDocument Whatever $this->html5Parser->parse() produces
 */
private function parseHtml5(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
10921092
{
1093-
// Removed implementation: always ran the content through convertToHtmlEntities() before parsing.
return $this->html5Parser->parse($this->convertToHtmlEntities($htmlContent, $charset));
1093+
// Added implementation: fall back to entity conversion only for charsets supportsEncoding() rejects.
if (!$this->supportsEncoding($charset)) {
1094+
// convertToHtmlEntities() is defined outside this excerpt; presumably it yields
// entity-encoded content that is safe to treat as UTF-8 — TODO confirm.
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
1095+
// The parser is told 'UTF-8' from here on instead of the rejected charset.
$charset = 'UTF-8';
1096+
}
1097+
1098+
// Pass the (possibly reset) charset to the parser through its 'encoding' option.
return $this->html5Parser->parse($htmlContent, ['encoding' => $charset]);
1099+
}
1100+
1101+
/**
 * Tells whether mb_convert_encoding() accepts $encoding as a target encoding.
 *
 * The probe converts an empty string from 'UTF-8' to $encoding and reports
 * success only when the result is exactly ''. Any thrown \Throwable is
 * reported as "not supported".
 *
 * NOTE: this span is a rendered diff. Bare lines such as "1102+" or
 * "10941108" are the diff gutter (old/new line numbers), not PHP.
 *
 * @param string $encoding Encoding name to probe
 *
 * @return bool True when the probe conversion returns '', false otherwise
 */
private function supportsEncoding(string $encoding): bool
1102+
{
1103+
try {
1104+
// "@" silences any warning raised by the call; a non-'' result (e.g. false) fails the strict comparison.
// Presumably this covers PHP versions that warn and return false for an unknown encoding — confirm against the mbstring docs.
return '' === @mb_convert_encoding('', $encoding, 'UTF-8');
1105+
// Presumably covers PHP versions that throw (e.g. \ValueError) for an unknown encoding — confirm against the mbstring docs.
} catch (\Throwable $e) {
1106+
return false;
1107+
}
10941108
}
10951109

10961110
private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
10971111
{
1098-
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
1112+
if ('UTF-8' === $charset && preg_match('//u', $htmlContent)) {
1113+
$htmlContent = '<?xml encoding="UTF-8">'.$htmlContent;
1114+
} else {
1115+
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
1116+
}
10991117

11001118
$internalErrors = libxml_use_internal_errors(true);
11011119

Tests/AbstractCrawlerTestCase.php

+4
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ public function testAddContent()
184184
$crawler = $this->createCrawler();
185185
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html>');
186186
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addContent() ignores bad charset');
187+
188+
$crawler = $this->createCrawler();
189+
$crawler->addContent($this->getDoctype().'<html><script>var foo = "bär";</script></html>', 'text/html; charset=UTF-8');
190+
$this->assertEquals('var foo = "bär";', $crawler->filterXPath('//script')->text(), '->addContent() does not interfere with script content');
187191
}
188192

189193
/**

0 commit comments

Comments
 (0)