Skip to content

Commit f8f02f2

Browse files
committed
Merge branch '6.4' into 7.0
* 6.4:
  fix merge
  fix syntax for PHP 7.2
  [Security] Fix Danish translations
  [Messenger] Improve deadlock handling on `ack()` and `reject()`
  [DomCrawler] Encode html entities only if necessary
  [Serializer] reset backed_enum priority, and re-prioritise translatable
  [Validator] Accept `Stringable` in `ExecutionContext::build/addViolation()`
  [Serializer] Ignore when using #[Ignore] on a non-accessor
  [Filesystem] Strengthen the check of file permissions in `dumpFile`
  [Serializer] Fix XML scalar to object denormalization
  [HttpClient][EventSourceHttpClient] Fix consuming SSEs with \r\n separator
2 parents 6cb272c + fd18b0f commit f8f02f2

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

Crawler.php

+20-2
Original file line numberDiff line numberDiff line change
@@ -1061,12 +1061,30 @@ protected function sibling(\DOMNode $node, string $siblingDir = 'nextSibling'):
10611061

10621062
private function parseHtml5(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
10631063
{
1064-
return $this->html5Parser->parse($this->convertToHtmlEntities($htmlContent, $charset));
1064+
if (!$this->supportsEncoding($charset)) {
1065+
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
1066+
$charset = 'UTF-8';
1067+
}
1068+
1069+
return $this->html5Parser->parse($htmlContent, ['encoding' => $charset]);
1070+
}
1071+
1072+
private function supportsEncoding(string $encoding): bool
1073+
{
1074+
try {
1075+
return '' === @mb_convert_encoding('', $encoding, 'UTF-8');
1076+
} catch (\Throwable $e) {
1077+
return false;
1078+
}
10651079
}
10661080

10671081
private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
10681082
{
1069-
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
1083+
if ('UTF-8' === $charset && preg_match('//u', $htmlContent)) {
1084+
$htmlContent = '<?xml encoding="UTF-8">'.$htmlContent;
1085+
} else {
1086+
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
1087+
}
10701088

10711089
$internalErrors = libxml_use_internal_errors(true);
10721090

Tests/AbstractCrawlerTestCase.php

+4
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ public function testAddContent()
184184
$crawler = $this->createCrawler();
185185
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html>');
186186
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addContent() ignores bad charset');
187+
188+
$crawler = $this->createCrawler();
189+
$crawler->addContent($this->getDoctype().'<html><script>var foo = "bär";</script></html>', 'text/html; charset=UTF-8');
190+
$this->assertEquals('var foo = "bär";', $crawler->filterXPath('//script')->text(), '->addContent() does not interfere with script content');
187191
}
188192

189193
/**

0 commit comments

Comments
 (0)