diff --git a/CHANGELOG.md b/CHANGELOG.md index 066d605a..21b3ee48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ This project adheres to [Semantic Versioning](https://semver.org/). ([#776](https://github.com/MyIntervals/emogrifier/pull/776)) ### Changed +- Normalize DOCTYPE declaration according to polyglot markup recommendation + ([#866](https://github.com/MyIntervals/emogrifier/pull/866)) - Upgrade to V2 of the PHP setup GitHub action ([#861](https://github.com/MyIntervals/emogrifier/pull/861)) - Move the development tools to Phive diff --git a/src/HtmlProcessor/AbstractHtmlProcessor.php b/src/HtmlProcessor/AbstractHtmlProcessor.php index c6c2638e..10de9c66 100644 --- a/src/HtmlProcessor/AbstractHtmlProcessor.php +++ b/src/HtmlProcessor/AbstractHtmlProcessor.php @@ -234,7 +234,7 @@ private function prepareHtmlForDomConversion(string $html): string } /** - * Makes sure that the passed HTML has a document type. + * Makes sure that the passed HTML has a document type, with lowercase "html". * * @param string $html * @@ -244,12 +244,30 @@ private function ensureDocumentType(string $html): string { $hasDocumentType = \stripos($html, 'normalizeDocumentType($html); } return static::DEFAULT_DOCUMENT_TYPE . $html; } + /** + * Makes sure the document type in the passed HTML has lowercase "html". + * + * @param string $html + * + * @return string HTML with normalized document type + */ + private function normalizeDocumentType(string $html): string + { + // Limit to replacing the first occurrence: as an optimization; and in case an example exists as unescaped text. + return \preg_replace( + '/])/i', + '' . '' . ''; - $formattedHtml = "\n" . + $formattedHtml = "\n" . "\n" . '' . "\n" . "\n" . 
@@ -377,6 +377,49 @@ public function keepsExistingDocumentType(string $documentType)
         self::assertContains($documentType, $result);
     }
 
+    /**
+     * Supplies the named datasets for normalizesDocumentType(): each entry pairs a
+     * document type as passed in with the document type expected in the rendered output.
+     *
+     * NOTE(review): every string literal below is empty in this view; presumably the
+     * DOCTYPE markup was lost in extraction - confirm against the original patch.
+     *
+     * @return string[][]
+     */
+    public function normalizedDocumentTypeDataProvider(): array
+    {
+        return [
+            'HTML5, uppercase' => ['', ''],
+            'HTML5, lowercase' => ['', ''],
+            'HTML5, mixed case' => ['', ''],
+            'HTML5, extra whitespace' => ['', ''],
+            'HTML 4 transitional, uppercase' => [
+                '',
+                '',
+            ],
+            'HTML 4 transitional, lowercase' => [
+                '',
+                '',
+            ],
+        ];
+    }
+
+    /**
+     * Checks that rendering HTML which starts with the given document type yields
+     * output containing the expected normalized document type.
+     *
+     * @test
+     *
+     * @param string $documentType document type placed at the start of the HTML under test
+     * @param string $normalizedDocumentType document type expected to be found in the rendered result
+     *
+     * @dataProvider normalizedDocumentTypeDataProvider
+     */
+    public function normalizesDocumentType(string $documentType, string $normalizedDocumentType)
+    {
+        $html = $documentType . '';
+        $subject = TestingHtmlProcessor::fromHtml($html);
+
+        $result = $subject->render();
+
+        self::assertContains($normalizedDocumentType, $result);
+    }
+
     /**
      * @test
      *