Skip to content

Commit f5353b9

Browse files
committed
CssInliner: tokenizer-based CSS parser with DOM inlining
- single regex tokenizer (comment, whitespace, string, url, at-ident, hash, number, ident, char) - verifies full tokenization coverage, throws on unexpected input - handles data: URIs with semicolons, braces in strings, commas in attribute selectors - preserves <style> elements (keeps @media rules intact) - uses Dom\HTMLDocument CSS selectors for element matching (PHP 8.4+)
1 parent e5d058a commit f5353b9

4 files changed

Lines changed: 696 additions & 0 deletions

File tree

composer.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
"nette/phpstan-rules": "^1.0"
2929
},
3030
"suggest": {
31+
"ext-dom": "to use Nette\\Mail\\CssInliner",
3132
"ext-fileinfo": "to detect type of attached files",
3233
"ext-openssl": "to use Nette\\Mail\\DkimSigner"
3334
},

readme.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,23 @@ File `email.latte`:
153153

154154
Nette automatically inserts all images, sets the subject according to the `<title>` element, and generates text alternative for HTML body.
155155

156+
157+
CSS Inlining
158+
------------
159+
160+
Email clients often ignore `<style>` tags, so CSS needs to be applied as inline `style` attributes. The `CssInliner` does this automatically and also adds the equivalent HTML attributes (`bgcolor`, `width`, `height`, `cellspacing`) for Outlook.
161+
162+
```php
163+
$html = (new Nette\Mail\CssInliner)
164+
->addCss('p { margin: 0; } a { color: #a0704e; }')
165+
->inline($html);
166+
```
167+
168+
Rules from `<style>` tags in the HTML are extracted and inlined too. The `<style>` tags are preserved so that `@media` queries keep working. CSS nesting is supported.
169+
170+
Requires PHP 8.4+ (`ext-dom`).
171+
172+
156173
 <!---->
157174

158175

src/Mail/CssInliner.php

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
<?php declare(strict_types=1);
2+
3+
/**
4+
* This file is part of the Nette Framework (https://nette.org)
5+
* Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6+
*/
7+
8+
namespace Nette\Mail;
9+
10+
use Dom;
11+
use Nette\InvalidArgumentException;
12+
use function array_keys, array_merge, count, implode, in_array, preg_match_all, spl_object_id, strlen, strtolower, substr, trim;
13+
14+
15+
/**
16+
* Applies CSS rules as inline styles to HTML elements using DOM CSS selectors.
17+
* Requires PHP 8.4+ for Dom\HTMLDocument support.
18+
*/
19+
class CssInliner
{
	/**
	 * Token patterns keyed by token type. Each pattern becomes one capture group of the
	 * tokenizer regex, so the order decides which alternative wins at a given offset.
	 */
	private const Patterns = [
		self::T_Comment => '/\*[^*]*\*+(?:[^/*][^*]*\*+)*/',
		self::T_Whitespace => '[\s]+',
		self::T_String => '"(?:[^"\\\]|\\\.)*"|\'(?:[^\'\\\]|\\\.)*\'',
		self::T_Url => 'url\(\s*(?:"(?:[^"\\\]|\\\.)*"|\'(?:[^\'\\\]|\\\.)*\'|[^)]*?)\s*\)',
		self::T_Escape => '\x5c[^\n\r\f]',
		self::T_AtIdent => '@-?[a-zA-Z_][\w-]*',
		self::T_Hash => '\#[\w-]+',
		self::T_Number => '[+-]?(?:\d+\.?\d*|\.\d+)(?:%|[a-zA-Z]+)?',
		self::T_Ident => '--[\w-]+|-?[a-zA-Z_][\w-]*',
		self::T_Char => '[{}();:,.\[\]>+\~=*!/^$|&\-<]',
	];

	private const
		T_Comment = 1,
		T_Whitespace = 2,
		T_String = 3,
		T_Url = 4,
		T_Escape = 5,
		T_AtIdent = 6,
		T_Hash = 7,
		T_Number = 8,
		T_Ident = 9,
		T_Char = 10;

	// CSS → [HTML attribute, type, allowed elements]. align/valign excluded: different semantics than CSS.
	private const HtmlAttributes = [
		'background-color' => ['bgcolor', 'string', ['table', 'td', 'th', 'body', 'tr']],
		'width' => ['width', 'int', ['table', 'td', 'th', 'img']],
		'height' => ['height', 'int', ['table', 'td', 'th', 'img']],
		'border-spacing' => ['cellspacing', 'int', ['table']],
	];

	/** @var list<array{string, array<string, string>}> */
	private array $rules = [];


	/**
	 * Adds CSS stylesheet rules to be applied during inlining.
	 * @throws InvalidArgumentException if the stylesheet cannot be tokenized
	 */
	public function addCss(string $css): static
	{
		$this->rules = array_merge($this->rules, self::parseStylesheet($css));
		return $this;
	}


	/**
	 * Returns the collected rules as [selector, declarations] pairs.
	 * @return list<array{string, array<string, string>}>
	 */
	public function getRules(): array
	{
		return $this->rules;
	}


	/**
	 * Applies all added CSS rules as inline styles to the given HTML.
	 * Also extracts and inlines rules from <style> tags (which are preserved).
	 * Existing inline styles on elements take precedence over all rules.
	 * Rules are applied in source order (<style> rules first, then rules added via addCss());
	 * a later rule overrides the same property of an earlier one.
	 * @throws InvalidArgumentException if a <style> element contains CSS that cannot be tokenized
	 */
	public function inline(string $html): string
	{
		$doc = Dom\HTMLDocument::createFromString($html, LIBXML_NOERROR, 'UTF-8');

		$styleRules = [];
		foreach ($doc->querySelectorAll('style') as $styleEl) {
			$styleRules = array_merge($styleRules, self::parseStylesheet($styleEl->textContent ?? ''));
		}

		/** @var array<int, array<string, string>> */
		$collectedStyles = [];
		/** @var array<int, Dom\Element> */
		$elements = [];
		$allRules = array_merge($styleRules, $this->rules);

		foreach ($allRules as [$selector, $declarations]) {
			foreach ($doc->querySelectorAll($selector) as $element) {
				// The element is stored alongside its id, so the object stays alive and the id stays unique
				$id = spl_object_id($element);
				$elements[$id] = $element;
				$collectedStyles[$id] = array_merge($collectedStyles[$id] ?? [], $declarations);
			}
		}

		// Prepend collected styles before existing inline style (last declaration wins)
		foreach ($collectedStyles as $id => $declarations) {
			$element = $elements[$id];
			$css = self::buildDeclarations($declarations);
			$existing = trim((string) $element->getAttribute('style'));
			$element->setAttribute('style', $existing === '' ? $css : $css . '; ' . $existing);

			// Generate HTML attributes for email client compatibility (Outlook)
			$tag = strtolower($element->tagName);
			foreach (self::HtmlAttributes as $cssProp => [$attr, $type, $tags]) {
				if (!isset($declarations[$cssProp]) || !in_array($tag, $tags, true)) {
					continue;
				}

				$value = self::toAttributeValue($declarations[$cssProp], $type);
				if ($value !== null) {
					$element->setAttribute($attr, $value);
				}
			}
		}

		return $doc->saveHtml();
	}


	/**
	 * Converts a CSS value to an HTML attribute value, or returns null when the value
	 * has no attribute equivalent.
	 * A trailing !important is dropped. For the 'int' type only plain numbers, pixel lengths
	 * and percentages are accepted; keywords such as 'auto', other units and calc() are
	 * rejected instead of being cast to a misleading "0".
	 */
	private static function toAttributeValue(string $value, string $type): ?string
	{
		$value = trim((string) preg_replace('~\s*!\s*important\s*$~i', '', $value));
		if ($value === '') {
			return null;
		}

		if ($type !== 'int') {
			return $value;
		}

		if (preg_match('~^\+?(\d+)(?:\.\d*)?(?:px)?$~i', $value, $m) === 1) {
			return (string) (int) $m[1];
		}

		if (preg_match('~^\+?\d+(?:\.\d+)?%$~', $value) === 1) {
			return $value;
		}

		return null;
	}


	/**
	 * Parses CSS stylesheet text into a list of selector + declarations pairs.
	 * @return list<array{string, array<string, string>}>
	 */
	private static function parseStylesheet(string $css): array
	{
		$tokens = self::tokenize($css);
		$rules = [];
		$i = 0;
		self::parseBlock($tokens, $i, '', $rules);
		return $rules;
	}


	/**
	 * Parses a CSS block, collecting declarations and recursing into nested rules.
	 * Parsing stops at the block's closing '}' (left for the caller to skip) or at the end of input.
	 * Blocks of @-rules are skipped entirely.
	 * @param list<array{int|string, string}> $tokens
	 * @param list<array{string, array<string, string>}> &$rules
	 */
	private static function parseBlock(array $tokens, int &$i, string $parentSelector, array &$rules): void
	{
		$count = count($tokens);
		/** @var array<string, string> */
		$declarations = [];

		while ($i < $count && $tokens[$i][0] !== '}') {
			if (in_array($tokens[$i][0], [self::T_Whitespace, self::T_Comment, ';'], true)) {
				$i++;
				continue;
			}

			// Accumulate tokens until '{', ';', or '}', tracking first ':'.
			// $part never starts with whitespace here, so $colonPos stays valid after trim().
			$part = '';
			$colonPos = null;
			while ($i < $count && !in_array($tokens[$i][0], ['{', '}', ';'], true)) {
				if ($tokens[$i][0] !== self::T_Comment) {
					if ($colonPos === null && $tokens[$i][0] === ':') {
						$colonPos = strlen($part);
					}

					$part .= $tokens[$i][1];
				}

				$i++;
			}

			if ($i >= $count) {
				break;
			}

			$part = trim($part);
			if ($tokens[$i][0] === '{') {
				$i++; // skip '{'

				if ($part !== '' && $part[0] === '@') {
					// Skip @-rule block respecting nesting
					$depth = 1;
					while ($i < $count && $depth > 0) {
						if ($tokens[$i][0] === '{') {
							$depth++;
						} elseif ($tokens[$i][0] === '}') {
							$depth--;
						}

						$i++;
					}
				} else {
					// Emit parent's declarations before nested rules
					if ($parentSelector !== '' && $declarations !== []) {
						$rules[] = [$parentSelector, $declarations];
						$declarations = [];
					}

					self::parseBlock($tokens, $i, self::combineSelectors($parentSelector, $part), $rules);
					if ($i < $count) {
						$i++; // skip '}'
					}
				}
			} else {
				// Declaration: split on tracked ':'
				if ($colonPos !== null) {
					$property = trim(substr($part, 0, $colonPos));
					$value = trim(substr($part, $colonPos + 1));
					if ($property !== '' && $value !== '') {
						// Property names are case-insensitive, custom properties (--name) are not
						if (!str_starts_with($property, '--')) {
							$property = strtolower($property);
						}

						$declarations[$property] = $value;
					}
				}

				if ($i < $count && $tokens[$i][0] === ';') {
					$i++;
				}
			}
		}

		if ($parentSelector !== '' && $declarations !== []) {
			$rules[] = [$parentSelector, $declarations];
		}
	}


	/**
	 * Resolves a nested selector against its parent selector.
	 * Both sides may be selector lists; the result is their cross product, so that
	 * 'a, b' + 'span' gives 'a span, b span' rather than 'a, b span'.
	 * An '&' in the nested selector is replaced by the parent, otherwise the parent is
	 * prepended as an ancestor (descendant combinator).
	 */
	private static function combineSelectors(string $parent, string $child): string
	{
		if ($parent === '') {
			return $child;
		}

		$parents = self::splitSelectorList($parent);
		if ($parents === []) {
			$parents = [$parent];
		}

		$children = self::splitSelectorList($child);
		if ($children === []) {
			$children = [$child];
		}

		$combined = [];
		foreach ($parents as $outer) {
			foreach ($children as $inner) {
				$combined[] = str_contains($inner, '&')
					? str_replace('&', $outer, $inner)
					: $outer . ' ' . $inner;
			}
		}

		return implode(', ', $combined);
	}


	/**
	 * Splits a selector list on top-level commas, ignoring commas inside parentheses,
	 * attribute brackets, quoted strings and backslash escapes. Empty items are dropped.
	 * @return list<string>
	 */
	private static function splitSelectorList(string $selector): array
	{
		$items = [];
		$current = '';
		$depth = 0;
		$quote = null;
		$length = strlen($selector);

		for ($pos = 0; $pos < $length; $pos++) {
			$char = $selector[$pos];

			if ($char === '\\' && $pos + 1 < $length) {
				// Escaped character is copied verbatim and never interpreted
				$current .= $char . $selector[++$pos];
				continue;
			}

			if ($quote !== null) {
				if ($char === $quote) {
					$quote = null;
				}
			} elseif ($char === '"' || $char === "'") {
				$quote = $char;
			} elseif ($char === '(' || $char === '[') {
				$depth++;
			} elseif ($char === ')' || $char === ']') {
				$depth--;
			} elseif ($char === ',' && $depth === 0) {
				$item = trim($current);
				if ($item !== '') {
					$items[] = $item;
				}

				$current = '';
				continue;
			}

			$current .= $char;
		}

		$item = trim($current);
		if ($item !== '') {
			$items[] = $item;
		}

		return $items;
	}


	/**
	 * Tokenizes a CSS string into a flat array of [type, text] pairs.
	 * Single-character tokens use the character itself as their type, all others a T_* constant.
	 * @return list<array{int|string, string}>
	 * @throws InvalidArgumentException if the input cannot be fully tokenized
	 */
	private static function tokenize(string $input): array
	{
		if ($input === '') {
			return [];
		}

		// 'A' anchors every match at the end of the previous one, so tokens are contiguous
		$re = '~(' . implode(')|(', self::Patterns) . ')~Asu';
		if (preg_match_all($re, $input, $matches, PREG_SET_ORDER) === false) {
			// e.g. invalid UTF-8 or an exhausted backtrack/JIT limit
			throw new InvalidArgumentException('Unable to tokenize CSS: ' . preg_last_error_msg() . '.');
		}

		$types = array_keys(self::Patterns);
		$tokens = [];
		$len = 0;

		foreach ($matches as $match) {
			// Trailing unmatched groups are omitted, so the last index identifies the matched pattern
			$type = $types[count($match) - 2];
			$text = $match[0];
			$tokens[] = [$type === self::T_Char ? $text : $type, $text];
			$len += strlen($text);
		}

		if ($len !== strlen($input)) {
			$unexpected = substr($input, $len, 20);
			throw new InvalidArgumentException("Unexpected '$unexpected' at offset $len in CSS.");
		}

		return $tokens;
	}


	/**
	 * Builds a CSS declarations string from property => value pairs.
	 * @param array<string, string> $declarations
	 */
	private static function buildDeclarations(array $declarations): string
	{
		$parts = [];
		foreach ($declarations as $property => $value) {
			$parts[] = "$property: $value";
		}
		return implode('; ', $parts);
	}
}

0 commit comments

Comments
 (0)