Skip to content

Commit a16d466

Browse files
committed
LibWeb: Relax HTML parser to allow more tags inside <select>
This implements the parsing part of the customizable <select> spec update. See whatwg/html#10548.
1 parent 40d1f42 commit a16d466

File tree

6 files changed

+345
-369
lines changed

6 files changed

+345
-369
lines changed

Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

Lines changed: 107 additions & 316 deletions
Large diffs are not rendered by default.

Libraries/LibWeb/HTML/Parser/HTMLParser.h

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,29 +27,27 @@ class SpeculativeHTMLParser;
2727

2828
namespace Web::HTML {
2929

30-
#define ENUMERATE_INSERTION_MODES \
31-
__ENUMERATE_INSERTION_MODE(Initial) \
32-
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
33-
__ENUMERATE_INSERTION_MODE(BeforeHead) \
34-
__ENUMERATE_INSERTION_MODE(InHead) \
35-
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
36-
__ENUMERATE_INSERTION_MODE(AfterHead) \
37-
__ENUMERATE_INSERTION_MODE(InBody) \
38-
__ENUMERATE_INSERTION_MODE(Text) \
39-
__ENUMERATE_INSERTION_MODE(InTable) \
40-
__ENUMERATE_INSERTION_MODE(InTableText) \
41-
__ENUMERATE_INSERTION_MODE(InCaption) \
42-
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
43-
__ENUMERATE_INSERTION_MODE(InTableBody) \
44-
__ENUMERATE_INSERTION_MODE(InRow) \
45-
__ENUMERATE_INSERTION_MODE(InCell) \
46-
__ENUMERATE_INSERTION_MODE(InSelect) \
47-
__ENUMERATE_INSERTION_MODE(InSelectInTable) \
48-
__ENUMERATE_INSERTION_MODE(InTemplate) \
49-
__ENUMERATE_INSERTION_MODE(AfterBody) \
50-
__ENUMERATE_INSERTION_MODE(InFrameset) \
51-
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
52-
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
30+
#define ENUMERATE_INSERTION_MODES \
31+
__ENUMERATE_INSERTION_MODE(Initial) \
32+
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
33+
__ENUMERATE_INSERTION_MODE(BeforeHead) \
34+
__ENUMERATE_INSERTION_MODE(InHead) \
35+
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
36+
__ENUMERATE_INSERTION_MODE(AfterHead) \
37+
__ENUMERATE_INSERTION_MODE(InBody) \
38+
__ENUMERATE_INSERTION_MODE(Text) \
39+
__ENUMERATE_INSERTION_MODE(InTable) \
40+
__ENUMERATE_INSERTION_MODE(InTableText) \
41+
__ENUMERATE_INSERTION_MODE(InCaption) \
42+
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
43+
__ENUMERATE_INSERTION_MODE(InTableBody) \
44+
__ENUMERATE_INSERTION_MODE(InRow) \
45+
__ENUMERATE_INSERTION_MODE(InCell) \
46+
__ENUMERATE_INSERTION_MODE(InTemplate) \
47+
__ENUMERATE_INSERTION_MODE(AfterBody) \
48+
__ENUMERATE_INSERTION_MODE(InFrameset) \
49+
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
50+
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
5351
__ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
5452

5553
class WEB_API HTMLParser final : public JS::Cell {
@@ -129,8 +127,6 @@ class WEB_API HTMLParser final : public JS::Cell {
129127
void handle_in_row(HTMLToken&);
130128
void handle_in_cell(HTMLToken&);
131129
void handle_in_table_text(HTMLToken&);
132-
void handle_in_select_in_table(HTMLToken&);
133-
void handle_in_select(HTMLToken&);
134130
void handle_in_caption(HTMLToken&);
135131
void handle_in_column_group(HTMLToken&);
136132
void handle_in_template(HTMLToken&);

Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,18 @@
1313

1414
namespace Web::HTML {
1515

16-
static Vector<FlyString> s_base_list { "applet"_fly_string, "caption"_fly_string, "html"_fly_string, "table"_fly_string, "td"_fly_string, "th"_fly_string, "marquee"_fly_string, "object"_fly_string, "template"_fly_string };
16+
static Vector<FlyString> s_base_list {
17+
"applet"_fly_string,
18+
"caption"_fly_string,
19+
"html"_fly_string,
20+
"table"_fly_string,
21+
"td"_fly_string,
22+
"th"_fly_string,
23+
"marquee"_fly_string,
24+
"object"_fly_string,
25+
"select"_fly_string,
26+
"template"_fly_string
27+
};
1728

1829
StackOfOpenElements::~StackOfOpenElements() = default;
1930

@@ -22,6 +33,7 @@ void StackOfOpenElements::visit_edges(JS::Cell::Visitor& visitor)
2233
visitor.visit(m_elements);
2334
}
2435

36+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
2537
bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<FlyString> const& list, CheckMathAndSVG check_math_and_svg) const
2638
{
2739
for (auto const& element : m_elements.in_reverse()) {
@@ -37,11 +49,13 @@ bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<Fl
3749
VERIFY_NOT_REACHED();
3850
}
3951

52+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
4053
bool StackOfOpenElements::has_in_scope(FlyString const& tag_name) const
4154
{
4255
return has_in_scope_impl(tag_name, s_base_list, CheckMathAndSVG::Yes);
4356
}
4457

58+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
4559
bool StackOfOpenElements::has_in_scope_impl(DOM::Element const& target_node, Vector<FlyString> const& list) const
4660
{
4761
for (auto& element : m_elements.in_reverse()) {
@@ -57,23 +71,27 @@ bool StackOfOpenElements::has_in_scope_impl(DOM::Element const& target_node, Vec
5771
VERIFY_NOT_REACHED();
5872
}
5973

74+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
6075
bool StackOfOpenElements::has_in_scope(DOM::Element const& target_node) const
6176
{
6277
return has_in_scope_impl(target_node, s_base_list);
6378
}
6479

80+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-button-scope
6581
bool StackOfOpenElements::has_in_button_scope(FlyString const& tag_name) const
6682
{
6783
auto list = s_base_list;
6884
list.append("button"_fly_string);
6985
return has_in_scope_impl(tag_name, list, CheckMathAndSVG::Yes);
7086
}
7187

88+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-table-scope
7289
bool StackOfOpenElements::has_in_table_scope(FlyString const& tag_name) const
7390
{
7491
return has_in_scope_impl(tag_name, { "html"_fly_string, "table"_fly_string, "template"_fly_string }, CheckMathAndSVG::No);
7592
}
7693

94+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-list-item-scope
7795
bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) const
7896
{
7997
auto list = s_base_list;
@@ -82,31 +100,6 @@ bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) cons
82100
return has_in_scope_impl(tag_name, list, CheckMathAndSVG::Yes);
83101
}
84102

85-
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
86-
// The stack of open elements is said to have a particular element in select scope
87-
// when it has that element in the specific scope consisting of all element types except the following:
88-
// - optgroup in the HTML namespace
89-
// - option in the HTML namespace
90-
// NOTE: In this case it's "all element types _except_"
91-
bool StackOfOpenElements::has_in_select_scope(FlyString const& tag_name) const
92-
{
93-
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
94-
// 1. Initialize node to be the current node (the bottommost node of the stack).
95-
for (auto& node : m_elements.in_reverse()) {
96-
// 2. If node is target node, terminate in a match state.
97-
if (node->local_name() == tag_name)
98-
return true;
99-
// 3. Otherwise, if node is one of the element types in list, terminate in a failure state.
100-
// NOTE: Here "list" refers to all elements except option and optgroup
101-
if (node->local_name() != HTML::TagNames::option && node->local_name() != HTML::TagNames::optgroup)
102-
return false;
103-
// 4. Otherwise, set node to the previous entry in the stack of open elements and return to step 2.
104-
}
105-
// NOTE: This will never fail, since the loop will always terminate in the previous step if the top of the stack
106-
// — an html element — is reached.
107-
VERIFY_NOT_REACHED();
108-
}
109-
110103
bool StackOfOpenElements::contains(DOM::Element const& element) const
111104
{
112105
for (auto& element_on_stack : m_elements) {

Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class StackOfOpenElements {
3939
bool has_in_button_scope(FlyString const& tag_name) const;
4040
bool has_in_table_scope(FlyString const& tag_name) const;
4141
bool has_in_list_item_scope(FlyString const& tag_name) const;
42-
bool has_in_select_scope(FlyString const& tag_name) const;
4342

4443
bool has_in_scope(DOM::Element const&) const;
4544

@@ -49,7 +48,7 @@ class StackOfOpenElements {
4948
auto const& elements() const { return m_elements; }
5049
auto& elements() { return m_elements; }
5150

52-
void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& local_name);
51+
void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& tag_name);
5352

5453
GC::Ptr<DOM::Element> topmost_special_node_below(DOM::Element const&);
5554

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
Harness status: OK
2+
3+
Found 17 tests
4+
5+
16 Pass
6+
1 Fail
7+
Pass <div>s, <button>s, and <datalist>s should be allowed in <select>
8+
Pass </select> should close <button>
9+
Pass </select> should close <datalist>
10+
Pass <select> in <button> in <select> should remove inner <select>
11+
Pass <select> in <select><button><div> should remove inner <select>
12+
Pass JS added nested <select> should be ignored
13+
Pass JS added nested <select>s should be ignored
14+
Pass Divs and imgs should be allowed as direct children of select and within options without a datalist
15+
Pass Input tags should not parse inside select instead of closing the select
16+
Pass textarea tags should parse inside select instead of closing the select
17+
Fail Input tags should parse inside select if nested in another tag
18+
Pass Input tags should close select when directly inside an <option>
19+
Pass The last test should not leave any tags open after parsing
20+
Pass Nested selects should be retained 1
21+
Pass Nested selects should be retained 2
22+
Pass JS added nested select should be ignored
23+
Pass JS added nested selects should be ignored
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
<!DOCTYPE html>
2+
<link rel=author href="mailto:[email protected]">
3+
<link rel=help href="https://github.com/whatwg/html/issues/9799">
4+
<script src="../../../../../resources/testharness.js"></script>
5+
<script src="../../../../../resources/testharnessreport.js"></script>
6+
7+
<body>
8+
9+
<select class=test
10+
data-description='<div>s, <button>s, and <datalist>s should be allowed in <select>'
11+
data-expect='
12+
<div>div 1</div>
13+
<button>button</button>
14+
<div>div 2</div>
15+
<datalist>
16+
<option>option</option>
17+
</datalist>
18+
<div>div 3</div>
19+
'>
20+
<div>div 1</div>
21+
<button>button</button>
22+
<div>div 2</div>
23+
<datalist>
24+
<option>option</option>
25+
</datalist>
26+
<div>div 3</div>
27+
</select>
28+
29+
<select class=test
30+
data-description='</select> should close <button>'
31+
data-expect='<button>button</button>'>
32+
<button>button
33+
</select>
34+
35+
<select class=test
36+
data-description='</select> should close <datalist>'
37+
data-expect='<datalist>datalist</datalist>'>
38+
<datalist>datalist
39+
</select>
40+
41+
<select id=nested1 class=test
42+
data-description='<select> in <button> in <select> should remove inner <select>'
43+
data-expect='<button></button>'>
44+
<button>
45+
<select id=expectafter1></select>
46+
<div id=expectafter1b></div>
47+
</button>
48+
</select>
49+
50+
<select id=nested2 class=test
51+
data-description='<select> in <select><button><div> should remove inner <select>'
52+
data-expect='<button><div></div></button>'>
53+
<button>
54+
<div>
55+
<select id=expectafter2>
56+
</select>
57+
58+
<select
59+
id=nested3
60+
class=test
61+
data-description='JS added nested <select> should be ignored'
62+
data-expect='<option>The Initial Option</option>'
63+
>
64+
<option>The Initial Option</option>
65+
</select>
66+
67+
<select
68+
id=nested4
69+
class=test
70+
data-description='JS added nested <select>s should be ignored'
71+
data-expect='<option>The Initial Option</option>'
72+
>
73+
<option>The Initial Option</option>
74+
</select>
75+
76+
<select class=test
77+
data-description='Divs and imgs should be allowed as direct children of select and within options without a datalist'
78+
data-expect='
79+
<div>
80+
<option><img>option</option>
81+
</div>'>
82+
<div>
83+
<option><img>option</option>
84+
</div>
85+
</select>
86+
87+
<select class=test
88+
data-description='Input tags should not parse inside select instead of closing the select'
89+
data-expect=''>
90+
<input>
91+
</select>
92+
93+
<select class=test
94+
data-description='textarea tags should parse inside select instead of closing the select'
95+
data-expect='<textarea></textarea>'>
96+
<textarea></textarea>
97+
</select>
98+
99+
<select class=test
100+
data-description='Input tags should parse inside select if nested in another tag'
101+
data-expect='<div><input></div>'>
102+
<div>
103+
<input>
104+
</div>
105+
</select>
106+
107+
<select class=test
108+
data-description='Input tags should close select when directly inside an <option>'
109+
data-expect='<option></option>'>
110+
<option>
111+
<input>
112+
</option>
113+
</select>
114+
115+
<div id=afterlast>
116+
keep this div after the last test case
117+
</div>
118+
119+
<script>
120+
function removeWhitespace(t) {
121+
return t.replace(/\s/g,'');
122+
}
123+
document.querySelectorAll('select.test').forEach(s => {
124+
assert_true(!!s.dataset.description.length);
125+
test(() => {
126+
// The document.body check here and in the other tests is to make sure that a
127+
// previous test case didn't leave the HTML parser open on another element.
128+
assert_equals(s.parentNode, document.body);
129+
assert_equals(removeWhitespace(s.innerHTML),removeWhitespace(s.dataset.expect));
130+
},s.dataset.description)
131+
});
132+
133+
test(() => {
134+
assert_equals(document.getElementById('afterlast').parentNode, document.body);
135+
}, 'The last test should not leave any tags open after parsing');
136+
137+
test(() => {
138+
const outerSelect = document.getElementById('nested1');
139+
const innerSelect = document.getElementById('expectafter1');
140+
const nextDiv = document.getElementById('expectafter1b');
141+
assert_true(!!outerSelect);
142+
assert_equals(innerSelect, null,'Nested select should be removed');
143+
assert_equals(outerSelect.nextElementSibling, nextDiv,'Subsequent content is there too');
144+
}, 'Nested selects should be retained 1');
145+
146+
test(() => {
147+
const outerSelect = document.getElementById('nested2');
148+
const innerSelect = document.getElementById('expectafter2');
149+
assert_true(!!outerSelect);
150+
assert_equals(innerSelect, null,'Nested select should be pushed out as the next sibling');
151+
}, 'Nested selects should be retained 2');
152+
153+
test(() => {
154+
assert_true(!!nested3);
155+
nested3.innerHTML = '<select id="ignored"><option>The New Option</option></select>';
156+
157+
const ignored = document.getElementById('ignored');
158+
assert_equals(ignored, null);
159+
160+
assert_equals(nested3.innerHTML, '<option>The New Option</option>');
161+
}, 'JS added nested select should be ignored');
162+
163+
test(() => {
164+
assert_true(!!nested4);
165+
nested4.innerHTML = '<select id="ignore1"><select id="ignore2"><option>The New Option</option></select></select>';
166+
167+
const ignored1 = document.getElementById('ignored1');
168+
assert_equals(ignored1, null);
169+
const ignored2 = document.getElementById('ignored2');
170+
assert_equals(ignored2, null);
171+
172+
assert_equals(nested4.innerHTML, '<option>The New Option</option>');
173+
}, 'JS added nested selects should be ignored');
174+
</script>

0 commit comments

Comments
 (0)