Skip to content

Commit 7f64bc2

Browse files
committed
LibWeb: Relax HTML parser to allow more tags inside <select>
This implements the parsing part of the customizable <select> spec update. See whatwg/html PR #10548. Two failing subtests in `html5lib_innerHTML_tests_innerHTML_1.html` and `customizable-select/select-parsing.html` are due to the spec still disallowing `<input>` inside `<select>`, even though Chrome has already implemented this behavior (see whatwg/html#11288).
1 parent 40d1f42 commit 7f64bc2

File tree

17 files changed

+373
-384
lines changed

17 files changed

+373
-384
lines changed

Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

Lines changed: 107 additions & 316 deletions
Large diffs are not rendered by default.

Libraries/LibWeb/HTML/Parser/HTMLParser.h

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,29 +27,27 @@ class SpeculativeHTMLParser;
2727

2828
namespace Web::HTML {
2929

30-
#define ENUMERATE_INSERTION_MODES \
31-
__ENUMERATE_INSERTION_MODE(Initial) \
32-
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
33-
__ENUMERATE_INSERTION_MODE(BeforeHead) \
34-
__ENUMERATE_INSERTION_MODE(InHead) \
35-
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
36-
__ENUMERATE_INSERTION_MODE(AfterHead) \
37-
__ENUMERATE_INSERTION_MODE(InBody) \
38-
__ENUMERATE_INSERTION_MODE(Text) \
39-
__ENUMERATE_INSERTION_MODE(InTable) \
40-
__ENUMERATE_INSERTION_MODE(InTableText) \
41-
__ENUMERATE_INSERTION_MODE(InCaption) \
42-
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
43-
__ENUMERATE_INSERTION_MODE(InTableBody) \
44-
__ENUMERATE_INSERTION_MODE(InRow) \
45-
__ENUMERATE_INSERTION_MODE(InCell) \
46-
__ENUMERATE_INSERTION_MODE(InSelect) \
47-
__ENUMERATE_INSERTION_MODE(InSelectInTable) \
48-
__ENUMERATE_INSERTION_MODE(InTemplate) \
49-
__ENUMERATE_INSERTION_MODE(AfterBody) \
50-
__ENUMERATE_INSERTION_MODE(InFrameset) \
51-
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
52-
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
30+
#define ENUMERATE_INSERTION_MODES \
31+
__ENUMERATE_INSERTION_MODE(Initial) \
32+
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
33+
__ENUMERATE_INSERTION_MODE(BeforeHead) \
34+
__ENUMERATE_INSERTION_MODE(InHead) \
35+
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
36+
__ENUMERATE_INSERTION_MODE(AfterHead) \
37+
__ENUMERATE_INSERTION_MODE(InBody) \
38+
__ENUMERATE_INSERTION_MODE(Text) \
39+
__ENUMERATE_INSERTION_MODE(InTable) \
40+
__ENUMERATE_INSERTION_MODE(InTableText) \
41+
__ENUMERATE_INSERTION_MODE(InCaption) \
42+
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
43+
__ENUMERATE_INSERTION_MODE(InTableBody) \
44+
__ENUMERATE_INSERTION_MODE(InRow) \
45+
__ENUMERATE_INSERTION_MODE(InCell) \
46+
__ENUMERATE_INSERTION_MODE(InTemplate) \
47+
__ENUMERATE_INSERTION_MODE(AfterBody) \
48+
__ENUMERATE_INSERTION_MODE(InFrameset) \
49+
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
50+
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
5351
__ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
5452

5553
class WEB_API HTMLParser final : public JS::Cell {
@@ -129,8 +127,6 @@ class WEB_API HTMLParser final : public JS::Cell {
129127
void handle_in_row(HTMLToken&);
130128
void handle_in_cell(HTMLToken&);
131129
void handle_in_table_text(HTMLToken&);
132-
void handle_in_select_in_table(HTMLToken&);
133-
void handle_in_select(HTMLToken&);
134130
void handle_in_caption(HTMLToken&);
135131
void handle_in_column_group(HTMLToken&);
136132
void handle_in_template(HTMLToken&);

Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,18 @@
1313

1414
namespace Web::HTML {
1515

16-
static Vector<FlyString> s_base_list { "applet"_fly_string, "caption"_fly_string, "html"_fly_string, "table"_fly_string, "td"_fly_string, "th"_fly_string, "marquee"_fly_string, "object"_fly_string, "template"_fly_string };
16+
static Vector<FlyString> s_base_list {
17+
"applet"_fly_string,
18+
"caption"_fly_string,
19+
"html"_fly_string,
20+
"table"_fly_string,
21+
"td"_fly_string,
22+
"th"_fly_string,
23+
"marquee"_fly_string,
24+
"object"_fly_string,
25+
"select"_fly_string,
26+
"template"_fly_string
27+
};
1728

1829
StackOfOpenElements::~StackOfOpenElements() = default;
1930

@@ -22,6 +33,7 @@ void StackOfOpenElements::visit_edges(JS::Cell::Visitor& visitor)
2233
visitor.visit(m_elements);
2334
}
2435

36+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
2537
bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<FlyString> const& list, CheckMathAndSVG check_math_and_svg) const
2638
{
2739
for (auto const& element : m_elements.in_reverse()) {
@@ -37,11 +49,13 @@ bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<Fl
3749
VERIFY_NOT_REACHED();
3850
}
3951

52+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
4053
bool StackOfOpenElements::has_in_scope(FlyString const& tag_name) const
4154
{
4255
return has_in_scope_impl(tag_name, s_base_list, CheckMathAndSVG::Yes);
4356
}
4457

58+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
4559
bool StackOfOpenElements::has_in_scope_impl(DOM::Element const& target_node, Vector<FlyString> const& list) const
4660
{
4761
for (auto& element : m_elements.in_reverse()) {
@@ -57,23 +71,27 @@ bool StackOfOpenElements::has_in_scope_impl(DOM::Element const& target_node, Vec
5771
VERIFY_NOT_REACHED();
5872
}
5973

74+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
6075
bool StackOfOpenElements::has_in_scope(DOM::Element const& target_node) const
6176
{
6277
return has_in_scope_impl(target_node, s_base_list);
6378
}
6479

80+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-button-scope
6581
bool StackOfOpenElements::has_in_button_scope(FlyString const& tag_name) const
6682
{
6783
auto list = s_base_list;
6884
list.append("button"_fly_string);
6985
return has_in_scope_impl(tag_name, list, CheckMathAndSVG::Yes);
7086
}
7187

88+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-table-scope
7289
bool StackOfOpenElements::has_in_table_scope(FlyString const& tag_name) const
7390
{
7491
return has_in_scope_impl(tag_name, { "html"_fly_string, "table"_fly_string, "template"_fly_string }, CheckMathAndSVG::No);
7592
}
7693

94+
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-list-item-scope
7795
bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) const
7896
{
7997
auto list = s_base_list;
@@ -82,31 +100,6 @@ bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) cons
82100
return has_in_scope_impl(tag_name, list, CheckMathAndSVG::Yes);
83101
}
84102

85-
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
86-
// The stack of open elements is said to have a particular element in select scope
87-
// when it has that element in the specific scope consisting of all element types except the following:
88-
// - optgroup in the HTML namespace
89-
// - option in the HTML namespace
90-
// NOTE: In this case it's "all element types _except_"
91-
bool StackOfOpenElements::has_in_select_scope(FlyString const& tag_name) const
92-
{
93-
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
94-
// 1. Initialize node to be the current node (the bottommost node of the stack).
95-
for (auto& node : m_elements.in_reverse()) {
96-
// 2. If node is target node, terminate in a match state.
97-
if (node->local_name() == tag_name)
98-
return true;
99-
// 3. Otherwise, if node is one of the element types in list, terminate in a failure state.
100-
// NOTE: Here "list" refers to all elements except option and optgroup
101-
if (node->local_name() != HTML::TagNames::option && node->local_name() != HTML::TagNames::optgroup)
102-
return false;
103-
// 4. Otherwise, set node to the previous entry in the stack of open elements and return to step 2.
104-
}
105-
// NOTE: This will never fail, since the loop will always terminate in the previous step if the top of the stack
106-
// — an html element — is reached.
107-
VERIFY_NOT_REACHED();
108-
}
109-
110103
bool StackOfOpenElements::contains(DOM::Element const& element) const
111104
{
112105
for (auto& element_on_stack : m_elements) {

Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class StackOfOpenElements {
3939
bool has_in_button_scope(FlyString const& tag_name) const;
4040
bool has_in_table_scope(FlyString const& tag_name) const;
4141
bool has_in_list_item_scope(FlyString const& tag_name) const;
42-
bool has_in_select_scope(FlyString const& tag_name) const;
4342

4443
bool has_in_scope(DOM::Element const&) const;
4544

@@ -49,7 +48,7 @@ class StackOfOpenElements {
4948
auto const& elements() const { return m_elements; }
5049
auto& elements() { return m_elements; }
5150

52-
void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& local_name);
51+
void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& tag_name);
5352

5453
GC::Ptr<DOM::Element> topmost_special_node_below(DOM::Element const&);
5554

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
Harness status: OK
2+
3+
Found 17 tests
4+
5+
16 Pass
6+
1 Fail
7+
Pass <div>s, <button>s, and <datalist>s should be allowed in <select>
8+
Pass </select> should close <button>
9+
Pass </select> should close <datalist>
10+
Pass <select> in <button> in <select> should remove inner <select>
11+
Pass <select> in <select><button><div> should remove inner <select>
12+
Pass JS added nested <select> should be ignored
13+
Pass JS added nested <select>s should be ignored
14+
Pass Divs and imgs should be allowed as direct children of select and within options without a datalist
15+
Pass Input tags should not parse inside select instead of closing the select
16+
Pass textarea tags should parse inside select instead of closing the select
17+
Fail Input tags should parse inside select if nested in another tag
18+
Pass Input tags should close select when directly inside an <option>
19+
Pass The last test should not leave any tags open after parsing
20+
Pass Nested selects should be retained 1
21+
Pass Nested selects should be retained 2
22+
Pass JS added nested select should be ignored
23+
Pass JS added nested selects should be ignored

Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html5lib_innerHTML_tests_innerHTML_1.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ Harness status: OK
22

33
Found 81 tests
44

5-
81 Pass
5+
80 Pass
6+
1 Fail
67
Pass html5lib_innerHTML_tests_innerHTML_1.html 7a9e287595dd570e0f19b7eec0ac424228908daf
78
Pass html5lib_innerHTML_tests_innerHTML_1.html 6f766fa07c8697a5379c5542adbba2a42f913004
89
Pass html5lib_innerHTML_tests_innerHTML_1.html dbbe75ae41228f9264d56a018e620217ec87fd32
@@ -78,7 +79,7 @@ Pass html5lib_innerHTML_tests_innerHTML_1.html a23b70f1f246ba08d13b570319391b4a5
7879
Pass html5lib_innerHTML_tests_innerHTML_1.html 9d5e0c25bfe921df9ea2897c027f42bc88950e69
7980
Pass html5lib_innerHTML_tests_innerHTML_1.html 9210d577d6deecf5ab3505af86c501c5befa0b50
8081
Pass html5lib_innerHTML_tests_innerHTML_1.html c34af491c0a339db6ba63fcc478108533347319b
81-
Pass html5lib_innerHTML_tests_innerHTML_1.html 2c4284e6b2bb480daa50bca43bcbe29cfcdeeab4
82+
Fail html5lib_innerHTML_tests_innerHTML_1.html 2c4284e6b2bb480daa50bca43bcbe29cfcdeeab4
8283
Pass html5lib_innerHTML_tests_innerHTML_1.html d75277b65d0118463afeb66b478509d4e27565ab
8384
Pass html5lib_innerHTML_tests_innerHTML_1.html b354df69dbe9b3ef0c42177648e3aace114cf8ea
8485
Pass html5lib_innerHTML_tests_innerHTML_1.html fd3be386292ea1f411cea8e86e29595deb177d28

Tests/LibWeb/Text/expected/wpt-import/html/syntax/parsing/html5lib_webkit02.txt

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
Harness status: OK
22

3-
Found 30 tests
3+
Found 41 tests
44

5-
30 Pass
5+
39 Pass
6+
2 Fail
67
Pass html5lib_webkit02.html f50b8c15847159a6d2c6ecc2bd1e4a944ba5aae6
78
Pass html5lib_webkit02.html 326328ea805a2ebdde707e08567713f88a4cf8ab
89
Pass html5lib_webkit02.html 05138397908cfdad69a3bfe5da5a06098320b504
@@ -32,4 +33,15 @@ Pass html5lib_webkit02.html ecd089f9b5193fad306c5b475c4711547fe5e209
3233
Pass html5lib_webkit02.html cee2230c74671c594a1140a68d16e3d3e5ae005a
3334
Pass html5lib_webkit02.html 22b9fe36797d70a3b71a6aadc6ad7cff23c3fc90
3435
Pass html5lib_webkit02.html a82c3bf49c381b5f58c5c8a4bbbe0cef2458e28a
35-
Pass html5lib_webkit02.html 61f8d527795dc8044a95a3e2437de81e16597ceb
36+
Pass html5lib_webkit02.html 61f8d527795dc8044a95a3e2437de81e16597ceb
37+
Pass html5lib_webkit02.html 3a112027586c5c0fb506c49aa01d3f695ca0a5fa
38+
Pass html5lib_webkit02.html 3e95ae8a102dc5d0ae04764420418cb19a288fb7
39+
Pass html5lib_webkit02.html 4bd119721a0eb84aa72a30ea83fc5beb5843ca68
40+
Pass html5lib_webkit02.html a15e2f18330327cc4dcfe8c10af07714903f773a
41+
Pass html5lib_webkit02.html 29539f494ab6b37b18213f062da16b0f0c9d00d9
42+
Pass html5lib_webkit02.html a6c50b1f6bfbe3c55102d8cad0950d0b68cc6729
43+
Pass html5lib_webkit02.html 411f313a1b92ac7be549c41ee6758f952dc2dced
44+
Pass html5lib_webkit02.html 84467597648753feeb78793e2cc9196bc75857c2
45+
Pass html5lib_webkit02.html ae6f2e0a014f620269920ceb12660ec708236846
46+
Fail html5lib_webkit02.html fd7aea4db6702879b9f8f410b0400d9300ae9c05
47+
Fail html5lib_webkit02.html bad5cceffaffe98e3a1522be5f7df3e3e179d500

0 commit comments

Comments
 (0)