Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
423 changes: 107 additions & 316 deletions Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

Large diffs are not rendered by default.

46 changes: 21 additions & 25 deletions Libraries/LibWeb/HTML/Parser/HTMLParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,27 @@ class SpeculativeHTMLParser;

namespace Web::HTML {

#define ENUMERATE_INSERTION_MODES \
__ENUMERATE_INSERTION_MODE(Initial) \
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
__ENUMERATE_INSERTION_MODE(BeforeHead) \
__ENUMERATE_INSERTION_MODE(InHead) \
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
__ENUMERATE_INSERTION_MODE(AfterHead) \
__ENUMERATE_INSERTION_MODE(InBody) \
__ENUMERATE_INSERTION_MODE(Text) \
__ENUMERATE_INSERTION_MODE(InTable) \
__ENUMERATE_INSERTION_MODE(InTableText) \
__ENUMERATE_INSERTION_MODE(InCaption) \
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
__ENUMERATE_INSERTION_MODE(InTableBody) \
__ENUMERATE_INSERTION_MODE(InRow) \
__ENUMERATE_INSERTION_MODE(InCell) \
__ENUMERATE_INSERTION_MODE(InSelect) \
__ENUMERATE_INSERTION_MODE(InSelectInTable) \
__ENUMERATE_INSERTION_MODE(InTemplate) \
__ENUMERATE_INSERTION_MODE(AfterBody) \
__ENUMERATE_INSERTION_MODE(InFrameset) \
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
#define ENUMERATE_INSERTION_MODES \
__ENUMERATE_INSERTION_MODE(Initial) \
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
__ENUMERATE_INSERTION_MODE(BeforeHead) \
__ENUMERATE_INSERTION_MODE(InHead) \
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
__ENUMERATE_INSERTION_MODE(AfterHead) \
__ENUMERATE_INSERTION_MODE(InBody) \
__ENUMERATE_INSERTION_MODE(Text) \
__ENUMERATE_INSERTION_MODE(InTable) \
__ENUMERATE_INSERTION_MODE(InTableText) \
__ENUMERATE_INSERTION_MODE(InCaption) \
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
__ENUMERATE_INSERTION_MODE(InTableBody) \
__ENUMERATE_INSERTION_MODE(InRow) \
__ENUMERATE_INSERTION_MODE(InCell) \
__ENUMERATE_INSERTION_MODE(InTemplate) \
__ENUMERATE_INSERTION_MODE(AfterBody) \
__ENUMERATE_INSERTION_MODE(InFrameset) \
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
__ENUMERATE_INSERTION_MODE(AfterAfterFrameset)

class WEB_API HTMLParser final : public JS::Cell {
Expand Down Expand Up @@ -129,8 +127,6 @@ class WEB_API HTMLParser final : public JS::Cell {
void handle_in_row(HTMLToken&);
void handle_in_cell(HTMLToken&);
void handle_in_table_text(HTMLToken&);
void handle_in_select_in_table(HTMLToken&);
void handle_in_select(HTMLToken&);
void handle_in_caption(HTMLToken&);
void handle_in_column_group(HTMLToken&);
void handle_in_template(HTMLToken&);
Expand Down
45 changes: 19 additions & 26 deletions Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,18 @@

namespace Web::HTML {

static Vector<FlyString> s_base_list { "applet"_fly_string, "caption"_fly_string, "html"_fly_string, "table"_fly_string, "td"_fly_string, "th"_fly_string, "marquee"_fly_string, "object"_fly_string, "template"_fly_string };
// Local names of the elements that form the base scope list. has_in_scope() passes this
// list straight to has_in_scope_impl(); has_in_button_scope() and has_in_list_item_scope()
// start from a copy of it before adding their own entries.
static Vector<FlyString> s_base_list {
    "applet"_fly_string,
    "caption"_fly_string,
    "html"_fly_string,
    "table"_fly_string,
    "td"_fly_string,
    "th"_fly_string,
    "marquee"_fly_string,
    "object"_fly_string,
    "select"_fly_string,
    "template"_fly_string,
};

StackOfOpenElements::~StackOfOpenElements() = default;

Expand All @@ -22,6 +33,7 @@ void StackOfOpenElements::visit_edges(JS::Cell::Visitor& visitor)
visitor.visit(m_elements);
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<FlyString> const& list, CheckMathAndSVG check_math_and_svg) const
{
for (auto const& element : m_elements.in_reverse()) {
Expand All @@ -37,11 +49,13 @@ bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<Fl
VERIFY_NOT_REACHED();
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
// Reports whether an element named tag_name is in scope, delegating to has_in_scope_impl()
// with the base scope list (s_base_list) and CheckMathAndSVG::Yes.
bool StackOfOpenElements::has_in_scope(FlyString const& tag_name) const
{
    bool const is_in_scope = has_in_scope_impl(tag_name, s_base_list, CheckMathAndSVG::Yes);
    return is_in_scope;
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
bool StackOfOpenElements::has_in_scope_impl(DOM::Element const& target_node, Vector<FlyString> const& list) const
{
for (auto& element : m_elements.in_reverse()) {
Expand All @@ -57,23 +71,27 @@ bool StackOfOpenElements::has_in_scope_impl(DOM::Element const& target_node, Vec
VERIFY_NOT_REACHED();
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope
// Element overload: reports whether target_node is in scope, delegating to the
// element-based has_in_scope_impl() with the base scope list (s_base_list).
bool StackOfOpenElements::has_in_scope(DOM::Element const& target_node) const
{
    bool const is_in_scope = has_in_scope_impl(target_node, s_base_list);
    return is_in_scope;
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-button-scope
// Button scope uses the base scope list extended with "button".
bool StackOfOpenElements::has_in_button_scope(FlyString const& tag_name) const
{
    // Work on a copy so the shared base list is left untouched.
    Vector<FlyString> button_scope_list = s_base_list;
    button_scope_list.append("button"_fly_string);

    return has_in_scope_impl(tag_name, button_scope_list, CheckMathAndSVG::Yes);
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-table-scope
// Table scope uses its own three-entry list rather than the base scope list, and passes
// CheckMathAndSVG::No to has_in_scope_impl().
bool StackOfOpenElements::has_in_table_scope(FlyString const& tag_name) const
{
    Vector<FlyString> const table_scope_list {
        "html"_fly_string,
        "table"_fly_string,
        "template"_fly_string,
    };
    return has_in_scope_impl(tag_name, table_scope_list, CheckMathAndSVG::No);
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-list-item-scope
bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) const
{
auto list = s_base_list;
Expand All @@ -82,31 +100,6 @@ bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) cons
return has_in_scope_impl(tag_name, list, CheckMathAndSVG::Yes);
}

// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
// Select scope is defined by exclusion: the scope list is every element type *except*
// - optgroup in the HTML namespace
// - option in the HTML namespace
// so the search may only step over option/optgroup entries on its way to the target.
bool StackOfOpenElements::has_in_select_scope(FlyString const& tag_name) const
{
    // https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
    // Step 1: start at the current node (bottom of the stack) and walk towards the top.
    for (auto& node : m_elements.in_reverse()) {
        auto const& node_name = node->local_name();

        // Step 2: reaching the target terminates in a match state.
        if (node_name == tag_name)
            return true;

        // Step 3: anything other than option/optgroup is "in the list" and terminates
        // the search in a failure state.
        bool const may_step_over = node_name == HTML::TagNames::option || node_name == HTML::TagNames::optgroup;
        if (!may_step_over)
            return false;

        // Step 4: otherwise continue with the previous entry in the stack.
    }

    // The loop is expected to return before running out of entries: the html element at
    // the top of the stack is not option/optgroup, so step 3 fails there at the latest.
    VERIFY_NOT_REACHED();
}

bool StackOfOpenElements::contains(DOM::Element const& element) const
{
for (auto& element_on_stack : m_elements) {
Expand Down
3 changes: 1 addition & 2 deletions Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class StackOfOpenElements {
bool has_in_button_scope(FlyString const& tag_name) const;
bool has_in_table_scope(FlyString const& tag_name) const;
bool has_in_list_item_scope(FlyString const& tag_name) const;
bool has_in_select_scope(FlyString const& tag_name) const;

bool has_in_scope(DOM::Element const&) const;

Expand All @@ -49,7 +48,7 @@ class StackOfOpenElements {
auto const& elements() const { return m_elements; }
auto& elements() { return m_elements; }

void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& local_name);
void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& tag_name);

GC::Ptr<DOM::Element> topmost_special_node_below(DOM::Element const&);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Harness status: OK

Found 17 tests

16 Pass
1 Fail
Pass <div>s, <button>s, and <datalist>s should be allowed in <select>
Pass </select> should close <button>
Pass </select> should close <datalist>
Pass <select> in <button> in <select> should remove inner <select>
Pass <select> in <select><button><div> should remove inner <select>
Pass JS added nested <select> should be ignored
Pass JS added nested <select>s should be ignored
Pass Divs and imgs should be allowed as direct children of select and within options without a datalist
Pass Input tags should not parse inside select instead of closing the select
Pass textarea tags should parse inside select instead of closing the select
Fail Input tags should parse inside select if nested in another tag
Pass Input tags should close select when directly inside an <option>
Pass The last test should not leave any tags open after parsing
Pass Nested selects should be retained 1
Pass Nested selects should be retained 2
Pass JS added nested select should be ignored
Pass JS added nested selects should be ignored
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ Harness status: OK

Found 81 tests

81 Pass
80 Pass
1 Fail
Pass html5lib_innerHTML_tests_innerHTML_1.html 7a9e287595dd570e0f19b7eec0ac424228908daf
Pass html5lib_innerHTML_tests_innerHTML_1.html 6f766fa07c8697a5379c5542adbba2a42f913004
Pass html5lib_innerHTML_tests_innerHTML_1.html dbbe75ae41228f9264d56a018e620217ec87fd32
Expand Down Expand Up @@ -78,7 +79,7 @@ Pass html5lib_innerHTML_tests_innerHTML_1.html a23b70f1f246ba08d13b570319391b4a5
Pass html5lib_innerHTML_tests_innerHTML_1.html 9d5e0c25bfe921df9ea2897c027f42bc88950e69
Pass html5lib_innerHTML_tests_innerHTML_1.html 9210d577d6deecf5ab3505af86c501c5befa0b50
Pass html5lib_innerHTML_tests_innerHTML_1.html c34af491c0a339db6ba63fcc478108533347319b
Pass html5lib_innerHTML_tests_innerHTML_1.html 2c4284e6b2bb480daa50bca43bcbe29cfcdeeab4
Fail html5lib_innerHTML_tests_innerHTML_1.html 2c4284e6b2bb480daa50bca43bcbe29cfcdeeab4
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we now failing this test though?

Pass html5lib_innerHTML_tests_innerHTML_1.html d75277b65d0118463afeb66b478509d4e27565ab
Pass html5lib_innerHTML_tests_innerHTML_1.html b354df69dbe9b3ef0c42177648e3aace114cf8ea
Pass html5lib_innerHTML_tests_innerHTML_1.html fd3be386292ea1f411cea8e86e29595deb177d28
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
Harness status: OK

Found 30 tests
Found 41 tests

30 Pass
39 Pass
2 Fail
Pass html5lib_webkit02.html f50b8c15847159a6d2c6ecc2bd1e4a944ba5aae6
Pass html5lib_webkit02.html 326328ea805a2ebdde707e08567713f88a4cf8ab
Pass html5lib_webkit02.html 05138397908cfdad69a3bfe5da5a06098320b504
Expand Down Expand Up @@ -32,4 +33,15 @@ Pass html5lib_webkit02.html ecd089f9b5193fad306c5b475c4711547fe5e209
Pass html5lib_webkit02.html cee2230c74671c594a1140a68d16e3d3e5ae005a
Pass html5lib_webkit02.html 22b9fe36797d70a3b71a6aadc6ad7cff23c3fc90
Pass html5lib_webkit02.html a82c3bf49c381b5f58c5c8a4bbbe0cef2458e28a
Pass html5lib_webkit02.html 61f8d527795dc8044a95a3e2437de81e16597ceb
Pass html5lib_webkit02.html 61f8d527795dc8044a95a3e2437de81e16597ceb
Pass html5lib_webkit02.html 3a112027586c5c0fb506c49aa01d3f695ca0a5fa
Pass html5lib_webkit02.html 3e95ae8a102dc5d0ae04764420418cb19a288fb7
Pass html5lib_webkit02.html 4bd119721a0eb84aa72a30ea83fc5beb5843ca68
Pass html5lib_webkit02.html a15e2f18330327cc4dcfe8c10af07714903f773a
Pass html5lib_webkit02.html 29539f494ab6b37b18213f062da16b0f0c9d00d9
Pass html5lib_webkit02.html a6c50b1f6bfbe3c55102d8cad0950d0b68cc6729
Pass html5lib_webkit02.html 411f313a1b92ac7be549c41ee6758f952dc2dced
Pass html5lib_webkit02.html 84467597648753feeb78793e2cc9196bc75857c2
Pass html5lib_webkit02.html ae6f2e0a014f620269920ceb12660ec708236846
Fail html5lib_webkit02.html fd7aea4db6702879b9f8f410b0400d9300ae9c05
Fail html5lib_webkit02.html bad5cceffaffe98e3a1522be5f7df3e3e179d500
Loading
Loading