Skip to content

Commit 6147557

Browse files
committed Feb 4, 2025 ·
LibWebView: Normalize source-code text before highlighting it
The previous code to determine the SourceDocument's lines was too naive: the source text can contain other newline characters and sequences, and the HTML/CSS/JS syntax highlighters would take those into account when determining what line a token is on. This disagreement would cause incorrect highlighting, or even crashes, if the source didn't solely use `\n` for its newlines.

In order to have everyone agree on what a line is, this patch first processes the source to replace all newlines with `\n`. The need to copy the source like this is unfortunate, but viewing the source is a rare enough action that this should not cause any noticeable performance problems.

As the callers have a String, and we want a String, this also changes the function parameters to keep the source as a String instead of converting it to StringView and back.

Fixes #3169
1 parent b16b24c commit 6147557

File tree

2 files changed

+45
-12
lines changed

2 files changed

+45
-12
lines changed
 

‎Libraries/LibWebView/SourceHighlighter.cpp

+40-7
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,45 @@
1717

1818
namespace WebView {
1919

20-
SourceDocument::SourceDocument(StringView source)
21-
: m_source(source)
20+
// Builds the document from `source`. Every newline sequence is first normalized to '\n' so that the
// line table built at the end of this constructor is split on the same boundaries in every case.
SourceDocument::SourceDocument(String const& source)
{
    // HTML, CSS and JS differ slightly on what they consider a newline to be.
    // In order to make them get along in documents that include a mix of the three, process the source to make the
    // newlines consistent before doing any highlighting.

    // Optimization: If all the newlines are \n, just use the input string.
    if (!source.code_points().contains_any_of(Array<u32, 3> { '\r', JS::LINE_SEPARATOR, JS::PARAGRAPH_SEPARATOR })) {
        m_source = source;
    } else {
        StringBuilder builder { source.byte_count() };
        // Convert any \r\n, \r, <LS> or <PS> to \n.
        // A \r is not emitted immediately: it is remembered so that a following \n can be merged with it
        // into a single newline. Any other follower flushes the pending newline first.
        bool previous_was_cr = false;
        for (u32 code_point : source.code_points()) {
            if (previous_was_cr && code_point != '\n')
                builder.append('\n');
            previous_was_cr = false;

            switch (code_point) {
            case '\r':
                previous_was_cr = true;
                break;
            case JS::LINE_SEPARATOR:
            case JS::PARAGRAPH_SEPARATOR:
                builder.append('\n');
                break;
            default:
                builder.append_code_point(code_point);
            }
        }
        // A \r that is the very last code point has no follower to flush it inside the loop.
        // Emit its newline here so a trailing lone \r still terminates a line instead of vanishing.
        if (previous_was_cr)
            builder.append('\n');

        // Only code points taken from `source`, plus '\n', were appended to the builder.
        m_source = builder.to_string_without_validation();
    }

    // With the text normalized, '\n' is the only line separator left to split on.
    m_source.code_points().for_each_split_view(
        [](u32 it) { return it == '\n'; },
        SplitBehavior::KeepEmpty,
        [&](auto line) {
            m_lines.append(Syntax::TextDocumentLine { *this, line.as_string() });
        });
}
2760

2861
Syntax::TextDocumentLine& SourceDocument::line(size_t line_index)
@@ -35,7 +68,7 @@ Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const
3568
return m_lines[line_index];
3669
}
3770

38-
SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language)
71+
SourceHighlighterClient::SourceHighlighterClient(String const& source, Syntax::Language language)
3972
: m_document(SourceDocument::create(source))
4073
{
4174
// HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span.
@@ -114,7 +147,7 @@ void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector<Syntax:
114147
document().set_folding_regions(move(folding_regions));
115148
}
116149

117-
String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView source, Syntax::Language language, HighlightOutputMode mode)
150+
String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language language, HighlightOutputMode mode)
118151
{
119152
SourceHighlighterClient highlighter_client { source, language };
120153
return highlighter_client.to_html_string(url, base_url, mode);

‎Libraries/LibWebView/SourceHighlighter.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ enum class HighlightOutputMode {
2424

2525
class SourceDocument final : public Syntax::Document {
2626
public:
27-
static NonnullRefPtr<SourceDocument> create(StringView source)
27+
// Factory: heap-allocates a SourceDocument for `source` and hands it to adopt_ref(), returning a NonnullRefPtr.
// NOTE(review): the result of `new (nothrow)` is dereferenced without a null check — confirm that
// allocation failure is acceptable/handled by project convention here.
static NonnullRefPtr<SourceDocument> create(String const& source)
2828
{
2929
return adopt_ref(*new (nothrow) SourceDocument(source));
3030
}
@@ -38,18 +38,18 @@ class SourceDocument final : public Syntax::Document {
3838
virtual Syntax::TextDocumentLine& line(size_t line_index) override;
3939

4040
private:
41-
SourceDocument(StringView source);
41+
SourceDocument(String const& source);
4242

4343
// ^ Syntax::Document
4444
virtual void update_views(Badge<Syntax::TextDocumentLine>) override { }
4545

46-
StringView m_source;
46+
String m_source;
4747
Vector<Syntax::TextDocumentLine> m_lines;
4848
};
4949

5050
class SourceHighlighterClient final : public Syntax::HighlighterClient {
5151
public:
52-
SourceHighlighterClient(StringView source, Syntax::Language);
52+
SourceHighlighterClient(String const& source, Syntax::Language);
5353
virtual ~SourceHighlighterClient() = default;
5454

5555
String to_html_string(URL::URL const& url, URL::URL const& base_url, HighlightOutputMode) const;
@@ -75,7 +75,7 @@ class SourceHighlighterClient final : public Syntax::HighlighterClient {
7575
OwnPtr<Syntax::Highlighter> m_highlighter;
7676
};
7777

78-
String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView, Syntax::Language, HighlightOutputMode);
78+
String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language, HighlightOutputMode);
7979

8080
constexpr inline StringView HTML_HIGHLIGHTER_STYLE = R"~~~(
8181
@media (prefers-color-scheme: dark) {

0 commit comments

Comments
 (0)
Please sign in to comment.