Skip to content

Commit 6e2915d

Browse files
committed
Remove redundant logic and simplify string tokenization
1 parent a2e3aa2 commit 6e2915d

File tree

1 file changed

+34
-27
lines changed

1 file changed

+34
-27
lines changed

formatter/generic/genericformatter.cpp

Lines changed: 34 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ enum ItemType
1919
StringComponent,
2020
StringSeparator,
2121
StringWhitespace,
22-
StringNewline,
2322
FormatSpecifier,
2423
EscapeSequence,
2524
Group,
@@ -263,16 +262,26 @@ static vector<InstructionTextToken> ParseStringToken(
263262
const auto& src = unprocessedStringToken.text;
264263
const size_t tail = src.size();
265264

266-
// Max parsing length set for performance reasons, increase at your own peril!
265+
// Max parsing length set to max annotation length
267266
if (tail > maxParsingLength)
268267
return { unprocessedStringToken };
269-
270268
vector<InstructionTextToken> result;
271269
size_t curStart = 0, curEnd = 0;
270+
272271
auto ConstructToken = [&](size_t start, size_t end) {
273-
result.emplace_back(StringToken, string(src.substr(start, end - start)));
272+
InstructionTextToken token = unprocessedStringToken;
273+
const string newTxt = string(src.substr(start, end - start));
274+
token.text = newTxt;
275+
token.width = newTxt.size();
276+
result.emplace_back(token);
274277
};
275278

279+
auto flushToken = [&](size_t start, size_t end)
280+
{
281+
if (start < end)
282+
ConstructToken(start, end);
283+
};
284+
276285
// We generally split along spaces while keeping words intact, but some cases have
277286
// specific splitting behavior:
278287
//
@@ -288,8 +297,7 @@ static vector<InstructionTextToken> ParseStringToken(
288297
if (c == '%')
289298
{
290299
// Flush before format specifier
291-
if (curStart < curEnd)
292-
ConstructToken(curStart, curEnd);
300+
flushToken(curStart, curEnd);
293301

294302
size_t start = curEnd;
295303
curEnd++;
@@ -301,8 +309,7 @@ static vector<InstructionTextToken> ParseStringToken(
301309
else if (c == '\\')
302310
{
303311
// Flush before escape sequence
304-
if (curStart < curEnd)
305-
ConstructToken(curStart, curEnd);
312+
flushToken(curStart, curEnd);
306313

307314
size_t start = curEnd;
308315
curEnd++; // consume '\'
@@ -314,8 +321,8 @@ static vector<InstructionTextToken> ParseStringToken(
314321
else if (c == ',' || c == '.' || c == ':' || c == ';' || isspace(c))
315322
{
316323
// Flush before punctuation
317-
if (curStart < curEnd)
318-
ConstructToken(curStart, curEnd);
324+
flushToken(curStart, curEnd);
325+
319326
// Group together repeated punctuation
320327
size_t start = curEnd;
321328
while (curEnd < tail && src[curEnd] == c)
@@ -329,9 +336,7 @@ static vector<InstructionTextToken> ParseStringToken(
329336
}
330337
}
331338

332-
if (curStart < curEnd)
333-
ConstructToken(curStart, curEnd);
334-
339+
flushToken(curStart, curEnd);
335340
return result;
336341
}
337342

@@ -341,7 +346,7 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
341346
bool hasStrings = false;
342347
for (auto& i : items)
343348
{
344-
if ((i.type == StringSeparator) && !i.tokens.empty())
349+
if (i.type == StringSeparator && !i.tokens.empty())
345350
{
346351
// We try to push separators onto a preceding word, otherwise treat as
347352
// a singular atom
@@ -370,13 +375,16 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
370375
}
371376
else if (i.type == FormatSpecifier || i.type == EscapeSequence)
372377
{
378+
// Flush previous tokens before special sequences like format specifiers or
379+
// escape sequences
373380
if (!pending.empty())
374381
{
375382
result.push_back(Item {StringComponent, pending, {}, 0 });
376383
pending.clear();
377384
}
378385
result.push_back(Item { Atom, i.items, i.tokens, i.width});
379386
}
387+
380388
else if (i.type == StartOfContainer && pending.empty())
381389
{
382390
result.push_back(i);
@@ -739,6 +747,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
739747
switch (token.type)
740748
{
741749
case BraceToken:
750+
// Beginning of string
742751
if (tokenIndex + 1 < currentLine.tokens.size()
743752
&& currentLine.tokens[tokenIndex + 1].type == StringToken)
744753
{
@@ -751,7 +760,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
751760
items.clear();
752761
items.push_back(Item {StartOfContainer, {}, {token}, 0});
753762
}
754-
// Check for end of string - gross!
763+
// End of string
755764
else if (currentLine.tokens[tokenIndex].type == StringToken
756765
&& tokenIndex + 1 < currentLine.tokens.size()
757766
&& currentLine.tokens[tokenIndex + 1].type == BraceToken)
@@ -817,24 +826,15 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
817826
case StringToken:
818827
{
819828
vector<InstructionTextToken> stringTokens = ParseStringToken(token, settings.maximumAnnotationLength);
820-
for (size_t k = 0; k < stringTokens.size(); k++)
829+
for (auto subToken : stringTokens)
821830
{
822-
InstructionTextToken subToken = stringTokens[k];
823831
string trimmedSubText = TrimString(subToken.text);
824832
if (trimmedSubText.empty())
825833
items.push_back(Item {StringWhitespace, {}, {subToken}, 0});
826834
if (trimmedSubText[0] == '%')
827835
items.push_back(Item {FormatSpecifier, {}, {subToken}, 0});
828836
else if (!trimmedSubText.empty() && trimmedSubText[0] == '\\')
829-
{
830-
if (trimmedSubText.size() > 1)
831-
{
832-
if (trimmedSubText[1] == 'n')
833-
items.push_back(Item {StringNewline, {}, {subToken}, 0});
834-
continue;
835-
}
836837
items.push_back(Item {EscapeSequence, {}, {subToken}, 0});
837-
}
838838
else if (trimmedSubText[0] == ',' || trimmedSubText[0] == '.' || trimmedSubText[0] == ':' || trimmedSubText[0] == ';')
839839
items.push_back(Item {StringSeparator, {}, {subToken}, 0});
840840
else
@@ -937,9 +937,16 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
937937

938938
for (auto item = items.begin(); item != items.end();)
939939
{
940-
if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
940+
if (item->type == StringComponent && currentWidth + item->width > desiredWidth)
941+
{
942+
// If a string is too wide to fit on the current line, create a newline
943+
// without additional indentation
944+
newLine();
945+
}
946+
else if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
941947
{
942948
// Current item is too wide to fit on the current line, will need to start a new line.
949+
// Whitespace is allowed to be too wide; we push it on as the preceding word is wrapped.
943950
auto next = item;
944951
++next;
945952

@@ -948,7 +955,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
948955
// is a container, always use the splitting behavior.
949956
if (currentWidth == 0 || item->width > desiredContinuationWidth || item->type == Container)
950957
{
951-
if ((item->type == Argument || item->type == StringComponent) && currentWidth != 0)
958+
if (item->type == Argument && currentWidth != 0)
952959
{
953960
// If an argument is too wide to show on a single line all by itself, start the argument
954961
// on a new line, and add additional indentation for the continuation of the argument.

0 commit comments

Comments
 (0)