@@ -19,7 +19,6 @@ enum ItemType
19
19
StringComponent,
20
20
StringSeparator,
21
21
StringWhitespace,
22
- StringNewline,
23
22
FormatSpecifier,
24
23
EscapeSequence,
25
24
Group,
@@ -263,16 +262,26 @@ static vector<InstructionTextToken> ParseStringToken(
263
262
const auto & src = unprocessedStringToken.text ;
264
263
const size_t tail = src.size ();
265
264
266
- // Max parsing length set for performance reasons, increase at your own peril!
265
+ // Max parsing length set to max annotation length
267
266
if (tail > maxParsingLength)
268
267
return { unprocessedStringToken };
269
-
270
268
vector<InstructionTextToken> result;
271
269
size_t curStart = 0 , curEnd = 0 ;
270
+
272
271
auto ConstructToken = [&](size_t start, size_t end) {
273
- result.emplace_back (StringToken, string (src.substr (start, end - start)));
272
+ InstructionTextToken token = unprocessedStringToken;
273
+ const string newTxt = string (src.substr (start, end - start));
274
+ token.text = newTxt;
275
+ token.width = newTxt.size ();
276
+ result.emplace_back (token);
274
277
};
275
278
279
+ auto flushToken = [&](size_t start, size_t end)
280
+ {
281
+ if (start < end)
282
+ ConstructToken (start, end);
283
+ };
284
+
276
285
// We generally split along spaces while keeping words intact, but some cases have
277
286
// specific splitting behavior:
278
287
//
@@ -288,8 +297,7 @@ static vector<InstructionTextToken> ParseStringToken(
288
297
if (c == ' %' )
289
298
{
290
299
// Flush before format specifier
291
- if (curStart < curEnd)
292
- ConstructToken (curStart, curEnd);
300
+ flushToken (curStart, curEnd);
293
301
294
302
size_t start = curEnd;
295
303
curEnd++;
@@ -301,8 +309,7 @@ static vector<InstructionTextToken> ParseStringToken(
301
309
else if (c == ' \\ ' )
302
310
{
303
311
// Flush before escape sequence
304
- if (curStart < curEnd)
305
- ConstructToken (curStart, curEnd);
312
+ flushToken (curStart, curEnd);
306
313
307
314
size_t start = curEnd;
308
315
curEnd++; // consume '\'
@@ -314,8 +321,8 @@ static vector<InstructionTextToken> ParseStringToken(
314
321
else if (c == ' ,' || c == ' .' || c == ' :' || c == ' ;' || isspace (c))
315
322
{
316
323
// Flush before punctuation
317
- if (curStart < curEnd)
318
- ConstructToken (curStart, curEnd);
324
+ flushToken (curStart, curEnd);
325
+
319
326
// Group together repeated punctuation
320
327
size_t start = curEnd;
321
328
while (curEnd < tail && src[curEnd] == c)
@@ -329,9 +336,7 @@ static vector<InstructionTextToken> ParseStringToken(
329
336
}
330
337
}
331
338
332
- if (curStart < curEnd)
333
- ConstructToken (curStart, curEnd);
334
-
339
+ flushToken (curStart, curEnd);
335
340
return result;
336
341
}
337
342
@@ -341,7 +346,7 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
341
346
bool hasStrings = false ;
342
347
for (auto & i : items)
343
348
{
344
- if (( i.type == StringSeparator) && !i.tokens .empty ())
349
+ if (i.type == StringSeparator && !i.tokens .empty ())
345
350
{
346
351
// We try to push separators onto a preceding word, otherwise treat as
347
352
// a singular atom
@@ -370,13 +375,16 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
370
375
}
371
376
else if (i.type == FormatSpecifier || i.type == EscapeSequence)
372
377
{
378
+ // Flush previous tokens before special sequences like format specifiers or
379
+ // escape sequences
373
380
if (!pending.empty ())
374
381
{
375
382
result.push_back (Item {StringComponent, pending, {}, 0 });
376
383
pending.clear ();
377
384
}
378
385
result.push_back (Item { Atom, i.items , i.tokens , i.width });
379
386
}
387
+
380
388
else if (i.type == StartOfContainer && pending.empty ())
381
389
{
382
390
result.push_back (i);
@@ -739,6 +747,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
739
747
switch (token.type )
740
748
{
741
749
case BraceToken:
750
+ // Beginning of string
742
751
if (tokenIndex + 1 < currentLine.tokens .size ()
743
752
&& currentLine.tokens [tokenIndex + 1 ].type == StringToken)
744
753
{
@@ -751,7 +760,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
751
760
items.clear ();
752
761
items.push_back (Item {StartOfContainer, {}, {token}, 0 });
753
762
}
754
- // Check for end of string - gross!
763
+ // End of string
755
764
else if (currentLine.tokens [tokenIndex].type == StringToken
756
765
&& tokenIndex + 1 < currentLine.tokens .size ()
757
766
&& currentLine.tokens [tokenIndex + 1 ].type == BraceToken)
@@ -817,24 +826,15 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
817
826
case StringToken:
818
827
{
819
828
vector<InstructionTextToken> stringTokens = ParseStringToken (token, settings.maximumAnnotationLength );
820
- for (size_t k = 0 ; k < stringTokens. size (); k++ )
829
+ for (auto subToken : stringTokens)
821
830
{
822
- InstructionTextToken subToken = stringTokens[k];
823
831
string trimmedSubText = TrimString (subToken.text );
824
832
if (trimmedSubText.empty ())
825
833
items.push_back (Item {StringWhitespace, {}, {subToken}, 0 });
826
834
if (trimmedSubText[0 ] == ' %' )
827
835
items.push_back (Item {FormatSpecifier, {}, {subToken}, 0 });
828
836
else if (!trimmedSubText.empty () && trimmedSubText[0 ] == ' \\ ' )
829
- {
830
- if (trimmedSubText.size () > 1 )
831
- {
832
- if (trimmedSubText[1 ] == ' n' )
833
- items.push_back (Item {StringNewline, {}, {subToken}, 0 });
834
- continue ;
835
- }
836
837
items.push_back (Item {EscapeSequence, {}, {subToken}, 0 });
837
- }
838
838
else if (trimmedSubText[0 ] == ' ,' || trimmedSubText[0 ] == ' .' || trimmedSubText[0 ] == ' :' || trimmedSubText[0 ] == ' ;' )
839
839
items.push_back (Item {StringSeparator, {}, {subToken}, 0 });
840
840
else
@@ -937,9 +937,16 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
937
937
938
938
for (auto item = items.begin (); item != items.end ();)
939
939
{
940
- if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
940
+ if (item->type == StringComponent && currentWidth + item->width > desiredWidth)
941
+ {
942
+ // If a string is too wide to fit on the current line, create a newline
943
+ // without additional indentation
944
+ newLine ();
945
+ }
946
+ else if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
941
947
{
942
948
// Current item is too wide to fit on the current line, will need to start a new line.
949
+ // Whitespace is allowed to be too wide; we push it on as the preceding word is wrapped.
943
950
auto next = item;
944
951
++next;
945
952
@@ -948,7 +955,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
948
955
// is a container, always use the splitting behavior.
949
956
if (currentWidth == 0 || item->width > desiredContinuationWidth || item->type == Container)
950
957
{
951
- if (( item->type == Argument || item-> type == StringComponent) && currentWidth != 0 )
958
+ if (item->type == Argument && currentWidth != 0 )
952
959
{
953
960
// If an argument is too wide to show on a single line all by itself, start the argument
954
961
// on a new line, and add additional indentation for the continuation of the argument.
0 commit comments