@@ -205,9 +205,10 @@ int IsColorEscape(const char* str)
205
205
}
206
206
if (isdigit (str[1 ])) {
207
207
return 2 ;
208
- } else if (str[1 ] == ' x' || str[1 ] == ' X' ) {
208
+ }
209
+ else if (str[1 ] == ' x' || str[1 ] == ' X' ) {
209
210
for (int c = 0 ; c < 6 ; c++) {
210
- if ( !isxdigit (str[c + 2 ]) ) {
211
+ if (!isxdigit (str[c + 2 ])) {
211
212
return 0 ;
212
213
}
213
214
}
@@ -216,23 +217,77 @@ int IsColorEscape(const char* str)
216
217
return 0 ;
217
218
}
218
219
220
// Determines whether `str` begins with a colour escape sequence.
// Returns the length of the escape in code points: 2 for an indexed escape
// such as ^7, 8 for a direct escape such as ^x123ABC, or 0 when `str` does
// not start with a recognised escape.
int IsColorEscape(std::u32string_view str)
{
	// An escape needs the caret plus at least one discriminator character.
	if (str.size() < 2) {
		return 0;
	}
	if (str[0] != '^') {
		return 0;
	}

	const char32_t kind = str[1];

	// Indexed form: a single digit. Compared by range rather than through
	// isdigit so that only the arabic numerals 0-9 are accepted.
	const bool isIndexed = (kind >= U'0' && kind <= U'9');
	if (isIndexed) {
		return 2;
	}

	// Direct form: 'x' or 'X' followed by exactly six hexadecimal digits.
	const bool isDirect = (kind == 'x' || kind == 'X');
	if (!isDirect || str.size() < 8) {
		return 0;
	}
	for (size_t pos = 2; pos < 8; ++pos) {
		const char32_t ch = str[pos];
		const bool isDecimal = (ch >= U'0' && ch <= U'9');
		const bool isUpperHex = (ch >= U'A' && ch <= U'F');
		const bool isLowerHex = (ch >= U'a' && ch <= U'f');
		if (!(isDecimal || isUpperHex || isLowerHex)) {
			return 0;
		}
	}
	return 8;
}
249
+
219
250
void ReadColorEscape (const char * str, col3_t out)
220
251
{
221
252
int len = IsColorEscape (str);
222
253
switch (len) {
223
254
case 2 :
224
255
VectorCopy (colorEscape[str[1 ] - ' 0' ], out);
225
256
break ;
257
+ case 8 :
258
+ {
259
+ int xr, xg, xb;
260
+ sscanf (str + 2 , " %2x%2x%2x" , &xr, &xg, &xb);
261
+ out[0 ] = xr / 255 .0f ;
262
+ out[1 ] = xg / 255 .0f ;
263
+ out[2 ] = xb / 255 .0f ;
264
+ }
265
+ break ;
266
+ }
267
+ }
268
+
269
+ std::u32string_view ReadColorEscape (std::u32string_view str, col3_t out)
270
+ {
271
+ int len = IsColorEscape (str);
272
+ switch (len) {
273
+ case 2 :
274
+ VectorCopy (colorEscape[str[1 ] - U' 0' ], out);
275
+ break ;
226
276
case 8 :
227
277
{
228
278
int xr, xg, xb;
229
- sscanf (str + 2 , " %2x%2x%2x" , &xr, &xg, &xb);
279
+ char buf[7 ]{};
280
+ for (size_t i = 0 ; i < 6 ; ++i) {
281
+ buf[i] = (char )str[i + 2 ];
282
+ }
283
+ sscanf (buf, " %2x%2x%2x" , &xr, &xg, &xb);
230
284
out[0 ] = xr / 255 .0f ;
231
285
out[1 ] = xg / 255 .0f ;
232
286
out[2 ] = xb / 255 .0f ;
233
287
}
234
288
break ;
235
289
}
290
+ return str.substr (len);
236
291
}
237
292
238
293
// ================
@@ -279,3 +334,164 @@ dword StringHash(const char* str, int mask)
279
334
}
280
335
return hash & mask;
281
336
}
337
+
338
+ dword StringHash (std::string_view str, int mask)
339
+ {
340
+ size_t len = str.length ();
341
+ dword hash = 0 ;
342
+ for (size_t i = 0 ; i < len; i++) {
343
+ hash += (str[i] * 4999 ) ^ (((dword)i + 17 ) * 2003 );
344
+ }
345
+ return hash & mask;
346
+ }
347
+
348
+ #ifdef _WIN32
349
+ #include < Windows.h>
350
+
351
+ static wchar_t * WidenCodepageString (const char * str, UINT codepage)
352
+ {
353
+ if (!str) {
354
+ return nullptr ;
355
+ }
356
+ // Early-out if empty, avoids ambigious error return from MBTWC.
357
+ if (!*str) {
358
+ wchar_t * wstr = new wchar_t [1 ];
359
+ *wstr = L' \0 ' ;
360
+ return wstr;
361
+ }
362
+ DWORD cb = (DWORD)strlen (str);
363
+ int cch = MultiByteToWideChar (codepage, MB_ERR_INVALID_CHARS, str, cb, nullptr , 0 );
364
+ if (cch == 0 ) {
365
+ // Invalid string or other error.
366
+ return nullptr ;
367
+ }
368
+ wchar_t * wstr = new wchar_t [cch + 1 ]; // sized MBTWC doesn't include terminator.
369
+ MultiByteToWideChar (codepage, 0 , str, cb, wstr, cch);
370
+ wstr[cch] = ' \0 ' ;
371
+ return wstr;
372
+ }
373
+
374
+ wchar_t * WidenANSIString (const char * str)
375
+ {
376
+ return WidenCodepageString (str, CP_ACP);
377
+ }
378
+
379
+ wchar_t * WidenOEMString (const char * str)
380
+ {
381
+ return WidenCodepageString (str, CP_OEMCP);
382
+ }
383
+
384
+ wchar_t * WidenUTF8String (const char * str)
385
+ {
386
+ return WidenCodepageString (str, CP_UTF8);
387
+ }
388
+
389
+ char * NarrowCodepageString (const wchar_t * str, UINT codepage)
390
+ {
391
+ if (!str) {
392
+ return nullptr ;
393
+ }
394
+ if (!*str) {
395
+ char * nstr = new char [1 ];
396
+ *nstr = ' \0 ' ;
397
+ return nstr;
398
+ }
399
+ DWORD cch = (DWORD)wcslen (str);
400
+ int cb = WideCharToMultiByte (codepage, 0 , str, cch, nullptr , 0 , nullptr , nullptr );
401
+ if (cb == 0 ) {
402
+ // Invalid string or other error.
403
+ return nullptr ;
404
+ }
405
+ char * nstr = new char [cb + 1 ];
406
+ WideCharToMultiByte (codepage, 0 , str, cch, nstr, cb, nullptr , nullptr );
407
+ nstr[cb] = ' \0 ' ;
408
+ return nstr;
409
+ }
410
+
411
// Releases a wide string that was allocated with new[].
// Passing a null pointer is allowed and does nothing.
void FreeWideString(wchar_t* str)
{
	// delete[] on a null pointer is a no-op, so no explicit guard is needed.
	delete[] str;
}
417
+
418
+ char * NarrowANSIString (const wchar_t * str)
419
+ {
420
+ return NarrowCodepageString (str, CP_ACP);
421
+ }
422
+
423
+ char * NarrowOEMString (const wchar_t * str)
424
+ {
425
+ return NarrowCodepageString (str, CP_OEMCP);
426
+ }
427
+
428
+ char * NarrowUTF8String (const wchar_t * str)
429
+ {
430
+ return NarrowCodepageString (str, CP_UTF8);
431
+ }
432
+
433
+ IndexedUTF32String IndexUTF8ToUTF32 (std::string_view input)
434
+ {
435
+ IndexedUTF32String ret{};
436
+
437
+ size_t byteCount = input.size ();
438
+ auto & offsets = ret.sourceCodeUnitOffsets ;
439
+ offsets.reserve (byteCount); // conservative reservation
440
+ std::vector<char32_t > codepoints;
441
+
442
+ auto bytes = (uint8_t const *)input.data ();
443
+ for (size_t byteIdx = 0 ; byteIdx < byteCount;) {
444
+ uint8_t const * b = bytes + byteIdx;
445
+ size_t left = byteCount - byteIdx;
446
+ offsets.push_back (byteIdx);
447
+
448
+ char32_t codepoint{};
449
+ if (*b >> 7 == 0b0 ) { // 0xxx'xxxx
450
+ codepoint = *b;
451
+ byteIdx += 1 ;
452
+ }
453
+ else if (left >= 2 &&
454
+ b[0 ] >> 5 == 0b110 &&
455
+ b[1 ] >> 6 == 0b10 )
456
+ {
457
+ auto p0 = (uint32_t )b[0 ] & 0b1'1111 ;
458
+ auto p1 = (uint32_t )b[1 ] & 0b11'1111 ;
459
+ codepoint = p0 << 6 | p1;
460
+ byteIdx += 2 ;
461
+ }
462
+ else if (left >= 3 &&
463
+ b[0 ] >> 4 == 0b1110 &&
464
+ b[1 ] >> 6 == 0b10 &&
465
+ b[2 ] >> 6 == 0b10 )
466
+ {
467
+ auto p0 = (uint32_t )b[0 ] & 0b1111 ;
468
+ auto p1 = (uint32_t )b[1 ] & 0b11'1111 ;
469
+ auto p2 = (uint32_t )b[2 ] & 0b11'1111 ;
470
+ codepoint = p0 << 12 | p1 << 6 | p2;
471
+ byteIdx += 3 ;
472
+ }
473
+ else if (left >= 4 &&
474
+ b[0 ] >> 3 == 0b11110 &&
475
+ b[1 ] >> 6 == 0b10 &&
476
+ b[2 ] >> 6 == 0b10 &&
477
+ b[3 ] >> 6 == 0b10 )
478
+ {
479
+ auto p0 = (uint32_t )b[0 ] & 0b111 ;
480
+ auto p1 = (uint32_t )b[1 ] & 0b11'1111 ;
481
+ auto p2 = (uint32_t )b[2 ] & 0b11'1111 ;
482
+ auto p3 = (uint32_t )b[2 ] & 0b11'1111 ;
483
+ codepoint = p0 << 18 | p1 << 12 | p2 << 6 | p3;
484
+ byteIdx += 4 ;
485
+ }
486
+ else {
487
+ codepoints.push_back (0xFFFDu );
488
+ byteIdx += 1 ;
489
+ }
490
+ codepoints.push_back (codepoint);
491
+ }
492
+
493
+ ret.text = std::u32string (codepoints.begin (), codepoints.end ());
494
+ return ret;
495
+ }
496
+
497
+ #endif
0 commit comments