@@ -480,7 +480,39 @@ static size_t measure_string (unsigned int length,
480480
481481 default :
482482
483- ++ measured_length ;
483+ if ((unsigned char )c <= 0x1F )
484+ {
485+ measured_length += 6 ;
486+ }
487+ else if ((unsigned char )c == 0xED && i + 2 < length )
488+ {
489+ unsigned char c2 = (unsigned char )str [i + 1 ];
490+ unsigned char c3 = (unsigned char )str [i + 2 ];
491+
492+ if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9 ))
493+ {
494+ /* U+2028 line separator, U+2029 paragraph separator */
495+ measured_length += 6 ;
496+ }
497+ else if (c2 == 0xBF && (c3 == 0xBE || c3 == 0xBF ))
498+ {
499+ /* Noncharacters U+FFFE / U+FFFF */
500+ measured_length += 6 ;
501+ }
502+ else if ((c2 >= 0xA0 && c2 <= 0xBF ) && (c3 >= 0x80 && c3 <= 0xBF ))
503+ {
504+ /* Decode WTF-8 unpaired surrogate */
505+ measured_length += 6 ;
506+ }
507+ else
508+ {
509+ measured_length ++ ;
510+ }
511+ }
512+ else
513+ {
514+ measured_length ++ ;
515+ }
484516 break ;
485517 };
486518 };
@@ -493,13 +525,25 @@ static size_t measure_string (unsigned int length,
493525 *buf ++ = (c); \
494526} while(0); \
495527
/*
 * PRINT_ESCAPED_CP(cp): write the six-character JSON escape sequence
 * \uXXXX (backslash, 'u', four hex digits) for the value cp through buf.
 *
 * Only the low 16 bits of cp are represented; any higher bits are ignored.
 *
 * Requirements at the expansion site:
 *   - buf: a modifiable character pointer to the output position. Six
 *     characters are stored through it and it is advanced by six.
 *   - hex: an indexable table of 16 digit characters, indexed by nibble
 *     value (serialize_string supplies "0123456789ABCDEF").
 *
 * cp is evaluated exactly once and converted to unsigned before shifting,
 * so arguments with side effects or of a signed character type are safe.
 * There must be no whitespace between the macro name and '(' in the
 * definition, otherwise C treats it as an object-like macro.
 */
#define PRINT_ESCAPED_CP(cp) do { \
   unsigned int escaped_cp_ = (unsigned int) (cp) & 0xFFFFu; \
   *buf ++ = '\\'; \
   *buf ++ = 'u'; \
   *buf ++ = hex [(escaped_cp_ >> 12) & 0xFu]; \
   *buf ++ = hex [(escaped_cp_ >> 8) & 0xFu]; \
   *buf ++ = hex [(escaped_cp_ >> 4) & 0xFu]; \
   *buf ++ = hex [escaped_cp_ & 0xFu]; \
} while(0)
537+
496538static size_t serialize_string (json_char * buf ,
497539 unsigned int length ,
498540 const json_char * str )
499541{
500542 json_char * orig_buf = buf ;
501543 unsigned int i ;
502544
545+ static const char hex [] = "0123456789ABCDEF" ;
546+
503547 for (i = 0 ; i < length ; ++ i )
504548 {
505549 json_char c = str [i ];
@@ -516,7 +560,45 @@ static size_t serialize_string (json_char * buf,
516560
517561 default :
518562
519- * buf ++ = c ;
563+ if ((unsigned char )c <= 0x1F )
564+ {
565+ PRINT_ESCAPED_CP (c );
566+ }
567+ else if ((unsigned char )c == 0xED && i + 2 < length )
568+ {
569+ unsigned char c2 = (unsigned char )str [i + 1 ];
570+ unsigned char c3 = (unsigned char )str [i + 2 ];
571+
572+ if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9 ))
573+ {
574+ /* U+2028 line separator, U+2029 paragraph separator */
575+ unsigned int cp = ((c & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
576+ PRINT_ESCAPED_CP (cp );
577+ i += 2 ;
578+ }
579+ else if (c2 == 0xBF && (c3 == 0xBE || c3 == 0xBF ))
580+ {
581+ /* Noncharacters U+FFFE / U+FFFF */
582+ unsigned int cp = ((c & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
583+ PRINT_ESCAPED_CP (cp );
584+ i += 2 ;
585+ }
586+ else if ((c2 >= 0xA0 && c2 <= 0xBF ) && (c3 >= 0x80 && c3 <= 0xBF ))
587+ {
588+ /* Decode WTF-8 unpaired surrogate */
589+ unsigned int cp = ((c & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
590+ PRINT_ESCAPED_CP (cp );
591+ i += 2 ;
592+ }
593+ else
594+ {
595+ * buf ++ = c ;
596+ }
597+ }
598+ else
599+ {
600+ * buf ++ = c ;
601+ }
520602 break ;
521603 };
522604 };
0 commit comments