1616
1717final class Json5Decoder
1818{
19- private $ json ;
20-
2119 private $ at = 0 ;
2220
2321 private $ lineNumber = 1 ;
@@ -26,6 +24,8 @@ final class Json5Decoder
2624
2725 private $ ch ;
2826
27+ private $ chArr ;
28+
2929 private $ associative = false ;
3030
3131 private $ maxDepth = 512 ;
@@ -36,7 +36,9 @@ final class Json5Decoder
3636
3737 private $ length ;
3838
39- private $ lineCache ;
39+ private $ remainderCache ;
40+
41+ private $ remainderCacheAt ;
4042
4143 /**
4244 * Private constructor.
@@ -48,14 +50,17 @@ final class Json5Decoder
4850 */
private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
{
    // Decoding options, stored exactly as passed in.
    $this->associative = $associative;
    $this->maxDepth = $depth;
    $this->castBigIntToString = $castBigIntToString;

    // Split the input into UTF-8 characters once, so that charAt() is a
    // plain array lookup. The limit is -1 ("no limit") rather than null:
    // passing null to this int parameter is deprecated as of PHP 8.1.
    $chars = preg_split('//u', $json, -1, PREG_SPLIT_NO_EMPTY);

    // preg_split() returns false when $json is not valid UTF-8. Fall back
    // to an empty list so later lookups never index into a boolean.
    $this->chArr = $chars === false ? [] : $chars;

    // Take the length from the split result so the bounds check in
    // charAt() always agrees with the array that is actually indexed.
    $this->length = count($this->chArr);

    $this->ch = $this->charAt(0);

    // Starting point for getRemainder(), which trims this string as the
    // parse position advances.
    $this->remainderCache = $json;
    $this->remainderCacheAt = 0;
}
6065
6166 /**
@@ -73,7 +78,17 @@ private function __construct($json, $associative = false, $depth = 512, $castBig
7378 */
7479 public static function decode ($ source , $ associative = false , $ depth = 512 , $ options = 0 )
7580 {
76- $ associative = $ associative || ($ options & JSON_OBJECT_AS_ARRAY );
81+ // Try parsing with json_decode first, since that's much faster
82+ // We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
83+ if (PHP_VERSION_ID >= 70000 ) {
84+ $ result = json_decode ($ source , $ associative , $ depth , $ options );
85+ if (json_last_error () === JSON_ERROR_NONE ) {
86+ return $ result ;
87+ }
88+ }
89+
90+ // Fall back to JSON5 if that fails
91+ $ associative = $ associative === true || ($ associative === null && $ options & JSON_OBJECT_AS_ARRAY );
7792 $ castBigIntToString = $ options & JSON_BIGINT_AS_STRING ;
7893
7994 $ decoder = new self ((string )$ source , $ associative , $ depth , $ castBigIntToString );
@@ -94,34 +109,20 @@ public static function decode($source, $associative = false, $depth = 512, $opti
94109 */
private function charAt($at)
{
    // Reject positions outside the input on both sides. The lower bound
    // matters because a negative index would otherwise read an undefined
    // array offset.
    if ($at < 0 || $at >= $this->length) {
        return null;
    }

    // isset() keeps the lookup silent if the character list is shorter
    // than expected or is not an array at all (preg_split() yields false
    // on malformed UTF-8). A ternary is used instead of ?? because this
    // file still targets PHP 5.x.
    return isset($this->chArr[$at]) ? $this->chArr[$at] : null;
}
103118
104119 /**
105120 * Parse the next character.
106121 *
107- * If $c is given, the next char will only be parsed if the current
108- * one matches $c.
109- *
110- * @param string|null $c
111- *
112122 * @return null|string
113123 */
114- private function next ($ c = null )
124+ private function next ()
115125 {
116- // If a c parameter is provided, verify that it matches the current character.
117- if ($ c !== null && $ c !== $ this ->ch ) {
118- $ this ->throwSyntaxError (sprintf (
119- 'Expected %s instead of %s ' ,
120- self ::renderChar ($ c ),
121- self ::renderChar ($ this ->ch )
122- ));
123- }
124-
125126 // Get the next character. When there are no more characters,
126127 // return the empty string.
127128 if ($ this ->ch === "\n" || ($ this ->ch === "\r" && $ this ->peek () !== "\n" )) {
@@ -139,29 +140,34 @@ private function next($c = null)
139140 }
140141
141142 /**
142- * Get the next character without consuming it or
143- * assigning it to the ch variable.
143+ * Parse the next character if it matches $c or fail.
144144 *
145- * @return mixed
145+ * @param string $c
146+ *
147+ * @return string|null
146148 */
147- private function peek ( )
private function nextOrFail($c)
{
    // Only a mismatch between the expected and the current character is
    // reported; a match falls straight through to the advance below.
    if ($this->ch !== $c) {
        $expected = self::renderChar($c);
        $actual = self::renderChar($this->ch);

        $this->throwSyntaxError(sprintf('Expected %s instead of %s', $expected, $actual));
    }

    // Advance past the checked character and hand back whatever next()
    // returns.
    return $this->next();
}
151161
152162 /**
153- * @return string
163+ * Get the next character without consuming it or
164+ * assigning it to the ch variable.
165+ *
166+ * @return mixed
154167 */
155- private function getLineRemainder ()
168+ private function peek ()
156169 {
157- // Line are separated by "\n" or "\r" without an "\n" next
158- if ($ this ->lineCache === null ) {
159- $ this ->lineCache = preg_split ('/\n|\r\n?/u ' , $ this ->json );
160- }
161-
162- $ line = $ this ->lineCache [$ this ->lineNumber - 1 ];
163-
164- return mb_substr ($ line , $ this ->columnNumber - 1 );
170+ return $ this ->charAt ($ this ->at + 1 );
165171 }
166172
167173 /**
@@ -175,7 +181,7 @@ private function getLineRemainder()
175181 */
176182 private function match ($ regex )
177183 {
178- $ subject = $ this ->getLineRemainder ();
184+ $ subject = $ this ->getRemainder ();
179185
180186 $ matches = [];
181187 if (!preg_match ($ regex , $ subject , $ matches , PREG_OFFSET_CAPTURE )) {
@@ -234,7 +240,7 @@ private function number()
234240
235241 if ($ this ->ch === '- ' || $ this ->ch === '+ ' ) {
236242 $ sign = $ this ->ch ;
237- $ this ->next ($ this -> ch );
243+ $ this ->next ();
238244 }
239245
240246 // support for Infinity
@@ -272,10 +278,10 @@ private function number()
272278
273279 switch ($ base ) {
274280 case 10 :
275- if (($ match = $ this ->match ('/^\d*\.?\d*/ ' )) !== null ) {
281+ if ((is_numeric ( $ this -> ch ) || $ this -> ch === ' . ' ) && ( $ match = $ this ->match ('/^\d*\.?\d*/ ' )) !== null ) {
276282 $ string .= $ match ;
277283 }
278- if (($ match = $ this ->match ('/^[Ee][-+]?\d*/ ' )) !== null ) {
284+ if (($ this -> ch === ' E ' || $ this -> ch === ' e ' ) && ( $ match = $ this ->match ('/^[Ee][-+]?\d*/ ' )) !== null ) {
279285 $ string .= $ match ;
280286 }
281287 $ number = $ string ;
@@ -319,7 +325,7 @@ private function string()
319325 }
320326
321327 if ($ this ->ch === '\\' ) {
322- if ($ unicodeEscaped = $ this ->match ('/^(?: \\\\u[A-Fa-f0-9]{4})+/ ' )) {
328+ if ($ this -> peek () === ' u ' && $ unicodeEscaped = $ this ->match ('/^(?: \\\\u[A-Fa-f0-9]{4})+/ ' )) {
323329 $ string .= json_decode ('" ' .$ unicodeEscaped .'" ' );
324330 continue ;
325331 }
@@ -379,9 +385,9 @@ private function blockComment()
379385 do {
380386 $ this ->next ();
381387 while ($ this ->ch === '* ' ) {
382- $ this ->next ('* ' );
388+ $ this ->nextOrFail ('* ' );
383389 if ($ this ->ch === '/ ' ) {
384- $ this ->next ('/ ' );
390+ $ this ->nextOrFail ('/ ' );
385391
386392 return ;
387393 }
@@ -397,7 +403,7 @@ private function blockComment()
397403 private function comment ()
398404 {
399405 // Comments always begin with a / character.
400- $ this ->next ('/ ' );
406+ $ this ->nextOrFail ('/ ' );
401407
402408 if ($ this ->ch === '/ ' ) {
403409 $ this ->inlineComment ();
@@ -435,38 +441,38 @@ private function word()
435441 {
436442 switch ($ this ->ch ) {
437443 case 't ' :
438- $ this ->next ('t ' );
439- $ this ->next ('r ' );
440- $ this ->next ('u ' );
441- $ this ->next ('e ' );
444+ $ this ->nextOrFail ('t ' );
445+ $ this ->nextOrFail ('r ' );
446+ $ this ->nextOrFail ('u ' );
447+ $ this ->nextOrFail ('e ' );
442448 return true ;
443449 case 'f ' :
444- $ this ->next ('f ' );
445- $ this ->next ('a ' );
446- $ this ->next ('l ' );
447- $ this ->next ('s ' );
448- $ this ->next ('e ' );
450+ $ this ->nextOrFail ('f ' );
451+ $ this ->nextOrFail ('a ' );
452+ $ this ->nextOrFail ('l ' );
453+ $ this ->nextOrFail ('s ' );
454+ $ this ->nextOrFail ('e ' );
449455 return false ;
450456 case 'n ' :
451- $ this ->next ('n ' );
452- $ this ->next ('u ' );
453- $ this ->next ('l ' );
454- $ this ->next ('l ' );
457+ $ this ->nextOrFail ('n ' );
458+ $ this ->nextOrFail ('u ' );
459+ $ this ->nextOrFail ('l ' );
460+ $ this ->nextOrFail ('l ' );
455461 return null ;
456462 case 'I ' :
457- $ this ->next ('I ' );
458- $ this ->next ('n ' );
459- $ this ->next ('f ' );
460- $ this ->next ('i ' );
461- $ this ->next ('n ' );
462- $ this ->next ('i ' );
463- $ this ->next ('t ' );
464- $ this ->next ('y ' );
463+ $ this ->nextOrFail ('I ' );
464+ $ this ->nextOrFail ('n ' );
465+ $ this ->nextOrFail ('f ' );
466+ $ this ->nextOrFail ('i ' );
467+ $ this ->nextOrFail ('n ' );
468+ $ this ->nextOrFail ('i ' );
469+ $ this ->nextOrFail ('t ' );
470+ $ this ->nextOrFail ('y ' );
465471 return INF ;
466472 case 'N ' :
467- $ this ->next ('N ' );
468- $ this ->next ('a ' );
469- $ this ->next ('N ' );
473+ $ this ->nextOrFail ('N ' );
474+ $ this ->nextOrFail ('a ' );
475+ $ this ->nextOrFail ('N ' );
470476 return NAN ;
471477 }
472478
@@ -482,11 +488,11 @@ private function arr()
482488 $ this ->throwSyntaxError ('Maximum stack depth exceeded ' );
483489 }
484490
485- $ this ->next ('[ ' );
491+ $ this ->nextOrFail ('[ ' );
486492 $ this ->white ();
487493 while ($ this ->ch !== null ) {
488494 if ($ this ->ch === '] ' ) {
489- $ this ->next ('] ' );
495+ $ this ->nextOrFail ('] ' );
490496 $ this ->depth --;
491497 return $ arr ; // Potentially empty array
492498 }
@@ -502,11 +508,11 @@ private function arr()
502508 // If there's no comma after this value, this needs to
503509 // be the end of the array.
504510 if ($ this ->ch !== ', ' ) {
505- $ this ->next ('] ' );
511+ $ this ->nextOrFail ('] ' );
506512 $ this ->depth --;
507513 return $ arr ;
508514 }
509- $ this ->next (', ' );
515+ $ this ->nextOrFail (', ' );
510516 $ this ->white ();
511517 }
512518 }
@@ -526,11 +532,11 @@ private function obj()
526532 $ this ->throwSyntaxError ('Maximum stack depth exceeded ' );
527533 }
528534
529- $ this ->next ('{ ' );
535+ $ this ->nextOrFail ('{ ' );
530536 $ this ->white ();
531537 while ($ this ->ch ) {
532538 if ($ this ->ch === '} ' ) {
533- $ this ->next ('} ' );
539+ $ this ->nextOrFail ('} ' );
534540 $ this ->depth --;
535541 return $ object ; // Potentially empty object
536542 }
@@ -544,7 +550,7 @@ private function obj()
544550 }
545551
546552 $ this ->white ();
547- $ this ->next (': ' );
553+ $ this ->nextOrFail (': ' );
548554 if ($ this ->associative ) {
549555 $ object [$ key ] = $ this ->value ();
550556 } else {
@@ -554,11 +560,11 @@ private function obj()
554560 // If there's no comma after this pair, this needs to be
555561 // the end of the object.
556562 if ($ this ->ch !== ', ' ) {
557- $ this ->next ('} ' );
563+ $ this ->nextOrFail ('} ' );
558564 $ this ->depth --;
559565 return $ object ;
560566 }
561- $ this ->next (', ' );
567+ $ this ->nextOrFail (', ' );
562568 $ this ->white ();
563569 }
564570 }
@@ -625,4 +631,25 @@ private static function getEscapee($ch)
625631 // @codingStandardsIgnoreEnd
626632 }
627633 }
634+
/**
 * Returns the input from the current position ($this->at) to its end.
 *
 * The last result is cached together with the position it was cut at, so
 * each call only has to skip the characters consumed since the previous
 * call instead of re-scanning the input from its start.
 *
 * @return string
 */
private function getRemainder()
{
    // Cache hit: the position has not moved since the last call.
    if ($this->remainderCacheAt === $this->at) {
        return $this->remainderCache;
    }

    // $this->at counts UTF-8 characters, so the offset must be applied in
    // that same unit. The encoding is passed explicitly rather than left
    // to mb_internal_encoding(), which may be configured differently.
    // A null length means "up to the end of the string".
    $advance = $this->at - $this->remainderCacheAt;
    $this->remainderCache = mb_substr($this->remainderCache, $advance, null, 'utf-8');
    $this->remainderCacheAt = $this->at;

    return $this->remainderCache;
}
628655}
0 commit comments