Skip to content

Commit 2d1db61

Browse files
authored
Merge pull request #2 from colinodell/optimizations
Optimizations
2 parents 2bb3fbf + 474f909 commit 2d1db61

File tree

4 files changed

+109
-79
lines changed

4 files changed

+109
-79
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ php:
66
- "5.6"
77
- "7.0"
88
- "7.1"
9+
- "7.2"
910
- "master"
1011

1112
matrix:

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ $arr = json5_decode($json);
6666

6767
It takes exactly the same parameters in the same order. For more details on these, see the [PHP docs][link-php-jsondecode].
6868

69+
For the best possible performance, it first tries to parse the input with PHP's native `json_decode()` function (which usually fails fast on input that isn't plain JSON) and falls back to the JSON5 parser only if that fails.
70+
6971
## Binary / Executable
7072

7173
A binary/executable named `json5` is also provided for converting JSON5 to plain JSON via your terminal.

src/Json5Decoder.php

Lines changed: 105 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616

1717
final class Json5Decoder
1818
{
19-
private $json;
20-
2119
private $at = 0;
2220

2321
private $lineNumber = 1;
@@ -26,6 +24,8 @@ final class Json5Decoder
2624

2725
private $ch;
2826

27+
private $chArr;
28+
2929
private $associative = false;
3030

3131
private $maxDepth = 512;
@@ -36,7 +36,9 @@ final class Json5Decoder
3636

3737
private $length;
3838

39-
private $lineCache;
39+
private $remainderCache;
40+
41+
private $remainderCacheAt;
4042

4143
/**
4244
* Private constructor.
@@ -48,14 +50,17 @@ final class Json5Decoder
4850
*/
4951
private function __construct($json, $associative = false, $depth = 512, $castBigIntToString = false)
5052
{
51-
$this->json = $json;
5253
$this->associative = $associative;
5354
$this->maxDepth = $depth;
5455
$this->castBigIntToString = $castBigIntToString;
5556

5657
$this->length = mb_strlen($json, 'utf-8');
5758

59+
$this->chArr = preg_split('//u', $json, null, PREG_SPLIT_NO_EMPTY);
5860
$this->ch = $this->charAt(0);
61+
62+
$this->remainderCache = $json;
63+
$this->remainderCacheAt = 0;
5964
}
6065

6166
/**
@@ -73,7 +78,17 @@ private function __construct($json, $associative = false, $depth = 512, $castBig
7378
*/
7479
public static function decode($source, $associative = false, $depth = 512, $options = 0)
7580
{
76-
$associative = $associative || ($options & JSON_OBJECT_AS_ARRAY);
81+
// Try parsing with json_decode first, since that's much faster
82+
// We only attempt this on PHP 7+ because 5.x doesn't parse some edge cases correctly
83+
if (PHP_VERSION_ID >= 700000) {
84+
$result = json_decode($source, $associative, $depth, $options);
85+
if (json_last_error() === JSON_ERROR_NONE) {
86+
return $result;
87+
}
88+
}
89+
90+
// Fall back to JSON5 if that fails
91+
$associative = $associative === true || ($associative === null && $options & JSON_OBJECT_AS_ARRAY);
7792
$castBigIntToString = $options & JSON_BIGINT_AS_STRING;
7893

7994
$decoder = new self((string)$source, $associative, $depth, $castBigIntToString);
@@ -94,34 +109,20 @@ public static function decode($source, $associative = false, $depth = 512, $opti
94109
*/
95110
private function charAt($at)
96111
{
97-
if ($at < 0 || $at >= $this->length) {
112+
if ($at >= $this->length) {
98113
return null;
99114
}
100115

101-
return mb_substr($this->json, $at, 1, 'utf-8');
116+
return $this->chArr[$at];
102117
}
103118

104119
/**
105120
* Parse the next character.
106121
*
107-
* If $c is given, the next char will only be parsed if the current
108-
* one matches $c.
109-
*
110-
* @param string|null $c
111-
*
112122
* @return null|string
113123
*/
114-
private function next($c = null)
124+
private function next()
115125
{
116-
// If a c parameter is provided, verify that it matches the current character.
117-
if ($c !== null && $c !== $this->ch) {
118-
$this->throwSyntaxError(sprintf(
119-
'Expected %s instead of %s',
120-
self::renderChar($c),
121-
self::renderChar($this->ch)
122-
));
123-
}
124-
125126
// Get the next character. When there are no more characters,
126127
// return the empty string.
127128
if ($this->ch === "\n" || ($this->ch === "\r" && $this->peek() !== "\n")) {
@@ -139,29 +140,34 @@ private function next($c = null)
139140
}
140141

141142
/**
142-
* Get the next character without consuming it or
143-
* assigning it to the ch variable.
143+
* Parse the next character if it matches $c or fail.
144144
*
145-
* @return mixed
145+
* @param string $c
146+
*
147+
* @return string|null
146148
*/
147-
private function peek()
149+
private function nextOrFail($c)
148150
{
149-
return $this->charAt($this->at + 1);
151+
if ($c !== $this->ch) {
152+
$this->throwSyntaxError(sprintf(
153+
'Expected %s instead of %s',
154+
self::renderChar($c),
155+
self::renderChar($this->ch)
156+
));
157+
}
158+
159+
return $this->next();
150160
}
151161

152162
/**
153-
* @return string
163+
* Get the next character without consuming it or
164+
* assigning it to the ch variable.
165+
*
166+
* @return mixed
154167
*/
155-
private function getLineRemainder()
168+
private function peek()
156169
{
157-
// Line are separated by "\n" or "\r" without an "\n" next
158-
if ($this->lineCache === null) {
159-
$this->lineCache = preg_split('/\n|\r\n?/u', $this->json);
160-
}
161-
162-
$line = $this->lineCache[$this->lineNumber - 1];
163-
164-
return mb_substr($line, $this->columnNumber - 1);
170+
return $this->charAt($this->at + 1);
165171
}
166172

167173
/**
@@ -175,7 +181,7 @@ private function getLineRemainder()
175181
*/
176182
private function match($regex)
177183
{
178-
$subject = $this->getLineRemainder();
184+
$subject = $this->getRemainder();
179185

180186
$matches = [];
181187
if (!preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
@@ -234,7 +240,7 @@ private function number()
234240

235241
if ($this->ch === '-' || $this->ch === '+') {
236242
$sign = $this->ch;
237-
$this->next($this->ch);
243+
$this->next();
238244
}
239245

240246
// support for Infinity
@@ -272,10 +278,10 @@ private function number()
272278

273279
switch ($base) {
274280
case 10:
275-
if (($match = $this->match('/^\d*\.?\d*/')) !== null) {
281+
if ((is_numeric($this->ch) || $this->ch === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
276282
$string .= $match;
277283
}
278-
if (($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
284+
if (($this->ch === 'E' || $this->ch === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
279285
$string .= $match;
280286
}
281287
$number = $string;
@@ -319,7 +325,7 @@ private function string()
319325
}
320326

321327
if ($this->ch === '\\') {
322-
if ($unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) {
328+
if ($this->peek() === 'u' && $unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) {
323329
$string .= json_decode('"'.$unicodeEscaped.'"');
324330
continue;
325331
}
@@ -379,9 +385,9 @@ private function blockComment()
379385
do {
380386
$this->next();
381387
while ($this->ch === '*') {
382-
$this->next('*');
388+
$this->nextOrFail('*');
383389
if ($this->ch === '/') {
384-
$this->next('/');
390+
$this->nextOrFail('/');
385391

386392
return;
387393
}
@@ -397,7 +403,7 @@ private function blockComment()
397403
private function comment()
398404
{
399405
// Comments always begin with a / character.
400-
$this->next('/');
406+
$this->nextOrFail('/');
401407

402408
if ($this->ch === '/') {
403409
$this->inlineComment();
@@ -435,38 +441,38 @@ private function word()
435441
{
436442
switch ($this->ch) {
437443
case 't':
438-
$this->next('t');
439-
$this->next('r');
440-
$this->next('u');
441-
$this->next('e');
444+
$this->nextOrFail('t');
445+
$this->nextOrFail('r');
446+
$this->nextOrFail('u');
447+
$this->nextOrFail('e');
442448
return true;
443449
case 'f':
444-
$this->next('f');
445-
$this->next('a');
446-
$this->next('l');
447-
$this->next('s');
448-
$this->next('e');
450+
$this->nextOrFail('f');
451+
$this->nextOrFail('a');
452+
$this->nextOrFail('l');
453+
$this->nextOrFail('s');
454+
$this->nextOrFail('e');
449455
return false;
450456
case 'n':
451-
$this->next('n');
452-
$this->next('u');
453-
$this->next('l');
454-
$this->next('l');
457+
$this->nextOrFail('n');
458+
$this->nextOrFail('u');
459+
$this->nextOrFail('l');
460+
$this->nextOrFail('l');
455461
return null;
456462
case 'I':
457-
$this->next('I');
458-
$this->next('n');
459-
$this->next('f');
460-
$this->next('i');
461-
$this->next('n');
462-
$this->next('i');
463-
$this->next('t');
464-
$this->next('y');
463+
$this->nextOrFail('I');
464+
$this->nextOrFail('n');
465+
$this->nextOrFail('f');
466+
$this->nextOrFail('i');
467+
$this->nextOrFail('n');
468+
$this->nextOrFail('i');
469+
$this->nextOrFail('t');
470+
$this->nextOrFail('y');
465471
return INF;
466472
case 'N':
467-
$this->next('N');
468-
$this->next('a');
469-
$this->next('N');
473+
$this->nextOrFail('N');
474+
$this->nextOrFail('a');
475+
$this->nextOrFail('N');
470476
return NAN;
471477
}
472478

@@ -482,11 +488,11 @@ private function arr()
482488
$this->throwSyntaxError('Maximum stack depth exceeded');
483489
}
484490

485-
$this->next('[');
491+
$this->nextOrFail('[');
486492
$this->white();
487493
while ($this->ch !== null) {
488494
if ($this->ch === ']') {
489-
$this->next(']');
495+
$this->nextOrFail(']');
490496
$this->depth--;
491497
return $arr; // Potentially empty array
492498
}
@@ -502,11 +508,11 @@ private function arr()
502508
// If there's no comma after this value, this needs to
503509
// be the end of the array.
504510
if ($this->ch !== ',') {
505-
$this->next(']');
511+
$this->nextOrFail(']');
506512
$this->depth--;
507513
return $arr;
508514
}
509-
$this->next(',');
515+
$this->nextOrFail(',');
510516
$this->white();
511517
}
512518
}
@@ -526,11 +532,11 @@ private function obj()
526532
$this->throwSyntaxError('Maximum stack depth exceeded');
527533
}
528534

529-
$this->next('{');
535+
$this->nextOrFail('{');
530536
$this->white();
531537
while ($this->ch) {
532538
if ($this->ch === '}') {
533-
$this->next('}');
539+
$this->nextOrFail('}');
534540
$this->depth--;
535541
return $object; // Potentially empty object
536542
}
@@ -544,7 +550,7 @@ private function obj()
544550
}
545551

546552
$this->white();
547-
$this->next(':');
553+
$this->nextOrFail(':');
548554
if ($this->associative) {
549555
$object[$key] = $this->value();
550556
} else {
@@ -554,11 +560,11 @@ private function obj()
554560
// If there's no comma after this pair, this needs to be
555561
// the end of the object.
556562
if ($this->ch !== ',') {
557-
$this->next('}');
563+
$this->nextOrFail('}');
558564
$this->depth--;
559565
return $object;
560566
}
561-
$this->next(',');
567+
$this->nextOrFail(',');
562568
$this->white();
563569
}
564570
}
@@ -625,4 +631,25 @@ private static function getEscapee($ch)
625631
// @codingStandardsIgnoreEnd
626632
}
627633
}
634+
635+
/**
636+
* Returns everything from $this->at onwards.
637+
*
638+
* Utilizes a cache so we don't have to continuously parse through UTF-8
639+
* data that was earlier in the string which we don't even care about.
640+
*
641+
* @return string
642+
*/
643+
private function getRemainder()
644+
{
645+
if ($this->remainderCacheAt === $this->at) {
646+
return $this->remainderCache;
647+
}
648+
649+
$subject = mb_substr($this->remainderCache, $this->at - $this->remainderCacheAt);
650+
$this->remainderCache = $subject;
651+
$this->remainderCacheAt = $this->at;
652+
653+
return $subject;
654+
}
628655
}

test/Functional/OptionsTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public function testAssocFalseWithNoOptionsSet()
2525
public function testAssocFalseWithAssocOption()
2626
{
2727
$result = Json5Decoder::decode('{"foo": true}', false, 512, JSON_OBJECT_AS_ARRAY);
28-
$this->assertInternalType('array', $result);
28+
$this->assertInstanceOf('\stdClass', $result);
2929
}
3030

3131
public function testBigIntWithNoOptionsSet()

0 commit comments

Comments
 (0)