Skip to content

Commit b039a9d

Browse files
authored
Merge pull request #120 from mvorisek/fix_unclosed_comment_parse
Fix unclosed block comment tokenize
2 parents 4605e91 + 9b4d087 commit b039a9d

9 files changed

+176
-17
lines changed

Diff for: src/Tokenizer.php

+4-3
Original file line numberDiff line numberDiff line change
@@ -825,9 +825,10 @@ private function createNextToken(string $string, Token|null $previous = null): T
825825
$last = strpos($string, "\n");
826826
$type = Token::TOKEN_TYPE_COMMENT;
827827
} else { // Comment until closing comment tag
828-
$pos = strpos($string, '*/', 2);
829-
assert($pos !== false);
830-
$last = $pos + 2;
828+
$pos = strpos($string, '*/', 2);
829+
$last = $pos !== false
830+
? $pos + 2
831+
: false;
831832
$type = Token::TOKEN_TYPE_BLOCK_COMMENT;
832833
}
833834

Diff for: tests/SqlFormatterTest.php

+29-10
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
use function defined;
1919
use function explode;
2020
use function file_get_contents;
21+
use function implode;
2122
use function pack;
2223
use function rtrim;
2324
use function sprintf;
@@ -41,6 +42,7 @@ public function testFormatHighlight(string $sql, string $html): void
4142
}
4243

4344
#[DataProvider('formatData')]
45+
#[DataProvider('formatLongConcatData')]
4446
public function testFormat(string $sql, string $html): void
4547
{
4648
$formatter = new SqlFormatter(new NullHighlighter());
@@ -99,13 +101,22 @@ public function testUsePre(): void
99101
$this->assertSame($actual, $expected);
100102
}
101103

104+
/** @return string[] */
105+
private static function fileSqlData(): array
106+
{
107+
$contents = file_get_contents(__DIR__ . '/sql.sql');
108+
assert($contents !== false);
109+
110+
return explode("\n---\n", rtrim($contents, "\n"));
111+
}
112+
102113
/** @return Generator<mixed[]> */
103114
private static function fileDataProvider(string $file): Generator
104115
{
105116
$contents = file_get_contents(__DIR__ . '/' . $file);
106117
assert($contents !== false);
107118
$formatHighlightData = explode("\n---\n", rtrim($contents, "\n"));
108-
$sqlData = self::sqlData();
119+
$sqlData = self::fileSqlData();
109120
if (count($formatHighlightData) !== count($sqlData)) {
110121
throw new UnexpectedValueException(sprintf(
111122
'"%s" (%d sections) and sql.sql (%d sections) should have the same number of sections',
@@ -138,6 +149,23 @@ public static function formatData(): Generator
138149
return self::fileDataProvider('format.txt');
139150
}
140151

152+
/** @return Generator<mixed[]> */
153+
public static function formatLongConcatData(): Generator
154+
{
155+
$sqlParts = [];
156+
for ($i = 0; $i < 2_000; $i++) {
157+
$sqlParts[] = 'cast(\'foo' . $i . '\' as blob)';
158+
}
159+
160+
$inConcat = 'concat(' . implode(', ', $sqlParts) . ')';
161+
$outConcat = "concat(\n " . implode(",\n ", $sqlParts) . "\n )";
162+
163+
yield 'long concat' => [
164+
'select iif(' . $inConcat . ' = ' . $inConcat . ', 10, 20) x',
165+
"select\n iif(\n " . $outConcat . ' = ' . $outConcat . ",\n 10,\n 20\n ) x",
166+
];
167+
}
168+
141169
/** @return Generator<mixed[]> */
142170
public static function compressData(): Generator
143171
{
@@ -149,13 +177,4 @@ public static function highlightData(): Generator
149177
{
150178
return self::fileDataProvider('highlight.html');
151179
}
152-
153-
/** @return mixed[] */
154-
private static function sqlData(): array
155-
{
156-
$contents = file_get_contents(__DIR__ . '/sql.sql');
157-
assert($contents !== false);
158-
159-
return explode("\n---\n", rtrim($contents, "\n"));
160-
}
161180
}

Diff for: tests/TokenizerTest.php

+131-4
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44

55
namespace Doctrine\SqlFormatter\Tests;
66

7+
use Doctrine\SqlFormatter\Cursor;
8+
use Doctrine\SqlFormatter\Token;
79
use Doctrine\SqlFormatter\Tokenizer;
8-
use PHPUnit\Framework\Attributes\DoesNotPerformAssertions;
10+
use Generator;
11+
use PHPUnit\Framework\Attributes\DataProvider;
912
use PHPUnit\Framework\TestCase;
1013
use ReflectionClass;
1114

1215
use function array_filter;
16+
use function implode;
1317
use function preg_match;
18+
use function serialize;
1419
use function sort;
1520
use function strtoupper;
1621

@@ -58,9 +63,131 @@ public function testKeywordsReservedAreSingleUpperWord(): void
5863
self::assertSame([], $kwsDiff);
5964
}
6065

61-
#[DoesNotPerformAssertions]
62-
public function testThereAreNoRegressions(): void
66+
/** @param list<Token> $expectedTokens */
67+
public static function assertEqualsTokens(array $expectedTokens, Cursor $cursor): void
6368
{
64-
(new Tokenizer())->tokenize('*/');
69+
$tokens = [];
70+
71+
$cursor = $cursor->subCursor();
72+
73+
while ($token = $cursor->next()) {
74+
$tokens[] = $token;
75+
}
76+
77+
if (serialize($tokens) === serialize($expectedTokens)) { // optimize self::assertEquals() for large inputs
78+
self::assertTrue(true);
79+
} else {
80+
self::assertEquals($expectedTokens, $tokens);
81+
}
82+
}
83+
84+
/** @param list<Token> $expectedTokens */
85+
#[DataProvider('tokenizeData')]
86+
#[DataProvider('tokenizeLongConcatData')]
87+
public function testTokenize(array $expectedTokens, string $sql): void
88+
{
89+
self::assertEqualsTokens($expectedTokens, (new Tokenizer())->tokenize($sql));
90+
}
91+
92+
/** @return Generator<mixed[]> */
93+
public static function tokenizeData(): Generator
94+
{
95+
yield 'empty' => [
96+
[],
97+
'',
98+
];
99+
100+
yield 'basic' => [
101+
[
102+
new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
103+
new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
104+
new Token(Token::TOKEN_TYPE_NUMBER, '1'),
105+
],
106+
'select 1',
107+
];
108+
109+
yield 'there are no regressions' => [
110+
[
111+
new Token(Token::TOKEN_TYPE_BOUNDARY, '*'),
112+
new Token(Token::TOKEN_TYPE_BOUNDARY, '/'),
113+
],
114+
'*/',
115+
];
116+
117+
yield 'unclosed quoted string' => [
118+
[
119+
new Token(Token::TOKEN_TYPE_QUOTE, '\'foo...'),
120+
],
121+
'\'foo...',
122+
];
123+
124+
yield 'unclosed block comment' => [
125+
[
126+
new Token(Token::TOKEN_TYPE_BLOCK_COMMENT, '/* foo...'),
127+
],
128+
'/* foo...',
129+
];
130+
}
131+
132+
/** @return Generator<mixed[]> */
133+
public static function tokenizeLongConcatData(): Generator
134+
{
135+
$count = 2_000;
136+
137+
$sqlParts = [];
138+
for ($i = 0; $i < $count; $i++) {
139+
$sqlParts[] = 'cast(\'foo' . $i . '\' as blob)';
140+
}
141+
142+
$concat = 'concat(' . implode(', ', $sqlParts) . ')';
143+
$sql = 'select iif(' . $concat . ' = ' . $concat . ', 10, 20) x';
144+
145+
$expectedTokens = [
146+
new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
147+
new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
148+
new Token(Token::TOKEN_TYPE_WORD, 'iif'),
149+
new Token(Token::TOKEN_TYPE_BOUNDARY, '('),
150+
];
151+
152+
for ($j = 0; $j < 2; $j++) {
153+
if ($j !== 0) {
154+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
155+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '=');
156+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
157+
}
158+
159+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'concat');
160+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '(');
161+
162+
for ($i = 0; $i < $count; $i++) {
163+
if ($i !== 0) {
164+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
165+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
166+
}
167+
168+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'cast');
169+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '(');
170+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_QUOTE, '\'foo' . $i . '\'');
171+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
172+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'as');
173+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
174+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WORD, 'blob');
175+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
176+
}
177+
178+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
179+
}
180+
181+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
182+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
183+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_NUMBER, '10');
184+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
185+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
186+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_NUMBER, '20');
187+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
188+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
189+
$expectedTokens[] = new Token(Token::TOKEN_TYPE_WORD, 'x');
190+
191+
yield 'long concat' => [$expectedTokens, $sql];
65192
}
66193
}

Diff for: tests/clihighlight.txt

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
ORDER BY
1414
COUNT(order_id) DESC;
1515
---
16+
17+
---
1618
UPDATE
1719
customers
1820
SET

Diff for: tests/compress.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
SELECT customer_id, customer_name, COUNT(order_id) as total FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id GROUP BY customer_id, customer_name HAVING COUNT(order_id) > 5 ORDER BY COUNT(order_id) DESC;
2+
---
3+
24
---
35
UPDATE customers SET totalorders = ordersummary.total FROM (SELECT customer_id, count(order_id) As total FROM orders GROUP BY customer_id) As ordersummary WHERE customers.customer_id = ordersummary.customer_id
46
---

Diff for: tests/format-highlight.html

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
<span style="font-weight:bold;">ORDER BY</span>
1414
<span style="font-weight:bold;">COUNT</span>(<span style="color: #333;">order_id</span>) <span style="font-weight:bold;">DESC</span><span >;</span></pre>
1515
---
16+
<pre style="color: black; background-color: white;"></pre>
17+
---
1618
<pre style="color: black; background-color: white;"><span style="font-weight:bold;">UPDATE</span>
1719
<span style="color: #333;">customers</span>
1820
<span style="font-weight:bold;">SET</span>

Diff for: tests/format.txt

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ HAVING
1313
ORDER BY
1414
COUNT(order_id) DESC;
1515
---
16+
17+
---
1618
UPDATE
1719
customers
1820
SET

Diff for: tests/highlight.html

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
<span style="font-weight:bold;">HAVING</span> <span style="font-weight:bold;">COUNT</span>(<span style="color: #333;">order_id</span>) <span >&gt;</span> <span style="color: green;">5</span>
55
<span style="font-weight:bold;">ORDER BY</span> <span style="font-weight:bold;">COUNT</span>(<span style="color: #333;">order_id</span>) <span style="font-weight:bold;">DESC</span><span >;</span></pre>
66
---
7+
<pre style="color: black; background-color: white;"></pre>
8+
---
79
<pre style="color: black; background-color: white;"><span style="font-weight:bold;">UPDATE</span> <span style="color: #333;">customers</span>
810
<span style="font-weight:bold;">SET</span> <span style="color: #333;">totalorders</span> <span >=</span> <span style="color: #333;">ordersummary</span><span >.</span><span style="color: #333;">total</span>
911
<span style="font-weight:bold;">FROM</span> (<span style="font-weight:bold;">SELECT</span> <span style="color: #333;">customer_id</span><span >,</span> <span style="font-weight:bold;">count</span>(<span style="color: #333;">order_id</span>) <span style="font-weight:bold;">As</span> <span style="color: #333;">total</span>

Diff for: tests/sql.sql

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id
33
GROUP BY customer_id, customer_name
44
HAVING COUNT(order_id) > 5
55
ORDER BY COUNT(order_id) DESC;
6+
---
7+
68
---
79
UPDATE customers
810
SET totalorders = ordersummary.total

0 commit comments

Comments
 (0)