diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 28556c7..cad0eb8 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -94,8 +94,8 @@ final class Tokenizer 'ENCLOSED', 'END', 'ENGINE', - 'ENGINE_TYPE', 'ENGINES', + 'ENGINE_TYPE', 'ESCAPE', 'ESCAPED', 'EVENTS', @@ -111,9 +111,9 @@ final class Tokenizer 'FIRST', 'FIXED', 'FLUSH', + 'FOLLOWING', 'FOR', 'FORCE', - 'FOLLOWING', 'FOREIGN', 'FULL', 'FULLTEXT', @@ -187,12 +187,12 @@ final class Tokenizer 'NULL', 'OFFSET', 'ON', + 'ON DELETE', + 'ON UPDATE', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', - 'ON UPDATE', - 'ON DELETE', 'OUTFILE', 'OVER', 'PACK_KEYS', @@ -209,11 +209,11 @@ final class Tokenizer 'PROCESSLIST', 'PURGE', 'QUICK', - 'RANGE', 'RAID0', 'RAID_CHUNKS', 'RAID_CHUNKSIZE', 'RAID_TYPE', + 'RANGE', 'READ', 'READ_ONLY', 'READ_WRITE', @@ -254,20 +254,20 @@ final class Tokenizer 'SQL_BIG_SELECTS', 'SQL_BIG_TABLES', 'SQL_BUFFER_RESULT', + 'SQL_CACHE', 'SQL_CALC_FOUND_ROWS', 'SQL_LOG_BIN', 'SQL_LOG_OFF', 'SQL_LOG_UPDATE', 'SQL_LOW_PRIORITY_UPDATES', 'SQL_MAX_JOIN_SIZE', + 'SQL_NO_CACHE', 'SQL_QUOTE_SHOW_CREATE', 'SQL_SAFE_UPDATES', 'SQL_SELECT_LIMIT', 'SQL_SLAVE_SKIP_COUNTER', 'SQL_SMALL_RESULT', 'SQL_WARNINGS', - 'SQL_CACHE', - 'SQL_NO_CACHE', 'START', 'STARTING', 'STATUS', @@ -314,47 +314,47 @@ final class Tokenizer * @var list */ private array $reservedToplevel = [ - 'WITH', - 'SELECT', - 'FROM', - 'WHERE', - 'SET', - 'ORDER BY', - 'GROUP BY', - 'LIMIT', - 'DROP', - 'VALUES', - 'UPDATE', - 'HAVING', 'ADD', - 'CHANGE', - 'MODIFY', 'ALTER TABLE', + 'CHANGE', 'DELETE FROM', - 'UNION ALL', - 'UNION', + 'DROP', 'EXCEPT', + 'FROM', + 'GROUP BY', + 'GROUPS', + 'HAVING', 'INTERSECT', + 'LIMIT', + 'MODIFY', + 'ORDER BY', 'PARTITION BY', - 'ROWS', 'RANGE', - 'GROUPS', + 'ROWS', + 'SELECT', + 'SET', + 'UNION', + 'UNION ALL', + 'UPDATE', + 'VALUES', + 'WHERE', 'WINDOW', + 'WITH', ]; /** @var list */ private array $reservedNewline = [ - 'LEFT OUTER JOIN', - 'RIGHT OUTER JOIN', - 'LEFT JOIN', - 'RIGHT JOIN', - 
'OUTER JOIN', + 'AND', + 'EXCLUDE', 'INNER JOIN', 'JOIN', - 'XOR', + 'LEFT JOIN', + 'LEFT OUTER JOIN', 'OR', - 'AND', - 'EXCLUDE', + 'OUTER JOIN', + 'RIGHT JOIN', + 'RIGHT OUTER JOIN', + 'XOR', ]; /** @var list */ @@ -575,9 +575,9 @@ final class Tokenizer 'ORD', 'OVERLAPS', 'PASSWORD', - 'PERCENT_RANK', 'PERCENTILE_CONT', 'PERCENTILE_DISC', + 'PERCENT_RANK', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', @@ -625,13 +625,13 @@ final class Tokenizer 'SRID', 'STARTPOINT', 'STD', - 'STDEV', - 'STDEVP', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP', - 'STRING_AGG', + 'STDEV', + 'STDEVP', 'STRCMP', + 'STRING_AGG', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', @@ -725,11 +725,14 @@ final class Tokenizer */ public function __construct() { - // Sort reserved word list from longest word to shortest, 3x faster than usort - $reservedMap = array_combine($this->reserved, array_map(strlen(...), $this->reserved)); - assert($reservedMap !== false); - arsort($reservedMap); - $this->reserved = array_keys($reservedMap); + // Sort list from longest word to shortest, 3x faster than usort + $sortByLengthFx = static function ($values) { + $valuesMap = array_combine($values, array_map(strlen(...), $values)); + assert($valuesMap !== false); + arsort($valuesMap); + + return array_keys($valuesMap); + }; // Set up regular expressions $this->regexBoundaries = '(' . implode( @@ -738,18 +741,18 @@ public function __construct() ) . ')'; $this->regexReserved = '(' . implode( '|', - $this->quoteRegex($this->reserved), + $this->quoteRegex($sortByLengthFx($this->reserved)), ) . ')'; $this->regexReservedToplevel = str_replace(' ', '\\s+', '(' . implode( '|', - $this->quoteRegex($this->reservedToplevel), + $this->quoteRegex($sortByLengthFx($this->reservedToplevel)), ) . ')'); $this->regexReservedNewline = str_replace(' ', '\\s+', '(' . implode( '|', - $this->quoteRegex($this->reservedNewline), + $this->quoteRegex($sortByLengthFx($this->reservedNewline)), ) . ')'); - $this->regexFunction = '(' . 
implode('|', $this->quoteRegex($this->functions)) . ')';
+        $this->regexFunction = '(' . implode('|', $this->quoteRegex($sortByLengthFx($this->functions))) . ')';
     }
 
     /**
diff --git a/tests/TokenizerTest.php b/tests/TokenizerTest.php
index 80a727a..a10838d 100644
--- a/tests/TokenizerTest.php
+++ b/tests/TokenizerTest.php
@@ -7,9 +7,51 @@
 use Doctrine\SqlFormatter\Tokenizer;
 use PHPUnit\Framework\Attributes\DoesNotPerformAssertions;
 use PHPUnit\Framework\TestCase;
+use ReflectionClass;
+
+use function sort;
 
 final class TokenizerTest extends TestCase
 {
+    /**
+     * Reads the declared default value of one of the Tokenizer's keyword list
+     * properties through reflection, without constructing a Tokenizer.
+     *
+     * @param 'reserved'|'reservedToplevel'|'reservedNewline'|'functions' $propertyName
+     *
+     * @return list<string>
+     */
+    private function getTokenizerList(string $propertyName): array
+    {
+        $tokenizerReflClass = new ReflectionClass(Tokenizer::class);
+        /** @var list<string> $res */
+        $res = $tokenizerReflClass->getProperty($propertyName)->getDefaultValue();
+
+        return $res;
+    }
+
+    /**
+     * Every keyword list must be declared in the order PHP's sort() produces, so
+     * a new entry has exactly one place to go. The Tokenizer constructor sorts
+     * each list by length itself before building its regexes, so the declaration
+     * order only serves maintainability.
+     */
+    public function testInternalKeywordListsAreSortedForEasierMaintenance(): void
+    {
+        foreach (['reserved', 'reservedToplevel', 'reservedNewline', 'functions'] as $propertyName) {
+            $list       = $this->getTokenizerList($propertyName);
+            $listSorted = $list;
+            sort($listSorted);
+
+            // Name the offending property: the array diff alone does not say which list is unsorted.
+            self::assertSame(
+                $listSorted,
+                $list,
+                'Tokenizer::$' . $propertyName . ' is not declared in sort() order',
+            );
+        }
+    }
+
     #[DoesNotPerformAssertions]
     public function testThereAreNoRegressions(): void
     {