diff --git a/composer.json b/composer.json index fbe6eda22..6501adaf2 100644 --- a/composer.json +++ b/composer.json @@ -22,6 +22,7 @@ "require": { "php": "^7.0", "fzaninotto/faker": "^1.6", + "hoa/compiler": "^3.17", "myclabs/deep-copy": "^1.5.2", "symfony/property-access": "^2.7.11 || ^3.0 || ^4.0", "symfony/yaml": "^2.7 || ^3.0 || ^4.0" diff --git a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php new file mode 100644 index 000000000..2140b55ce --- /dev/null +++ b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php @@ -0,0 +1,37 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer; + +use Hoa\Compiler\Llk\Parser as HoaParser; +use Hoa\Compiler\Llk\TreeNode; +use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; + +final class HoaLexer implements LexerInterface +{ + private $parser; + + public function __construct(HoaParser $parser) + { + + $this->parser = $parser; + } + + /** + * @inheritdoc + */ + public function lex(string $value): TreeNode + { + return $this->parser->parse($value); + } +} diff --git a/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php b/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php index 9c55cd2d5..17b652f1d 100644 --- a/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php +++ b/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php @@ -29,5 +29,5 @@ interface LexerInterface * * @return Token[] */ - public function lex(string $value): array; + public function lex(string $value); } diff --git a/src/Grammar.pp b/src/Grammar.pp new file mode 100644 index 000000000..5d34c9278 --- /dev/null +++ b/src/Grammar.pp @@ -0,0 +1,172 @@ +// +// This file is part of the Alice package. 
+// +// (c) Nelmio +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// + +// All whitespaces matter, except the trailing ones. +%skip trailing_whitespaces \s+$ + +// An opening chevron must not be escaped by a backslash. +// Matching an opening chevron changes the namespace from `default` to +// `parameter`. +%token opening_chevron (? parameter + +// All whitespaces. +%skip parameter:whitespaces \s+ + +// A closing chevron. +// Matching a closing chevron changes the namespace from `parameter` +// to `default`. +%token parameter:closing_chevron > -> __shift__ + +// A variable opening. +%token parameter:opening_variable { -> variable + +// All whitespaces. +%skip variable:whitespaces \s+ + +// An expansion list separator. +%token variable:comma , + +// A range separator. +%token variable:range \.\. + +// A range bound. +%token variable:number [+-]?[0-9]+ + +// A variable name is one or more word characters (`\w` already covers digits and `_`). +%token variable:name [_\w][_\w\d]* + +// A variable closing. +%token variable:closing_variable } -> __shift__ + +// Opening parenthesis. +%token parameter:opening_parenthesis \( + +// Closing parenthesis. +%token parameter:closing_parenthesis \) + +// Constant string. +%token parameter:string ("|')(.*?)(? reference + +// A star is a glob operator. +%token reference:star \* -> __shift__ + +// A left curly bracket introduces an expansion. +%token reference:opening_expansion { -> expansion + +// All whitespaces. +%skip expansion:whitespaces \s+ + +// A number can be signed or not. +%token expansion:number [-+]?[0-9]+ + +// A range is represented by two dots. +%token expansion:range \.\. + +// A comma is the name separator. +%token expansion:comma , + +// A reference expansion name is just like a reference constant name. +%token expansion:name [_\w][_\w\d]* + +// A right curly bracket closes an expansion. 
+%token expansion:closing_expansion } -> __shift__ * 2 + +// A reference name is dynamic if some parts of its name are known at runtime. +%token reference:dynamic_name [_\w][_\w\d]*(?=[\*\{]) + +// A constant reference name is not a dynamic reference name. +%token reference:constant_name [_\w][_\w\d]* -> __shift__ + +// Anything is a little bit tricky because it must stop on an +// unescaped opening chevron. Thus: +// .+ +// is wrong because it is greedy. It must be lazy, so: +// .+? +// +// However, it does not take into account the opening chevron. Thus: +// .+?(?=<) +// +// This is valid but it does not take into account that the opening +// chevron must be unescaped. And now it's funny. Thus: +// (\\<|.)+?(?=<) +// +// However, this works if and only if an unescaped opening chevron +// exists on the right. So the right assertion must be `<` or `$`, +// thus: +// (\\<|.)+?(?=(<|$)) +// +// The final result contains non-capturing groups for memory concerns. +// +// Repeat this reasoning for each Alice opening symbol (like `@`). +%token anything (?:\\<|@@|.)+?(?=(?:<|@|$)) + +#root: + ( anything()? ( parameter() | reference() ) )* anything()? + +#parameter: + ::opening_chevron:: ( variable() | identity() | function() ) ::closing_chevron:: + +#variable: + ::opening_variable:: ::closing_variable:: + +variable_expansion_list: + ::opening_variable:: expansion_list() ::closing_variable:: + +#expansion_list: + ( ::comma:: )* + +variable_range: + ::opening_variable:: range() ::closing_variable:: + +#range: + ::range:: + +#identity: + ::opening_parenthesis:: ::closing_parenthesis:: + +#function: + ::opening_parenthesis:: function_arguments()? 
::closing_parenthesis:: + +function_arguments: + function_argument() ( ::comma:: function_argument() )* #arguments + +function_argument: + + +reference: + ::at:: + ( + #constant_reference + | + ( + ::star:: #glob_reference + | ::opening_expansion:: + ( reference_range() | reference_list() ) + ::closing_expansion:: #expansion_reference + ) + ) + +reference_range: + ::range:: #range + +reference_list: + ( ::comma:: )* #list + +#anything: + diff --git a/src/Loader/NativeLoader.php b/src/Loader/NativeLoader.php index eb2382295..8c0c41d85 100644 --- a/src/Loader/NativeLoader.php +++ b/src/Loader/NativeLoader.php @@ -13,9 +13,10 @@ namespace Nelmio\Alice\Loader; -use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer; use Faker\Factory as FakerGeneratorFactory; use Faker\Generator as FakerGenerator; +use Hoa\Compiler\Llk\Llk; +use Hoa\File\Read; use Nelmio\Alice\DataLoaderInterface; use Nelmio\Alice\Faker\Provider\AliceProvider; use Nelmio\Alice\FileLoaderInterface; @@ -25,6 +26,7 @@ use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullListNameDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullRangeNameDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleCollectionDenormalizer; +use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerInterface; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerRegistry; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\SimpleFixtureBagDenormalizer; @@ -57,13 +59,7 @@ use Nelmio\Alice\FixtureBuilder\Denormalizer\Parameter\SimpleParameterBagDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\SimpleDenormalizer; use Nelmio\Alice\FixtureBuilder\DenormalizerInterface; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\EmptyValueLexer; -use 
Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\FunctionLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\GlobalPatternsLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceEscaperLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\StringThenReferenceLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\SubPatternsLexer; +use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\FunctionFixtureReferenceParser; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\SimpleParser; @@ -209,6 +205,14 @@ class NativeLoader implements FilesLoaderInterface, FileLoaderInterface, DataLoa /** @protected */ const LOCALE = 'en_US'; + /** + * Path to the Alice grammar defined in the PP language. + * + * @protected + * @see https://hoa-project.net/En/Literature/Hack/Compiler.html#PP_language + */ + const GRAMMAR = __DIR__ . 
'/../../src/Grammar.pp'; + private $previous = ''; /** @@ -386,8 +390,7 @@ protected function createConstructorDenormalizer(): ConstructorDenormalizerInter ), new FactoryDenormalizer( $this->getCallsDenormalizer() - ), - $this->getArgumentsDenormalizer() + ) ); } @@ -441,19 +444,9 @@ protected function createExpressionLanguageParser(): ExpressionLanguageParserInt protected function createLexer(): LexerInterface { - return new EmptyValueLexer( - new ReferenceEscaperLexer( - new GlobalPatternsLexer( - new FunctionLexer( - new StringThenReferenceLexer( - new SubPatternsLexer( - new ReferenceLexer() - ) - ) - ) - ) - ) - ); + $parser = Llk::load(new Read(self::GRAMMAR)); + + return new HoaLexer($parser); } protected function createExpressionLanguageTokenParser(): TokenParserInterface diff --git a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php index 5c0b87b6c..165be979a 100644 --- a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php +++ b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php @@ -13,6 +13,7 @@ namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer; +use Hoa\Compiler\Llk\TreeNode; use InvalidArgumentException; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Token; @@ -72,7 +73,6 @@ public function testCanLexValues(string $value, $expected) } $this->assertEquals($expected, $actual, var_export($actual, true)); - $this->assertSameSize($expected, $actual); } /** @@ -83,9 +83,9 @@ public function provideValues() // simple values yield 'empty string' => [ '', - [ - new Token('', new TokenType(TokenType::STRING_TYPE)), - ], + new TreeNode( + '#root' + ), ]; yield 'regular string value' => [