From f39c15264196b8e8b68564499eb2b682914b833f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20FIDRY?= <theo.fidry@gmail.com> Date: Thu, 6 Apr 2017 00:37:23 +0100 Subject: [PATCH 1/3] WIP --- Alice.pp | 62 +++++++++++++++++++ composer.json | 1 + .../ExpressionLanguage/Lexer/HoaLexer.php | 29 +++++++++ .../ExpressionLanguage/LexerInterface.php | 2 +- src/Loader/NativeLoader.php | 25 ++++---- .../Lexer/LexerIntegrationTest.php | 1 + 6 files changed, 106 insertions(+), 14 deletions(-) create mode 100644 Alice.pp create mode 100644 src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php diff --git a/Alice.pp b/Alice.pp new file mode 100644 index 000000000..4b0e0fb35 --- /dev/null +++ b/Alice.pp @@ -0,0 +1,62 @@ +// +// LEXEMES +// +%token true true +%token false false +%token null null +%token escape_token \\ +%token string .+ + + +// +// RULES +// +value: + string() + +string: + ::escape_token:: <string> | <escape_token> | <string> + +// +// +// +// +// +// +//%skip space \s +//// Scalars. +//%token true true +//%token false false +//%token null null +//// Strings. +//%token quote_ <{ -> string +//%token string:string [^"]+ +//%token string:_quote }> -> default +//// Objects. +//%token brace_ { +//%token _brace } +//// Arrays. +//%token bracket_ \[ +//%token _bracket \] +//// Rest. 
+//%token colon : +//%token comma , +//%token number \d+ +// +//value: +// <true> | <false> | <null> | string() | object() | array() | number() +// +//string: +// ::quote_:: <string> ::_quote:: +// +//number: +// <number> +// +//#object: +// ::brace_:: pair() ( ::comma:: pair() )* ::_brace:: +// +//#pair: +// string() ::colon:: value() +// +//#array: +// ::bracket_:: value() ( ::comma:: value() )* ::_bracket:: \ No newline at end of file diff --git a/composer.json b/composer.json index fbe6eda22..fbbe8a322 100644 --- a/composer.json +++ b/composer.json @@ -22,6 +22,7 @@ "require": { "php": "^7.0", "fzaninotto/faker": "^1.6", + "hoa/compiler": "3.17.01.10", "myclabs/deep-copy": "^1.5.2", "symfony/property-access": "^2.7.11 || ^3.0 || ^4.0", "symfony/yaml": "^2.7 || ^3.0 || ^4.0" diff --git a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php new file mode 100644 index 000000000..244fec1ab --- /dev/null +++ b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php @@ -0,0 +1,29 @@ +<?php +declare(strict_types=1); + +namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer; + +use Hoa\Compiler\Llk\Parser as HoaParser; +use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; + +final class HoaLexer implements LexerInterface +{ + /** + * @var HoaParser + */ + private $parser; + + public function __construct(HoaParser $parser) + { + + $this->parser = $parser; + } + + /** + * @inheritdoc + */ + public function lex(string $value) + { + return $this->parser->parse($value); + } +} \ No newline at end of file diff --git a/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php b/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php index 9c55cd2d5..17b652f1d 100644 --- a/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php +++ b/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php @@ -29,5 +29,5 @@ interface LexerInterface * * @return Token[] */ - public function lex(string $value): 
array; + public function lex(string $value); } diff --git a/src/Loader/NativeLoader.php b/src/Loader/NativeLoader.php index eb2382295..2d06c2999 100644 --- a/src/Loader/NativeLoader.php +++ b/src/Loader/NativeLoader.php @@ -16,6 +16,8 @@ use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer; use Faker\Factory as FakerGeneratorFactory; use Faker\Generator as FakerGenerator; +use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer; +use Hoa\File\Read; use Nelmio\Alice\DataLoaderInterface; use Nelmio\Alice\Faker\Provider\AliceProvider; use Nelmio\Alice\FileLoaderInterface; @@ -209,6 +211,13 @@ class NativeLoader implements FilesLoaderInterface, FileLoaderInterface, DataLoa /** @protected */ const LOCALE = 'en_US'; + /** + * @var string Path to Alice grammar defined in the PP language. + * + * @see https://hoa-project.net/En/Literature/Hack/Compiler.html#PP_language + */ + protected $ppFilePath = __DIR__.'/../../Alice.pp'; + private $previous = ''; /** @@ -441,19 +450,9 @@ protected function createExpressionLanguageParser(): ExpressionLanguageParserInt protected function createLexer(): LexerInterface { - return new EmptyValueLexer( - new ReferenceEscaperLexer( - new GlobalPatternsLexer( - new FunctionLexer( - new StringThenReferenceLexer( - new SubPatternsLexer( - new ReferenceLexer() - ) - ) - ) - ) - ) - ); + $parser = \Hoa\Compiler\Llk\Llk::load(new Read($this->ppFilePath)); + + return new HoaLexer($parser); } protected function createExpressionLanguageTokenParser(): TokenParserInterface diff --git a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php index 5c0b87b6c..684c1ba8d 100644 --- a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php +++ b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php @@ -13,6 +13,7 @@ namespace 
Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer; +use Hoa\Compiler\Llk\TreeNode; use InvalidArgumentException; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Token; From 02ffbf8aa1e14a825931dc7f296d8fb91a3688f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20FIDRY?= <theo.fidry@gmail.com> Date: Sun, 17 Dec 2017 14:34:58 +0000 Subject: [PATCH 2/3] Update with Hywan latest version of the grammar --- Alice.pp | 207 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 154 insertions(+), 53 deletions(-) diff --git a/Alice.pp b/Alice.pp index 4b0e0fb35..06f7fed11 100644 --- a/Alice.pp +++ b/Alice.pp @@ -1,62 +1,163 @@ -// -// LEXEMES -// -%token true true -%token false false -%token null null -%token escape_token \\ -%token string .+ +// All whitespaces matter, except the trailing ones. +%skip trailing_whitespaces \s+$ +// An opening chevron must not be escaped by a backslash. +// Matching an opening chevron changes the namespace from `default` to +// `parameter`. +%token opening_chevron (?<!\\)< -> parameter -// -// RULES -// -value: - string() +// All whitespaces. +%skip parameter:whitespaces \s+ -string: - ::escape_token:: <string> | <escape_token> | <string> +// A closing chevron. +// Matching a closing chevron changes the namespace from `parameter` +// to `default`. +%token parameter:closing_chevron > -> __shift__ +// A variable opening. +%token parameter:opening_variable { -> variable + +// All whitespaces. +%skip variable:whitespaces \s+ + +// An expansion list separator. +%token variable:comma , + +// A range separator +%token variable:range \.\. + +// A range bound. +%token variable:number [+-]?[0-9]+ + +// A variable name is made of word characters (letters, digits, underscores). +%token variable:name [_\w][_\w\d]* + +// A variable closing. +%token variable:closing_variable } -> __shift__ + +// Opening parenthesis. +%token parameter:opening_parenthesis \( + +// Closing parenthesis. 
+%token parameter:closing_parenthesis \) + +// Constant string. +%token parameter:string ("|')(.*?)(?<!\\)\1 + +// A comma used to separate items in a list. +%token parameter:comma , + +// A variable or a function name. +%token parameter:name [_\w][_\w\d]* + + +// A reference is prefixed by an `@`. +%token at @ -> reference + +// A star is a glob operator. +%token reference:star \* -> __shift__ + +// A left curly bracket introduces an expansion. +%token reference:opening_expansion { -> expansion + +// All whitespaces. +%skip expansion:whitespaces \s+ + +// A number can be signed or not. +%token expansion:number [-+]?[0-9]+ + +// A range is represented by two dots. +%token expansion:range \.\. + +// A comma is the name separator. +%token expansion:comma , + +// A reference expansion name is just like a reference constant name. +%token expansion:name [_\w][_\w\d]* + +// A right curly bracket closes an expansion. +%token expansion:closing_expansion } -> __shift__ * 2 + +// A reference name is dynamic if some parts of its name are known at runtime. +%token reference:dynamic_name [_\w][_\w\d]*(?=[\*\{]) + +// A constant reference name is not a dynamic reference name. +%token reference:constant_name [_\w][_\w\d]* -> __shift__ + +// Anything is a little bit tricky because it must stop on an +// unescaped opening chevron. Thus: +// .+ +// is wrong because it is greedy. It must be lazy, so: +// .+? // +// However, it does not take into account the opening chevron. Thus: +// .+?(?=<) // +// This is valid but it does not take into account that the opening +// chevron must be unescaped. And now it's funny. Thus: +// (\\<|.)+?(?=<) // +// However, this works if and only if an unescaped opening chevron +// exists on the right. So the right assertion must be `<` or `$`, +// thus: +// (\\<|.)+?(?=(<|$)) // +// The final result contains non-capturing groups for memory concerns. // -// -//%skip space \s -//// Scalars. 
-//%token true true -//%token false false -//%token null null -//// Strings. -//%token quote_ <{ -> string -//%token string:string [^"]+ -//%token string:_quote }> -> default -//// Objects. -//%token brace_ { -//%token _brace } -//// Arrays. -//%token bracket_ \[ -//%token _bracket \] -//// Rest. -//%token colon : -//%token comma , -//%token number \d+ -// -//value: -// <true> | <false> | <null> | string() | object() | array() | number() -// -//string: -// ::quote_:: <string> ::_quote:: -// -//number: -// <number> -// -//#object: -// ::brace_:: pair() ( ::comma:: pair() )* ::_brace:: -// -//#pair: -// string() ::colon:: value() -// -//#array: -// ::bracket_:: value() ( ::comma:: value() )* ::_bracket:: \ No newline at end of file +// Repeat this reasoning for each Alice opening symbol (like `@`). +%token anything (?:\\<|@@|.)+?(?=(?:<|@|$)) + +#root: + ( anything()? ( parameter() | reference() ) )* anything()? + +#parameter: + ::opening_chevron:: ( variable() | identity() | function() ) ::closing_chevron:: + +#variable: + ::opening_variable:: <name> ::closing_variable:: + +variable_expansion_list: + ::opening_variable:: expansion_list() ::closing_variable:: + +#expansion_list: + <name> ( ::comma:: <name> )* + +variable_range: + ::opening_variable:: range() ::closing_variable:: + +#range: + <number> ::range:: <number> + +#identity: + ::opening_parenthesis:: <name> ::closing_parenthesis:: + +#function: + <name> ::opening_parenthesis:: function_arguments()? 
::closing_parenthesis:: + +function_arguments: + function_argument() ( ::comma:: function_argument() )* #arguments + +function_argument: + <string> + +reference: + ::at:: + ( + <constant_name> #constant_reference + | <dynamic_name> + ( + ::star:: #glob_reference + | ::opening_expansion:: + ( reference_range() | reference_list() ) + ::closing_expansion:: #expansion_reference + ) + ) + +reference_range: + <number> ::range:: <number> #range + +reference_list: + <name> ( ::comma:: <name> )* #list + +#anything: + <anything> From a34e41ab160d0b944265c5bbb8d3c27e50d169d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20FIDRY?= <theo.fidry@gmail.com> Date: Sun, 17 Dec 2017 14:54:57 +0000 Subject: [PATCH 3/3] Update --- composer.json | 2 +- .../ExpressionLanguage/Lexer/HoaLexer.php | 18 ++++++++++----- Alice.pp => src/Grammar.pp | 9 ++++++++ src/Loader/NativeLoader.php | 22 +++++++------------ .../Lexer/LexerIntegrationTest.php | 7 +++--- 5 files changed, 34 insertions(+), 24 deletions(-) rename Alice.pp => src/Grammar.pp (95%) diff --git a/composer.json b/composer.json index fbbe8a322..6501adaf2 100644 --- a/composer.json +++ b/composer.json @@ -22,7 +22,7 @@ "require": { "php": "^7.0", "fzaninotto/faker": "^1.6", - "hoa/compiler": "3.17.01.10", + "hoa/compiler": "^3.17", "myclabs/deep-copy": "^1.5.2", "symfony/property-access": "^2.7.11 || ^3.0 || ^4.0", "symfony/yaml": "^2.7 || ^3.0 || ^4.0" diff --git a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php index 244fec1ab..2140b55ce 100644 --- a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php +++ b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php @@ -1,16 +1,24 @@ <?php + +/* + * This file is part of the Alice package. + * + * (c) Nelmio <hello@nelm.io> + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + declare(strict_types=1); namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer; use Hoa\Compiler\Llk\Parser as HoaParser; +use Hoa\Compiler\Llk\TreeNode; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; final class HoaLexer implements LexerInterface { - /** - * @var HoaParser - */ private $parser; public function __construct(HoaParser $parser) @@ -22,8 +30,8 @@ public function __construct(HoaParser $parser) /** * @inheritdoc */ - public function lex(string $value) + public function lex(string $value): TreeNode { return $this->parser->parse($value); } -} \ No newline at end of file +} diff --git a/Alice.pp b/src/Grammar.pp similarity index 95% rename from Alice.pp rename to src/Grammar.pp index 06f7fed11..5d34c9278 100644 --- a/Alice.pp +++ b/src/Grammar.pp @@ -1,3 +1,12 @@ +// +// This file is part of the Alice package. +// +// (c) Nelmio <hello@nelm.io> +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// + // All whitespaces matter, except the trailing ones. 
%skip trailing_whitespaces \s+$ diff --git a/src/Loader/NativeLoader.php b/src/Loader/NativeLoader.php index 2d06c2999..8c0c41d85 100644 --- a/src/Loader/NativeLoader.php +++ b/src/Loader/NativeLoader.php @@ -13,10 +13,9 @@ namespace Nelmio\Alice\Loader; -use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer; use Faker\Factory as FakerGeneratorFactory; use Faker\Generator as FakerGenerator; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer; +use Hoa\Compiler\Llk\Llk; use Hoa\File\Read; use Nelmio\Alice\DataLoaderInterface; use Nelmio\Alice\Faker\Provider\AliceProvider; @@ -27,6 +26,7 @@ use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullListNameDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullRangeNameDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleCollectionDenormalizer; +use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerInterface; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerRegistry; use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\SimpleFixtureBagDenormalizer; @@ -59,13 +59,7 @@ use Nelmio\Alice\FixtureBuilder\Denormalizer\Parameter\SimpleParameterBagDenormalizer; use Nelmio\Alice\FixtureBuilder\Denormalizer\SimpleDenormalizer; use Nelmio\Alice\FixtureBuilder\DenormalizerInterface; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\EmptyValueLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\FunctionLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\GlobalPatternsLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceEscaperLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceLexer; -use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\StringThenReferenceLexer; -use 
Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\SubPatternsLexer; +use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\FunctionFixtureReferenceParser; use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\SimpleParser; @@ -212,11 +206,12 @@ class NativeLoader implements FilesLoaderInterface, FileLoaderInterface, DataLoa const LOCALE = 'en_US'; /** - * @var string Path to Alice grammar defined in the PP language. + * Path to the Alice grammar defined in the PP language. * + * @protected * @see https://hoa-project.net/En/Literature/Hack/Compiler.html#PP_language */ - protected $ppFilePath = __DIR__.'/../../Alice.pp'; + const GRAMMAR = __DIR__ . '/../../src/Grammar.pp'; private $previous = ''; @@ -395,8 +390,7 @@ protected function createConstructorDenormalizer(): ConstructorDenormalizerInter ), new FactoryDenormalizer( $this->getCallsDenormalizer() - ), - $this->getArgumentsDenormalizer() + ) ); } @@ -450,7 +444,7 @@ protected function createExpressionLanguageParser(): ExpressionLanguageParserInt protected function createLexer(): LexerInterface { - $parser = \Hoa\Compiler\Llk\Llk::load(new Read($this->ppFilePath)); + $parser = Llk::load(new Read(self::GRAMMAR)); return new HoaLexer($parser); } diff --git a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php index 684c1ba8d..165be979a 100644 --- a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php +++ b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php @@ -73,7 +73,6 @@ public function testCanLexValues(string $value, $expected) } $this->assertEquals($expected, $actual, var_export($actual, true)); - $this->assertSameSize($expected, $actual); } /** @@ -84,9 +83,9 @@ public function provideValues() // simple values yield 'empty string' => [ '', 
[ - new Token('', new TokenType(TokenType::STRING_TYPE)), - ], + new TreeNode( + '#root' + ), ]; yield 'regular string value' => [