From f39c15264196b8e8b68564499eb2b682914b833f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20FIDRY?= <theo.fidry@gmail.com>
Date: Thu, 6 Apr 2017 00:37:23 +0100
Subject: [PATCH 1/3] WIP: replace the chained lexers with a Hoa compiler based lexer

---
 Alice.pp                                      | 62 +++++++++++++++++++
 composer.json                                 |  1 +
 .../ExpressionLanguage/Lexer/HoaLexer.php     | 29 +++++++++
 .../ExpressionLanguage/LexerInterface.php     |  2 +-
 src/Loader/NativeLoader.php                   | 25 ++++----
 .../Lexer/LexerIntegrationTest.php            |  1 +
 6 files changed, 106 insertions(+), 14 deletions(-)
 create mode 100644 Alice.pp
 create mode 100644 src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php

diff --git a/Alice.pp b/Alice.pp
new file mode 100644
index 000000000..4b0e0fb35
--- /dev/null
+++ b/Alice.pp
@@ -0,0 +1,62 @@
+//
+// LEXEMES
+//
+%token true true
+%token false false
+%token null null
+%token escape_token \\
+%token string .+
+
+
+//
+// RULES
+//
+value:
+    string()
+
+string:
+    ::escape_token:: <string> | <escape_token> | <string>
+
+//
+//
+//
+//
+//
+//
+//%skip   space          \s
+//// Scalars.
+//%token  true           true
+//%token  false          false
+//%token  null           null
+//// Strings.
+//%token  quote_         <{        -> string
+//%token  string:string  [^"]+
+//%token  string:_quote  }>        -> default
+//// Objects.
+//%token  brace_         {
+//%token _brace          }
+//// Arrays.
+//%token  bracket_       \[
+//%token _bracket        \]
+//// Rest.
+//%token  colon          :
+//%token  comma          ,
+//%token  number         \d+
+//
+//value:
+//    <true> | <false> | <null> | string() | object() | array() | number()
+//
+//string:
+//    ::quote_:: <string> ::_quote::
+//
+//number:
+//    <number>
+//
+//#object:
+//    ::brace_:: pair() ( ::comma:: pair() )* ::_brace::
+//
+//#pair:
+//    string() ::colon:: value()
+//
+//#array:
+//    ::bracket_:: value() ( ::comma:: value() )* ::_bracket::
\ No newline at end of file
diff --git a/composer.json b/composer.json
index fbe6eda22..fbbe8a322 100644
--- a/composer.json
+++ b/composer.json
@@ -22,6 +22,7 @@
     "require": {
         "php": "^7.0",
         "fzaninotto/faker": "^1.6",
+        "hoa/compiler": "3.17.01.10",
         "myclabs/deep-copy": "^1.5.2",
         "symfony/property-access": "^2.7.11 || ^3.0 || ^4.0",
         "symfony/yaml": "^2.7 || ^3.0 || ^4.0"
diff --git a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php
new file mode 100644
index 000000000..244fec1ab
--- /dev/null
+++ b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php
@@ -0,0 +1,29 @@
+<?php
+declare(strict_types=1);
+
+namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer;
+
+use Hoa\Compiler\Llk\Parser as HoaParser;
+use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;
+
+final class HoaLexer implements LexerInterface
+{
+    /**
+     * @var HoaParser
+     */
+    private $parser;
+
+    public function __construct(HoaParser $parser)
+    {
+
+        $this->parser = $parser;
+    }
+
+    /**
+     * @inheritdoc
+     */
+    public function lex(string $value)
+    {
+        return $this->parser->parse($value);
+    }
+}
\ No newline at end of file
diff --git a/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php b/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php
index 9c55cd2d5..17b652f1d 100644
--- a/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php
+++ b/src/FixtureBuilder/ExpressionLanguage/LexerInterface.php
@@ -29,5 +29,5 @@ interface LexerInterface
      *
      * @return Token[]
      */
-    public function lex(string $value): array;
+    public function lex(string $value);
 }
diff --git a/src/Loader/NativeLoader.php b/src/Loader/NativeLoader.php
index eb2382295..2d06c2999 100644
--- a/src/Loader/NativeLoader.php
+++ b/src/Loader/NativeLoader.php
@@ -16,6 +16,8 @@
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer;
 use Faker\Factory as FakerGeneratorFactory;
 use Faker\Generator as FakerGenerator;
+use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer;
+use Hoa\File\Read;
 use Nelmio\Alice\DataLoaderInterface;
 use Nelmio\Alice\Faker\Provider\AliceProvider;
 use Nelmio\Alice\FileLoaderInterface;
@@ -209,6 +211,13 @@ class NativeLoader implements FilesLoaderInterface, FileLoaderInterface, DataLoa
     /** @protected */
     const LOCALE = 'en_US';
 
+    /**
+     * @var string Path to Alice grammar defined in the PP language.
+     *
+     * @see https://hoa-project.net/En/Literature/Hack/Compiler.html#PP_language
+     */
+    protected $ppFilePath = __DIR__.'/../../Alice.pp';
+
     private $previous = '';
 
     /**
@@ -441,19 +450,9 @@ protected function createExpressionLanguageParser(): ExpressionLanguageParserInt
 
     protected function createLexer(): LexerInterface
     {
-        return new EmptyValueLexer(
-            new ReferenceEscaperLexer(
-                new GlobalPatternsLexer(
-                    new FunctionLexer(
-                        new StringThenReferenceLexer(
-                            new SubPatternsLexer(
-                                new ReferenceLexer()
-                            )
-                        )
-                    )
-                )
-            )
-        );
+        $parser = \Hoa\Compiler\Llk\Llk::load(new Read($this->ppFilePath));
+
+        return new HoaLexer($parser);
     }
 
     protected function createExpressionLanguageTokenParser(): TokenParserInterface
diff --git a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php
index 5c0b87b6c..684c1ba8d 100644
--- a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php
+++ b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php
@@ -13,6 +13,7 @@
 
 namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer;
 
+use Hoa\Compiler\Llk\TreeNode;
 use InvalidArgumentException;
 use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;
 use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Token;

From 02ffbf8aa1e14a825931dc7f296d8fb91a3688f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20FIDRY?= <theo.fidry@gmail.com>
Date: Sun, 17 Dec 2017 14:34:58 +0000
Subject: [PATCH 2/3] Update with Hywan's latest version of the grammar

---
 Alice.pp | 207 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 154 insertions(+), 53 deletions(-)

diff --git a/Alice.pp b/Alice.pp
index 4b0e0fb35..06f7fed11 100644
--- a/Alice.pp
+++ b/Alice.pp
@@ -1,62 +1,163 @@
-//
-// LEXEMES
-//
-%token true true
-%token false false
-%token null null
-%token escape_token \\
-%token string .+
+// All whitespaces matter, except the trailing ones.
+%skip  trailing_whitespaces    \s+$
 
+// An opening chevron must not be escaped by a backslash.
+// Matching an opening chevron changes the namespace from `default` to
+// `parameter`.
+%token opening_chevron    (?<!\\)<    -> parameter
 
-//
-// RULES
-//
-value:
-    string()
+// All whitespaces.
+%skip parameter:whitespaces    \s+
 
-string:
-    ::escape_token:: <string> | <escape_token> | <string>
+// A closing chevron.
+// Matching a closing chevron changes the namespace from `parameter`
+// to `default`.
+%token parameter:closing_chevron    >    -> __shift__
 
+// A variable opening.
+%token parameter:opening_variable    {    -> variable
+
+// All whitespaces.
+%skip variable:whitespaces    \s+
+
+// An expansion list separator.
+%token variable:comma    ,
+
+// A range separator is represented by two dots.
+%token variable:range    \.\.
+
+// A range bound.
+%token variable:number    [+-]?[0-9]+
+
+// A variable name is made of word characters only (letters, digits, `_`).
+%token variable:name    [_\w][_\w\d]*
+
+// A variable closing.
+%token variable:closing_variable    }    -> __shift__
+
+// Opening parenthesis.
+%token parameter:opening_parenthesis    \(
+
+// Closing parenthesis.
+%token parameter:closing_parenthesis    \)
+
+// Constant string.
+%token parameter:string    ("|')(.*?)(?<!\\)\1
+
+// A comma used to separate items in a list.
+%token parameter:comma    ,
+
+// A variable or a function name.
+%token parameter:name    [_\w][_\w\d]*
+
+
+// A reference is prefixed by an `@`.
+%token at    @    -> reference
+
+// A star is a glob operator.
+%token reference:star    \*    -> __shift__
+
+// A left curly bracket introduces an expansion.
+%token reference:opening_expansion    {    -> expansion
+
+// All whitespaces.
+%skip expansion:whitespaces    \s+
+
+// A number can be signed or not.
+%token expansion:number    [-+]?[0-9]+
+
+// A range is represented by two dots.
+%token expansion:range    \.\.
+
+// A comma is the name separator.
+%token expansion:comma    ,
+
+// A reference expansion name is just like a reference constant name.
+%token expansion:name    [_\w][_\w\d]*
+
+// A right curly bracket closes an expansion.
+%token expansion:closing_expansion    }    -> __shift__ * 2
+
+// A reference name is dynamic when it is directly followed by `*` (glob) or `{` (expansion).
+%token reference:dynamic_name    [_\w][_\w\d]*(?=[\*\{])
+
+// A constant reference name is not a dynamic reference name.
+%token reference:constant_name    [_\w][_\w\d]*    ->    __shift__
+
+// Anything is a little bit tricky because it must stop on an
+// unescaped opening chevron. Thus:
+//      .+
+// is wrong because it is greedy. It must be lazy, so:
+//     .+?
 //
+// However, it does not take into account the opening chevron. Thus:
+//     .+?(?=<)
 //
+// This is valid but it does not take into account that the opening
+// chevron must be unescaped. And now it's funny. Thus:
+//     (\\<|.)+?(?=<)
 //
+// However, this works if and only if an unescaped opening chevron
+// exists on the right. So the right assertion must be `<` or `$`,
+// thus:
+//     (\\<|.)+?(?=(<|$))
 //
+// The final result contains non-capturing groups for memory concerns.
 //
-//
-//%skip   space          \s
-//// Scalars.
-//%token  true           true
-//%token  false          false
-//%token  null           null
-//// Strings.
-//%token  quote_         <{        -> string
-//%token  string:string  [^"]+
-//%token  string:_quote  }>        -> default
-//// Objects.
-//%token  brace_         {
-//%token _brace          }
-//// Arrays.
-//%token  bracket_       \[
-//%token _bracket        \]
-//// Rest.
-//%token  colon          :
-//%token  comma          ,
-//%token  number         \d+
-//
-//value:
-//    <true> | <false> | <null> | string() | object() | array() | number()
-//
-//string:
-//    ::quote_:: <string> ::_quote::
-//
-//number:
-//    <number>
-//
-//#object:
-//    ::brace_:: pair() ( ::comma:: pair() )* ::_brace::
-//
-//#pair:
-//    string() ::colon:: value()
-//
-//#array:
-//    ::bracket_:: value() ( ::comma:: value() )* ::_bracket::
\ No newline at end of file
+// Repeat this reasoning for each Alice opening symbol (like `@`).
+%token anything    (?:\\<|@@|.)+?(?=(?:<|@|$))
+
+#root:
+    ( anything()? ( parameter() | reference() ) )* anything()?
+
+#parameter:
+    ::opening_chevron:: ( variable() | identity() | function() ) ::closing_chevron::
+
+#variable:
+    ::opening_variable:: <name> ::closing_variable::
+
+variable_expansion_list:
+    ::opening_variable:: expansion_list() ::closing_variable::
+
+#expansion_list:
+    <name> ( ::comma:: <name> )*
+
+variable_range:
+    ::opening_variable:: range() ::closing_variable::
+
+#range:
+    <number> ::range:: <number>
+
+#identity:
+    ::opening_parenthesis:: <name> ::closing_parenthesis::
+
+#function:
+    <name> ::opening_parenthesis:: function_arguments()? ::closing_parenthesis::
+
+function_arguments:
+    function_argument() ( ::comma:: function_argument() )* #arguments
+
+function_argument:
+    <string>
+
+reference:
+    ::at::
+    (
+        <constant_name> #constant_reference
+      | <dynamic_name>
+        (
+            ::star:: #glob_reference
+          | ::opening_expansion::
+            ( reference_range() | reference_list() )
+            ::closing_expansion:: #expansion_reference
+        )
+    )
+
+reference_range:
+    <number> ::range:: <number> #range
+
+reference_list:
+    <name> ( ::comma:: <name> )* #list
+
+#anything:
+    <anything>

From a34e41ab160d0b944265c5bbb8d3c27e50d169d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20FIDRY?= <theo.fidry@gmail.com>
Date: Sun, 17 Dec 2017 14:54:57 +0000
Subject: [PATCH 3/3] Move the grammar to src/Grammar.pp and type HoaLexer::lex()

---
 composer.json                                 |  2 +-
 .../ExpressionLanguage/Lexer/HoaLexer.php     | 18 ++++++++++-----
 Alice.pp => src/Grammar.pp                    |  9 ++++++++
 src/Loader/NativeLoader.php                   | 22 +++++++------------
 .../Lexer/LexerIntegrationTest.php            |  7 +++---
 5 files changed, 34 insertions(+), 24 deletions(-)
 rename Alice.pp => src/Grammar.pp (95%)

diff --git a/composer.json b/composer.json
index fbbe8a322..6501adaf2 100644
--- a/composer.json
+++ b/composer.json
@@ -22,7 +22,7 @@
     "require": {
         "php": "^7.0",
         "fzaninotto/faker": "^1.6",
-        "hoa/compiler": "3.17.01.10",
+        "hoa/compiler": "^3.17",
         "myclabs/deep-copy": "^1.5.2",
         "symfony/property-access": "^2.7.11 || ^3.0 || ^4.0",
         "symfony/yaml": "^2.7 || ^3.0 || ^4.0"
diff --git a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php
index 244fec1ab..2140b55ce 100644
--- a/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php
+++ b/src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php
@@ -1,16 +1,24 @@
 <?php
+
+/*
+ * This file is part of the Alice package.
+ *
+ * (c) Nelmio <hello@nelm.io>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
 declare(strict_types=1);
 
 namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer;
 
 use Hoa\Compiler\Llk\Parser as HoaParser;
+use Hoa\Compiler\Llk\TreeNode;
 use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;
 
 final class HoaLexer implements LexerInterface
 {
-    /**
-     * @var HoaParser
-     */
     private $parser;
 
     public function __construct(HoaParser $parser)
@@ -22,8 +30,8 @@ public function __construct(HoaParser $parser)
     /**
      * @inheritdoc
      */
-    public function lex(string $value)
+    public function lex(string $value): TreeNode
     {
         return $this->parser->parse($value);
     }
-}
\ No newline at end of file
+}
diff --git a/Alice.pp b/src/Grammar.pp
similarity index 95%
rename from Alice.pp
rename to src/Grammar.pp
index 06f7fed11..5d34c9278 100644
--- a/Alice.pp
+++ b/src/Grammar.pp
@@ -1,3 +1,12 @@
+//
+// This file is part of the Alice package.
+//
+// (c) Nelmio <hello@nelm.io>
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+//
+
 // All whitespaces matter, except the trailing ones.
 %skip  trailing_whitespaces    \s+$
 
diff --git a/src/Loader/NativeLoader.php b/src/Loader/NativeLoader.php
index 2d06c2999..8c0c41d85 100644
--- a/src/Loader/NativeLoader.php
+++ b/src/Loader/NativeLoader.php
@@ -13,10 +13,9 @@
 
 namespace Nelmio\Alice\Loader;
 
-use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer;
 use Faker\Factory as FakerGeneratorFactory;
 use Faker\Generator as FakerGenerator;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer;
+use Hoa\Compiler\Llk\Llk;
 use Hoa\File\Read;
 use Nelmio\Alice\DataLoaderInterface;
 use Nelmio\Alice\Faker\Provider\AliceProvider;
@@ -27,6 +26,7 @@
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullListNameDenormalizer;
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullRangeNameDenormalizer;
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleCollectionDenormalizer;
+use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer;
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerInterface;
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerRegistry;
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\SimpleFixtureBagDenormalizer;
@@ -59,13 +59,7 @@
 use Nelmio\Alice\FixtureBuilder\Denormalizer\Parameter\SimpleParameterBagDenormalizer;
 use Nelmio\Alice\FixtureBuilder\Denormalizer\SimpleDenormalizer;
 use Nelmio\Alice\FixtureBuilder\DenormalizerInterface;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\EmptyValueLexer;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\FunctionLexer;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\GlobalPatternsLexer;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceEscaperLexer;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceLexer;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\StringThenReferenceLexer;
-use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\SubPatternsLexer;
+use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer;
 use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;
 use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\FunctionFixtureReferenceParser;
 use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\SimpleParser;
@@ -212,11 +206,12 @@ class NativeLoader implements FilesLoaderInterface, FileLoaderInterface, DataLoa
     const LOCALE = 'en_US';
 
     /**
-     * @var string Path to Alice grammar defined in the PP language.
+     * Path to the Alice grammar defined in the PP language.
      *
+     * @protected
      * @see https://hoa-project.net/En/Literature/Hack/Compiler.html#PP_language
      */
-    protected $ppFilePath = __DIR__.'/../../Alice.pp';
+    const GRAMMAR = __DIR__ . '/../../src/Grammar.pp';
 
     private $previous = '';
 
@@ -395,8 +390,7 @@ protected function createConstructorDenormalizer(): ConstructorDenormalizerInter
             ),
             new FactoryDenormalizer(
                 $this->getCallsDenormalizer()
-            ),
-            $this->getArgumentsDenormalizer()
+            )
         );
     }
 
@@ -450,7 +444,7 @@ protected function createExpressionLanguageParser(): ExpressionLanguageParserInt
 
     protected function createLexer(): LexerInterface
     {
-        $parser = \Hoa\Compiler\Llk\Llk::load(new Read($this->ppFilePath));
+        $parser = Llk::load(new Read(self::GRAMMAR));
 
         return new HoaLexer($parser);
     }
diff --git a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php
index 684c1ba8d..165be979a 100644
--- a/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php
+++ b/tests/FixtureBuilder/ExpressionLanguage/Lexer/LexerIntegrationTest.php
@@ -73,7 +73,6 @@ public function testCanLexValues(string $value, $expected)
         }
 
         $this->assertEquals($expected, $actual, var_export($actual, true));
-        $this->assertSameSize($expected, $actual);
     }
 
     /**
@@ -84,9 +83,9 @@ public function provideValues()
         // simple values
         yield 'empty string' => [
             '',
-            [
-                new Token('', new TokenType(TokenType::STRING_TYPE)),
-            ],
+            new TreeNode(
+                '#root'
+            ),
         ];
 
         yield 'regular string value' => [