Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Needs help] Replace in-house lexer by HoaCompiler #712

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"require": {
"php": "^7.0",
"fzaninotto/faker": "^1.6",
"hoa/compiler": "^3.17",
"myclabs/deep-copy": "^1.5.2",
"symfony/property-access": "^2.7.11 || ^3.0 || ^4.0",
"symfony/yaml": "^2.7 || ^3.0 || ^4.0"
Expand Down
37 changes: 37 additions & 0 deletions src/FixtureBuilder/ExpressionLanguage/Lexer/HoaLexer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

/*
* This file is part of the Alice package.
*
* (c) Nelmio <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

declare(strict_types=1);

namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer;

use Hoa\Compiler\Llk\Parser as HoaParser;
use Hoa\Compiler\Llk\TreeNode;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;

/**
 * Lexer implementation that hands the whole value over to a Hoa LL(k) parser
 * and returns whatever tree that parser builds.
 */
final class HoaLexer implements LexerInterface
{
    /**
     * @var HoaParser Parser every call to lex() is delegated to.
     */
    private $llkParser;

    /**
     * @param HoaParser $parser Parser used to analyse the values given to lex().
     */
    public function __construct(HoaParser $parser)
    {
        $this->llkParser = $parser;
    }

    /**
     * {@inheritdoc}
     *
     * @param string $value Raw value to analyse.
     *
     * @return TreeNode Result of the underlying parser's parse() call.
     */
    public function lex(string $value): TreeNode
    {
        $tree = $this->llkParser->parse($value);

        return $tree;
    }
}
2 changes: 1 addition & 1 deletion src/FixtureBuilder/ExpressionLanguage/LexerInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ interface LexerInterface
*
* @return Token[]
*/
public function lex(string $value): array;
public function lex(string $value);
}
172 changes: 172 additions & 0 deletions src/Grammar.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
//
// This file is part of the Alice package.
//
// (c) Nelmio <[email protected]>
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//

// All whitespaces matter, except the trailing ones, which are skipped.
%skip trailing_whitespaces \s+$

// An opening chevron must not be escaped by a backslash (negative
// lookbehind on `\`).
// Matching an opening chevron changes the namespace from `default` to
// `parameter`.
%token opening_chevron (?<!\\)< -> parameter

// Whitespaces inside a parameter are skipped.
%skip parameter:whitespaces \s+

// A closing chevron.
// Matching a closing chevron leaves the `parameter` namespace and goes back
// to the previous one (`default`, since `parameter` is only entered from the
// opening chevron).
%token parameter:closing_chevron > -> __shift__

// A variable opening: switches from `parameter` to the `variable` namespace.
%token parameter:opening_variable { -> variable

// Whitespaces inside a variable are skipped.
%skip variable:whitespaces \s+

// An expansion list separator.
%token variable:comma ,

// A range separator (two literal dots).
%token variable:range \.\.

// A range bound: an integer with an optional sign.
%token variable:number [+-]?[0-9]+

// A variable name is a run of word characters (`\w`).
// NOTE(review): this comment used to read "anything except `}`", which is
// looser than the pattern below; confirm which of the two is intended.
%token variable:name [_\w][_\w\d]*

// A variable closing: goes back to the previous namespace (`parameter`).
%token variable:closing_variable } -> __shift__

// Opening parenthesis.
%token parameter:opening_parenthesis \(

// Closing parenthesis.
%token parameter:closing_parenthesis \)

// Constant string: delimited by a pair of identical quotes (single or
// double); the closing quote must not be preceded by a backslash.
%token parameter:string ("|')(.*?)(?<!\\)\1

// A comma used to separate items in a list.
%token parameter:comma ,

// A variable or a function name.
%token parameter:name [_\w][_\w\d]*


// A reference is prefixed by an `@`.
// Matching it changes the namespace from `default` to `reference`.
%token at @ -> reference

// A star is a glob operator. It ends the reference: the lexer goes back to
// the previous namespace.
%token reference:star \* -> __shift__

// A left curly bracket introduces an expansion and switches to the
// `expansion` namespace.
%token reference:opening_expansion { -> expansion

// Whitespaces inside an expansion are skipped.
%skip expansion:whitespaces \s+

// A number can be signed or not.
%token expansion:number [-+]?[0-9]+

// A range is represented by two dots.
%token expansion:range \.\.

// A comma is the name separator.
%token expansion:comma ,

// A reference expansion name is just like a reference constant name.
%token expansion:name [_\w][_\w\d]*

// A right curly bracket closes an expansion. It shifts out of two
// namespaces at once: `expansion`, then the `reference` namespace the
// expansion was opened from.
%token expansion:closing_expansion } -> __shift__ * 2

// A reference name is dynamic if some parts of its name are known at runtime,
// i.e. when the name is immediately followed by `*` or `{` (lookahead only,
// the following character is not consumed). The namespace is left unchanged
// so that the star or the expansion can be matched next.
%token reference:dynamic_name [_\w][_\w\d]*(?=[\*\{])

// A constant reference name is not a dynamic reference name. It ends the
// reference: the lexer goes back to the previous namespace.
// NOTE(review): the pattern also matches dynamic names, so this presumably
// relies on `dynamic_name` being declared (and tried) first; confirm.
%token reference:constant_name [_\w][_\w\d]* -> __shift__

// Anything is a little bit tricky because it must stop on an
// unescaped opening chevron. Thus:
// .+
// is wrong because it is greedy. It must be lazy, so:
// .+?
//
// However, it does not take into account the opening chevron. Thus:
// .+?(?=<)
//
// This is valid but it does not take into account that the opening
// chevron must be unescaped. And now it's funny. Thus:
// (\\<|.)+?(?=<)
//
// However, this works if and only if an unescaped opening chevron
// exists on the right. So the right assertion must be `<` or `$`,
// thus:
// (\\<|.)+?(?=(<|$))
//
// The final result contains non-capturing groups for memory concerns.
//
// Repeat this reasoning for each Alice opening symbol (like `@`).
//
// NOTE(review): the `@@` alternative looks unreachable. The quantifier is
// lazy, so the match stops as soon as the lookahead sees the first `@` and
// never gets to consume `@@`; at the very start of the input the `at` token,
// declared earlier in the same namespace, would presumably win. Confirm with
// a test on a value containing `@@`.
%token anything (?:\\<|@@|.)+?(?=(?:<|@|$))

// Entry rule: any number of parameters or references, each one optionally
// preceded by free text, followed by optional trailing free text. Every part
// is optional, so the rule also accepts an empty sequence of tokens.
#root:
( anything()? ( parameter() | reference() ) )* anything()?

// A parameter is a variable, an identity or a function call wrapped in
// chevrons. The chevrons are matched but not kept (`::token::` form).
#parameter:
::opening_chevron:: ( variable() | identity() | function() ) ::closing_chevron::

// A variable is a single name between curly brackets.
#variable:
::opening_variable:: <name> ::closing_variable::

// A list of names between curly brackets.
// NOTE(review): no rule visible in this grammar calls this one; confirm
// whether `parameter` or `variable` is meant to reference it.
variable_expansion_list:
::opening_variable:: expansion_list() ::closing_variable::

// One or more names separated by commas.
#expansion_list:
<name> ( ::comma:: <name> )*

// A range between curly brackets.
// NOTE(review): no rule visible in this grammar calls this one; confirm
// whether `parameter` or `variable` is meant to reference it.
variable_range:
::opening_variable:: range() ::closing_variable::

// Two numbers separated by the range separator.
#range:
<number> ::range:: <number>

// An identity is a single name between parentheses.
#identity:
::opening_parenthesis:: <name> ::closing_parenthesis::

// A function call: a name followed by an optional argument list between
// parentheses.
#function:
<name> ::opening_parenthesis:: function_arguments()? ::closing_parenthesis::

// One or more comma-separated arguments, grouped under an `#arguments` node.
function_arguments:
function_argument() ( ::comma:: function_argument() )* #arguments

// The only kind of argument accepted so far is a constant string.
function_argument:
<string>

// A reference starts with an `@` (matched but not kept) and is one of:
//   - a constant name                          -> `#constant_reference`
//   - a dynamic name followed by a star        -> `#glob_reference`
//   - a dynamic name followed by an expansion
//     (a range or a list between curly
//     brackets)                                -> `#expansion_reference`
reference:
::at::
(
<constant_name> #constant_reference
| <dynamic_name>
(
::star:: #glob_reference
| ::opening_expansion::
( reference_range() | reference_list() )
::closing_expansion:: #expansion_reference
)
)

// Two numbers separated by the range separator, grouped under a `#range`
// node.
reference_range:
<number> ::range:: <number> #range

// One or more names separated by commas, grouped under a `#list` node.
reference_list:
<name> ( ::comma:: <name> )* #list

// Free text: a single `anything` token, kept as the value of the node.
#anything:
<anything>
39 changes: 16 additions & 23 deletions src/Loader/NativeLoader.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@

namespace Nelmio\Alice\Loader;

use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer;
use Faker\Factory as FakerGeneratorFactory;
use Faker\Generator as FakerGenerator;
use Hoa\Compiler\Llk\Llk;
use Hoa\File\Read;
use Nelmio\Alice\DataLoaderInterface;
use Nelmio\Alice\Faker\Provider\AliceProvider;
use Nelmio\Alice\FileLoaderInterface;
Expand All @@ -25,6 +26,7 @@
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullListNameDenormalizer;
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\NullRangeNameDenormalizer;
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleCollectionDenormalizer;
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\Chainable\SimpleDenormalizer as NelmioSimpleDenormalizer;
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerInterface;
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\FixtureDenormalizerRegistry;
use Nelmio\Alice\FixtureBuilder\Denormalizer\Fixture\SimpleFixtureBagDenormalizer;
Expand Down Expand Up @@ -57,13 +59,7 @@
use Nelmio\Alice\FixtureBuilder\Denormalizer\Parameter\SimpleParameterBagDenormalizer;
use Nelmio\Alice\FixtureBuilder\Denormalizer\SimpleDenormalizer;
use Nelmio\Alice\FixtureBuilder\DenormalizerInterface;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\EmptyValueLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\FunctionLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\GlobalPatternsLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceEscaperLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\ReferenceLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\StringThenReferenceLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\SubPatternsLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer\HoaLexer;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\FunctionFixtureReferenceParser;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Parser\SimpleParser;
Expand Down Expand Up @@ -209,6 +205,14 @@ class NativeLoader implements FilesLoaderInterface, FileLoaderInterface, DataLoa
/** @protected */
const LOCALE = 'en_US';

/**
* Path to the Alice grammar defined in the PP language.
*
* @protected
* @see https://hoa-project.net/En/Literature/Hack/Compiler.html#PP_language
*/
const GRAMMAR = __DIR__ . '/../../src/Grammar.pp';

private $previous = '';

/**
Expand Down Expand Up @@ -386,8 +390,7 @@ protected function createConstructorDenormalizer(): ConstructorDenormalizerInter
),
new FactoryDenormalizer(
$this->getCallsDenormalizer()
),
$this->getArgumentsDenormalizer()
)
);
}

Expand Down Expand Up @@ -441,19 +444,9 @@ protected function createExpressionLanguageParser(): ExpressionLanguageParserInt

protected function createLexer(): LexerInterface
{
return new EmptyValueLexer(
new ReferenceEscaperLexer(
new GlobalPatternsLexer(
new FunctionLexer(
new StringThenReferenceLexer(
new SubPatternsLexer(
new ReferenceLexer()
)
)
)
)
)
);
$parser = Llk::load(new Read(self::GRAMMAR));

return new HoaLexer($parser);
}

protected function createExpressionLanguageTokenParser(): TokenParserInterface
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

namespace Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Lexer;

use Hoa\Compiler\Llk\TreeNode;
use InvalidArgumentException;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\LexerInterface;
use Nelmio\Alice\FixtureBuilder\ExpressionLanguage\Token;
Expand Down Expand Up @@ -72,7 +73,6 @@ public function testCanLexValues(string $value, $expected)
}

$this->assertEquals($expected, $actual, var_export($actual, true));
$this->assertSameSize($expected, $actual);
}

/**
Expand All @@ -83,9 +83,9 @@ public function provideValues()
// simple values
yield 'empty string' => [
'',
[
new Token('', new TokenType(TokenType::STRING_TYPE)),
],
new TreeNode(
'#root'
),
];

yield 'regular string value' => [
Expand Down