Skip to content

Commit

Permalink
Generate whole regexp by default
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshyPHP committed Apr 5, 2022
1 parent db93fc1 commit 759c9e8
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 23 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
s9e\RegexpBuilder is a single-purpose library that generates a regular expression that matches a given list of strings. It is best suited for efficiently finding any of a list of literals inside a text.

Simply put, given `['foo', 'bar', 'baz']` as input, the library will generate `(?:ba[rz]|foo)`, a regular expression that can match any of the strings `foo`, `bar`, or `baz`.
Simply put, given `['foo', 'bar', 'baz']` as input, the library will generate `ba[rz]|foo`, a regular expression that can match any of the strings `foo`, `bar`, or `baz`.

[![Build status](https://github.com/s9e/RegexpBuilder/actions/workflows/build.yml/badge.svg)](https://github.com/s9e/RegexpBuilder/actions/workflows/build.yml)
[![Code Coverage](https://scrutinizer-ci.com/g/s9e/RegexpBuilder/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/s9e/RegexpBuilder/?branch=master)
Expand All @@ -14,7 +14,7 @@ $builder = new s9e\RegexpBuilder\Builder;
echo '/', $builder->build(['foo', 'bar', 'baz']), '/';
```
```
/(?:ba[rz]|foo)/
/ba[rz]|foo/
```


Expand Down Expand Up @@ -181,7 +181,7 @@ $builder = new s9e\RegexpBuilder\Builder([
echo '/', $builder->build(['foo?', 'bar*']), '/';
```
```
/(?:bar.*|foo.)/
/bar.*|foo./
```

In the following example, we map `X` to `\d`. Note that sequences produced by meta-characters may appear in character classes if the result is valid.
Expand Down
8 changes: 7 additions & 1 deletion src/Builder.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class Builder
*/
public Serializer $serializer;

/**
* @var bool Whether the generated expression is meant to be used whole, as the entire regexp.
*           Defaults to true, in which case a top-level alternation is returned bare,
*           e.g. `bar|foo`. When set to false, the expression is meant to be embedded in a
*           larger pattern and alternations are put into a non-capturing group,
*           e.g. `(?:bar|foo)`
*/
public bool $standalone = true;

/**
* @param array $config
*/
Expand Down Expand Up @@ -80,7 +86,7 @@ public function build(array $strings): string
$strings = $this->meta->replaceMeta($strings);
$strings = $this->runner->run($strings);

return $this->serializer->serializeStrings($strings);
return $this->serializer->serializeStrings($strings, !$this->standalone);
}

/**
Expand Down
19 changes: 14 additions & 5 deletions tests/BuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@
*/
class BuilderTest extends TestCase
{
/**
 * The standalone flag decides whether a top-level alternation is wrapped in a group
 *
 * A freshly created Builder returns the alternation bare. Once the flag is turned off, the
 * same input is wrapped in a non-capturing group.
 */
public function testStandalone(): void
{
	$builder = new Builder;

	// Default configuration: the alternation is returned without a surrounding group.
	// assertSame is used so the comparison is strict on both type and value
	$this->assertSame('bar|foo', $builder->build(['foo', 'bar']));

	// Non-standalone mode: the same strings are wrapped in a non-capturing group
	$builder->standalone = false;
	$this->assertSame('(?:bar|foo)', $builder->build(['foo', 'bar']));
}

/**
* @dataProvider getBuilderTests
*/
Expand All @@ -31,15 +40,15 @@ public function getBuilderTests()
'foo',
'bar'
],
'(?:bar|foo)'
'bar|foo'
],
[
[
'foo',
'fool',
'bar'
],
'(?:bar|fool?)'
'bar|fool?'
],
[
[
Expand Down Expand Up @@ -91,7 +100,7 @@ public function getBuilderTests()
"\xEF\xA4\x80\xEF\xA4\x80",
"\xF0\x9F\x98\x80\xF0\x9F\x98\x80"
],
'(?:\\x{D7FB}\\x{D7FB}|\\x{F900}\\x{F900}|\\x{1F600}\\x{1F600})',
'\\x{D7FB}\\x{D7FB}|\\x{F900}\\x{F900}|\\x{1F600}\\x{1F600}',
['input' => 'Utf8', 'output' => 'PHP']
],
[
Expand All @@ -100,7 +109,7 @@ public function getBuilderTests()
"\xEF\xA4\x80\xEF\xA4\x80",
"\xF0\x9F\x98\x80"
],
'(?:\\uD7FB\\uD7FB|\\uF900\\uF900|\\uD83D\\uDE00)',
'\\uD7FB\\uD7FB|\\uF900\\uF900|\\uD83D\\uDE00',
[
'input' => 'Utf8',
'inputOptions' => ['useSurrogates' => true],
Expand Down Expand Up @@ -129,7 +138,7 @@ public function getBuilderTests()
],
[
["\n", '.'],
'(?:\\n|.)',
'\\n|.',
['meta' => ['.' => '.'], 'output' => 'PHP']
],
[
Expand Down
28 changes: 14 additions & 14 deletions tests/ValidationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public function getValidationTests()
],
[
// CoalesceSingleCharacterPrefix
'(?:[ab]b|c)',
'[ab]b|c',
['ab', 'bb', 'c']
],
[
Expand Down Expand Up @@ -130,7 +130,7 @@ public function getValidationTests()
['best', 'boost', 'bust']
],
[
'(?:b(?:oo)?st|cool)',
'b(?:oo)?st|cool',
['boost', 'bst', 'cool']
],
[
Expand Down Expand Up @@ -166,13 +166,13 @@ public function getValidationTests()
['axx', 'ayy', 'bbxx', 'bbyy']
],
[
'(?:a(?:xx|yy)|bb(?:xx|yy)|c)',
'a(?:xx|yy)|bb(?:xx|yy)|c',
['axx', 'ayy', 'bbxx', 'bbyy', 'c']
],
[
// Ensure it doesn't become (?:c|(?:a|bb)(?:xx|yy)|azz) even though it would be
// shorter, because having fewer alternations at the top level is more important
'(?:a(?:xx|yy|zz)|bb(?:xx|yy)|c)',
'a(?:xx|yy|zz)|bb(?:xx|yy)|c',
['axx', 'ayy', 'azz', 'bbxx', 'bbyy', 'c']
],
[
Expand All @@ -192,7 +192,7 @@ public function getValidationTests()
['ax', 'ay', 'bx', 'by']
],
[
'(?:[ab][xy]|c)',
'[ab][xy]|c',
['ax', 'ay', 'bx', 'by', 'c']
],
[
Expand All @@ -204,15 +204,15 @@ public function getValidationTests()
['03', '04', '13', '14', '3', '4']
],
[
'(?:a[xy]|bb[xy]|c)',
'a[xy]|bb[xy]|c',
['ax', 'ay', 'bbx', 'bby', 'c']
],
[
'(?:[ab][xy]|c|dd[xy])',
'[ab][xy]|c|dd[xy]',
['ax', 'ay', 'bx', 'by', 'c', 'ddx', 'ddy']
],
[
'(?:[ab][xy]|[cd][XY]|[ef]|gg)',
'[ab][xy]|[cd][XY]|[ef]|gg',
['ax', 'ay', 'bx', 'by', 'cX', 'cY', 'dX', 'dY', 'e', 'f', 'gg']
],
[
Expand All @@ -235,15 +235,15 @@ public function getValidationTests()
[]
],
[
'(?:[yz]|bar|foo)',
'[yz]|bar|foo',
['foo', 'bar', 'y', 'z']
],
[
'(?:[yz]|ba[rz]|foo)',
'[yz]|ba[rz]|foo',
['foo', 'bar', 'baz', 'y', 'z']
],
[
'(?:a(?:a(?:cc|dd))?|bb(?:cc|dd))',
'a(?:a(?:cc|dd))?|bb(?:cc|dd)',
['a', 'aacc', 'aadd', 'bbcc', 'bbdd']
],
[
Expand All @@ -259,7 +259,7 @@ public function getValidationTests()
]
],
[
'(?:[1-7][0-7]?|0)',
'[1-7][0-7]?|0',
array_map('decoct', range(0, 63))
],
[
Expand All @@ -279,7 +279,7 @@ public function getValidationTests()
]
],
[
'(?:12?3?|23?|3)',
'12?3?|23?|3',
['1', '12', '123', '13', '2', '23', '3']
],
[
Expand Down Expand Up @@ -332,7 +332,7 @@ function ($n)
[
// CoalesceSingleCharacterPrefix should ignore expressions that do not represent a
// single character
'(?:[\\dab]x|\\bx|\\d+x|zz)',
'[\\dab]x|\\bx|\\d+x|zz',
['ax', 'bx', '?x', '*x', '#x', 'zz'],
['meta' => ['*' => '\\d+', '#' => '\\b', '?' => '\\d']]
],
Expand Down

0 comments on commit 759c9e8

Please sign in to comment.