Skip to content

Commit f2f2ddf

Browse files
authored
RegexArrayShapeMatcher - more precise subject types
1 parent a3ce38e commit f2f2ddf

16 files changed

+364
-240
lines changed

src/Type/Php/RegexArrayShapeMatcher.php

+26 -9
Original file line number | Diff line number | Diff line change
@@ -107,12 +107,17 @@ private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLo
107107
*/
108108
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
109109
{
110-
$parseResult = $this->regexGroupParser->parseGroups($regex);
111-
if ($parseResult === null) {
110+
$astWalkResult = $this->regexGroupParser->parseGroups($regex);
111+
if ($astWalkResult === null) {
112112
// regex could not be parsed by Hoa/Regex
113113
return null;
114114
}
115-
[$groupList, $markVerbs] = $parseResult;
115+
$groupList = $astWalkResult->getCapturingGroups();
116+
$markVerbs = $astWalkResult->getMarkVerbs();
117+
$subjectBaseType = new StringType();
118+
if ($wasMatched->yes()) {
119+
$subjectBaseType = $astWalkResult->getSubjectBaseType();
120+
}
116121

117122
$regexGroupList = new RegexGroupList($groupList);
118123
$trailingOptionals = $regexGroupList->countTrailingOptionals();
@@ -130,6 +135,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
130135
$regexGroupList = $regexGroupList->forceGroupNonOptional($onlyOptionalTopLevelGroup);
131136

132137
$combiType = $this->buildArrayType(
138+
$subjectBaseType,
133139
$regexGroupList,
134140
$wasMatched,
135141
$trailingOptionals,
@@ -141,7 +147,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
141147
if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
142148
// positive match has a subject but not any capturing group
143149
$combiType = TypeCombinator::union(
144-
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
150+
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
145151
$combiType,
146152
);
147153
}
@@ -180,6 +186,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
180186
}
181187

182188
$combiType = $this->buildArrayType(
189+
$subjectBaseType,
183190
$comboList,
184191
$wasMatched,
185192
$trailingOptionals,
@@ -199,7 +206,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
199206
)
200207
) {
201208
// positive match has a subject but not any capturing group
202-
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
209+
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
203210
}
204211

205212
return TypeCombinator::union(...$combiTypes);
@@ -208,6 +215,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
208215
// the general case, which should work in all cases but does not yield the most
209216
// precise result possible in some cases
210217
return $this->buildArrayType(
218+
$subjectBaseType,
211219
$regexGroupList,
212220
$wasMatched,
213221
$trailingOptionals,
@@ -221,6 +229,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
221229
* @param list<string> $markVerbs
222230
*/
223231
private function buildArrayType(
232+
Type $subjectBaseType,
224233
RegexGroupList $captureGroups,
225234
TrinaryLogic $wasMatched,
226235
int $trailingOptionals,
@@ -234,7 +243,7 @@ private function buildArrayType(
234243
// first item in matches contains the overall match.
235244
$builder->setOffsetValueType(
236245
$this->getKeyType(0),
237-
$this->createSubjectValueType($flags, $matchesAll),
246+
$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
238247
$this->isSubjectOptional($wasMatched, $matchesAll),
239248
);
240249

@@ -298,13 +307,21 @@ private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll):
298307
return !$wasMatched->yes();
299308
}
300309

301-
private function createSubjectValueType(int $flags, bool $matchesAll): Type
310+
/**
311+
* @param Type $baseType A string type (or string variant) representing the subject of the match
312+
*/
313+
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
302314
{
303-
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));
315+
$subjectValueType = TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));
304316

305317
if ($matchesAll) {
318+
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));
319+
306320
if ($this->containsPatternOrder($flags)) {
307-
$subjectValueType = TypeCombinator::intersect(new ArrayType(new IntegerType(), $subjectValueType), new AccessoryArrayListType());
321+
$subjectValueType = TypeCombinator::intersect(
322+
new ArrayType(new IntegerType(), $subjectValueType),
323+
new AccessoryArrayListType(),
324+
);
308325
}
309326
}
310327

src/Type/Regex/RegexAstWalkResult.php

+25
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@
22

33
namespace PHPStan\Type\Regex;
44

5+
use PHPStan\Type\StringType;
6+
use PHPStan\Type\Type;
7+
58
/** @immutable */
69
final class RegexAstWalkResult
710
{
@@ -15,6 +18,7 @@ public function __construct(
1518
private int $captureGroupId,
1619
private array $capturingGroups,
1720
private array $markVerbs,
21+
private Type $subjectBaseType,
1822
)
1923
{
2024
}
@@ -27,6 +31,7 @@ public static function createEmpty(): self
2731
100,
2832
[],
2933
[],
34+
new StringType(),
3035
);
3136
}
3237

@@ -37,6 +42,7 @@ public function nextAlternationId(): self
3742
$this->captureGroupId,
3843
$this->capturingGroups,
3944
$this->markVerbs,
45+
$this->subjectBaseType,
4046
);
4147
}
4248

@@ -47,6 +53,7 @@ public function nextCaptureGroupId(): self
4753
$this->captureGroupId + 1,
4854
$this->capturingGroups,
4955
$this->markVerbs,
56+
$this->subjectBaseType,
5057
);
5158
}
5259

@@ -60,6 +67,7 @@ public function addCapturingGroup(RegexCapturingGroup $group): self
6067
$this->captureGroupId,
6168
$capturingGroups,
6269
$this->markVerbs,
70+
$this->subjectBaseType,
6371
);
6472
}
6573

@@ -73,6 +81,18 @@ public function markVerb(string $markVerb): self
7381
$this->captureGroupId,
7482
$this->capturingGroups,
7583
$verbs,
84+
$this->subjectBaseType,
85+
);
86+
}
87+
88+
public function withSubjectBaseType(Type $subjectBaseType): self
89+
{
90+
return new self(
91+
$this->alternationId,
92+
$this->captureGroupId,
93+
$this->capturingGroups,
94+
$this->markVerbs,
95+
$subjectBaseType,
7696
);
7797
}
7898

@@ -102,4 +122,9 @@ public function getMarkVerbs(): array
102122
return $this->markVerbs;
103123
}
104124

125+
public function getSubjectBaseType(): Type
126+
{
127+
return $this->subjectBaseType;
128+
}
129+
105130
}

src/Type/Regex/RegexGroupParser.php

+23 -5
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,7 @@ public function __construct(
4949
{
5050
}
5151

52-
/**
53-
* @return array{array<int, RegexCapturingGroup>, list<string>}|null
54-
*/
55-
public function parseGroups(string $regex): ?array
52+
public function parseGroups(string $regex): ?RegexAstWalkResult
5653
{
5754
if (self::$parser === null) {
5855
/** @throws void */
@@ -105,7 +102,28 @@ public function parseGroups(string $regex): ?array
105102
RegexAstWalkResult::createEmpty(),
106103
);
107104

108-
return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()];
105+
$subjectAsGroupResult = $this->walkGroupAst(
106+
$ast,
107+
false,
108+
false,
109+
$modifiers,
110+
RegexGroupWalkResult::createEmpty(),
111+
);
112+
113+
if (!$subjectAsGroupResult->mightContainEmptyStringLiteral()) {
114+
// we could handle numeric-string, in case we know the regex is delimited by ^ and $
115+
if ($subjectAsGroupResult->isNonFalsy()->yes()) {
116+
$astWalkResult = $astWalkResult->withSubjectBaseType(
117+
TypeCombinator::intersect(new StringType(), new AccessoryNonFalsyStringType()),
118+
);
119+
} elseif ($subjectAsGroupResult->isNonEmpty()->yes()) {
120+
$astWalkResult = $astWalkResult->withSubjectBaseType(
121+
TypeCombinator::intersect(new StringType(), new AccessoryNonEmptyStringType()),
122+
);
123+
}
124+
}
125+
126+
return $astWalkResult;
109127
}
110128

111129
private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode

src/Type/Regex/RegexGroupWalkResult.php

+14
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,20 @@ public function getOnlyLiterals(): ?array
103103
return $this->onlyLiterals;
104104
}
105105

106+
public function mightContainEmptyStringLiteral(): bool
107+
{
108+
if ($this->onlyLiterals === null) {
109+
return false;
110+
}
111+
foreach ($this->onlyLiterals as $onlyLiteral) {
112+
if ($onlyLiteral === '') {
113+
return true;
114+
}
115+
}
116+
117+
return false;
118+
}
119+
106120
public function isNonEmpty(): TrinaryLogic
107121
{
108122
return $this->isNonEmpty;

tests/PHPStan/Analyser/nsrt/bug-11293.php

+6 -6
Original file line number | Diff line number | Diff line change
@@ -9,21 +9,21 @@ class HelloWorld
99
public function sayHello(string $s): void
1010
{
1111
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) > 0) {
12-
assertType('array{string, non-falsy-string&numeric-string}', $matches);
12+
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
1313
}
1414
}
1515

1616
public function sayHello2(string $s): void
1717
{
1818
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) === 1) {
19-
assertType('array{string, non-falsy-string&numeric-string}', $matches);
19+
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
2020
}
2121
}
2222

2323
public function sayHello3(string $s): void
2424
{
2525
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) >= 1) {
26-
assertType('array{string, non-falsy-string&numeric-string}', $matches);
26+
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
2727
}
2828
}
2929

@@ -35,7 +35,7 @@ public function sayHello4(string $s): void
3535
return;
3636
}
3737

38-
assertType('array{string, non-falsy-string&numeric-string}', $matches);
38+
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
3939
}
4040

4141
public function sayHello5(string $s): void
@@ -46,7 +46,7 @@ public function sayHello5(string $s): void
4646
return;
4747
}
4848

49-
assertType('array{string, non-falsy-string&numeric-string}', $matches);
49+
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
5050
}
5151

5252
public function sayHello6(string $s): void
@@ -57,6 +57,6 @@ public function sayHello6(string $s): void
5757
return;
5858
}
5959

60-
assertType('array{string, non-falsy-string&numeric-string}', $matches);
60+
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
6161
}
6262
}

0 commit comments

Comments
 (0)