1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\DependencyInjection\AutowiredService;
8: use PHPStan\Php\PhpVersion;
9: use PHPStan\TrinaryLogic;
10: use PHPStan\Type\Accessory\AccessoryArrayListType;
11: use PHPStan\Type\ArrayType;
12: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
13: use PHPStan\Type\Constant\ConstantIntegerType;
14: use PHPStan\Type\Constant\ConstantStringType;
15: use PHPStan\Type\IntegerRangeType;
16: use PHPStan\Type\IntegerType;
17: use PHPStan\Type\NullType;
18: use PHPStan\Type\Regex\RegexCapturingGroup;
19: use PHPStan\Type\Regex\RegexExpressionHelper;
20: use PHPStan\Type\Regex\RegexGroupList;
21: use PHPStan\Type\Regex\RegexGroupParser;
22: use PHPStan\Type\StringType;
23: use PHPStan\Type\Type;
24: use PHPStan\Type\TypeCombinator;
25: use function count;
26: use function in_array;
27: use function is_string;
28: use const PREG_OFFSET_CAPTURE;
29: use const PREG_PATTERN_ORDER;
30: use const PREG_SET_ORDER;
31: use const PREG_UNMATCHED_AS_NULL;
32:
33: /**
34: * @api
35: */
36: #[AutowiredService]
37: final class RegexArrayShapeMatcher
38: {
39:
40: /**
41: * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
42: */
43: public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
44:
/**
 * @param RegexGroupParser $regexGroupParser parses a regex into its capturing groups and mark verbs
 * @param RegexExpressionHelper $regexExpressionHelper resolves the pattern type of concatenated pattern expressions
 * @param PhpVersion $phpVersion consulted for native PREG_UNMATCHED_AS_NULL support
 */
public function __construct(
	private RegexGroupParser $regexGroupParser,
	private RegexExpressionHelper $regexExpressionHelper,
	private PhpVersion $phpVersion,
)
{
}
52:
/**
 * Infers the type of the matches array for a match-all call (e.g. preg_match_all()).
 *
 * Returns null when no array shape can be inferred: the pattern is not a constant
 * string, the flags are not a supported constant integer, or the regex cannot be parsed.
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
57:
/**
 * Infers the type of the matches array for a single-match call (e.g. preg_match()).
 *
 * Returns null when no array shape can be inferred: the pattern is not a constant
 * string, the flags are not a supported constant integer, or the regex cannot be parsed.
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
62:
/**
 * Resolves the matches-array type for every constant string the pattern type may be
 * and combines the per-pattern results into a union.
 *
 * Returns an empty constant array when the match definitely failed. Returns null when
 * the pattern has no constant strings, when $flagsType is given but is not a constant
 * integer made up solely of supported flags, or when any of the patterns cannot be
 * turned into an array shape.
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	if ($wasMatched->no()) {
		// nothing matched, so the matches array is empty
		return ConstantArrayTypeBuilder::createEmpty()->getArray();
	}

	$patterns = $patternType->getConstantStrings();
	if (count($patterns) === 0) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$givenFlags = $flagsType->getValue();
		$supportedFlags = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;

		/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
		$flags = $givenFlags & $supportedFlags;

		// masking dropped a bit, so some other unsupported/unexpected flag was passed in
		if ($flags !== $givenFlags) {
			return null;
		}
	}

	$shapes = [];
	foreach ($patterns as $pattern) {
		$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
		if ($shape === null) {
			// a single pattern we cannot handle makes the whole result unknown
			return null;
		}

		$shapes[] = $shape;
	}

	return count($shapes) === 1
		? $shapes[0]
		: TypeCombinator::union(...$shapes);
}
105:
/**
 * Builds the matches-array type for one constant regex.
 *
 * For single-match calls ($matchesAll === false) two special cases yield a more precise
 * union of array shapes: a pattern whose only top level capturing group is optional
 * (when the match is certain), and a pattern made of a single top level alternation.
 * Everything else goes through the general buildArrayType() path.
 *
 * Returns null when the group parser cannot parse the regex.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	$parsed = $this->regexGroupParser->parseGroups($regex);
	if ($parsed === null) {
		// regex could not be parsed by Hoa/Regex
		return null;
	}

	$flags ??= 0;
	$groupList = $parsed->getCapturingGroups();
	$markVerbs = $parsed->getMarkVerbs();
	// the subject type derived from the regex is only used when the match is certain
	$subjectBaseType = $wasMatched->yes() ? $parsed->getSubjectBaseType() : new StringType();

	$regexGroupList = new RegexGroupList($groupList);
	$trailingOptionals = $regexGroupList->countTrailingOptionals();
	$onlyOptionalTopLevelGroup = $regexGroupList->getOnlyOptionalTopLevelGroup();
	$onlyTopLevelAlternation = $regexGroupList->getOnlyTopLevelAlternation();

	// every shape below differs only in the group list it is built from
	$buildShape = fn (RegexGroupList $groups): Type => $this->buildArrayType(
		$subjectBaseType,
		$groups,
		$wasMatched,
		$trailingOptionals,
		$flags,
		$markVerbs,
		$matchesAll,
	);

	if (!$matchesAll && $wasMatched->yes() && $onlyOptionalTopLevelGroup !== null) {
		// if only one optional top level capturing group exists,
		// we build a more precise tagged union of an empty match and a match with the group
		$withGroup = $buildShape($regexGroupList->forceGroupNonOptional($onlyOptionalTopLevelGroup));

		if ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
			return $withGroup;
		}

		// positive match has a subject but not any capturing group
		return TypeCombinator::union(
			$this->createSubjectOnlyArrayType($subjectBaseType, $flags, $matchesAll),
			$withGroup,
		);
	}

	if (
		!$matchesAll
		&& $onlyOptionalTopLevelGroup === null
		&& $onlyTopLevelAlternation !== null
		&& !$wasMatched->no()
	) {
		// if only a single top level alternation exists, build a more precise tagged union
		$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);

		$shapes = [];
		$isOptionalAlternation = false;
		foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
			// iterate the untouched list while $comboList accumulates the adjustments
			$allGroups = new RegexGroupList($groupList);
			$comboList = $allGroups;

			$beforeCurrentCombo = true;
			foreach ($allGroups as $group) {
				if (in_array($group->getId(), $groupCombo, true)) {
					// group belongs to the combination currently being built
					$isOptionalAlternation = $group->inOptionalAlternation();
					$comboList = $comboList->forceGroupNonOptional($group);
					$beforeCurrentCombo = false;
					continue;
				}

				if ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
					// group precedes the current combination: it is present but unmatched
					$comboList = $comboList->forceGroupTypeAndNonOptional(
						$group,
						$unmatchedAsNull ? new NullType() : new ConstantStringType(''),
					);
					continue;
				}

				if (!$unmatchedAsNull && $group->getAlternationId() === $onlyTopLevelAlternation->getId()) {
					$comboList = $comboList->removeGroup($group);
				}
			}

			$shapes[] = $buildShape($comboList);
		}

		$hasBranchWithoutGroups = $onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations());
		if (!$unmatchedAsNull && ($hasBranchWithoutGroups || $isOptionalAlternation)) {
			// positive match has a subject but not any capturing group
			$shapes[] = $this->createSubjectOnlyArrayType($subjectBaseType, $flags, $matchesAll);
		}

		return TypeCombinator::union(...$shapes);
	}

	// the general case, which should work in all cases but does not yield the most
	// precise result possible in some cases
	return $buildShape($regexGroupList);
}

/**
 * Builds the array shape of a match that only contains the subject at offset 0.
 */
private function createSubjectOnlyArrayType(Type $subjectBaseType, int $flags, bool $matchesAll): Type
{
	$builder = ConstantArrayTypeBuilder::createEmpty();
	$builder->setOffsetValueType(
		new ConstantIntegerType(0),
		$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
	);

	return $builder->getArray();
}
234:
/**
 * Assembles the matches array from the subject and the given capturing groups.
 *
 * Offset 0 holds the overall match. Each capturing group is stored under its 1-based
 * position and, for named groups, additionally under its name (name first). When mark
 * verbs are present an optional 'MARK' offset is appended. For a match-all call with
 * PREG_SET_ORDER the shape is wrapped in a list (unioned with the empty array unless
 * the match is certain). Without named groups and mark verbs the result is a list.
 *
 * @param list<string> $markVerbs
 */
private function buildArrayType(
	Type $subjectBaseType,
	RegexGroupList $captureGroups,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
	int $flags,
	array $markVerbs,
	bool $matchesAll,
): Type
{
	$hasMarkVerbs = count($markVerbs) > 0;
	// named groups or a MARK offset introduce string keys, so the result is no list then
	$forceList = !$hasMarkVerbs;
	$builder = ConstantArrayTypeBuilder::createEmpty();

	// first item in matches contains the overall match.
	$builder->setOffsetValueType(
		$this->getKeyType(0),
		$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
		$this->isSubjectOptional($wasMatched, $matchesAll),
	);

	$groupCount = count($captureGroups);
	$firstTrailingOptional = $groupCount - $trailingOptionals;
	$index = 0;
	foreach ($captureGroups as $captureGroup) {
		$isTrailingOptional = $index >= $firstTrailingOptional;
		$isLastGroup = $index === $groupCount - 1;
		$groupValueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
		$optional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

		$index++;

		if ($captureGroup->isNamed()) {
			$forceList = false;

			// the name offset comes before the numeric offset of the same group
			$builder->setOffsetValueType(
				$this->getKeyType($captureGroup->getName()),
				$groupValueType,
				$optional,
			);
		}

		// $index was already advanced, so it equals the 1-based group position here
		$builder->setOffsetValueType(
			$this->getKeyType($index),
			$groupValueType,
			$optional,
		);
	}

	if ($hasMarkVerbs) {
		$markTypes = [];
		foreach ($markVerbs as $markVerb) {
			$markTypes[] = new ConstantStringType($markVerb);
		}

		$builder->setOffsetValueType(
			$this->getKeyType('MARK'),
			TypeCombinator::union(...$markTypes),
			true,
		);
	}

	if ($matchesAll && $this->containsSetOrder($flags)) {
		// set order: a list with one shape per match
		$listOfShapes = TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $builder->getArray()),
			new AccessoryArrayListType(),
		);

		if ($wasMatched->yes()) {
			return $listOfShapes;
		}

		return TypeCombinator::union(
			ConstantArrayTypeBuilder::createEmpty()->getArray(),
			$listOfShapes,
		);
	}

	$shape = $builder->getArray();
	if (!$forceList) {
		return $shape;
	}

	return TypeCombinator::intersect($shape, new AccessoryArrayListType());
}
314:
/**
 * Tells whether offset 0 (the overall match) is optional in the matches array.
 *
 * It is never optional for match-all calls; otherwise it is optional unless the
 * match is certain.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
	return !$matchesAll && !$wasMatched->yes();
}
323:
/**
 * Creates the value type stored at offset 0 (the overall match). The subject is never null.
 *
 * For match-all calls $baseType is not used: the value is based on a plain string
 * type and, in pattern order, wrapped in a list.
 *
 * @param Type $baseType A string type (or string variant) representing the subject of the match
 */
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	if (!$matchesAll) {
		return TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));
	}

	// computed from a plain string only; the single-match value is not built and thrown away
	$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));

	if (!$this->containsPatternOrder($flags)) {
		return $subjectValueType;
	}

	// pattern order groups all overall matches into one list at offset 0
	return TypeCombinator::intersect(
		new ArrayType(new IntegerType(), $subjectValueType),
		new AccessoryArrayListType(),
	);
}
344:
/**
 * Tells whether the offset of a capturing group is optional in the matches array.
 *
 * Match-all: only trailing optional groups in set order without unmatched-as-null.
 * Single match: always optional unless the match is certain; for a certain match only
 * trailing optional groups without unmatched-as-null, and then only when the group
 * itself is optional.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
	if ($matchesAll) {
		return $isTrailingOptional
			&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& $this->containsSetOrder($flags);
	}

	if (!$wasMatched->yes()) {
		return true;
	}

	if (!$isTrailingOptional) {
		return false;
	}

	if ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
		return false;
	}

	return $captureGroup->isOptional();
}
369:
/**
 * Computes the value type stored in the matches array for one capturing group.
 *
 * Without unmatched-as-null, an optional group that is followed by further offsets is
 * widened with the empty string. With unmatched-as-null, null is removed again for
 * non-optional, non-trailing groups (for single matches only when the match is certain).
 * For match-all calls in pattern order the value is wrapped in a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
	$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
	$groupIsOptional = $captureGroup->isOptional();
	$groupType = $captureGroup->getType();
	$stripNull = !$isTrailingOptional && $unmatchedAsNull && !$groupIsOptional;

	if ($matchesAll) {
		$setOrder = $this->containsSetOrder($flags);

		// in set order, trailing optional groups are handled by omitting the offset
		// (see isGroupOptional()), so only the remaining cases get the empty string
		$widenWithEmptyString = !$unmatchedAsNull
			&& $groupIsOptional
			&& (!$setOrder || !$isTrailingOptional);

		if ($widenWithEmptyString) {
			$groupValueType = TypeCombinator::removeNull(
				$this->getValueType(
					TypeCombinator::union($groupType, new ConstantStringType('')),
					$flags,
					$matchesAll,
				),
			);
		} else {
			$groupValueType = $this->getValueType($groupType, $flags, $matchesAll);
		}

		if ($stripNull) {
			$groupValueType = TypeCombinator::removeNull($groupValueType);
		}

		if (!$this->containsPatternOrder($flags)) {
			return $groupValueType;
		}

		return TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $groupValueType),
			new AccessoryArrayListType(),
		);
	}

	$widenWithEmptyString = !$isLastGroup && !$unmatchedAsNull && $groupIsOptional;
	$groupValueType = $this->getValueType(
		$widenWithEmptyString
			? TypeCombinator::union($groupType, new ConstantStringType(''))
			: $groupType,
		$flags,
		$matchesAll,
	);

	if ($stripNull && $wasMatched->yes()) {
		$groupValueType = TypeCombinator::removeNull($groupValueType);
	}

	return $groupValueType;
}
426:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set in $flags.
 */
private function containsOffsetCapture(int $flags): bool
{
	$offsetCaptureBit = $flags & PREG_OFFSET_CAPTURE;

	return $offsetCaptureBit !== 0;
}
431:
/**
 * Tells whether results are in pattern order, i.e. PREG_SET_ORDER is not set.
 */
private function containsPatternOrder(int $flags): bool
{
	// If no order flag is given, PREG_PATTERN_ORDER is assumed.
	$isSetOrder = $this->containsSetOrder($flags);

	return !$isSetOrder;
}
437:
/**
 * Tells whether the PREG_SET_ORDER bit is set in $flags.
 */
private function containsSetOrder(int $flags): bool
{
	$setOrderBit = $flags & PREG_SET_ORDER;

	return $setOrderBit !== 0;
}
442:
/**
 * Tells whether unmatched groups are reported as null for the given flags.
 *
 * For single-match calls the PREG_UNMATCHED_AS_NULL bit only counts when the PHP
 * version supports it or the caller opted in via self::PREG_UNMATCHED_AS_NULL_ON_72_73.
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
	if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
		return false;
	}

	if ($matchesAll) {
		// preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
		// https://3v4l.org/tKmPn
		return true;
	}

	$emulatedOn72And73 = ($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0;

	return $emulatedOn72And73 || $this->phpVersion->supportsPregUnmatchedAsNull();
}
453:
/**
 * Wraps an array key into the matching constant type: a constant string for string
 * keys, a constant integer for integer keys.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
462:
/**
 * Adapts a base value type to the given flags.
 *
 * With unmatched-as-null the value becomes nullable. With PREG_OFFSET_CAPTURE the
 * value is wrapped into a two-element array of the (possibly nullable) value at
 * offset 0 and an integer offset of at least -1 at offset 1.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	$valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
		? TypeCombinator::addNull($baseType)
		: $baseType;

	if (!$this->containsOffsetCapture($flags)) {
		return $valueType;
	}

	$valueWithOffset = ConstantArrayTypeBuilder::createEmpty();
	$valueWithOffset->setOffsetValueType(
		new ConstantIntegerType(0),
		$valueType,
	);
	$valueWithOffset->setOffsetValueType(
		new ConstantIntegerType(1),
		// unmatched groups return -1 as offset
		IntegerRangeType::fromInterval(-1, null),
	);

	return $valueWithOffset->getArray();
}
490:
/**
 * Determines the type of the pattern expression.
 *
 * Concatenations are resolved through the regex expression helper; every other
 * expression is typed by the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
	return $patternExpr instanceof Expr\BinaryOp\Concat
		? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
		: $scope->getType($patternExpr);
}
499:
500: }
501: