1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\Php\PhpVersion;
8: use PHPStan\TrinaryLogic;
9: use PHPStan\Type\Accessory\AccessoryArrayListType;
10: use PHPStan\Type\ArrayType;
11: use PHPStan\Type\Constant\ConstantArrayType;
12: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
13: use PHPStan\Type\Constant\ConstantIntegerType;
14: use PHPStan\Type\Constant\ConstantStringType;
15: use PHPStan\Type\IntegerRangeType;
16: use PHPStan\Type\IntegerType;
17: use PHPStan\Type\NullType;
18: use PHPStan\Type\Regex\RegexAlternation;
19: use PHPStan\Type\Regex\RegexCapturingGroup;
20: use PHPStan\Type\Regex\RegexExpressionHelper;
21: use PHPStan\Type\Regex\RegexGroupParser;
22: use PHPStan\Type\StringType;
23: use PHPStan\Type\Type;
24: use PHPStan\Type\TypeCombinator;
25: use function array_reverse;
26: use function count;
27: use function in_array;
28: use function is_string;
29: use const PREG_OFFSET_CAPTURE;
30: use const PREG_PATTERN_ORDER;
31: use const PREG_SET_ORDER;
32: use const PREG_UNMATCHED_AS_NULL;
33:
34: /**
35: * @api
36: */
37: final class RegexArrayShapeMatcher
38: {
39:
40: /**
41: * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
42: */
43: public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
44:
/**
 * Stores the collaborators this matcher relies on.
 *
 * - the group parser turns a regex into its capturing groups and MARK verbs,
 * - the expression helper resolves the pattern type of concatenation expressions,
 * - the PHP version is consulted when deciding whether PREG_UNMATCHED_AS_NULL applies.
 */
public function __construct(
    private RegexGroupParser $regexGroupParser,
    private RegexExpressionHelper $regexExpressionHelper,
    private PhpVersion $phpVersion,
)
{
}
52:
/**
 * Infers the type of the matches array for a match-all call (preg_match_all() semantics)
 * from the pattern expression.
 *
 * Returns null when no array shape can be inferred.
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
    $patternType = $this->getPatternType($patternExpr, $scope);

    return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
57:
/**
 * Infers the type of the matches array for a single-match call (the non match-all case)
 * from the pattern expression.
 *
 * Returns null when no array shape can be inferred.
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
    $patternType = $this->getPatternType($patternExpr, $scope);

    return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
62:
/**
 * Infers the type of the matches array for a single-match call from an already
 * resolved pattern type.
 *
 * Returns null when no array shape can be inferred.
 *
 * @deprecated use matchExpr() instead for a more precise result
 */
public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched): ?Type
{
    $matchesAll = false;

    return $this->matchPatternType($patternType, $flagsType, $wasMatched, $matchesAll);
}
70:
/**
 * Resolves the matches-array type for every constant string the pattern type can
 * take and combines the per-pattern results into one type.
 *
 * Returns null when the pattern has no constant string value, when the flags are
 * not a constant integer or contain an unsupported bit, or when any of the
 * patterns cannot be analysed.
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
    if ($wasMatched->no()) {
        // a call which did not match yields an empty matches array
        return new ConstantArrayType([], []);
    }

    $patterns = $patternType->getConstantStrings();
    if ($patterns === []) {
        return null;
    }

    $flags = null;
    if ($flagsType !== null) {
        if (!$flagsType instanceof ConstantIntegerType) {
            return null;
        }

        $supportedFlags = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;
        $givenFlags = $flagsType->getValue();

        /** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
        $flags = $givenFlags & $supportedFlags;

        // some other unsupported/unexpected flag was passed in
        if ($flags !== $givenFlags) {
            return null;
        }
    }

    $shapes = [];
    foreach ($patterns as $pattern) {
        $shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
        if ($shape === null) {
            // a single pattern we cannot analyse invalidates the whole result
            return null;
        }

        $shapes[] = $shape;
    }

    return count($shapes) === 1
        ? $shapes[0]
        : TypeCombinator::union(...$shapes);
}
113:
/**
 * Builds the matches-array type for one concrete regex.
 *
 * Two special cases of single-match calls produce a tagged union which is more
 * precise than the general shape: a pattern whose only top level capturing group
 * is optional, and a pattern whose top level capturing groups all belong to one
 * alternation. Everything else goes through the general shape builder.
 *
 * Returns null when the regex cannot be parsed.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
    $parsed = $this->regexGroupParser->parseGroups($regex);
    if ($parsed === null) {
        // regex could not be parsed by Hoa/Regex
        return null;
    }
    [$groupList, $markVerbs] = $parsed;

    // number of consecutive optional groups at the end of the group list
    $trailingOptionals = 0;
    foreach (array_reverse($groupList) as $trailingGroup) {
        if (!$trailingGroup->isOptional()) {
            break;
        }
        $trailingOptionals++;
    }

    $soleOptionalGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
    $soleAlternation = $this->getOnlyTopLevelAlternation($groupList);
    $flags ??= 0;

    if (!$matchesAll) {
        if ($wasMatched->yes() && $soleOptionalGroup !== null) {
            // exactly one optional top level capturing group exists:
            // build a tagged union of an empty match and a match containing the group

            $soleOptionalGroup->forceNonOptional();

            $shape = $this->buildArrayType(
                $groupList,
                $wasMatched,
                $trailingOptionals,
                $flags,
                $markVerbs,
                $matchesAll,
            );

            if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
                // positive match has a subject but not any capturing group
                $shape = TypeCombinator::union(
                    new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true),
                    $shape,
                );
            }

            $soleOptionalGroup->clearOverrides();

            return $shape;
        }

        if ($soleOptionalGroup === null && $soleAlternation !== null && !$wasMatched->no()) {
            // a single top level alternation exists:
            // build one shape per group combination and union them

            $unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
            $groupCombinations = $soleAlternation->getGroupCombinations();

            $shapes = [];
            $isOptionalAlternation = false;
            foreach ($groupCombinations as $groupCombo) {
                $comboList = $groupList;

                $beforeCurrentCombo = true;
                // iterate the untouched list, entries are only removed from the per-combination copy
                foreach ($groupList as $groupId => $group) {
                    if (in_array($groupId, $groupCombo, true)) {
                        $isOptionalAlternation = $group->inOptionalAlternation();
                        $group->forceNonOptional();
                        $beforeCurrentCombo = false;
                        continue;
                    }

                    if ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
                        // groups in front of the current combination are present, but carry no match
                        $group->forceNonOptional();
                        $group->forceType(
                            $unmatchedAsNull ? new NullType() : new ConstantStringType(''),
                        );
                        continue;
                    }

                    if ($group->getAlternationId() === $soleAlternation->getId() && !$unmatchedAsNull) {
                        unset($comboList[$groupId]);
                    }
                }

                $shapes[] = $this->buildArrayType(
                    $comboList,
                    $wasMatched,
                    $trailingOptionals,
                    $flags,
                    $markVerbs,
                    $matchesAll,
                );

                foreach ($groupCombo as $groupId) {
                    $comboList[$groupId]->clearOverrides();
                }
            }

            $hasAlternativeWithoutGroups = $soleAlternation->getAlternationsCount() !== count($groupCombinations);
            if (!$unmatchedAsNull && ($hasAlternativeWithoutGroups || $isOptionalAlternation)) {
                // positive match has a subject but not any capturing group
                $shapes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true);
            }

            return TypeCombinator::union(...$shapes);
        }
    }

    // the general case, which should work in all cases but does not yield the most
    // precise result possible in some cases
    return $this->buildArrayType(
        $groupList,
        $wasMatched,
        $trailingOptionals,
        $flags,
        $markVerbs,
        $matchesAll,
    );
}
242:
/**
 * Returns the top level capturing group if there is exactly one and it is optional.
 *
 * Yields null when there is no top level group, more than one, or a non-optional one.
 * Groups which are not top level are ignored.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
    $candidate = null;
    foreach ($captureGroups as $captureGroup) {
        if (!$captureGroup->isTopLevel()) {
            continue;
        }

        // a mandatory group or a second top level group disqualifies the pattern
        if (!$captureGroup->isOptional() || $candidate !== null) {
            return null;
        }

        $candidate = $captureGroup;
    }

    return $candidate;
}
267:
/**
 * Returns the alternation shared by all top level capturing groups.
 *
 * Yields null when there is no top level group, when one of them is not part of
 * an alternation, or when they belong to different alternations.
 * Groups which are not top level are ignored.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyTopLevelAlternation(array $captureGroups): ?RegexAlternation
{
    $shared = null;
    foreach ($captureGroups as $captureGroup) {
        if (!$captureGroup->isTopLevel()) {
            continue;
        }

        if (!$captureGroup->inAlternation()) {
            return null;
        }

        $current = $captureGroup->getAlternation();
        if ($shared === null) {
            $shared = $current;
            continue;
        }

        if ($shared->getId() !== $current->getId()) {
            return null;
        }
    }

    return $shared;
}
292:
/**
 * Assembles the matches-array type: offset 0 for the overall match, one numeric
 * offset per capturing group (plus a string offset for named groups) and an
 * optional 'MARK' offset when the pattern contains mark verbs.
 *
 * For match-all calls in set order the shape is wrapped into a list of shapes.
 *
 * @param array<RegexCapturingGroup> $captureGroups
 * @param list<string> $markVerbs
 */
private function buildArrayType(
    array $captureGroups,
    TrinaryLogic $wasMatched,
    int $trailingOptionals,
    int $flags,
    array $markVerbs,
    bool $matchesAll,
): Type
{
    $shapeBuilder = ConstantArrayTypeBuilder::createEmpty();

    // first item in matches contains the overall match.
    $shapeBuilder->setOffsetValueType(
        $this->getKeyType(0),
        $this->createSubjectValueType($flags, $matchesAll),
        $this->isSubjectOptional($wasMatched, $matchesAll),
    );

    $groupCount = count($captureGroups);
    $firstTrailingPosition = $groupCount - $trailingOptionals;
    $lastPosition = $groupCount - 1;

    $position = 0;
    foreach ($captureGroups as $captureGroup) {
        $isTrailingOptional = $position >= $firstTrailingPosition;
        $isLastGroup = $position === $lastPosition;

        $valueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
        $isOptional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

        // named groups get their string offset registered ahead of the numeric one
        if ($captureGroup->isNamed()) {
            $shapeBuilder->setOffsetValueType(
                $this->getKeyType($captureGroup->getName()),
                $valueType,
                $isOptional,
            );
        }

        $shapeBuilder->setOffsetValueType(
            $this->getKeyType($position + 1),
            $valueType,
            $isOptional,
        );

        $position++;
    }

    if ($markVerbs !== []) {
        $markTypes = [];
        foreach ($markVerbs as $markVerb) {
            $markTypes[] = new ConstantStringType($markVerb);
        }

        $shapeBuilder->setOffsetValueType(
            $this->getKeyType('MARK'),
            TypeCombinator::union(...$markTypes),
            true,
        );
    }

    $shape = $shapeBuilder->getArray();
    if (!$matchesAll || !$this->containsSetOrder($flags)) {
        return $shape;
    }

    // set order: every entry of the outer list is one complete match
    $listOfShapes = AccessoryArrayListType::intersectWith(new ArrayType(new IntegerType(), $shape));
    if ($wasMatched->yes()) {
        return $listOfShapes;
    }

    return TypeCombinator::union(
        new ConstantArrayType([], []),
        $listOfShapes,
    );
}
365:
/**
 * Tells whether offset 0 (the overall match) is optional in the resulting shape.
 *
 * It never is for match-all calls; otherwise it is optional unless the match is certain.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
    return !$matchesAll && !$wasMatched->yes();
}
374:
/**
 * Creates the value type stored at offset 0 (the overall match).
 *
 * Null is always removed from it. For match-all calls in pattern order the value
 * is wrapped into a list.
 */
private function createSubjectValueType(int $flags, bool $matchesAll): Type
{
    $valueType = TypeCombinator::removeNull(
        $this->getValueType(new StringType(), $flags, $matchesAll),
    );

    if (!$matchesAll || !$this->containsPatternOrder($flags)) {
        return $valueType;
    }

    return AccessoryArrayListType::intersectWith(new ArrayType(new IntegerType(), $valueType));
}
387:
/**
 * Tells whether the offset of a capturing group is optional in the resulting shape.
 *
 * Match-all: only trailing optional groups in set order without unmatched-as-null.
 * Single match: always optional unless the match is certain; on a certain match only
 * trailing optional groups without unmatched-as-null follow the group's own optionality.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
    if ($matchesAll) {
        return $isTrailingOptional
            && !$this->containsUnmatchedAsNull($flags, $matchesAll)
            && $this->containsSetOrder($flags);
    }

    if (!$wasMatched->yes()) {
        return true;
    }

    if (!$isTrailingOptional || $this->containsUnmatchedAsNull($flags, $matchesAll)) {
        return false;
    }

    return $captureGroup->isOptional();
}
412:
/**
 * Creates the value type stored at the offset of a capturing group.
 *
 * Optional groups which can be reported as empty string get '' added to their type.
 * With unmatched-as-null, null is removed again for mandatory groups that are not
 * part of the trailing optionals (for single matches only when the match is certain).
 * For match-all calls in pattern order the value is wrapped into a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
    $unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);

    if (!$matchesAll) {
        $mayBeEmptyString = !$isLastGroup && !$unmatchedAsNull && $captureGroup->isOptional();
        $baseType = $mayBeEmptyString
            ? TypeCombinator::union($captureGroup->getType(), new ConstantStringType(''))
            : $captureGroup->getType();
        $valueType = $this->getValueType($baseType, $flags, $matchesAll);

        if (
            $wasMatched->yes()
            && !$isTrailingOptional
            && $unmatchedAsNull
            && !$captureGroup->isOptional()
        ) {
            $valueType = TypeCombinator::removeNull($valueType);
        }

        return $valueType;
    }

    $mayBeEmptyString = !$this->containsSetOrder($flags) && !$unmatchedAsNull && $captureGroup->isOptional();
    if ($mayBeEmptyString) {
        $valueType = TypeCombinator::removeNull(
            $this->getValueType(
                TypeCombinator::union($captureGroup->getType(), new ConstantStringType('')),
                $flags,
                $matchesAll,
            ),
        );
    } else {
        $valueType = $this->getValueType($captureGroup->getType(), $flags, $matchesAll);
    }

    if (!$isTrailingOptional && $unmatchedAsNull && !$captureGroup->isOptional()) {
        $valueType = TypeCombinator::removeNull($valueType);
    }

    if ($this->containsPatternOrder($flags)) {
        $valueType = AccessoryArrayListType::intersectWith(new ArrayType(new IntegerType(), $valueType));
    }

    return $valueType;
}
456:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set in the given flags.
 */
private function containsOffsetCapture(int $flags): bool
{
    $offsetCaptureBit = $flags & PREG_OFFSET_CAPTURE;

    return $offsetCaptureBit !== 0;
}
461:
/**
 * Tells whether results are in pattern order, which is the case whenever
 * PREG_SET_ORDER is not requested.
 */
private function containsPatternOrder(int $flags): bool
{
    // If no order flag is given, PREG_PATTERN_ORDER is assumed.
    if ($this->containsSetOrder($flags)) {
        return false;
    }

    return true;
}
467:
/**
 * Tells whether the PREG_SET_ORDER bit is set in the given flags.
 */
private function containsSetOrder(int $flags): bool
{
    $setOrderBit = $flags & PREG_SET_ORDER;

    return $setOrderBit !== 0;
}
472:
/**
 * Tells whether unmatched groups are reported as null.
 *
 * Requires the PREG_UNMATCHED_AS_NULL bit. For single-match calls it additionally
 * requires either the PREG_UNMATCHED_AS_NULL_ON_72_73 bit or a PHP version which
 * supports the flag.
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
    if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
        return false;
    }

    if ($matchesAll) {
        // preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
        // https://3v4l.org/tKmPn
        return true;
    }

    if (($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0) {
        return true;
    }

    return $this->phpVersion->supportsPregUnmatchedAsNull();
}
483:
/**
 * Wraps an array key into the matching constant type: a constant string type
 * for string keys, a constant integer type for integer keys.
 */
private function getKeyType(int|string $key): Type
{
    return is_string($key)
        ? new ConstantStringType($key)
        : new ConstantIntegerType($key);
}
492:
/**
 * Adapts a base value type to the given flags.
 *
 * Null is added when unmatched groups are reported as null. With offset capture
 * the result is a two element array shape holding the value at offset 0 and the
 * integer offset at offset 1.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
    $valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
        ? TypeCombinator::addNull($baseType)
        : $baseType;

    // unmatched groups return -1 as offset
    $offsetType = IntegerRangeType::fromInterval(-1, null);

    if (!$this->containsOffsetCapture($flags)) {
        return $valueType;
    }

    $pairBuilder = ConstantArrayTypeBuilder::createEmpty();
    $pairBuilder->setOffsetValueType(new ConstantIntegerType(0), $valueType);
    $pairBuilder->setOffsetValueType(new ConstantIntegerType(1), $offsetType);

    return $pairBuilder->getArray();
}
520:
/**
 * Resolves the type of the pattern expression.
 *
 * Concatenations are delegated to the regex expression helper, everything else
 * is resolved through the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
    return $patternExpr instanceof Expr\BinaryOp\Concat
        ? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
        : $scope->getType($patternExpr);
}
529:
530: }
531: