1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\Php\PhpVersion;
8: use PHPStan\TrinaryLogic;
9: use PHPStan\Type\Accessory\AccessoryArrayListType;
10: use PHPStan\Type\ArrayType;
11: use PHPStan\Type\Constant\ConstantArrayType;
12: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
13: use PHPStan\Type\Constant\ConstantIntegerType;
14: use PHPStan\Type\Constant\ConstantStringType;
15: use PHPStan\Type\IntegerRangeType;
16: use PHPStan\Type\IntegerType;
17: use PHPStan\Type\NullType;
18: use PHPStan\Type\Regex\RegexAlternation;
19: use PHPStan\Type\Regex\RegexCapturingGroup;
20: use PHPStan\Type\Regex\RegexExpressionHelper;
21: use PHPStan\Type\Regex\RegexGroupParser;
22: use PHPStan\Type\StringType;
23: use PHPStan\Type\Type;
24: use PHPStan\Type\TypeCombinator;
25: use function array_reverse;
26: use function count;
27: use function in_array;
28: use function is_string;
29: use const PREG_OFFSET_CAPTURE;
30: use const PREG_PATTERN_ORDER;
31: use const PREG_SET_ORDER;
32: use const PREG_UNMATCHED_AS_NULL;
33:
34: /**
35: * @api
36: */
37: final class RegexArrayShapeMatcher
38: {
39:
40: /**
41: * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
42: */
43: public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
44:
/**
 * Collaborators used by this class:
 * - $regexGroupParser splits a regex into its capturing groups and MARK verbs,
 * - $regexExpressionHelper resolves patterns that are built via string concatenation,
 * - $phpVersion tells whether the analysed PHP version supports PREG_UNMATCHED_AS_NULL for single matches.
 */
public function __construct(
	private RegexGroupParser $regexGroupParser,
	private RegexExpressionHelper $regexExpressionHelper,
	private PhpVersion $phpVersion,
)
{
}
52:
/**
 * Infers the shape of the matches array for a "match all" call (e.g. preg_match_all()).
 *
 * Returns null when no precise shape can be inferred.
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
57:
/**
 * Infers the shape of the matches array for a single-match call.
 *
 * Returns null when no precise shape can be inferred.
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
62:
/**
 * Resolves the matches shape for every constant string the pattern type can be
 * and unions the per-pattern results.
 *
 * Returns an empty constant array when the match definitely failed, and null when
 * the pattern is not a constant string, the flags are not a constant integer,
 * an unsupported flag bit is set, or any single pattern cannot be resolved.
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	if ($wasMatched->no()) {
		return new ConstantArrayType([], []);
	}

	$patterns = $patternType->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$supportedMask = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;
		$rawFlags = $flagsType->getValue();

		// some other unsupported/unexpected flag was passed in
		if (($rawFlags & ~$supportedMask) !== 0) {
			return null;
		}

		/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
		$flags = $rawFlags;
	}

	$shapes = [];
	foreach ($patterns as $pattern) {
		$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
		if ($shape === null) {
			// a single unresolvable pattern makes the whole result unknown
			return null;
		}

		$shapes[] = $shape;
	}

	return count($shapes) === 1
		? $shapes[0]
		: TypeCombinator::union(...$shapes);
}
105:
/**
 * Builds the matches array shape for a single constant regex.
 *
 * Three strategies are tried in order:
 * 1. single match, match is certain, and the only top-level capturing group is optional:
 *    a union of "group present" and (unless unmatched groups are reported as null) "subject only";
 * 2. single match and all top-level capturing groups belong to one alternation:
 *    a union with one shape per group combination of that alternation;
 * 3. otherwise the general shape built from all groups.
 *
 * Returns null when the regex cannot be parsed into groups.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	$parseResult = $this->regexGroupParser->parseGroups($regex);
	if ($parseResult === null) {
		// regex could not be parsed by Hoa/Regex
		return null;
	}
	[$groupList, $markVerbs] = $parseResult;

	// count the optional groups at the very end of the group list
	$trailingOptionals = 0;
	foreach (array_reverse($groupList) as $captureGroup) {
		if (!$captureGroup->isOptional()) {
			break;
		}
		$trailingOptionals++;
	}

	$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
	$onlyTopLevelAlternation = $this->getOnlyTopLevelAlternation($groupList);
	// no flags given behaves like flags 0
	$flags ??= 0;

	if (
		!$matchesAll
		&& $wasMatched->yes()
		&& $onlyOptionalTopLevelGroup !== null
	) {
		// if only one top-level optional capturing group exists
		// we build a more precise tagged union of an empty match and a match with the group

		// temporarily treat the group as matched; reverted via clearOverrides() below
		$onlyOptionalTopLevelGroup->forceNonOptional();

		$combiType = $this->buildArrayType(
			$groupList,
			$wasMatched,
			$trailingOptionals,
			$flags,
			$markVerbs,
			$matchesAll,
		);

		if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
			// positive match has a subject but not any capturing group
			$combiType = TypeCombinator::union(
				new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
				$combiType,
			);
		}

		$onlyOptionalTopLevelGroup->clearOverrides();

		return $combiType;
	} elseif (
		!$matchesAll
		&& $onlyOptionalTopLevelGroup === null
		&& $onlyTopLevelAlternation !== null
		&& !$wasMatched->no()
	) {
		// if only a single top-level alternation exists, build a more precise tagged union

		$combiTypes = [];
		$isOptionalAlternation = false;
		foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
			// copy of the list; the group objects themselves are shared with $groupList
			$comboList = $groupList;

			$beforeCurrentCombo = true;
			foreach ($comboList as $groupId => $group) {
				if (in_array($groupId, $groupCombo, true)) {
					// group is part of the current combination: it is present in this variant
					$isOptionalAlternation = $group->inOptionalAlternation();
					$group->forceNonOptional();
					$beforeCurrentCombo = false;
				} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
					// group precedes the current combination: its key exists but holds the "unmatched" value
					// NOTE(review): these overrides are not cleared after the iteration (only groups of the
					// combination are); they get re-applied in later iterations - confirm that parseGroups()
					// returns fresh group objects on every call
					$group->forceNonOptional();
					$group->forceType(
						$this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''),
					);
				} elseif (
					$group->getAlternationId() === $onlyTopLevelAlternation->getId()
					&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
				) {
					// remaining group of the same alternation: dropped from this variant
					unset($comboList[$groupId]);
				}
			}

			$combiType = $this->buildArrayType(
				$comboList,
				$wasMatched,
				$trailingOptionals,
				$flags,
				$markVerbs,
				$matchesAll,
			);

			$combiTypes[] = $combiType;

			// revert the overrides on the groups of the current combination
			foreach ($groupCombo as $groupId) {
				$group = $comboList[$groupId];
				$group->clearOverrides();
			}
		}

		if (
			!$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& (
				$onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations())
				|| $isOptionalAlternation
			)
		) {
			// positive match has a subject but not any capturing group
			$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
		}

		return TypeCombinator::union(...$combiTypes);
	}

	// the general case, which should work in all cases but does not yield the most
	// precise result possible in some cases
	return $this->buildArrayType(
		$groupList,
		$wasMatched,
		$trailingOptionals,
		$flags,
		$markVerbs,
		$matchesAll,
	);
}
234:
/**
 * Returns the top-level capturing group if there is exactly one and it is optional.
 *
 * Yields null when there is no top-level group, more than one, or a non-optional one.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
	$found = null;
	foreach ($captureGroups as $candidate) {
		if ($candidate->isTopLevel()) {
			// a mandatory or a second top-level group disqualifies the pattern
			if (!$candidate->isOptional() || $found !== null) {
				return null;
			}

			$found = $candidate;
		}
	}

	return $found;
}
259:
/**
 * Returns the alternation shared by all top-level capturing groups.
 *
 * Yields null when there is no top-level group, when a top-level group is outside of
 * an alternation or inside an optional quantification, or when the top-level groups
 * belong to different alternations.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyTopLevelAlternation(array $captureGroups): ?RegexAlternation
{
	$shared = null;
	foreach ($captureGroups as $candidate) {
		if (!$candidate->isTopLevel()) {
			continue;
		}

		if (!$candidate->inAlternation() || $candidate->inOptionalQuantification()) {
			return null;
		}

		$current = $candidate->getAlternation();
		if ($shared === null) {
			$shared = $current;
			continue;
		}

		if ($shared->getId() !== $current->getId()) {
			return null;
		}
	}

	return $shared;
}
288:
/**
 * Assembles the matches array shape: the subject at offset 0, one entry per capturing
 * group (additionally keyed by name for named groups) and an optional 'MARK' entry.
 *
 * For "match all" with PREG_SET_ORDER the shape is wrapped into a list of such shapes,
 * unioned with an empty array unless the match is certain.
 *
 * @param array<RegexCapturingGroup> $captureGroups
 * @param list<string> $markVerbs
 */
private function buildArrayType(
	array $captureGroups,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
	int $flags,
	array $markVerbs,
	bool $matchesAll,
): Type
{
	$shapeBuilder = ConstantArrayTypeBuilder::createEmpty();

	// the first item in matches contains the overall match
	$shapeBuilder->setOffsetValueType(
		$this->getKeyType(0),
		$this->createSubjectValueType($flags, $matchesAll),
		$this->isSubjectOptional($wasMatched, $matchesAll),
	);

	$groupCount = count($captureGroups);
	$firstTrailingOptional = $groupCount - $trailingOptionals;
	$position = 0;
	foreach ($captureGroups as $group) {
		$isTrailingOptional = $position >= $firstTrailingOptional;
		$isLastGroup = $position === $groupCount - 1;
		$valueType = $this->createGroupValueType($group, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
		$isOptional = $this->isGroupOptional($group, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

		// a named group shows up under its name first, then under its numeric offset
		if ($group->isNamed()) {
			$shapeBuilder->setOffsetValueType($this->getKeyType($group->getName()), $valueType, $isOptional);
		}

		$position++;
		$shapeBuilder->setOffsetValueType($this->getKeyType($position), $valueType, $isOptional);
	}

	if ($markVerbs !== []) {
		$markTypes = [];
		foreach ($markVerbs as $verb) {
			$markTypes[] = new ConstantStringType($verb);
		}

		$shapeBuilder->setOffsetValueType($this->getKeyType('MARK'), TypeCombinator::union(...$markTypes), true);
	}

	if (!$matchesAll || !$this->containsSetOrder($flags)) {
		return $shapeBuilder->getArray();
	}

	$listOfShapes = TypeCombinator::intersect(
		new ArrayType(new IntegerType(), $shapeBuilder->getArray()),
		new AccessoryArrayListType(),
	);
	if ($wasMatched->yes()) {
		return $listOfShapes;
	}

	return TypeCombinator::union(new ConstantArrayType([], []), $listOfShapes);
}
361:
/**
 * Tells whether offset 0 is an optional key: never for "match all",
 * otherwise whenever the match is not certain.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
	return !$matchesAll && !$wasMatched->yes();
}
370:
/**
 * Creates the value type of offset 0 (the overall match), which is never null.
 *
 * For "match all" in pattern order the value is a list of such values.
 */
private function createSubjectValueType(int $flags, bool $matchesAll): Type
{
	$singleValueType = TypeCombinator::removeNull(
		$this->getValueType(new StringType(), $flags, $matchesAll),
	);

	if (!$matchesAll || !$this->containsPatternOrder($flags)) {
		return $singleValueType;
	}

	return TypeCombinator::intersect(
		new ArrayType(new IntegerType(), $singleValueType),
		new AccessoryArrayListType(),
	);
}
383:
/**
 * Tells whether the key of a capturing group is optional in the resulting shape.
 *
 * "Match all": only trailing optional groups in set order without unmatched-as-null.
 * Single match: always when the match is not certain; otherwise only trailing
 * optional groups without unmatched-as-null, and then the group decides.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
	if ($matchesAll) {
		return $isTrailingOptional
			&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& $this->containsSetOrder($flags);
	}

	if (!$wasMatched->yes()) {
		return true;
	}

	if (!$isTrailingOptional || $this->containsUnmatchedAsNull($flags, $matchesAll)) {
		return false;
	}

	return $captureGroup->isOptional();
}
408:
/**
 * Creates the value type stored for a capturing group.
 *
 * Optional groups that still get a key without unmatched-as-null may additionally hold ''.
 * With unmatched-as-null, null is removed again for non-trailing, non-optional groups
 * (for a single match only when the match is certain).
 * For "match all" in pattern order the value is wrapped into a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
	$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
	$groupIsOptional = $captureGroup->isOptional();
	$neverNull = !$isTrailingOptional && $unmatchedAsNull && !$groupIsOptional;

	if (!$matchesAll) {
		$mayBeEmpty = !$isLastGroup && !$unmatchedAsNull && $groupIsOptional;
		$baseType = $mayBeEmpty
			? TypeCombinator::union($captureGroup->getType(), new ConstantStringType(''))
			: $captureGroup->getType();
		$valueType = $this->getValueType($baseType, $flags, $matchesAll);

		if ($wasMatched->yes() && $neverNull) {
			$valueType = TypeCombinator::removeNull($valueType);
		}

		return $valueType;
	}

	// in set order trailing optional groups are left out instead of being filled with ''
	$mayBeEmpty = !$unmatchedAsNull
		&& $groupIsOptional
		&& (!$this->containsSetOrder($flags) || !$isTrailingOptional);

	if ($mayBeEmpty) {
		$valueType = TypeCombinator::removeNull(
			$this->getValueType(
				TypeCombinator::union($captureGroup->getType(), new ConstantStringType('')),
				$flags,
				$matchesAll,
			),
		);
	} else {
		$valueType = $this->getValueType($captureGroup->getType(), $flags, $matchesAll);
	}

	if ($neverNull) {
		$valueType = TypeCombinator::removeNull($valueType);
	}

	if ($this->containsPatternOrder($flags)) {
		$valueType = TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $valueType),
			new AccessoryArrayListType(),
		);
	}

	return $valueType;
}
465:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set.
 */
private function containsOffsetCapture(int $flags): bool
{
	$offsetCaptureBit = $flags & PREG_OFFSET_CAPTURE;

	return $offsetCaptureBit === PREG_OFFSET_CAPTURE;
}
470:
/**
 * Tells whether results are in pattern order.
 */
private function containsPatternOrder(int $flags): bool
{
	// If no order flag is given, PREG_PATTERN_ORDER is assumed,
	// so everything without the PREG_SET_ORDER bit counts as pattern order.
	return ($flags & PREG_SET_ORDER) === 0;
}
476:
/**
 * Tells whether the PREG_SET_ORDER bit is set.
 */
private function containsSetOrder(int $flags): bool
{
	$setOrderBit = $flags & PREG_SET_ORDER;

	return $setOrderBit === PREG_SET_ORDER;
}
481:
/**
 * Tells whether unmatched groups are reported as null.
 *
 * Requires the PREG_UNMATCHED_AS_NULL bit. For a single match the flag additionally
 * has to be supported by the PHP version or emulated by the library
 * (signalled via self::PREG_UNMATCHED_AS_NULL_ON_72_73).
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
	if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
		return false;
	}

	if ($matchesAll) {
		// preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
		// https://3v4l.org/tKmPn
		return true;
	}

	if (($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0) {
		return true;
	}

	return $this->phpVersion->supportsPregUnmatchedAsNull();
}
492:
/**
 * Wraps an array key into the matching constant type.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
501:
/**
 * Decorates a base value type according to the flags: adds null when unmatched
 * groups are reported as null, and wraps the value into a [value, offset] pair
 * when offsets are captured.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	$valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
		? TypeCombinator::addNull($baseType)
		: $baseType;

	if (!$this->containsOffsetCapture($flags)) {
		return $valueType;
	}

	$pairBuilder = ConstantArrayTypeBuilder::createEmpty();
	$pairBuilder->setOffsetValueType(new ConstantIntegerType(0), $valueType);
	// unmatched groups return -1 as offset
	$pairBuilder->setOffsetValueType(new ConstantIntegerType(1), IntegerRangeType::fromInterval(-1, null));

	return $pairBuilder->getArray();
}
529:
/**
 * Resolves the type of the pattern expression; concatenations are delegated
 * to the regex expression helper, everything else is read from the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
	return $patternExpr instanceof Expr\BinaryOp\Concat
		? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
		: $scope->getType($patternExpr);
}
538:
539: }
540: