1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\DependencyInjection\AutowiredService;
8: use PHPStan\Php\PhpVersion;
9: use PHPStan\TrinaryLogic;
10: use PHPStan\Type\Accessory\AccessoryArrayListType;
11: use PHPStan\Type\Accessory\NonEmptyArrayType;
12: use PHPStan\Type\ArrayType;
13: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
14: use PHPStan\Type\Constant\ConstantIntegerType;
15: use PHPStan\Type\Constant\ConstantStringType;
16: use PHPStan\Type\IntegerRangeType;
17: use PHPStan\Type\IntegerType;
18: use PHPStan\Type\IntersectionType;
19: use PHPStan\Type\NullType;
20: use PHPStan\Type\Regex\RegexCapturingGroup;
21: use PHPStan\Type\Regex\RegexExpressionHelper;
22: use PHPStan\Type\Regex\RegexGroupList;
23: use PHPStan\Type\Regex\RegexGroupParser;
24: use PHPStan\Type\StringType;
25: use PHPStan\Type\Type;
26: use PHPStan\Type\TypeCombinator;
27: use function count;
28: use function in_array;
29: use function is_string;
30: use const PREG_OFFSET_CAPTURE;
31: use const PREG_PATTERN_ORDER;
32: use const PREG_SET_ORDER;
33: use const PREG_UNMATCHED_AS_NULL;
34:
35: /**
36: * @api
37: */
38: #[AutowiredService]
39: final class RegexArrayShapeMatcher
40: {
41:
42: /**
43: * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
44: */
45: public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
46:
/**
 * All dependencies are stored as promoted private properties.
 *
 * - $regexGroupParser: its parseGroups() result supplies the capturing groups,
 *   the mark verbs and the subject base type of a pattern.
 * - $regexExpressionHelper: resolves the type of patterns that are built by
 *   string concatenation.
 * - $phpVersion: asked whether PREG_UNMATCHED_AS_NULL is supported when
 *   evaluating flags for single-match calls.
 */
public function __construct(
	private RegexGroupParser $regexGroupParser,
	private RegexExpressionHelper $regexExpressionHelper,
	private PhpVersion $phpVersion,
)
{
}
54:
/**
 * Infers the type of the matches array for a "match all" call
 * (preg_match_all() semantics).
 *
 * @param Type|null $flagsType type of the flags argument, null when no flags were passed
 * @param TrinaryLogic $wasMatched whether the pattern is known to have matched
 *
 * @return Type|null null when no shape can be inferred (non-constant pattern,
 *                   non-constant or unsupported flags, or an unparsable regex)
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
59:
/**
 * Infers the type of the matches array for a single-match call
 * (the non "match all" variant).
 *
 * @param Type|null $flagsType type of the flags argument, null when no flags were passed
 * @param TrinaryLogic $wasMatched whether the pattern is known to have matched
 *
 * @return Type|null null when no shape can be inferred (non-constant pattern,
 *                   non-constant or unsupported flags, or an unparsable regex)
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
64:
/**
 * Computes the subject base type implied by the given pattern expression.
 *
 * Every constant string the pattern may evaluate to is parsed; the subject
 * base types reported by the parser are combined into a union.
 *
 * @return Type|null null when the pattern has no constant string values or
 *                   when any of them cannot be parsed
 */
public function matchSubjectExpr(Expr $patternExpr, Scope $scope): ?Type
{
	$patterns = $this->getPatternType($patternExpr, $scope)->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$baseTypes = [];
	foreach ($patterns as $pattern) {
		$parsed = $this->regexGroupParser->parseGroups($pattern->getValue());
		if ($parsed === null) {
			// a single unparsable pattern makes the whole result unknown
			return null;
		}

		$baseTypes[] = $parsed->getSubjectBaseType();
	}

	return TypeCombinator::union(...$baseTypes);
}
85:
/**
 * Shared implementation behind matchExpr() and matchAllExpr().
 *
 * @param Type|null $flagsType type of the flags argument, null when no flags were passed
 * @param bool $matchesAll true for "match all" semantics, false for a single match
 *
 * @return Type|null an empty constant array when the pattern definitely did not match;
 *                   null when the pattern is not made of constant strings, the flags are
 *                   not a constant integer of supported bits, or a pattern cannot be analysed
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	if ($wasMatched->no()) {
		return ConstantArrayTypeBuilder::createEmpty()->getArray();
	}

	$patterns = $patternType->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$supportedFlags = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;
		$givenFlags = $flagsType->getValue();

		// some other unsupported/unexpected flag was passed in
		if (($givenFlags & ~$supportedFlags) !== 0) {
			return null;
		}

		/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
		$flags = $givenFlags;
	}

	$shapes = [];
	foreach ($patterns as $pattern) {
		$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
		if ($shape === null) {
			return null;
		}

		$shapes[] = $shape;
	}

	// a single pattern needs no union
	return count($shapes) === 1
		? $shapes[0]
		: TypeCombinator::union(...$shapes);
}
128:
/**
 * Builds the matches-array type for one concrete regex string.
 *
 * Three strategies are tried in order:
 *  1. single match, definitely matched, and the group list reports exactly one
 *     optional top level group: a union of "subject only" and "subject + group";
 *  2. single match and the group list reports a single top level alternation:
 *     a union with one array shape per group combination of that alternation;
 *  3. otherwise: one general array shape built from all capturing groups.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
 *
 * @return Type|null null when the regex could not be parsed
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	$astWalkResult = $this->regexGroupParser->parseGroups($regex);
	if ($astWalkResult === null) {
		// regex could not be parsed by Hoa/Regex
		return null;
	}
	$groupList = $astWalkResult->getCapturingGroups();
	$markVerbs = $astWalkResult->getMarkVerbs();
	// the parser's narrower subject type is only used when the match is certain;
	// otherwise the subject is a plain string
	$subjectBaseType = new StringType();
	if ($wasMatched->yes()) {
		$subjectBaseType = $astWalkResult->getSubjectBaseType();
	}

	$regexGroupList = new RegexGroupList($groupList);
	$trailingOptionals = $regexGroupList->countTrailingOptionals();
	$onlyOptionalTopLevelGroup = $regexGroupList->getOnlyOptionalTopLevelGroup();
	$onlyTopLevelAlternation = $regexGroupList->getOnlyTopLevelAlternation();
	// missing flags are handled like an empty bitmask from here on
	$flags ??= 0;

	if (
		!$matchesAll
		&& $wasMatched->yes()
		&& $onlyOptionalTopLevelGroup !== null
	) {
		// if only one top level capturing optional group exists
		// we build a more precise tagged union of an empty-match and a match with the group
		$regexGroupList = $regexGroupList->forceGroupNonOptional($onlyOptionalTopLevelGroup);

		$combiType = $this->buildArrayType(
			$subjectBaseType,
			$regexGroupList,
			$wasMatched,
			$trailingOptionals,
			$flags,
			$markVerbs,
			$matchesAll,
		);

		// the "subject only" variant is added only when unmatched groups are not reported as null
		if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
			// positive match has a subject but not any capturing group
			$builder = ConstantArrayTypeBuilder::createEmpty();
			$builder->setOffsetValueType(new ConstantIntegerType(0), $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll, $wasMatched));

			$combiType = TypeCombinator::union(
				$builder->getArray(),
				$combiType,
			);
		}

		return $combiType;
	} elseif (
		!$matchesAll
		&& $onlyOptionalTopLevelGroup === null
		&& $onlyTopLevelAlternation !== null
		&& !$wasMatched->no()
	) {
		// if only a single top level alternation exists, build a more precise tagged union

		$combiTypes = [];
		// overwritten for every group that belongs to a combination,
		// so the last such group decides the final value
		$isOptionalAlternation = false;
		foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
			// each combination starts from a fresh list of all groups
			$comboList = new RegexGroupList($groupList);

			$beforeCurrentCombo = true;
			foreach ($comboList as $group) {
				if (in_array($group->getId(), $groupCombo, true)) {
					// group is part of the current combination: it is present in this variant
					$isOptionalAlternation = $group->inOptionalAlternation();
					$comboList = $comboList->forceGroupNonOptional($group);
					$beforeCurrentCombo = false;
				} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
					// group precedes the current combination: it gets a fixed
					// placeholder value, null or '' depending on the flags
					$comboList = $comboList->forceGroupTypeAndNonOptional(
						$group,
						$this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''),
					);
				} elseif (
					$group->getAlternationId() === $onlyTopLevelAlternation->getId()
					&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
				) {
					// remaining group of the same alternation: dropped from this
					// variant unless unmatched groups are reported as null
					$comboList = $comboList->removeGroup($group);
				}
			}

			$combiType = $this->buildArrayType(
				$subjectBaseType,
				$comboList,
				$wasMatched,
				$trailingOptionals,
				$flags,
				$markVerbs,
				$matchesAll,
			);

			$combiTypes[] = $combiType;
		}

		// add a "subject only" variant when the alternation has more alternatives than
		// group combinations, or when the last inspected combination group sits in an
		// optional alternation
		if (
			!$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& (
				$onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations())
				|| $isOptionalAlternation
			)
		) {
			// positive match has a subject but not any capturing group
			$builder = ConstantArrayTypeBuilder::createEmpty();
			$builder->setOffsetValueType(new ConstantIntegerType(0), $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll, $wasMatched));

			$combiTypes[] = $builder->getArray();
		}

		return TypeCombinator::union(...$combiTypes);
	}

	// the general case, which should work in all cases but does not yield the most
	// precise result possible in some cases
	return $this->buildArrayType(
		$subjectBaseType,
		$regexGroupList,
		$wasMatched,
		$trailingOptionals,
		$flags,
		$markVerbs,
		$matchesAll,
	);
}
257:
/**
 * Assembles the matches-array type from the subject and the capturing groups.
 *
 * Keys are written in this order: 0 (the overall match), then for every group
 * its name (if it is named) followed by its 1-based index, and finally 'MARK'
 * when the pattern contains mark verbs.
 *
 * @param int $trailingOptionals number of groups at the end of the list treated as trailing optionals
 * @param list<string> $markVerbs
 */
private function buildArrayType(
	Type $subjectBaseType,
	RegexGroupList $captureGroups,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
	int $flags,
	array $markVerbs,
	bool $matchesAll,
): Type
{
	$hasMarkVerbs = count($markVerbs) > 0;
	// the shape is a list unless a string key (group name or 'MARK') gets added
	$isList = !$hasMarkVerbs;
	$shape = ConstantArrayTypeBuilder::createEmpty();

	// first item in matches contains the overall match.
	$shape->setOffsetValueType(
		$this->getKeyType(0),
		$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll, $wasMatched),
		$this->isSubjectOptional($wasMatched, $matchesAll),
	);

	$groupCount = count($captureGroups);
	$firstTrailingOptionalIndex = $groupCount - $trailingOptionals;
	$lastIndex = $groupCount - 1;

	$index = 0;
	foreach ($captureGroups as $group) {
		$isTrailingOptional = $index >= $firstTrailingOptionalIndex;
		$valueType = $this->createGroupValueType($group, $wasMatched, $flags, $isTrailingOptional, $index === $lastIndex, $matchesAll);
		$isOptional = $this->isGroupOptional($group, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

		if ($group->isNamed()) {
			// named groups are reported under their name and under their index
			$isList = false;
			$shape->setOffsetValueType($this->getKeyType($group->getName()), $valueType, $isOptional);
		}

		$index++;
		$shape->setOffsetValueType($this->getKeyType($index), $valueType, $isOptional);
	}

	if ($hasMarkVerbs) {
		$markTypes = [];
		foreach ($markVerbs as $markVerb) {
			$markTypes[] = new ConstantStringType($markVerb);
		}

		$shape->setOffsetValueType($this->getKeyType('MARK'), TypeCombinator::union(...$markTypes), true);
	}

	$arrayShape = $shape->getArray();

	if ($matchesAll && $this->containsSetOrder($flags)) {
		// set order: a list of per-match shapes, non-empty when the match is certain
		$listParts = [
			new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $arrayShape),
			new AccessoryArrayListType(),
		];
		if ($wasMatched->yes()) {
			$listParts[] = new NonEmptyArrayType();
		}

		return new IntersectionType($listParts);
	}

	return $isList
		? TypeCombinator::intersect($arrayShape, new AccessoryArrayListType())
		: $arrayShape;
}
335:
/**
 * Tells whether key 0 (the overall match) is optional in the result shape.
 *
 * It never is for "match all"; for a single match it is optional unless the
 * match is known to have succeeded.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
	return !$matchesAll && !$wasMatched->yes();
}
344:
/**
 * Creates the value type stored under key 0 (the overall match).
 *
 * For a single match the value is derived from $baseType. For "match all"
 * $baseType is not used: the value is derived from a plain string and, in
 * pattern order, wrapped into a list that is non-empty when the match is certain.
 *
 * @param Type $baseType A string type (or string variant) representing the subject of the match
 */
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll, TrinaryLogic $wasMatched): Type
{
	if (!$matchesAll) {
		return TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));
	}

	$matchType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));
	if (!$this->containsPatternOrder($flags)) {
		return $matchType;
	}

	$listParts = [
		new ArrayType(new IntegerType(), $matchType),
		new AccessoryArrayListType(),
	];
	if ($wasMatched->yes()) {
		$listParts[] = new NonEmptyArrayType();
	}

	return new IntersectionType($listParts);
}
370:
/**
 * Tells whether the key of a capturing group is optional in the result shape.
 *
 * - "match all": only trailing optional groups in set order, and only when
 *   unmatched groups are not reported as null.
 * - single match, match not certain: always optional.
 * - single match, match certain: only trailing optional groups without
 *   unmatched-as-null, and then the group itself decides.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
	if ($matchesAll) {
		return $isTrailingOptional
			&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& $this->containsSetOrder($flags);
	}

	if (!$wasMatched->yes()) {
		return true;
	}

	if (!$isTrailingOptional || $this->containsUnmatchedAsNull($flags, $matchesAll)) {
		return false;
	}

	return $captureGroup->isOptional();
}
395:
/**
 * Creates the value type stored for one capturing group.
 *
 * An optional group that is still reported although it did not take part in
 * the match can additionally hold '' (when unmatched groups are not reported
 * as null). Null is stripped again for non-optional, non-trailing groups.
 * In "match all" pattern order the value is wrapped into a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
	$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
	$groupIsOptional = $captureGroup->isOptional();
	// null never applies to groups that are required and not trailing
	$neverNull = !$isTrailingOptional && $unmatchedAsNull && !$groupIsOptional;

	$baseType = $captureGroup->getType();

	if (!$matchesAll) {
		if (!$isLastGroup && !$unmatchedAsNull && $groupIsOptional) {
			$baseType = TypeCombinator::union($baseType, new ConstantStringType(''));
		}

		$valueType = $this->getValueType($baseType, $flags, $matchesAll);
		if ($neverNull && $wasMatched->yes()) {
			$valueType = TypeCombinator::removeNull($valueType);
		}

		return $valueType;
	}

	// in set order, trailing optional groups are left out instead of being padded with ''
	$mayBeEmptyString = !$unmatchedAsNull
		&& $groupIsOptional
		&& (!$this->containsSetOrder($flags) || !$isTrailingOptional);

	if ($mayBeEmptyString) {
		$valueType = TypeCombinator::removeNull(
			$this->getValueType(
				TypeCombinator::union($baseType, new ConstantStringType('')),
				$flags,
				$matchesAll,
			),
		);
	} else {
		$valueType = $this->getValueType($baseType, $flags, $matchesAll);
	}

	if ($neverNull) {
		$valueType = TypeCombinator::removeNull($valueType);
	}

	if (!$this->containsPatternOrder($flags)) {
		return $valueType;
	}

	// pattern order: every group holds a list with one entry per match
	$listParts = [
		new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $valueType),
		new AccessoryArrayListType(),
	];
	if ($wasMatched->yes()) {
		$listParts[] = new NonEmptyArrayType();
	}

	return new IntersectionType($listParts);
}
457:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set in $flags.
 */
private function containsOffsetCapture(int $flags): bool
{
	// PREG_OFFSET_CAPTURE is a single bit, so masking yields either 0 or the flag itself
	return ($flags & PREG_OFFSET_CAPTURE) === PREG_OFFSET_CAPTURE;
}
462:
/**
 * Tells whether results are in pattern order, i.e. PREG_SET_ORDER is absent.
 */
private function containsPatternOrder(int $flags): bool
{
	// If no order flag is given, PREG_PATTERN_ORDER is assumed.
	return ($flags & PREG_SET_ORDER) === 0;
}
468:
/**
 * Tells whether the PREG_SET_ORDER bit is set in $flags.
 */
private function containsSetOrder(int $flags): bool
{
	// PREG_SET_ORDER is a single bit, so masking yields either 0 or the flag itself
	return ($flags & PREG_SET_ORDER) === PREG_SET_ORDER;
}
473:
/**
 * Tells whether unmatched groups are reported as null for the given call.
 *
 * PREG_UNMATCHED_AS_NULL must be set in any case. For a single match it
 * additionally requires either the PREG_UNMATCHED_AS_NULL_ON_72_73 marker or a
 * PHP version that supports the flag.
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
	if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
		return false;
	}

	if ($matchesAll) {
		// preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
		// https://3v4l.org/tKmPn
		return true;
	}

	return ($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0
		|| $this->phpVersion->supportsPregUnmatchedAsNull();
}
484:
/**
 * Wraps an array key into the matching constant type:
 * strings become a ConstantStringType, integers a ConstantIntegerType.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
493:
/**
 * Adapts a matched value type to the given flags.
 *
 * Null is added when unmatched groups are reported as null. With
 * PREG_OFFSET_CAPTURE the value is wrapped into array{0: value, 1: offset}.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	$valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
		? TypeCombinator::addNull($baseType)
		: $baseType;

	if (!$this->containsOffsetCapture($flags)) {
		return $valueType;
	}

	$pair = ConstantArrayTypeBuilder::createEmpty();
	$pair->setOffsetValueType(new ConstantIntegerType(0), $valueType);
	// unmatched groups return -1 as offset
	$pair->setOffsetValueType(new ConstantIntegerType(1), IntegerRangeType::fromInterval(-1, null));

	return $pair->getArray();
}
521:
/**
 * Resolves the type of the pattern expression.
 *
 * Concatenations are delegated to the regex expression helper; every other
 * expression is resolved through the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
	return $patternExpr instanceof Expr\BinaryOp\Concat
		? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
		: $scope->getType($patternExpr);
}
530:
531: }
532: