1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\DependencyInjection\AutowiredService;
8: use PHPStan\Php\PhpVersion;
9: use PHPStan\TrinaryLogic;
10: use PHPStan\Type\Accessory\AccessoryArrayListType;
11: use PHPStan\Type\ArrayType;
12: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
13: use PHPStan\Type\Constant\ConstantIntegerType;
14: use PHPStan\Type\Constant\ConstantStringType;
15: use PHPStan\Type\IntegerRangeType;
16: use PHPStan\Type\IntegerType;
17: use PHPStan\Type\IntersectionType;
18: use PHPStan\Type\NullType;
19: use PHPStan\Type\Regex\RegexCapturingGroup;
20: use PHPStan\Type\Regex\RegexExpressionHelper;
21: use PHPStan\Type\Regex\RegexGroupList;
22: use PHPStan\Type\Regex\RegexGroupParser;
23: use PHPStan\Type\StringType;
24: use PHPStan\Type\Type;
25: use PHPStan\Type\TypeCombinator;
26: use function count;
27: use function in_array;
28: use function is_string;
29: use const PREG_OFFSET_CAPTURE;
30: use const PREG_PATTERN_ORDER;
31: use const PREG_SET_ORDER;
32: use const PREG_UNMATCHED_AS_NULL;
33:
34: /**
35: * @api
36: */
37: #[AutowiredService]
38: final class RegexArrayShapeMatcher
39: {
40:
/**
 * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
 *
 * When this bit is set together with PREG_UNMATCHED_AS_NULL, single-match shapes treat
 * PREG_UNMATCHED_AS_NULL as effective even if the configured PhpVersion does not report
 * support for it. It is not consulted for "match all" shapes.
 */
public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
45:
/**
 * @param RegexGroupParser $regexGroupParser parses a regex string into capturing groups, MARK verbs and a subject base type
 * @param RegexExpressionHelper $regexExpressionHelper resolves the pattern type of concatenation expressions
 * @param PhpVersion $phpVersion asked whether PREG_UNMATCHED_AS_NULL is supported
 */
public function __construct(
	private RegexGroupParser $regexGroupParser,
	private RegexExpressionHelper $regexExpressionHelper,
	private PhpVersion $phpVersion,
)
{
}
53:
/**
 * Infers the matches array type for a pattern used in "match all" mode
 * (preg_match_all()-style result layout).
 *
 * @param Type|null $flagsType type of the flags argument, null when no flags were passed
 * @param TrinaryLogic $wasMatched whether the match is known to have succeeded
 * @return Type|null the inferred array type, or null when no shape could be inferred
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
58:
/**
 * Infers the matches array type for a pattern used in single-match mode.
 *
 * @param Type|null $flagsType type of the flags argument, null when no flags were passed
 * @param TrinaryLogic $wasMatched whether the match is known to have succeeded
 * @return Type|null the inferred array type, or null when no shape could be inferred
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
63:
/**
 * Computes the subject base type implied by the pattern expression.
 *
 * Every constant string the pattern can evaluate to is parsed; the result is the
 * union of the subject base types reported by the group parser.
 *
 * @return Type|null null when the pattern has no constant string value or any of them cannot be parsed
 */
public function matchSubjectExpr(Expr $patternExpr, Scope $scope): ?Type
{
	$patterns = $this->getPatternType($patternExpr, $scope)->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$baseTypes = [];
	foreach ($patterns as $pattern) {
		$parsed = $this->regexGroupParser->parseGroups($pattern->getValue());
		if ($parsed === null) {
			// a single unparsable pattern makes the whole result unknown
			return null;
		}

		$baseTypes[] = $parsed->getSubjectBaseType();
	}

	return TypeCombinator::union(...$baseTypes);
}
84:
/**
 * Infers the matches array type for every constant string the pattern type can take.
 *
 * @param Type|null $flagsType type of the flags argument; must be a constant integer made only of supported flag bits
 * @return Type|null an empty array shape when the match definitely failed; the shape (or union of shapes)
 *                   otherwise; null when the pattern or flags are not precisely known or a regex cannot be parsed
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	if ($wasMatched->no()) {
		return ConstantArrayTypeBuilder::createEmpty()->getArray();
	}

	$patterns = $patternType->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$givenFlags = $flagsType->getValue();
		$supportedMask = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;

		/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
		$flags = $givenFlags & $supportedMask;

		// masking dropped a bit: some other unsupported/unexpected flag was passed in
		if ($flags !== $givenFlags) {
			return null;
		}
	}

	$shapes = [];
	foreach ($patterns as $pattern) {
		$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
		if ($shape === null) {
			return null;
		}

		$shapes[] = $shape;
	}

	return count($shapes) === 1
		? $shapes[0]
		: TypeCombinator::union(...$shapes);
}
127:
/**
 * Builds the matches array type for one regex string.
 *
 * Strategies, tried in this order:
 * - single match, definitely matched, exactly one optional top-level capturing group:
 *   union of a "subject only" shape and a "subject plus group" shape;
 * - single match, not definitely failed, exactly one top-level alternation:
 *   one shape per group combination of that alternation;
 * - otherwise one general shape covering all groups.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
 * @return Type|null null when the regex could not be parsed
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	$parsed = $this->regexGroupParser->parseGroups($regex);
	if ($parsed === null) {
		// regex could not be parsed by Hoa/Regex
		return null;
	}

	$capturingGroups = $parsed->getCapturingGroups();
	$markVerbs = $parsed->getMarkVerbs();
	// the parser's subject base type is only used for a definite match, plain string otherwise
	$subjectBaseType = $wasMatched->yes() ? $parsed->getSubjectBaseType() : new StringType();

	$allGroups = new RegexGroupList($capturingGroups);
	$trailingOptionals = $allGroups->countTrailingOptionals();
	$soleOptionalGroup = $allGroups->getOnlyOptionalTopLevelGroup();
	$soleAlternation = $allGroups->getOnlyTopLevelAlternation();
	$flags ??= 0;

	if (!$matchesAll && $soleOptionalGroup !== null && $wasMatched->yes()) {
		// if only one optional top level capturing group exists
		// we build a more precise tagged union of an empty-match and a match with the group
		$withGroup = $this->buildArrayType(
			$subjectBaseType,
			$allGroups->forceGroupNonOptional($soleOptionalGroup),
			$wasMatched,
			$trailingOptionals,
			$flags,
			$markVerbs,
			$matchesAll,
		);

		if ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
			return $withGroup;
		}

		// positive match has a subject but not any capturing group
		$subjectOnly = ConstantArrayTypeBuilder::createEmpty();
		$subjectOnly->setOffsetValueType(
			new ConstantIntegerType(0),
			$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
		);

		return TypeCombinator::union($subjectOnly->getArray(), $withGroup);
	}

	if (
		!$matchesAll
		&& $soleOptionalGroup === null
		&& $soleAlternation !== null
		&& !$wasMatched->no()
	) {
		// if only a single top level alternation exists, build a more precise tagged union
		$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
		$variants = [];
		$inOptionalAlternation = false;

		foreach ($soleAlternation->getGroupCombinations() as $combination) {
			// iterate the untouched list while deriving the variant from it step by step
			$pristineGroups = new RegexGroupList($capturingGroups);
			$variantGroups = $pristineGroups;
			$reachedCombination = false;

			foreach ($pristineGroups as $group) {
				if (in_array($group->getId(), $combination, true)) {
					// groups of the current combination are the ones that matched
					$inOptionalAlternation = $group->inOptionalAlternation();
					$variantGroups = $variantGroups->forceGroupNonOptional($group);
					$reachedCombination = true;
					continue;
				}

				if (!$reachedCombination && !$group->resetsGroupCounter()) {
					// groups in front of the combination are present but hold the "unmatched" value
					$variantGroups = $variantGroups->forceGroupTypeAndNonOptional(
						$group,
						$unmatchedAsNull ? new NullType() : new ConstantStringType(''),
					);
					continue;
				}

				if (!$unmatchedAsNull && $group->getAlternationId() === $soleAlternation->getId()) {
					$variantGroups = $variantGroups->removeGroup($group);
				}
			}

			$variants[] = $this->buildArrayType(
				$subjectBaseType,
				$variantGroups,
				$wasMatched,
				$trailingOptionals,
				$flags,
				$markVerbs,
				$matchesAll,
			);
		}

		$hasBranchWithoutGroups = $soleAlternation->getAlternationsCount() !== count($soleAlternation->getGroupCombinations());
		if (!$unmatchedAsNull && ($hasBranchWithoutGroups || $inOptionalAlternation)) {
			// positive match has a subject but not any capturing group
			$subjectOnly = ConstantArrayTypeBuilder::createEmpty();
			$subjectOnly->setOffsetValueType(
				new ConstantIntegerType(0),
				$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
			);

			$variants[] = $subjectOnly->getArray();
		}

		return TypeCombinator::union(...$variants);
	}

	// the general case, which should work in all cases but does not yield the most
	// precise result possible in some cases
	return $this->buildArrayType(
		$subjectBaseType,
		$allGroups,
		$wasMatched,
		$trailingOptionals,
		$flags,
		$markVerbs,
		$matchesAll,
	);
}
256:
/**
 * Assembles the matches array type: offset 0 for the overall match, one entry per
 * capturing group (by number, and additionally by name for named groups) and an
 * optional 'MARK' entry when the regex contains MARK verbs.
 *
 * The result is a list unless a named group or a MARK verb adds string keys.
 * For "match all" with PREG_SET_ORDER the shape is wrapped in a list of shapes.
 *
 * @param int $trailingOptionals number of groups at the end of $captureGroups treated as trailing optionals
 * @param list<string> $markVerbs
 */
private function buildArrayType(
	Type $subjectBaseType,
	RegexGroupList $captureGroups,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
	int $flags,
	array $markVerbs,
	bool $matchesAll,
): Type
{
	$hasMarkVerbs = count($markVerbs) > 0;
	$isList = !$hasMarkVerbs;
	$shape = ConstantArrayTypeBuilder::createEmpty();

	// first item in matches contains the overall match.
	$shape->setOffsetValueType(
		$this->getKeyType(0),
		$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
		$this->isSubjectOptional($wasMatched, $matchesAll),
	);

	$groupCount = count($captureGroups);
	$firstTrailingOptionalNumber = $groupCount - $trailingOptionals + 1;
	$groupNumber = 0;
	foreach ($captureGroups as $captureGroup) {
		// group numbers are 1-based, offset 0 is taken by the overall match
		$groupNumber++;
		$isTrailingOptional = $groupNumber >= $firstTrailingOptionalNumber;
		$isLastGroup = $groupNumber === $groupCount;

		$valueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
		$isOptional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

		if ($captureGroup->isNamed()) {
			// a string key means the array can no longer be a list
			$isList = false;
			$shape->setOffsetValueType($this->getKeyType($captureGroup->getName()), $valueType, $isOptional);
		}

		$shape->setOffsetValueType($this->getKeyType($groupNumber), $valueType, $isOptional);
	}

	if ($hasMarkVerbs) {
		$markTypes = [];
		foreach ($markVerbs as $markVerb) {
			$markTypes[] = new ConstantStringType($markVerb);
		}

		$shape->setOffsetValueType($this->getKeyType('MARK'), TypeCombinator::union(...$markTypes), true);
	}

	$shapeType = $shape->getArray();

	if ($matchesAll && $this->containsSetOrder($flags)) {
		return new IntersectionType([
			new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $shapeType),
			new AccessoryArrayListType(),
		]);
	}

	return $isList
		? TypeCombinator::intersect($shapeType, new AccessoryArrayListType())
		: $shapeType;
}
329:
/**
 * Tells whether offset 0 (the overall match) is an optional key.
 *
 * It is never optional for "match all"; for a single match it is optional
 * unless the match is known to have succeeded.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
	return !$matchesAll && !$wasMatched->yes();
}
338:
/**
 * Builds the value type stored at offset 0 (the overall match).
 *
 * For a single match this is $baseType passed through getValueType() with null removed.
 * For "match all" $baseType is not used: the entry is based on a plain string and, in
 * pattern order, wrapped into a list of such values.
 *
 * @param Type $baseType A string type (or string variant) representing the subject of the match
 */
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	if (!$matchesAll) {
		return TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));
	}

	$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));

	if ($this->containsPatternOrder($flags)) {
		// list keys are never negative: use int<0, max> like the other list types built in this class
		$subjectValueType = new IntersectionType([
			new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $subjectValueType),
			new AccessoryArrayListType(),
		]);
	}

	return $subjectValueType;
}
359:
/**
 * Tells whether the array key of a capturing group is optional.
 *
 * "Match all": only trailing optional groups in set order, and only when
 * unmatched-as-null is not in effect.
 * Single match: always optional unless the match definitely succeeded; after a definite
 * match only a trailing optional group can be missing, and only when
 * unmatched-as-null is not in effect.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
	if ($matchesAll) {
		return $isTrailingOptional
			&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& $this->containsSetOrder($flags);
	}

	if (!$wasMatched->yes()) {
		return true;
	}

	if (!$isTrailingOptional || $this->containsUnmatchedAsNull($flags, $matchesAll)) {
		return false;
	}

	return $captureGroup->isOptional();
}
384:
/**
 * Builds the value type stored for one capturing group.
 *
 * An optional group that still appears in the array without unmatched-as-null being in
 * effect may hold an empty string, so '' is added to its type. With unmatched-as-null in
 * effect, null is removed again for non-optional, non-trailing groups. In "match all"
 * pattern order the value is wrapped into a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
	$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
	$groupIsOptional = $captureGroup->isOptional();
	$groupType = $captureGroup->getType();
	// null can be dropped for mandatory groups which are not among the trailing optionals
	$nullIsImpossible = $unmatchedAsNull && !$isTrailingOptional && !$groupIsOptional;

	if ($matchesAll) {
		// in set order trailing optional groups are left out instead of holding ''
		$mayBeEmptyString = $groupIsOptional
			&& !$unmatchedAsNull
			&& (!$this->containsSetOrder($flags) || !$isTrailingOptional);

		if ($mayBeEmptyString) {
			$valueType = TypeCombinator::removeNull($this->getValueType(
				TypeCombinator::union($groupType, new ConstantStringType('')),
				$flags,
				$matchesAll,
			));
		} else {
			$valueType = $this->getValueType($groupType, $flags, $matchesAll);
		}

		if ($nullIsImpossible) {
			$valueType = TypeCombinator::removeNull($valueType);
		}

		if ($this->containsPatternOrder($flags)) {
			$valueType = new IntersectionType([
				new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $valueType),
				new AccessoryArrayListType(),
			]);
		}

		return $valueType;
	}

	$mayBeEmptyString = $groupIsOptional && !$isLastGroup && !$unmatchedAsNull;
	$valueType = $this->getValueType(
		$mayBeEmptyString ? TypeCombinator::union($groupType, new ConstantStringType('')) : $groupType,
		$flags,
		$matchesAll,
	);

	if ($nullIsImpossible && $wasMatched->yes()) {
		$valueType = TypeCombinator::removeNull($valueType);
	}

	return $valueType;
}
441:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set in $flags.
 */
private function containsOffsetCapture(int $flags): bool
{
	$offsetCaptureBit = $flags & PREG_OFFSET_CAPTURE;

	return $offsetCaptureBit !== 0;
}
446:
/**
 * Tells whether results are in pattern order.
 *
 * If no order flag is given, PREG_PATTERN_ORDER is assumed, so this is true
 * whenever the PREG_SET_ORDER bit is absent.
 */
private function containsPatternOrder(int $flags): bool
{
	return ($flags & PREG_SET_ORDER) === 0;
}
452:
/**
 * Tells whether the PREG_SET_ORDER bit is set in $flags.
 */
private function containsSetOrder(int $flags): bool
{
	$setOrderBit = $flags & PREG_SET_ORDER;

	return $setOrderBit !== 0;
}
457:
/**
 * Tells whether PREG_UNMATCHED_AS_NULL is in effect for the given flags.
 *
 * Without the PREG_UNMATCHED_AS_NULL bit the answer is always false. For a single match
 * the bit additionally needs either the PREG_UNMATCHED_AS_NULL_ON_72_73 bit or a PHP
 * version reporting support for it.
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
	if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
		return false;
	}

	if ($matchesAll) {
		// preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
		// https://3v4l.org/tKmPn
		return true;
	}

	return ($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0
		|| $this->phpVersion->supportsPregUnmatchedAsNull();
}
468:
/**
 * Wraps an array key into the matching constant type: a constant string type
 * for string keys, a constant integer type for integer keys.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
477:
/**
 * Applies the flags to a base value type.
 *
 * Null is added when unmatched-as-null is in effect. With PREG_OFFSET_CAPTURE the value
 * becomes a two element array of the (possibly nullable) value and its offset.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	$valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
		? TypeCombinator::addNull($baseType)
		: $baseType;

	if (!$this->containsOffsetCapture($flags)) {
		return $valueType;
	}

	$valueWithOffset = ConstantArrayTypeBuilder::createEmpty();
	$valueWithOffset->setOffsetValueType(new ConstantIntegerType(0), $valueType);
	// unmatched groups return -1 as offset
	$valueWithOffset->setOffsetValueType(new ConstantIntegerType(1), IntegerRangeType::fromInterval(-1, null));

	return $valueWithOffset->getArray();
}
505:
/**
 * Resolves the type of the pattern expression.
 *
 * Concatenations are delegated to the regex expression helper; any other
 * expression is resolved through the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
	return $patternExpr instanceof Expr\BinaryOp\Concat
		? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
		: $scope->getType($patternExpr);
}
514:
515: }
516: