1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\DependencyInjection\AutowiredService;
8: use PHPStan\Php\PhpVersion;
9: use PHPStan\TrinaryLogic;
10: use PHPStan\Type\Accessory\AccessoryArrayListType;
11: use PHPStan\Type\ArrayType;
12: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
13: use PHPStan\Type\Constant\ConstantIntegerType;
14: use PHPStan\Type\Constant\ConstantStringType;
15: use PHPStan\Type\IntegerRangeType;
16: use PHPStan\Type\IntegerType;
17: use PHPStan\Type\IntersectionType;
18: use PHPStan\Type\NullType;
19: use PHPStan\Type\Regex\RegexCapturingGroup;
20: use PHPStan\Type\Regex\RegexExpressionHelper;
21: use PHPStan\Type\Regex\RegexGroupList;
22: use PHPStan\Type\Regex\RegexGroupParser;
23: use PHPStan\Type\StringType;
24: use PHPStan\Type\Type;
25: use PHPStan\Type\TypeCombinator;
26: use function count;
27: use function in_array;
28: use function is_string;
29: use const PREG_OFFSET_CAPTURE;
30: use const PREG_PATTERN_ORDER;
31: use const PREG_SET_ORDER;
32: use const PREG_UNMATCHED_AS_NULL;
33:
34: /**
35: * @api
36: */
37: #[AutowiredService]
38: final class RegexArrayShapeMatcher
39: {
40:
41: /**
42: * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
43: */
44: public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
45:
/**
 * @param RegexGroupParser $regexGroupParser used to parse a regex into its capturing groups and mark verbs
 * @param RegexExpressionHelper $regexExpressionHelper used to resolve patterns that are built via string concatenation
 * @param PhpVersion $phpVersion consulted to decide whether PREG_UNMATCHED_AS_NULL is supported for single matches
 */
public function __construct(
    private RegexGroupParser $regexGroupParser,
    private RegexExpressionHelper $regexExpressionHelper,
    private PhpVersion $phpVersion,
)
{
}
53:
/**
 * Infers the shape of the matches array for a "match all" call (e.g. preg_match_all()).
 *
 * @param Type|null $flagsType type of the flags argument, or null when no flags were passed
 * @return Type|null the inferred array shape, or null when no shape could be determined
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
    $patternType = $this->getPatternType($patternExpr, $scope);

    return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
58:
/**
 * Infers the shape of the matches array for a single-match call (presumably preg_match()).
 *
 * @param Type|null $flagsType type of the flags argument, or null when no flags were passed
 * @return Type|null the inferred array shape, or null when no shape could be determined
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
    $patternType = $this->getPatternType($patternExpr, $scope);

    return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
63:
/**
 * Resolves the matches shape for every constant string the pattern type may hold
 * and combines the per-pattern results.
 *
 * Returns an empty constant array when the match is known to have failed, and null when
 * the pattern is not constant, the flags are not a single constant integer, the flags
 * contain an unsupported bit, or any of the patterns cannot be analysed.
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
    if ($wasMatched->no()) {
        return ConstantArrayTypeBuilder::createEmpty()->getArray();
    }

    $patterns = $patternType->getConstantStrings();
    if (count($patterns) === 0) {
        return null;
    }

    $flags = null;
    if ($flagsType !== null) {
        if (!$flagsType instanceof ConstantIntegerType) {
            return null;
        }

        $givenFlags = $flagsType->getValue();
        $supportedFlags = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;

        /** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
        $flags = $givenFlags & $supportedFlags;

        // some other unsupported/unexpected flag was passed in
        if ($flags !== $givenFlags) {
            return null;
        }
    }

    $shapes = [];
    foreach ($patterns as $pattern) {
        $shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
        if ($shape === null) {
            // a single pattern that cannot be analysed invalidates the whole result
            return null;
        }

        $shapes[] = $shape;
    }

    return count($shapes) === 1
        ? $shapes[0]
        : TypeCombinator::union(...$shapes);
}
106:
/**
 * Builds the matches shape for one concrete regex.
 *
 * Two special cases yield a more precise tagged union for single matches:
 * a pattern with exactly one optional top level capturing group, and a pattern
 * with exactly one top level alternation. Everything else goes through the
 * general shape builder. Returns null when the regex cannot be parsed.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
    $parsed = $this->regexGroupParser->parseGroups($regex);
    if ($parsed === null) {
        // regex could not be parsed by Hoa/Regex
        return null;
    }

    $capturingGroups = $parsed->getCapturingGroups();
    $markVerbs = $parsed->getMarkVerbs();
    // the subject type derived from the pattern is only used when the match is certain
    $subjectBaseType = $wasMatched->yes() ? $parsed->getSubjectBaseType() : new StringType();

    $regexGroupList = new RegexGroupList($capturingGroups);
    $trailingOptionals = $regexGroupList->countTrailingOptionals();
    $soleOptionalGroup = $regexGroupList->getOnlyOptionalTopLevelGroup();
    $soleAlternation = $regexGroupList->getOnlyTopLevelAlternation();
    $flags = $flags ?? 0;

    if (
        !$matchesAll
        && $wasMatched->yes()
        && $soleOptionalGroup !== null
    ) {
        // if only one top level capturing optional group exists
        // we build a more precise tagged union of an empty match and a match with the group
        $withGroup = $this->buildArrayType(
            $subjectBaseType,
            $regexGroupList->forceGroupNonOptional($soleOptionalGroup),
            $wasMatched,
            $trailingOptionals,
            $flags,
            $markVerbs,
            $matchesAll,
        );

        if ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
            return $withGroup;
        }

        // positive match has a subject but not any capturing group
        $subjectOnly = ConstantArrayTypeBuilder::createEmpty();
        $subjectOnly->setOffsetValueType(
            new ConstantIntegerType(0),
            $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
        );

        return TypeCombinator::union($subjectOnly->getArray(), $withGroup);
    }

    if (
        !$matchesAll
        && $soleOptionalGroup === null
        && $soleAlternation !== null
        && !$wasMatched->no()
    ) {
        // if only a single top level alternation exists, build a more precise tagged union
        $unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
        $groupCombinations = $soleAlternation->getGroupCombinations();
        $alternationId = $soleAlternation->getId();

        $shapes = [];
        $isOptionalAlternation = false;
        foreach ($groupCombinations as $groupCombo) {
            $comboList = new RegexGroupList($capturingGroups);

            $reachedCombo = false;
            foreach ($comboList as $group) {
                if (in_array($group->getId(), $groupCombo, true)) {
                    // groups of the current combination are the ones that matched
                    $isOptionalAlternation = $group->inOptionalAlternation();
                    $comboList = $comboList->forceGroupNonOptional($group);
                    $reachedCombo = true;
                    continue;
                }

                if (!$reachedCombo && !$group->resetsGroupCounter()) {
                    // groups in front of the current combination are present with a placeholder value
                    $comboList = $comboList->forceGroupTypeAndNonOptional(
                        $group,
                        $unmatchedAsNull ? new NullType() : new ConstantStringType(''),
                    );
                    continue;
                }

                if (!$unmatchedAsNull && $group->getAlternationId() === $alternationId) {
                    $comboList = $comboList->removeGroup($group);
                }
            }

            $shapes[] = $this->buildArrayType(
                $subjectBaseType,
                $comboList,
                $wasMatched,
                $trailingOptionals,
                $flags,
                $markVerbs,
                $matchesAll,
            );
        }

        if (
            !$unmatchedAsNull
            && (
                $soleAlternation->getAlternationsCount() !== count($groupCombinations)
                || $isOptionalAlternation
            )
        ) {
            // positive match has a subject but not any capturing group
            $subjectOnly = ConstantArrayTypeBuilder::createEmpty();
            $subjectOnly->setOffsetValueType(
                new ConstantIntegerType(0),
                $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
            );

            $shapes[] = $subjectOnly->getArray();
        }

        return TypeCombinator::union(...$shapes);
    }

    // the general case, which should work in all cases but does not yield the most
    // precise result possible in some cases
    return $this->buildArrayType(
        $subjectBaseType,
        $regexGroupList,
        $wasMatched,
        $trailingOptionals,
        $flags,
        $markVerbs,
        $matchesAll,
    );
}
235:
/**
 * Assembles the matches shape: offset 0 for the overall match, one offset per
 * capturing group (plus a string key for named groups) and an optional 'MARK' key
 * when the pattern contains mark verbs.
 *
 * With PREG_SET_ORDER on a "match all" call the shape is wrapped into a list of shapes.
 * Otherwise the shape is additionally marked as a list as long as neither named groups
 * nor mark verbs introduced string keys.
 *
 * @param list<string> $markVerbs
 */
private function buildArrayType(
    Type $subjectBaseType,
    RegexGroupList $captureGroups,
    TrinaryLogic $wasMatched,
    int $trailingOptionals,
    int $flags,
    array $markVerbs,
    bool $matchesAll,
): Type
{
    $hasMarkVerbs = count($markVerbs) > 0;
    $forceList = !$hasMarkVerbs;
    $builder = ConstantArrayTypeBuilder::createEmpty();

    // first item in matches contains the overall match.
    $builder->setOffsetValueType(
        $this->getKeyType(0),
        $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
        $this->isSubjectOptional($wasMatched, $matchesAll),
    );

    $groupCount = count($captureGroups);
    $firstTrailingOptional = $groupCount - $trailingOptionals;
    $position = 0;
    foreach ($captureGroups as $captureGroup) {
        $isTrailingOptional = $position >= $firstTrailingOptional;
        $isLastGroup = $position + 1 === $groupCount;
        // group offsets are 1-based, offset 0 is taken by the overall match
        $position++;

        $valueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
        $isOptional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

        if ($captureGroup->isNamed()) {
            // a named group is registered under its name and under its numeric offset
            $forceList = false;

            $builder->setOffsetValueType(
                $this->getKeyType($captureGroup->getName()),
                $valueType,
                $isOptional,
            );
        }

        $builder->setOffsetValueType(
            $this->getKeyType($position),
            $valueType,
            $isOptional,
        );
    }

    if ($hasMarkVerbs) {
        $markTypes = [];
        foreach ($markVerbs as $markVerb) {
            $markTypes[] = new ConstantStringType($markVerb);
        }

        $builder->setOffsetValueType(
            $this->getKeyType('MARK'),
            TypeCombinator::union(...$markTypes),
            true,
        );
    }

    $shape = $builder->getArray();

    if ($matchesAll && $this->containsSetOrder($flags)) {
        return new IntersectionType([
            new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $shape),
            new AccessoryArrayListType(),
        ]);
    }

    if ($forceList) {
        return TypeCombinator::intersect($shape, new AccessoryArrayListType());
    }

    return $shape;
}
308:
/**
 * Tells whether offset 0 (the overall match) is registered as optional.
 *
 * It never is for "match all" calls; for single matches it is optional
 * unless the match is known to have succeeded.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
    return !$matchesAll && !$wasMatched->yes();
}
317:
/**
 * Creates the value type stored at offset 0, i.e. the overall match.
 *
 * The result never contains null. For "match all" calls the given base type is
 * ignored in favour of a plain string, and with pattern order (the default when
 * PREG_SET_ORDER is absent) the value becomes a list of such values.
 *
 * @param Type $baseType A string type (or string variant) representing the subject of the match
 */
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
    if (!$matchesAll) {
        return TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));
    }

    $subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));

    if (!$this->containsPatternOrder($flags)) {
        return $subjectValueType;
    }

    // lists are keyed by non-negative integers; use the same key type
    // as the other list shapes built in this class
    return new IntersectionType([
        new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $subjectValueType),
        new AccessoryArrayListType(),
    ]);
}
338:
/**
 * Decides whether the offset of a capturing group is registered as optional in the shape.
 *
 * "Match all" calls: only trailing optional groups in set order without unmatched-as-null.
 * Single matches: always optional unless the match is certain; otherwise only trailing
 * groups that are themselves optional, and never when unmatched groups are reported as null.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
    if ($matchesAll) {
        return $isTrailingOptional
            && !$this->containsUnmatchedAsNull($flags, $matchesAll)
            && $this->containsSetOrder($flags);
    }

    if (!$wasMatched->yes()) {
        return true;
    }

    if (!$isTrailingOptional) {
        return false;
    }

    if ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
        return false;
    }

    return $captureGroup->isOptional();
}
363:
/**
 * Creates the value type stored for a single capturing group.
 *
 * Optional groups that can be present without having matched get the empty string
 * added to their type (when unmatched groups are not reported as null). Null is
 * removed again for mandatory, non-trailing groups. For "match all" calls in
 * pattern order the value is wrapped into a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
    $unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
    $groupIsOptional = $captureGroup->isOptional();
    // a mandatory group in front of the trailing optional ones should not carry null
    $stripNull = !$isTrailingOptional && $unmatchedAsNull && !$groupIsOptional;

    if ($matchesAll) {
        $mayBeEmptyString = !$unmatchedAsNull
            && $groupIsOptional
            && (!$this->containsSetOrder($flags) || !$isTrailingOptional);

        if ($mayBeEmptyString) {
            $valueType = TypeCombinator::removeNull(
                $this->getValueType(
                    TypeCombinator::union($captureGroup->getType(), new ConstantStringType('')),
                    $flags,
                    $matchesAll,
                ),
            );
        } else {
            $valueType = $this->getValueType($captureGroup->getType(), $flags, $matchesAll);
        }

        if ($stripNull) {
            $valueType = TypeCombinator::removeNull($valueType);
        }

        if ($this->containsPatternOrder($flags)) {
            $valueType = new IntersectionType([
                new ArrayType(IntegerRangeType::createAllGreaterThanOrEqualTo(0), $valueType),
                new AccessoryArrayListType(),
            ]);
        }

        return $valueType;
    }

    if (!$isLastGroup && !$unmatchedAsNull && $groupIsOptional) {
        $valueType = $this->getValueType(
            TypeCombinator::union($captureGroup->getType(), new ConstantStringType('')),
            $flags,
            $matchesAll,
        );
    } else {
        $valueType = $this->getValueType($captureGroup->getType(), $flags, $matchesAll);
    }

    if ($wasMatched->yes() && $stripNull) {
        $valueType = TypeCombinator::removeNull($valueType);
    }

    return $valueType;
}
420:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set in the given flags.
 */
private function containsOffsetCapture(int $flags): bool
{
    $offsetCaptureBit = $flags & PREG_OFFSET_CAPTURE;

    return $offsetCaptureBit !== 0;
}
425:
/**
 * Tells whether results are grouped in pattern order.
 *
 * The PREG_PATTERN_ORDER bit itself is not inspected: pattern order is
 * assumed whenever PREG_SET_ORDER is absent.
 */
private function containsPatternOrder(int $flags): bool
{
    // If no order flag is given, PREG_PATTERN_ORDER is assumed.
    $isSetOrder = $this->containsSetOrder($flags);

    return !$isSetOrder;
}
431:
/**
 * Tells whether the PREG_SET_ORDER bit is set in the given flags.
 */
private function containsSetOrder(int $flags): bool
{
    $setOrderBit = $flags & PREG_SET_ORDER;

    return $setOrderBit !== 0;
}
436:
/**
 * Tells whether unmatched groups are reported as null.
 *
 * For "match all" calls the PREG_UNMATCHED_AS_NULL bit alone decides. For single
 * matches the bit additionally requires either the emulation flag
 * self::PREG_UNMATCHED_AS_NULL_ON_72_73 or a PHP version that supports the flag.
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
    if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
        return false;
    }

    if ($matchesAll) {
        // preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
        // https://3v4l.org/tKmPn
        return true;
    }

    if (($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0) {
        return true;
    }

    return $this->phpVersion->supportsPregUnmatchedAsNull();
}
447:
/**
 * Wraps an array key into the matching constant type:
 * a constant string for string keys, a constant integer otherwise.
 */
private function getKeyType(int|string $key): Type
{
    return is_string($key)
        ? new ConstantStringType($key)
        : new ConstantIntegerType($key);
}
456:
/**
 * Adapts a base value type to the given flags.
 *
 * Null is added when unmatched groups are reported as null. With
 * PREG_OFFSET_CAPTURE the value becomes a two element array of the
 * (possibly nullable) value at offset 0 and an integer offset at offset 1.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
    $valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
        ? TypeCombinator::addNull($baseType)
        : $baseType;

    if (!$this->containsOffsetCapture($flags)) {
        return $valueType;
    }

    $pairBuilder = ConstantArrayTypeBuilder::createEmpty();
    $pairBuilder->setOffsetValueType(
        new ConstantIntegerType(0),
        $valueType,
    );
    // unmatched groups return -1 as offset
    $pairBuilder->setOffsetValueType(
        new ConstantIntegerType(1),
        IntegerRangeType::fromInterval(-1, null),
    );

    return $pairBuilder->getArray();
}
484:
/**
 * Resolves the type of the pattern expression.
 *
 * Concatenations are delegated to the regex expression helper; every other
 * expression is resolved through the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
    if (!$patternExpr instanceof Expr\BinaryOp\Concat) {
        return $scope->getType($patternExpr);
    }

    return $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope);
}
493:
494: }
495: