1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use PhpParser\Node\Expr;
6: use PHPStan\Analyser\Scope;
7: use PHPStan\Php\PhpVersion;
8: use PHPStan\TrinaryLogic;
9: use PHPStan\Type\Accessory\AccessoryArrayListType;
10: use PHPStan\Type\ArrayType;
11: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
12: use PHPStan\Type\Constant\ConstantIntegerType;
13: use PHPStan\Type\Constant\ConstantStringType;
14: use PHPStan\Type\IntegerRangeType;
15: use PHPStan\Type\IntegerType;
16: use PHPStan\Type\NullType;
17: use PHPStan\Type\Regex\RegexCapturingGroup;
18: use PHPStan\Type\Regex\RegexExpressionHelper;
19: use PHPStan\Type\Regex\RegexGroupList;
20: use PHPStan\Type\Regex\RegexGroupParser;
21: use PHPStan\Type\StringType;
22: use PHPStan\Type\Type;
23: use PHPStan\Type\TypeCombinator;
24: use function count;
25: use function in_array;
26: use function is_string;
27: use const PREG_OFFSET_CAPTURE;
28: use const PREG_PATTERN_ORDER;
29: use const PREG_SET_ORDER;
30: use const PREG_UNMATCHED_AS_NULL;
31:
32: /**
33: * @api
34: */
35: final class RegexArrayShapeMatcher
36: {
37:
/**
 * Pass this into $flagsType as well if the library supports emulating PREG_UNMATCHED_AS_NULL on PHP 7.2 and 7.3
 *
 * For the single-match variant, PREG_UNMATCHED_AS_NULL is only honoured when either this bit is
 * set in the flags or PhpVersion::supportsPregUnmatchedAsNull() returns true.
 * The match-all variant does not look at this bit.
 */
public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;
42:
/**
 * @param RegexGroupParser $regexGroupParser used to parse a regex string into its capturing groups and mark verbs
 * @param RegexExpressionHelper $regexExpressionHelper used to resolve the pattern type of concatenated pattern expressions
 * @param PhpVersion $phpVersion consulted to decide whether PREG_UNMATCHED_AS_NULL takes effect for single matches
 */
public function __construct(
	private RegexGroupParser $regexGroupParser,
	private RegexExpressionHelper $regexExpressionHelper,
	private PhpVersion $phpVersion,
)
{
}
50:
/**
 * Infers the type of the matches array for the match-all variant (preg_match_all() semantics).
 *
 * @param Type|null $flagsType type of the flags argument; null is treated like "no flags"
 * @return Type|null the inferred array type, or null when no precise type could be inferred
 */
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
}
55:
/**
 * Infers the type of the matches array for the single-match variant.
 *
 * @param Type|null $flagsType type of the flags argument; null is treated like "no flags"
 * @return Type|null the inferred array type, or null when no precise type could be inferred
 */
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
{
	$patternType = $this->getPatternType($patternExpr, $scope);

	return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
}
60:
/**
 * Infers the matches array type for every constant string the pattern type can take
 * and combines the per-pattern results into one type.
 *
 * @return Type|null null when the pattern has no constant string value, the flags are not a
 *                   constant integer, an unsupported flag bit is set, or one of the patterns
 *                   could not be analysed
 */
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	if ($wasMatched->no()) {
		// a definite non-match yields the empty array
		return ConstantArrayTypeBuilder::createEmpty()->getArray();
	}

	$patterns = $patternType->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$rawFlags = $flagsType->getValue();
		$supportedFlags = PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73;

		// some other unsupported/unexpected flag was passed in
		if (($rawFlags & ~$supportedFlags) !== 0) {
			return null;
		}

		/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
		$flags = $rawFlags;
	}

	$shapes = [];
	foreach ($patterns as $pattern) {
		$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
		if ($shape === null) {
			// a single pattern that cannot be analysed makes the whole result unknown
			return null;
		}

		$shapes[] = $shape;
	}

	return count($shapes) === 1
		? $shapes[0]
		: TypeCombinator::union(...$shapes);
}
103:
/**
 * Infers the matches array type for one concrete regex string.
 *
 * Three strategies are tried in order; the first two apply to the single-match variant only:
 *  1. the match is known to have succeeded and only one optional top-level capturing group exists
 *  2. only a single top-level alternation exists
 *  3. the general shape built from all capturing groups
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags null is treated as 0
 * @return Type|null null when the regex could not be parsed
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
	$astWalkResult = $this->regexGroupParser->parseGroups($regex);
	if ($astWalkResult === null) {
		// regex could not be parsed by Hoa/Regex
		return null;
	}
	$groupList = $astWalkResult->getCapturingGroups();
	$markVerbs = $astWalkResult->getMarkVerbs();
	// the subject type derived from the regex is only used when the match is known to have succeeded
	$subjectBaseType = new StringType();
	if ($wasMatched->yes()) {
		$subjectBaseType = $astWalkResult->getSubjectBaseType();
	}

	$regexGroupList = new RegexGroupList($groupList);
	// computed once from the full group list and reused by every buildArrayType() call below
	$trailingOptionals = $regexGroupList->countTrailingOptionals();
	$onlyOptionalTopLevelGroup = $regexGroupList->getOnlyOptionalTopLevelGroup();
	$onlyTopLevelAlternation = $regexGroupList->getOnlyTopLevelAlternation();
	$flags ??= 0;

	if (
		!$matchesAll
		&& $wasMatched->yes()
		&& $onlyOptionalTopLevelGroup !== null
	) {
		// if only one top level capturing optional group exists
		// we build a more precise tagged union of an empty-match and a match with the group
		$regexGroupList = $regexGroupList->forceGroupNonOptional($onlyOptionalTopLevelGroup);

		$combiType = $this->buildArrayType(
			$subjectBaseType,
			$regexGroupList,
			$wasMatched,
			$trailingOptionals,
			$flags,
			$markVerbs,
			$matchesAll,
		);

		// the subject-only variant is only added when unmatched groups are not reported as null
		if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
			// positive match has a subject but not any capturing group
			$builder = ConstantArrayTypeBuilder::createEmpty();
			$builder->setOffsetValueType(new ConstantIntegerType(0), $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll));

			$combiType = TypeCombinator::union(
				$builder->getArray(),
				$combiType,
			);
		}

		return $combiType;
	} elseif (
		!$matchesAll
		&& $onlyOptionalTopLevelGroup === null
		&& $onlyTopLevelAlternation !== null
		&& !$wasMatched->no()
	) {
		// if only a single top level alternation exists, build a more precise tagged union

		$combiTypes = [];
		// holds the value reported by the most recently visited group that is part of a combination
		$isOptionalAlternation = false;
		foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
			// every combination starts from the full, unmodified group list
			$comboList = new RegexGroupList($groupList);

			$beforeCurrentCombo = true;
			foreach ($comboList as $group) {
				if (in_array($group->getId(), $groupCombo, true)) {
					// groups belonging to the current combination are forced to be present
					$isOptionalAlternation = $group->inOptionalAlternation();
					$comboList = $comboList->forceGroupNonOptional($group);
					$beforeCurrentCombo = false;
				} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
					// groups in front of the current combination are kept with a fixed placeholder type:
					// null when unmatched-as-null is in effect, the empty string otherwise
					$comboList = $comboList->forceGroupTypeAndNonOptional(
						$group,
						$this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''),
					);
				} elseif (
					$group->getAlternationId() === $onlyTopLevelAlternation->getId()
					&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
				) {
					// remaining groups of this alternation are dropped unless unmatched-as-null is in effect
					$comboList = $comboList->removeGroup($group);
				}
			}

			$combiType = $this->buildArrayType(
				$subjectBaseType,
				$comboList,
				$wasMatched,
				$trailingOptionals,
				$flags,
				$markVerbs,
				$matchesAll,
			);

			$combiTypes[] = $combiType;
		}

		// add the subject-only variant when not every alternative has a group combination
		// or the alternation is optional, but only if unmatched groups are not reported as null
		if (
			!$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& (
				$onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations())
				|| $isOptionalAlternation
			)
		) {
			// positive match has a subject but not any capturing group
			$builder = ConstantArrayTypeBuilder::createEmpty();
			$builder->setOffsetValueType(new ConstantIntegerType(0), $this->createSubjectValueType($subjectBaseType, $flags, $matchesAll));

			$combiTypes[] = $builder->getArray();
		}

		return TypeCombinator::union(...$combiTypes);
	}

	// the general case, which should work in all cases but does not yield the most
	// precise result possible in some cases
	return $this->buildArrayType(
		$subjectBaseType,
		$regexGroupList,
		$wasMatched,
		$trailingOptionals,
		$flags,
		$markVerbs,
		$matchesAll,
	);
}
232:
/**
 * Assembles the matches array shape: offset 0 holds the overall match, followed by one entry
 * per capturing group (keyed by its 1-based position and, for named groups, additionally by
 * its name) and an optional 'MARK' entry when the pattern contains mark verbs.
 *
 * In match-all mode with PREG_SET_ORDER the shape is wrapped into a list of such shapes.
 *
 * @param list<string> $markVerbs
 */
private function buildArrayType(
	Type $subjectBaseType,
	RegexGroupList $captureGroups,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
	int $flags,
	array $markVerbs,
	bool $matchesAll,
): Type
{
	$hasMarkVerbs = count($markVerbs) > 0;
	$hasNamedGroup = false;
	$shapeBuilder = ConstantArrayTypeBuilder::createEmpty();

	// first item in matches contains the overall match.
	$shapeBuilder->setOffsetValueType(
		$this->getKeyType(0),
		$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
		$this->isSubjectOptional($wasMatched, $matchesAll),
	);

	$groupCount = count($captureGroups);
	// groups numbered above this threshold belong to the trailing optional ones
	$lastMandatoryNumber = $groupCount - $trailingOptionals;
	$groupNumber = 0;
	foreach ($captureGroups as $captureGroup) {
		$groupNumber++;

		$isTrailingOptional = $groupNumber > $lastMandatoryNumber;
		$isLastGroup = $groupNumber === $groupCount;
		$groupValueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
		$optional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

		if ($captureGroup->isNamed()) {
			$hasNamedGroup = true;

			// the named key is registered in front of the numeric key of the same group
			$shapeBuilder->setOffsetValueType(
				$this->getKeyType($captureGroup->getName()),
				$groupValueType,
				$optional,
			);
		}

		$shapeBuilder->setOffsetValueType(
			$this->getKeyType($groupNumber),
			$groupValueType,
			$optional,
		);
	}

	if ($hasMarkVerbs) {
		$markNameTypes = [];
		foreach ($markVerbs as $markName) {
			$markNameTypes[] = new ConstantStringType($markName);
		}

		// the MARK key is always optional
		$shapeBuilder->setOffsetValueType(
			$this->getKeyType('MARK'),
			TypeCombinator::union(...$markNameTypes),
			true,
		);
	}

	$shapeType = $shapeBuilder->getArray();

	if ($matchesAll && $this->containsSetOrder($flags)) {
		$listOfShapes = TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $shapeType),
			new AccessoryArrayListType(),
		);

		if ($wasMatched->yes()) {
			return $listOfShapes;
		}

		// without a guaranteed match the result may also be the empty array
		return TypeCombinator::union(
			ConstantArrayTypeBuilder::createEmpty()->getArray(),
			$listOfShapes,
		);
	}

	// only shapes without string keys (no named groups, no MARK) are marked as list
	if (!$hasMarkVerbs && !$hasNamedGroup) {
		return TypeCombinator::intersect($shapeType, new AccessoryArrayListType());
	}

	return $shapeType;
}
312:
/**
 * Tells whether offset 0 (the overall match) is an optional key:
 * never in match-all mode, otherwise whenever the match is not certain.
 */
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
{
	return !$matchesAll && !$wasMatched->yes();
}
321:
/**
 * Creates the value type stored at offset 0 of the matches array.
 *
 * NOTE(review): removeNull() is applied to the result of getValueType(); with PREG_OFFSET_CAPTURE
 * that result is a [value, offset] array, so a null inside the pair presumably survives - confirm.
 *
 * @param Type $baseType A string type (or string variant) representing the subject of the match
 */
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	if (!$matchesAll) {
		return TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));
	}

	// in match-all mode $baseType is not used; the value is based on a plain string
	$matchAllValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));

	if (!$this->containsPatternOrder($flags)) {
		return $matchAllValueType;
	}

	// pattern order collects the overall matches into a list
	return TypeCombinator::intersect(
		new ArrayType(new IntegerType(), $matchAllValueType),
		new AccessoryArrayListType(),
	);
}
342:
/**
 * Decides whether the array key of a capturing group is optional in the resulting shape.
 */
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
{
	if ($matchesAll) {
		// only trailing optional groups in set order without unmatched-as-null may be absent
		return $isTrailingOptional
			&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
			&& $this->containsSetOrder($flags);
	}

	if (!$wasMatched->yes()) {
		// without a guaranteed match every group key may be missing
		return true;
	}

	if (!$isTrailingOptional || $this->containsUnmatchedAsNull($flags, $matchesAll)) {
		return false;
	}

	return $captureGroup->isOptional();
}
367:
/**
 * Creates the value type stored for a single capturing group.
 *
 * Optional groups may additionally yield the empty string when unmatched-as-null is not in
 * effect; mandatory groups in front of the trailing optional ones get null removed again.
 * In match-all mode with pattern order the value is wrapped into a list.
 */
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
{
	$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
	$groupIsOptional = $captureGroup->isOptional();
	// condition shared by both modes under which null is stripped from the value again
	$stripNull = !$isTrailingOptional && $unmatchedAsNull && !$groupIsOptional;

	if (!$matchesAll) {
		$innerType = $captureGroup->getType();
		if (!$isLastGroup && !$unmatchedAsNull && $groupIsOptional) {
			// an optional group which is not the last one may also yield the empty string
			$innerType = TypeCombinator::union($innerType, new ConstantStringType(''));
		}

		$singleValueType = $this->getValueType($innerType, $flags, $matchesAll);
		if ($stripNull && $wasMatched->yes()) {
			$singleValueType = TypeCombinator::removeNull($singleValueType);
		}

		return $singleValueType;
	}

	$setOrder = $this->containsSetOrder($flags);
	if (!$unmatchedAsNull && $groupIsOptional && (!$setOrder || !$isTrailingOptional)) {
		// optional group which may yield the empty string instead of being absent
		$allValueType = TypeCombinator::removeNull(
			$this->getValueType(
				TypeCombinator::union($captureGroup->getType(), new ConstantStringType('')),
				$flags,
				$matchesAll,
			),
		);
	} else {
		$allValueType = $this->getValueType($captureGroup->getType(), $flags, $matchesAll);
	}

	if ($stripNull) {
		$allValueType = TypeCombinator::removeNull($allValueType);
	}

	if ($this->containsPatternOrder($flags)) {
		// pattern order collects the values of a group into a list
		$allValueType = TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $allValueType),
			new AccessoryArrayListType(),
		);
	}

	return $allValueType;
}
424:
/**
 * Tells whether the PREG_OFFSET_CAPTURE bit is set in the given flags.
 */
private function containsOffsetCapture(int $flags): bool
{
	$offsetCaptureBit = $flags & PREG_OFFSET_CAPTURE;

	return $offsetCaptureBit !== 0;
}
429:
/**
 * Tells whether results are laid out in pattern order.
 */
private function containsPatternOrder(int $flags): bool
{
	// If no order flag is given, PREG_PATTERN_ORDER is assumed,
	// hence pattern order applies whenever the PREG_SET_ORDER bit is absent.
	return ($flags & PREG_SET_ORDER) === 0;
}
435:
/**
 * Tells whether the PREG_SET_ORDER bit is set in the given flags.
 */
private function containsSetOrder(int $flags): bool
{
	$setOrderBit = $flags & PREG_SET_ORDER;

	return $setOrderBit !== 0;
}
440:
/**
 * Tells whether unmatched groups are reported as null for the given flags and match mode.
 */
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
{
	if (($flags & PREG_UNMATCHED_AS_NULL) === 0) {
		return false;
	}

	if ($matchesAll) {
		// preg_match_all() with PREG_UNMATCHED_AS_NULL works consistently across php-versions
		// https://3v4l.org/tKmPn
		return true;
	}

	// for single matches the flag needs either the emulation marker or support by the PHP version
	if (($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0) {
		return true;
	}

	return $this->phpVersion->supportsPregUnmatchedAsNull();
}
451:
/**
 * Wraps an array key into the matching constant type.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
460:
/**
 * Turns a base type into the type of a single matches entry: null is added when
 * unmatched-as-null is in effect, and with PREG_OFFSET_CAPTURE the value is wrapped
 * into a two-element array of value and offset.
 */
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
	$valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
		? TypeCombinator::addNull($baseType)
		: $baseType;

	if (!$this->containsOffsetCapture($flags)) {
		return $valueType;
	}

	$pairBuilder = ConstantArrayTypeBuilder::createEmpty();
	$pairBuilder->setOffsetValueType(new ConstantIntegerType(0), $valueType);
	// unmatched groups return -1 as offset
	$pairBuilder->setOffsetValueType(new ConstantIntegerType(1), IntegerRangeType::fromInterval(-1, null));

	return $pairBuilder->getArray();
}
488:
/**
 * Resolves the type of the pattern expression; concatenations are delegated to
 * the regex expression helper, everything else is looked up in the scope.
 */
private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
	return $patternExpr instanceof Expr\BinaryOp\Concat
		? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
		: $scope->getType($patternExpr);
}
497:
498: }
499: