| 1: | <?php declare(strict_types = 1); |
| 2: | |
| 3: | namespace PHPStan\Type\Php; |
| 4: | |
| 5: | use PhpParser\Node\Expr; |
| 6: | use PHPStan\Analyser\Scope; |
| 7: | use PHPStan\Php\PhpVersion; |
| 8: | use PHPStan\TrinaryLogic; |
| 9: | use PHPStan\Type\Accessory\AccessoryArrayListType; |
| 10: | use PHPStan\Type\ArrayType; |
| 11: | use PHPStan\Type\Constant\ConstantArrayType; |
| 12: | use PHPStan\Type\Constant\ConstantArrayTypeBuilder; |
| 13: | use PHPStan\Type\Constant\ConstantIntegerType; |
| 14: | use PHPStan\Type\Constant\ConstantStringType; |
| 15: | use PHPStan\Type\IntegerRangeType; |
| 16: | use PHPStan\Type\IntegerType; |
| 17: | use PHPStan\Type\NullType; |
| 18: | use PHPStan\Type\Regex\RegexAlternation; |
| 19: | use PHPStan\Type\Regex\RegexCapturingGroup; |
| 20: | use PHPStan\Type\Regex\RegexExpressionHelper; |
| 21: | use PHPStan\Type\Regex\RegexGroupParser; |
| 22: | use PHPStan\Type\StringType; |
| 23: | use PHPStan\Type\Type; |
| 24: | use PHPStan\Type\TypeCombinator; |
| 25: | use function array_reverse; |
| 26: | use function count; |
| 27: | use function in_array; |
| 28: | use function is_string; |
| 29: | use const PREG_OFFSET_CAPTURE; |
| 30: | use const PREG_PATTERN_ORDER; |
| 31: | use const PREG_SET_ORDER; |
| 32: | use const PREG_UNMATCHED_AS_NULL; |
| 33: | |
| 34: | |
| 35: | |
| 36: | |
/**
 * Infers the shape of the matches array produced by a regular expression.
 *
 * The pattern is taken from an expression, resolved to its constant string
 * values, parsed into capturing groups and turned into an array type whose
 * layout depends on the PREG_* flags (presumably the flags accepted by
 * preg_match() / preg_match_all() — the "all" variants below model the latter).
 */
final class RegexArrayShapeMatcher
{

	/**
	 * Extra flag bit that is accepted next to the native PREG_* flags.
	 *
	 * When set together with PREG_UNMATCHED_AS_NULL, single-match analysis
	 * treats unmatched groups as null even if the configured PhpVersion does
	 * not report support for it (see containsUnmatchedAsNull()).
	 * NOTE(review): judging by the name this targets PHP 7.2/7.3 — confirm
	 * with the callers that set this bit.
	 */
	public const PREG_UNMATCHED_AS_NULL_ON_72_73 = 2048;

	public function __construct(
		private RegexGroupParser $regexGroupParser,
		private RegexExpressionHelper $regexExpressionHelper,
		private PhpVersion $phpVersion,
	)
	{
	}

	/**
	 * Computes the matches type for the "match all" variant.
	 *
	 * @return Type|null null when no precise shape can be inferred
	 */
	public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
	{
		$patternType = $this->getPatternType($patternExpr, $scope);

		return $this->matchPatternType($patternType, $flagsType, $wasMatched, true);
	}

	/**
	 * Computes the matches type for the single-match variant.
	 *
	 * @return Type|null null when no precise shape can be inferred
	 */
	public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
	{
		$patternType = $this->getPatternType($patternExpr, $scope);

		return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
	}

	/**
	 * Builds the matches type for every constant string the pattern type can
	 * take and unions the results.
	 *
	 * Returns an empty constant array when the match definitely failed, and
	 * null when the pattern is not constant, the flags are not a constant
	 * integer, an unsupported flag bit is present, or any pattern cannot be
	 * analysed.
	 */
	private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
	{
		if ($wasMatched->no()) {
			return new ConstantArrayType([], []);
		}

		$patterns = $patternType->getConstantStrings();
		if (count($patterns) === 0) {
			return null;
		}

		$flags = null;
		if ($flagsType !== null) {
			if (!$flagsType instanceof ConstantIntegerType) {
				return null;
			}

			$supportedMask = PREG_OFFSET_CAPTURE
				| PREG_PATTERN_ORDER
				| PREG_SET_ORDER
				| PREG_UNMATCHED_AS_NULL
				| self::PREG_UNMATCHED_AS_NULL_ON_72_73;
			$givenFlags = $flagsType->getValue();
			$flags = $givenFlags & $supportedMask;

			// Masking changed the value, so a bit outside the supported set was passed.
			if ($flags !== $givenFlags) {
				return null;
			}
		}

		$shapes = [];
		foreach ($patterns as $pattern) {
			$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched, $matchesAll);
			if ($shape === null) {
				return null;
			}

			$shapes[] = $shape;
		}

		return count($shapes) === 1
			? $shapes[0]
			: TypeCombinator::union(...$shapes);
	}

	/**
	 * Builds the matches type for one concrete regex.
	 *
	 * For single matches two special cases yield a more precise union:
	 * exactly one optional top-level group (only when the match is certain),
	 * or a single top-level alternation. Everything else gets one plain shape.
	 *
	 * @return Type|null null when the group parser cannot handle the regex
	 */
	private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
	{
		$parsed = $this->regexGroupParser->parseGroups($regex);
		if ($parsed === null) {
			return null;
		}
		[$groupList, $markVerbs] = $parsed;

		// Count the uninterrupted run of optional groups at the end of the list.
		$trailingOptionals = 0;
		foreach (array_reverse($groupList) as $candidate) {
			if (!$candidate->isOptional()) {
				break;
			}
			$trailingOptionals++;
		}

		$soleOptionalGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
		$soleAlternation = $this->getOnlyTopLevelAlternation($groupList);
		$flags ??= 0;

		if (!$matchesAll) {
			if ($soleOptionalGroup !== null) {
				if ($wasMatched->yes()) {
					return $this->buildOptionalGroupUnion($soleOptionalGroup, $groupList, $wasMatched, $trailingOptionals, $flags, $markVerbs);
				}
			} elseif ($soleAlternation !== null && !$wasMatched->no()) {
				return $this->buildAlternationUnion($soleAlternation, $groupList, $wasMatched, $trailingOptionals, $flags, $markVerbs);
			}
		}

		return $this->buildArrayType($groupList, $wasMatched, $trailingOptionals, $flags, $markVerbs, $matchesAll);
	}

	/**
	 * Single-match case with exactly one optional top-level group: the result
	 * is the shape with that group present, plus (unless unmatched groups are
	 * reported as null) a shape that only holds the overall match.
	 *
	 * @param array<RegexCapturingGroup> $groupList
	 * @param array<string> $markVerbs
	 */
	private function buildOptionalGroupUnion(
		RegexCapturingGroup $optionalGroup,
		array $groupList,
		TrinaryLogic $wasMatched,
		int $trailingOptionals,
		int $flags,
		array $markVerbs,
	): Type
	{
		$matchesAll = false;

		// Temporarily treat the group as required while building the "group matched" shape.
		$optionalGroup->forceNonOptional();

		$result = $this->buildArrayType($groupList, $wasMatched, $trailingOptionals, $flags, $markVerbs, $matchesAll);

		if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
			$result = TypeCombinator::union(
				$this->createSubjectOnlyShape($flags, $matchesAll),
				$result,
			);
		}

		$optionalGroup->clearOverrides();

		return $result;
	}

	/**
	 * Single-match case with exactly one top-level alternation: one shape is
	 * built per group combination of the alternation and the shapes are unioned.
	 *
	 * @param array<RegexCapturingGroup> $groupList keyed by the ids used in the alternation's group combinations
	 * @param array<string> $markVerbs
	 */
	private function buildAlternationUnion(
		RegexAlternation $alternation,
		array $groupList,
		TrinaryLogic $wasMatched,
		int $trailingOptionals,
		int $flags,
		array $markVerbs,
	): Type
	{
		$matchesAll = false;
		$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
		$combinations = $alternation->getGroupCombinations();

		$shapes = [];
		$alternationIsOptional = false;
		foreach ($combinations as $combination) {
			$groupsInShape = $groupList;

			$combinationReached = false;
			foreach ($groupsInShape as $groupId => $group) {
				if (in_array($groupId, $combination, true)) {
					// Groups of the branch being modelled are certainly present.
					$alternationIsOptional = $group->inOptionalAlternation();
					$group->forceNonOptional();
					$combinationReached = true;
					continue;
				}

				if (!$combinationReached && !$group->resetsGroupCounter()) {
					// Groups ahead of the branch are present but hold the "unmatched" value.
					$group->forceNonOptional();
					$group->forceType($unmatchedAsNull ? new NullType() : new ConstantStringType(''));
					continue;
				}

				if ($group->getAlternationId() === $alternation->getId() && !$unmatchedAsNull) {
					// Remaining groups of the same alternation are left out of this shape.
					unset($groupsInShape[$groupId]);
				}
			}

			$shapes[] = $this->buildArrayType($groupsInShape, $wasMatched, $trailingOptionals, $flags, $markVerbs, $matchesAll);

			foreach ($combination as $groupId) {
				$groupsInShape[$groupId]->clearOverrides();
			}
		}

		$hasBranchWithoutGroups = $alternation->getAlternationsCount() !== count($combinations);
		if (!$unmatchedAsNull && ($hasBranchWithoutGroups || $alternationIsOptional)) {
			// A match is possible without any group taking part.
			$shapes[] = $this->createSubjectOnlyShape($flags, $matchesAll);
		}

		return TypeCombinator::union(...$shapes);
	}

	/**
	 * Shape that contains nothing but the overall match under key 0.
	 */
	private function createSubjectOnlyShape(int $flags, bool $matchesAll): Type
	{
		// NOTE(review): the trailing constructor arguments are presumably
		// "next auto index 1, no optional keys, is a list" — confirm against ConstantArrayType.
		return new ConstantArrayType(
			[new ConstantIntegerType(0)],
			[$this->createSubjectValueType($flags, $matchesAll)],
			[1],
			[],
			TrinaryLogic::createYes(),
		);
	}

	/**
	 * Returns the single top-level group if it is optional and no other
	 * top-level group exists; null otherwise (including when there is none).
	 *
	 * @param array<RegexCapturingGroup> $captureGroups
	 */
	private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
	{
		$found = null;
		foreach ($captureGroups as $candidate) {
			if (!$candidate->isTopLevel()) {
				continue;
			}

			// A required top-level group, or a second top-level group, rules the case out.
			if (!$candidate->isOptional() || $found !== null) {
				return null;
			}

			$found = $candidate;
		}

		return $found;
	}

	/**
	 * Returns the alternation shared by all top-level groups, or null when a
	 * top-level group is outside an alternation, sits in an optional
	 * quantification, or belongs to a different alternation than the others.
	 *
	 * @param array<RegexCapturingGroup> $captureGroups
	 */
	private function getOnlyTopLevelAlternation(array $captureGroups): ?RegexAlternation
	{
		$shared = null;
		foreach ($captureGroups as $candidate) {
			if (!$candidate->isTopLevel()) {
				continue;
			}

			if (!$candidate->inAlternation() || $candidate->inOptionalQuantification()) {
				return null;
			}

			$current = $candidate->getAlternation();
			if ($shared === null) {
				$shared = $current;
				continue;
			}

			if ($shared->getId() !== $current->getId()) {
				return null;
			}
		}

		return $shared;
	}

	/**
	 * Assembles the matches array: key 0 for the overall match, then for each
	 * group its name (if named) followed by its 1-based position, and finally
	 * an optional 'MARK' entry when mark verbs were found.
	 *
	 * With "match all" and PREG_SET_ORDER the shape is wrapped in a list,
	 * unioned with an empty array unless the match is certain.
	 *
	 * @param array<RegexCapturingGroup> $captureGroups
	 * @param array<string> $markVerbs
	 */
	private function buildArrayType(
		array $captureGroups,
		TrinaryLogic $wasMatched,
		int $trailingOptionals,
		int $flags,
		array $markVerbs,
		bool $matchesAll,
	): Type
	{
		$builder = ConstantArrayTypeBuilder::createEmpty();

		$builder->setOffsetValueType(
			$this->getKeyType(0),
			$this->createSubjectValueType($flags, $matchesAll),
			$this->isSubjectOptional($wasMatched, $matchesAll),
		);

		$groupCount = count($captureGroups);
		$firstTrailingPosition = $groupCount - $trailingOptionals;
		$position = 0;
		foreach ($captureGroups as $captureGroup) {
			$isTrailingOptional = $position >= $firstTrailingPosition;
			$isLastGroup = $position + 1 === $groupCount;
			$valueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $isLastGroup, $matchesAll);
			$isOptional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);

			// The name key must be registered before the numeric key to keep the key order.
			$keys = [];
			if ($captureGroup->isNamed()) {
				$keys[] = $captureGroup->getName();
			}
			$keys[] = $position + 1;

			foreach ($keys as $key) {
				$builder->setOffsetValueType($this->getKeyType($key), $valueType, $isOptional);
			}

			$position++;
		}

		if ($markVerbs !== []) {
			$markTypes = [];
			foreach ($markVerbs as $markVerb) {
				$markTypes[] = new ConstantStringType($markVerb);
			}

			$builder->setOffsetValueType(
				$this->getKeyType('MARK'),
				TypeCombinator::union(...$markTypes),
				true,
			);
		}

		$shape = $builder->getArray();
		if (!$matchesAll || !$this->containsSetOrder($flags)) {
			return $shape;
		}

		$listOfShapes = TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $shape),
			new AccessoryArrayListType(),
		);
		if ($wasMatched->yes()) {
			return $listOfShapes;
		}

		return TypeCombinator::union(new ConstantArrayType([], []), $listOfShapes);
	}

	/**
	 * Key 0 is optional only for single matches that are not certain.
	 */
	private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
	{
		return !$matchesAll && !$wasMatched->yes();
	}

	/**
	 * Type of the overall match: a never-null string value (shaped by the
	 * flags), wrapped in a list for "match all" in pattern order.
	 */
	private function createSubjectValueType(int $flags, bool $matchesAll): Type
	{
		$subjectType = TypeCombinator::removeNull(
			$this->getValueType(new StringType(), $flags, $matchesAll),
		);

		if (!$matchesAll || !$this->containsPatternOrder($flags)) {
			return $subjectType;
		}

		return TypeCombinator::intersect(
			new ArrayType(new IntegerType(), $subjectType),
			new AccessoryArrayListType(),
		);
	}

	/**
	 * Decides whether the key of a group may be absent from the matches array.
	 */
	private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
	{
		if ($matchesAll) {
			return $isTrailingOptional
				&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
				&& $this->containsSetOrder($flags);
		}

		if (!$wasMatched->yes()) {
			return true;
		}

		if (!$isTrailingOptional || $this->containsUnmatchedAsNull($flags, $matchesAll)) {
			return false;
		}

		return $captureGroup->isOptional();
	}

	/**
	 * Value type stored for one capturing group.
	 *
	 * An optional group that can be present-but-unmatched additionally allows
	 * the empty string; a required, non-trailing group never holds null even
	 * when unmatched groups are reported as null.
	 */
	private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $isLastGroup, bool $matchesAll): Type
	{
		$unmatchedAsNull = $this->containsUnmatchedAsNull($flags, $matchesAll);
		$groupIsOptional = $captureGroup->isOptional();

		if ($matchesAll) {
			// Pattern order: every optional group; set order: only non-trailing optional groups.
			$mayBeEmptyString = !$unmatchedAsNull
				&& $groupIsOptional
				&& (!$this->containsSetOrder($flags) || !$isTrailingOptional);
		} else {
			$mayBeEmptyString = !$isLastGroup && !$unmatchedAsNull && $groupIsOptional;
		}

		$baseType = $captureGroup->getType();
		if ($mayBeEmptyString) {
			$baseType = TypeCombinator::union($baseType, new ConstantStringType(''));
		}

		$valueType = $this->getValueType($baseType, $flags, $matchesAll);
		if ($matchesAll && $mayBeEmptyString) {
			$valueType = TypeCombinator::removeNull($valueType);
		}

		$alwaysCaptured = !$isTrailingOptional && $unmatchedAsNull && !$groupIsOptional;
		if ($alwaysCaptured && ($matchesAll || $wasMatched->yes())) {
			$valueType = TypeCombinator::removeNull($valueType);
		}

		if ($matchesAll && $this->containsPatternOrder($flags)) {
			$valueType = TypeCombinator::intersect(
				new ArrayType(new IntegerType(), $valueType),
				new AccessoryArrayListType(),
			);
		}

		return $valueType;
	}

	/**
	 * Tells whether any bit of $flag is set in $flags.
	 */
	private function hasFlag(int $flags, int $flag): bool
	{
		return ($flags & $flag) !== 0;
	}

	private function containsOffsetCapture(int $flags): bool
	{
		return $this->hasFlag($flags, PREG_OFFSET_CAPTURE);
	}

	/**
	 * Pattern order is assumed whenever set order is not requested.
	 */
	private function containsPatternOrder(int $flags): bool
	{
		return !$this->containsSetOrder($flags);
	}

	private function containsSetOrder(int $flags): bool
	{
		return $this->hasFlag($flags, PREG_SET_ORDER);
	}

	/**
	 * Tells whether unmatched groups are reported as null.
	 *
	 * For "match all" the PREG_UNMATCHED_AS_NULL bit alone decides. For single
	 * matches the bit additionally needs either the
	 * PREG_UNMATCHED_AS_NULL_ON_72_73 marker or support from the PhpVersion.
	 */
	private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
	{
		if (!$this->hasFlag($flags, PREG_UNMATCHED_AS_NULL)) {
			return false;
		}

		if ($matchesAll) {
			return true;
		}

		return $this->hasFlag($flags, self::PREG_UNMATCHED_AS_NULL_ON_72_73)
			|| $this->phpVersion->supportsPregUnmatchedAsNull();
	}

	/**
	 * Wraps an array key in the matching constant type.
	 */
	private function getKeyType(int|string $key): Type
	{
		return is_string($key)
			? new ConstantStringType($key)
			: new ConstantIntegerType($key);
	}

	/**
	 * Applies the flags to a base value type: adds null when unmatched groups
	 * are reported as null, and with PREG_OFFSET_CAPTURE wraps the value in a
	 * two-element array of [value, offset].
	 */
	private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
	{
		$valueType = $this->containsUnmatchedAsNull($flags, $matchesAll)
			? TypeCombinator::addNull($baseType)
			: $baseType;

		if (!$this->containsOffsetCapture($flags)) {
			return $valueType;
		}

		$pairBuilder = ConstantArrayTypeBuilder::createEmpty();
		$pairBuilder->setOffsetValueType(new ConstantIntegerType(0), $valueType);
		// The offset range starts at -1; no upper bound is given.
		$pairBuilder->setOffsetValueType(new ConstantIntegerType(1), IntegerRangeType::fromInterval(-1, null));

		return $pairBuilder->getArray();
	}

	/**
	 * Resolves the type of the pattern expression; concatenations are handed
	 * to the regex expression helper, anything else is read from the scope.
	 */
	private function getPatternType(Expr $patternExpr, Scope $scope): Type
	{
		return $patternExpr instanceof Expr\BinaryOp\Concat
			? $this->regexExpressionHelper->resolvePatternConcat($patternExpr, $scope)
			: $scope->getType($patternExpr);
	}

}
| 540: | |