| 1: | <?php declare(strict_types = 1); |
| 2: | |
| 3: | namespace PHPStan\Type\Php; |
| 4: | |
| 5: | use Hoa\Compiler\Llk\Llk; |
| 6: | use Hoa\Compiler\Llk\Parser; |
| 7: | use Hoa\Compiler\Llk\TreeNode; |
| 8: | use Hoa\Exception\Exception; |
| 9: | use Hoa\File\Read; |
| 10: | use PHPStan\TrinaryLogic; |
| 11: | use PHPStan\Type\Constant\ConstantArrayType; |
| 12: | use PHPStan\Type\Constant\ConstantArrayTypeBuilder; |
| 13: | use PHPStan\Type\Constant\ConstantIntegerType; |
| 14: | use PHPStan\Type\Constant\ConstantStringType; |
| 15: | use PHPStan\Type\IntegerRangeType; |
| 16: | use PHPStan\Type\StringType; |
| 17: | use PHPStan\Type\Type; |
| 18: | use PHPStan\Type\TypeCombinator; |
| 19: | use function array_key_exists; |
| 20: | use function array_reverse; |
| 21: | use function count; |
| 22: | use function in_array; |
| 23: | use function is_string; |
| 24: | use function str_contains; |
| 25: | use const PREG_OFFSET_CAPTURE; |
| 26: | use const PREG_UNMATCHED_AS_NULL; |
| 27: | |
| 28: | |
| 29: | |
| 30: | |
| 31: | final class RegexArrayShapeMatcher |
| 32: | { |
| 33: | |
| 34: | private static ?Parser $parser = null; |
| 35: | |
/**
 * Infers the type of the matches array produced for the given pattern type.
 *
 * Returns an empty constant array when $wasMatched is "no". Returns null
 * (meaning "no inference possible") when:
 *  - the pattern type carries no constant strings,
 *  - $flagsType is given but is not a constant integer made up solely of
 *    PREG_OFFSET_CAPTURE and/or PREG_UNMATCHED_AS_NULL bits (an explicit 0 is accepted
 *    and treated like an omitted flags argument),
 *  - any of the constant patterns could not be analysed.
 *
 * Otherwise the result is the shape for the single pattern, or the union of the shapes
 * of all constant patterns.
 */
public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched): ?Type
{
	if ($wasMatched->no()) {
		return new ConstantArrayType([], []);
	}

	$constantStrings = $patternType->getConstantStrings();
	if (count($constantStrings) === 0) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$flags = $flagsType->getValue();

		// Any bit outside of the two supported flags changes the result in ways
		// this class does not model, so bail out. A value of 0 has no bits set
		// and therefore passes, behaving exactly like an omitted argument.
		$supportedFlags = PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL;
		if (($flags & ~$supportedFlags) !== 0) {
			return null;
		}
	}

	$matchedTypes = [];
	foreach ($constantStrings as $constantString) {
		$matched = $this->matchRegex($constantString->getValue(), $flags, $wasMatched);
		if ($matched === null) {
			// one pattern that cannot be analysed makes the whole union unknown
			return null;
		}

		$matchedTypes[] = $matched;
	}

	if (count($matchedTypes) === 1) {
		return $matchedTypes[0];
	}

	return TypeCombinator::union(...$matchedTypes);
}
| 75: | |
| 76: | |
| 77: | |
| 78: | |
/**
 * Builds the matches-array type for one concrete regex string.
 *
 * @return Type|null null when the regex could not be parsed into capturing groups
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
{
	$parsed = $this->parseGroups($regex);
	if ($parsed === null) {
		// the pattern could not be parsed, so nothing can be inferred
		return null;
	}
	[$groups, $combinationsByAlternation] = $parsed;

	// count how many groups at the very end of the group list are optional
	$trailingOptionalCount = 0;
	foreach (array_reverse($groups) as $candidate) {
		if ($candidate->isOptional()) {
			$trailingOptionalCount++;
			continue;
		}

		break;
	}

	$valueType = $this->getValueType($flags ?? 0);
	$soleOptionalGroup = $this->getOnlyOptionalTopLevelGroup($groups);
	$soleAlternationId = $this->getOnlyTopLevelAlternationId($groups);
	$definitelyMatched = $wasMatched->yes();

	if ($definitelyMatched && $soleOptionalGroup !== null) {
		// Exactly one optional top-level group: describe the result as a union of
		// "only the full match" and "full match plus that group".
		$soleOptionalGroup->forceNonOptional();

		$withGroup = $this->buildArrayType(
			$groups,
			$valueType,
			$wasMatched,
			$trailingOptionalCount,
		);

		return TypeCombinator::union(
			new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()], [0], [], true),
			$withGroup,
		);
	}

	$hasSoleAlternation = $definitelyMatched
		&& $soleAlternationId !== null
		&& array_key_exists($soleAlternationId, $combinationsByAlternation);

	if (!$hasSoleAlternation) {
		return $this->buildArrayType(
			$groups,
			$valueType,
			$wasMatched,
			$trailingOptionalCount,
		);
	}

	// All top-level groups belong to one alternation: build one shape per branch.
	$shapes = [];
	$optionalAlternation = false;
	foreach ($combinationsByAlternation[$soleAlternationId] as $combination) {
		$remaining = $groups;

		$precedesCombination = true;
		foreach ($remaining as $id => $candidate) {
			if (in_array($id, $combination, true)) {
				// group belongs to the branch currently being described
				$optionalAlternation = $candidate->inOptionalAlternation();
				$candidate->forceNonOptional();
				$precedesCombination = false;
				continue;
			}

			if ($precedesCombination && !$candidate->resetsGroupCounter()) {
				// group sits before the current branch
				$candidate->forceNonOptional();
				continue;
			}

			if ($candidate->getAlternationId() !== $soleAlternationId) {
				continue;
			}

			// group of another branch of the same alternation: drop it from this shape
			unset($remaining[$id]);
		}

		$shapes[] = $this->buildArrayType(
			$remaining,
			$valueType,
			$wasMatched,
			$trailingOptionalCount,
		);

		// undo the forced state on this branch's groups before handling the next branch
		foreach ($combination as $id) {
			$remaining[$id]->restoreNonOptional();
		}
	}

	if ($optionalAlternation) {
		$shapes[] = new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()], [0], [], true);
	}

	return TypeCombinator::union(...$shapes);
}
| 172: | |
| 173: | |
| 174: | |
| 175: | |
/**
 * Returns the single top-level capturing group if it is optional.
 *
 * Yields null when there is no top-level group, when more than one top-level
 * group exists, or when any top-level group is not optional.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
	$found = null;
	foreach ($captureGroups as $candidate) {
		if ($candidate->isTopLevel()) {
			// a mandatory top-level group or a second top-level group disqualifies the pattern
			if (!$candidate->isOptional() || $found !== null) {
				return null;
			}

			$found = $candidate;
		}
	}

	return $found;
}
| 197: | |
| 198: | |
| 199: | |
| 200: | |
/**
 * Returns the alternation id shared by every top-level capturing group.
 *
 * Yields null when there are no top-level groups, when a top-level group is
 * not part of an alternation, or when top-level groups belong to different alternations.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
{
	$sharedId = null;
	foreach ($captureGroups as $candidate) {
		if (!$candidate->isTopLevel()) {
			continue;
		}

		if (!$candidate->inAlternation()) {
			return null;
		}

		// the first top-level group defines the id all others have to agree with
		$sharedId ??= $candidate->getAlternationId();
		if ($sharedId !== $candidate->getAlternationId()) {
			return null;
		}
	}

	return $sharedId;
}
| 222: | |
| 223: | |
| 224: | |
| 225: | |
/**
 * Assembles the constant array shape for the given capturing groups.
 *
 * Offset 0 (the full match) gets the value type with null removed and is optional
 * unless the match is certain. Each group contributes its 1-based position as key and,
 * when named, additionally its name. While the match is not certain every key is
 * optional; otherwise only groups within the last $trailingOptionals positions may be
 * optional, and only if the group itself reports being optional.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function buildArrayType(
	array $captureGroups,
	Type $valueType,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
): Type
{
	$maybeUnmatched = !$wasMatched->yes();

	$shape = ConstantArrayTypeBuilder::createEmpty();
	$shape->setOffsetValueType(
		$this->getKeyType(0),
		TypeCombinator::removeNull($valueType),
		$maybeUnmatched,
	);

	// groups up to this 1-based position are always present on a certain match
	$mandatoryCount = count($captureGroups) - $trailingOptionals;

	$groupNumber = 0;
	foreach ($captureGroups as $group) {
		$groupNumber++;

		$isOptionalKey = $maybeUnmatched
			|| ($groupNumber > $mandatoryCount && $group->isOptional());

		if ($group->isNamed()) {
			// named groups show up under their name first, then under their number
			$shape->setOffsetValueType(
				$this->getKeyType($group->getName()),
				$valueType,
				$isOptionalKey,
			);
		}

		$shape->setOffsetValueType(
			$this->getKeyType($groupNumber),
			$valueType,
			$isOptionalKey,
		);
	}

	return $shape->getArray();
}
| 274: | |
/**
 * Wraps an array key in the matching constant type: strings become a
 * ConstantStringType, integers a ConstantIntegerType.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
| 283: | |
/**
 * Determines the type of a single entry of the matches array for the given flags.
 *
 * Without flags the entry is a string. PREG_UNMATCHED_AS_NULL makes it nullable.
 * PREG_OFFSET_CAPTURE turns it into a two-element array of the (possibly nullable)
 * string at offset 0 and an integer offset at offset 1; the offset range starts at -1
 * when PREG_UNMATCHED_AS_NULL is set and at 0 otherwise.
 */
private function getValueType(int $flags): Type
{
	$unmatchedAsNull = ($flags & PREG_UNMATCHED_AS_NULL) !== 0;

	$stringType = new StringType();
	if ($unmatchedAsNull) {
		$stringType = TypeCombinator::addNull($stringType);
	}

	if (($flags & PREG_OFFSET_CAPTURE) === 0) {
		return $stringType;
	}

	// with PREG_UNMATCHED_AS_NULL the offset range is widened to start at -1
	$offsetType = IntegerRangeType::fromInterval($unmatchedAsNull ? -1 : 0, null);

	$pair = ConstantArrayTypeBuilder::createEmpty();
	$pair->setOffsetValueType(new ConstantIntegerType(0), $stringType);
	$pair->setOffsetValueType(new ConstantIntegerType(1), $offsetType);

	return $pair->getArray();
}
| 311: | |
| 312: | |
| 313: | |
| 314: | |
/**
 * Parses the regex and collects its capturing groups.
 *
 * The parser is created on first use and cached in a static property.
 *
 * @return array{array<int, RegexCapturingGroup>, array<int, array<int, int[]>>}|null
 *         the capturing groups keyed by group id and the group ids per alternation id
 *         and combination index; null when the parser rejects the regex
 */
private function parseGroups(string $regex): ?array
{
	// lazily create the shared parser from the regex grammar shipped with Hoa
	self::$parser ??= Llk::load(new Read('hoa://Library/Regex/Grammar.pp'));

	try {
		$tree = self::$parser->parse($regex);
	} catch (Exception) {
		return null;
	}

	// the walker fills these through its by-reference parameters
	$groups = [];
	$combinations = [];
	$alternationCounter = -1;
	$nextGroupId = 100; // group ids are handed out starting at 100
	$this->walkRegexAst(
		$tree,
		false,
		$alternationCounter,
		0,
		false,
		null,
		$nextGroupId,
		$groups,
		$combinations,
	);

	return [$groups, $combinations];
}
| 346: | |
| 347: | |
| 348: | |
| 349: | |
| 350: | |
/**
 * Recursively walks the regex AST and records every capturing group it finds.
 *
 * @param TreeNode $ast node to inspect; its children are visited recursively
 * @param bool $inAlternation whether the node lies inside an alternation
 * @param int $alternationId running alternation counter, incremented for each '#alternation' node
 * @param int $combinationIndex index of the alternation branch the node belongs to
 * @param bool $inOptionalQuantification whether the direct parent node is a quantification that allows zero repetitions
 * @param RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup closest enclosing group, if any
 * @param int $captureGroupId next id to assign to a capturing group
 * @param array<int, RegexCapturingGroup> $capturingGroups collected groups, keyed by group id
 * @param array<int, array<int, int[]>> $groupCombinations collected group ids per alternation id and combination index
 */
private function walkRegexAst(
	TreeNode $ast,
	bool $inAlternation,
	int &$alternationId,
	int $combinationIndex,
	bool $inOptionalQuantification,
	RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
	int &$captureGroupId,
	array &$capturingGroups,
	array &$groupCombinations,
): void
{
	$group = null;
	if ($ast->getId() === '#capturing') {
		$group = new RegexCapturingGroup(
			$captureGroupId++,
			null,
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
		);
		$parentGroup = $group;
	} elseif ($ast->getId() === '#namedcapturing') {
		$name = $ast->getChild(0)->getValue()['value'];
		$group = new RegexCapturingGroup(
			$captureGroupId++,
			$name,
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
		);
		$parentGroup = $group;
	} elseif ($ast->getId() === '#noncapturing') {
		$group = new RegexNonCapturingGroup(
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
			false,
		);
		$parentGroup = $group;
	} elseif ($ast->getId() === '#noncapturingreset') {
		$group = new RegexNonCapturingGroup(
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
			true,
		);
		$parentGroup = $group;
	}

	// The flag only describes the direct children of a quantification node,
	// so it is recomputed for every node instead of being inherited.
	$inOptionalQuantification = false;
	if ($ast->getId() === '#quantification') {
		$lastChild = $ast->getChild($ast->getChildrenNumber() - 1);
		$value = $lastChild->getValue();

		if (
			str_contains($value['token'], 'zero_or_one')
			|| str_contains($value['token'], 'zero_or_more')
		) {
			// substring match so that lazy/possessive variants of "?" and "*"
			// (token names carrying a suffix) are recognised as well
			$inOptionalQuantification = true;
		} elseif (
			str_contains($value['value'], '{0,')
			|| str_contains($value['value'], '{0}')
		) {
			// brace quantifiers with a lower bound of zero: {0,m}, {0,} and {0},
			// regardless of a trailing lazy/possessive modifier
			$inOptionalQuantification = true;
		}
	}

	$isAlternation = $ast->getId() === '#alternation';
	if ($isAlternation) {
		$alternationId++;
		$inAlternation = true;
	}

	if ($group instanceof RegexCapturingGroup) {
		$capturingGroups[$group->getId()] = $group;

		// missing intermediate arrays are created implicitly by PHP
		$groupCombinations[$alternationId][$combinationIndex][] = $group->getId();
	}

	foreach ($ast->getChildren() as $child) {
		$this->walkRegexAst(
			$child,
			$inAlternation,
			$alternationId,
			$combinationIndex,
			$inOptionalQuantification,
			$parentGroup,
			$captureGroupId,
			$capturingGroups,
			$groupCombinations,
		);

		if (!$isAlternation) {
			continue;
		}

		// every direct child of an alternation node is a separate branch
		$combinationIndex++;
	}
}
| 452: | |
| 453: | } |
| 454: | |