1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\Type\Php;
4:
5: use Hoa\Compiler\Llk\Llk;
6: use Hoa\Compiler\Llk\Parser;
7: use Hoa\Compiler\Llk\TreeNode;
8: use Hoa\Exception\Exception;
9: use Hoa\File\Read;
10: use PHPStan\TrinaryLogic;
11: use PHPStan\Type\Constant\ConstantArrayType;
12: use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
13: use PHPStan\Type\Constant\ConstantIntegerType;
14: use PHPStan\Type\Constant\ConstantStringType;
15: use PHPStan\Type\IntegerRangeType;
16: use PHPStan\Type\StringType;
17: use PHPStan\Type\Type;
18: use PHPStan\Type\TypeCombinator;
19: use function array_key_exists;
20: use function array_reverse;
21: use function count;
22: use function in_array;
23: use function is_string;
24: use function str_contains;
25: use const PREG_OFFSET_CAPTURE;
26: use const PREG_UNMATCHED_AS_NULL;
27:
28: /**
29: * @api
30: */
31: final class RegexArrayShapeMatcher
32: {
33:
34: private static ?Parser $parser = null;
35:
/**
 * Infers the shape of the matches array for a pattern type.
 *
 * Returns an empty constant array when the match is known to have failed.
 * Returns null when no shape can be inferred: the pattern has no constant
 * string values, the flags are not one of the supported constant values,
 * or one of the constant patterns could not be turned into a shape.
 * Otherwise the shapes of all constant patterns are unioned.
 */
public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched): ?Type
{
	if ($wasMatched->no()) {
		return new ConstantArrayType([], []);
	}

	$patterns = $patternType->getConstantStrings();
	if ($patterns === []) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		// only these flag combinations are understood by the shape builder
		$supportedFlags = [
			PREG_OFFSET_CAPTURE,
			PREG_UNMATCHED_AS_NULL,
			PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL,
		];

		$flags = $flagsType->getValue();
		if (!in_array($flags, $supportedFlags, true)) {
			return null;
		}
	}

	$shapes = [];
	foreach ($patterns as $pattern) {
		$shape = $this->matchRegex($pattern->getValue(), $flags, $wasMatched);
		if ($shape === null) {
			// a single pattern without a shape makes the whole result unknown
			return null;
		}

		$shapes[] = $shape;
	}

	return count($shapes) === 1
		? $shapes[0]
		: TypeCombinator::union(...$shapes);
}
75:
/**
 * Builds the matches array shape for one concrete regex.
 *
 * Returns null when the regex could not be parsed. When the match is known
 * to have succeeded, two situations are refined into a union of shapes:
 * exactly one optional top-level capturing group, and top-level capturing
 * groups that all belong to the same alternation. In every other case a
 * single shape covering all capturing groups is returned.
 *
 * The "empty match" variant (only key 0 present) is built via
 * buildArrayType() so that the value at key 0 follows $flags, the same way
 * it does in the other variants of the union.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL>|null $flags
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
{
	$parseResult = $this->parseGroups($regex);
	if ($parseResult === null) {
		// regex could not be parsed by Hoa/Regex
		return null;
	}
	[$groupList, $groupCombinations] = $parseResult;

	// number of consecutive optional groups at the end of the group list
	$trailingOptionals = 0;
	foreach (array_reverse($groupList) as $captureGroup) {
		if (!$captureGroup->isOptional()) {
			break;
		}
		$trailingOptionals++;
	}

	$valueType = $this->getValueType($flags ?? 0);
	$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
	$onlyTopLevelAlternationId = $this->getOnlyTopLevelAlternationId($groupList);

	if (
		$wasMatched->yes()
		&& $onlyOptionalTopLevelGroup !== null
	) {
		// if only one top level capturing optional group exists
		// we build a more precise constant union of an empty-match and a match with the group

		$onlyOptionalTopLevelGroup->forceNonOptional();

		$combiType = $this->buildArrayType(
			$groupList,
			$valueType,
			$wasMatched,
			$trailingOptionals,
		);

		return TypeCombinator::union(
			// overall match only; its value type honours the flags
			$this->buildArrayType([], $valueType, $wasMatched, 0),
			$combiType,
		);
	}

	if (
		$wasMatched->yes()
		&& $onlyTopLevelAlternationId !== null
		&& array_key_exists($onlyTopLevelAlternationId, $groupCombinations)
	) {
		// one shape per alternation branch
		$combiTypes = [];
		$isOptionalAlternation = false;
		foreach ($groupCombinations[$onlyTopLevelAlternationId] as $groupCombo) {
			$comboList = $groupList;

			$beforeCurrentCombo = true;
			foreach ($comboList as $groupId => $group) {
				if (in_array($groupId, $groupCombo, true)) {
					// group belongs to the branch currently being built
					$isOptionalAlternation = $group->inOptionalAlternation();
					$group->forceNonOptional();
					$beforeCurrentCombo = false;
				} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
					// groups preceding the current branch keep their slot in the result
					$group->forceNonOptional();
				} elseif ($group->getAlternationId() === $onlyTopLevelAlternationId) {
					// groups of other branches are left out of this shape
					unset($comboList[$groupId]);
				}
			}

			$combiTypes[] = $this->buildArrayType(
				$comboList,
				$valueType,
				$wasMatched,
				$trailingOptionals,
			);

			foreach ($groupCombo as $groupId) {
				$group = $comboList[$groupId];
				$group->restoreNonOptional();
			}
		}

		if ($isOptionalAlternation) {
			// overall match only; its value type honours the flags
			$combiTypes[] = $this->buildArrayType([], $valueType, $wasMatched, 0);
		}

		return TypeCombinator::union(...$combiTypes);
	}

	return $this->buildArrayType(
		$groupList,
		$valueType,
		$wasMatched,
		$trailingOptionals,
	);
}
172:
/**
 * Returns the single top-level capturing group when it is optional.
 *
 * Yields null when there is no top-level group, more than one top-level
 * group, or a top-level group that is not optional.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
	$candidate = null;
	foreach ($captureGroups as $current) {
		if (!$current->isTopLevel()) {
			continue;
		}

		// a mandatory top-level group or a second top-level group disqualifies the regex
		if (!$current->isOptional() || $candidate !== null) {
			return null;
		}

		$candidate = $current;
	}

	return $candidate;
}
197:
/**
 * Returns the alternation id shared by all top-level capturing groups.
 *
 * Yields null when there is no top-level group, when a top-level group is
 * not part of an alternation, or when top-level groups belong to different
 * alternations.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
{
	$sharedId = null;
	foreach ($captureGroups as $topLevelCandidate) {
		if (!$topLevelCandidate->isTopLevel()) {
			continue;
		}

		if (!$topLevelCandidate->inAlternation()) {
			return null;
		}

		$currentId = $topLevelCandidate->getAlternationId();
		if ($sharedId !== null && $sharedId !== $currentId) {
			// groups are spread over more than one alternation
			return null;
		}

		$sharedId ??= $currentId;
	}

	return $sharedId;
}
222:
/**
 * Assembles the matches array shape for the given capturing groups.
 *
 * Key 0 always holds the overall match with null removed from its type.
 * Each group contributes a numeric key (position + 1) and, for named
 * groups, an additional string key registered before the numeric one.
 * Unless the match is known to have succeeded every key is optional;
 * otherwise only groups within the last $trailingOptionals positions can
 * be optional, and only when the group itself reports being optional.
 *
 * @param array<RegexCapturingGroup> $captureGroups
 */
private function buildArrayType(
	array $captureGroups,
	Type $valueType,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
): Type
{
	$definitelyMatched = $wasMatched->yes();
	$shapeBuilder = ConstantArrayTypeBuilder::createEmpty();

	// first item in matches contains the overall match.
	$shapeBuilder->setOffsetValueType(
		$this->getKeyType(0),
		TypeCombinator::removeNull($valueType),
		!$definitelyMatched,
	);

	// positions before this index can never be optional on a successful match
	$firstTrailingPosition = count($captureGroups) - $trailingOptionals;

	$position = 0;
	foreach ($captureGroups as $captureGroup) {
		$isOptionalKey = !$definitelyMatched
			|| ($position >= $firstTrailingPosition && $captureGroup->isOptional());

		if ($captureGroup->isNamed()) {
			$shapeBuilder->setOffsetValueType(
				$this->getKeyType($captureGroup->getName()),
				$valueType,
				$isOptionalKey,
			);
		}

		$shapeBuilder->setOffsetValueType(
			$this->getKeyType($position + 1),
			$valueType,
			$isOptionalKey,
		);

		$position++;
	}

	return $shapeBuilder->getArray();
}
274:
/**
 * Wraps an array key into the matching constant type: a constant string
 * type for string keys, a constant integer type for integer keys.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
283:
/**
 * Computes the type of a single entry of the matches array for the flags.
 *
 * Without flags an entry is a string. PREG_UNMATCHED_AS_NULL makes it
 * nullable. PREG_OFFSET_CAPTURE turns it into a two-element array holding
 * the (possibly nullable) string at key 0 and the offset at key 1.
 */
private function getValueType(int $flags): Type
{
	$unmatchedAsNull = ($flags & PREG_UNMATCHED_AS_NULL) !== 0;
	$withOffsets = ($flags & PREG_OFFSET_CAPTURE) !== 0;

	$capturedType = new StringType();
	if ($unmatchedAsNull) {
		$capturedType = TypeCombinator::addNull($capturedType);
	}

	if (!$withOffsets) {
		return $capturedType;
	}

	// unmatched groups return -1 as offset
	$offsetType = IntegerRangeType::fromInterval($unmatchedAsNull ? -1 : 0, null);

	$pairBuilder = ConstantArrayTypeBuilder::createEmpty();
	$pairBuilder->setOffsetValueType(new ConstantIntegerType(0), $capturedType);
	$pairBuilder->setOffsetValueType(new ConstantIntegerType(1), $offsetType);

	return $pairBuilder->getArray();
}
311:
/**
 * Parses the regex and collects its capturing groups.
 *
 * The parser is created on first use and kept in a static property.
 * Returns null when parsing throws a Hoa exception. Otherwise returns the
 * capturing groups keyed by group id, plus the group ids organised by
 * alternation id and combination index as filled in by walkRegexAst().
 *
 * @return array{array<int, RegexCapturingGroup>, array<int, array<int, int[]>>}|null
 */
private function parseGroups(string $regex): ?array
{
	/** @throws void */
	self::$parser ??= Llk::load(new Read('hoa://Library/Regex/Grammar.pp'));

	try {
		$rootNode = self::$parser->parse($regex);
	} catch (Exception) {
		return null;
	}

	// accumulators and counters handed to the walker by reference
	$groupsById = [];
	$combinationsByAlternation = [];
	$alternationCounter = -1;
	$nextGroupId = 100;

	$this->walkRegexAst(
		$rootNode,
		false,
		$alternationCounter,
		0,
		false,
		null,
		$nextGroupId,
		$groupsById,
		$combinationsByAlternation,
	);

	return [$groupsById, $combinationsByAlternation];
}
346:
/**
 * Recursively visits the regex AST and records every capturing group.
 *
 * Group nodes (capturing, named capturing, non-capturing and
 * non-capturing-reset) create a group object that becomes the parent group
 * for everything below it. Capturing groups are additionally stored in
 * $capturingGroups and registered in $groupCombinations under the current
 * alternation id and combination index.
 *
 * A quantification node marks its direct children as optionally quantified
 * when the quantifier allows zero repetitions. This covers `?`, `*`,
 * `{0,m}`, `{0,}` and `{0}`, including lazy and possessive variants.
 *
 * @param TreeNode $ast node currently being visited
 * @param bool $inAlternation whether an alternation node was seen on the path to this node
 * @param int $alternationId counter shared by reference, incremented for every alternation node
 * @param int $combinationIndex index of the alternation branch this node belongs to
 * @param bool $inOptionalQuantification whether the direct parent node is a quantification allowing zero repetitions
 * @param RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup closest enclosing group, if any
 * @param int $captureGroupId counter shared by reference, provides the id of the next capturing group
 * @param array<int, RegexCapturingGroup> $capturingGroups
 * @param array<int, array<int, int[]>> $groupCombinations
 */
private function walkRegexAst(
	TreeNode $ast,
	bool $inAlternation,
	int &$alternationId,
	int $combinationIndex,
	bool $inOptionalQuantification,
	RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
	int &$captureGroupId,
	array &$capturingGroups,
	array &$groupCombinations,
): void
{
	$nodeId = $ast->getId();
	$groupAlternationId = $inAlternation ? $alternationId : null;

	$group = null;
	if ($nodeId === '#capturing' || $nodeId === '#namedcapturing') {
		$name = $nodeId === '#namedcapturing'
			? $ast->getChild(0)->getValue()['value']
			: null;
		$group = new RegexCapturingGroup(
			$captureGroupId++,
			$name,
			$groupAlternationId,
			$inOptionalQuantification,
			$parentGroup,
		);
	} elseif ($nodeId === '#noncapturing' || $nodeId === '#noncapturingreset') {
		$group = new RegexNonCapturingGroup(
			$groupAlternationId,
			$inOptionalQuantification,
			$parentGroup,
			$nodeId === '#noncapturingreset',
		);
	}

	if ($group !== null) {
		$parentGroup = $group;
	}

	// the flag only applies to direct children of a quantification node
	$inOptionalQuantification = false;
	if ($nodeId === '#quantification') {
		$quantifier = $ast->getChild($ast->getChildrenNumber() - 1)->getValue();

		// lazy and possessive quantifiers use suffixed token names
		// (e.g. zero_or_one_lazy), so match on the common prefix
		$allowsZeroBySymbol = str_contains($quantifier['token'], 'zero_or_');
		// brace quantifiers with a lower bound of zero: {0,m}, {0,} and {0}
		$allowsZeroByRange = str_contains($quantifier['value'], '{0,')
			|| $quantifier['value'] === '{0}';

		$inOptionalQuantification = $allowsZeroBySymbol || $allowsZeroByRange;
	}

	$isAlternation = $nodeId === '#alternation';
	if ($isAlternation) {
		$alternationId++;
		$inAlternation = true;
	}

	if ($group instanceof RegexCapturingGroup) {
		$capturingGroups[$group->getId()] = $group;

		// missing intermediate arrays are created on assignment
		$groupCombinations[$alternationId][$combinationIndex][] = $group->getId();
	}

	foreach ($ast->getChildren() as $child) {
		$this->walkRegexAst(
			$child,
			$inAlternation,
			$alternationId,
			$combinationIndex,
			$inOptionalQuantification,
			$parentGroup,
			$captureGroupId,
			$capturingGroups,
			$groupCombinations,
		);

		// every direct child of an alternation is a separate combination
		if ($isAlternation) {
			$combinationIndex++;
		}
	}
}
452:
453: }
454: