1: | <?php declare(strict_types = 1); |
2: | |
3: | namespace PHPStan\Type\Php; |
4: | |
5: | use Hoa\Compiler\Llk\Llk; |
6: | use Hoa\Compiler\Llk\Parser; |
7: | use Hoa\Compiler\Llk\TreeNode; |
8: | use Hoa\Exception\Exception; |
9: | use Hoa\File\Read; |
10: | use PHPStan\TrinaryLogic; |
11: | use PHPStan\Type\Constant\ConstantArrayType; |
12: | use PHPStan\Type\Constant\ConstantArrayTypeBuilder; |
13: | use PHPStan\Type\Constant\ConstantIntegerType; |
14: | use PHPStan\Type\Constant\ConstantStringType; |
15: | use PHPStan\Type\IntegerRangeType; |
16: | use PHPStan\Type\StringType; |
17: | use PHPStan\Type\Type; |
18: | use PHPStan\Type\TypeCombinator; |
19: | use function array_key_exists; |
20: | use function array_reverse; |
21: | use function count; |
22: | use function in_array; |
23: | use function is_string; |
24: | use function str_contains; |
25: | use const PREG_OFFSET_CAPTURE; |
26: | use const PREG_UNMATCHED_AS_NULL; |
27: | |
28: | |
29: | |
30: | |
31: | final class RegexArrayShapeMatcher |
32: | { |
33: | |
34: | private static ?Parser $parser = null; |
35: | |
/**
 * Infers the type of the `$matches` array for a regex call.
 *
 * - When `$wasMatched` is "no", an empty constant array is returned.
 * - When the pattern has no constant string values, or the flags are not a
 *   constant integer made up solely of PREG_OFFSET_CAPTURE and/or
 *   PREG_UNMATCHED_AS_NULL bits, nothing can be inferred and null is returned.
 * - Otherwise every constant pattern is analysed and the resulting shapes are
 *   unioned; if any single pattern cannot be analysed the result is null.
 *
 * An explicit flags value of `0` is accepted and treated exactly like
 * omitted flags.
 */
public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched): ?Type
{
	if ($wasMatched->no()) {
		return new ConstantArrayType([], []);
	}

	$constantStrings = $patternType->getConstantStrings();
	if (count($constantStrings) === 0) {
		return null;
	}

	$flags = null;
	if ($flagsType !== null) {
		if (!$flagsType instanceof ConstantIntegerType) {
			return null;
		}

		$flags = $flagsType->getValue();

		// Any bit outside of the two supported flags means we cannot reason
		// about the resulting shape. A plain 0 carries no bits and is fine.
		$supportedFlags = PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL;
		if (($flags & ~$supportedFlags) !== 0) {
			return null;
		}
	}

	$matchedTypes = [];
	foreach ($constantStrings as $constantString) {
		$matched = $this->matchRegex($constantString->getValue(), $flags, $wasMatched);
		if ($matched === null) {
			return null;
		}

		$matchedTypes[] = $matched;
	}

	if (count($matchedTypes) === 1) {
		return $matchedTypes[0];
	}

	return TypeCombinator::union(...$matchedTypes);
}
75: | |
76: | |
77: | |
78: | |
/**
 * Builds the matches-array type for a single concrete pattern.
 *
 * Returns null when the pattern cannot be parsed. When the match is certain
 * and the pattern has exactly one optional top-level group, or all top-level
 * groups belong to one alternation, a union of more precise shapes is
 * produced; otherwise a single shape covering all groups is returned.
 *
 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL>|null $flags
 */
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
{
	$parsed = $this->parseGroups($regex);
	if ($parsed === null) {
		// the pattern could not be parsed
		return null;
	}
	[$groupList, $groupCombinations] = $parsed;

	// number of consecutive optional groups at the end of the group list
	$trailingOptionals = 0;
	foreach (array_reverse($groupList) as $candidate) {
		if (!$candidate->isOptional()) {
			break;
		}
		$trailingOptionals++;
	}

	$valueType = $this->getValueType($flags ?? 0);
	$soleOptionalGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
	$soleAlternationId = $this->getOnlyTopLevelAlternationId($groupList);

	if (!$wasMatched->yes()) {
		return $this->buildArrayType($groupList, $valueType, $wasMatched, $trailingOptionals);
	}

	if ($soleOptionalGroup !== null) {
		// Exactly one optional top-level group: describe the result as
		// "only the full match" OR "full match plus that group present".
		$soleOptionalGroup->forceNonOptional();

		$withGroup = $this->buildArrayType($groupList, $valueType, $wasMatched, $trailingOptionals);

		return TypeCombinator::union(
			new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()], [0], [], true),
			$withGroup,
		);
	}

	if ($soleAlternationId === null || !array_key_exists($soleAlternationId, $groupCombinations)) {
		return $this->buildArrayType($groupList, $valueType, $wasMatched, $trailingOptionals);
	}

	// All top-level groups belong to one alternation: build one shape per alternative.
	$shapes = [];
	$optionalAlternation = false;
	foreach ($groupCombinations[$soleAlternationId] as $comboIds) {
		$remaining = $groupList;
		$comboReached = false;

		foreach ($remaining as $id => $captureGroup) {
			if (in_array($id, $comboIds, true)) {
				// group belongs to the alternative currently being described
				$optionalAlternation = $captureGroup->inOptionalAlternation();
				$captureGroup->forceNonOptional();
				$comboReached = true;
				continue;
			}

			if (!$comboReached && !$captureGroup->resetsGroupCounter()) {
				// groups preceding the current alternative are kept as present keys
				$captureGroup->forceNonOptional();
				continue;
			}

			if ($captureGroup->getAlternationId() === $soleAlternationId) {
				// remaining groups of the same alternation are dropped from this shape
				unset($remaining[$id]);
			}
		}

		$shapes[] = $this->buildArrayType($remaining, $valueType, $wasMatched, $trailingOptionals);

		// undo the forced state of the current alternative before handling the next one
		foreach ($comboIds as $id) {
			$remaining[$id]->restoreNonOptional();
		}
	}

	if ($optionalAlternation) {
		$shapes[] = new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()], [0], [], true);
	}

	return TypeCombinator::union(...$shapes);
}
172: | |
173: | |
174: | |
175: | |
/**
 * Returns the single top-level capturing group if it is optional.
 *
 * Yields null when there is no top-level group, when more than one top-level
 * group exists, or when any top-level group is not optional.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
	$found = null;

	foreach ($captureGroups as $candidate) {
		if ($candidate->isTopLevel()) {
			// a mandatory top-level group, or a second top-level group, disqualifies the pattern
			if (!$candidate->isOptional() || $found !== null) {
				return null;
			}

			$found = $candidate;
		}
	}

	return $found;
}
197: | |
198: | |
199: | |
200: | |
/**
 * Returns the alternation id shared by all top-level capturing groups.
 *
 * Yields null when there is no top-level group, when a top-level group is
 * not part of an alternation, or when top-level groups report different
 * alternation ids.
 *
 * @param array<int, RegexCapturingGroup> $captureGroups
 */
private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
{
	$sharedId = null;

	foreach ($captureGroups as $candidate) {
		if ($candidate->isTopLevel()) {
			if (!$candidate->inAlternation()) {
				return null;
			}

			$currentId = $candidate->getAlternationId();

			// the first id seen becomes the reference; every later one has to agree with it
			$sharedId ??= $currentId;
			if ($sharedId !== $currentId) {
				return null;
			}
		}
	}

	return $sharedId;
}
222: | |
223: | |
224: | |
225: | |
/**
 * Assembles the constant array shape for the given capturing groups.
 *
 * Offset 0 holds the full match (never null). Every group adds its numeric
 * offset (1-based, in iteration order) and, for named groups, its name as an
 * additional key placed before the numeric one. All keys are optional unless
 * the match is certain; when it is, only groups within the last
 * `$trailingOptionals` positions may stay optional.
 *
 * @param array<RegexCapturingGroup> $captureGroups
 */
private function buildArrayType(
	array $captureGroups,
	Type $valueType,
	TrinaryLogic $wasMatched,
	int $trailingOptionals,
): Type
{
	$definitelyMatched = $wasMatched->yes();

	$shape = ConstantArrayTypeBuilder::createEmpty();

	// the first entry of the matches array is the overall match
	$shape->setOffsetValueType(
		$this->getKeyType(0),
		TypeCombinator::removeNull($valueType),
		!$definitelyMatched,
	);

	// groups up to (and including) this 1-based position are always present on a certain match
	$lastMandatoryPosition = count($captureGroups) - $trailingOptionals;

	$position = 0;
	foreach ($captureGroups as $captureGroup) {
		$position++;

		$optional = !$definitelyMatched
			|| ($position > $lastMandatoryPosition && $captureGroup->isOptional());

		if ($captureGroup->isNamed()) {
			$shape->setOffsetValueType(
				$this->getKeyType($captureGroup->getName()),
				$valueType,
				$optional,
			);
		}

		$shape->setOffsetValueType(
			$this->getKeyType($position),
			$valueType,
			$optional,
		);
	}

	return $shape->getArray();
}
274: | |
/**
 * Wraps an array key in the matching constant type: string keys become a
 * ConstantStringType, integer keys a ConstantIntegerType.
 */
private function getKeyType(int|string $key): Type
{
	return is_string($key)
		? new ConstantStringType($key)
		: new ConstantIntegerType($key);
}
283: | |
/**
 * Determines the type of a single entry of the matches array for the given flags.
 *
 * - no flag: string
 * - PREG_UNMATCHED_AS_NULL: string|null
 * - PREG_OFFSET_CAPTURE: a two-element array of [value, offset], where the
 *   offset is an integer >= 0, or >= -1 when PREG_UNMATCHED_AS_NULL is set too
 */
private function getValueType(int $flags): Type
{
	$unmatchedAsNull = ($flags & PREG_UNMATCHED_AS_NULL) !== 0;

	$valueType = $unmatchedAsNull
		? TypeCombinator::addNull(new StringType())
		: new StringType();

	if (($flags & PREG_OFFSET_CAPTURE) === 0) {
		return $valueType;
	}

	// with unmatched-as-null the offset range additionally admits -1
	$offsetType = IntegerRangeType::fromInterval($unmatchedAsNull ? -1 : 0, null);

	$pair = ConstantArrayTypeBuilder::createEmpty();
	$pair->setOffsetValueType(new ConstantIntegerType(0), $valueType);
	$pair->setOffsetValueType(new ConstantIntegerType(1), $offsetType);

	return $pair->getArray();
}
311: | |
312: | |
313: | |
314: | |
/**
 * Parses the pattern and collects its capturing groups.
 *
 * The grammar-based parser is created on first use and kept in a static
 * property for subsequent calls. Returns null when parsing throws a Hoa
 * exception.
 *
 * @return array{array<int, RegexCapturingGroup>, array<int, array<int, int[]>>}|null
 *         the capturing groups keyed by group id, and the group ids bucketed
 *         by alternation id and combination index
 */
private function parseGroups(string $regex): ?array
{
	self::$parser ??= Llk::load(new Read('hoa://Library/Regex/Grammar.pp'));

	try {
		$tree = self::$parser->parse($regex);
	} catch (Exception) {
		return null;
	}

	// these are handed to the walker by reference and filled/advanced while it descends
	$groups = [];
	$combinations = [];
	$alternationCounter = -1;
	$groupIdCounter = 100;

	$this->walkRegexAst(
		$tree,
		false,
		$alternationCounter,
		0,
		false,
		null,
		$groupIdCounter,
		$groups,
		$combinations,
	);

	return [$groups, $combinations];
}
346: | |
347: | |
348: | |
349: | |
350: | |
/**
 * Recursively walks the parsed regex tree and records every capturing group.
 *
 * A quantifier is considered optional when it allows zero repetitions:
 * `?`, `*`, `{0}`, `{0,}` and `{0,m}`, each including its lazy (`??`, `*?`,
 * `{0,m}?`) and possessive (`?+`, `*+`, `{0,m}+`) variants.
 *
 * @param bool $inAlternation whether the node lives below an `#alternation` node
 * @param int $alternationId running alternation counter, shared by reference across the whole walk
 * @param int $combinationIndex index of the alternative (child of an `#alternation` node) being walked
 * @param bool $inOptionalQuantification whether the direct parent node is a quantification allowing zero repetitions
 * @param int $captureGroupId next id to hand out to a capturing group, shared by reference
 * @param array<int, RegexCapturingGroup> $capturingGroups collected groups, keyed by group id
 * @param array<int, array<int, int[]>> $groupCombinations group ids bucketed by alternation id and combination index
 */
private function walkRegexAst(
	TreeNode $ast,
	bool $inAlternation,
	int &$alternationId,
	int $combinationIndex,
	bool $inOptionalQuantification,
	RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
	int &$captureGroupId,
	array &$capturingGroups,
	array &$groupCombinations,
): void
{
	$nodeId = $ast->getId();

	$group = null;
	if ($nodeId === '#capturing') {
		$group = new RegexCapturingGroup(
			$captureGroupId++,
			null,
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
		);
	} elseif ($nodeId === '#namedcapturing') {
		// the first child of a named group carries the group name
		$name = $ast->getChild(0)->getValue()['value'];
		$group = new RegexCapturingGroup(
			$captureGroupId++,
			$name,
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
		);
	} elseif ($nodeId === '#noncapturing' || $nodeId === '#noncapturingreset') {
		$group = new RegexNonCapturingGroup(
			$inAlternation ? $alternationId : null,
			$inOptionalQuantification,
			$parentGroup,
			$nodeId === '#noncapturingreset',
		);
	}

	if ($group !== null) {
		// every kind of group becomes the parent of the nodes below it
		$parentGroup = $group;
	}

	// The flag only applies to the direct children of a quantification node,
	// so it is recomputed from scratch for every node.
	$inOptionalQuantification = false;
	if ($nodeId === '#quantification') {
		// the quantifier token is the last child of the quantification node
		$quantifier = $ast->getChild($ast->getChildrenNumber() - 1)->getValue();
		$token = $quantifier['token'];

		$zeroOrTokens = [
			'zero_or_one',
			'zero_or_one_lazy',
			'zero_or_one_possessive',
			'zero_or_more',
			'zero_or_more_lazy',
			'zero_or_more_possessive',
		];
		$braceTokens = [
			'exactly_n',
			'n_to_m',
			'n_to_m_lazy',
			'n_to_m_possessive',
			'n_or_more',
			'n_or_more_lazy',
			'n_or_more_possessive',
		];

		if (in_array($token, $zeroOrTokens, true)) {
			$inOptionalQuantification = true;
		} elseif (in_array($token, $braceTokens, true)) {
			// brace quantifiers are optional when their lower bound is zero: {0}, {0,} or {0,m}
			$inOptionalQuantification = str_contains($quantifier['value'], '{0,')
				|| str_contains($quantifier['value'], '{0}');
		}
	}

	if ($nodeId === '#alternation') {
		$alternationId++;
		$inAlternation = true;
	}

	if ($group instanceof RegexCapturingGroup) {
		$capturingGroups[$group->getId()] = $group;

		// missing buckets are created implicitly by the nested array append
		$groupCombinations[$alternationId][$combinationIndex][] = $group->getId();
	}

	foreach ($ast->getChildren() as $child) {
		$this->walkRegexAst(
			$child,
			$inAlternation,
			$alternationId,
			$combinationIndex,
			$inOptionalQuantification,
			$parentGroup,
			$captureGroupId,
			$capturingGroups,
			$groupCombinations,
		);

		// each direct child of an alternation node is one alternative
		if ($nodeId === '#alternation') {
			$combinationIndex++;
		}
	}
}
452: | |
453: | } |
454: | |