1: | <?php declare(strict_types = 1); |
2: | |
3: | namespace PHPStan\PhpDocParser\Parser; |
4: | |
5: | use PHPStan\PhpDocParser\Ast; |
6: | use PHPStan\PhpDocParser\Lexer\Lexer; |
7: | use function chr; |
8: | use function hexdec; |
9: | use function octdec; |
10: | use function preg_replace_callback; |
11: | use function str_replace; |
12: | use function strtolower; |
13: | use function substr; |
14: | |
/**
 * Parses a constant expression from a token stream into a ConstExpr AST node.
 *
 * Handles float, integer and string literals, the keywords true/false/null,
 * array literals written as `array(...)` or `[...]`, bare constant names and
 * `Class::CONSTANT` fetches (the constant part may contain `*` wildcards).
 */
class ConstExprParser
{

    /**
     * Single-character escape sequences, keyed by the character that follows
     * the backslash, mapped to the character they denote.
     */
    private const REPLACEMENTS = [
        '\\' => '\\',
        'n' => "\n",
        'r' => "\r",
        't' => "\t",
        'f' => "\f",
        'v' => "\v",
        'e' => "\x1B",
    ];

    /** UTF-8 bytes of U+FFFD, emitted in place of a code point that cannot be encoded. */
    private const INVALID_CODE_POINT = "\xef\xbf\xbd";

    /** Largest value codePointToUtf8() encodes: the 21 bits a 4-byte sequence can carry. */
    private const MAX_ENCODABLE_CODE_POINT = 0x1FFFFF;

    /** @var bool whether trimmed string literals also get their escape sequences resolved */
    private $unescapeStrings;

    /**
     * @param bool $unescapeStrings when true, string literals parsed with
     *                              $trimStrings enabled are unescaped instead of
     *                              merely having their quotes cut off
     */
    public function __construct(bool $unescapeStrings = false)
    {
        $this->unescapeStrings = $unescapeStrings;
    }

    /**
     * Parses one constant expression starting at the current token.
     *
     * @param TokenIterator $tokens      token stream; advanced past the parsed expression
     * @param bool          $trimStrings when true, the value of a string node has its
     *                                   surrounding quotes removed (and is unescaped if
     *                                   the parser was constructed with $unescapeStrings);
     *                                   when false, the raw token text is kept
     *
     * @throws ParserException when the current token cannot start a constant expression
     */
    public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\ConstExpr\ConstExprNode
    {
        if ($tokens->isCurrentTokenType(Lexer::TOKEN_FLOAT)) {
            $value = $tokens->currentTokenValue();
            $tokens->next();
            return new Ast\ConstExpr\ConstExprFloatNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_INTEGER)) {
            $value = $tokens->currentTokenValue();
            $tokens->next();
            return new Ast\ConstExpr\ConstExprIntegerNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_SINGLE_QUOTED_STRING, Lexer::TOKEN_DOUBLE_QUOTED_STRING)) {
            $value = $tokens->currentTokenValue();
            if ($trimStrings) {
                if ($this->unescapeStrings) {
                    $value = self::unescapeString($value);
                } else {
                    // Only drop the opening and closing quote; escapes stay as written.
                    $value = substr($value, 1, -1);
                }
            }
            $tokens->next();
            return new Ast\ConstExpr\ConstExprStringNode($value);

        } elseif ($tokens->isCurrentTokenType(Lexer::TOKEN_IDENTIFIER)) {
            $identifier = $tokens->currentTokenValue();
            $tokens->next();

            // Keywords are matched case-insensitively.
            switch (strtolower($identifier)) {
                case 'true':
                    return new Ast\ConstExpr\ConstExprTrueNode();
                case 'false':
                    return new Ast\ConstExpr\ConstExprFalseNode();
                case 'null':
                    return new Ast\ConstExpr\ConstExprNullNode();
                case 'array':
                    $tokens->consumeTokenType(Lexer::TOKEN_OPEN_PARENTHESES);
                    return $this->parseArray($tokens, Lexer::TOKEN_CLOSE_PARENTHESES);
            }

            if ($tokens->tryConsumeTokenType(Lexer::TOKEN_DOUBLE_COLON)) {
                // Build the constant name from alternating identifier and wildcard
                // tokens (e.g. FOO_*, *_BAR, FOO_*_BAR); two tokens of the same kind
                // in a row end the name.
                $classConstantName = '';
                $lastType = null;
                while (true) {
                    if ($lastType !== Lexer::TOKEN_IDENTIFIER && $tokens->currentTokenType() === Lexer::TOKEN_IDENTIFIER) {
                        $classConstantName .= $tokens->currentTokenValue();
                        $tokens->consumeTokenType(Lexer::TOKEN_IDENTIFIER);
                        $lastType = Lexer::TOKEN_IDENTIFIER;

                        continue;
                    }

                    if ($lastType !== Lexer::TOKEN_WILDCARD && $tokens->tryConsumeTokenType(Lexer::TOKEN_WILDCARD)) {
                        $classConstantName .= '*';
                        $lastType = Lexer::TOKEN_WILDCARD;

                        // Skipped horizontal whitespace at this point ends the name
                        // (presumably the whitespace right after the wildcard - confirm
                        // against TokenIterator).
                        if ($tokens->getSkippedHorizontalWhiteSpaceIfAny() !== '') {
                            break;
                        }

                        continue;
                    }

                    if ($lastType === null) {
                        // Nothing valid followed "::"; consuming a token that is known
                        // not to be there makes the iterator report the parse error.
                        $tokens->consumeTokenType(Lexer::TOKEN_WILDCARD);
                    }

                    break;
                }

                return new Ast\ConstExpr\ConstFetchNode($identifier, $classConstantName);

            }

            // A bare identifier is a constant fetch without a class part.
            return new Ast\ConstExpr\ConstFetchNode('', $identifier);

        } elseif ($tokens->tryConsumeTokenType(Lexer::TOKEN_OPEN_SQUARE_BRACKET)) {
            return $this->parseArray($tokens, Lexer::TOKEN_CLOSE_SQUARE_BRACKET);
        }

        throw new ParserException(
            $tokens->currentTokenValue(),
            $tokens->currentTokenType(),
            $tokens->currentTokenOffset(),
            Lexer::TOKEN_IDENTIFIER
        );
    }

    /**
     * Parses the items of an array literal whose opening delimiter has already
     * been consumed, up to and including $endToken.
     *
     * @param int $endToken token type that closes the array
     */
    private function parseArray(TokenIterator $tokens, int $endToken): Ast\ConstExpr\ConstExprArrayNode
    {
        $items = [];

        if (!$tokens->tryConsumeTokenType($endToken)) {
            do {
                $items[] = $this->parseArrayItem($tokens);
                // A comma directly followed by the closing token is a trailing comma.
            } while ($tokens->tryConsumeTokenType(Lexer::TOKEN_COMMA) && !$tokens->isCurrentTokenType($endToken));
            $tokens->consumeTokenType($endToken);
        }

        return new Ast\ConstExpr\ConstExprArrayNode($items);
    }

    /**
     * Parses a single array item, either `value` or `key => value`.
     */
    private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprArrayItemNode
    {
        $expr = $this->parse($tokens);

        if ($tokens->tryConsumeTokenType(Lexer::TOKEN_DOUBLE_ARROW)) {
            // What was parsed first turned out to be the key.
            $key = $expr;
            $value = $this->parse($tokens);

        } else {
            $key = null;
            $value = $expr;
        }

        return new Ast\ConstExpr\ConstExprArrayItemNode($key, $value);
    }

    /**
     * Removes the surrounding quotes from a quoted string literal and resolves
     * its escape sequences.
     *
     * A literal starting with a single quote only has `\\` and `\'` resolved;
     * any other literal is handled as a double-quoted string.
     *
     * @param string $string the literal including both quote characters
     */
    private static function unescapeString(string $string): string
    {
        $quote = $string[0];

        if ($quote === '\'') {
            return str_replace(
                ['\\\\', '\\\''],
                ['\\', '\''],
                substr($string, 1, -1)
            );
        }

        return self::parseEscapeSequences(substr($string, 1, -1), '"');
    }

    /**
     * Resolves the escape sequences of a double-quoted style string body.
     *
     * Recognised sequences: the escaped quote, the single characters listed in
     * REPLACEMENTS, `\xHH` (1-2 hex digits), `\OOO` (1-3 octal digits) and
     * `\u{H...}`. A `\u{...}` value that cannot be encoded yields U+FFFD.
     *
     * @param string $str   string body without the surrounding quotes
     * @param string $quote quote character whose escaped form is unescaped first
     */
    private static function parseEscapeSequences(string $str, string $quote): string
    {
        $str = str_replace('\\' . $quote, $quote, $str);

        return preg_replace_callback(
            '~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
            static function (array $matches): string {
                $str = $matches[1];

                if (isset(self::REPLACEMENTS[$str])) {
                    return self::REPLACEMENTS[$str];
                }
                if ($str[0] === 'x' || $str[0] === 'X') {
                    return chr(hexdec(substr($str, 1)));
                }
                if ($str[0] === 'u') {
                    $codePoint = hexdec($matches[2]);

                    // hexdec() returns a float once the digits exceed PHP_INT_MAX.
                    // Handing that to codePointToUtf8(int) raises a TypeError, so
                    // reject every value above the encodable range here.
                    if ($codePoint > self::MAX_ENCODABLE_CODE_POINT) {
                        return self::INVALID_CODE_POINT;
                    }

                    return self::codePointToUtf8((int) $codePoint);
                }

                return chr(octdec($str));
            },
            $str
        );
    }

    /**
     * Encodes a code point as a 1 to 4 byte UTF-8 sequence.
     *
     * Values above MAX_ENCODABLE_CODE_POINT produce the bytes of U+FFFD.
     * NOTE(review): values between 0x110000 and 0x1FFFFF are still encoded even
     * though they lie outside the Unicode range; kept as in the original.
     */
    private static function codePointToUtf8(int $num): string
    {
        if ($num <= 0x7F) {
            return chr($num);
        }
        if ($num <= 0x7FF) {
            return chr(($num >> 6) + 0xC0)
                . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0xFFFF) {
            return chr(($num >> 12) + 0xE0)
                . chr((($num >> 6) & 0x3F) + 0x80)
                . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= self::MAX_ENCODABLE_CODE_POINT) {
            return chr(($num >> 18) + 0xF0)
                . chr((($num >> 12) & 0x3F) + 0x80)
                . chr((($num >> 6) & 0x3F) + 0x80)
                . chr(($num & 0x3F) + 0x80);
        }

        // Code point too large to be represented.
        return self::INVALID_CODE_POINT;
    }

}
231: | |