1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\PhpDocParser\Parser;
4:
use PHPStan\PhpDocParser\Ast;
use PHPStan\PhpDocParser\Lexer\Lexer;
use function chr;
use function hexdec;
use function is_int;
use function octdec;
use function preg_replace_callback;
use function str_replace;
use function strtolower;
use function substr;
14:
/**
 * Parses constant expressions (scalars, strings, arrays and constant fetches)
 * from a PHPDoc token stream into Ast\ConstExpr nodes.
 */
class ConstExprParser
{

    /**
     * Single-character escape sequences recognised in double-quoted strings,
     * keyed by the character that follows the backslash.
     */
    private const REPLACEMENTS = [
        '\\' => '\\',
        'n' => "\n",
        'r' => "\r",
        't' => "\t",
        'f' => "\f",
        'v' => "\v",
        'e' => "\x1B",
    ];

    /**
     * UTF-8 encoding of U+FFFD, emitted for code point escapes that are too large to encode.
     */
    private const REPLACEMENT_CHARACTER = "\xef\xbf\xbd";

    /**
     * When true, trimmed string values also get their escape sequences resolved;
     * when false, trimming only strips the surrounding quotes.
     *
     * @var bool
     */
    private $unescapeStrings;

    /**
     * @param bool $unescapeStrings whether parse() resolves escape sequences in strings it trims
     */
    public function __construct(bool $unescapeStrings = false)
    {
        $this->unescapeStrings = $unescapeStrings;
    }

    /**
     * Parses one constant expression starting at the current token.
     *
     * Recognised forms:
     *  - float and integer tokens (the raw token text is kept as the node value),
     *  - single- and double-quoted strings,
     *  - the identifiers true / false / null (matched case-insensitively),
     *  - array( ... ) and [ ... ] literals,
     *  - Identifier::CONSTANT fetches, where the constant name may contain wildcards,
     *  - any other identifier, which becomes a constant fetch with an empty class name.
     *
     * @param TokenIterator $tokens      token stream; advanced past the parsed expression
     * @param bool          $trimStrings when true, the surrounding quotes are removed from string values
     *                                   (and escapes are resolved if the parser was built with $unescapeStrings).
     *                                   It only affects a string at this level: array items are parsed
     *                                   without trimming.
     *
     * @throws ParserException when the current token cannot start a constant expression
     */
    public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\ConstExpr\ConstExprNode
    {
        if ($tokens->isCurrentTokenType(Lexer::TOKEN_FLOAT)) {
            $value = $tokens->currentTokenValue();
            $tokens->next();
            return new Ast\ConstExpr\ConstExprFloatNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_INTEGER)) {
            $value = $tokens->currentTokenValue();
            $tokens->next();
            return new Ast\ConstExpr\ConstExprIntegerNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_SINGLE_QUOTED_STRING, Lexer::TOKEN_DOUBLE_QUOTED_STRING)) {
            $value = $tokens->currentTokenValue();
            if ($trimStrings) {
                if ($this->unescapeStrings) {
                    $value = self::unescapeString($value);
                } else {
                    // Only drop the opening and closing quote; escapes stay untouched.
                    $value = substr($value, 1, -1);
                }
            }
            $tokens->next();
            return new Ast\ConstExpr\ConstExprStringNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_IDENTIFIER)) {
            $identifier = $tokens->currentTokenValue();
            $tokens->next();

            switch (strtolower($identifier)) {
                case 'true':
                    return new Ast\ConstExpr\ConstExprTrueNode();
                case 'false':
                    return new Ast\ConstExpr\ConstExprFalseNode();
                case 'null':
                    return new Ast\ConstExpr\ConstExprNullNode();
                case 'array':
                    $tokens->consumeTokenType(Lexer::TOKEN_OPEN_PARENTHESES);
                    return $this->parseArray($tokens, Lexer::TOKEN_CLOSE_PARENTHESES);
            }

            if ($tokens->tryConsumeTokenType(Lexer::TOKEN_DOUBLE_COLON)) {
                return new Ast\ConstExpr\ConstFetchNode($identifier, $this->parseClassConstantName($tokens));
            }

            // A bare identifier is a global constant fetch: no class part.
            return new Ast\ConstExpr\ConstFetchNode('', $identifier);
        }

        if ($tokens->tryConsumeTokenType(Lexer::TOKEN_OPEN_SQUARE_BRACKET)) {
            return $this->parseArray($tokens, Lexer::TOKEN_CLOSE_SQUARE_BRACKET);
        }

        throw new ParserException(
            $tokens->currentTokenValue(),
            $tokens->currentTokenType(),
            $tokens->currentTokenOffset(),
            Lexer::TOKEN_IDENTIFIER
        );
    }

    /**
     * Reads the constant name that follows "::" in a class constant fetch.
     *
     * The name is assembled from alternating identifier and wildcard tokens
     * (e.g. FOO, FOO_*, *_BAR, FOO_*_BAR); two tokens of the same kind are never
     * taken back to back. Reading stops after a wildcard when the iterator
     * reports skipped horizontal whitespace, or at the first token that does
     * not continue the name.
     *
     * @param TokenIterator $tokens token stream positioned right after the "::"
     *
     * @return string the assembled name, with each wildcard rendered as "*"
     */
    private function parseClassConstantName(TokenIterator $tokens): string
    {
        $classConstantName = '';
        $lastType = null;

        while (true) {
            if ($lastType !== Lexer::TOKEN_IDENTIFIER && $tokens->currentTokenType() === Lexer::TOKEN_IDENTIFIER) {
                $classConstantName .= $tokens->currentTokenValue();
                $tokens->consumeTokenType(Lexer::TOKEN_IDENTIFIER);
                $lastType = Lexer::TOKEN_IDENTIFIER;

                continue;
            }

            if ($lastType !== Lexer::TOKEN_WILDCARD && $tokens->tryConsumeTokenType(Lexer::TOKEN_WILDCARD)) {
                $classConstantName .= '*';
                $lastType = Lexer::TOKEN_WILDCARD;

                if ($tokens->getSkippedHorizontalWhiteSpaceIfAny() !== '') {
                    break;
                }

                continue;
            }

            if ($lastType === null) {
                // trigger parse error if nothing valid was consumed
                $tokens->consumeTokenType(Lexer::TOKEN_WILDCARD);
            }

            break;
        }

        return $classConstantName;
    }

    /**
     * Parses the items of an array literal whose opening token was already consumed.
     *
     * Items are separated by commas; a trailing comma before the end token is accepted,
     * and an immediately following end token yields an empty array.
     *
     * @param TokenIterator $tokens   token stream positioned after the opening bracket/parenthesis
     * @param int           $endToken Lexer token type that closes the array
     */
    private function parseArray(TokenIterator $tokens, int $endToken): Ast\ConstExpr\ConstExprArrayNode
    {
        $items = [];

        if (!$tokens->tryConsumeTokenType($endToken)) {
            do {
                $items[] = $this->parseArrayItem($tokens);
                // Keep going after a comma unless the comma was a trailing one.
            } while ($tokens->tryConsumeTokenType(Lexer::TOKEN_COMMA) && !$tokens->isCurrentTokenType($endToken));
            $tokens->consumeTokenType($endToken);
        }

        return new Ast\ConstExpr\ConstExprArrayNode($items);
    }

    /**
     * Parses a single array item, either "value" or "key => value".
     *
     * @param TokenIterator $tokens token stream positioned at the start of the item
     */
    private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprArrayItemNode
    {
        $expr = $this->parse($tokens);

        if ($tokens->tryConsumeTokenType(Lexer::TOKEN_DOUBLE_ARROW)) {
            // What was parsed first turned out to be the key.
            $key = $expr;
            $value = $this->parse($tokens);

        } else {
            $key = null;
            $value = $expr;
        }

        return new Ast\ConstExpr\ConstExprArrayItemNode($key, $value);
    }

    /**
     * Removes the surrounding quotes from a quoted string and resolves its escape sequences.
     *
     * A string starting with a single quote only has \\ and \' translated; anything
     * else is treated as double-quoted and handed to parseEscapeSequences().
     *
     * @param string $string the quoted string, including both quote characters
     */
    private static function unescapeString(string $string): string
    {
        $quote = $string[0];

        if ($quote === '\'') {
            return str_replace(
                ['\\\\', '\\\''],
                ['\\', '\''],
                substr($string, 1, -1)
            );
        }

        return self::parseEscapeSequences(substr($string, 1, -1), '"');
    }

    /**
     * Resolves escape sequences in the contents of a double-quoted string.
     *
     * Handles the escaped quote character, the single-character escapes listed in
     * REPLACEMENTS, hexadecimal (\xHH), octal (\OOO) and Unicode code point (\u{...}) escapes.
     *
     * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
     *
     * @param string $str   string contents without the surrounding quotes
     * @param string $quote the quote character whose backslash-escaped form is unescaped
     */
    private static function parseEscapeSequences(string $str, string $quote): string
    {
        $str = str_replace('\\' . $quote, $quote, $str);

        return preg_replace_callback(
            '~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
            static function (array $matches): string {
                $str = $matches[1];

                if (isset(self::REPLACEMENTS[$str])) {
                    return self::REPLACEMENTS[$str];
                }
                if ($str[0] === 'x' || $str[0] === 'X') {
                    return chr(hexdec(substr($str, 1)));
                }
                if ($str[0] === 'u') {
                    $codePoint = hexdec($matches[2]);

                    // hexdec() returns a float once the value no longer fits into an int.
                    // Passing that to codePointToUtf8(int) would raise a TypeError under
                    // strict_types, so such an escape is treated like any other code point
                    // that is too large.
                    if (!is_int($codePoint)) {
                        return self::REPLACEMENT_CHARACTER;
                    }

                    return self::codePointToUtf8($codePoint);
                }

                // Only the octal alternative of the pattern is left.
                return chr(octdec($str));
            },
            $str
        );
    }

    /**
     * Encodes a code point as a UTF-8 byte sequence of one to four bytes.
     *
     * Code points above 0x1FFFFF cannot be encoded this way and yield the
     * U+FFFD replacement character instead.
     *
     * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
     *
     * @param int $num the code point to encode
     */
    private static function codePointToUtf8(int $num): string
    {
        if ($num <= 0x7F) {
            return chr($num);
        }
        if ($num <= 0x7FF) {
            return chr(($num >> 6) + 0xC0)
                . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0xFFFF) {
            return chr(($num >> 12) + 0xE0)
                . chr((($num >> 6) & 0x3F) + 0x80)
                . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0x1FFFFF) {
            return chr(($num >> 18) + 0xF0)
                . chr((($num >> 12) & 0x3F) + 0x80)
                . chr((($num >> 6) & 0x3F) + 0x80)
                . chr(($num & 0x3F) + 0x80);
        }

        // Invalid UTF-8 codepoint escape sequence: Codepoint too large
        return self::REPLACEMENT_CHARACTER;
    }

}
231: