| 1: | <?php declare(strict_types = 1); |
| 2: | |
| 3: | namespace PHPStan\PhpDocParser\Parser; |
| 4: | |
| 5: | use PHPStan\PhpDocParser\Ast; |
| 6: | use PHPStan\PhpDocParser\Lexer\Lexer; |
| 7: | use function chr; |
| 8: | use function hexdec; |
| 9: | use function octdec; |
| 10: | use function preg_replace_callback; |
| 11: | use function str_replace; |
| 12: | use function strtolower; |
| 13: | use function substr; |
| 14: | |
/**
 * Parses constant expressions found in PHPDoc token streams (numbers, strings,
 * true/false/null, array literals and constant fetches) into Ast\ConstExpr nodes.
 */
class ConstExprParser
{

    /** Single-character escape sequences recognised inside double-quoted strings. */
    private const REPLACEMENTS = [
        '\\' => '\\',
        'n' => "\n",
        'r' => "\r",
        't' => "\t",
        'f' => "\f",
        'v' => "\v",
        'e' => "\x1B",
    ];

    /** UTF-8 encoding of U+FFFD, substituted for code points that cannot be encoded. */
    private const REPLACEMENT_CHARACTER = "\xef\xbf\xbd";

    /** @var bool whether trimmed string literals also get their escape sequences decoded */
    private $unescapeStrings;

    /**
     * @param bool $unescapeStrings when true, string literals parsed with $trimStrings
     *                              enabled are unescaped instead of merely having their
     *                              surrounding quotes cut off
     */
    public function __construct(bool $unescapeStrings = false)
    {
        $this->unescapeStrings = $unescapeStrings;
    }

    /**
     * Parses one constant expression starting at the current token.
     *
     * @param TokenIterator $tokens      token stream; advanced past the parsed expression
     * @param bool          $trimStrings when true, string literal values are returned
     *                                   without their surrounding quotes (and unescaped
     *                                   if the parser was constructed with $unescapeStrings)
     *
     * @throws ParserException when the current token cannot start a constant expression
     */
    public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\ConstExpr\ConstExprNode
    {
        if ($tokens->isCurrentTokenType(Lexer::TOKEN_FLOAT)) {
            $value = $tokens->currentTokenValue();
            $tokens->next();

            return new Ast\ConstExpr\ConstExprFloatNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_INTEGER)) {
            $value = $tokens->currentTokenValue();
            $tokens->next();

            return new Ast\ConstExpr\ConstExprIntegerNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_SINGLE_QUOTED_STRING, Lexer::TOKEN_DOUBLE_QUOTED_STRING)) {
            $value = $tokens->currentTokenValue();
            if ($trimStrings) {
                // Unescaping also strips the quotes; otherwise only the quotes are removed.
                $value = $this->unescapeStrings
                    ? self::unescapeString($value)
                    : substr($value, 1, -1);
            }
            $tokens->next();

            return new Ast\ConstExpr\ConstExprStringNode($value);
        }

        if ($tokens->isCurrentTokenType(Lexer::TOKEN_IDENTIFIER)) {
            $identifier = $tokens->currentTokenValue();
            $tokens->next();

            // Keywords are matched case-insensitively.
            switch (strtolower($identifier)) {
                case 'true':
                    return new Ast\ConstExpr\ConstExprTrueNode();
                case 'false':
                    return new Ast\ConstExpr\ConstExprFalseNode();
                case 'null':
                    return new Ast\ConstExpr\ConstExprNullNode();
                case 'array':
                    // Long array syntax: array(...)
                    $tokens->consumeTokenType(Lexer::TOKEN_OPEN_PARENTHESES);

                    return $this->parseArray($tokens, Lexer::TOKEN_CLOSE_PARENTHESES);
            }

            if ($tokens->tryConsumeTokenType(Lexer::TOKEN_DOUBLE_COLON)) {
                // Class constant fetch: Identifier::NAME, possibly containing wildcards.
                return new Ast\ConstExpr\ConstFetchNode($identifier, $this->parseClassConstantName($tokens));
            }

            // Plain (global) constant fetch: the class part is left empty.
            return new Ast\ConstExpr\ConstFetchNode('', $identifier);
        }

        if ($tokens->tryConsumeTokenType(Lexer::TOKEN_OPEN_SQUARE_BRACKET)) {
            // Short array syntax: [...]
            return $this->parseArray($tokens, Lexer::TOKEN_CLOSE_SQUARE_BRACKET);
        }

        throw new ParserException(
            $tokens->currentTokenValue(),
            $tokens->currentTokenType(),
            $tokens->currentTokenOffset(),
            Lexer::TOKEN_IDENTIFIER
        );
    }

    /**
     * Reads the constant name that follows `::`. The name is built from
     * alternating identifier and wildcard tokens (e.g. `FOO_*`, `*_BAR`, `A*B`);
     * two tokens of the same kind are never joined.
     */
    private function parseClassConstantName(TokenIterator $tokens): string
    {
        $classConstantName = '';
        $lastType = null;

        while (true) {
            if ($lastType !== Lexer::TOKEN_IDENTIFIER && $tokens->currentTokenType() === Lexer::TOKEN_IDENTIFIER) {
                $classConstantName .= $tokens->currentTokenValue();
                $tokens->consumeTokenType(Lexer::TOKEN_IDENTIFIER);
                $lastType = Lexer::TOKEN_IDENTIFIER;

                continue;
            }

            if ($lastType !== Lexer::TOKEN_WILDCARD && $tokens->tryConsumeTokenType(Lexer::TOKEN_WILDCARD)) {
                $classConstantName .= '*';
                $lastType = Lexer::TOKEN_WILDCARD;

                // Horizontal whitespace skipped after the wildcard ends the name.
                if ($tokens->getSkippedHorizontalWhiteSpaceIfAny() !== '') {
                    break;
                }

                continue;
            }

            if ($lastType === null) {
                // Nothing usable followed `::`. The current token is known not to be
                // a wildcard here, so this call is expected to raise the parse error
                // (NOTE(review): relies on consumeTokenType() throwing on mismatch).
                $tokens->consumeTokenType(Lexer::TOKEN_WILDCARD);
            }

            break;
        }

        return $classConstantName;
    }

    /**
     * Parses the items of an array literal whose opening token has already been
     * consumed, up to and including $endToken. A trailing comma before the end
     * token is accepted.
     *
     * @param int $endToken lexer token type that closes the array
     */
    private function parseArray(TokenIterator $tokens, int $endToken): Ast\ConstExpr\ConstExprArrayNode
    {
        $items = [];

        if (!$tokens->tryConsumeTokenType($endToken)) {
            do {
                $items[] = $this->parseArrayItem($tokens);
            } while ($tokens->tryConsumeTokenType(Lexer::TOKEN_COMMA) && !$tokens->isCurrentTokenType($endToken));
            $tokens->consumeTokenType($endToken);
        }

        return new Ast\ConstExpr\ConstExprArrayNode($items);
    }

    /**
     * Parses a single array item, either `value` or `key => value`.
     * Nested expressions are parsed without string trimming.
     */
    private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprArrayItemNode
    {
        $expr = $this->parse($tokens);

        if ($tokens->tryConsumeTokenType(Lexer::TOKEN_DOUBLE_ARROW)) {
            // What was parsed first turned out to be the key.
            $key = $expr;
            $value = $this->parse($tokens);
        } else {
            $key = null;
            $value = $expr;
        }

        return new Ast\ConstExpr\ConstExprArrayItemNode($key, $value);
    }

    /**
     * Removes the surrounding quotes from a string literal and decodes its escapes.
     *
     * Single-quoted strings only understand `\\` and `\'`; anything else is
     * handled with the double-quoted escape rules.
     *
     * @param string $string the literal including its quotes (must not be empty)
     */
    private static function unescapeString(string $string): string
    {
        $quote = $string[0];
        $inner = substr($string, 1, -1);

        if ($quote === '\'') {
            return str_replace(
                ['\\\\', '\\\''],
                ['\\', '\''],
                $inner
            );
        }

        return self::parseEscapeSequences($inner, '"');
    }

    /**
     * Decodes escape sequences of a double-quoted style string body: the escaped
     * quote, the single-character escapes in self::REPLACEMENTS, hexadecimal
     * (`\xHH`), octal (`\NNN`) and Unicode code point (`\u{H...}`) escapes.
     *
     * @param string $str   string contents without the surrounding quotes
     * @param string $quote quote character whose escaped form should be unescaped
     */
    private static function parseEscapeSequences(string $str, string $quote): string
    {
        $str = str_replace('\\' . $quote, $quote, $str);

        return preg_replace_callback(
            '~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
            static function (array $matches): string {
                $str = $matches[1];

                if (isset(self::REPLACEMENTS[$str])) {
                    return self::REPLACEMENTS[$str];
                }
                if ($str[0] === 'x' || $str[0] === 'X') {
                    return chr(hexdec(substr($str, 1)));
                }
                if ($str[0] === 'u') {
                    $codePoint = hexdec($matches[2]);

                    // hexdec() yields a float once the value no longer fits an int;
                    // passing that to the int-typed encoder would raise a TypeError
                    // under strict_types. Anything above the encodable range maps to
                    // the replacement character, so short-circuit here.
                    if ($codePoint > 0x1FFFFF) {
                        return self::REPLACEMENT_CHARACTER;
                    }

                    return self::codePointToUtf8((int) $codePoint);
                }

                // Octal escapes can reach 0777 (511). Keep only the low byte
                // explicitly instead of relying on chr() wrapping out-of-range values.
                return chr(octdec($str) & 0xFF);
            },
            $str
        );
    }

    /**
     * Encodes a code point as a UTF-8 byte sequence of one to four bytes.
     * Values above 0x1FFFFF cannot be encoded and yield U+FFFD instead.
     *
     * @param int $num code point (non-negative)
     */
    private static function codePointToUtf8(int $num): string
    {
        if ($num <= 0x7F) {
            return chr($num);
        }
        if ($num <= 0x7FF) {
            return chr(($num >> 6) + 0xC0)
                . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0xFFFF) {
            return chr(($num >> 12) + 0xE0)
                . chr((($num >> 6) & 0x3F) + 0x80)
                . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0x1FFFFF) {
            return chr(($num >> 18) + 0xF0)
                . chr((($num >> 12) & 0x3F) + 0x80)
                . chr((($num >> 6) & 0x3F) + 0x80)
                . chr(($num & 0x3F) + 0x80);
        }

        // Code point too large to encode.
        return self::REPLACEMENT_CHARACTER;
    }

}
| 231: | |