| 1: | <?php declare(strict_types=1); |
| 2: | |
| 3: | namespace PhpParser; |
| 4: | |
| 5: | require __DIR__ . '/compatibility_tokens.php'; |
| 6: | |
class Lexer {
    /**
     * Tokenize the provided source code.
     *
     * The raw tokens come from Token::tokenize() and are then passed through
     * postprocessTokens(), which reports lexing errors, re-tags ampersand tokens and appends
     * a sentinel token with ID 0. No tokens are removed from the array.
     *
     * @param string $code The source code to tokenize.
     * @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to
     *                                        ErrorHandler\Throwing.
     * @return Token[] Tokens, terminated by the sentinel token
     */
    public function tokenize(string $code, ?ErrorHandler $errorHandler = null): array {
        if (null === $errorHandler) {
            $errorHandler = new ErrorHandler\Throwing();
        }

        // Switch xdebug.scream off for the duration of the call so that the @ operator below
        // is honoured. ini_set() returns the previous value, or false if the setting could not
        // be changed (in which case there is nothing to restore).
        $scream = ini_set('xdebug.scream', '0');

        try {
            $tokens = @Token::tokenize($code);
            $this->postprocessTokens($tokens, $errorHandler);
        } finally {
            // Restore the previous setting even if the error handler threw; otherwise a
            // lexing error would leave the ini value changed for the rest of the process.
            if (false !== $scream) {
                ini_set('xdebug.scream', $scream);
            }
        }

        return $tokens;
    }

    /**
     * Report a T_BAD_CHARACTER token to the error handler.
     *
     * @param Token $token The offending token; its text is the invalid character.
     * @param ErrorHandler $errorHandler Handler that receives the resulting Error.
     */
    private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
        $chr = $token->text;
        if ($chr === "\0") {
            // A null byte gets a dedicated message rather than being embedded in the text.
            $errorMsg = 'Unexpected null byte';
        } else {
            $errorMsg = sprintf(
                'Unexpected character "%s" (ASCII %d)', $chr, ord($chr)
            );
        }

        // The error is attributed to the token's own line and position (start == end).
        $errorHandler->handleError(new Error($errorMsg, [
            'startLine' => $token->line,
            'endLine' => $token->line,
            'startFilePos' => $token->pos,
            'endFilePos' => $token->pos,
        ]));
    }

    /**
     * Whether the token is a block comment ("/*" style) that lacks its closing delimiter.
     *
     * @param Token $token Token to inspect.
     * @return bool True for a T_COMMENT / T_DOC_COMMENT that opens with "/*" but is not closed.
     */
    private function isUnterminatedComment(Token $token): bool {
        if (!$token->is([\T_COMMENT, \T_DOC_COMMENT])) {
            return false;
        }

        $text = $token->text;
        if (substr($text, 0, 2) !== '/*') {
            // Not a block comment, so there is no closing delimiter to look for.
            return false;
        }

        // The closing "*/" must not overlap the opening "/*": the shortest terminated block
        // comment is "/**/" (4 bytes). Without the length check, "/*/" would wrongly pass as
        // terminated because it both starts with "/*" and ends with "*/".
        return \strlen($text) < 4 || substr($text, -2) !== '*/';
    }

    /**
     * Report lexing errors and canonicalize the token array in place.
     *
     * Performs the following steps:
     *  * Reports every T_BAD_CHARACTER token to the error handler.
     *  * Replaces the ID of each "&" token with T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG or
     *    T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG, depending on the next non-whitespace token.
     *  * Reports an unterminated comment if the last token is one.
     *  * Appends a sentinel token with ID 0.
     *
     * @param list<Token> $tokens Token array, modified by reference.
     * @param ErrorHandler $errorHandler Handler that receives lexing errors.
     */
    protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
        $numTokens = \count($tokens);
        if ($numTokens === 0) {
            // Empty input edge case: there is no last token to derive a position from, so the
            // sentinel is created with fixed values.
            $tokens[] = new Token(0, "\0", 1, 0);
            return;
        }

        for ($i = 0; $i < $numTokens; $i++) {
            $token = $tokens[$i];
            if ($token->id === \T_BAD_CHARACTER) {
                $this->handleInvalidCharacter($token, $errorHandler);
            }

            if ($token->id === \ord('&')) {
                // Skip over whitespace to find the token that decides the ampersand kind.
                $next = $i + 1;
                while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
                    $next++;
                }
                $followedByVarOrVarArg = isset($tokens[$next]) &&
                    $tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
                $token->id = $followedByVarOrVarArg
                    ? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
                    : \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
            }
        }

        // Only the last token is checked for an unterminated comment.
        $lastToken = $tokens[$numTokens - 1];
        if ($this->isUnterminatedComment($lastToken)) {
            $errorHandler->handleError(new Error('Unterminated comment', [
                'startLine' => $lastToken->line,
                'endLine' => $lastToken->getEndLine(),
                'startFilePos' => $lastToken->pos,
                'endFilePos' => $lastToken->getEndPos(),
            ]));
        }

        // Add the sentinel token, placed at the end of the last real token.
        $tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
    }
}
| 117: | |