1: | <?php declare(strict_types=1); |
2: | |
3: | namespace PhpParser; |
4: | |
5: | require __DIR__ . '/compatibility_tokens.php'; |
6: | |
/**
 * Turns PHP source code into a token array and post-processes it for the parser.
 *
 * Post-processing reports lexing errors (bad characters, unterminated comments) to an
 * ErrorHandler, disambiguates "&" tokens, and appends a sentinel token with ID 0.
 */
class Lexer {
    /**
     * Tokenize the provided source code.
     *
     * The returned array contains every token produced by Token::tokenize() (nothing is
     * discarded here) followed by one sentinel token with ID 0 and text "\0".
     *
     * @param string $code The source code to tokenize.
     * @param ErrorHandler|null $errorHandler Handler that receives lexing errors. When null,
     *                                        an ErrorHandler\Throwing instance is used.
     *
     * @return Token[] The post-processed tokens, terminated by the sentinel token.
     */
    public function tokenize(string $code, ?ErrorHandler $errorHandler = null): array {
        if (null === $errorHandler) {
            $errorHandler = new ErrorHandler\Throwing();
        }

        // Temporarily switch off xdebug.scream so that the "@" suppression below stays
        // effective. ini_set() returns the previous value, or false if it could not be set.
        $scream = ini_set('xdebug.scream', '0');

        try {
            $tokens = @Token::tokenize($code);
            $this->postprocessTokens($tokens, $errorHandler);
        } finally {
            // Restore the previous setting even when the error handler throws; otherwise the
            // override would leak into the rest of the process.
            if (false !== $scream) {
                ini_set('xdebug.scream', $scream);
            }
        }

        return $tokens;
    }

    /**
     * Report a T_BAD_CHARACTER token to the error handler.
     *
     * The error spans exactly the position of the token (start and end attributes are equal).
     *
     * @param Token $token The offending token; its text is used to build the message.
     * @param ErrorHandler $errorHandler Handler that receives the error.
     */
    private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
        $chr = $token->text;
        if ($chr === "\0") {
            // A null byte gets a dedicated message rather than being embedded in the text.
            $errorMsg = 'Unexpected null byte';
        } else {
            $errorMsg = sprintf(
                'Unexpected character "%s" (ASCII %d)', $chr, ord($chr)
            );
        }

        $errorHandler->handleError(new Error($errorMsg, [
            'startLine' => $token->line,
            'endLine' => $token->line,
            'startFilePos' => $token->pos,
            'endFilePos' => $token->pos,
        ]));
    }

    /**
     * Check whether a token is a block comment that lacks its closing delimiter.
     *
     * @param Token $token The token to inspect.
     *
     * @return bool True only for T_COMMENT / T_DOC_COMMENT tokens that start with the block
     *              comment opener but are not closed by a separate closing delimiter.
     */
    private function isUnterminatedComment(Token $token): bool {
        if (!$token->is([\T_COMMENT, \T_DOC_COMMENT])) {
            return false;
        }

        $text = $token->text;
        if (substr($text, 0, 2) !== '/*') {
            // Not a block comment, so there is no closing delimiter to miss.
            return false;
        }

        // A terminated block comment needs at least four bytes: two for the opener and two
        // for the closer. Without the length check, the three-byte text consisting of slash,
        // star, slash would pass both the prefix and the suffix test by sharing its star.
        return \strlen($text) < 4 || substr($text, -2) !== '*/';
    }

    /**
     * Report lexing errors and canonicalize the token array in place.
     *
     * Performs the following steps:
     *  * Reports every T_BAD_CHARACTER token to the error handler.
     *  * Rewrites the ID of each "&" token to T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG or
     *    T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG, depending on the next non-whitespace token.
     *  * Reports an unterminated comment if the last token is one.
     *  * Appends a sentinel token with ID 0 and text "\0".
     *
     * @param Token[] $tokens Token array, modified by reference.
     * @param ErrorHandler $errorHandler Handler that receives the reported errors.
     */
    protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
        $numTokens = \count($tokens);
        if ($numTokens === 0) {
            // Empty input: there is no last token to derive a position from, so the sentinel
            // is created with the fixed values 1 and 0.
            $tokens[] = new Token(0, "\0", 1, 0);
            return;
        }

        for ($i = 0; $i < $numTokens; $i++) {
            $token = $tokens[$i];
            if ($token->id === \T_BAD_CHARACTER) {
                $this->handleInvalidCharacter($token, $errorHandler);
            }

            if ($token->id === \ord('&')) {
                // Look past any whitespace tokens to find what the ampersand is followed by.
                $next = $i + 1;
                while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
                    $next++;
                }
                $followedByVarOrVarArg = isset($tokens[$next]) &&
                    $tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
                $token->id = $followedByVarOrVarArg
                    ? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
                    : \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
            }
        }

        // Only the last token is checked for an unterminated comment.
        $lastToken = $tokens[$numTokens - 1];
        if ($this->isUnterminatedComment($lastToken)) {
            $errorHandler->handleError(new Error('Unterminated comment', [
                'startLine' => $lastToken->line,
                'endLine' => $lastToken->getEndLine(),
                'startFilePos' => $lastToken->pos,
                'endFilePos' => $lastToken->getEndPos(),
            ]));
        }

        // The sentinel is placed at the end line / end position of the last real token.
        $tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
    }
}
117: | |