File vendor/nikic/php-parser/lib/PhpParser/Lexer.php

1:	<?php declare(strict_types=1);
2:
3:	namespace PhpParser;
4:
5:	require __DIR__ . '/compatibility_tokens.php';
6:
class Lexer {
    /**
     * Tokenize the provided source code.
     *
     * The token array is in the same format as provided by the PhpToken::tokenize() method in
     * PHP 8.0. The tokens are instances of PhpParser\Token, to abstract over a polyfill
     * implementation in earlier PHP versions.
     *
     * The token array is terminated by a sentinel token with token ID 0.
     * The token array does not discard any tokens (i.e. whitespace and comments are included).
     * The token position attributes are against this token array.
     *
     * The xdebug.scream ini setting is forced to '0' for the duration of the call and restored
     * afterwards, including when the error handler throws.
     *
     * @param string $code The source code to tokenize.
     * @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to
     *                                        ErrorHandler\Throwing.
     * @return Token[] Tokens
     */
    public function tokenize(string $code, ?ErrorHandler $errorHandler = null): array {
        if (null === $errorHandler) {
            $errorHandler = new ErrorHandler\Throwing();
        }

        // ini_set() returns the previous value, or false if the setting could not be changed.
        $scream = ini_set('xdebug.scream', '0');

        try {
            $tokens = @Token::tokenize($code);
            $this->postprocessTokens($tokens, $errorHandler);
        } finally {
            // Restore the previous value on every exit path, so that an exception raised by the
            // error handler does not leave the setting permanently overridden.
            if (false !== $scream) {
                ini_set('xdebug.scream', $scream);
            }
        }

        return $tokens;
    }

    /**
     * Report a T_BAD_CHARACTER token to the error handler.
     *
     * @param Token $token The offending token; its text is used as the bad character.
     * @param ErrorHandler $errorHandler Handler that receives the resulting Error.
     */
    private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
        $chr = $token->text;
        if ($chr === "\0") {
            // PHP cuts error message after null byte, so need special case
            $errorMsg = 'Unexpected null byte';
        } else {
            $errorMsg = sprintf(
                'Unexpected character "%s" (ASCII %d)', $chr, ord($chr)
            );
        }

        // The error covers exactly the position of the token, on a single line.
        $errorHandler->handleError(new Error($errorMsg, [
            'startLine' => $token->line,
            'endLine' => $token->line,
            'startFilePos' => $token->pos,
            'endFilePos' => $token->pos,
        ]));
    }

    /**
     * Check whether a token is a block comment that lacks its closing delimiter.
     *
     * @param Token $token Token to inspect.
     * @return bool True if the token is a comment starting with "/*" that is not closed.
     */
    private function isUnterminatedComment(Token $token): bool {
        if (!$token->is([\T_COMMENT, \T_DOC_COMMENT])) {
            return false;
        }

        $text = $token->text;
        if (substr($text, 0, 2) !== '/*') {
            // Only comments opened with "/*" need a closing delimiter.
            return false;
        }

        // The shortest terminated block comment is the four bytes "/**/". Without the length
        // check, the three-byte input "/*/" would count as terminated, because its last two
        // bytes are "*/" even though that "*" is part of the opening delimiter.
        return \strlen($text) < 4 || substr($text, -2) !== '*/';
    }

    /**
     * Report lexing errors and canonicalize the token array in place.
     *
     * @param list<Token> $tokens Token array, modified by reference.
     * @param ErrorHandler $errorHandler Handler that receives any lexing errors found.
     */
    protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
        // This function reports errors (bad characters and unterminated comments) in the token
        // array, and performs certain canonicalizations:
        //  * Use PHP 8.1 T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG and
        //    T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG tokens used to disambiguate intersection types.
        //  * Add a sentinel token with ID 0.

        $numTokens = \count($tokens);
        if ($numTokens === 0) {
            // Empty input edge case: Just add the sentinel token.
            $tokens[] = new Token(0, "\0", 1, 0);
            return;
        }

        for ($i = 0; $i < $numTokens; $i++) {
            $token = $tokens[$i];
            if ($token->id === \T_BAD_CHARACTER) {
                $this->handleInvalidCharacter($token, $errorHandler);
            }

            if ($token->id === \ord('&')) {
                // Skip over whitespace tokens to find what follows the ampersand.
                $next = $i + 1;
                while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
                    $next++;
                }
                $followedByVarOrVarArg = isset($tokens[$next]) &&
                    $tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
                $token->id = $followedByVarOrVarArg
                    ? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
                    : \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
            }
        }

        // Check for unterminated comment. Only the last token is inspected.
        $lastToken = $tokens[$numTokens - 1];
        if ($this->isUnterminatedComment($lastToken)) {
            $errorHandler->handleError(new Error('Unterminated comment', [
                'startLine' => $lastToken->line,
                'endLine' => $lastToken->getEndLine(),
                'startFilePos' => $lastToken->pos,
                'endFilePos' => $lastToken->getEndPos(),
            ]));
        }

        // Add sentinel token, positioned at the end of the last real token.
        $tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
    }
}
117:

Namespaces

Classes

Interfaces

Exceptions