1: <?php declare(strict_types=1);
2:
3: namespace PhpParser;
4:
5: /*
6: * This parser is based on a skeleton written by Moriyoshi Koizumi, which in
7: * turn is based on work by Masato Bito.
8: */
9:
10: use PhpParser\Node\Arg;
11: use PhpParser\Node\Expr;
12: use PhpParser\Node\Expr\Array_;
13: use PhpParser\Node\Expr\Cast\Double;
14: use PhpParser\Node\Identifier;
15: use PhpParser\Node\InterpolatedStringPart;
16: use PhpParser\Node\Name;
17: use PhpParser\Node\Param;
18: use PhpParser\Node\PropertyHook;
19: use PhpParser\Node\Scalar\InterpolatedString;
20: use PhpParser\Node\Scalar\Int_;
21: use PhpParser\Node\Scalar\String_;
22: use PhpParser\Node\Stmt;
23: use PhpParser\Node\Stmt\Class_;
24: use PhpParser\Node\Stmt\ClassConst;
25: use PhpParser\Node\Stmt\ClassMethod;
26: use PhpParser\Node\Stmt\Const_;
27: use PhpParser\Node\Stmt\Else_;
28: use PhpParser\Node\Stmt\ElseIf_;
29: use PhpParser\Node\Stmt\Enum_;
30: use PhpParser\Node\Stmt\Interface_;
31: use PhpParser\Node\Stmt\Namespace_;
32: use PhpParser\Node\Stmt\Nop;
33: use PhpParser\Node\Stmt\Property;
34: use PhpParser\Node\Stmt\TryCatch;
35: use PhpParser\Node\UseItem;
36: use PhpParser\Node\VarLikeIdentifier;
37: use PhpParser\NodeVisitor\CommentAnnotatingVisitor;
38:
39: abstract class ParserAbstract implements Parser {
/** Marker meaning "no lookahead symbol is currently held"; doParse() reads the next token when it sees it. */
private const SYMBOL_NONE = -1;

/** @var Lexer Lexer that is used when parsing */
protected Lexer $lexer;
/** @var PhpVersion PHP version to target on a best-effort basis */
protected PhpVersion $phpVersion;

/*
 * The following members will be filled with generated parsing data:
 */

/** @var int Size of $tokenToSymbol map */
protected int $tokenToSymbolMapSize;
/** @var int Size of $action table */
protected int $actionTableSize;
/** @var int Size of $goto table */
protected int $gotoTableSize;

/** @var int Symbol number signifying an invalid token */
protected int $invalidSymbol;
/** @var int Symbol number of error recovery token */
protected int $errorSymbol;
/** @var int Action number signifying default action */
protected int $defaultAction;
/** @var int Rule number signifying that an unexpected token was encountered */
protected int $unexpectedTokenRule;

/** @var int States below this number have a second displacement into the $action table, stored at
 *           $actionBase[$state + $numNonLeafStates]. It is consulted when the lookup through
 *           $actionBase[$state] does not match. */
protected int $YY2TBLSTATE;
/** @var int Number of non-leaf states. Shift actions and goto states that are >= this number mean
 *           "shift and reduce", with the rule number obtained by subtracting this value. */
protected int $numNonLeafStates;

/** @var int[] Map of PHP token IDs to internal symbols */
protected array $phpTokenToSymbol;
/** @var array<int, bool> Map of PHP token IDs to drop */
protected array $dropTokens;
/** @var int[] Map of external symbols (static::T_*) to internal symbols */
protected array $tokenToSymbol;
/** @var string[] Map of symbols to their names */
protected array $symbolToName;
/** @var array<int, string> Names of the production rules (only necessary for debugging) */
protected array $productions;

/** @var int[] Map of states to a displacement into the $action table. The corresponding action for this
 *             state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the
 *             action is defaulted, i.e. $actionDefault[$state] should be used instead. */
protected array $actionBase;
/** @var int[] Table of actions. Indexed according to $actionBase comment. */
protected array $action;
/** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol
 *             then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */
protected array $actionCheck;
/** @var int[] Map of states to their default action */
protected array $actionDefault;
/** @var (callable|null)[] Semantic action callbacks, indexed by rule number. doParse() treats a null
 *                         entry as "no custom action": the value of the first element of the
 *                         right-hand side (if there is one) becomes the result of the rule. */
protected array $reduceCallbacks;

/** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this
 *             non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */
protected array $gotoBase;
/** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. */
protected array $goto;
/** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal
 *             then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */
protected array $gotoCheck;
/** @var int[] Map of non-terminals to the default state to goto after their reduction */
protected array $gotoDefault;

/** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for
 *             determining the state to goto after reduction. */
protected array $ruleToNonTerminal;
/** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to
 *             be popped from the stack(s) on reduction. */
protected array $ruleToLength;

/*
 * The following members are part of the parser state:
 */

/** @var mixed Temporary value containing the result of last semantic action (reduction) */
protected $semValue;
/** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */
protected array $semStack;
/** @var int[] Token start position stack */
protected array $tokenStartStack;
/** @var int[] Token end position stack */
protected array $tokenEndStack;

/** @var ErrorHandler Error handler */
protected ErrorHandler $errorHandler;
/** @var int Error state, used to avoid error floods. Set to 3 when recovery starts and decremented on
 *           every shift; a syntax error is only reported while it is 0. */
protected int $errorState;

/** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */
protected ?\SplObjectStorage $createdArrays;

/** @var Token[] Tokens for the current parse */
protected array $tokens;
/** @var int Current position in token array */
protected int $tokenPos;
139:
/**
 * Initialize $reduceCallbacks map.
 *
 * Called once from the constructor, before the token map is created. doParse() looks up
 * $this->reduceCallbacks by rule number, so implementations are expected to fill that array here.
 */
abstract protected function initReduceCallbacks(): void;
144:
/**
 * Creates a parser instance.
 *
 * @param Lexer $lexer Lexer used to tokenize the code handed to parse()
 * @param PhpVersion|null $phpVersion PHP version to target; null selects the newest supported
 *                                    version. This setting is best-effort: even if specified,
 *                                    parsing will generally assume the latest supported version
 *                                    and only adjust behavior in minor ways, for example by
 *                                    omitting errors in older versions and interpreting type
 *                                    hints as a name or identifier depending on version.
 */
public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
    if ($phpVersion === null) {
        $phpVersion = PhpVersion::getNewestSupported();
    }

    $this->lexer = $lexer;
    $this->phpVersion = $phpVersion;

    $this->initReduceCallbacks();
    $this->phpTokenToSymbol = $this->createTokenMap();

    // Token kinds that doParse() skips when it reads the next lookahead token.
    $this->dropTokens = [
        \T_WHITESPACE => true,
        \T_OPEN_TAG => true,
        \T_COMMENT => true,
        \T_DOC_COMMENT => true,
        \T_BAD_CHARACTER => true,
    ];
}
168:
/**
 * Parses PHP code into a node tree.
 *
 * With a non-throwing error handler the parser keeps going after an error and tries to build a
 * partial AST.
 *
 * @param string $code The source code to parse
 * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
 *                                        to ErrorHandler\Throwing.
 *
 * @return Node\Stmt[]|null Array of statements, or null if a non-throwing error handler is used
 *                          and the parser was unable to recover from an error.
 */
public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
    if (!$errorHandler) {
        $errorHandler = new ErrorHandler\Throwing();
    }
    $this->errorHandler = $errorHandler;
    $this->createdArrays = new \SplObjectStorage();

    $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
    $stmts = $this->doParse();

    // Empty array elements are only reported now: while parsing it is not yet known whether an
    // array expression ends up in a destructuring context (where empty elements are legal).
    foreach ($this->createdArrays as $arrayNode) {
        foreach ($arrayNode->items as $element) {
            if (!$element->value instanceof Expr\Error) {
                continue;
            }
            $this->errorHandler->handleError(
                new Error('Cannot use empty array elements in arrays', $element->getAttributes())
            );
        }
    }

    // Drop per-parse state so the parser does not keep unnecessary memory alive between uses.
    $this->semValue = null;
    $this->semStack = [];
    $this->tokenStartStack = [];
    $this->tokenEndStack = [];
    $this->createdArrays = null;

    if ($stmts === null) {
        return null;
    }

    // Attach comments to the nodes of the finished tree.
    $commentAnnotator = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
    $commentAnnotator->traverse($stmts);

    return $stmts;
}
216:
/**
 * Return the token array produced by the lexer for the most recent parse() call.
 *
 * $tokens has no default value, so this must not be called before parse() has run.
 *
 * @return Token[] Tokens of the last parse
 */
public function getTokens(): array {
    return $this->tokens;
}
220:
/**
 * Run the table-driven parse loop over $this->tokens.
 *
 * The outer loop determines the next action for the current state (reading a lookahead token if
 * needed); the inner loop performs reductions, the accept action and error recovery.
 *
 * @return Stmt[]|null The semantic value of the accepted start rule, or null if an error occurred
 *                     that could not be recovered from.
 *
 * @throws \RangeException If a token id has no entry in $phpTokenToSymbol
 */
protected function doParse(): ?array {
    // We start off with no lookahead-token
    $symbol = self::SYMBOL_NONE;
    $tokenValue = null;
    $this->tokenPos = -1;

    // Keep stack of start and end attributes
    $this->tokenStartStack = [];
    $this->tokenEndStack = [0];

    // Start off in the initial state and keep a stack of previous states
    $state = 0;
    $stateStack = [$state];

    // Semantic value stack (contains values of tokens and semantic action results)
    $this->semStack = [];

    // Current position in the stack(s)
    $stackPos = 0;

    $this->errorState = 0;

    for (;;) {
        //$this->traceNewState($state, $symbol);

        // A displacement of 0 means the state has no per-symbol actions: take the default action
        // without reading a token.
        if ($this->actionBase[$state] === 0) {
            $rule = $this->actionDefault[$state];
        } else {
            if ($symbol === self::SYMBOL_NONE) {
                // Advance to the next token that is not listed in $dropTokens.
                do {
                    $token = $this->tokens[++$this->tokenPos];
                    $tokenId = $token->id;
                } while (isset($this->dropTokens[$tokenId]));

                // Map the lexer token id to the internally used symbols.
                $tokenValue = $token->text;
                if (!isset($this->phpTokenToSymbol[$tokenId])) {
                    throw new \RangeException(sprintf(
                        'The lexer returned an invalid token (id=%d, value=%s)',
                        $tokenId, $tokenValue
                    ));
                }
                $symbol = $this->phpTokenToSymbol[$tokenId];

                //$this->traceRead($symbol);
            }

            // Look up the action for ($state, $symbol): first through the primary displacement and,
            // for states below YY2TBLSTATE, through the secondary one. Anything that does not match
            // (or maps to $defaultAction) falls back to the state's default action.
            $idx = $this->actionBase[$state] + $symbol;
            if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                 || ($state < $this->YY2TBLSTATE
                     && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                     && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                && ($action = $this->action[$idx]) !== $this->defaultAction) {
                /*
                 * >= numNonLeafStates: shift and reduce
                 * > 0: shift
                 * = 0: accept
                 * < 0: reduce
                 * = -YYUNEXPECTED: error
                 */
                if ($action > 0) {
                    /* shift */
                    //$this->traceShift($symbol);

                    ++$stackPos;
                    $stateStack[$stackPos] = $state = $action;
                    $this->semStack[$stackPos] = $tokenValue;
                    $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    $this->tokenEndStack[$stackPos] = $this->tokenPos;
                    // The lookahead has been consumed.
                    $symbol = self::SYMBOL_NONE;

                    // Every shift counts the error state down; errors are reported again once it is 0.
                    if ($this->errorState) {
                        --$this->errorState;
                    }

                    if ($action < $this->numNonLeafStates) {
                        continue;
                    }

                    /* $yyn >= numNonLeafStates means shift-and-reduce */
                    $rule = $action - $this->numNonLeafStates;
                } else {
                    $rule = -$action;
                }
            } else {
                $rule = $this->actionDefault[$state];
            }
        }

        for (;;) {
            if ($rule === 0) {
                /* accept */
                //$this->traceAccept();
                return $this->semValue;
            }
            if ($rule !== $this->unexpectedTokenRule) {
                /* reduce */
                //$this->traceReduce($rule);

                $ruleLength = $this->ruleToLength[$rule];
                try {
                    $callback = $this->reduceCallbacks[$rule];
                    if ($callback !== null) {
                        $callback($this, $stackPos);
                    } elseif ($ruleLength > 0) {
                        // No callback: the value of the first right-hand-side element is the result.
                        $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                    }
                } catch (Error $e) {
                    // Errors raised without line information get the line of the current token.
                    if (-1 === $e->getStartLine()) {
                        $e->setStartLine($this->tokens[$this->tokenPos]->line);
                    }

                    $this->emitError($e);
                    // Can't recover from this type of error
                    return null;
                }

                /* Goto - shift nonterminal */
                $lastTokenEnd = $this->tokenEndStack[$stackPos];
                $stackPos -= $ruleLength;
                $nonTerminal = $this->ruleToNonTerminal[$rule];
                $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                    $state = $this->goto[$idx];
                } else {
                    $state = $this->gotoDefault[$nonTerminal];
                }

                ++$stackPos;
                $stateStack[$stackPos] = $state;
                $this->semStack[$stackPos] = $this->semValue;
                $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                if ($ruleLength === 0) {
                    // Empty productions use the start attributes of the lookahead token.
                    $this->tokenStartStack[$stackPos] = $this->tokenPos;
                }
            } else {
                /* error */
                switch ($this->errorState) {
                    case 0:
                        // Only a "fresh" error (error state 0) is reported.
                        $msg = $this->getErrorMessage($symbol, $state);
                        $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                        // Break missing intentionally
                        // no break
                    case 1:
                    case 2:
                        $this->errorState = 3;

                        // Pop until error-expecting state uncovered
                        while (!(
                            (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            || ($state < $this->YY2TBLSTATE
                                && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                        ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                            if ($stackPos <= 0) {
                                // Could not recover from error
                                return null;
                            }
                            $state = $stateStack[--$stackPos];
                            //$this->tracePop($state);
                        }

                        // Shift the error symbol: $action is the action found by the loop condition above.
                        //$this->traceShift($this->errorSymbol);
                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;

                        // We treat the error symbol as being empty, so we reset the end attributes
                        // to the end attributes of the last non-error symbol
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                        break;

                    case 3:
                        if ($symbol === 0) {
                            // Reached EOF without recovering from error
                            return null;
                        }

                        // Still recovering: discard the lookahead and go back to the outer loop,
                        // which reads the next token ("break 2" leaves the switch and the inner loop).
                        //$this->traceDiscard($symbol);
                        $symbol = self::SYMBOL_NONE;
                        break 2;
                }
            }

            // A leaf state encodes another reduction; anything else returns to the outer loop.
            if ($state < $this->numNonLeafStates) {
                break;
            }

            /* >= numNonLeafStates means shift-and-reduce */
            $rule = $state - $this->numNonLeafStates;
        }
    }
}
417:
/**
 * Pass an error to the error handler of the current parse.
 *
 * Whether this call returns or throws depends on the handler that was given to parse().
 *
 * @param Error $error Error to report
 */
protected function emitError(Error $error): void {
    $this->errorHandler->handleError($error);
}
421:
/**
 * Build the syntax error message for an unexpected symbol, listing the expected tokens when
 * getExpectedTokens() provides any.
 *
 * @param int $symbol Unexpected symbol
 * @param int $state State at time of error
 *
 * @return string Formatted error message
 */
protected function getErrorMessage(int $symbol, int $state): string {
    $expectedNames = $this->getExpectedTokens($state);
    $message = 'Syntax error, unexpected ' . $this->symbolToName[$symbol];

    if ($expectedNames === []) {
        return $message;
    }

    return $message . ', expecting ' . implode(' or ', $expectedNames);
}
438:
/**
 * Collect the names of the symbols that have an explicit action in the given state.
 *
 * The error symbol and entries that map to the "unexpected token" rule or the default action are
 * not counted. As soon as a fifth name would be added the list is considered too long to be
 * useful and an empty array is returned instead.
 *
 * @param int $state State
 *
 * @return string[] Expected tokens. If too many, an empty array is returned.
 */
protected function getExpectedTokens(int $state): array {
    $names = [];
    $primaryBase = $this->actionBase[$state];

    foreach ($this->symbolToName as $symbol => $name) {
        // Primary lookup through the state's own displacement.
        $idx = $primaryBase + $symbol;
        $hasEntry = $idx >= 0
            && $idx < $this->actionTableSize
            && $this->actionCheck[$idx] === $symbol;

        // States below YY2TBLSTATE own a second displacement that is tried on a miss.
        if (!$hasEntry && $state < $this->YY2TBLSTATE) {
            $idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol;
            $hasEntry = $idx >= 0
                && $idx < $this->actionTableSize
                && $this->actionCheck[$idx] === $symbol;
        }

        if (!$hasEntry) {
            continue;
        }

        $action = $this->action[$idx];
        if ($action === $this->unexpectedTokenRule
            || $action === $this->defaultAction
            || $symbol === $this->errorSymbol
        ) {
            continue;
        }

        if (count($names) === 4) {
            /* Too many expected tokens */
            return [];
        }

        $names[] = $name;
    }

    return $names;
}
473:
/**
 * Build the location attributes for a node spanning the given token positions.
 *
 * The end line and end file position are derived from the token that follows the end token
 * (its line, and its file position minus one), so a token at $tokenEndPos + 1 has to exist.
 *
 * @param int $tokenStartPos Token position the node starts at
 * @param int $tokenEndPos Token position the node ends at
 * @return array<string, mixed> Attributes
 */
protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
    $firstToken = $this->tokens[$tokenStartPos];
    $followingToken = $this->tokens[$tokenEndPos + 1];

    $start = [
        'startLine' => $firstToken->line,
        'startTokenPos' => $tokenStartPos,
        'startFilePos' => $firstToken->pos,
    ];
    $end = [
        'endLine' => $followingToken->line,
        'endTokenPos' => $tokenEndPos,
        'endFilePos' => $followingToken->pos - 1,
    ];

    // The key sets are disjoint, so the union simply appends the end attributes.
    return $start + $end;
}
493:
/**
 * Build the location attributes for a single token.
 *
 * The last token of the array (the sentinel) has no successor that getAttributes() could use,
 * so its start values double as its end values.
 *
 * @param int $tokenPos Position of the token in $this->tokens
 *
 * @return array<string, mixed> Attributes
 */
protected function getAttributesForToken(int $tokenPos): array {
    $sentinelPos = \count($this->tokens) - 1;
    if ($tokenPos >= $sentinelPos) {
        $sentinel = $this->tokens[$tokenPos];
        $line = $sentinel->line;
        $filePos = $sentinel->pos;

        return [
            'startLine' => $line,
            'startTokenPos' => $tokenPos,
            'startFilePos' => $filePos,
            'endLine' => $line,
            'endTokenPos' => $tokenPos,
            'endFilePos' => $filePos,
        ];
    }

    return $this->getAttributes($tokenPos, $tokenPos);
}
515:
516: /*
517: * Tracing functions used for debugging the parser.
518: */
519:
520: /*
521: protected function traceNewState($state, $symbol): void {
522: echo '% State ' . $state
523: . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
524: }
525:
526: protected function traceRead($symbol): void {
527: echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
528: }
529:
530: protected function traceShift($symbol): void {
531: echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
532: }
533:
534: protected function traceAccept(): void {
535: echo "% Accepted.\n";
536: }
537:
538: protected function traceReduce($n): void {
539: echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
540: }
541:
542: protected function tracePop($state): void {
543: echo '% Recovering, uncovered state ' . $state . "\n";
544: }
545:
546: protected function traceDiscard($symbol): void {
547: echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
548: }
549: */
550:
551: /*
552: * Helper functions invoked by semantic actions
553: */
554:
/**
 * Normalizes namespace statements and reports misplaced code.
 *
 * For brace-style namespaces the statements are returned unchanged; a single error is emitted if
 * code other than __halt_compiler() or a nop follows the first namespace. For semicolon-style
 * namespaces the statements following a declaration are moved into that namespace's ->stmts.
 *
 * @param Node\Stmt[] $stmts
 * @return Node\Stmt[]
 */
protected function handleNamespaces(array $stmts): array {
    $style = $this->getNamespacingStyle($stmts);
    if ($style === null) {
        // No namespaces at all, nothing to normalize.
        return $stmts;
    }

    if ($style === 'brace') {
        // Only check that no invalid statements sit between or after the namespaces.
        $seenNamespace = false;
        $alreadyReported = false;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Node\Stmt\Namespace_) {
                $seenNamespace = true;
                continue;
            }
            if (!$seenNamespace || $alreadyReported) {
                continue;
            }
            if ($stmt instanceof Node\Stmt\HaltCompiler || $stmt instanceof Node\Stmt\Nop) {
                continue;
            }

            $this->emitError(new Error(
                'No code may exist outside of namespace {}', $stmt->getAttributes()));
            // One error is enough, do not repeat it for every statement.
            $alreadyReported = true;
        }
        return $stmts;
    }

    // Semicolon style: statements after a declaration belong to that namespace.
    $result = [];
    $receivingNs = null; // Namespace currently collecting statements; null means top level
    $lastNs = null;
    foreach ($stmts as $stmt) {
        if ($stmt instanceof Node\Stmt\Namespace_) {
            if ($lastNs !== null) {
                $this->fixupNamespaceAttributes($lastNs);
            }

            if ($stmt->stmts === null) {
                $stmt->stmts = [];
                $receivingNs = $stmt;
            } else {
                // Invalid mix of styles: a braced namespace keeps its own statements and
                // whatever follows goes back to the top level.
                $receivingNs = null;
            }

            $result[] = $stmt;
            $lastNs = $stmt;
            continue;
        }

        if ($stmt instanceof Node\Stmt\HaltCompiler || $receivingNs === null) {
            // __halt_compiler() is never moved into a namespace.
            $result[] = $stmt;
            continue;
        }

        $receivingNs->stmts[] = $stmt;
    }

    if ($lastNs !== null) {
        $this->fixupNamespaceAttributes($lastNs);
    }
    return $result;
}
616:
/**
 * Extend the end attributes of a namespace node to the end of its last statement.
 *
 * Needed after statements were moved into the node. Only the built-in end attributes are
 * copied, and only those the last statement actually has; an empty namespace is left untouched.
 */
private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
    $children = $stmt->stmts;
    if (empty($children)) {
        return;
    }

    $finalChild = $children[count($children) - 1];
    foreach (['endLine', 'endFilePos', 'endTokenPos'] as $attributeName) {
        if (!$finalChild->hasAttribute($attributeName)) {
            continue;
        }
        $stmt->setAttribute($attributeName, $finalChild->getAttribute($attributeName));
    }
}
634:
/**
 * Get the attributes to attach to a namespace-related error.
 *
 * The end attributes are narrowed so that only the "namespace" keyword is covered instead of
 * the whole namespace; each one is only adjusted if the matching start attribute is set.
 *
 * @return array<string, mixed>
 */
private function getNamespaceErrorAttributes(Namespace_ $node): array {
    $attributes = $node->getAttributes();

    $startLine = $attributes['startLine'] ?? null;
    if ($startLine !== null) {
        $attributes['endLine'] = $startLine;
    }

    $startTokenPos = $attributes['startTokenPos'] ?? null;
    if ($startTokenPos !== null) {
        $attributes['endTokenPos'] = $startTokenPos;
    }

    $startFilePos = $attributes['startFilePos'] ?? null;
    if ($startFilePos !== null) {
        $attributes['endFilePos'] = $startFilePos + \strlen('namespace') - 1;
    }

    return $attributes;
}
650:
/**
 * Determine namespacing style (semicolon or brace)
 *
 * Errors are emitted if a namespace declaration is preceded by statements that are not allowed
 * there, or if both styles are mixed. In the mixed case "semicolon" is returned.
 *
 * @param Node[] $stmts Top-level statements.
 *
 * @return null|string One of "semicolon", "brace" or null (no namespaces)
 */
private function getNamespacingStyle(array $stmts): ?string {
    $detectedStyle = null;
    $sawForbiddenStmt = false;

    foreach ($stmts as $index => $stmt) {
        if (!$stmt instanceof Node\Stmt\Namespace_) {
            /* declare(), __halt_compiler() and nops can be used before a namespace declaration,
             * and there may be a hashbang line at the very start of the file. */
            $isAllowed = $stmt instanceof Node\Stmt\Declare_
                || $stmt instanceof Node\Stmt\HaltCompiler
                || $stmt instanceof Node\Stmt\Nop
                || ($index === 0
                    && $stmt instanceof Node\Stmt\InlineHTML
                    && preg_match('/\A#!.*\r?\n\z/', $stmt->value));

            /* Everything else is forbidden before namespace declarations */
            if (!$isAllowed) {
                $sawForbiddenStmt = true;
            }
            continue;
        }

        $styleOfStmt = $stmt->stmts === null ? 'semicolon' : 'brace';

        if ($detectedStyle === null) {
            // First namespace: it fixes the style and must not be preceded by other code.
            $detectedStyle = $styleOfStmt;
            if ($sawForbiddenStmt) {
                $this->emitError(new Error(
                    'Namespace declaration statement has to be the very first statement in the script',
                    $this->getNamespaceErrorAttributes($stmt)
                ));
            }
            continue;
        }

        if ($detectedStyle === $styleOfStmt) {
            continue;
        }

        $this->emitError(new Error(
            'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
            $this->getNamespaceErrorAttributes($stmt)
        ));
        // Treat like semicolon style for namespace normalization
        return 'semicolon';
    }

    return $detectedStyle;
}
700:
/**
 * Turn an unqualified name into an identifier if it names a builtin type of the target version.
 *
 * Names that are not unqualified, or that the target PHP version does not support as a builtin
 * type, are returned unchanged. The identifier uses the lowercased name and keeps the
 * attributes of the name.
 *
 * @return Name|Identifier
 */
protected function handleBuiltinTypes(Name $name) {
    if ($name->isUnqualified()) {
        $lowercased = $name->toLowerString();
        if ($this->phpVersion->supportsBuiltinType($lowercased)) {
            return new Node\Identifier($lowercased, $name->getAttributes());
        }
    }

    return $name;
}
714:
/**
 * Get the location attributes for the symbol at a stack position, using the token start and
 * end positions recorded for it.
 *
 * @param int $stackPos Stack location
 *
 * @return array<string, mixed> Combined start and end attributes
 */
protected function getAttributesAt(int $stackPos): array {
    $startTokenPos = $this->tokenStartStack[$stackPos];
    $endTokenPos = $this->tokenEndStack[$stackPos];

    return $this->getAttributes($startTokenPos, $endTokenPos);
}
725:
/**
 * Determine which spelling of the float cast was used.
 *
 * The cast text is matched case-insensitively: "float" is checked first, then "real"; anything
 * else is reported as the double kind.
 *
 * @param string $cast Text of the cast
 *
 * @return int One of the Double::KIND_* constants
 */
protected function getFloatCastKind(string $cast): int {
    $lowered = strtolower($cast);
    $kindsByKeyword = [
        'float' => Double::KIND_FLOAT,
        'real' => Double::KIND_REAL,
    ];

    foreach ($kindsByKeyword as $keyword => $kind) {
        if (false !== strpos($lowered, $keyword)) {
            return $kind;
        }
    }

    return Double::KIND_DOUBLE;
}
738:
/**
 * Create an integer node from its source representation.
 *
 * If Int_::fromString() throws an Error, the error is reported and a node with the dummy value 0
 * is returned instead.
 *
 * @param string $str Integer literal text
 * @param array<string, mixed> $attributes Attributes for the created node
 * @param bool $allowInvalidOctal Passed through to Int_::fromString()
 */
protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
    try {
        $node = Int_::fromString($str, $attributes, $allowInvalidOctal);
    } catch (Error $invalidNumber) {
        $this->emitError($invalidNumber);
        // Use dummy value
        $node = new Int_(0, $attributes);
    }

    return $node;
}
749:
/**
 * Parse a T_NUM_STRING token into either an integer or string node.
 *
 * An integer node is only produced for "0" or a decimal number without leading zeros (with an
 * optional minus sign) whose numeric value is still an int; everything else stays a string.
 *
 * @param string $str Number string
 * @param array<string, mixed> $attributes Attributes
 *
 * @return Int_|String_ Integer or string node.
 */
protected function parseNumString(string $str, array $attributes) {
    if (preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
        // Values outside the integer range turn into floats here and are kept as strings.
        $numeric = +$str;
        if (is_int($numeric)) {
            return new Int_($numeric, $attributes);
        }
    }

    return new String_($str, $attributes);
}
770:
/**
 * Remove up to $indentLen characters of leading indentation from every line of a doc string part.
 *
 * Errors are emitted if the removed prefix contains the "other" indentation character, or if a
 * non-empty line is indented by less than $indentLen.
 *
 * @param string $string Text to process
 * @param int $indentLen Number of indentation characters to strip; 0 returns $string unchanged
 * @param string $indentChar Indentation character in use, " " or "\t"
 * @param bool $newlineAtStart Whether the start of $string counts as the start of a line
 * @param bool $newlineAtEnd Whether the end of $string counts as the end of a line
 * @param array<string, mixed> $attributes Attributes attached to emitted errors
 */
protected function stripIndentation(
    string $string, int $indentLen, string $indentChar,
    bool $newlineAtStart, bool $newlineAtEnd, array $attributes
): string {
    if ($indentLen === 0) {
        return $string;
    }

    $lineStart = '(?<=\n)';
    if ($newlineAtStart) {
        $lineStart = '(?:(?<=\n)|\A)';
    }
    $lineEnd = '(?=[\r\n])';
    if ($newlineAtEnd) {
        $lineEnd = '(?:(?=[\r\n])|\z)';
    }

    // Group 1 is the leading whitespace of a line, group 2 only takes part in the match if
    // nothing but the line end follows (i.e. the line is otherwise empty).
    $pattern = '/' . $lineStart . '([ \t]*)(' . $lineEnd . ')?/';

    $stripLine = function ($matches) use ($indentLen, $indentChar, $attributes) {
        $removed = substr($matches[1], 0, $indentLen);
        $foreignChar = $indentChar === " " ? "\t" : " ";

        if (false !== strpos($removed, $foreignChar)) {
            $this->emitError(new Error(
                'Invalid indentation - tabs and spaces cannot be mixed', $attributes
            ));
        } elseif (strlen($removed) < $indentLen && !isset($matches[2])) {
            $this->emitError(new Error(
                'Invalid body indentation level ' .
                '(expecting an indentation level of at least ' . $indentLen . ')',
                $attributes
            ));
        }

        return substr($matches[0], strlen($removed));
    };

    return preg_replace_callback($pattern, $stripLine, $string);
}
803:
/**
 * Create the node for a heredoc or nowdoc string.
 *
 * The label is taken from the start token and the indentation from the end token; both are
 * stored as attributes together with the kind. The indentation is stripped from the contents
 * and, except for plain nowdoc strings, escape sequences are parsed.
 *
 * @param string $startToken Text of the doc string start token
 * @param string|(Expr|InterpolatedStringPart)[] $contents Plain contents, or list of parts
 * @param string $endToken Text of the doc string end token
 * @param array<string, mixed> $attributes Attributes for the created node
 * @param array<string, mixed> $endTokenAttributes Attributes used for an indentation error
 * @param bool $parseUnicodeEscape Passed through to String_::parseEscapeSequences()
 */
protected function parseDocString(
    string $startToken, $contents, string $endToken,
    array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
): Expr {
    // A quote in the start token is taken as the nowdoc marker.
    $kind = String_::KIND_HEREDOC;
    if (strpos($startToken, "'") !== false) {
        $kind = String_::KIND_NOWDOC;
    }

    $startPattern = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
    $matchCount = preg_match($startPattern, $startToken, $startMatch);
    assert($matchCount === 1);
    $label = $startMatch[1];

    $matchCount = preg_match('/\A[ \t]*/', $endToken, $endMatch);
    assert($matchCount === 1);
    $indentation = $endMatch[0];

    $attributes['kind'] = $kind;
    $attributes['docLabel'] = $label;
    $attributes['docIndentation'] = $indentation;

    $usesSpaces = false !== strpos($indentation, " ");
    $usesTabs = false !== strpos($indentation, "\t");
    if ($usesSpaces && $usesTabs) {
        $this->emitError(new Error(
            'Invalid indentation - tabs and spaces cannot be mixed',
            $endTokenAttributes
        ));

        // Proceed processing as if this doc string is not indented
        $indentation = '';
    }

    $indentLen = \strlen($indentation);
    $indentChar = $usesSpaces ? " " : "\t";

    if (\is_string($contents)) {
        if ($contents === '') {
            $attributes['rawValue'] = $contents;
            return new String_('', $attributes);
        }

        $rawValue = $this->stripIndentation(
            $contents, $indentLen, $indentChar, true, true, $attributes
        );
        // The final line break is not part of the string value.
        $rawValue = preg_replace('~(\r\n|\n|\r)\z~', '', $rawValue);
        $attributes['rawValue'] = $rawValue;

        $value = $rawValue;
        if ($kind === String_::KIND_HEREDOC) {
            $value = String_::parseEscapeSequences($rawValue, null, $parseUnicodeEscape);
        }

        return new String_($value, $attributes);
    }

    assert(count($contents) > 0);
    if (!$contents[0] instanceof Node\InterpolatedStringPart) {
        // If there is no leading encapsed string part, pretend there is an empty one.
        // The result is discarded, the call is only made for the errors it may emit.
        $this->stripIndentation(
            '', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
        );
    }

    $lastIndex = \count($contents) - 1;
    $keptParts = [];
    foreach ($contents as $i => $part) {
        if (!$part instanceof Node\InterpolatedStringPart) {
            $keptParts[] = $part;
            continue;
        }

        $isLast = $i === $lastIndex;
        $part->value = $this->stripIndentation(
            $part->value, $indentLen, $indentChar,
            $i === 0, $isLast, $part->getAttributes()
        );
        if ($isLast) {
            $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
        }
        $part->setAttribute('rawValue', $part->value);
        $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);

        // String parts that ended up empty are dropped.
        if ('' !== $part->value) {
            $keptParts[] = $part;
        }
    }

    return new InterpolatedString($keptParts, $attributes);
}
892:
/**
 * Create a comment object from a comment token.
 *
 * @param Token $token A T_COMMENT or T_DOC_COMMENT token
 * @param int $tokenPos Position of the token; used as both start and end token position
 *
 * @return Comment A Comment\Doc for T_DOC_COMMENT tokens, a plain Comment otherwise
 */
protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
    assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);

    $endLine = $token->getEndLine();
    $endFilePos = $token->getEndPos() - 1;

    if (\T_DOC_COMMENT === $token->id) {
        return new Comment\Doc(
            $token->text, $token->line, $token->pos, $tokenPos,
            $endLine, $endFilePos, $tokenPos
        );
    }

    return new Comment(
        $token->text, $token->line, $token->pos, $tokenPos,
        $endLine, $endFilePos, $tokenPos
    );
}
901:
/**
 * Get last comment before the given token position, if any
 *
 * The search walks backwards and stops at the first token that is not listed in $dropTokens.
 *
 * @param int $tokenPos Token position to start searching before
 */
protected function getCommentBeforeToken(int $tokenPos): ?Comment {
    for ($pos = $tokenPos - 1; $pos >= 0; --$pos) {
        $candidate = $this->tokens[$pos];
        $candidateId = $candidate->id;

        if (!isset($this->dropTokens[$candidateId])) {
            // Reached a token that is not dropped: no comment directly precedes the position.
            return null;
        }

        if ($candidateId === \T_COMMENT || $candidateId === \T_DOC_COMMENT) {
            return $this->createCommentFromToken($candidate, $pos);
        }
    }

    return null;
}
918:
/**
 * Create a zero-length nop to capture preceding comments, if any.
 *
 * The nop is placed directly behind the last comment before $tokenPos: its start positions are
 * one past the comment's end positions, its end positions equal them.
 *
 * @param int $tokenPos Token position in front of which a comment is looked for
 *
 * @return Nop|null The nop, or null if no comment precedes the position
 */
protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
    $precedingComment = $this->getCommentBeforeToken($tokenPos);
    if (null === $precedingComment) {
        return null;
    }

    $line = $precedingComment->getEndLine();
    $filePos = $precedingComment->getEndFilePos();
    $tokenIndex = $precedingComment->getEndTokenPos();

    return new Nop([
        'startLine' => $line,
        'endLine' => $line,
        'startFilePos' => $filePos + 1,
        'endFilePos' => $filePos,
        'startTokenPos' => $tokenIndex + 1,
        'endTokenPos' => $tokenIndex,
    ]);
}
941:
/**
 * Create a nop covering the given token range, but only when a comment
 * precedes the start position.
 *
 * @return Nop|null The nop, or null when getCommentBeforeToken() finds nothing
 */
protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
    $hasComment = null !== $this->getCommentBeforeToken($tokenStartPos);

    return $hasComment
        ? new Nop($this->getAttributes($tokenStartPos, $tokenEndPos))
        : null;
}
948:
/**
 * Handle __halt_compiler: stop token consumption and fetch the trailing text.
 *
 * @return string Text of the token after the current one if it is inline HTML,
 *                otherwise an empty string
 */
protected function handleHaltCompiler(): string {
    // Grab the following token before the position is moved.
    $following = $this->tokens[$this->tokenPos + 1];

    // Prevent the lexer from returning any further tokens.
    $this->tokenPos = \count($this->tokens) - 2;

    // Return text after __halt_compiler.
    if ($following->id === \T_INLINE_HTML) {
        return $following->text;
    }
    return '';
}
957:
/**
 * Check whether the inline HTML token at the given stack position is preceded
 * by a newline.
 *
 * If the token is the very first one, true is returned. Otherwise the
 * previous token (asserted to be a close tag) is searched for "\n" or "\r".
 *
 * @param int $stackPos Index into $this->tokenStartStack
 */
protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
    $htmlPos = $this->tokenStartStack[$stackPos];
    $htmlToken = $this->tokens[$htmlPos];
    assert($htmlToken->id == \T_INLINE_HTML);

    if ($htmlPos <= 0) {
        // There is no preceding token to inspect.
        return true;
    }

    $closeTag = $this->tokens[$htmlPos - 1];
    assert($closeTag->id == \T_CLOSE_TAG);

    foreach (["\n", "\r"] as $newline) {
        if (false !== strpos($closeTag->text, $newline)) {
            return true;
        }
    }
    return false;
}
970:
/**
 * Build the attributes for an empty element by delegating to
 * getAttributesForToken() for the token at $tokenPos.
 *
 * @return array<string, mixed>
 */
protected function createEmptyElemAttributes(int $tokenPos): array {
    $attributes = $this->getAttributesForToken($tokenPos);
    return $attributes;
}
977:
/**
 * Convert an array node that is used as a destructuring target into a list node.
 *
 * Each item is translated as follows:
 *  - an item whose value is an Expr\Error placeholder becomes null,
 *  - an item whose value is itself an Array_ is converted recursively,
 *  - any other item is kept unchanged.
 *
 * The resulting list receives the array's attributes, with 'kind' forced to
 * Expr\List_::KIND_ARRAY.
 *
 * @param Array_ $node Array node to convert; it is removed from $this->createdArrays
 */
protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
    // offsetUnset() is the equivalent of SplObjectStorage::detach(), which is
    // deprecated as of PHP 8.5 and would raise a deprecation notice there.
    // NOTE(review): assumes $this->createdArrays is an \SplObjectStorage
    // (declared outside this block) - confirm.
    $this->createdArrays->offsetUnset($node);

    $convertItem = function (Node\ArrayItem $item) {
        if ($item->value instanceof Expr\Error) {
            // We used Error as a placeholder for empty elements, which are legal for destructuring.
            return null;
        }
        if ($item->value instanceof Array_) {
            // Nested array patterns must become nested lists as well.
            return new Node\ArrayItem(
                $this->fixupArrayDestructuring($item->value),
                $item->key, $item->byRef, $item->getAttributes());
        }
        return $item;
    };

    // With the + operator the left operand wins, so 'kind' overrides any existing value.
    $attributes = ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes();

    return new Expr\List_(array_map($convertItem, $node->items), $attributes);
}
993:
/**
 * Replace placeholder items of a list node with null, modifying the node in place.
 */
protected function postprocessList(Expr\List_ $node): void {
    foreach ($node->items as $index => $element) {
        if (!($element->value instanceof Expr\Error)) {
            continue;
        }
        // We used Error as a placeholder for empty elements, which are legal for destructuring.
        $node->items[$index] = null;
    }
}
1002:
/**
 * Extend the end position of an else/elseif node over a trailing nop statement.
 *
 * If the last statement is a Nop, its 'endLine', 'endFilePos' and
 * 'endTokenPos' attributes (those that are set) are copied onto the node.
 *
 * @param ElseIf_|Else_ $node
 */
protected function fixupAlternativeElse($node): void {
    $stmtCount = \count($node->stmts);
    if ($stmtCount === 0) {
        return;
    }

    $lastStmt = $node->stmts[$stmtCount - 1];
    if (!($lastStmt instanceof Nop)) {
        return;
    }

    // Make sure a trailing nop statement carrying comments is part of the node.
    $nopAttrs = $lastStmt->getAttributes();
    foreach (['endLine', 'endFilePos', 'endTokenPos'] as $key) {
        if (isset($nopAttrs[$key])) {
            $node->setAttribute($key, $nopAttrs[$key]);
        }
    }
}
1020:
/**
 * Run Modifiers::verifyClassModifier() on the two modifier values and report
 * a failure as an emitted error instead of letting it propagate.
 *
 * @param int $a           First value passed to verifyClassModifier()
 * @param int $b           Second value passed to verifyClassModifier()
 * @param int $modifierPos Position handed to getAttributesAt() to locate the error
 */
protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
    try {
        Modifiers::verifyClassModifier($a, $b);
    } catch (Error $e) {
        // Attach location attributes before emitting the error.
        $attributes = $this->getAttributesAt($modifierPos);
        $e->setAttributes($attributes);
        $this->emitError($e);
    }
}
1029:
/**
 * Run Modifiers::verifyModifier() on the two modifier values and report a
 * failure as an emitted error instead of letting it propagate.
 *
 * @param int $a           First value passed to verifyModifier()
 * @param int $b           Second value passed to verifyModifier()
 * @param int $modifierPos Position handed to getAttributesAt() to locate the error
 */
protected function checkModifier(int $a, int $b, int $modifierPos): void {
    // verifyModifier() is also used elsewhere, so its exception is caught
    // here and converted into an emitted error with location attributes.
    try {
        Modifiers::verifyModifier($a, $b);
    } catch (Error $e) {
        $attributes = $this->getAttributesAt($modifierPos);
        $e->setAttributes($attributes);
        $this->emitError($e);
    }
}
1039:
/**
 * Emit an error if a variadic parameter declares a default value.
 *
 * The error carries the attributes of the default value node.
 */
protected function checkParam(Param $node): void {
    if (!$node->variadic || null === $node->default) {
        return;
    }

    $this->emitError(new Error(
        'Variadic parameter cannot have a default value',
        $node->default->getAttributes()
    ));
}
1048:
/**
 * Emit an error if a try statement has neither catch clauses nor a finally clause.
 */
protected function checkTryCatch(TryCatch $node): void {
    if (!empty($node->catches) || null !== $node->finally) {
        // At least one handler is present.
        return;
    }

    $this->emitError(new Error(
        'Cannot use try without catch or finally', $node->getAttributes()
    ));
}
1056:
/**
 * Emit an error for every namespace declaration found directly among the
 * statements of the given namespace.
 *
 * A namespace whose stmts property is null is accepted without checks.
 */
protected function checkNamespace(Namespace_ $node): void {
    foreach ($node->stmts ?? [] as $stmt) {
        if (!($stmt instanceof Namespace_)) {
            continue;
        }
        $this->emitError(new Error(
            'Namespace declarations cannot be nested', $stmt->getAttributes()
        ));
    }
}
1068:
/**
 * Emit an error if the given name is a special class name.
 *
 * @param Identifier|null $name    Name to check; null is accepted without checks
 * @param int             $namePos Position handed to getAttributesAt() to locate the error
 */
private function checkClassName(?Identifier $name, int $namePos): void {
    if (null === $name || !$name->isSpecialClassName()) {
        return;
    }

    $message = sprintf('Cannot use \'%s\' as class name as it is reserved', $name);
    $this->emitError(new Error($message, $this->getAttributesAt($namePos)));
}
1077:
/**
 * Emit an error for every interface name that is a special class name.
 *
 * Each error carries the attributes of the offending name.
 *
 * @param Name[] $interfaces
 */
private function checkImplementedInterfaces(array $interfaces): void {
    foreach ($interfaces as $interfaceName) {
        if (!$interfaceName->isSpecialClassName()) {
            continue;
        }
        $message = sprintf('Cannot use \'%s\' as interface name as it is reserved', $interfaceName);
        $this->emitError(new Error($message, $interfaceName->getAttributes()));
    }
}
1089:
/**
 * Validate the names used by a class declaration.
 *
 * Checks, in order: the class's own name, the extended class name (if any)
 * and the implemented interface names. Special class names are reported as
 * errors.
 *
 * @param int $namePos Position used to locate an error about the class's own name
 */
protected function checkClass(Class_ $node, int $namePos): void {
    $this->checkClassName($node->name, $namePos);

    $parent = $node->extends;
    if ($parent && $parent->isSpecialClassName()) {
        $message = sprintf('Cannot use \'%s\' as class name as it is reserved', $parent);
        $this->emitError(new Error($message, $parent->getAttributes()));
    }

    $this->checkImplementedInterfaces($node->implements);
}
1102:
/**
 * Validate the names used by an interface declaration: its own name first,
 * then the names it extends.
 *
 * @param int $namePos Position used to locate an error about the interface's own name
 */
protected function checkInterface(Interface_ $node, int $namePos): void {
    $ownName = $node->name;
    $this->checkClassName($ownName, $namePos);

    $parents = $node->extends;
    $this->checkImplementedInterfaces($parents);
}
1107:
/**
 * Validate the names used by an enum declaration: its own name first, then
 * the names of the implemented interfaces.
 *
 * @param int $namePos Position used to locate an error about the enum's own name
 */
protected function checkEnum(Enum_ $node, int $namePos): void {
    $ownName = $node->name;
    $this->checkClassName($ownName, $namePos);

    $interfaces = $node->implements;
    $this->checkImplementedInterfaces($interfaces);
}
1112:
/**
 * Validate the modifiers of a class method.
 *
 * Emits an error when __construct, __destruct or __clone (matched on the
 * lowercased name) is static, and when any method is readonly. Both errors
 * are located via $modifierPos.
 */
protected function checkClassMethod(ClassMethod $node, int $modifierPos): void {
    // Message templates for methods that must not be static, keyed by lowercased name.
    $nonStaticMessages = [
        '__construct' => 'Constructor %s() cannot be static',
        '__destruct' => 'Destructor %s() cannot be static',
        '__clone' => 'Clone method %s() cannot be static',
    ];

    if ($node->flags & Modifiers::STATIC) {
        $lowerName = $node->name->toLowerString();
        if (isset($nonStaticMessages[$lowerName])) {
            // The message shows the name as written, not the lowercased form.
            $this->emitError(new Error(
                sprintf($nonStaticMessages[$lowerName], $node->name),
                $this->getAttributesAt($modifierPos)));
        }
    }

    if ($node->flags & Modifiers::READONLY) {
        $message = sprintf('Method %s() cannot be readonly', $node->name);
        $this->emitError(new Error($message, $this->getAttributesAt($modifierPos)));
    }
}
1140:
/**
 * Emit an error for each of the static, abstract and readonly modifiers that
 * is set on a class constant.
 *
 * @param int $modifierPos Position handed to getAttributesAt() to locate the errors
 */
protected function checkClassConst(ClassConst $node, int $modifierPos): void {
    $forbidden = [Modifiers::STATIC, Modifiers::ABSTRACT, Modifiers::READONLY];

    foreach ($forbidden as $modifier) {
        if (!($node->flags & $modifier)) {
            continue;
        }
        $message = "Cannot use '" . Modifiers::toString($modifier) . "' as constant modifier";
        $this->emitError(new Error($message, $this->getAttributesAt($modifierPos)));
    }
}
1150:
/**
 * Emit an error if a use item is aliased to a special class name.
 *
 * @param int $namePos Position handed to getAttributesAt() to locate the error
 */
protected function checkUseUse(UseItem $node, int $namePos): void {
    $alias = $node->alias;
    if (!$alias || !$alias->isSpecialClassName()) {
        return;
    }

    // %2$s repeats the alias, so only two arguments are supplied.
    $message = sprintf(
        'Cannot use %s as %s because \'%2$s\' is a special class name',
        $node->name, $alias
    );
    $this->emitError(new Error($message, $this->getAttributesAt($namePos)));
}
1162:
/**
 * Emit an error if the given property statement declares more than one property.
 *
 * @param int $hookPos Position handed to getAttributesAt() to locate the error
 */
protected function checkPropertyHooksForMultiProperty(Property $property, int $hookPos): void {
    if (count($property->props) <= 1) {
        return;
    }

    $this->emitError(new Error(
        'Cannot use hooks when declaring multiple properties',
        $this->getAttributesAt($hookPos)
    ));
}
1169:
/**
 * Emit an error if the given hook list is empty.
 *
 * @param PropertyHook[] $hooks
 * @param int            $hookPos Position handed to getAttributesAt() to locate the error
 */
protected function checkEmptyPropertyHookList(array $hooks, int $hookPos): void {
    if (!empty($hooks)) {
        return;
    }

    $this->emitError(new Error(
        'Property hook list cannot be empty',
        $this->getAttributesAt($hookPos)
    ));
}
1177:
/**
 * Validate a single property hook.
 *
 * Emits an error when the lowercased hook name is neither "get" nor "set",
 * and when a "get" hook comes with a parameter list.
 *
 * @param int|null $paramListPos Position of the parameter list, or null if there is none
 */
protected function checkPropertyHook(PropertyHook $hook, ?int $paramListPos): void {
    $name = $hook->name->toLowerString();
    $isGet = $name === 'get';

    if (!$isGet && $name !== 'set') {
        // The message shows the name as written; the error is located at the name node.
        $message = 'Unknown hook "' . $hook->name . '", expected "get" or "set"';
        $this->emitError(new Error($message, $hook->name->getAttributes()));
    }

    if ($isGet && $paramListPos !== null) {
        $this->emitError(new Error(
            'get hook must not have a parameter list',
            $this->getAttributesAt($paramListPos)
        ));
    }
}
1190:
/**
 * Validate a modifier applied to a property hook.
 *
 * First runs Modifiers::verifyModifier() on the two values and emits any
 * failure as an error. Independently of that result, emits an error if $b is
 * anything other than Modifiers::FINAL.
 *
 * @param int $a           First value passed to verifyModifier()
 * @param int $b           Second value passed to verifyModifier(); must be FINAL
 * @param int $modifierPos Position handed to getAttributesAt() to locate the errors
 */
protected function checkPropertyHookModifiers(int $a, int $b, int $modifierPos): void {
    try {
        Modifiers::verifyModifier($a, $b);
    } catch (Error $e) {
        $e->setAttributes($this->getAttributesAt($modifierPos));
        $this->emitError($e);
    }

    if ($b == Modifiers::FINAL) {
        // final is the only modifier accepted here.
        return;
    }

    $message = 'Cannot use the ' . Modifiers::toString($b) . ' modifier on a property hook';
    $this->emitError(new Error($message, $this->getAttributesAt($modifierPos)));
}
1205:
/**
 * Emit an error if a const statement has attribute groups and declares more
 * than one constant.
 */
protected function checkConstantAttributes(Const_ $node): void {
    $hasAttributes = $node->attrGroups !== [];
    if (!$hasAttributes || count($node->consts) <= 1) {
        return;
    }

    $this->emitError(new Error(
        'Cannot use attributes on multiple constants at once',
        $node->getAttributes()
    ));
}
1212:
/**
 * Store the property name in the 'propertyName' attribute of every hook of the node.
 *
 * For a Property the name of its first declared property is used; otherwise
 * the name of the node's variable is used.
 *
 * @param Property|Param $node
 */
protected function addPropertyNameToHooks(Node $node): void {
    $propertyName = $node instanceof Property
        ? $node->props[0]->name->toString()
        : $node->var->name;

    foreach ($node->hooks as $hook) {
        $hook->setAttribute('propertyName', $propertyName);
    }
}
1226:
/**
 * Decide whether an argument list counts as a "simple" exit call.
 *
 * That is the case for no arguments at all, or for exactly one argument that
 * is an Arg without a name, not by reference and not unpacked.
 *
 * @param array<Node\Arg|Node\VariadicPlaceholder> $args
 */
private function isSimpleExit(array $args): bool {
    $numArgs = \count($args);
    if ($numArgs === 0) {
        return true;
    }
    if ($numArgs !== 1) {
        return false;
    }

    $only = $args[0];
    if (!($only instanceof Arg)) {
        return false;
    }
    return $only->name === null
        && $only->byRef === false
        && $only->unpack === false;
}
1239:
/**
 * Create the expression node for an exit/die construct.
 *
 * Argument lists accepted by isSimpleExit() produce an Expr\Exit_ whose
 * 'kind' attribute is KIND_EXIT when the lowercased name is "exit" and
 * KIND_DIE otherwise. Every other argument list produces an Expr\FuncCall
 * on $name.
 *
 * @param string $name    Name as written in the source
 * @param int    $namePos Position used for the attributes of the function name node
 * @param array<Node\Arg|Node\VariadicPlaceholder> $args
 * @param array<string, mixed> $attrs
 */
protected function createExitExpr(string $name, int $namePos, array $args, array $attrs): Expr {
    if (!$this->isSimpleExit($args)) {
        $funcName = new Name($name, $this->getAttributesAt($namePos));
        return new Expr\FuncCall($funcName, $args, $attrs);
    }

    // Create Exit node for backwards compatibility.
    $attrs['kind'] = strtolower($name) === 'exit'
        ? Expr\Exit_::KIND_EXIT
        : Expr\Exit_::KIND_DIE;
    $exitValue = \count($args) === 1 ? $args[0]->value : null;

    return new Expr\Exit_($exitValue, $attrs);
}
1252:
/**
 * Creates the token map.
 *
 * The token map translates PHP's internal token identifiers into the
 * internal symbol identifiers used by the parser. Additionally,
 * T_OPEN_TAG_WITH_ECHO is mapped to T_ECHO and T_CLOSE_TAG to ';'.
 * Tokens that resolve to the invalid symbol are left out of the map.
 *
 * @return array<int, int> The token map
 */
protected function createTokenMap(): array {
    // Step 1: map PHP token IDs to external symbol IDs.
    $externalMap = [];

    // Single-char tokens use an identity mapping.
    for ($char = 0; $char < 256; ++$char) {
        $externalMap[$char] = $char;
    }

    foreach ($this->symbolToName as $symbolName) {
        if ($symbolName[0] !== 'T') {
            continue;
        }
        // The global constant is PHP's token ID; the constant of the same
        // name on the concrete parser class is the external symbol ID.
        $phpTokenId = \constant($symbolName);
        $externalMap[$phpTokenId] = constant(static::class . '::' . $symbolName);
    }

    // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
    $externalMap[\T_OPEN_TAG_WITH_ECHO] = static::T_ECHO;
    // T_CLOSE_TAG is equivalent to ';'
    $externalMap[\T_CLOSE_TAG] = ord(';');

    // Step 2: translate external symbol IDs to internal symbol IDs,
    // skipping everything that maps to the invalid symbol.
    $internalMap = [];
    foreach ($externalMap as $phpTokenId => $externalSymbol) {
        $internalSymbol = $this->tokenToSymbol[$externalSymbol];
        if ($internalSymbol !== $this->invalidSymbol) {
            $internalMap[$phpTokenId] = $internalSymbol;
        }
    }

    return $internalMap;
}
1294: }
1295: