1: <?php declare(strict_types=1);
2:
3: namespace PhpParser;
4:
5: /*
6: * This parser is based on a skeleton written by Moriyoshi Koizumi, which in
7: * turn is based on work by Masato Bito.
8: */
9:
10: use PhpParser\Node\Arg;
11: use PhpParser\Node\Expr;
12: use PhpParser\Node\Expr\Array_;
13: use PhpParser\Node\Expr\Cast\Double;
14: use PhpParser\Node\Identifier;
15: use PhpParser\Node\InterpolatedStringPart;
16: use PhpParser\Node\Name;
17: use PhpParser\Node\Param;
18: use PhpParser\Node\PropertyHook;
19: use PhpParser\Node\Scalar\InterpolatedString;
20: use PhpParser\Node\Scalar\Int_;
21: use PhpParser\Node\Scalar\String_;
22: use PhpParser\Node\Stmt;
23: use PhpParser\Node\Stmt\Class_;
24: use PhpParser\Node\Stmt\ClassConst;
25: use PhpParser\Node\Stmt\ClassMethod;
26: use PhpParser\Node\Stmt\Else_;
27: use PhpParser\Node\Stmt\ElseIf_;
28: use PhpParser\Node\Stmt\Enum_;
29: use PhpParser\Node\Stmt\Interface_;
30: use PhpParser\Node\Stmt\Namespace_;
31: use PhpParser\Node\Stmt\Nop;
32: use PhpParser\Node\Stmt\Property;
33: use PhpParser\Node\Stmt\TryCatch;
34: use PhpParser\Node\UseItem;
35: use PhpParser\Node\VarLikeIdentifier;
36: use PhpParser\NodeVisitor\CommentAnnotatingVisitor;
37:
38: abstract class ParserAbstract implements Parser {
39: private const SYMBOL_NONE = -1;
40:
41: /** @var Lexer Lexer that is used when parsing */
42: protected Lexer $lexer;
43: /** @var PhpVersion PHP version to target on a best-effort basis */
44: protected PhpVersion $phpVersion;
45:
46: /*
47: * The following members will be filled with generated parsing data:
48: */
49:
50: /** @var int Size of $tokenToSymbol map */
51: protected int $tokenToSymbolMapSize;
52: /** @var int Size of $action table */
53: protected int $actionTableSize;
54: /** @var int Size of $goto table */
55: protected int $gotoTableSize;
56:
57: /** @var int Symbol number signifying an invalid token */
58: protected int $invalidSymbol;
59: /** @var int Symbol number of error recovery token */
60: protected int $errorSymbol;
61: /** @var int Action number signifying default action */
62: protected int $defaultAction;
63: /** @var int Rule number signifying that an unexpected token was encountered */
64: protected int $unexpectedTokenRule;
65:
66: protected int $YY2TBLSTATE;
67: /** @var int Number of non-leaf states */
68: protected int $numNonLeafStates;
69:
70: /** @var int[] Map of PHP token IDs to internal symbols */
71: protected array $phpTokenToSymbol;
72: /** @var array<int, bool> Map of PHP token IDs to drop */
73: protected array $dropTokens;
74: /** @var int[] Map of external symbols (static::T_*) to internal symbols */
75: protected array $tokenToSymbol;
76: /** @var string[] Map of symbols to their names */
77: protected array $symbolToName;
78: /** @var array<int, string> Names of the production rules (only necessary for debugging) */
79: protected array $productions;
80:
81: /** @var int[] Map of states to a displacement into the $action table. The corresponding action for this
82: * state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the
83: * action is defaulted, i.e. $actionDefault[$state] should be used instead. */
84: protected array $actionBase;
85: /** @var int[] Table of actions. Indexed according to $actionBase comment. */
86: protected array $action;
87: /** @var int[] Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol
88: * then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */
89: protected array $actionCheck;
90: /** @var int[] Map of states to their default action */
91: protected array $actionDefault;
92: /** @var callable[] Semantic action callbacks */
93: protected array $reduceCallbacks;
94:
95: /** @var int[] Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this
96: * non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */
97: protected array $gotoBase;
98: /** @var int[] Table of states to goto after reduction. Indexed according to $gotoBase comment. */
99: protected array $goto;
100: /** @var int[] Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal
101: * then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */
102: protected array $gotoCheck;
103: /** @var int[] Map of non-terminals to the default state to goto after their reduction */
104: protected array $gotoDefault;
105:
106: /** @var int[] Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for
107: * determining the state to goto after reduction. */
108: protected array $ruleToNonTerminal;
109: /** @var int[] Map of rules to the length of their right-hand side, which is the number of elements that have to
110: * be popped from the stack(s) on reduction. */
111: protected array $ruleToLength;
112:
113: /*
114: * The following members are part of the parser state:
115: */
116:
117: /** @var mixed Temporary value containing the result of last semantic action (reduction) */
118: protected $semValue;
119: /** @var mixed[] Semantic value stack (contains values of tokens and semantic action results) */
120: protected array $semStack;
121: /** @var int[] Token start position stack */
122: protected array $tokenStartStack;
123: /** @var int[] Token end position stack */
124: protected array $tokenEndStack;
125:
126: /** @var ErrorHandler Error handler */
127: protected ErrorHandler $errorHandler;
128: /** @var int Error state, used to avoid error floods */
129: protected int $errorState;
130:
131: /** @var \SplObjectStorage<Array_, null>|null Array nodes created during parsing, for postprocessing of empty elements. */
132: protected ?\SplObjectStorage $createdArrays;
133:
134: /** @var Token[] Tokens for the current parse */
135: protected array $tokens;
136: /** @var int Current position in token array */
137: protected int $tokenPos;
138:
/**
 * Initialize the $reduceCallbacks map.
 *
 * Invoked once from the constructor, before the PHP-token-to-symbol map is created.
 */
abstract protected function initReduceCallbacks(): void;
143:
/**
 * Creates a parser instance.
 *
 * @param Lexer $lexer Lexer used to tokenize the code passed to parse()
 * @param PhpVersion|null $phpVersion PHP version to target; null selects the newest supported
 *                        version. This option is best-effort: even if specified, parsing will
 *                        generally assume the latest supported version and only adjust behavior
 *                        in minor ways, for example by omitting errors in older versions and
 *                        interpreting type hints as a name or identifier depending on version.
 */
public function __construct(Lexer $lexer, ?PhpVersion $phpVersion = null) {
    $this->lexer = $lexer;
    if ($phpVersion === null) {
        $phpVersion = PhpVersion::getNewestSupported();
    }
    $this->phpVersion = $phpVersion;

    $this->initReduceCallbacks();
    $this->phpTokenToSymbol = $this->createTokenMap();

    // Token ids that doParse() skips over when fetching the next lookahead token.
    $this->dropTokens = [
        \T_WHITESPACE => true,
        \T_OPEN_TAG => true,
        \T_COMMENT => true,
        \T_DOC_COMMENT => true,
        \T_BAD_CHARACTER => true,
    ];
}
167:
/**
 * Parses PHP code into a node tree.
 *
 * If a non-throwing error handler is used, the parser will continue parsing after an error
 * occurred and attempt to build a partial AST.
 *
 * The tokens of the parse stay available through getTokens(); the remaining per-parse state
 * (position stacks, semantic stack, created-array registry) is released before returning,
 * including when the lexer, the parser or the error handler throws.
 *
 * @param string $code The source code to parse
 * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
 *                                        to ErrorHandler\Throwing.
 *
 * @return Node\Stmt[]|null Array of statements (or null if a non-throwing error handler is used
 *                          and the parser was unable to recover from an error).
 */
public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array {
    // Only null needs a fallback; an ErrorHandler instance is never falsy.
    $this->errorHandler = $errorHandler ?? new ErrorHandler\Throwing();
    $this->createdArrays = new \SplObjectStorage();

    try {
        $this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
        $result = $this->doParse();

        // Report errors for any empty elements used inside arrays. This is delayed until after the
        // main parse, because we don't know a priori whether a given array expression will be used
        // in a destructuring context or not.
        foreach ($this->createdArrays as $node) {
            foreach ($node->items as $item) {
                if ($item->value instanceof Expr\Error) {
                    $this->errorHandler->handleError(
                        new Error('Cannot use empty array elements in arrays', $item->getAttributes()));
                }
            }
        }
    } finally {
        // Clear out some of the interior state, so we don't hold onto unnecessary memory between
        // uses of the parser. Done in finally so a throwing error handler does not leave the
        // stacks (and the partial AST they reference) alive until the next parse.
        $this->tokenStartStack = [];
        $this->tokenEndStack = [];
        $this->semStack = [];
        $this->semValue = null;
        $this->createdArrays = null;
    }

    if ($result !== null) {
        // Comments are attached in a separate pass over the finished tree.
        $traverser = new NodeTraverser(new CommentAnnotatingVisitor($this->tokens));
        $traverser->traverse($result);
    }

    return $result;
}
215:
/**
 * Returns the token array produced by the lexer during the most recent parse() call.
 *
 * The $tokens property has no default value and is only assigned inside parse().
 *
 * @return Token[]
 */
public function getTokens(): array {
    return $this->tokens;
}
219:
/**
 * Runs the table-driven shift/reduce loop over $this->tokens.
 *
 * Resets the token position, the start/end position stacks, the semantic value stack and the
 * error state, then repeatedly determines the rule/action for the current state and lookahead
 * symbol from the generated tables and shifts, reduces, accepts or performs error recovery.
 *
 * @return Stmt[]|null The semantic value at the time the accept rule (rule 0) is reached, or
 *                     null if a reduce callback raised an Error that the error handler did not
 *                     rethrow, or if error recovery could not find a state to continue from.
 *
 * @throws \RangeException If the lexer produced a token id without an entry in $phpTokenToSymbol
 */
protected function doParse(): ?array {
    // We start off with no lookahead-token
    $symbol = self::SYMBOL_NONE;
    $tokenValue = null;
    $this->tokenPos = -1;

    // Keep stack of start and end attributes
    $this->tokenStartStack = [];
    $this->tokenEndStack = [0];

    // Start off in the initial state and keep a stack of previous states
    $state = 0;
    $stateStack = [$state];

    // Semantic value stack (contains values of tokens and semantic action results)
    $this->semStack = [];

    // Current position in the stack(s)
    $stackPos = 0;

    $this->errorState = 0;

    // Outer loop: one iteration per action lookup (fetching a new lookahead token if needed).
    for (;;) {
        //$this->traceNewState($state, $symbol);

        if ($this->actionBase[$state] === 0) {
            // No table row for this state: use its default action without reading a token.
            $rule = $this->actionDefault[$state];
        } else {
            if ($symbol === self::SYMBOL_NONE) {
                // Fetch the next token, skipping every token id listed in $dropTokens.
                do {
                    $token = $this->tokens[++$this->tokenPos];
                    $tokenId = $token->id;
                } while (isset($this->dropTokens[$tokenId]));

                // Map the lexer token id to the internally used symbols.
                $tokenValue = $token->text;
                if (!isset($this->phpTokenToSymbol[$tokenId])) {
                    throw new \RangeException(sprintf(
                        'The lexer returned an invalid token (id=%d, value=%s)',
                        $tokenId, $tokenValue
                    ));
                }
                $symbol = $this->phpTokenToSymbol[$tokenId];

                //$this->traceRead($symbol);
            }

            // Look the symbol up in the row of $state; for states below YY2TBLSTATE a second row
            // (offset by numNonLeafStates) is tried if the first lookup does not match. $idx is
            // left pointing at whichever lookup matched.
            $idx = $this->actionBase[$state] + $symbol;
            if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                 || ($state < $this->YY2TBLSTATE
                     && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol) >= 0
                     && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol))
                && ($action = $this->action[$idx]) !== $this->defaultAction) {
                /*
                 * >= numNonLeafStates: shift and reduce
                 * > 0: shift
                 * = 0: accept
                 * < 0: reduce
                 * = -YYUNEXPECTED: error
                 */
                if ($action > 0) {
                    /* shift */
                    //$this->traceShift($symbol);

                    ++$stackPos;
                    $stateStack[$stackPos] = $state = $action;
                    $this->semStack[$stackPos] = $tokenValue;
                    $this->tokenStartStack[$stackPos] = $this->tokenPos;
                    $this->tokenEndStack[$stackPos] = $this->tokenPos;
                    // The lookahead has been consumed.
                    $symbol = self::SYMBOL_NONE;

                    // Count the error state down by one per shifted token; a new error message
                    // is only emitted once it is back at 0 (see "case 0" below).
                    if ($this->errorState) {
                        --$this->errorState;
                    }

                    if ($action < $this->numNonLeafStates) {
                        continue;
                    }

                    /* $yyn >= numNonLeafStates means shift-and-reduce */
                    $rule = $action - $this->numNonLeafStates;
                } else {
                    $rule = -$action;
                }
            } else {
                $rule = $this->actionDefault[$state];
            }
        }

        // Inner loop: apply $rule, and keep going while the resulting state encodes a further
        // shift-and-reduce.
        for (;;) {
            if ($rule === 0) {
                /* accept */
                //$this->traceAccept();
                return $this->semValue;
            }
            if ($rule !== $this->unexpectedTokenRule) {
                /* reduce */
                //$this->traceReduce($rule);

                $ruleLength = $this->ruleToLength[$rule];
                try {
                    $callback = $this->reduceCallbacks[$rule];
                    if ($callback !== null) {
                        $callback($this, $stackPos);
                    } elseif ($ruleLength > 0) {
                        // No callback: the result is the value of the first right-hand-side element.
                        $this->semValue = $this->semStack[$stackPos - $ruleLength + 1];
                    }
                } catch (Error $e) {
                    // Errors without a line (-1) get the line of the current token.
                    if (-1 === $e->getStartLine()) {
                        $e->setStartLine($this->tokens[$this->tokenPos]->line);
                    }

                    $this->emitError($e);
                    // Can't recover from this type of error
                    return null;
                }

                /* Goto - shift nonterminal */
                $lastTokenEnd = $this->tokenEndStack[$stackPos];
                // Pop the right-hand side; the start position slot of its first element is reused
                // for the reduced non-terminal (only overwritten below for empty productions).
                $stackPos -= $ruleLength;
                $nonTerminal = $this->ruleToNonTerminal[$rule];
                $idx = $this->gotoBase[$nonTerminal] + $stateStack[$stackPos];
                if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] === $nonTerminal) {
                    $state = $this->goto[$idx];
                } else {
                    $state = $this->gotoDefault[$nonTerminal];
                }

                ++$stackPos;
                $stateStack[$stackPos] = $state;
                $this->semStack[$stackPos] = $this->semValue;
                $this->tokenEndStack[$stackPos] = $lastTokenEnd;
                if ($ruleLength === 0) {
                    // Empty productions use the start attributes of the lookahead token.
                    $this->tokenStartStack[$stackPos] = $this->tokenPos;
                }
            } else {
                /* error */
                switch ($this->errorState) {
                    case 0:
                        // Only report when not already recovering from a previous error.
                        $msg = $this->getErrorMessage($symbol, $state);
                        $this->emitError(new Error($msg, $this->getAttributesForToken($this->tokenPos)));
                        // Break missing intentionally
                        // no break
                    case 1:
                    case 2:
                        $this->errorState = 3;

                        // Pop until error-expecting state uncovered
                        while (!(
                            (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
                                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                            || ($state < $this->YY2TBLSTATE
                                && ($idx = $this->actionBase[$state + $this->numNonLeafStates] + $this->errorSymbol) >= 0
                                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $this->errorSymbol)
                        ) || ($action = $this->action[$idx]) === $this->defaultAction) { // Not totally sure about this
                            if ($stackPos <= 0) {
                                // Could not recover from error
                                return null;
                            }
                            $state = $stateStack[--$stackPos];
                            //$this->tracePop($state);
                        }

                        // Shift the error symbol: $action was assigned by the loop condition above.
                        //$this->traceShift($this->errorSymbol);
                        ++$stackPos;
                        $stateStack[$stackPos] = $state = $action;

                        // We treat the error symbol as being empty, so we reset the end attributes
                        // to the end attributes of the last non-error symbol
                        $this->tokenStartStack[$stackPos] = $this->tokenPos;
                        $this->tokenEndStack[$stackPos] = $this->tokenEndStack[$stackPos - 1];
                        break;

                    case 3:
                        if ($symbol === 0) {
                            // Reached EOF without recovering from error
                            return null;
                        }

                        // Discard the offending lookahead; "break 2" leaves both the switch and
                        // the inner loop so the outer loop reads the next token.
                        //$this->traceDiscard($symbol);
                        $symbol = self::SYMBOL_NONE;
                        break 2;
                }
            }

            if ($state < $this->numNonLeafStates) {
                break;
            }

            /* >= numNonLeafStates means shift-and-reduce */
            $rule = $state - $this->numNonLeafStates;
        }
    }
}
416:
/**
 * Forwards an error to the error handler of the current parse.
 *
 * Whether this call returns or throws is decided by the handler's handleError().
 */
protected function emitError(Error $error): void {
    $this->errorHandler->handleError($error);
}
420:
/**
 * Build the syntax error message for an unexpected symbol.
 *
 * The list of expected tokens is appended only when getExpectedTokens() returns a non-empty list.
 *
 * @param int $symbol Unexpected symbol
 * @param int $state State at time of error
 *
 * @return string Formatted error message
 */
protected function getErrorMessage(int $symbol, int $state): string {
    $expectedTokens = $this->getExpectedTokens($state);

    $message = 'Syntax error, unexpected ' . $this->symbolToName[$symbol];
    if ($expectedTokens !== []) {
        $message .= ', expecting ' . implode(' or ', $expectedTokens);
    }

    return $message;
}
437:
/**
 * Collect the names of the symbols that have a usable action in the given state.
 *
 * @param int $state State
 *
 * @return string[] Expected tokens. An empty array is returned as soon as a fifth candidate
 *                  is found (too many to be useful in a message).
 */
protected function getExpectedTokens(int $state): array {
    $names = [];
    $primaryBase = $this->actionBase[$state];

    foreach ($this->symbolToName as $symbol => $name) {
        // First lookup: the row of $state itself.
        $idx = $primaryBase + $symbol;
        $matched = $idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol;

        // Second lookup, only for states below YY2TBLSTATE: the row offset by numNonLeafStates.
        if (!$matched && $state < $this->YY2TBLSTATE) {
            $idx = $this->actionBase[$state + $this->numNonLeafStates] + $symbol;
            $matched = $idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol;
        }

        if (!$matched) {
            continue;
        }

        $tableAction = $this->action[$idx];
        if ($tableAction === $this->unexpectedTokenRule
            || $tableAction === $this->defaultAction
            || $symbol === $this->errorSymbol
        ) {
            continue;
        }

        if (count($names) === 4) {
            /* Too many expected tokens */
            return [];
        }

        $names[] = $name;
    }

    return $names;
}
472:
/**
 * Build the location attributes of a node spanning the given token positions.
 *
 * The end line and end file position are derived from the token following $tokenEndPos
 * (its line, and its file position minus one), so that token must exist.
 *
 * @param int $tokenStartPos Token position the node starts at
 * @param int $tokenEndPos Token position the node ends at
 * @return array<string, mixed> Attributes
 */
protected function getAttributes(int $tokenStartPos, int $tokenEndPos): array {
    $firstToken = $this->tokens[$tokenStartPos];
    $followingToken = $this->tokens[$tokenEndPos + 1];

    $attributes = [];
    $attributes['startLine'] = $firstToken->line;
    $attributes['startTokenPos'] = $tokenStartPos;
    $attributes['startFilePos'] = $firstToken->pos;
    $attributes['endLine'] = $followingToken->line;
    $attributes['endTokenPos'] = $tokenEndPos;
    $attributes['endFilePos'] = $followingToken->pos - 1;
    return $attributes;
}
492:
/**
 * Build the location attributes for one token.
 *
 * Every token except the last one in the array is delegated to getAttributes(). The last
 * token (the sentinel) has no successor, so its own line and position are used for both the
 * start and the end attributes.
 *
 * @return array<string, mixed> Attributes
 */
protected function getAttributesForToken(int $tokenPos): array {
    $sentinelPos = \count($this->tokens) - 1;
    if ($tokenPos >= $sentinelPos) {
        // Get attributes for the sentinel token.
        $sentinel = $this->tokens[$tokenPos];
        return [
            'startLine' => $sentinel->line,
            'startTokenPos' => $tokenPos,
            'startFilePos' => $sentinel->pos,
            'endLine' => $sentinel->line,
            'endTokenPos' => $tokenPos,
            'endFilePos' => $sentinel->pos,
        ];
    }

    return $this->getAttributes($tokenPos, $tokenPos);
}
514:
515: /*
516: * Tracing functions used for debugging the parser.
517: */
518:
519: /*
520: protected function traceNewState($state, $symbol): void {
521: echo '% State ' . $state
522: . ', Lookahead ' . ($symbol == self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
523: }
524:
525: protected function traceRead($symbol): void {
526: echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
527: }
528:
529: protected function traceShift($symbol): void {
530: echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
531: }
532:
533: protected function traceAccept(): void {
534: echo "% Accepted.\n";
535: }
536:
537: protected function traceReduce($n): void {
538: echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
539: }
540:
541: protected function tracePop($state): void {
542: echo '% Recovering, uncovered state ' . $state . "\n";
543: }
544:
545: protected function traceDiscard($symbol): void {
546: echo '% Discard ' . $this->symbolToName[$symbol] . "\n";
547: }
548: */
549:
550: /*
551: * Helper functions invoked by semantic actions
552: */
553:
/**
 * Normalizes namespace statements in a list of top-level statements.
 *
 * Without namespaces the list is returned untouched. For brace-style namespaces only a check
 * for code outside of the namespaces is performed. For semicolon-style namespaces the
 * statements following a declaration are moved into that declaration's $stmts.
 *
 * @param Node\Stmt[] $stmts
 * @return Node\Stmt[]
 */
protected function handleNamespaces(array $stmts): array {
    $style = $this->getNamespacingStyle($stmts);
    if ($style === null) {
        // No namespace declarations at all.
        return $stmts;
    }

    if ($style === 'brace') {
        // Only code after the first namespace block can be "outside"; report it once at most.
        $seenNamespace = false;
        $alreadyReported = false;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Node\Stmt\Namespace_) {
                $seenNamespace = true;
                continue;
            }
            if ($stmt instanceof Node\Stmt\HaltCompiler || $stmt instanceof Node\Stmt\Nop) {
                continue;
            }
            if (!$seenNamespace || $alreadyReported) {
                continue;
            }

            $this->emitError(new Error(
                'No code may exist outside of namespace {}', $stmt->getAttributes()));
            $alreadyReported = true;
        }
        return $stmts;
    }

    // Semicolon style: $targetStmts always aliases the list new statements are appended to,
    // either the top-level result or the statements of the most recent namespace.
    $resultStmts = [];
    $targetStmts = &$resultStmts;
    $lastNs = null;
    foreach ($stmts as $stmt) {
        if ($stmt instanceof Node\Stmt\Namespace_) {
            if ($lastNs !== null) {
                $this->fixupNamespaceAttributes($lastNs);
            }

            $resultStmts[] = $stmt;
            if ($stmt->stmts === null) {
                $stmt->stmts = [];
                $targetStmts = &$stmt->stmts;
            } else {
                // Invalid mix of styles: a braced namespace keeps its own statements and
                // whatever follows goes back to the top level.
                $targetStmts = &$resultStmts;
            }
            $lastNs = $stmt;
            continue;
        }

        if ($stmt instanceof Node\Stmt\HaltCompiler) {
            // __halt_compiler() always stays at the top level.
            $resultStmts[] = $stmt;
            continue;
        }

        $targetStmts[] = $stmt;
    }

    if ($lastNs !== null) {
        $this->fixupNamespaceAttributes($lastNs);
    }
    return $resultStmts;
}
615:
/**
 * Extends the end attributes of a namespace node to those of its last statement.
 *
 * Needed after statements have been moved into the namespace node. Only the builtin end
 * attributes (endLine, endFilePos, endTokenPos) are copied, and only those the last statement
 * actually has. Namespaces without statements are left unchanged.
 */
private function fixupNamespaceAttributes(Node\Stmt\Namespace_ $stmt): void {
    $children = $stmt->stmts;
    if (empty($children)) {
        return;
    }

    $lastStmt = $children[count($children) - 1];
    foreach (['endLine', 'endFilePos', 'endTokenPos'] as $attributeName) {
        if (!$lastStmt->hasAttribute($attributeName)) {
            continue;
        }
        $stmt->setAttribute($attributeName, $lastStmt->getAttribute($attributeName));
    }
}
633:
/**
 * Returns the attributes of a namespace node with the end attributes narrowed so that they
 * only cover the "namespace" keyword instead of the whole namespace.
 *
 * @return array<string, mixed>
 */
private function getNamespaceErrorAttributes(Namespace_ $node): array {
    $attrs = $node->getAttributes();

    // Line and token position of the end are simply those of the start.
    $startToEnd = ['startLine' => 'endLine', 'startTokenPos' => 'endTokenPos'];
    foreach ($startToEnd as $startKey => $endKey) {
        if (isset($attrs[$startKey])) {
            $attrs[$endKey] = $attrs[$startKey];
        }
    }

    // The end file position is the last byte of the keyword.
    if (isset($attrs['startFilePos'])) {
        $attrs['endFilePos'] = $attrs['startFilePos'] + \strlen('namespace') - 1;
    }

    return $attrs;
}
649:
/**
 * Determine namespacing style (semicolon or brace)
 *
 * Also emits an error when a namespace declaration is preceded by a statement that is not
 * allowed there, and when both styles are mixed.
 *
 * @param Node[] $stmts Top-level statements.
 *
 * @return null|string One of "semicolon", "brace" or null (no namespaces)
 */
private function getNamespacingStyle(array $stmts): ?string {
    $detectedStyle = null;
    $sawDisallowedStmt = false;

    foreach ($stmts as $index => $stmt) {
        if (!$stmt instanceof Node\Stmt\Namespace_) {
            // declare(), __halt_compiler() and nops may precede a namespace declaration, as may
            // a hashbang line at the very start of the file. Everything else is forbidden.
            $isAllowedBefore = $stmt instanceof Node\Stmt\Declare_
                || $stmt instanceof Node\Stmt\HaltCompiler
                || $stmt instanceof Node\Stmt\Nop
                || ($index === 0
                    && $stmt instanceof Node\Stmt\InlineHTML
                    && preg_match('/\A#!.*\r?\n\z/', $stmt->value));
            if (!$isAllowedBefore) {
                $sawDisallowedStmt = true;
            }
            continue;
        }

        $styleOfStmt = $stmt->stmts === null ? 'semicolon' : 'brace';

        if ($detectedStyle === null) {
            // First namespace declaration decides the style.
            $detectedStyle = $styleOfStmt;
            if ($sawDisallowedStmt) {
                $this->emitError(new Error(
                    'Namespace declaration statement has to be the very first statement in the script',
                    $this->getNamespaceErrorAttributes($stmt)
                ));
            }
        } elseif ($detectedStyle !== $styleOfStmt) {
            $this->emitError(new Error(
                'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
                $this->getNamespaceErrorAttributes($stmt)
            ));
            // Treat like semicolon style for namespace normalization
            return 'semicolon';
        }
    }

    return $detectedStyle;
}
699:
/**
 * Converts a name into an identifier if it denotes a builtin type.
 *
 * Only unqualified names whose lowercase form is reported as a builtin type by the targeted
 * PHP version are converted; every other name is returned as is.
 *
 * @return Name|Identifier
 */
protected function handleBuiltinTypes(Name $name) {
    if ($name->isUnqualified()) {
        $lowercased = $name->toLowerString();
        if ($this->phpVersion->supportsBuiltinType($lowercased)) {
            return new Node\Identifier($lowercased, $name->getAttributes());
        }
    }

    return $name;
}
713:
/**
 * Get the attributes for the token range recorded at a stack location.
 *
 * @param int $stackPos Stack location
 *
 * @return array<string, mixed> Combined start and end attributes
 */
protected function getAttributesAt(int $stackPos): array {
    $startTokenPos = $this->tokenStartStack[$stackPos];
    $endTokenPos = $this->tokenEndStack[$stackPos];

    return $this->getAttributes($startTokenPos, $endTokenPos);
}
724:
/**
 * Maps the text of a floating point cast to its Double::KIND_* constant.
 *
 * The match is case-insensitive and by substring: "float" is checked first, then "real";
 * anything else is treated as a double cast.
 */
protected function getFloatCastKind(string $cast): int {
    $lowercased = strtolower($cast);

    // Checked in order; the first keyword contained in the cast wins.
    $kindsByKeyword = [
        'float' => Double::KIND_FLOAT,
        'real' => Double::KIND_REAL,
    ];
    foreach ($kindsByKeyword as $keyword => $kind) {
        if (strpos($lowercased, $keyword) !== false) {
            return $kind;
        }
    }

    return Double::KIND_DOUBLE;
}
737:
/**
 * Creates an integer node from its source representation.
 *
 * If Int_::fromString() raises an Error, the error is emitted and an Int_ node with the
 * dummy value 0 is returned instead.
 *
 * @param array<string, mixed> $attributes
 */
protected function parseLNumber(string $str, array $attributes, bool $allowInvalidOctal = false): Int_ {
    try {
        $node = Int_::fromString($str, $attributes, $allowInvalidOctal);
    } catch (Error $error) {
        $this->emitError($error);
        // Use dummy value
        $node = new Int_(0, $attributes);
    }

    return $node;
}
748:
/**
 * Parse a T_NUM_STRING token into either an integer or string node.
 *
 * An integer node is only produced for "0" or an optionally negative decimal number without
 * leading zeros, and only if numeric conversion yields an int; everything else stays a
 * string node.
 *
 * @param string $str Number string
 * @param array<string, mixed> $attributes Attributes
 *
 * @return Int_|String_ Integer or string node.
 */
protected function parseNumString(string $str, array $attributes) {
    if (preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
        // Unary plus converts to int, or to float when the value does not fit an int.
        $converted = +$str;
        if (is_int($converted)) {
            return new Int_($converted, $attributes);
        }
    }

    return new String_($str, $attributes);
}
769:
/**
 * Removes up to $indentLen characters of leading whitespace from the lines of a doc string part.
 *
 * A line start is any position directly after "\n", plus the start of the string if
 * $newlineAtStart is set. Errors are emitted when the removed prefix contains the other
 * whitespace character than $indentChar, or when a line is indented by less than $indentLen
 * and the whitespace is not directly followed by a line end ($newlineAtEnd additionally
 * treats the end of the string as a line end).
 *
 * @param array<string, mixed> $attributes Attributes used for emitted errors
 */
protected function stripIndentation(
    string $string, int $indentLen, string $indentChar,
    bool $newlineAtStart, bool $newlineAtEnd, array $attributes
): string {
    if ($indentLen === 0) {
        return $string;
    }

    $lineStart = '(?<=\n)';
    if ($newlineAtStart) {
        $lineStart = '(?:(?<=\n)|\A)';
    }
    $lineEnd = '(?=[\r\n])';
    if ($newlineAtEnd) {
        $lineEnd = '(?:(?=[\r\n])|\z)';
    }
    $pattern = '/' . $lineStart . '([ \t]*)(' . $lineEnd . ')?/';

    // The whitespace character that must not occur in the stripped prefix.
    $otherChar = $indentChar === " " ? "\t" : " ";

    $stripLine = function ($matches) use ($indentLen, $otherChar, $attributes) {
        $removed = substr($matches[1], 0, $indentLen);
        if (false !== strpos($removed, $otherChar)) {
            $this->emitError(new Error(
                'Invalid indentation - tabs and spaces cannot be mixed', $attributes
            ));
        } elseif (strlen($removed) < $indentLen && !isset($matches[2])) {
            $this->emitError(new Error(
                'Invalid body indentation level ' .
                '(expecting an indentation level of at least ' . $indentLen . ')',
                $attributes
            ));
        }
        return substr($matches[0], strlen($removed));
    };

    return preg_replace_callback($pattern, $stripLine, $string);
}
802:
/**
 * Creates the node for a heredoc or nowdoc string.
 *
 * The label is taken from the start token and the indentation from the leading whitespace of
 * the end token; both are recorded as attributes together with the string kind. The
 * indentation is then stripped from the contents. Plain string contents yield a String_ node,
 * a list of parts yields an InterpolatedString node (string parts that end up empty are
 * dropped).
 *
 * @param string|(Expr|InterpolatedStringPart)[] $contents
 * @param array<string, mixed> $attributes
 * @param array<string, mixed> $endTokenAttributes
 */
protected function parseDocString(
    string $startToken, $contents, string $endToken,
    array $attributes, array $endTokenAttributes, bool $parseUnicodeEscape
): Expr {
    // A single quote in the start token marks a nowdoc.
    if (strpos($startToken, "'") === false) {
        $kind = String_::KIND_HEREDOC;
    } else {
        $kind = String_::KIND_NOWDOC;
    }

    $startPattern = '/\A[bB]?<<<[ \t]*[\'"]?([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)[\'"]?(?:\r\n|\n|\r)\z/';
    $matched = preg_match($startPattern, $startToken, $startMatches);
    assert($matched === 1);
    $label = $startMatches[1];

    $matched = preg_match('/\A[ \t]*/', $endToken, $endMatches);
    assert($matched === 1);
    $indentation = $endMatches[0];

    $attributes['kind'] = $kind;
    $attributes['docLabel'] = $label;
    $attributes['docIndentation'] = $indentation;

    $usesSpaces = false !== strpos($indentation, " ");
    $usesTabs = false !== strpos($indentation, "\t");
    if ($usesSpaces && $usesTabs) {
        $this->emitError(new Error(
            'Invalid indentation - tabs and spaces cannot be mixed',
            $endTokenAttributes
        ));

        // Proceed processing as if this doc string is not indented
        $indentation = '';
    }

    $indentLen = \strlen($indentation);
    $indentChar = $usesSpaces ? " " : "\t";

    if (\is_string($contents)) {
        if ($contents === '') {
            $attributes['rawValue'] = $contents;
            return new String_('', $attributes);
        }

        $stripped = $this->stripIndentation(
            $contents, $indentLen, $indentChar, true, true, $attributes
        );
        // Drop the newline that precedes the end label.
        $stripped = preg_replace('~(\r\n|\n|\r)\z~', '', $stripped);
        $attributes['rawValue'] = $stripped;

        if ($kind === String_::KIND_HEREDOC) {
            $stripped = String_::parseEscapeSequences($stripped, null, $parseUnicodeEscape);
        }

        return new String_($stripped, $attributes);
    }

    assert(count($contents) > 0);
    if (!$contents[0] instanceof Node\InterpolatedStringPart) {
        // If there is no leading encapsed string part, pretend there is an empty one
        $this->stripIndentation(
            '', $indentLen, $indentChar, true, false, $contents[0]->getAttributes()
        );
    }

    $lastIndex = \count($contents) - 1;
    $keptParts = [];
    foreach ($contents as $i => $part) {
        if (!$part instanceof Node\InterpolatedStringPart) {
            // Interpolated expressions are kept unchanged.
            $keptParts[] = $part;
            continue;
        }

        $isLast = $i === $lastIndex;
        $part->value = $this->stripIndentation(
            $part->value, $indentLen, $indentChar,
            $i === 0, $isLast, $part->getAttributes()
        );
        if ($isLast) {
            // Drop the newline that precedes the end label.
            $part->value = preg_replace('~(\r\n|\n|\r)\z~', '', $part->value);
        }
        $part->setAttribute('rawValue', $part->value);
        $part->value = String_::parseEscapeSequences($part->value, null, $parseUnicodeEscape);

        if ('' !== $part->value) {
            $keptParts[] = $part;
        }
    }

    return new InterpolatedString($keptParts, $attributes);
}
891:
/**
 * Creates a Comment (or Comment\Doc for T_DOC_COMMENT) from a comment token.
 *
 * The comment starts and ends at the same token position; its end file position is the
 * token's end position minus one.
 */
protected function createCommentFromToken(Token $token, int $tokenPos): Comment {
    assert($token->id === \T_COMMENT || $token->id == \T_DOC_COMMENT);

    $endLine = $token->getEndLine();
    $endFilePos = $token->getEndPos() - 1;

    if ($token->id === \T_DOC_COMMENT) {
        return new Comment\Doc(
            $token->text, $token->line, $token->pos, $tokenPos,
            $endLine, $endFilePos, $tokenPos
        );
    }

    return new Comment(
        $token->text, $token->line, $token->pos, $tokenPos,
        $endLine, $endFilePos, $tokenPos
    );
}
900:
/**
 * Get last comment before the given token position, if any
 *
 * Walks backwards over dropped tokens only: the search ends without a result at the first
 * token that is not in $dropTokens.
 */
protected function getCommentBeforeToken(int $tokenPos): ?Comment {
    for ($pos = $tokenPos - 1; $pos >= 0; --$pos) {
        $candidate = $this->tokens[$pos];
        if (!isset($this->dropTokens[$candidate->id])) {
            return null;
        }

        $isComment = $candidate->id === \T_COMMENT || $candidate->id === \T_DOC_COMMENT;
        if ($isComment) {
            return $this->createCommentFromToken($candidate, $pos);
        }
    }

    return null;
}
917:
/**
 * Create a zero-length nop to capture preceding comments, if any.
 *
 * The nop is placed directly after the comment found before $tokenPos: its start positions
 * are one past the comment's end positions and its end positions equal the comment's end.
 * Returns null when there is no such comment.
 */
protected function maybeCreateZeroLengthNop(int $tokenPos): ?Nop {
    $comment = $this->getCommentBeforeToken($tokenPos);
    if ($comment === null) {
        return null;
    }

    $line = $comment->getEndLine();
    $filePos = $comment->getEndFilePos();
    $endTokenPos = $comment->getEndTokenPos();

    return new Nop([
        'startLine' => $line,
        'endLine' => $line,
        'startFilePos' => $filePos + 1,
        'endFilePos' => $filePos,
        'startTokenPos' => $endTokenPos + 1,
        'endTokenPos' => $endTokenPos,
    ]);
}
940:
/**
 * Create a nop spanning the given token range, but only if a comment precedes it.
 *
 * @param int $tokenStartPos Start token position; a comment is looked up before it
 * @param int $tokenEndPos   End token position of the nop
 *
 * @return Nop|null The nop, or null if no comment precedes $tokenStartPos
 */
protected function maybeCreateNop(int $tokenStartPos, int $tokenEndPos): ?Nop {
    $hasComment = null !== $this->getCommentBeforeToken($tokenStartPos);

    return $hasComment
        ? new Nop($this->getAttributes($tokenStartPos, $tokenEndPos))
        : null;
}
947:
/**
 * Stop token consumption after __halt_compiler and return the remaining text.
 *
 * @return string Text of the token following the current one if it is T_INLINE_HTML,
 *                otherwise an empty string
 */
protected function handleHaltCompiler(): string {
    // Grab the following token first, because the position is overwritten below.
    $followingToken = $this->tokens[$this->tokenPos + 1];

    // Skip to the end of the token array to prevent any further tokens from being returned.
    $this->tokenPos = \count($this->tokens) - 2;

    if ($followingToken->id !== \T_INLINE_HTML) {
        return '';
    }

    // This is the text after __halt_compiler.
    return $followingToken->text;
}
956:
/**
 * Determine whether the inline HTML at the given stack position has a leading newline.
 *
 * If the T_INLINE_HTML token is the very first token, true is returned. Otherwise the
 * preceding token (asserted to be T_CLOSE_TAG) is checked for a "\n" or "\r" in its text.
 *
 * @param int $stackPos Index into $this->tokenStartStack identifying the inline HTML token
 */
protected function inlineHtmlHasLeadingNewline(int $stackPos): bool {
    $htmlPos = $this->tokenStartStack[$stackPos];
    $htmlToken = $this->tokens[$htmlPos];
    assert($htmlToken->id == \T_INLINE_HTML);

    if ($htmlPos <= 0) {
        // No preceding token to inspect.
        return true;
    }

    $closeTag = $this->tokens[$htmlPos - 1];
    assert($closeTag->id == \T_CLOSE_TAG);

    foreach (["\n", "\r"] as $newlineChar) {
        if (false !== strpos($closeTag->text, $newlineChar)) {
            return true;
        }
    }

    return false;
}
969:
/**
 * Get the attributes to use for an empty element located at the given token.
 *
 * Delegates to getAttributesForToken().
 *
 * @param int $tokenPos Token position of the empty element
 *
 * @return array<string, mixed>
 */
protected function createEmptyElemAttributes(int $tokenPos): array {
    $attributes = $this->getAttributesForToken($tokenPos);

    return $attributes;
}
976:
/**
 * Convert an array literal node into the equivalent List_ node for destructuring.
 *
 * The node is detached from $this->createdArrays, Error placeholder items become null,
 * and nested array literals are converted recursively. The resulting List_ carries the
 * original node's attributes plus 'kind' => KIND_ARRAY (which takes precedence).
 *
 * @param Array_ $node Array literal that is used as a destructuring target
 */
protected function fixupArrayDestructuring(Array_ $node): Expr\List_ {
    $this->createdArrays->detach($node);

    $toListItem = function (Node\ArrayItem $item): ?Node\ArrayItem {
        $value = $item->value;

        if ($value instanceof Array_) {
            // Nested array literal: convert it as well, keeping key, by-ref flag and attributes.
            return new Node\ArrayItem(
                $this->fixupArrayDestructuring($value),
                $item->key,
                $item->byRef,
                $item->getAttributes()
            );
        }

        // We used Error as a placeholder for empty elements, which are legal for destructuring.
        return $value instanceof Expr\Error ? null : $item;
    };

    $listItems = array_map($toListItem, $node->items);
    $listAttributes = ['kind' => Expr\List_::KIND_ARRAY] + $node->getAttributes();

    return new Expr\List_($listItems, $listAttributes);
}
992:
/**
 * Replace Error placeholder items of a list() node with null, in place.
 *
 * @param Expr\List_ $node The list node to post-process
 */
protected function postprocessList(Expr\List_ $node): void {
    foreach ($node->items as $index => $listItem) {
        if (!$listItem->value instanceof Expr\Error) {
            continue;
        }

        // We used Error as a placeholder for empty elements, which are legal for destructuring.
        $node->items[$index] = null;
    }
}
1001:
/**
 * Make sure a trailing nop statement carrying comments is part of the node.
 *
 * If the last statement is a Nop, its endLine, endFilePos and endTokenPos
 * attributes (those that are set) are copied onto the node.
 *
 * @param ElseIf_|Else_ $node
 */
protected function fixupAlternativeElse($node): void {
    $stmtCount = \count($node->stmts);
    if ($stmtCount === 0) {
        return;
    }

    $lastStmt = $node->stmts[$stmtCount - 1];
    if (!$lastStmt instanceof Nop) {
        return;
    }

    $nopAttrs = $lastStmt->getAttributes();
    foreach (['endLine', 'endFilePos', 'endTokenPos'] as $attrName) {
        if (isset($nopAttrs[$attrName])) {
            $node->setAttribute($attrName, $nopAttrs[$attrName]);
        }
    }
}
1019:
/**
 * Verify that class modifier $b may be combined with the modifiers in $a.
 *
 * An Error raised by Modifiers::verifyClassModifier() is given the attributes
 * of the modifier token and passed to emitError().
 *
 * @param int $a           First modifier argument passed to the verifier
 * @param int $b           Second modifier argument passed to the verifier
 * @param int $modifierPos Position used to locate the error
 */
protected function checkClassModifier(int $a, int $b, int $modifierPos): void {
    try {
        Modifiers::verifyClassModifier($a, $b);
    } catch (Error $modifierError) {
        $errorAttributes = $this->getAttributesAt($modifierPos);
        $modifierError->setAttributes($errorAttributes);
        $this->emitError($modifierError);
    }
}
1028:
/**
 * Verify that modifier $b may be combined with the modifiers in $a.
 *
 * An Error raised by Modifiers::verifyModifier() is given the attributes
 * of the modifier token and passed to emitError().
 *
 * @param int $a           First modifier argument passed to the verifier
 * @param int $b           Second modifier argument passed to the verifier
 * @param int $modifierPos Position used to locate the error
 */
protected function checkModifier(int $a, int $b, int $modifierPos): void {
    // verifyModifier() is also used elsewhere, so the error is located here after the fact.
    try {
        Modifiers::verifyModifier($a, $b);
    } catch (Error $modifierError) {
        $errorAttributes = $this->getAttributesAt($modifierPos);
        $modifierError->setAttributes($errorAttributes);
        $this->emitError($modifierError);
    }
}
1038:
/**
 * Emit an error if a variadic parameter declares a default value.
 *
 * The error is located at the default value expression.
 *
 * @param Param $node The parameter to check
 */
protected function checkParam(Param $node): void {
    if (!$node->variadic || null === $node->default) {
        return;
    }

    $this->emitError(new Error(
        'Variadic parameter cannot have a default value',
        $node->default->getAttributes()
    ));
}
1047:
/**
 * Emit an error if a try statement has neither a catch clause nor a finally clause.
 *
 * @param TryCatch $node The try statement to check
 */
protected function checkTryCatch(TryCatch $node): void {
    $hasCatch = !empty($node->catches);
    $hasFinally = null !== $node->finally;
    if ($hasCatch || $hasFinally) {
        return;
    }

    $this->emitError(new Error(
        'Cannot use try without catch or finally', $node->getAttributes()
    ));
}
1055:
/**
 * Emit an error for every namespace declaration directly nested in the given namespace.
 *
 * Nothing is checked when the namespace has no statement list (stmts is null).
 *
 * @param Namespace_ $node The namespace to check
 */
protected function checkNamespace(Namespace_ $node): void {
    foreach ($node->stmts ?? [] as $childStmt) {
        if (!$childStmt instanceof Namespace_) {
            continue;
        }

        $this->emitError(new Error(
            'Namespace declarations cannot be nested', $childStmt->getAttributes()
        ));
    }
}
1067:
/**
 * Emit an error if the given class-like name is a special class name.
 *
 * @param Identifier|null $name    The declared name; null is accepted and never reported
 * @param int             $namePos Position used to locate the error
 */
private function checkClassName(?Identifier $name, int $namePos): void {
    if (null === $name || !$name->isSpecialClassName()) {
        return;
    }

    $message = sprintf('Cannot use \'%s\' as class name as it is reserved', $name);
    $this->emitError(new Error($message, $this->getAttributesAt($namePos)));
}
1076:
/**
 * Emit an error for every interface name that is a special class name.
 *
 * Each error is located at the offending name.
 *
 * @param Name[] $interfaces
 */
private function checkImplementedInterfaces(array $interfaces): void {
    foreach ($interfaces as $interfaceName) {
        if (!$interfaceName->isSpecialClassName()) {
            continue;
        }

        $message = sprintf('Cannot use \'%s\' as interface name as it is reserved', $interfaceName);
        $this->emitError(new Error($message, $interfaceName->getAttributes()));
    }
}
1088:
/**
 * Check the names used by a class declaration.
 *
 * Checks, in order: the class name itself, the extended class name, and the
 * implemented interface names. Special class names are reported as errors.
 *
 * @param Class_ $node    The class declaration
 * @param int    $namePos Position used to locate an error about the class name
 */
protected function checkClass(Class_ $node, int $namePos): void {
    $this->checkClassName($node->name, $namePos);

    $parentClass = $node->extends;
    if ($parentClass && $parentClass->isSpecialClassName()) {
        $message = sprintf('Cannot use \'%s\' as class name as it is reserved', $parentClass);
        $this->emitError(new Error($message, $parentClass->getAttributes()));
    }

    $this->checkImplementedInterfaces($node->implements);
}
1101:
/**
 * Check the names used by an interface declaration.
 *
 * The interface name is checked first, then the names of the extended interfaces.
 *
 * @param Interface_ $node    The interface declaration
 * @param int        $namePos Position used to locate an error about the interface name
 */
protected function checkInterface(Interface_ $node, int $namePos): void {
    $declaredName = $node->name;
    $this->checkClassName($declaredName, $namePos);

    $parentInterfaces = $node->extends;
    $this->checkImplementedInterfaces($parentInterfaces);
}
1106:
/**
 * Check the names used by an enum declaration.
 *
 * The enum name is checked first, then the names of the implemented interfaces.
 *
 * @param Enum_ $node    The enum declaration
 * @param int   $namePos Position used to locate an error about the enum name
 */
protected function checkEnum(Enum_ $node, int $namePos): void {
    $declaredName = $node->name;
    $this->checkClassName($declaredName, $namePos);

    $implemented = $node->implements;
    $this->checkImplementedInterfaces($implemented);
}
1111:
/**
 * Check the modifiers of a class method.
 *
 * Reports a static __construct, __destruct or __clone (matched case-insensitively),
 * and reports any method carrying the readonly modifier.
 *
 * @param ClassMethod $node        The method declaration
 * @param int         $modifierPos Position used to locate the errors
 */
protected function checkClassMethod(ClassMethod $node, int $modifierPos): void {
    if ($node->flags & Modifiers::STATIC) {
        // Lower-cased method name => error message format for methods that cannot be static.
        $staticErrorFormats = [
            '__construct' => 'Constructor %s() cannot be static',
            '__destruct' => 'Destructor %s() cannot be static',
            '__clone' => 'Clone method %s() cannot be static',
        ];

        $format = $staticErrorFormats[$node->name->toLowerString()] ?? null;
        if (null !== $format) {
            $this->emitError(new Error(
                sprintf($format, $node->name),
                $this->getAttributesAt($modifierPos)
            ));
        }
    }

    if ($node->flags & Modifiers::READONLY) {
        $this->emitError(new Error(
            sprintf('Method %s() cannot be readonly', $node->name),
            $this->getAttributesAt($modifierPos)
        ));
    }
}
1139:
/**
 * Emit an error for each modifier that is not allowed on a class constant.
 *
 * The static, abstract and readonly modifiers are checked in that order; every one
 * that is present in the constant's flags is reported separately.
 *
 * @param ClassConst $node        The class constant declaration
 * @param int        $modifierPos Position used to locate the errors
 */
protected function checkClassConst(ClassConst $node, int $modifierPos): void {
    $disallowedModifiers = [Modifiers::STATIC, Modifiers::ABSTRACT, Modifiers::READONLY];

    foreach ($disallowedModifiers as $disallowed) {
        if (!($node->flags & $disallowed)) {
            continue;
        }

        $message = "Cannot use '" . Modifiers::toString($disallowed) . "' as constant modifier";
        $this->emitError(new Error($message, $this->getAttributesAt($modifierPos)));
    }
}
1149:
/**
 * Emit an error if a use item is aliased to a special class name.
 *
 * @param UseItem $node    The use item to check
 * @param int     $namePos Position used to locate the error
 */
protected function checkUseUse(UseItem $node, int $namePos): void {
    $alias = $node->alias;
    if (!$alias || !$alias->isSpecialClassName()) {
        return;
    }

    $message = sprintf(
        'Cannot use %s as %s because \'%2$s\' is a special class name',
        $node->name, $alias
    );
    $this->emitError(new Error($message, $this->getAttributesAt($namePos)));
}
1161:
/**
 * Emit an error if the given property statement declares more than one property.
 *
 * @param Property $property The property statement to check
 * @param int      $hookPos  Position used to locate the error
 */
protected function checkPropertyHooksForMultiProperty(Property $property, int $hookPos): void {
    if (count($property->props) <= 1) {
        return;
    }

    $this->emitError(new Error(
        'Cannot use hooks when declaring multiple properties',
        $this->getAttributesAt($hookPos)
    ));
}
1168:
/**
 * Emit an error if the property hook list is empty.
 *
 * @param PropertyHook[] $hooks   The hooks that were declared
 * @param int            $hookPos Position used to locate the error
 */
protected function checkEmptyPropertyHookList(array $hooks, int $hookPos): void {
    if (\count($hooks) !== 0) {
        return;
    }

    $this->emitError(new Error(
        'Property hook list cannot be empty',
        $this->getAttributesAt($hookPos)
    ));
}
1176:
/**
 * Check a single property hook.
 *
 * Reports a hook whose name is neither "get" nor "set" (compared case-insensitively),
 * and reports a get hook that was given a parameter list.
 *
 * @param PropertyHook $hook         The hook to check
 * @param int|null     $paramListPos Position of the hook's parameter list, or null if it has none
 */
protected function checkPropertyHook(PropertyHook $hook, ?int $paramListPos): void {
    $hookName = $hook->name->toLowerString();
    $isGetHook = 'get' === $hookName;

    if (!$isGetHook && 'set' !== $hookName) {
        $this->emitError(new Error(
            'Unknown hook "' . $hook->name . '", expected "get" or "set"',
            $hook->name->getAttributes()
        ));
    }

    if ($isGetHook && null !== $paramListPos) {
        $this->emitError(new Error(
            'get hook must not have a parameter list',
            $this->getAttributesAt($paramListPos)
        ));
    }
}
1189:
/**
 * Check a modifier that is being added to a property hook.
 *
 * First runs the general Modifiers::verifyModifier() check, locating and emitting
 * any Error it raises. Then reports every modifier other than final as not allowed
 * on a property hook.
 *
 * @param int $a           First modifier argument passed to the verifier
 * @param int $b           Second modifier argument; the modifier that must be final
 * @param int $modifierPos Position used to locate the errors
 */
protected function checkPropertyHookModifiers(int $a, int $b, int $modifierPos): void {
    try {
        Modifiers::verifyModifier($a, $b);
    } catch (Error $modifierError) {
        $modifierError->setAttributes($this->getAttributesAt($modifierPos));
        $this->emitError($modifierError);
    }

    if (Modifiers::FINAL === $b) {
        return;
    }

    $message = 'Cannot use the ' . Modifiers::toString($b) . ' modifier on a property hook';
    $this->emitError(new Error($message, $this->getAttributesAt($modifierPos)));
}
1204:
/**
 * Store the owning property's name in the 'propertyName' attribute of each hook.
 *
 * For a Property the name of its first declared property is used; otherwise the
 * name of the node's variable is used.
 *
 * @param Property|Param $node
 */
protected function addPropertyNameToHooks(Node $node): void {
    $propertyName = $node instanceof Property
        ? $node->props[0]->name->toString()
        : $node->var->name;

    foreach ($node->hooks as $propertyHook) {
        $propertyHook->setAttribute('propertyName', $propertyName);
    }
}
1218:
/**
 * Determine whether the arguments form a "simple" exit call.
 *
 * That is the case for no arguments at all, or for exactly one argument that is
 * an Arg without a name, not passed by reference and not unpacked.
 *
 * @param array<Node\Arg|Node\VariadicPlaceholder> $args
 */
private function isSimpleExit(array $args): bool {
    $argCount = \count($args);
    if ($argCount === 0) {
        return true;
    }
    if ($argCount !== 1) {
        return false;
    }

    $onlyArg = $args[0];
    if (!$onlyArg instanceof Arg) {
        return false;
    }

    return $onlyArg->name === null
        && $onlyArg->byRef === false
        && $onlyArg->unpack === false;
}
1231:
/**
 * Create the expression node for an exit/die call.
 *
 * Simple calls (see isSimpleExit()) produce an Exit_ node whose 'kind' attribute
 * records whether "exit" or "die" was written; any other call produces a FuncCall.
 *
 * @param string $name    The name as written; compared case-insensitively against "exit"
 * @param int    $namePos Position used for the attributes of the function name
 * @param array<Node\Arg|Node\VariadicPlaceholder> $args
 * @param array<string, mixed> $attrs
 */
protected function createExitExpr(string $name, int $namePos, array $args, array $attrs): Expr {
    if (!$this->isSimpleExit($args)) {
        $functionName = new Name($name, $this->getAttributesAt($namePos));
        return new Expr\FuncCall($functionName, $args, $attrs);
    }

    // Create Exit node for backwards compatibility.
    $isExitKeyword = strtolower($name) === 'exit';
    $attrs['kind'] = $isExitKeyword ? Expr\Exit_::KIND_EXIT : Expr\Exit_::KIND_DIE;

    $exitValue = \count($args) === 1 ? $args[0]->value : null;
    return new Expr\Exit_($exitValue, $attrs);
}
1244:
/**
 * Creates the token map.
 *
 * The token map maps the PHP internal token identifiers
 * to the identifiers used by the Parser. Additionally it
 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
 *
 * The map is built in two steps: PHP token IDs are first mapped to external symbol
 * IDs, which are then translated to internal symbol IDs through $this->tokenToSymbol.
 * Entries that resolve to $this->invalidSymbol are left out.
 *
 * @return array<int, int> The token map
 */
protected function createTokenMap(): array {
    // Single-char tokens use an identity mapping (0 => 0, ..., 255 => 255).
    $externalMap = range(0, 255);

    foreach ($this->symbolToName as $symbolName) {
        if ($symbolName[0] !== 'T') {
            continue;
        }
        // Global PHP constant => constant of the same name on the concrete parser class.
        $phpTokenId = \constant($symbolName);
        $externalMap[$phpTokenId] = constant(static::class . '::' . $symbolName);
    }

    // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
    $externalMap[\T_OPEN_TAG_WITH_ECHO] = static::T_ECHO;
    // T_CLOSE_TAG is equivalent to ';'
    $externalMap[\T_CLOSE_TAG] = ord(';');

    // We have created a map from PHP token IDs to external symbol IDs.
    // Now map them to the internal symbol ID.
    $internalMap = [];
    foreach ($externalMap as $phpTokenId => $externalSymbol) {
        $internalSymbol = $this->tokenToSymbol[$externalSymbol];
        if ($internalSymbol !== $this->invalidSymbol) {
            $internalMap[$phpTokenId] = $internalSymbol;
        }
    }

    return $internalMap;
}
1286: }
1287: