1: <?php declare(strict_types=1);
2:
3: namespace PhpParser\Lexer\TokenEmulator;
4:
5: use PhpParser\Lexer\Emulative;
6:
/**
 * Token emulator for numeric literals that contain "_" separators
 * (for example 1_000, 0xFF_FF or 1_0.5e1_0).
 *
 * emulate() walks an existing token list alongside the source code and, where
 * a number token is only the first fragment of a longer separated literal,
 * replaces the fragments with a single T_LNUMBER / T_DNUMBER token.
 */
final class NumericLiteralSeparatorEmulator extends TokenEmulator
{
    // Regex fragments describing number literals with optional "_" separators
    // between digit groups. They are combined into NUMBER, which is compiled
    // with the "i" (case-insensitive) and "A" (anchored at the offset) flags.
    const BIN = '(?:0b[01]+(?:_[01]+)*)';
    const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    const EXP = '(?:e[+-]?' . self::DEC . ')';
    const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // FLOAT is tried first so that a fractional/exponent part is not cut off
    // by the shorter DEC alternative.
    const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /**
     * @return string The version constant this emulator is tied to (Emulative::PHP_7_4).
     */
    public function getPhpVersion(): string
    {
        return Emulative::PHP_7_4;
    }

    /**
     * Cheap pre-check: does the code contain anything that looks like a
     * numeric literal with a separator?
     *
     * @param string $code Source code to inspect
     *
     * @return bool True if a digit_digit sequence or a hex literal with a separator is found
     */
    public function isEmulationNeeded(string $code) : bool
    {
        // Both patterns below require an underscore, so skip the regex work
        // entirely when the code has none.
        if (\strpos($code, '_') === false) {
            return false;
        }

        return preg_match('~[0-9]_[0-9]~', $code) === 1
            || preg_match('~0x[0-9a-f]+_[0-9a-f]~i', $code) === 1;
    }

    /**
     * Merge the token fragments of separated numeric literals into single number tokens.
     *
     * Tokens are either plain strings or arrays whose element 0 is the token
     * id, element 1 the token text and element 2 an extra value that is copied
     * over unchanged (presumably the token_get_all() format, where it is the
     * line number).
     *
     * @param string $code   The source code the tokens were produced from
     * @param array  $tokens Token list covering $code from offset 0
     *
     * @return array The token list with separated literals merged
     */
    public function emulate(string $code, array $tokens): array
    {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        $codeOffset = 0;
        for ($i = 0, $c = \count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            $tokenLen = \strlen(\is_array($token) ? $token[1] : $token);

            // For string tokens $token[0] is a character and never equals an int id.
            if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) {
                $codeOffset += $tokenLen;
                continue;
            }

            $res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset);
            assert($res, "No number at number token position");

            // assert() may be disabled at runtime, so do not rely on it: when
            // there is no match, or the match is not longer than the token we
            // already have, leave the token untouched and keep the offset in sync.
            $match = $res === 1 ? $matches[0] : '';
            $matchLen = \strlen($match);
            if ($matchLen <= $tokenLen) {
                // Original token already holds the full number.
                $codeOffset += $tokenLen;
                continue;
            }

            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [[$tokenKind, $match, $token[2]]];

            // Consume following tokens until the whole match is covered.
            // $numTokens counts the original tokens to replace, $len the
            // number of characters they cover so far.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken;
                $nextTokenLen = \strlen($nextTokenText);

                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token.
                    assert(is_array($nextToken), "Partial token should be an array token");
                    $partialText = substr($nextTokenText, $matchLen - $len);
                    $newTokens[] = [$nextToken[0], $partialText, $nextToken[2]];
                    break;
                }

                $len += $nextTokenLen;
            }

            array_splice($tokens, $i, $numTokens, $newTokens);
            // Adjust the loop bound by the net number of removed tokens. A
            // partial token, if any, sits at $i + 1 and is visited next,
            // starting exactly at the updated $codeOffset.
            $c -= $numTokens - \count($newTokens);
            $codeOffset += $matchLen;
        }

        return $tokens;
    }

    /**
     * Decide whether a separated literal is an integer or a float token.
     *
     * The separators are stripped and the literal is evaluated; if the result
     * is a float (including integer literals too large for an int) the token
     * is T_DNUMBER, otherwise T_LNUMBER.
     *
     * @param string $str Literal text, possibly containing "_" separators
     *
     * @return int T_LNUMBER or T_DNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int
    {
        $str = str_replace('_', '', $str);

        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            // Leading zero followed by digits only: octal literal.
            $num = octdec($str);
        } else {
            // Unary plus converts the numeric string to int or float.
            $num = +$str;
        }

        return is_float($num) ? T_DNUMBER : T_LNUMBER;
    }

    /**
     * No-op: returns the tokens unchanged.
     *
     * @param string $code   The source code (unused)
     * @param array  $tokens Token list
     *
     * @return array The same token list
     */
    public function reverseEmulate(string $code, array $tokens): array
    {
        // Numeric separators were not legal code previously, don't bother.
        return $tokens;
    }
}
106: