1: <?php declare(strict_types=1);
2:
3: namespace PhpParser\Node\Scalar;
4:
5: use PhpParser\Error;
6: use PhpParser\Node\Scalar;
7:
/**
 * AST node for a string literal.
 *
 * Besides holding the decoded value, this class provides the static helpers that turn a raw
 * string token (quotes and optional b/B prefix included) into the value it denotes.
 */
class String_ extends Scalar {
    /* For use in "kind" attribute */
    public const KIND_SINGLE_QUOTED = 1;
    public const KIND_DOUBLE_QUOTED = 2;
    public const KIND_HEREDOC = 3;
    public const KIND_NOWDOC = 4;

    /** @var string String value */
    public string $value;

    /** @var array<string, string> Escaped character to its decoded value */
    protected static array $replacements = [
        '\\' => '\\',
        '$'  => '$',
        'n'  => "\n",
        'r'  => "\r",
        't'  => "\t",
        'f'  => "\f",
        'v'  => "\v",
        'e'  => "\x1B",
    ];

    /**
     * Constructs a string scalar node.
     *
     * @param string $value Value of the string
     * @param array<string, mixed> $attributes Additional attributes
     */
    public function __construct(string $value, array $attributes = []) {
        $this->attributes = $attributes;
        $this->value = $value;
    }

    /**
     * Returns the names of the sub nodes of this node.
     *
     * @return string[] Always ['value']
     */
    public function getSubNodeNames(): array {
        return ['value'];
    }

    /**
     * Creates a node from a raw string token.
     *
     * The "kind" attribute is set to single- or double-quoted depending on the opening quote,
     * the untouched token is stored in the "rawValue" attribute, and the node value is the
     * token parsed by parse().
     *
     * @param string $str String token content, including quotes and optional b/B prefix
     * @param array<string, mixed> $attributes Additional attributes ("kind" and "rawValue" are overwritten)
     * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
     *
     * @throws Error If a \u escape denotes a code point that is too large
     */
    public static function fromString(string $str, array $attributes = [], bool $parseUnicodeEscape = true): self {
        // A token is single quoted when it starts with ' directly or after a b/B prefix;
        // everything else is tagged as double quoted.
        $hasBinaryPrefix = $str[0] === 'b' || $str[0] === 'B';
        $isSingleQuoted = $str[0] === "'" || ($str[1] === "'" && $hasBinaryPrefix);

        $attributes['kind'] = $isSingleQuoted
            ? self::KIND_SINGLE_QUOTED
            : self::KIND_DOUBLE_QUOTED;

        $attributes['rawValue'] = $str;

        return new self(self::parse($str, $parseUnicodeEscape), $attributes);
    }

    /**
     * @internal
     *
     * Parses a string token.
     *
     * @param string $str String token content
     * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
     *
     * @return string The parsed string
     *
     * @throws Error If a \u escape denotes a code point that is too large
     */
    public static function parse(string $str, bool $parseUnicodeEscape = true): string {
        // Skip the optional binary prefix so that $str[$bLength] is the opening quote.
        $bLength = ('b' === $str[0] || 'B' === $str[0]) ? 1 : 0;

        // Strip the prefix, the opening quote and the closing quote.
        $contents = substr($str, $bLength + 1, -1);

        if ('\'' === $str[$bLength]) {
            // Single quoted strings only know the \\ and \' escapes.
            return str_replace(
                ['\\\\', '\\\''],
                ['\\', '\''],
                $contents
            );
        }

        return self::parseEscapeSequences($contents, '"', $parseUnicodeEscape);
    }

    /**
     * @internal
     *
     * Parses escape sequences in strings (all string types apart from single quoted).
     *
     * @param string $str String without quotes
     * @param null|string $quote Quote type
     * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
     *
     * @return string String with escape sequences parsed
     *
     * @throws Error If a \u escape denotes a code point that is too large
     */
    public static function parseEscapeSequences(string $str, ?string $quote, bool $parseUnicodeEscape = true): string {
        if (null !== $quote) {
            // Unescape the quote character itself before handling the generic escapes.
            $str = str_replace('\\' . $quote, $quote, $str);
        }

        // The \u{...} alternative adds a second capture group holding the hex digits.
        $extra = $parseUnicodeEscape ? '|u\{([0-9a-fA-F]+)\}' : '';

        return preg_replace_callback(
            '~\\\\([\\\\$nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}' . $extra . ')~',
            static function (array $matches): string {
                $str = $matches[1];

                // Single character escapes such as \n or \$.
                if (isset(self::$replacements[$str])) {
                    return self::$replacements[$str];
                }
                // Hexadecimal escape: at most two digits, so the value is at most 0xFF.
                if ('x' === $str[0] || 'X' === $str[0]) {
                    return chr(hexdec(substr($str, 1)));
                }
                if ('u' === $str[0]) {
                    $dec = hexdec($matches[2]);
                    // If it overflowed to float, treat as INT_MAX, it will throw an error anyway.
                    return self::codePointToUtf8(\is_int($dec) ? $dec : \PHP_INT_MAX);
                }

                // Octal escape: three digits can reach 0777 (511). Keep only the low byte
                // explicitly instead of relying on chr() wrapping out-of-range values, which
                // is deprecated as of PHP 8.5.
                return chr(octdec($str) & 0xFF);
            },
            $str
        );
    }

    /**
     * Converts a Unicode code point to its UTF-8 encoded representation.
     *
     * @param int $num Code point
     *
     * @return string UTF-8 representation of code point
     *
     * @throws Error If the code point is larger than 0x10FFFF
     */
    private static function codePointToUtf8(int $num): string {
        if ($num <= 0x7F) {
            return chr($num);
        }
        if ($num <= 0x7FF) {
            return chr(($num >> 6) + 0xC0) . chr(($num & 0x3F) + 0x80);
        }
        if ($num <= 0xFFFF) {
            return chr(($num >> 12) + 0xE0) . chr((($num >> 6) & 0x3F) + 0x80) . chr(($num & 0x3F) + 0x80);
        }
        // Unicode ends at U+10FFFF; PHP rejects anything larger in a \u{...} escape as well.
        if ($num <= 0x10FFFF) {
            return chr(($num >> 18) + 0xF0) . chr((($num >> 12) & 0x3F) + 0x80)
                 . chr((($num >> 6) & 0x3F) + 0x80) . chr(($num & 0x3F) + 0x80);
        }
        throw new Error('Invalid UTF-8 codepoint escape sequence: Codepoint too large');
    }

    /**
     * Returns the type of this node.
     *
     * @return string Always 'Scalar_String'
     */
    public function getType(): string {
        return 'Scalar_String';
    }
}
162: