1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\PhpDocParser\Parser;
4:
5: use function chr;
6: use function hexdec;
7: use function octdec;
8: use function preg_replace_callback;
9: use function str_replace;
10: use function substr;
11:
/**
 * Converts quoted string literals (single- or double-quoted, PHP-style escapes)
 * into the raw string value they denote.
 */
class StringUnescaper
{

	/** Single-character escapes recognised inside double-quoted strings. */
	private const REPLACEMENTS = [
		'\\' => '\\',
		'n' => "\n",
		'r' => "\r",
		't' => "\t",
		'f' => "\f",
		'v' => "\v",
		'e' => "\x1B",
	];

	/** Highest code point defined by Unicode. */
	private const MAX_CODE_POINT = 0x10FFFF;

	/** UTF-8 encoding of U+FFFD, emitted for code points that cannot be encoded. */
	private const REPLACEMENT_CHARACTER = "\xef\xbf\xbd";

	/**
	 * Strips the surrounding quotes from a string literal and resolves its escapes.
	 *
	 * The first character selects the quoting style: a literal starting with `'`
	 * only has `\\` and `\'` unescaped; anything else is handled as a
	 * double-quoted literal with the full set of escape sequences.
	 *
	 * @param string $string literal including its opening and closing quote character
	 * @return string the unescaped contents between the quotes
	 */
	public static function unescapeString(string $string): string
	{
		$quote = $string[0];
		$contents = substr($string, 1, -1);

		if ($quote === '\'') {
			return str_replace(
				['\\\\', '\\\''],
				['\\', '\''],
				$contents
			);
		}

		return self::parseEscapeSequences($contents, '"');
	}

	/**
	 * Resolves the escaped quote character plus the `\\`, `\n`, `\r`, `\t`, `\f`,
	 * `\v`, `\e`, `\xHH`, octal and `\u{...}` escape sequences in $str.
	 *
	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
	 *
	 * @param string $str   contents of the literal, without the surrounding quotes
	 * @param string $quote quote character whose backslash-escaped form is unescaped
	 */
	private static function parseEscapeSequences(string $str, string $quote): string
	{
		$str = str_replace('\\' . $quote, $quote, $str);

		return preg_replace_callback(
			'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
			static function (array $matches): string {
				$escape = $matches[1];

				if (isset(self::REPLACEMENTS[$escape])) {
					return self::REPLACEMENTS[$escape];
				}

				if ($escape[0] === 'x' || $escape[0] === 'X') {
					// At most two hex digits are matched, so the value is always 0-255.
					return chr((int) hexdec(substr($escape, 1)));
				}

				if ($escape[0] === 'u') {
					// hexdec() returns a float for very long digit runs; compare before
					// casting so an oversized value is not turned into an arbitrary int.
					$codePoint = hexdec($matches[2]);
					if ($codePoint > self::MAX_CODE_POINT) {
						return self::REPLACEMENT_CHARACTER;
					}

					return self::codePointToUtf8((int) $codePoint);
				}

				// Octal escapes can reach 0777 (511); keep only the low byte so chr()
				// always receives a value in 0-255.
				return chr(((int) octdec($escape)) & 0xFF);
			},
			$str
		);
	}

	/**
	 * Encodes a Unicode code point as a UTF-8 byte sequence.
	 *
	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
	 *
	 * @param int $num code point to encode
	 * @return string one to four bytes of UTF-8, or the encoding of U+FFFD when
	 *                $num is negative or greater than U+10FFFF
	 */
	private static function codePointToUtf8(int $num): string
	{
		if ($num < 0 || $num > self::MAX_CODE_POINT) {
			// Invalid UTF-8 codepoint escape sequence: Codepoint too large
			return self::REPLACEMENT_CHARACTER;
		}

		if ($num <= 0x7F) {
			return chr($num);
		}

		if ($num <= 0x7FF) {
			return chr(($num >> 6) + 0xC0)
				. chr(($num & 0x3F) + 0x80);
		}

		if ($num <= 0xFFFF) {
			return chr(($num >> 12) + 0xE0)
				. chr((($num >> 6) & 0x3F) + 0x80)
				. chr(($num & 0x3F) + 0x80);
		}

		return chr(($num >> 18) + 0xF0)
			. chr((($num >> 12) & 0x3F) + 0x80)
			. chr((($num >> 6) & 0x3F) + 0x80)
			. chr(($num & 0x3F) + 0x80);
	}

}
97: