Skip to content

Commit d656353

Browse files
committed
#282 Added support for extra escaping sequences
1 parent ef12806 commit d656353

1 file changed

Lines changed: 60 additions & 40 deletions

File tree

src/Loader/StrictPoLoader.php

Lines changed: 60 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use Gettext\Translations;
99

1010
/**
11-
* Class to load a PO file following the same rules of the GNU tools.
11+
* Class to load a PO file following the same rules of the GNU gettext tools.
1212
*/
1313
final class StrictPoLoader extends Loader
1414
{
@@ -139,7 +139,19 @@ private function readNumber(): string
139139
}
140140

141141
/**
142-
* Attempts to read a standard comment string which ends on \n
142+
* Read at least one character from the given character set
143+
*/
144+
private function readCharset(string $charset, int $maxLength): string
{
    // Consume characters while the upcoming one belongs to $charset,
    // stopping after $maxLength characters have been collected.
    $collected = '';
    while (true) {
        $upcoming = $this->getChar();
        if ($upcoming === null || strpos($charset, $upcoming) === false) {
            // End of input, or a character outside of the accepted set
            break;
        }
        if (--$maxLength < 0) {
            // The length budget is exhausted: leave the character unread
            break;
        }
        $collected .= $this->nextChar();
    }

    // At least one matching character is mandatory
    if ($collected === '') {
        throw new Exception("Expected at least one occurrence of the characters \"{$charset}\" at byte {$this->position}");
    }

    return $collected;
}
152+
153+
/**
154+
* Attempts to read a standard comment string which ends with a newline
143155
*/
144156
private function readCommentString(): string
145157
{
@@ -152,46 +164,62 @@ private function readCommentString(): string
152164
*/
153165
private function readQuotedString(): string
{
    // Reads one or more consecutive double-quoted pieces (separated by
    // whitespace) and returns their concatenated, unescaped content.
    // Throws an Exception on a missing quote, an unescaped newline or an
    // invalid escape sequence.
    static $aliases = [
        '\\' => '\\', 'a' => "\x07", 'b' => "\x08", 'e' => "\e", 'f' => "\f",
        'n' => "\n", 'r' => "\r", 't' => "\t", 'v' => "\v", '"' => '"',
    ];
    static $octalDigits = '01234567';
    static $hexDigits = '0123456789abcdefABCDEF';

    for ($data = '', $pieces = 0;; ++$pieces) {
        if (!$this->readChar('"')) {
            // Perhaps the data is over (e.g. beginning of an identifier), let the next parser decide
            if ($pieces) {
                break;
            }
            throw new Exception("Expected an opening quote at byte {$this->position}");
        }
        // Collects chars until the end of the sequence/file
        // (the end of the input is treated like a closing quote, so that the
        // "ending quote" check below reports the error)
        for (; ($char = $this->getChar() ?? '"') !== '"'; $data .= $char) {
            if ($char === "\n" || $char === "\r") {
                throw new Exception("Newline character must be escaped at byte {$this->position}");
            }
            if ($this->nextChar() !== '\\') {
                continue;
            }
            $escaped = $this->nextChar();
            if ($escaped === null) {
                // A backslash right before the end of the input
                throw new Exception("Invalid quoted character at byte {$this->position}");
            }
            if (isset($aliases[$escaped])) {
                $char = $aliases[$escaped];
            } elseif (is_int(strpos($octalDigits, $escaped))) {
                // Octal escape: one to three digits, the first one is already consumed.
                // The digits are kept in their own variable: $data is the accumulator
                // of the whole string and must not be overwritten here.
                $digits = $escaped;
                while (
                    strlen($digits) < 3
                    && ($next = $this->getChar()) !== null
                    && is_int(strpos($octalDigits, $next))
                ) {
                    $digits .= $this->nextChar();
                }
                // GNU gettext fails with octals above the signed char range
                $decimal = (int) octdec($digits);
                if ($decimal > 127) {
                    throw new Exception("Octal value out of range [0, 0177] at byte {$this->position}");
                }
                $char = chr($decimal);
            } elseif ($escaped === 'u' || $escaped === 'U') {
                // Unicode escape: up to 4 (\u) or 8 (\U) hexadecimal digits.
                // According to the original implementation notes GNU gettext itself
                // does not support this escape, even if it follows the C sequences.
                $digits = $this->readCharset($hexDigits, $escaped === 'u' ? 4 : 8);
                $codePoint = hexdec($digits);
                $char = $codePoint <= 0x10FFFF ? self::encodeUtf8((int) $codePoint) : null;
                if ($char === null) {
                    throw new Exception("Invalid unicode code point at byte {$this->position}");
                }
            } elseif ($escaped === 'x') {
                $digits = $this->readCharset($hexDigits, PHP_INT_MAX);
                // GNU reads all valid hexadecimal chars, but only uses the last pair
                $char = chr((int) hexdec(substr($digits, -2)));
            } else {
                throw new Exception("Invalid quoted character at byte {$this->position}");
            }
        }
        if (!$this->readChar('"')) {
            throw new Exception("Expected an ending quote at byte {$this->position}");
        }
        $this->readWhiteSpace();
    }

    return $data;
}

/**
 * Encodes a Unicode code point as a UTF-8 byte sequence.
 *
 * @return string|null The UTF-8 bytes, or null when the code point is out of
 *                     range or is a surrogate (which cannot be encoded alone)
 */
private static function encodeUtf8(int $codePoint): ?string
{
    if ($codePoint < 0 || $codePoint > 0x10FFFF || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)) {
        return null;
    }
    if ($codePoint < 0x80) {
        return chr($codePoint);
    }
    if ($codePoint < 0x800) {
        return chr(0xC0 | ($codePoint >> 6))
            . chr(0x80 | ($codePoint & 0x3F));
    }
    if ($codePoint < 0x10000) {
        return chr(0xE0 | ($codePoint >> 12))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    return chr(0xF0 | ($codePoint >> 18))
        . chr(0x80 | (($codePoint >> 12) & 0x3F))
        . chr(0x80 | (($codePoint >> 6) & 0x3F))
        . chr(0x80 | ($codePoint & 0x3F));
}
@@ -389,29 +417,21 @@ private function processHeader(): void
389417
*/
390418
private function readHeaders(?string $string): array
{
    // Parses a block of "Name: value" lines into a name => value map.
    // Lines that do not start with a header name are appended to the value
    // of the previous header; empty lines are ignored.
    // Throws an Exception when continuation data precedes any header name.
    $headers = [];
    // The argument is nullable: explode() must never receive null
    // (deprecated since PHP 8.1, a TypeError with strict types)
    if ($string === null || $string === '') {
        return $headers;
    }
    $name = null;
    foreach (explode("\n", $string) as $line) {
        if ($line === '') {
            continue;
        }
        // Checks if it is a header definition line.
        // Useful for distinguishing between header definitions and possible continuations of a header entry.
        if (preg_match('/^[\w-]+:/', $line)) {
            // The pattern guarantees a name without surrounding spaces, only the value needs trimming
            [$name, $value] = explode(':', $line, 2);
            $headers[$name] = trim($value);
            continue;
        }
        // Data without a definition
        if ($name === null) {
            throw new Exception("The header data is missing a definition at byte {$this->position}");
        }
        $headers[$name] .= $line;
    }

    return $headers;
}

0 commit comments

Comments
 (0)