Skip to content

Commit c3289bd

Browse files
committed
#282 Fixes and extra checks/warnings (missing header, incomplete plurals, missing key headers, duplicated/malformed headers)
1 parent 2c244a9 commit c3289bd

1 file changed

Lines changed: 91 additions & 38 deletions

File tree

src/Loader/StrictPoLoader.php

Lines changed: 91 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,41 @@
1212
*/
1313
final class StrictPoLoader extends Loader
1414
{
15+
/** @var string[] */
16+
public $warnings = [];
17+
1518
/** @var Translations */
1619
private $translations;
1720
/** @var Translation */
1821
private $translation;
22+
/** @var Translation|null */
23+
private $header;
1924
/** @var string */
2025
private $data;
2126
/** @var int */
2227
private $position;
28+
/** @var int|null */
29+
private $pluralCount;
2330
/** @var bool */
2431
private $inPreviousComment;
32+
/** @var bool */
33+
private $throwOnWarning;
2534

2635
/**
2736
* Generates a Translations object from a .po based string
2837
*/
29-
public function loadString(string $string, Translations $translations = null): Translations
30-
{
38+
public function loadString(
39+
string $string,
40+
Translations $translations = null,
41+
bool $throwOnWarning = false
42+
): Translations {
3143
$this->data = $string;
3244
$this->position = 0;
3345
$this->translations = parent::loadString($string, $translations);
46+
$this->header = $this->translations->find(null, '');
47+
$this->pluralCount = $this->translations->getHeaders()->getPluralForm()[0] ?? null;
48+
$this->throwOnWarning = $throwOnWarning;
49+
$this->warnings = [];
3450
for ($this->newEntry(); $this->getChar() !== null; $this->newEntry()) {
3551
while ($this->readComment());
3652
$this->readContext();
@@ -42,7 +58,9 @@ public function loadString(string $string, Translations $translations = null): T
4258
}
4359
$this->saveEntry();
4460
}
45-
$this->processHeader();
61+
if (!$this->header) {
62+
$this->addWarning("The loaded string has no header translation at byte {$this->position}");
63+
}
4664

4765
return $this->translations;
4866
}
@@ -61,9 +79,19 @@ private function newEntry(): void
6179
*/
6280
private function saveEntry(): void
{
    $entry = $this->translation;

    // An entry with an empty msgid and no context is the header: it is consumed
    // as metadata by processHeader() and is not added to the collection.
    $isHeader = $entry->getOriginal() === '' && $entry->getContext() === null;
    if ($isHeader) {
        $this->processHeader();

        return;
    }

    // Refuse to overwrite an entry that was already stored under the same id.
    $existing = $this->translations->getTranslations()[$entry->getId()] ?? null;
    if ($existing) {
        throw new Exception("Duplicated entry at byte {$this->position}");
    }

    // Only plural entries are checked, and only when a plural count is known.
    // NOTE(review): if getPluralTranslations() does not include msgstr[0], a complete
    // entry holds pluralCount - 1 items and this comparison would warn spuriously - confirm.
    if ($this->pluralCount !== null && $entry->getPlural() !== null) {
        $provided = count($entry->getPluralTranslations());
        if ($provided < $this->pluralCount) {
            $this->addWarning("The translation doesn't have all the {$this->pluralCount} "
                . "plural forms at byte {$this->position}");
        }
    }

    $this->translations->add($entry);
}
6997

@@ -146,15 +174,15 @@ private function readNumber(): string
146174
}
147175

148176
/**
149-
* Read at least one character from the given character set
177+
* Read sequential characters that match the given character set until the length range is satisfied
150178
*/
151-
private function readCharset(string $charset, int $maxLength): string
179+
private function readCharset(string $charset, int $min, int $max, string $name): string
152180
{
153181
for ($data = ''; ($char = $this->getChar()) !== null
154182
&& is_int(strpos($charset, $char))
155-
&& --$maxLength >= 0; $data .= $this->nextChar());
156-
if ($data === '') {
157-
throw new Exception("Expected at least one occurrence of \"{$charset}\" at byte {$this->position}");
183+
&& --$max >= 0; $data .= $this->nextChar());
184+
if (strlen($data) < $min) {
185+
throw new Exception("Expected at least one occurrence of {$name} characters at byte {$this->position}");
158186
}
159187

160188
return $data;
@@ -178,14 +206,16 @@ private function readQuotedString(): string
178206
static
179207
$aliases = [
180208
'\\' => '\\', 'a' => "\x07", 'b' => "\x08", 'e' => "\e", 'f' => "\f",
181-
'n' => "\n", 'r' => "\r", 't' => "\t", 'v' => "\v", '"' => '"'
209+
'n' => "\n", 'r' => "\r", 't' => "\t", 'v' => "\v", '"' => '"',
182210
],
183211
$octalDigits = '01234567',
184212
$hexDigits = '0123456789abcdefABCDEF';
185-
for ($data = '', $pieces = 0;; ++$pieces) {
213+
for ($checkpoint = null, $data = '', $pieces = 0;; ++$pieces) {
186214
if (!$this->readChar('"')) {
187-
// Perhaps the data is over (e.g. beginning of an identifier), let the next parser decide
215+
// The data is over (e.g. beginning of an identifier) or there's an error
216+
// Restore the checkpoint and let the next parser handle it
188217
if ($pieces) {
218+
$this->position = $checkpoint;
189219
break;
190220
}
191221
throw new Exception("Expected an opening quote at byte {$this->position}");
@@ -203,9 +233,9 @@ private function readQuotedString(): string
203233
$char = $alias;
204234
break;
205235
case $octalDigit = is_int(strpos($octalDigits, $escaped)) ? $escaped : '--':
206-
$data = $octalDigit . $this->readCharset($octalDigits, 2);
207-
// GNU gettext fails with octals above the signed char range
208-
if (($decimal = octdec($data)) > 127) {
236+
$value = $octalDigit . $this->readCharset($octalDigits, 0, 2, 'octal');
237+
// GNU gettext fails with an octal above the signed char range
238+
if (($decimal = octdec($value)) > 127) {
209239
throw new Exception("Octal value out of range [0, 0177] at byte {$this->position}");
210240
}
211241
$char = chr($decimal);
@@ -214,14 +244,14 @@ private function readQuotedString(): string
214244
case 'u':
215245
// The GNU gettext is supposed to follow the escaping sequences of C
216246
// Curiously it doesn't support the unicode escape
217-
$data = $this->readCharset($hexDigits, $escaped === 'u' ? 4 : 8);
218-
$data = str_pad($data, strlen($data) + (strlen($data) & 1), '0', STR_PAD_LEFT);
219-
$char = json_decode("\"\\u{$data}\"");
247+
$value = $this->readCharset($hexDigits, 1, $digits = $escaped === 'u' ? 4 : 8, 'hexadecimal');
248+
$value = str_pad($value, $digits, '0', STR_PAD_LEFT);
249+
$char = mb_convert_encoding(hex2bin($value), 'UTF-8', 'UTF-' . ($digits * 4));
220250
break;
221251
case 'x':
222-
$data = $this->readCharset($hexDigits, PHP_INT_MAX);
252+
$value = $this->readCharset($hexDigits, 1, PHP_INT_MAX, 'hexadecimal');
223253
// GNU reads all valid hexadecimal chars, but only uses the last pair
224-
$char = chr(hexdec(substr($data, -2)));
254+
$char = hex2bin(str_pad(substr($value, -2), 2, '0', STR_PAD_LEFT));
225255
break;
226256
default:
227257
throw new Exception("Invalid quoted character at byte {$this->position}");
@@ -230,6 +260,8 @@ private function readQuotedString(): string
230260
if (!$this->readChar('"')) {
231261
throw new Exception("Expected an ending quote at byte {$this->position}");
232262
}
263+
// Saves a checkpoint and attempts to read a new sequence
264+
$checkpoint = $this->position;
233265
$this->readWhiteSpace();
234266
}
235267

@@ -258,12 +290,16 @@ private function readComment(): bool
258290
$this->translation->getComments()->add($data);
259291
break;
260292
case '~':
261-
if ($this->inPreviousComment) {
293+
if ($this->translation->getPreviousOriginal() !== null) {
262294
throw new Exception("Inconsistent use of #~ at byte {$this->position}");
263295
}
264296
$this->translation->disable();
265297
break;
266298
case '|':
299+
if ($this->translation->getPreviousOriginal() !== null) {
300+
throw new Exception('Cannot redeclare the previous comment #|, '
301+
. "ensure the definitions are in the right order at byte {$this->position}");
302+
}
267303
$this->inPreviousComment = true;
268304
$this->translation->setPreviousContext($this->readIdentifier('msgctxt'));
269305
$this->translation->setPreviousOriginal($this->readIdentifier('msgid', true));
@@ -299,11 +335,13 @@ private function readComment(): bool
299335
*/
300336
private function readIdentifier(string $identifier, bool $throwIfNotFound = false): ?string
301337
{
338+
$checkpoint = $this->position;
302339
$this->readWhiteSpace();
303340
if (!$this->readString($identifier)) {
304341
if ($throwIfNotFound) {
305-
throw new Exception("Expected identifier $identifier at byte {$this->position}");
342+
throw new Exception("Expected $identifier at byte {$this->position}");
306343
}
344+
$this->position = $checkpoint;
307345

308346
return null;
309347
}
@@ -409,39 +447,39 @@ private function readPluralTranslation(bool $throwIfNotFound = false): bool
409447
}
410448

411449
/**
 * Promotes the entry currently being parsed to header translation and copies its
 * comments, flags and header lines into the Translations object.
 *
 * Adds a warning for each of the Language, Plural-Forms and Content-Type headers
 * that is missing or empty in this entry.
 */
private function processHeader(): void
{
    $entry = $this->translation;
    $this->header = $entry;

    // The comments of the header entry become the description of the whole catalog.
    $description = $entry->getComments()->toArray();
    if ($description) {
        $this->translations->setDescription(implode("\n", $description));
    }

    // The flags of the header entry are added to the catalog flags.
    $flags = $entry->getFlags()->toArray();
    if ($flags) {
        $this->translations->getFlags()->add(...$flags);
    }

    // An absent msgstr is parsed as an empty header block.
    $newHeaders = $this->readHeaders($entry->getTranslation() ?? '');
    $headers = $this->translations->getHeaders();
    foreach ($newHeaders as $name => $value) {
        $headers->set($name, $value);
    }

    // Refresh the expected plural count now that Plural-Forms may have been set.
    $this->pluralCount = $headers->getPluralForm()[0] ?? null;

    foreach (['Language', 'Plural-Forms', 'Content-Type'] as $header) {
        if (empty($newHeaders[$header])) {
            $this->addWarning("$header header not declared or empty at byte {$this->position}");
        }
    }
}
440478

441479
/**
442480
* Parses the translation header data into an array
443481
*/
444-
private function readHeaders(?string $string): array
482+
private function readHeaders(string $string): array
445483
{
446484
$headers = [];
447485
$name = null;
@@ -450,12 +488,16 @@ private function readHeaders(?string $string): array
450488
// Useful for distinguishing between header definitions and possible continuations of a header entry.
451489
if (preg_match('/^[\w-]+:/', $line)) {
452490
[$name, $value] = explode(':', $line, 2);
491+
if (isset($headers[$name])) {
492+
$this->addWarning("Header already defined at byte {$this->position}");
493+
}
453494
$headers[$name] = trim($value);
454495
continue;
455496
}
456497
// Data without a definition
457498
if ($name === null) {
458-
throw new Exception("The header data is missing a definition at byte {$this->position}");
499+
$this->addWarning("Malformed header name at byte {$this->position}");
500+
continue;
459501
}
460502
$headers[$name] .= $line;
461503
}
private function checkNewLine(string $data, string $context): void
{
    // Adds a warning when $data begins or ends with a line break; $context names
    // the value being checked and is used as the prefix of the warning message.
    // Both "\n" and "\r" count as line breaks at either end. The previous
    // trailing check compared against "\n" twice, so a trailing "\r" was missed.
    $lineBreaks = ["\n", "\r"];
    $first = substr($data, 0, 1);
    $last = substr($data, -1);

    if (in_array($first, $lineBreaks, true) || in_array($last, $lineBreaks, true)) {
        $this->addWarning("$context cannot start nor end with a newline at byte {$this->position}");
    }
}
518+
519+
/**
 * Reports a warning: stored in $warnings, or thrown as an Exception
 * when the loader was asked to throw on warnings.
 */
private function addWarning(string $message): void
{
    // Lenient mode: keep the message so it can be read from $warnings afterwards.
    if (!$this->throwOnWarning) {
        $this->warnings[] = $message;

        return;
    }

    throw new Exception($message);
}
476529
}

0 commit comments

Comments
 (0)