1212 */
1313final class StrictPoLoader extends Loader
1414{
15+ /** @var string[] */
16+ public $ warnings = [];
17+
1518 /** @var Translations */
1619 private $ translations ;
1720 /** @var Translation */
1821 private $ translation ;
22+ /** @var Translation|null */
23+ private $ header ;
1924 /** @var string */
2025 private $ data ;
2126 /** @var int */
2227 private $ position ;
28+ /** @var int|null */
29+ private $ pluralCount ;
2330 /** @var bool */
2431 private $ inPreviousComment ;
32+ /** @var bool */
33+ private $ throwOnWarning ;
2534
2635 /**
2736 * Generates a Translations object from a .po based string
2837 */
29- public function loadString (string $ string , Translations $ translations = null ): Translations
30- {
38+ public function loadString (
39+ string $ string ,
40+ Translations $ translations = null ,
41+ bool $ throwOnWarning = false
42+ ): Translations {
3143 $ this ->data = $ string ;
3244 $ this ->position = 0 ;
3345 $ this ->translations = parent ::loadString ($ string , $ translations );
46+ $ this ->header = $ this ->translations ->find (null , '' );
47+ $ this ->pluralCount = $ this ->translations ->getHeaders ()->getPluralForm ()[0 ] ?? null ;
48+ $ this ->throwOnWarning = $ throwOnWarning ;
49+ $ this ->warnings = [];
3450 for ($ this ->newEntry (); $ this ->getChar () !== null ; $ this ->newEntry ()) {
3551 while ($ this ->readComment ());
3652 $ this ->readContext ();
@@ -42,7 +58,9 @@ public function loadString(string $string, Translations $translations = null): T
4258 }
4359 $ this ->saveEntry ();
4460 }
45- $ this ->processHeader ();
61+ if (!$ this ->header ) {
62+ $ this ->addWarning ("The loaded string has no header translation at byte {$ this ->position }" );
63+ }
4664
4765 return $ this ->translations ;
4866 }
@@ -61,9 +79,19 @@ private function newEntry(): void
6179 */
private function saveEntry(): void
{
    $entry = $this->translation;

    // An empty msgid without a msgctxt is routed to the header processing and is not added below
    if ($entry->getOriginal() === '' && $entry->getContext() === null) {
        $this->processHeader();

        return;
    }

    // Refuse an entry whose id is already present in the collection
    $existing = $this->translations->getTranslations()[$entry->getId()] ?? null;
    if ($existing) {
        throw new Exception("Duplicated entry at byte {$this->position}");
    }

    // Only entries declaring a plural are compared against the known plural count (if any)
    $checkPlurals = $this->pluralCount !== null && $entry->getPlural() !== null;
    if ($checkPlurals && count($entry->getPluralTranslations()) < $this->pluralCount) {
        $this->addWarning("The translation doesn't have all the {$this->pluralCount} "
            . "plural forms at byte {$this->position}");
    }

    $this->translations->add($entry);
}
6997
@@ -146,15 +174,15 @@ private function readNumber(): string
146174 }
147175
148176 /**
149- * Read at least one character from the given character set
177+ * Read sequential characters that match the given character set until the length range is satisfied
150178 */
151- private function readCharset (string $ charset , int $ maxLength ): string
179+ private function readCharset (string $ charset , int $ min , int $ max , string $ name ): string
152180 {
153181 for ($ data = '' ; ($ char = $ this ->getChar ()) !== null
154182 && is_int (strpos ($ charset , $ char ))
155- && --$ maxLength >= 0 ; $ data .= $ this ->nextChar ());
156- if ($ data === '' ) {
157- throw new Exception ("Expected at least one occurrence of \"{ $ charset }\" at byte {$ this ->position }" );
183+ && --$ max >= 0 ; $ data .= $ this ->nextChar ());
184+ if (strlen ( $ data) < $ min ) {
185+ throw new Exception ("Expected at least one occurrence of { $ name } characters at byte {$ this ->position }" );
158186 }
159187
160188 return $ data ;
@@ -178,14 +206,16 @@ private function readQuotedString(): string
178206 static
179207 $ aliases = [
180208 '\\' => '\\' , 'a ' => "\x07" , 'b ' => "\x08" , 'e ' => "\e" , 'f ' => "\f" ,
181- 'n ' => "\n" , 'r ' => "\r" , 't ' => "\t" , 'v ' => "\v" , '" ' => '" '
209+ 'n ' => "\n" , 'r ' => "\r" , 't ' => "\t" , 'v ' => "\v" , '" ' => '" ' ,
182210 ],
183211 $ octalDigits = '01234567 ' ,
184212 $ hexDigits = '0123456789abcdefABCDEF ' ;
185- for ($ data = '' , $ pieces = 0 ;; ++$ pieces ) {
213+ for ($ checkpoint = null , $ data = '' , $ pieces = 0 ;; ++$ pieces ) {
186214 if (!$ this ->readChar ('" ' )) {
187- // Perhaps the data is over (e.g. beginning of an identifier), let the next parser decide
215+ // The data is over (e.g. beginning of an identifier) or there's an error
216+ // Restore the checkpoint and let the next parser handle it
188217 if ($ pieces ) {
218+ $ this ->position = $ checkpoint ;
189219 break ;
190220 }
191221 throw new Exception ("Expected an opening quote at byte {$ this ->position }" );
@@ -203,9 +233,9 @@ private function readQuotedString(): string
203233 $ char = $ alias ;
204234 break ;
205235 case $ octalDigit = is_int (strpos ($ octalDigits , $ escaped )) ? $ escaped : '-- ' :
206- $ data = $ octalDigit . $ this ->readCharset ($ octalDigits , 2 );
207- // GNU gettext fails with octals above the signed char range
208- if (($ decimal = octdec ($ data )) > 127 ) {
236+ $ value = $ octalDigit . $ this ->readCharset ($ octalDigits , 0 , 2 , ' octal ' );
237+ // GNU gettext fails with an octal above the signed char range
238+ if (($ decimal = octdec ($ value )) > 127 ) {
209239 throw new Exception ("Octal value out of range [0, 0177] at byte {$ this ->position }" );
210240 }
211241 $ char = chr ($ decimal );
@@ -214,14 +244,14 @@ private function readQuotedString(): string
214244 case 'u ' :
215245 // The GNU gettext is supposed to follow the escaping sequences of C
216246 // Curiously it doesn't support the unicode escape
217- $ data = $ this ->readCharset ($ hexDigits , $ escaped === 'u ' ? 4 : 8 );
218- $ data = str_pad ($ data , strlen ( $ data ) + ( strlen ( $ data ) & 1 ) , '0 ' , STR_PAD_LEFT );
219- $ char = json_decode ( "\"\\ u { $ data }\"" );
247+ $ value = $ this ->readCharset ($ hexDigits , 1 , $ digits = $ escaped === 'u ' ? 4 : 8 , ' hexadecimal ' );
248+ $ value = str_pad ($ value , $ digits , '0 ' , STR_PAD_LEFT );
249+ $ char = mb_convert_encoding ( hex2bin ( $ value ), ' UTF-8 ' , ' UTF- ' . ( $ digits * 4 ) );
220250 break ;
221251 case 'x ' :
222- $ data = $ this ->readCharset ($ hexDigits , PHP_INT_MAX );
252+ $ value = $ this ->readCharset ($ hexDigits , 1 , PHP_INT_MAX , ' hexadecimal ' );
223253 // GNU reads all valid hexadecimal chars, but only uses the last pair
224- $ char = chr ( hexdec (substr ($ data , -2 )));
254+ $ char = hex2bin ( str_pad (substr ($ value , -2 ), 2 , ' 0 ' , STR_PAD_LEFT ));
225255 break ;
226256 default :
227257 throw new Exception ("Invalid quoted character at byte {$ this ->position }" );
@@ -230,6 +260,8 @@ private function readQuotedString(): string
230260 if (!$ this ->readChar ('" ' )) {
231261 throw new Exception ("Expected an ending quote at byte {$ this ->position }" );
232262 }
263+ // Saves a checkpoint and attempts to read a new sequence
264+ $ checkpoint = $ this ->position ;
233265 $ this ->readWhiteSpace ();
234266 }
235267
@@ -258,12 +290,16 @@ private function readComment(): bool
258290 $ this ->translation ->getComments ()->add ($ data );
259291 break ;
260292 case '~ ' :
261- if ($ this ->inPreviousComment ) {
293+ if ($ this ->translation -> getPreviousOriginal () !== null ) {
262294 throw new Exception ("Inconsistent use of #~ at byte {$ this ->position }" );
263295 }
264296 $ this ->translation ->disable ();
265297 break ;
266298 case '| ' :
299+ if ($ this ->translation ->getPreviousOriginal () !== null ) {
300+ throw new Exception ('Cannot redeclare the previous comment #|, '
301+ . "ensure the definitions are in the right order at byte {$ this ->position }" );
302+ }
267303 $ this ->inPreviousComment = true ;
268304 $ this ->translation ->setPreviousContext ($ this ->readIdentifier ('msgctxt ' ));
269305 $ this ->translation ->setPreviousOriginal ($ this ->readIdentifier ('msgid ' , true ));
@@ -299,11 +335,13 @@ private function readComment(): bool
299335 */
300336 private function readIdentifier (string $ identifier , bool $ throwIfNotFound = false ): ?string
301337 {
338+ $ checkpoint = $ this ->position ;
302339 $ this ->readWhiteSpace ();
303340 if (!$ this ->readString ($ identifier )) {
304341 if ($ throwIfNotFound ) {
305- throw new Exception ("Expected identifier $ identifier at byte {$ this ->position }" );
342+ throw new Exception ("Expected $ identifier at byte {$ this ->position }" );
306343 }
344+ $ this ->position = $ checkpoint ;
307345
308346 return null ;
309347 }
@@ -409,39 +447,39 @@ private function readPluralTranslation(bool $throwIfNotFound = false): bool
409447 }
410448
/**
 * Sets up the current translation as the header translation
 *
 * Copies the entry's comments into the collection description, its flags into the collection
 * flags and every parsed header line into the collection headers, then updates the plural
 * count from the collection's plural form. A warning is added for each of the Language,
 * Plural-Forms and Content-Type headers that is missing from, or empty in, this entry.
 *
 * @throws Exception When a warning is added while throwOnWarning is enabled
 */
private function processHeader(): void
{
    $this->header = $entry = $this->translation;

    $description = $entry->getComments()->toArray();
    if (!empty($description)) {
        $this->translations->setDescription(implode("\n", $description));
    }

    $flags = $entry->getFlags()->toArray();
    if (!empty($flags)) {
        $this->translations->getFlags()->add(...$flags);
    }

    $headers = $this->translations->getHeaders();
    // readHeaders() is an instance method (it reports warnings through $this), so call it on $this
    $newHeaders = $this->readHeaders($entry->getTranslation() ?? '');
    foreach ($newHeaders as $name => $value) {
        $headers->set($name, $value);
    }
    $this->pluralCount = $headers->getPluralForm()[0] ?? null;

    // Dedicated loop variable: the header entry above is an object, these are plain header names
    foreach (['Language', 'Plural-Forms', 'Content-Type'] as $required) {
        if (empty($newHeaders[$required])) {
            $this->addWarning("$required header not declared or empty at byte {$this->position}");
        }
    }
}
440478
441479 /**
442480 * Parses the translation header data into an array
443481 */
444- private function readHeaders (? string $ string ): array
482+ private function readHeaders (string $ string ): array
445483 {
446484 $ headers = [];
447485 $ name = null ;
@@ -450,12 +488,16 @@ private function readHeaders(?string $string): array
450488 // Useful for distinguishing between header definitions and possible continuations of a header entry.
451489 if (preg_match ('/^[\w-]+:/ ' , $ line )) {
452490 [$ name , $ value ] = explode (': ' , $ line , 2 );
491+ if (isset ($ headers [$ name ])) {
492+ $ this ->addWarning ("Header already defined at byte {$ this ->position }" );
493+ }
453494 $ headers [$ name ] = trim ($ value );
454495 continue ;
455496 }
456497 // Data without a definition
457498 if ($ name === null ) {
458- throw new Exception ("The header data is missing a definition at byte {$ this ->position }" );
499+ $ this ->addWarning ("Malformed header name at byte {$ this ->position }" );
500+ continue ;
459501 }
460502 $ headers [$ name ] .= $ line ;
461503 }
private function checkNewLine(string $data, string $context): void
{
    // Inspect the first and last byte; an empty $data matches neither line-break character
    $first = substr($data, 0, 1);
    $last = substr($data, -1);

    // Both "\n" and "\r" are checked at each end (the trailing "\r" case used to be missed)
    if ($first === "\n" || $first === "\r" || $last === "\n" || $last === "\r") {
        $this->addWarning("$context cannot start nor end with a newline at byte {$this->position}");
    }
}
518+
/**
 * Records a warning, or escalates it to an exception
 *
 * With throwOnWarning enabled the message is thrown as an Exception; otherwise it is appended
 * to the public warnings list.
 */
private function addWarning(string $message): void
{
    if (!$this->throwOnWarning) {
        $this->warnings[] = $message;

        return;
    }

    throw new Exception($message);
}
476529}
0 commit comments