88use Gettext \Translations ;
99
1010/**
11- * Class to load a PO file following the same rules of the GNU tools.
11+ * Class to load a PO file following the same rules of the GNU gettext tools.
1212 */
1313final class StrictPoLoader extends Loader
1414{
@@ -139,7 +139,19 @@ private function readNumber(): string
139139 }
140140
/**
 * Consumes consecutive characters that belong to $charset, up to $maxLength of them.
 *
 * @param string $charset   The accepted characters, as a plain string
 * @param int    $maxLength Upper bound on the number of characters consumed
 *
 * @return string The consumed characters (never empty)
 *
 * @throws Exception If the current character is missing or not part of $charset
 */
private function readCharset(string $charset, int $maxLength): string
{
    $collected = '';
    while (true) {
        $current = $this->getChar();
        // Stop at the end of the data or on the first character outside the set
        if ($current === null || strpos($charset, $current) === false) {
            break;
        }
        // Stop once the length budget is spent
        if (--$maxLength < 0) {
            break;
        }
        $collected .= $this->nextChar();
    }
    if ($collected !== '') {
        return $collected;
    }
    throw new Exception("Expected at least one occurrence of the characters \"{$charset}\" at byte {$this->position}");
}
152+
153+ /**
154+ * Attempts to read a standard comment string which ends with a newline
143155 */
144156 private function readCommentString (): string
145157 {
@@ -152,46 +164,62 @@ private function readCommentString(): string
152164 */
private function readQuotedString(): string
{
    // Reads one or more adjacent double-quoted pieces ("..." "...") and returns
    // their concatenated content with every escape sequence already decoded.
    // Throws an Exception on a missing quote, a raw newline inside the quotes,
    // or an invalid/out-of-range escape sequence.
    static
        $aliases = [
            '\\' => '\\', 'a' => "\x07", 'b' => "\x08", 'e' => "\e", 'f' => "\f",
            'n' => "\n", 'r' => "\r", 't' => "\t", 'v' => "\v", '"' => '"',
        ],
        $octalDigits = '01234567',
        $hexDigits = '0123456789abcdefABCDEF';
    for ($data = '', $pieces = 0;; ++$pieces) {
        if (!$this->readChar('"')) {
            // Perhaps the data is over (e.g. beginning of an identifier), let the next parser decide
            if ($pieces) {
                break;
            }
            throw new Exception("Expected an opening quote at byte {$this->position}");
        }
        // Collects chars until the end of the sequence/file.
        // $char is appended to $data by the loop's increment expression, so the
        // escape handling below must only ever assign to $char, never to $data.
        for (; ($char = $this->getChar() ?? '"') !== '"'; $data .= $char) {
            if ($char === "\n" || $char === "\r") {
                throw new Exception("Newline character must be escaped at byte {$this->position}");
            }
            if ($this->nextChar() !== '\\') {
                continue;
            }
            $escaped = $this->nextChar();
            if ($escaped === null) {
                // A backslash was the last character of the input
                throw new Exception("Invalid quoted character at byte {$this->position}");
            }
            if (isset($aliases[$escaped])) {
                $char = $aliases[$escaped];
            } elseif (strpos($octalDigits, $escaped) !== false) {
                // Octal escape: one to three digits, as in C
                $digits = $escaped;
                $next = $this->getChar();
                if ($next !== null && strpos($octalDigits, $next) !== false) {
                    $digits .= $this->readCharset($octalDigits, 2);
                }
                // GNU gettext fails with octals above the signed char range
                if (($decimal = octdec($digits)) > 127) {
                    throw new Exception("Octal value out of range [0, 0177] at byte {$this->position}");
                }
                $char = chr($decimal);
            } elseif ($escaped === 'u' || $escaped === 'U') {
                // The GNU gettext is supposed to follow the escaping sequences of C
                // Curiously it doesn't support the unicode escape
                $digits = $this->readCharset($hexDigits, $escaped === 'u' ? 4 : 8);
                $codePoint = hexdec($digits);
                // Reject values that are not Unicode scalar values (too large or surrogates)
                if ($codePoint > 0x10FFFF || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)) {
                    throw new Exception("Invalid unicode code point at byte {$this->position}");
                }
                $codePoint = (int) $codePoint;
                // Encode the code point as UTF-8
                if ($codePoint < 0x80) {
                    $char = chr($codePoint);
                } elseif ($codePoint < 0x800) {
                    $char = chr(0xC0 | ($codePoint >> 6))
                        . chr(0x80 | ($codePoint & 0x3F));
                } elseif ($codePoint < 0x10000) {
                    $char = chr(0xE0 | ($codePoint >> 12))
                        . chr(0x80 | (($codePoint >> 6) & 0x3F))
                        . chr(0x80 | ($codePoint & 0x3F));
                } else {
                    $char = chr(0xF0 | ($codePoint >> 18))
                        . chr(0x80 | (($codePoint >> 12) & 0x3F))
                        . chr(0x80 | (($codePoint >> 6) & 0x3F))
                        . chr(0x80 | ($codePoint & 0x3F));
                }
            } elseif ($escaped === 'x') {
                $digits = $this->readCharset($hexDigits, PHP_INT_MAX);
                // GNU reads all valid hexadecimal chars, but only uses the last pair
                $char = chr(hexdec(substr($digits, -2)));
            } else {
                throw new Exception("Invalid quoted character at byte {$this->position}");
            }
        }
        if (!$this->readChar('"')) {
            throw new Exception("Expected an ending quote at byte {$this->position}");
        }
        $this->readWhiteSpace();
    }
    return $data;
}
@@ -389,29 +417,21 @@ private function processHeader(): void
389417 */
private function readHeaders(?string $string): array
{
    // Parses a block of "Name: value" lines into an associative array.
    // A line that does not start with "Name:" is treated as a continuation and
    // is appended to the most recent header; empty lines are ignored.
    // Throws an Exception when a continuation line appears before any header.

    // The parameter is nullable: bail out early instead of handing null to explode()
    if ($string === null || $string === '') {
        return [];
    }
    $headers = [];
    $name = null;
    foreach (array_filter(explode("\n", $string), 'strlen') as $line) {
        // Checks if it is a header definition line.
        // Useful for distinguishing between header definitions and possible continuations of a header entry.
        if (preg_match('/^[\w-]+:/', $line)) {
            // The pattern above guarantees the name has no surrounding whitespace, only the value needs trimming
            [$name, $value] = explode(':', $line, 2);
            $headers[$name] = trim($value);
            continue;
        }
        // Data without a definition
        if ($name === null) {
            throw new Exception("The header data is missing a definition at byte {$this->position}");
        }
        // $name always refers to a key set by a previous definition line
        $headers[$name] .= $line;
    }
    return $headers;
}
0 commit comments