Skip to content

Commit 7320117

Browse files
committed
#282 Added strict PO loader
1 parent fbd97d9 commit 7320117

1 file changed

Lines changed: 349 additions & 0 deletions

File tree

src/Loader/StrictPoLoader.php

Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
<?php
2+
declare(strict_types = 1);
3+
4+
namespace Gettext\Loader;
5+
6+
use Exception;
7+
use Gettext\Translation;
8+
use Gettext\Translations;
9+
10+
/**
 * Loads translations from the content of a PO file with a strict,
 * character-by-character parser: any deviation from the expected layout
 * raises an Exception that reports the byte offset reached by the parser.
 *
 * The parsing state (input, cursor and entry under construction) is kept in
 * private properties that are reset by loadString() / newEntry().
 */
final class StrictPoLoader extends Loader
{
    /** @var Translations Collection receiving the parsed entries */
    private $translations;

    /** @var Translation Entry currently being parsed */
    private $translation;

    /** @var string Raw content being parsed */
    private $data;

    /** @var int Zero-based byte offset of the cursor inside $data */
    private $position;

    /** @var bool True while the identifiers of a "#|" comment are being read */
    private $inPreviousComment;

    /**
     * Parses the given PO content and returns the resulting translations.
     *
     * @param string $string PO content to parse
     * @param Translations|null $translations Forwarded unchanged to the parent loader
     *
     * @throws Exception When the content does not follow the expected layout
     */
    public function loadString(string $string, ?Translations $translations = null): Translations
    {
        $this->data = $string;
        $this->position = 0;
        $this->translations = parent::loadString($string, $translations);

        $this->newEntry();
        while ($this->getChar() !== null) {
            while ($this->readComment()) {
                // Keep consuming the comment lines that precede the entry.
            }

            $this->readContext();
            $this->readId();

            if ($this->readPlural()) {
                // The first indexed msgstr is mandatory, the following ones are optional.
                $isRequired = true;
                while ($this->readPluralTranslation($isRequired)) {
                    $isRequired = false;
                }
            } else {
                $this->readTranslation();
            }

            $this->saveEntry();
            $this->newEntry();
        }

        $this->processHeader();

        return $this->translations;
    }

    /**
     * Starts a blank entry and leaves the "previous translation" mode.
     */
    private function newEntry(): void
    {
        $this->inPreviousComment = false;
        $this->translation = $this->createTranslation(null, '');
    }

    /**
     * Adds the entry under construction to the translations collection.
     */
    private function saveEntry(): void
    {
        $this->translations->add($this->translation);
    }

    /**
     * Consumes a "#~" marker, but only when the current entry is flagged as disabled.
     *
     * @return bool True when a marker was consumed
     */
    private function readDisabledComment(): bool
    {
        if (!$this->translation->isDisabled()) {
            return false;
        }

        return $this->readString('#~');
    }

    /**
     * Consumes a "#|" marker, but only while a previous-translation comment is being read.
     *
     * @return bool True when a marker was consumed
     */
    private function readPreviousTranslationComment(): bool
    {
        if (!$this->inPreviousComment) {
            return false;
        }

        return $this->readString('#|');
    }

    /**
     * Skips whitespace characters, together with the "#~" and "#|" markers
     * when the current state allows them (see the two helpers above).
     *
     * @return bool True when the cursor moved
     */
    private function readWhiteSpace(): bool
    {
        $start = $this->position;

        do {
            $consumed = (ctype_space($this->getChar() ?? '') && $this->nextChar() !== null)
                || $this->readDisabledComment()
                || $this->readPreviousTranslationComment();
        } while ($consumed);

        return $start !== $this->position;
    }

    /**
     * Consumes $word when the input continues with exactly that sequence of bytes.
     *
     * Every caller in this class passes a non-empty literal.
     *
     * @return bool True when the word was found and the cursor advanced past it
     */
    private function readString(string $word): bool
    {
        $length = strlen($word);
        if (substr($this->data, $this->position, $length) !== $word) {
            return false;
        }
        $this->position += $length;

        return true;
    }

    /**
     * Consumes the current character when it is equal to $char.
     *
     * @return bool True when the character matched and the cursor advanced
     */
    private function readChar(string $char): bool
    {
        if ($this->getChar() !== $char) {
            return false;
        }
        ++$this->position;

        return true;
    }

    /**
     * Returns the current character and advances the cursor.
     *
     * @return string|null Null (without moving the cursor) at the end of the input
     */
    private function nextChar(): ?string
    {
        $char = $this->getChar();
        if ($char !== null) {
            ++$this->position;
        }

        return $char;
    }

    /**
     * Returns the character under the cursor without consuming it.
     *
     * @return string|null Null when the cursor is past the end of the input
     */
    private function getChar(): ?string
    {
        return $this->data[$this->position] ?? null;
    }

    /**
     * Consumes a run of decimal digits.
     *
     * @return string The digits read; empty when the cursor is not on a digit
     */
    private function readNumber(): string
    {
        $digits = '';
        while (ctype_digit($this->getChar() ?? '')) {
            $digits .= $this->nextChar();
        }

        return $digits;
    }

    /**
     * Consumes everything up to (but not including) the next line feed or the end of the input.
     */
    private function readCommentString(): string
    {
        $text = '';
        while (($this->getChar() ?? "\n") !== "\n") {
            $text .= $this->nextChar();
        }

        return $text;
    }

    /**
     * Reads one or more consecutive double-quoted chunks and returns their
     * decoded concatenation. Whitespace between chunks is skipped.
     *
     * @throws Exception On a missing quote, an unknown escape sequence or a raw line feed inside the quotes
     */
    private function readQuotedString(): string
    {
        // Characters accepted after a backslash, mapped to the byte they stand for.
        static $aliases = [
            '\\' => '\\',
            'a' => "\x07",
            'b' => "\x08",
            'e' => "\x1b",
            'f' => "\x0c",
            'n' => "\n",
            'r' => "\r",
            't' => "\t",
            'v' => "\x0b",
            '"' => '"',
        ];

        $data = '';
        $hasData = false;

        while (true) {
            if (!$this->readChar('"')) {
                if ($hasData) {
                    // At least one chunk was read: a missing quote just ends the string.
                    break;
                }
                throw new Exception("Expected an opening quote at byte {$this->position}");
            }

            // The end of the input is handled like a closing quote here, so that the
            // "ending quote" check below is the one reporting the problem.
            while (($char = $this->getChar() ?? '"') !== '"') {
                $this->nextChar();
                if ($char === '\\') {
                    // nextChar() is null at the end of the input: map it to a key that
                    // is never an alias instead of using null as an array offset.
                    $char = $aliases[$this->nextChar() ?? ''] ?? null;
                    if ($char === null) {
                        throw new Exception("Invalid quoted character at byte {$this->position}");
                    }
                } elseif ($char === "\n") {
                    throw new Exception("New line character must be encoded at byte {$this->position}");
                }
                $data .= $char;
            }

            if (!$this->readChar('"')) {
                throw new Exception("Expected an ending quote at byte {$this->position}");
            }
            $this->readWhiteSpace();
            $hasData = true;
        }

        return $data;
    }

    /**
     * Reads a single "#" comment (after skipping leading whitespace) and
     * stores its content in the current entry according to the comment type.
     *
     * @return bool False when the cursor is not on a comment
     *
     * @throws Exception When "#~" is found while a "#|" comment is being read, or a "#|" comment is malformed
     */
    private function readComment(): bool
    {
        $this->readWhiteSpace();
        if (!$this->readChar('#')) {
            return false;
        }

        // The character following "#" selects the comment type. The null check keeps
        // strpos() from receiving an empty needle when "#" is the last byte of the input,
        // which would otherwise move the cursor past the end.
        $type = '';
        $char = $this->getChar();
        if ($char !== null && strpos('~|,:.', $char) !== false) {
            $type = $char;
            ++$this->position;
        }

        // Only a single space might be optionally added
        $this->readChar(' ');

        switch ($type) {
            case '':
                $data = $this->readCommentString();
                $this->translation->getComments()->add($data);
                break;
            case '~':
                if ($this->inPreviousComment) {
                    throw new Exception("Inconsistent use of #~ at byte {$this->position}");
                }
                $this->translation->disable();
                break;
            case '|':
                // While the flag is set, readWhiteSpace() also skips the "#|" markers
                // that prefix the following lines of the same comment.
                $this->inPreviousComment = true;
                $this->translation->setPreviousContext($this->readIdentifier('msgctxt'));
                $this->translation->setPreviousOriginal($this->readIdentifier('msgid', true));
                $this->translation->setPreviousPlural($this->readIdentifier('msgid_plural'));
                $this->inPreviousComment = false;
                break;
            case ',':
                $data = $this->readCommentString();
                foreach (array_map('trim', explode(',', trim($data))) as $value) {
                    $this->translation->getFlags()->add($value);
                }
                break;
            case ':':
                $data = $this->readCommentString();
                foreach (preg_split('/\s+/', trim($data)) as $value) {
                    // Each reference is a file name optionally followed by ":<line>".
                    if (preg_match('/^(.+)(:(\d*))?$/U', $value, $matches)) {
                        $line = isset($matches[3]) ? intval($matches[3]) : null;
                        $this->translation->getReferences()->add($matches[1], $line);
                    }
                }
                break;
            case '.':
                $data = $this->readCommentString();
                $this->translation->getExtractedComments()->add($data);
                break;
        }

        return true;
    }

    /**
     * Reads "<identifier> <quoted string>" and returns the decoded string.
     *
     * @param string $identifier Keyword expected under the cursor (after optional whitespace)
     * @param bool $isRequired Whether a missing keyword is an error
     *
     * @return string|null Null when the keyword is absent and not required
     *
     * @throws Exception When a required keyword is missing or the quoted string is malformed
     */
    private function readIdentifier(string $identifier, bool $isRequired = false): ?string
    {
        $this->readWhiteSpace();

        if ($this->readString($identifier)) {
            $this->readWhiteSpace();

            return $this->readQuotedString();
        }

        if ($isRequired) {
            throw new Exception("Expected identifier $identifier at byte {$this->position}");
        }

        return null;
    }

    /**
     * Reads the optional msgctxt of the current entry.
     *
     * @return bool True when a context was found
     */
    private function readContext(): bool
    {
        $context = $this->readIdentifier('msgctxt');
        if ($context === null) {
            return false;
        }
        $this->translation = $this->translation->withContext($context);

        return true;
    }

    /**
     * Reads the mandatory msgid of the current entry.
     *
     * @throws Exception When the msgid is missing
     */
    private function readId(): void
    {
        $original = $this->readIdentifier('msgid', true);
        $this->translation = $this->translation->withOriginal($original);
    }

    /**
     * Reads the optional msgid_plural of the current entry.
     *
     * @return bool True when a plural form was found
     */
    private function readPlural(): bool
    {
        $plural = $this->readIdentifier('msgid_plural');
        if ($plural === null) {
            return false;
        }
        $this->translation->setPlural($plural);

        return true;
    }

    /**
     * Reads the mandatory, non-indexed msgstr of the current entry.
     *
     * @throws Exception When the msgstr is missing or malformed
     */
    private function readTranslation(): void
    {
        $this->readWhiteSpace();
        if (!$this->readString('msgstr')) {
            throw new Exception("Expected msgstr at byte {$this->position}");
        }
        $this->readWhiteSpace();
        $this->translation->translate($this->readQuotedString());
    }

    /**
     * Reads one "msgstr[N]" line of a plural entry. N must be equal to the
     * number of translations already stored in the entry.
     *
     * @param bool $isRequired Whether a missing msgstr is an error
     *
     * @return bool False when no msgstr follows and it was not required
     *
     * @throws Exception When the line is missing (and required), malformed or out of sequence
     */
    private function readPluralTranslation(bool $isRequired = false): bool
    {
        $this->readWhiteSpace();
        if (!$this->readString('msgstr')) {
            if ($isRequired) {
                throw new Exception("Expected indexed msgstr at byte {$this->position}");
            }

            return false;
        }

        $this->readWhiteSpace();
        if (!$this->readChar('[')) {
            throw new Exception("Expected [ character at byte {$this->position}");
        }
        $index = $this->readNumber();
        if ($index === '') {
            throw new Exception("Expected msgstr index at byte {$this->position}");
        }
        $this->readWhiteSpace();
        if (!$this->readChar(']')) {
            throw new Exception("Expected ] character at byte {$this->position}");
        }

        // Rebuild the full list of translations read so far: the singular one
        // (if any) followed by the plural ones.
        $translations = $this->translation->getPluralTranslations();
        $singular = $this->translation->getTranslation();
        if ($singular !== null) {
            array_unshift($translations, $singular);
        }
        if (count($translations) !== (int) $index) {
            throw new Exception("The msgstr has an invalid index at byte {$this->position}");
        }

        $this->readWhiteSpace();
        $translations[] = $this->readQuotedString();

        // Store the list back: first item as the translation, the rest as plurals.
        $this->translation->translate(array_shift($translations));
        $this->translation->translatePlural(...$translations);

        return true;
    }

    /**
     * Moves the header entry (no context, empty msgid), when present, out of
     * the entries list: its comments become the description, its flags are
     * added to the collection flags and its msgstr is parsed into headers.
     *
     * @throws Exception When the header msgstr is malformed
     */
    private function processHeader(): void
    {
        $translations = $this->translations;
        $header = $translations->find(null, '');
        if (!$header) {
            return;
        }

        $translations->remove($header);

        $description = $header->getComments()->toArray();
        if (!empty($description)) {
            $translations->setDescription(implode("\n", $description));
        }

        $flags = $header->getFlags()->toArray();
        if (!empty($flags)) {
            $translations->getFlags()->add(...$flags);
        }

        $headers = $translations->getHeaders();
        // readHeaders() is an instance method (it reads $this->position), so it
        // is invoked on $this rather than with the static call syntax.
        foreach ($this->readHeaders($header->getTranslation()) as $name => $value) {
            $headers->set($name, $value);
        }
    }

    /**
     * Splits the msgstr of the header entry into "name => value" pairs.
     * Lines not starting with "<name>:" are appended to the previous header.
     *
     * @param string|null $string Decoded msgstr of the header entry
     *
     * @return array Header values indexed by header name
     *
     * @throws Exception When a continuation line comes before any header definition;
     *                   the reported offset is the current cursor of the parser
     */
    private function readHeaders(?string $string): array
    {
        if (empty($string)) {
            return [];
        }

        $headers = [];
        $name = null;

        foreach (explode("\n", $string) as $line) {
            if ($line === '') {
                continue;
            }

            // Checks if it is a header definition line.
            // Useful for distinguishing between header definitions and possible continuations of a header entry.
            if (preg_match('/^[\w-]+:/', $line)) {
                [$name, $value] = array_map('trim', explode(':', $line, 2));
                $headers[$name] = $value;
                continue;
            }

            // Data without a definition
            if ($name === null) {
                throw new Exception("The header data is missing a definition at byte {$this->position}");
            }

            $headers[$name] = ($headers[$name] ?? '') . $line;
        }

        return $headers;
    }
}

0 commit comments

Comments
 (0)