Skip to content

Commit 01f3793

Browse files
authored
Merge branch 'master' into master
2 parents: bbba96e + d7a7589 · commit 01f3793

File tree

4 files changed

+106
-139
lines changed

4 files changed

+106
-139
lines changed

babel/messages/catalog.py

Lines changed: 27 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
from babel.core import Locale, UnknownLocaleError
2626
from babel.dates import format_datetime
2727
from babel.messages.plurals import get_plural
28-
from babel.util import LOCALTZ, _cmp, distinct
28+
from babel.util import LOCALTZ, _cmp
2929

3030
if TYPE_CHECKING:
3131
from typing_extensions import TypeAlias
@@ -166,7 +166,7 @@ def __init__(
166166
if not string and self.pluralizable:
167167
string = ('', '')
168168
self.string = string
169-
self.locations = list(distinct(locations))
169+
self.locations = list(dict.fromkeys(locations)) if locations else []
170170
self.flags = set(flags)
171171
if id and self.python_format:
172172
self.flags.add('python-format')
@@ -176,12 +176,15 @@ def __init__(
176176
self.flags.add('python-brace-format')
177177
else:
178178
self.flags.discard('python-brace-format')
179-
self.auto_comments = list(distinct(auto_comments))
180-
self.user_comments = list(distinct(user_comments))
181-
if isinstance(previous_id, str):
182-
self.previous_id = [previous_id]
179+
self.auto_comments = list(dict.fromkeys(auto_comments)) if auto_comments else []
180+
self.user_comments = list(dict.fromkeys(user_comments)) if user_comments else []
181+
if previous_id:
182+
if isinstance(previous_id, str):
183+
self.previous_id = [previous_id]
184+
else:
185+
self.previous_id = list(previous_id)
183186
else:
184-
self.previous_id = list(previous_id)
187+
self.previous_id = []
185188
self.lineno = lineno
186189
self.context = context
187190

@@ -291,9 +294,12 @@ def python_format(self) -> bool:
291294
292295
:type: `bool`"""
293296
ids = self.id
294-
if not isinstance(ids, (list, tuple)):
295-
ids = [ids]
296-
return any(PYTHON_FORMAT.search(id) for id in ids)
297+
if isinstance(ids, (list, tuple)):
298+
for id in ids: # Explicit loop for performance reasons.
299+
if PYTHON_FORMAT.search(id):
300+
return True
301+
return False
302+
return bool(PYTHON_FORMAT.search(ids))
297303

298304
@property
299305
def python_brace_format(self) -> bool:
@@ -306,9 +312,12 @@ def python_brace_format(self) -> bool:
306312
307313
:type: `bool`"""
308314
ids = self.id
309-
if not isinstance(ids, (list, tuple)):
310-
ids = [ids]
311-
return any(_has_python_brace_format(id) for id in ids)
315+
if isinstance(ids, (list, tuple)):
316+
for id in ids: # Explicit loop for performance reasons.
317+
if _has_python_brace_format(id):
318+
return True
319+
return False
320+
return _has_python_brace_format(ids)
312321

313322

314323
class TranslationError(Exception):
@@ -739,12 +748,9 @@ def __setitem__(self, id: _MessageID, message: Message) -> None:
739748
# The new message adds pluralization
740749
current.id = message.id
741750
current.string = message.string
742-
current.locations = list(distinct(current.locations +
743-
message.locations))
744-
current.auto_comments = list(distinct(current.auto_comments +
745-
message.auto_comments))
746-
current.user_comments = list(distinct(current.user_comments +
747-
message.user_comments))
751+
current.locations = list(dict.fromkeys([*current.locations, *message.locations]))
752+
current.auto_comments = list(dict.fromkeys([*current.auto_comments, *message.auto_comments]))
753+
current.user_comments = list(dict.fromkeys([*current.user_comments, *message.user_comments]))
748754
current.flags |= message.flags
749755
elif id == '':
750756
# special treatment for the header message
@@ -941,8 +947,8 @@ def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, s
941947
assert oldmsg is not None
942948
message.string = oldmsg.string
943949

944-
if keep_user_comments:
945-
message.user_comments = list(distinct(oldmsg.user_comments))
950+
if keep_user_comments and oldmsg.user_comments:
951+
message.user_comments = list(dict.fromkeys(oldmsg.user_comments))
946952

947953
if isinstance(message.id, (list, tuple)):
948954
if not isinstance(message.string, (list, tuple)):

babel/messages/pofile.py

Lines changed: 71 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,17 @@
1717

1818
from babel.core import Locale
1919
from babel.messages.catalog import Catalog, Message, ConflictInfo
20-
from babel.util import TextWrapper, _cmp
20+
from babel.util import TextWrapper
2121

2222
if TYPE_CHECKING:
2323
from typing import IO, AnyStr
2424

2525
from _typeshed import SupportsWrite
2626

2727

28+
_unescape_re = re.compile(r'\\([\\trn"])')
29+
30+
2831
def unescape(string: str) -> str:
2932
r"""Reverse `escape` the given string.
3033
@@ -45,7 +48,10 @@ def replace_escapes(match):
4548
return '\r'
4649
# m is \ or "
4750
return m
48-
return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])
51+
52+
if "\\" not in string: # Fast path: there's nothing to unescape
53+
return string[1:-1]
54+
return _unescape_re.sub(replace_escapes, string[1:-1])
4955

5056

5157
def denormalize(string: str) -> str:
@@ -73,7 +79,7 @@ def denormalize(string: str) -> str:
7379
escaped_lines = string.splitlines()
7480
if string.startswith('""'):
7581
escaped_lines = escaped_lines[1:]
76-
return ''.join(unescape(line) for line in escaped_lines)
82+
return ''.join(map(unescape, escaped_lines))
7783
else:
7884
return unescape(string)
7985

@@ -132,48 +138,14 @@ def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> No
132138
self.lineno = lineno
133139

134140

135-
class _NormalizedString:
136-
141+
class _NormalizedString(list):
137142
def __init__(self, *args: str) -> None:
138-
self._strs: list[str] = []
139-
for arg in args:
140-
self.append(arg)
141-
142-
def append(self, s: str) -> None:
143-
self._strs.append(s.strip())
143+
super().__init__(map(str.strip, args))
144144

145145
def denormalize(self) -> str:
146-
return ''.join(unescape(s) for s in self._strs)
147-
148-
def __bool__(self) -> bool:
149-
return bool(self._strs)
150-
151-
def __repr__(self) -> str:
152-
return os.linesep.join(self._strs)
153-
154-
def __cmp__(self, other: object) -> int:
155-
if not other:
156-
return 1
157-
158-
return _cmp(str(self), str(other))
159-
160-
def __gt__(self, other: object) -> bool:
161-
return self.__cmp__(other) > 0
162-
163-
def __lt__(self, other: object) -> bool:
164-
return self.__cmp__(other) < 0
165-
166-
def __ge__(self, other: object) -> bool:
167-
return self.__cmp__(other) >= 0
168-
169-
def __le__(self, other: object) -> bool:
170-
return self.__cmp__(other) <= 0
171-
172-
def __eq__(self, other: object) -> bool:
173-
return self.__cmp__(other) == 0
174-
175-
def __ne__(self, other: object) -> bool:
176-
return self.__cmp__(other) != 0
146+
if not self:
147+
return ""
148+
return ''.join(map(unescape, self))
177149

178150

179151
class PoFileParser:
@@ -183,13 +155,6 @@ class PoFileParser:
183155
See `read_po` for simple cases.
184156
"""
185157

186-
_keywords = [
187-
'msgid',
188-
'msgstr',
189-
'msgctxt',
190-
'msgid_plural',
191-
]
192-
193158
def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
194159
self.catalog = catalog
195160
self.ignore_obsolete = ignore_obsolete
@@ -216,23 +181,20 @@ def _add_message(self) -> None:
216181
Add a message to the catalog based on the current parser state and
217182
clear the state ready to process the next message.
218183
"""
219-
self.translations.sort()
220184
if len(self.messages) > 1:
221185
msgid = tuple(m.denormalize() for m in self.messages)
222-
else:
223-
msgid = self.messages[0].denormalize()
224-
if isinstance(msgid, (list, tuple)):
225186
string = ['' for _ in range(self.catalog.num_plurals)]
226-
for idx, translation in self.translations:
187+
for idx, translation in sorted(self.translations):
227188
if idx >= self.catalog.num_plurals:
228189
self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
229190
continue
230191
string[idx] = translation.denormalize()
231192
string = tuple(string)
232193
else:
194+
msgid = self.messages[0].denormalize()
233195
string = self.translations[0][1].denormalize()
234196
msgctxt = self.context.denormalize() if self.context else None
235-
message = Message(msgid, string, list(self.locations), set(self.flags),
197+
message = Message(msgid, string, self.locations, self.flags,
236198
self.auto_comments, self.user_comments, lineno=self.offset + 1,
237199
context=msgctxt)
238200
if self.obsolete:
@@ -247,27 +209,19 @@ def _finish_current_message(self) -> None:
247209
if self.messages:
248210
if not self.translations:
249211
self._invalid_pofile("", self.offset, f"missing msgstr for msgid '{self.messages[0].denormalize()}'")
250-
self.translations.append([0, _NormalizedString("")])
212+
self.translations.append([0, _NormalizedString()])
251213
self._add_message()
252214

253215
def _process_message_line(self, lineno, line, obsolete=False) -> None:
254-
if line.startswith('"'):
216+
if not line:
217+
return
218+
if line[0] == '"':
255219
self._process_string_continuation_line(line, lineno)
256220
else:
257221
self._process_keyword_line(lineno, line, obsolete)
258222

259223
def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
260-
261-
for keyword in self._keywords:
262-
try:
263-
if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
264-
arg = line[len(keyword):]
265-
break
266-
except IndexError:
267-
self._invalid_pofile(line, lineno, "Keyword must be followed by a string")
268-
else:
269-
self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
270-
return
224+
keyword, _, arg = line.partition(' ')
271225

272226
if keyword in ['msgid', 'msgctxt']:
273227
self._finish_current_message()
@@ -283,19 +237,23 @@ def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
283237
self.in_msgctxt = False
284238
self.in_msgid = True
285239
self.messages.append(_NormalizedString(arg))
240+
return
286241

287-
elif keyword == 'msgstr':
242+
if keyword == 'msgctxt':
243+
self.in_msgctxt = True
244+
self.context = _NormalizedString(arg)
245+
return
246+
247+
if keyword == 'msgstr' or keyword.startswith('msgstr['):
288248
self.in_msgid = False
289249
self.in_msgstr = True
290-
if arg.startswith('['):
291-
idx, msg = arg[1:].split(']', 1)
292-
self.translations.append([int(idx), _NormalizedString(msg)])
293-
else:
294-
self.translations.append([0, _NormalizedString(arg)])
250+
kwarg, has_bracket, idxarg = keyword.partition('[')
251+
idx = int(idxarg[:-1]) if has_bracket else 0
252+
s = _NormalizedString(arg) if arg != '""' else _NormalizedString()
253+
self.translations.append([idx, s])
254+
return
295255

296-
elif keyword == 'msgctxt':
297-
self.in_msgctxt = True
298-
self.context = _NormalizedString(arg)
256+
self._invalid_pofile(line, lineno, "Unknown or misformatted keyword")
299257

300258
def _process_string_continuation_line(self, line, lineno) -> None:
301259
if self.in_msgid:
@@ -307,52 +265,65 @@ def _process_string_continuation_line(self, line, lineno) -> None:
307265
else:
308266
self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
309267
return
310-
s.append(line)
268+
s.append(line.strip()) # For performance reasons, `NormalizedString` doesn't strip internally
311269

312270
def _process_comment(self, line) -> None:
313271

314272
self._finish_current_message()
315273

316-
if line[1:].startswith(':'):
274+
prefix = line[:2]
275+
if prefix == '#:':
317276
for location in _extract_locations(line[2:]):
318-
pos = location.rfind(':')
319-
if pos >= 0:
277+
a, colon, b = location.rpartition(':')
278+
if colon:
320279
try:
321-
lineno = int(location[pos + 1:])
280+
self.locations.append((a, int(b)))
322281
except ValueError:
323282
continue
324-
self.locations.append((location[:pos], lineno))
325-
else:
283+
else: # No line number specified
326284
self.locations.append((location, None))
327-
elif line[1:].startswith(','):
328-
for flag in line[2:].lstrip().split(','):
329-
self.flags.append(flag.strip())
330-
elif line[1:].startswith('.'):
285+
return
286+
287+
if prefix == '#,':
288+
self.flags.extend(flag.strip() for flag in line[2:].lstrip().split(','))
289+
return
290+
291+
if prefix == '#.':
331292
# These are called auto-comments
332293
comment = line[2:].strip()
333294
if comment: # Just check that we're not adding empty comments
334295
self.auto_comments.append(comment)
335-
else:
336-
# These are called user comments
337-
self.user_comments.append(line[1:].strip())
296+
return
297+
298+
# These are called user comments
299+
self.user_comments.append(line[1:].strip())
338300

339301
def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
340302
"""
341-
Reads from the file-like object `fileobj` and adds any po file
342-
units found in it to the `Catalog` supplied to the constructor.
303+
Reads from the file-like object (or iterable of string-likes) `fileobj`
304+
and adds any po file units found in it to the `Catalog`
305+
supplied to the constructor.
306+
307+
All of the items in the iterable must be the same type; either `str`
308+
or `bytes` (decoded with the catalog charset), but not a mixture.
343309
"""
310+
needs_decode = None
344311

345312
for lineno, line in enumerate(fileobj):
346313
line = line.strip()
347-
if not isinstance(line, str):
348-
line = line.decode(self.catalog.charset)
314+
if needs_decode is None:
315+
# If we don't yet know whether we need to decode,
316+
# let's find out now.
317+
needs_decode = not isinstance(line, str)
349318
if not line:
350319
continue
351-
if line.startswith('#'):
352-
if line[1:].startswith('-'):
320+
if needs_decode:
321+
line = line.decode(self.catalog.charset)
322+
if line[0] == '#':
323+
if line[:2] == '#-':
353324
self._invalid_pofile(line, lineno, 'cannot parse po file with conflicts')
354325

355-
if line[1:].startswith('~'):
326+
if line[:2] == '#~':
356327
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
357328
else:
358329
try:
@@ -367,8 +338,8 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
367338
# No actual messages found, but there was some info in comments, from which
368339
# we'll construct an empty header message
369340
if not self.counter and (self.flags or self.user_comments or self.auto_comments):
370-
self.messages.append(_NormalizedString('""'))
371-
self.translations.append([0, _NormalizedString('""')])
341+
self.messages.append(_NormalizedString())
342+
self.translations.append([0, _NormalizedString()])
372343
self._add_message()
373344

374345
def _invalid_pofile(self, line, lineno, msg) -> None:

0 commit comments

Comments
 (0)