Skip to content

Commit 8b00b02

Browse files
committed
Marking conflicting messages
* Update the _prepare function in ConcatenateCatalog to check for conflicting messages and to avoid parsing PO files twice
* Add a _conflicts field to Catalog to mark conflicts
* Update tests
1 parent 80ab44a commit 8b00b02

3 files changed

Lines changed: 97 additions & 25 deletions

File tree

babel/messages/catalog.py

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,14 @@
1212

1313
import datetime
1414
import re
15+
import os
1516
from collections.abc import Iterable, Iterator
1617
from copy import copy
1718
from difflib import SequenceMatcher
1819
from email import message_from_string
1920
from heapq import nlargest
2021
from string import Formatter
21-
from typing import TYPE_CHECKING
22+
from typing import TYPE_CHECKING, TypedDict
2223

2324
from babel import __version__ as VERSION
2425
from babel.core import Locale, UnknownLocaleError
@@ -357,6 +358,13 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict')
357358
return str(s)
358359

359360

361+
class ConflictInfo(TypedDict):
362+
message: Message
363+
file_name: str
364+
project: str
365+
version: str
366+
367+
360368
class Catalog:
361369
"""Representation of a message catalog."""
362370

@@ -400,6 +408,7 @@ def __init__(
400408
self.locale = locale
401409
self._header_comment = header_comment
402410
self._messages: dict[str | tuple[str, str], Message] = {}
411+
self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = {}
403412

404413
self.project = project or 'PROJECT'
405414
self.version = version or 'VERSION'
@@ -780,6 +789,19 @@ def __setitem__(self, id: _MessageID, message: Message) -> None:
780789
)
781790
self._messages[key] = message
782791

792+
def add_conflict(self, message: Message, file_name: str, project: str, version: str):
793+
key = message.id
794+
if key not in self._conflicts:
795+
self._conflicts[key] = []
796+
797+
self._conflicts[key].append({
798+
'message': message,
799+
'file_name': file_name,
800+
'project': project,
801+
'version': version,
802+
})
803+
message.flags |= {'fuzzy'}
804+
783805
def add(
784806
self,
785807
id: _MessageID,

babel/messages/frontend.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@
2121
import sys
2222
import tempfile
2323
import warnings
24-
from collections import OrderedDict, defaultdict
24+
from collections import defaultdict
2525
from configparser import RawConfigParser
2626
from io import StringIO
2727
from typing import Any, BinaryIO, Iterable, Literal
2828

2929
from babel import Locale, localedata
3030
from babel import __version__ as VERSION
3131
from babel.core import UnknownLocaleError
32-
from babel.messages.catalog import DEFAULT_HEADER, Catalog
32+
from babel.messages.catalog import DEFAULT_HEADER, Catalog, ConflictInfo
3333
from babel.messages.extract import (
3434
DEFAULT_KEYWORDS,
3535
DEFAULT_MAPPING,
@@ -960,7 +960,7 @@ def initialize_options(self):
960960
self.stringtable_input = None
961961
self.to_code = None
962962
# the first translation is always used temporarily
963-
self.use_first = True #~
963+
self.use_first = False #~
964964
self.lang = None
965965
self.color = None
966966
self.style = None
@@ -1000,36 +1000,49 @@ def finalize_options(self):
10001000
self.less_than = 2
10011001

10021002
def _prepare(self):
1003-
self.message_count = defaultdict(int)
1003+
templates: list[tuple[str, Catalog]] = []
1004+
message_info = {}
10041005

10051006
for filename in self.input_files:
10061007
with open(filename, 'r') as pofile:
10071008
template = read_po(pofile)
10081009
for message in template:
1009-
self.message_count[message.id] += 1
1010+
if message.id not in message_info:
1011+
message_info[message.id] = {
1012+
'count': 0,
1013+
'strings': set(),
1014+
}
1015+
message_info[message.id]['count'] += 1
1016+
message_info[message.id]['strings'].add(message.string if isinstance(message.string, str) else tuple(message.string))
1017+
templates.append((filename, template, ))
1018+
1019+
return templates, message_info
10101020

10111021
def run(self):
10121022
catalog = Catalog(fuzzy=False)
1013-
self._prepare()
1023+
templates, message_info = self._prepare()
10141024

1015-
for filename in self.input_files:
1016-
with open(filename, 'r') as pofile:
1017-
template = read_po(pofile)
1018-
if catalog.locale is None:
1019-
catalog.locale = template.locale
1025+
for path, template in templates:
1026+
if catalog.locale is None:
1027+
catalog.locale = template.locale
10201028

1021-
for message in template:
1022-
if not message.id:
1023-
continue
1029+
for message in template:
1030+
if not message.id:
1031+
continue
1032+
1033+
count = message_info[message.id]['count']
1034+
diff_string_count = len(message_info[message.id]['strings'])
1035+
if count <= self.more_than or (self.less_than is not None and count >= self.less_than):
1036+
continue
10241037

1025-
if message.id in catalog and catalog[message.id].string != message.string and not self.use_first:
1026-
raise NotImplementedError()
1038+
if count > 1 and not self.use_first and diff_string_count > 1:
1039+
file_name = os.path.basename(path)
1040+
catalog.add_conflict(message, file_name, template.project, template.version)
10271041

1028-
message_count = self.message_count[message.id]
1029-
if message_count > self.more_than and (self.less_than is None or message_count < self.less_than):
1030-
catalog[message.id] = message
1042+
catalog[message.id] = message
10311043

10321044
catalog.fuzzy = any(message.fuzzy for message in catalog)
1045+
10331046
with open(self.output_file, 'wb') as outfile:
10341047
write_po(
10351048
outfile,

babel/messages/pofile.py

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
from typing import TYPE_CHECKING, Literal
1818

1919
from babel.core import Locale
20-
from babel.messages.catalog import Catalog, Message
21-
from babel.util import TextWrapper
20+
from babel.messages.catalog import Catalog, Message, ConflictInfo
21+
from babel.util import TextWrapper, _cmp
2222

2323
if TYPE_CHECKING:
2424
from typing import IO, AnyStr
@@ -351,8 +351,11 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
351351
continue
352352
if needs_decode:
353353
line = line.decode(self.catalog.charset)
354-
if line[0] == '#':
355-
if line[:2] == '#~':
354+
if line.startswith('#'):
355+
if line[1:].startswith('-'):
356+
self._invalid_pofile(line, lineno, 'cannot parse po file with conflicts')
357+
358+
if line[1:].startswith('~'):
356359
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
357360
else:
358361
try:
@@ -646,6 +649,37 @@ def _format_comment(comment, prefix=''):
646649
for line in comment_wrapper.wrap(comment):
647650
yield f"#{prefix} {line.strip()}\n"
648651

652+
def _format_conflict_comment(file, project, version, prefix=''):
653+
comment = f"#-#-#-#-# {file} ({project} {version}) #-#-#-#-#"
654+
yield f"{normalize(comment, prefix=prefix, width=width)}\n"
655+
656+
def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], prefix=''):
657+
for conflict in conflicts:
658+
message = conflict['message']
659+
if message.context:
660+
yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix)
661+
yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
662+
663+
if isinstance(key, (list, tuple)):
664+
yield f"{prefix}msgid {normalize(key[0], prefix=prefix, width=width)}\n"
665+
yield f"{prefix}msgid_plural {normalize(key[1], prefix=prefix, width=width)}\n"
666+
else:
667+
yield f"{prefix}msgid {normalize(key, prefix=prefix, width=width)}\n"
668+
yield f"{prefix}msgstr {normalize('', prefix=prefix, width=width)}\n"
669+
670+
for conflict in conflicts:
671+
message = conflict['message']
672+
yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix)
673+
if isinstance(key, (list, tuple)):
674+
for idx in range(catalog.num_plurals):
675+
try:
676+
string = message.string[idx]
677+
except IndexError:
678+
string = ''
679+
yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n"
680+
else:
681+
yield f"{normalize(message.string, prefix=prefix, width=width)}\n"
682+
649683
def _format_message(message, prefix=''):
650684
if isinstance(message.id, (list, tuple)):
651685
if message.context:
@@ -717,7 +751,10 @@ def _format_message(message, prefix=''):
717751
norm_previous_id = normalize(message.previous_id[1], width=width)
718752
yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|')
719753

720-
yield from _format_message(message)
754+
if len(conflicts := catalog._conflicts.get(message.id, [])) > 0:
755+
yield from _format_conflict(message.id, conflicts)
756+
else:
757+
yield from _format_message(message)
721758
yield '\n'
722759

723760
if not ignore_obsolete:

0 commit comments

Comments
 (0)