Skip to content

Commit 8568e90

Browse files
committed
Marking conflicting messages
* Update the _prepare function in ConcatenateCatalog to check for conflicting messages and to avoid parsing PO files twice
* Add a _conflicts field to Catalog to mark conflicts
* Update tests
1 parent 4cbe604 commit 8568e90

4 files changed

Lines changed: 171 additions & 31 deletions

File tree

babel/messages/catalog.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@
1111

1212
import datetime
1313
import re
14+
import os
1415
from collections.abc import Iterable, Iterator
1516
from copy import copy
1617
from difflib import SequenceMatcher
1718
from email import message_from_string
1819
from heapq import nlargest
1920
from string import Formatter
20-
from typing import TYPE_CHECKING
21+
from typing import TYPE_CHECKING, TypedDict
2122

2223
from babel import __version__ as VERSION
2324
from babel.core import Locale, UnknownLocaleError
@@ -338,6 +339,13 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict')
338339
return str(s)
339340

340341

342+
class ConflictInfo(TypedDict):
    """One conflicting translation of a message, plus where it came from.

    Dictionaries of this shape are what ``Catalog.add_conflict`` appends to
    the per-message lists stored in ``Catalog._conflicts``.
    """

    # The message object that carries the conflicting translation.
    message: Message
    # Name of the PO file the message was read from (the concatenate command
    # passes the basename of the input path).
    file_name: str
    # Project name of the catalog the message was read from.
    project: str
    # Version of the catalog the message was read from.
    version: str
347+
348+
341349
class Catalog:
342350
"""Representation of a message catalog."""
343351

@@ -381,6 +389,7 @@ def __init__(
381389
self.locale = locale
382390
self._header_comment = header_comment
383391
self._messages: dict[str | tuple[str, str], Message] = {}
392+
self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = {}
384393

385394
self.project = project or 'PROJECT'
386395
self.version = version or 'VERSION'
@@ -747,6 +756,19 @@ def __setitem__(self, id: _MessageID, message: Message) -> None:
747756
f"Expected sequence but got {type(message.string)}"
748757
self._messages[key] = message
749758

759+
def add_conflict(self, message: Message, file_name: str, project: str, version: str) -> None:
    """Record *message* as one of several conflicting translations.

    The conflict is stored in ``self._conflicts`` under ``message.id``; each
    call appends one entry, so entries keep the order in which they were
    added. The message itself is flagged ``fuzzy``.

    :param message: the message whose translation conflicts with another one
    :param file_name: name of the PO file the message was read from
    :param project: project name of the catalog the message was read from
    :param version: version of the catalog the message was read from
    """
    conflict: ConflictInfo = {
        'message': message,
        'file_name': file_name,
        'project': project,
        'version': version,
    }
    # setdefault creates the per-id list on first use and returns the
    # existing one afterwards, so no separate membership check is needed.
    self._conflicts.setdefault(message.id, []).append(conflict)
    message.flags |= {'fuzzy'}
771+
750772
def add(
751773
self,
752774
id: _MessageID,

babel/messages/frontend.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@
2020
import sys
2121
import tempfile
2222
import warnings
23-
from collections import OrderedDict, defaultdict
23+
from collections import defaultdict
2424
from configparser import RawConfigParser
2525
from io import StringIO
2626
from typing import BinaryIO, Iterable, Literal
2727

2828
from babel import Locale, localedata
2929
from babel import __version__ as VERSION
3030
from babel.core import UnknownLocaleError
31-
from babel.messages.catalog import DEFAULT_HEADER, Catalog
31+
from babel.messages.catalog import DEFAULT_HEADER, Catalog, ConflictInfo
3232
from babel.messages.extract import (
3333
DEFAULT_KEYWORDS,
3434
DEFAULT_MAPPING,
@@ -925,7 +925,7 @@ def initialize_options(self):
925925
self.stringtable_input = None
926926
self.to_code = None
927927
# the first translation is always used temporarily
928-
self.use_first = True #~
928+
self.use_first = False #~
929929
self.lang = None
930930
self.color = None
931931
self.style = None
@@ -965,36 +965,49 @@ def finalize_options(self):
965965
self.less_than = 2
966966

967967
def _prepare(self):
968-
self.message_count = defaultdict(int)
968+
templates: list[tuple[str, Catalog]] = []
969+
message_info = {}
969970

970971
for filename in self.input_files:
971972
with open(filename, 'r') as pofile:
972973
template = read_po(pofile)
973974
for message in template:
974-
self.message_count[message.id] += 1
975+
if message.id not in message_info:
976+
message_info[message.id] = {
977+
'count': 0,
978+
'strings': set(),
979+
}
980+
message_info[message.id]['count'] += 1
981+
message_info[message.id]['strings'].add(message.string if isinstance(message.string, str) else tuple(message.string))
982+
templates.append((filename, template, ))
983+
984+
return templates, message_info
975985

976986
def run(self):
977987
catalog = Catalog(fuzzy=False)
978-
self._prepare()
988+
templates, message_info = self._prepare()
979989

980-
for filename in self.input_files:
981-
with open(filename, 'r') as pofile:
982-
template = read_po(pofile)
983-
if catalog.locale is None:
984-
catalog.locale = template.locale
990+
for path, template in templates:
991+
if catalog.locale is None:
992+
catalog.locale = template.locale
985993

986-
for message in template:
987-
if not message.id:
988-
continue
994+
for message in template:
995+
if not message.id:
996+
continue
997+
998+
count = message_info[message.id]['count']
999+
diff_string_count = len(message_info[message.id]['strings'])
1000+
if count <= self.more_than or (self.less_than is not None and count >= self.less_than):
1001+
continue
9891002

990-
if message.id in catalog and catalog[message.id].string != message.string and not self.use_first:
991-
raise NotImplementedError()
1003+
if count > 1 and not self.use_first and diff_string_count > 1:
1004+
file_name = os.path.basename(path)
1005+
catalog.add_conflict(message, file_name, template.project, template.version)
9921006

993-
message_count = self.message_count[message.id]
994-
if message_count > self.more_than and (self.less_than is None or message_count < self.less_than):
995-
catalog[message.id] = message
1007+
catalog[message.id] = message
9961008

9971009
catalog.fuzzy = any(message.fuzzy for message in catalog)
1010+
9981011
with open(self.output_file, 'wb') as outfile:
9991012
write_po(
10001013
outfile,

babel/messages/pofile.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from typing import TYPE_CHECKING, Literal
1717

1818
from babel.core import Locale
19-
from babel.messages.catalog import Catalog, Message
19+
from babel.messages.catalog import Catalog, Message, ConflictInfo
2020
from babel.util import TextWrapper, _cmp
2121

2222
if TYPE_CHECKING:
@@ -349,6 +349,9 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
349349
if not line:
350350
continue
351351
if line.startswith('#'):
352+
if line[1:].startswith('-'):
353+
self._invalid_pofile(line, lineno, 'cannot parse po file with conflicts')
354+
352355
if line[1:].startswith('~'):
353356
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
354357
else:
@@ -642,6 +645,37 @@ def _format_comment(comment, prefix=''):
642645
for line in comment_wrapper.wrap(comment):
643646
yield f"#{prefix} {line.strip()}\n"
644647

648+
def _format_conflict_comment(file, project, version, prefix=''):
    """Yield the marker line that introduces one conflicting translation.

    The marker has the form ``#-#-#-#-# <file> (<project> <version>) #-#-#-#-#``
    and is passed through ``normalize`` before being written.

    :param file: name of the PO file the translation came from
    :param project: project name of that file's catalog
    :param version: version of that file's catalog
    :param prefix: text written at the start of the line (empty by default)
    """
    comment = f"#-#-#-#-# {file} ({project} {version}) #-#-#-#-#"
    # Write the prefix at the start of the line as well as handing it to
    # normalize(), as the other yields in this module do; with the default
    # empty prefix the output is unchanged.
    yield f"{prefix}{normalize(comment, prefix=prefix, width=width)}\n"
651+
652+
def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], prefix=''):
    """Yield the PO lines for a message that has conflicting translations.

    The entry consists of an optional ``msgctxt`` line, the ``msgid`` (and
    ``msgid_plural`` when *key* is a list or tuple), an empty ``msgstr``, and
    then, for every conflict, a marker line naming its source followed by
    that conflict's translation.

    :param key: the message id; a list/tuple is treated as (singular, plural)
    :param conflicts: the conflicting translations recorded for this id
    :param prefix: text written at the start of every line (empty by default)
    """
    # Emit msgctxt once, before msgid. Writing it per conflict (each preceded
    # by a marker string) would put stray strings and repeated msgctxt lines
    # ahead of msgid.
    # NOTE(review): conflicts are grouped by id only, so the first non-empty
    # context is used here - confirm this is the intended one.
    context = next(
        (entry['message'].context for entry in conflicts if entry['message'].context),
        None,
    )
    if context:
        yield f"{prefix}msgctxt {normalize(context, prefix=prefix, width=width)}\n"

    is_plural = isinstance(key, (list, tuple))
    if is_plural:
        yield f"{prefix}msgid {normalize(key[0], prefix=prefix, width=width)}\n"
        yield f"{prefix}msgid_plural {normalize(key[1], prefix=prefix, width=width)}\n"
    else:
        yield f"{prefix}msgid {normalize(key, prefix=prefix, width=width)}\n"
    # NOTE(review): this bare `msgstr ""` line is emitted for plural entries
    # too, ahead of their msgstr[n] lines; kept as-is because the expected
    # output in the tests contains it - confirm this is the intended format.
    yield f"{prefix}msgstr {normalize('', prefix=prefix, width=width)}\n"

    for conflict in conflicts:
        message = conflict['message']
        yield from _format_conflict_comment(
            conflict['file_name'],
            conflict['project'],
            conflict['version'],
            prefix=prefix,
        )
        if is_plural:
            for idx in range(catalog.num_plurals):
                # A conflict may carry fewer plural forms than the catalog
                # declares; missing forms are written as empty strings.
                try:
                    string = message.string[idx]
                except IndexError:
                    string = ''
                yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n"
        else:
            # Start the line with the prefix, like every other line of the
            # entry; with the default empty prefix the output is unchanged.
            yield f"{prefix}{normalize(message.string, prefix=prefix, width=width)}\n"
678+
645679
def _format_message(message, prefix=''):
646680
if isinstance(message.id, (list, tuple)):
647681
if message.context:
@@ -711,7 +745,10 @@ def _format_message(message, prefix=''):
711745
norm_previous_id = normalize(message.previous_id[1], width=width)
712746
yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|')
713747

714-
yield from _format_message(message)
748+
if len(conflicts := catalog._conflicts.get(message.id, [])) > 0:
749+
yield from _format_conflict(message.id, conflicts)
750+
else:
751+
yield from _format_message(message)
715752
yield '\n'
716753

717754
if not ignore_obsolete:

tests/messages/test_frontend.py

Lines changed: 77 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -715,14 +715,14 @@ def test_supports_width(self):
715715
assert expected_content == actual_content
716716

717717

718-
class ConcatanationCatalogTestCase(unittest.TestCase):
718+
class ConcatanateCatalogTestCase(unittest.TestCase):
719719

720720
def setUp(self):
721721
self.olddir = os.getcwd()
722722
os.chdir(data_dir)
723723

724724
self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA)
725-
self.cmd = frontend.ConcatenationCatalog(self.dist)
725+
self.cmd = frontend.ConcatenateCatalog(self.dist)
726726
self.cmd.initialize_options()
727727

728728
self.temp1 = f'{i18n_dir}/msgcat_temp1.po'
@@ -752,13 +752,13 @@ def tearDown(self):
752752
if os.path.isfile(file):
753753
os.unlink(file)
754754

755-
def _get_expected(self, messages):
755+
def _get_expected(self, messages, fuzzy=False):
756756
date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en')
757757
return fr"""# Translations template for PROJECT.
758758
# Copyright (C) 1994 ORGANIZATION
759759
# This file is distributed under the same license as the PROJECT project.
760760
# FIRST AUTHOR <EMAIL@ADDRESS>, 1994.
761-
#
761+
#{'\n#, fuzzy' if fuzzy else ''}
762762
msgid ""
763763
msgstr ""
764764
"Project-Id-Version: PROJECT VERSION\n"
@@ -805,6 +805,64 @@ def test_default(self):
805805
msgid "same"
806806
msgstr "Same"
807807
808+
#: hard.py:1000 simple.py:1000
809+
#, flag2, flag3, fuzzy
810+
msgid "almost_same"
811+
msgstr ""
812+
"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#"
813+
"Almost same"
814+
"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#"
815+
"A bit same"
816+
817+
#: hard.py:2000 simple.py:2000
818+
#, fuzzy
819+
msgid "plural"
820+
msgid_plural "plurals"
821+
msgstr ""
822+
"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#"
823+
msgstr[0] "Plural"
824+
msgstr[1] "Plurals"
825+
"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#"
826+
msgstr[0] "Plural"
827+
msgstr[1] "Plurals other"
828+
829+
#: hard.py:1
830+
msgid "other3"
831+
msgstr "Other 3"
832+
833+
#: hard.py:10
834+
msgid "other4"
835+
msgstr "Other 4"
836+
837+
""", fuzzy=True)
838+
839+
with open(self.output_file, 'r') as f:
840+
actual_content = f.read()
841+
assert expected_content == actual_content
842+
843+
@freeze_time("1994-11-11")
844+
def test_use_first(self):
845+
self.cmd.input_files = [self.temp1, self.temp2]
846+
self.cmd.output_file = self.output_file
847+
self.cmd.use_first = True
848+
849+
self.cmd.finalize_options()
850+
self.cmd.run()
851+
852+
expected_content = self._get_expected(fr"""#: simple.py:1
853+
#, flag1000
854+
msgid "other1"
855+
msgstr "Other 1"
856+
857+
#: simple.py:10
858+
msgid "other2"
859+
msgstr "Other 2"
860+
861+
#: hard.py:100 simple.py:100
862+
#, flag1, flag1.2, flag4
863+
msgid "same"
864+
msgstr "Same"
865+
808866
#: hard.py:1000 simple.py:1000
809867
#, flag2, flag3
810868
msgid "almost_same"
@@ -885,17 +943,27 @@ def test_more_than(self):
885943
msgstr "Same"
886944
887945
#: hard.py:1000 simple.py:1000
888-
#, flag2, flag3
946+
#, flag2, flag3, fuzzy
889947
msgid "almost_same"
890-
msgstr "Almost same"
948+
msgstr ""
949+
"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#"
950+
"Almost same"
951+
"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#"
952+
"A bit same"
891953
892954
#: hard.py:2000 simple.py:2000
955+
#, fuzzy
893956
msgid "plural"
894957
msgid_plural "plurals"
958+
msgstr ""
959+
"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#"
895960
msgstr[0] "Plural"
896961
msgstr[1] "Plurals"
962+
"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#"
963+
msgstr[0] "Plural"
964+
msgstr[1] "Plurals other"
897965
898-
""")
966+
""", fuzzy=True)
899967

900968
with open(self.output_file, 'r') as f:
901969
actual_content = f.read()
@@ -1029,7 +1097,7 @@ def test_default(self):
10291097
def test_compenidum(self):
10301098
self.cmd.input_files = [self.temp_def, self.temp_ref]
10311099
self.cmd.output_file = self.output_file
1032-
self.cmd.compendium = self.compendium
1100+
self.cmd.compendium = [self.compendium,]
10331101
self.cmd.no_fuzzy_matching = True
10341102
self.cmd.no_compendium_comment = True
10351103
self.cmd.finalize_options()
@@ -1057,7 +1125,7 @@ def test_compenidum(self):
10571125
def test_compenidum_overwrite(self):
10581126
self.cmd.input_files = [self.temp_def, self.temp_ref]
10591127
self.cmd.output_file = self.output_file
1060-
self.cmd.compendium = self.compendium
1128+
self.cmd.compendium = [self.compendium,]
10611129
self.cmd.no_fuzzy_matching = True
10621130
self.cmd.no_compendium_comment = True
10631131
self.cmd.compendium_overwrite = True

0 commit comments

Comments
 (0)