Skip to content

Commit eaccf35

Browse files
committed
Remove C printf formatters for spell checking
1 parent acb2b91 commit eaccf35

6 files changed

Lines changed: 136 additions & 23 deletions

File tree

ChangeLog.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
:lang: en
55

66

7+
== Version 2.9 (under dev)
8+
9+
* remove C printf formatters for spell checking
10+
711
== Version 2.8 (2014-12-07)
812

913
* fix read of fuzzy flag

msgcheck/po.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,7 @@
4040
except ImportError:
4141
pass
4242

43-
44-
def count_lines(string):
45-
"""
46-
Count the number of lines in a string or translation.
47-
"""
48-
count = len(string.split('\n'))
49-
if count > 1 and string.endswith('\n'):
50-
count -= 1
51-
return count
43+
from . utils import count_lines, replace_formatters
5244

5345

5446
# pylint: disable=too-few-public-methods
@@ -119,7 +111,7 @@ class PoMessage(object):
119111
"""
120112

121113
# pylint: disable=too-many-arguments
122-
def __init__(self, filename, line, msg, charset, fuzzy):
114+
def __init__(self, filename, line, msg, charset, fuzzy, fmt):
123115
"""Build a PO message."""
124116
self.filename = filename
125117
self.line = line
@@ -144,6 +136,7 @@ def __init__(self, filename, line, msg, charset, fuzzy):
144136
else:
145137
self.messages.append((msg.get('msgid', ''), msg.get('msgstr', '')))
146138
self.fuzzy = fuzzy
139+
self.fmt = fmt
147140

148141
def check_lines(self):
149142
"""
@@ -271,7 +264,10 @@ def check_spelling(self, spelling, checkers):
271264
for mid, mstr in self.messages:
272265
if not mid or not mstr:
273266
continue
274-
checkers[0].set_text(mstr if spelling == 'str' else mid)
267+
text = mstr if spelling == 'str' else mid
268+
if self.fmt:
269+
text = replace_formatters(text, ' ', self.fmt)
270+
checkers[0].set_text(text)
275271
misspelled = []
276272
for err in checkers[0]:
277273
misspelled_word = True
@@ -302,7 +298,7 @@ def __init__(self, filename):
302298
}
303299
self.msgs = []
304300

305-
def _add_message(self, numline_msgid, msgfuzzy, msg):
301+
def _add_message(self, numline_msgid, fuzzy, fmt, msg):
306302
"""
307303
Add a message from PO file in list of messages.
308304
"""
@@ -319,15 +315,16 @@ def _add_message(self, numline_msgid, msgfuzzy, msg):
319315
if match:
320316
self.props['charset'] = match.group(1)
321317
self.msgs.append(PoMessage(self.filename, numline_msgid, msg,
322-
self.props['charset'], msgfuzzy))
318+
self.props['charset'], fuzzy, fmt))
323319

324320
def read(self):
325321
"""
326322
Read messages in PO file.
327323
"""
328324
self.msgs = []
329-
(numline, numline_msgid) = (0, 0)
330-
(fuzzy, msgfuzzy) = (False, False)
325+
numline, numline_msgid = (0, 0)
326+
fuzzy, msgfuzzy = (False, False)
327+
fmt, msgfmt = (None, None)
331328
msg = {}
332329
msgcurrent = ''
333330
with open(self.filename, 'r') as po_file:
@@ -338,6 +335,9 @@ def read(self):
338335
continue
339336
if line.startswith('#,'):
340337
fuzzy = 'fuzzy' in line
338+
match = re.search(r'([a-z-]+)-format', line, re.IGNORECASE)
339+
fmt = match.group(1) if match else None
340+
if line.startswith('#'):
341341
continue
342342
if line.startswith('msg'):
343343
match = re.match(
@@ -351,16 +351,20 @@ def read(self):
351351
if oldmsgcurrent.startswith('msgstr'):
352352
self._add_message(numline_msgid,
353353
msgfuzzy,
354+
msgfmt,
354355
msg)
355356
msgfuzzy = fuzzy
356357
fuzzy = False
358+
msgfmt = fmt
359+
fmt = None
357360
msg = {}
358361
numline_msgid = numline
359362
if msgcurrent and line.startswith('"'):
360363
msg[msgcurrent] = msg.get(msgcurrent, '') + line[1:-1]
361364
if msgcurrent.startswith('msgstr'):
362365
self._add_message(numline_msgid,
363366
msgfuzzy,
367+
msgfmt,
364368
msg)
365369

366370
def compile(self):

msgcheck/utils.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2009-2015 Sébastien Helleu <flashcode@flashtux.org>
4+
#
5+
# This file is part of msgcheck.
6+
#
7+
# Msgcheck is free software; you can redistribute it and/or modify
8+
# it under the terms of the GNU General Public License as published by
9+
# the Free Software Foundation; either version 3 of the License, or
10+
# (at your option) any later version.
11+
#
12+
# Msgcheck is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with msgcheck. If not, see <http://www.gnu.org/licenses/>.
19+
#
20+
21+
"""
22+
Some utility functions for msgcheck.
23+
"""
24+
25+
from __future__ import print_function
26+
27+
28+
# TODO: add support for other languages
# Formatter syntax, keyed by the name found before "-format" in the flags of a
# PO message (for example "c" for "#, c-format").
# Each value is a 4-tuple read by replace_formatters():
#   [0] escape char: it and the char following it are copied unchanged
#   [1] char that starts a formatter
#   [2] chars allowed between the start char and the conversion char
#   [3] conversion chars, each of them ends a formatter
STR_FORMATTERS = {
    'c': (
        '\\',
        '%',
        # flags, width, precision and length modifiers; "*" is a width or
        # precision taken from an argument and "$" ends a positional index
        # (like "%1$s"), both must be skipped to reach the conversion char
        '#- +\'I.0123456789hlLqjzt*$',
        'diouxXeEfFgGaAcsCSpnm',
    ),
}
32+
33+
def count_lines(string):
    """
    Return the number of lines in a string or translation.

    A final newline does not start an extra line: "a\n" is one line, like "a".
    An empty string counts as one line.
    """
    newlines = string.count('\n')
    if string.endswith('\n'):
        # the trailing newline closes the last line, it does not open a new one
        return newlines
    return newlines + 1
39+
40+
41+
def replace_formatters(string, replace, fmt):
    """
    Substitute the formatters found in a string (like "%s" or "%03d") with
    the string "replace".

    The formatter syntax is read from STR_FORMATTERS[fmt]; if there is no
    entry for "fmt", the string is returned unchanged.
    """
    if fmt not in STR_FORMATTERS:
        return string
    syntax = STR_FORMATTERS[fmt]
    escape_char, start_char = syntax[0], syntax[1]
    middle_chars, conversion_chars = syntax[2], syntax[3]

    output = []
    pending = []  # chars of the formatter being read, start char included
    in_formatter = False
    after_escape = False

    for char in string:
        if in_formatter:
            if char == start_char:
                # a second start char ends the formatter and is kept ("%%")
                output.append(char)
            elif char in middle_chars:
                # still inside the formatter, wait for the conversion char
                pending.append(char)
                continue
            elif char in conversion_chars:
                # complete formatter: drop it and emit the replacement
                output.append(replace)
            else:
                # not a valid formatter: keep everything read so far
                pending.append(char)
                output.extend(pending)
            in_formatter = False
            continue
        if after_escape:
            # escaped char: copy the escape char and the char unchanged
            output.append(escape_char)
            output.append(char)
            after_escape = False
        elif char == escape_char:
            after_escape = True
        elif char == start_char:
            in_formatter = True
            pending = [char]
        else:
            output.append(char)

    if after_escape:
        # unterminated escaped char: keep the escape char
        output.append(escape_char)
    elif in_formatter:
        # unterminated formatter: replace it like a complete one
        output.append(replace)

    return ''.join(output)

tests/fr_spelling_id.po

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ msgstr ""
3535
"Content-Transfer-Encoding: 8bit\n"
3636
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
3737

38-
msgid "Thsi is a test.\n"
39-
msgstr "Ceci est un test.\n"
38+
#, c-format
39+
msgid "%.3fThsi is a test.\n"
40+
msgstr "%.3fCeci est un test.\n"
4041

4142
msgid "Test 1"
4243
msgstr "Test 1"

tests/fr_spelling_str.po

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,13 @@ msgstr ""
3535
"Content-Transfer-Encoding: 8bit\n"
3636
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
3737

38-
msgid "This is a test.\n"
39-
msgstr "Ceci est un test.\n"
38+
#, c-format
39+
msgid "%.3fThis is a test.\n"
40+
msgstr "%.3fCeci est un test.\n"
41+
42+
#, c-format
43+
msgid "%.3fThis is another test.\n"
44+
msgstr "%.3fCecX est un autre test.\n"
4045

4146
msgid "Test 1"
4247
msgstr "aabbcc"

tests/test_msgcheck.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import unittest
2727

2828
from msgcheck.po import PoFile, PoCheck
29+
from msgcheck.utils import replace_formatters
2930

3031

3132
def local_path(filename):
@@ -95,6 +96,21 @@ def test_checks_fuzzy(self):
9596
# the file has 11 errors (with the fuzzy string)
9697
self.assertEquals(len(result[0][1]), 11)
9798

99+
def test_replace_formatters(self):
    """Test removal of formatters in a string."""
    # each case is: (string, replacement, expected result)
    cases = (
        ('%', '', ''),  # unterminated formatter
        ('\\', '', '\\'),  # unterminated escaped char
        ('%s', ' ', ' '),
        ('%.02f', ' ', ' '),
        ('%!%s%!', '', '%!%!'),  # invalid formatters are kept
        ('%.02!', ' ', '%.02!'),
        ('%.3fThis is a %stest', ' ', ' This is a test'),
        ('%.3fTest%s%d%%%.03f%luhere% s', '', 'Test%here'),
    )
    for string, replace, expected in cases:
        # assertEqual: the "assertEquals" alias was removed in Python 3.12
        self.assertEqual(replace_formatters(string, replace, 'c'), expected)
98114
def test_spelling_id(self):
99115
"""Test spelling on source messages (English) of gettext files."""
100116
po_check = PoCheck()
@@ -104,7 +120,7 @@ def test_spelling_id(self):
104120
# be sure we have 1 file in result
105121
self.assertEquals(len(result), 1)
106122

107-
# the file has 2 spelling errors: words "Thsi" and "errro"
123+
# the file has 2 spelling errors: "Thsi" and "errro"
108124
errors = result[0][1]
109125
self.assertEquals(len(errors), 2)
110126
for i, word in enumerate(('Thsi', 'errro')):
@@ -123,10 +139,10 @@ def test_spelling_str(self):
123139
# be sure we have 2 files in result
124140
self.assertEquals(len(result), 2)
125141

126-
# first file has 2 spelling errors: words "aabbcc" and "xxyyzz"
142+
# first file has 3 spelling errors: "CecX", "aabbcc" and "xxyyzz"
127143
errors = result[0][1]
128-
self.assertEquals(len(errors), 2)
129-
for i, word in enumerate(('aabbcc', 'xxyyzz')):
144+
self.assertEquals(len(errors), 3)
145+
for i, word in enumerate(('CecX', 'aabbcc', 'xxyyzz')):
130146
self.assertEquals(errors[i].idmsg, 'spelling-str')
131147
self.assertTrue(type(errors[i].message) is list)
132148
self.assertEquals(len(errors[i].message), 1)

0 commit comments

Comments
 (0)