Skip to content

Commit 0777a58

Browse files
authored
gh-150771: Fix email serialization for shift_jis and euc-jp (GH-151120)
Encode the payload with output_charset instead of input_charset.
1 parent 11f032f commit 0777a58

3 files changed

Lines changed: 46 additions & 1 deletion

File tree

Lib/email/contentmanager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
174174
params=None, headers=None):
175175
_prepare_set(msg, 'text', subtype, headers)
176176

177-
charset = email.charset.Charset(charset).input_charset
177+
cs = email.charset.Charset(charset)
178+
charset = cs.output_charset
178179
cte, payload = _encode_text(string, charset, cte, msg.policy)
179180
msg.set_payload(payload)
180181
msg.set_param('charset', charset, replace=True)

Lib/test/test_email/test_contentmanager.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,46 @@ def test_set_text_charset_cp949(self):
362362
self.assertEqual(m.get_payload(decode=True), content.encode('ks_c_5601-1987'))
363363
self.assertEqual(m.get_content(), content)
364364

365+
def test_set_text_charset_shift_jis(self):
366+
m = self._make_message()
367+
content = "\u65e5\u672c\u8a9e\n"
368+
raw_data_manager.set_content(m, content, charset='shift_jis')
369+
self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
370+
self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
371+
self.assertEqual(m.get_content(), content)
372+
self.assertEqual(str(m), textwrap.dedent("""\
373+
Content-Type: text/plain; charset="iso-2022-jp"
374+
Content-Transfer-Encoding: 7bit
375+
376+
\x1b$BF|K\\8l\x1b(B
377+
"""))
378+
self.assertEqual(bytes(m), textwrap.dedent("""\
379+
Content-Type: text/plain; charset="iso-2022-jp"
380+
Content-Transfer-Encoding: 7bit
381+
382+
\u65e5\u672c\u8a9e
383+
""").encode('iso-2022-jp'))
384+
385+
def test_set_text_charset_euc_jp(self):
386+
m = self._make_message()
387+
content = "\u65e5\u672c\u8a9e\n"
388+
raw_data_manager.set_content(m, content, charset='euc-jp')
389+
self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
390+
self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
391+
self.assertEqual(m.get_content(), content)
392+
self.assertEqual(str(m), textwrap.dedent("""\
393+
Content-Type: text/plain; charset="iso-2022-jp"
394+
Content-Transfer-Encoding: 7bit
395+
396+
\x1b$BF|K\\8l\x1b(B
397+
"""))
398+
self.assertEqual(bytes(m), textwrap.dedent("""\
399+
Content-Type: text/plain; charset="iso-2022-jp"
400+
Content-Transfer-Encoding: 7bit
401+
402+
\u65e5\u672c\u8a9e
403+
""").encode('iso-2022-jp'))
404+
365405
def test_set_text_plain_long_line_heuristics(self):
366406
m = self._make_message()
367407
content = ("Simple but long message that is over 78 characters"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix serialization of :mod:`email` messages created with the ``shift_jis`` or ``euc-jp`` charsets.
2+
``set_content()`` now stores the payload using the output charset
3+
(``iso-2022-jp``) so printing the message no longer raises
4+
:exc:`UnicodeEncodeError`.

0 commit comments

Comments
 (0)