Skip to content

Commit 2bf1045

Browse files
committed
gh-117807: Handle invalid UTF-8 in mimetypes comments
1 parent ce916dc commit 2bf1045

3 files changed

Lines changed: 29 additions & 3 deletions

File tree

Lib/mimetypes.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ def read(self, filename, strict=True):
237237
list of standard types, else to the list of non-standard
238238
types.
239239
"""
240-
with open(filename, encoding='utf-8') as fp:
240+
with open(filename, encoding='utf-8', errors='replace') as fp:
241241
self.readfp(fp, strict)
242242

243243
def readfp(self, fp, strict=True):
@@ -428,7 +428,7 @@ def init(files=None):
428428

429429
def read_mime_types(file):
430430
try:
431-
f = open(file, encoding='utf-8')
431+
f = open(file, encoding='utf-8', errors='replace')
432432
except OSError:
433433
return None
434434
with f:

Lib/test/test_mimetypes.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,33 @@ def test_read_mime_types(self):
6767
with unittest.mock.patch.object(mimetypes, 'open',
6868
return_value=fp) as mock_open:
6969
mime_dict = mimetypes.read_mime_types(filename)
70-
mock_open.assert_called_with(filename, encoding='utf-8')
70+
mock_open.assert_called_with(filename, encoding='utf-8',
71+
errors='replace')
7172
eq(mime_dict[".Français"], "application/no-mans-land")
7273

74+
def test_read_mime_types_invalid_utf8_comment(self):
75+
with os_helper.temp_dir() as directory:
76+
data = (b"# non-UTF-8 comment: \x83\n"
77+
b"x-application/x-unittest pyunit\n")
78+
file = os.path.join(directory, "sample.mimetype")
79+
with open(file, "wb") as f:
80+
f.write(data)
81+
82+
mime_dict = mimetypes.read_mime_types(file)
83+
self.assertEqual(
84+
mime_dict[".pyunit"], "x-application/x-unittest")
85+
86+
db = mimetypes.MimeTypes()
87+
db.read(file)
88+
self.assertEqual(
89+
db.guess_file_type("sample.pyunit")[0],
90+
"x-application/x-unittest")
91+
92+
mimetypes.init(files=[file])
93+
self.assertEqual(
94+
mimetypes.guess_file_type("sample.pyunit")[0],
95+
"x-application/x-unittest")
96+
7397
def test_init_reinitializes(self):
7498
# Issue 4936: make sure an init starts clean
7599
# First, put some poison into the types table
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix :mod:`mimetypes` initialization from MIME map files containing invalid
2+
UTF-8 bytes in comments.

0 commit comments

Comments (0)