Skip to content

Commit 12efd8a

Browse files
committed
gh-148428: Fix file handle leak in pulldom.parse()
When pulldom.parse() is called with a filename, the opened file handle is never closed. Add close(), context manager support, and __del__ with ResourceWarning to DOMEventStream. Update clear() to close owned streams.
1 parent 3a7df63 commit 12efd8a

File tree

4 files changed

+88
-6
lines changed

4 files changed

+88
-6
lines changed

Doc/library/xml.dom.pulldom.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ DOMEventStream Objects
114114
.. versionchanged:: 3.11
115115
Support for :meth:`~object.__getitem__` method has been removed.
116116

117+
.. versionchanged:: next
118+
:class:`DOMEventStream` now supports the :term:`context manager`
119+
protocol. File handles opened by :func:`parse` are closed on exit.
120+
117121
.. method:: getEvent()
118122

119123
Return a tuple containing *event* and the current *node* as
@@ -141,3 +145,11 @@ DOMEventStream Objects
141145
print(node.toxml())
142146

143147
.. method:: DOMEventStream.reset()
148+
149+
.. method:: DOMEventStream.close()
150+
151+
Close the underlying stream if it was opened by :func:`parse`.
152+
Has no effect if the stream was provided by the caller or is already
153+
closed. It is safe to call this method more than once.
154+
155+
.. versionadded:: next

Lib/test/test_pulldom.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from xml.sax.handler import feature_external_ges
77
from xml.dom import pulldom
88

9+
from test import support
910
from test.support import findfile
1011

1112

@@ -31,15 +32,53 @@ def test_parse(self):
3132
# semantics are tested using parseString with a more focused XML
3233
# fragment.
3334

34-
# Test with a filename:
35-
handler = pulldom.parse(tstfile)
36-
self.addCleanup(handler.stream.close)
37-
list(handler)
35+
# Test with a filename (context manager):
36+
with pulldom.parse(tstfile) as handler:
37+
list(handler)
3838

3939
# Test with a file object:
4040
with open(tstfile, "rb") as fin:
4141
list(pulldom.parse(fin))
4242

43+
def test_context_manager_closes_file(self):
44+
# gh-148428: context manager should close owned stream
45+
with pulldom.parse(tstfile) as events:
46+
stream = events.stream
47+
for event, node in events:
48+
pass
49+
self.assertTrue(stream.closed)
50+
51+
def test_context_manager_does_not_close_user_stream(self):
52+
# gh-148428: context manager must not close user-provided streams
53+
with open(tstfile, 'rb') as f:
54+
events = pulldom.DOMEventStream(f, xml.sax.make_parser(), 16384)
55+
for event, node in events:
56+
pass
57+
events.close()
58+
self.assertFalse(f.closed)
59+
60+
def test_close_is_idempotent(self):
61+
# gh-148428: calling close() multiple times should be safe
62+
events = pulldom.parse(tstfile)
63+
stream = events.stream
64+
events.close()
65+
self.assertTrue(stream.closed)
66+
events.close() # should not raise
67+
68+
def test_clear_closes_owned_stream(self):
69+
# gh-148428: clear() should close the stream when parse() opened it
70+
events = pulldom.parse(tstfile)
71+
stream = events.stream
72+
events.clear()
73+
self.assertTrue(stream.closed)
74+
75+
def test_resource_warning_on_del(self):
76+
# gh-148428: unclosed DOMEventStream should emit ResourceWarning
77+
events = pulldom.parse(tstfile)
78+
with self.assertWarns(ResourceWarning):
79+
del events
80+
support.gc_collect()
81+
4382
def test_parse_semantics(self):
4483
"""Test DOMEventStream parsing semantics."""
4584

Lib/xml/dom/pulldom.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
import xml.sax
23
import xml.sax.handler
34

@@ -202,10 +203,11 @@ def fatalError(self, exception):
202203
raise exception
203204

204205
class DOMEventStream:
205-
def __init__(self, stream, parser, bufsize):
206+
def __init__(self, stream, parser, bufsize, _owns_stream=False):
206207
self.stream = stream
207208
self.parser = parser
208209
self.bufsize = bufsize
210+
self._owns_stream = _owns_stream
209211
if not hasattr(self.parser, 'feed'):
210212
self.getEvent = self._slurp
211213
self.reset()
@@ -225,6 +227,28 @@ def __next__(self):
225227
def __iter__(self):
226228
return self
227229

230+
def close(self):
231+
"""Close the stream if it was opened by parse()."""
232+
if self._owns_stream and self.stream is not None:
233+
self.stream.close()
234+
235+
def __enter__(self):
236+
return self
237+
238+
def __exit__(self, *args):
239+
self.close()
240+
241+
def __del__(self, _warn=warnings.warn):
242+
if self._owns_stream and self.stream is not None:
243+
try:
244+
if not self.stream.closed:
245+
_warn(
246+
f"unclosed {self!r}",
247+
ResourceWarning,
248+
source=self)
249+
finally:
250+
self.stream.close()
251+
228252
def expandNode(self, node):
229253
event = self.getEvent()
230254
parents = [node]
@@ -275,6 +299,7 @@ def _emit(self):
275299

276300
def clear(self):
277301
"""clear(): Explicitly release parsing objects"""
302+
self.close()
278303
self.pulldom.clear()
279304
del self.pulldom
280305
self.parser = None
@@ -320,11 +345,13 @@ def parse(stream_or_string, parser=None, bufsize=None):
320345
bufsize = default_bufsize
321346
if isinstance(stream_or_string, str):
322347
stream = open(stream_or_string, 'rb')
348+
owns_stream = True
323349
else:
324350
stream = stream_or_string
351+
owns_stream = False
325352
if not parser:
326353
parser = xml.sax.make_parser()
327-
return DOMEventStream(stream, parser, bufsize)
354+
return DOMEventStream(stream, parser, bufsize, _owns_stream=owns_stream)
328355

329356
def parseString(string, parser=None):
330357
from io import StringIO
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:class:`xml.dom.pulldom.DOMEventStream` now supports the :term:`context
2+
manager` protocol and has a :meth:`~xml.dom.pulldom.DOMEventStream.close`
3+
method. File handles opened by :func:`xml.dom.pulldom.parse` are now
4+
properly closed, fixing a resource leak.

0 commit comments

Comments
 (0)