diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 52340ffe92eb85..1732f2ce2c044e 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -114,6 +114,10 @@ DOMEventStream Objects .. versionchanged:: 3.11 Support for :meth:`~object.__getitem__` method has been removed. + .. versionchanged:: next + :class:`DOMEventStream` now supports the :term:`context manager` + protocol. File handles opened by :func:`parse` are closed on exit. + .. method:: getEvent() Return a tuple containing *event* and the current *node* as @@ -141,3 +145,11 @@ DOMEventStream Objects print(node.toxml()) .. method:: DOMEventStream.reset() + + .. method:: DOMEventStream.close() + + Close the underlying stream if it was opened by :func:`parse`. + Has no effect if the stream was provided by the caller or is already + closed. It is safe to call this method more than once. + + .. versionadded:: next diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 3c8ed251acaa4d..594ce1dd8af32c 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -6,6 +6,7 @@ from xml.sax.handler import feature_external_ges from xml.dom import pulldom +from test import support from test.support import findfile @@ -33,13 +34,44 @@ def test_parse(self): # Test with a filename: handler = pulldom.parse(tstfile) - self.addCleanup(handler.stream.close) + self.addCleanup(handler.close) list(handler) # Test with a file object: with open(tstfile, "rb") as fin: list(pulldom.parse(fin)) + def test_context_manager_does_not_close_user_stream(self): + # gh-148428: context manager must not close user-provided streams + with open(tstfile, 'rb') as f: + events = pulldom.DOMEventStream(f, xml.sax.make_parser(), 16384) + for event, node in events: + pass + events.close() + self.assertFalse(f.closed) + + def test_close_is_idempotent(self): + # gh-148428: calling close() multiple times should be safe + events = pulldom.parse(tstfile) + stream = events.stream + events.close() + self.assertTrue(stream.closed) + events.close() # should not raise + + def test_clear_closes_owned_stream(self): + # gh-148428: clear() should close the stream when parse() opened it + events = pulldom.parse(tstfile) + stream = events.stream + events.clear() + self.assertTrue(stream.closed) + + def test_resource_warning_on_del(self): + # gh-148428: unclosed DOMEventStream should emit ResourceWarning + events = pulldom.parse(tstfile) + with self.assertWarns(ResourceWarning): + del events + support.gc_collect() + def test_parse_semantics(self): """Test DOMEventStream parsing semantics.""" diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py index 913141cd7ef3ce..f5626b5365ea1e 100644 --- a/Lib/xml/dom/pulldom.py +++ b/Lib/xml/dom/pulldom.py @@ -1,3 +1,4 @@ +import warnings import xml.sax import xml.sax.handler @@ -202,10 +203,11 @@ def fatalError(self, exception): raise exception class DOMEventStream: - def __init__(self, stream, parser, bufsize): + def __init__(self, stream, parser, bufsize, _owns_stream=False): self.stream = stream self.parser = parser self.bufsize = bufsize + self._owns_stream = _owns_stream if not hasattr(self.parser, 'feed'): self.getEvent = self._slurp self.reset() @@ -225,6 +227,28 @@ def __next__(self): def __iter__(self): return self + def close(self): + """Close the stream if it was opened by parse().""" + if self._owns_stream and self.stream is not None: + self.stream.close() + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def __del__(self, _warn=warnings.warn): + if self._owns_stream and self.stream is not None: + try: + if not self.stream.closed: + _warn( + f"unclosed {self!r}", + ResourceWarning, + source=self) + finally: + self.stream.close() + def expandNode(self, node): event = self.getEvent() parents = [node] @@ -275,6 +299,7 @@ def _emit(self): def clear(self): """clear(): Explicitly release parsing objects""" + self.close() self.pulldom.clear() del self.pulldom self.parser = None @@ -320,11 +345,13 @@ def parse(stream_or_string, parser=None, bufsize=None): bufsize = default_bufsize if isinstance(stream_or_string, str): stream = open(stream_or_string, 'rb') + owns_stream = True else: stream = stream_or_string + owns_stream = False if not parser: parser = xml.sax.make_parser() - return DOMEventStream(stream, parser, bufsize) + return DOMEventStream(stream, parser, bufsize, _owns_stream=owns_stream) def parseString(string, parser=None): from io import StringIO diff --git a/Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst b/Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst new file mode 100644 index 00000000000000..f5befbd514a286 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-12-11-14-40.gh-issue-148428.kP9qWx.rst @@ -0,0 +1,4 @@ +:class:`xml.dom.pulldom.DOMEventStream` now supports the :term:`context +manager` protocol and has a :meth:`~xml.dom.pulldom.DOMEventStream.close` +method. File handles opened by :func:`xml.dom.pulldom.parse` are now +properly closed, fixing a resource leak.