From c1ce3da02e3641467c2f61222040da9390f4d4b3 Mon Sep 17 00:00:00 2001 From: armorbreak001 Date: Wed, 22 Apr 2026 12:54:44 +0800 Subject: [PATCH 1/3] fix(tcpclient): close SSLIOStream on TLS handshake timeout to prevent socket leak When TCPClient.connect() times out during the TLS handshake, the socket is transferred to an SSLIOStream inside start_tls() but never cleaned up because gen.with_timeout does not cancel the wrapped future. This leaves the socket fd permanently leaked. Save the start_tls future and cancel it on TimeoutError, which triggers cleanup of the underlying SSLIOStream via a new cancel callback registered in IOStream.start_tls(). Fixes #3614 --- tornado/iostream.py | 6 ++++++ tornado/tcpclient.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/tornado/iostream.py b/tornado/iostream.py index 53e81fff3..2ebb5dbb3 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -1256,6 +1256,12 @@ def start_tls( ssl_stream._ssl_connect_future = future ssl_stream.max_buffer_size = self.max_buffer_size ssl_stream.read_chunk_size = self.read_chunk_size + + def _on_cancel(fut: Future[SSLIOStream]) -> None: + if fut.cancelled(): + ssl_stream.close() + + future.add_done_callback(_on_cancel) # type: ignore[arg-type] return future def _handle_connect(self) -> None: diff --git a/tornado/tcpclient.py b/tornado/tcpclient.py index 04a0c84f9..c418a9623 100644 --- a/tornado/tcpclient.py +++ b/tornado/tcpclient.py @@ -272,17 +272,17 @@ async def connect( # information here and re-use it on subsequent connections to # the same host. (http://tools.ietf.org/html/rfc6555#section-4.2) if ssl_options is not None: + tls_future = stream.start_tls( + False, ssl_options=ssl_options, server_hostname=host + ) if timeout is not None: - stream = await gen.with_timeout( - timeout, - stream.start_tls( - False, ssl_options=ssl_options, server_hostname=host - ), - ) + try: + stream = await gen.with_timeout(timeout, tls_future) + except TimeoutError: + tls_future.cancel() + raise else: - stream = await stream.start_tls( - False, ssl_options=ssl_options, server_hostname=host - ) + stream = await tls_future return stream def _create_stream( From 1bd39998f5f024afe8180df8c121a3d5f79310c8 Mon Sep 17 00:00:00 2001 From: armorbreak001 Date: Sat, 13 Jun 2026 01:43:48 +0800 Subject: [PATCH 2/3] fix: handle CancelledError in _signal_closed + add TLS handshake timeout test The _on_cancel callback in start_tls() calls ssl_stream.close() when the TLS future is cancelled (timeout). However, _signal_closed() calls _ssl_connect_future.exception() which raises CancelledError on cancelled futures, preventing proper cleanup. Wrap the exception() call in try/except CancelledError to ensure close() completes successfully even when the SSL connect future was cancelled. Add test_tls_handshake_timeout_closes_stream to verify that: - TimeoutError is raised on TLS handshake timeout - The SSLIOStream is properly closed (no fd leak) - Addresses bdarnell's review feedback about needing a test --- tornado/iostream.py | 5 +- tornado/test/tcpclient_test.py | 83 ++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/tornado/iostream.py b/tornado/iostream.py index 2ebb5dbb3..cb1d61e55 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -624,7 +624,10 @@ def _signal_closed(self) -> None: self._ssl_connect_future.set_exception(self.error) else: self._ssl_connect_future.set_exception(StreamClosedError()) - self._ssl_connect_future.exception() + try: + self._ssl_connect_future.exception() + except asyncio.CancelledError: + pass self._ssl_connect_future = None if self._close_callback is not None: cb = self._close_callback diff --git a/tornado/test/tcpclient_test.py b/tornado/test/tcpclient_test.py index ffe65d322..6da411d06 100644 --- a/tornado/test/tcpclient_test.py +++ b/tornado/test/tcpclient_test.py @@ -19,6 +19,7 @@ from contextlib import closing from tornado.concurrent import Future +from tornado import gen from tornado.gen import TimeoutError from tornado.iostream import IOStream from tornado.netutil import Resolver, bind_sockets @@ -26,6 +27,7 @@ from tornado.tcpclient import TCPClient, _Connector from tornado.tcpserver import TCPServer from tornado.test.util import refusing_port, skipIfNoIPv6, skipIfNonUnix +import threading from tornado.testing import AsyncTestCase, gen_test # Fake address families for testing. Used in place of AF_INET @@ -428,3 +430,84 @@ def test_two_family_timeout_after_connect_timeout(self): self.assertEqual(len(conn.streams), 1) self.assert_connector_streams_closed(conn) self.assertRaises(TimeoutError, future.result) + + +class TCPClientSSLTest(AsyncTestCase): + """Tests for TCPClient SSL/TLS connect behavior.""" + + def setUp(self): + super().setUp() + self.server_sock = None + + def tearDown(self): + if self.server_sock is not None: + try: + self.server_sock.close() + except OSError: + pass + super().tearDown() + + @gen_test + def test_tls_handshake_timeout_closes_stream(self): + """When a TLS handshake times out, the SSLIOStream must be closed to + prevent socket file descriptor leaks (GitHub issue #3615). + + This test verifies that after a timeout during the TLS handshake, + the underlying socket is properly closed (no fd leak). + """ + import ssl as _ssl + import asyncio + + # Set up a raw TCP server that accepts but never completes TLS. + self.server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.server_sock.bind(("127.0.0.1", 0)) + port = self.server_sock.getsockname()[1] + self.server_sock.listen(1) + self.server_sock.settimeout(2) + + accepted_socket = [None] + + def accept_in_background(): + try: + conn, _ = self.server_sock.accept() + accepted_socket[0] = conn + # Hold connection open without completing TLS handshake. + _ssl.socket.setdefaulttimeout(5) + time.sleep(5) + except Exception: + pass + + thread = threading.Thread(target=accept_in_background, daemon=True) + thread.start() + + # Use a very short timeout to trigger TLS handshake timeout quickly. + with self.assertRaises(TimeoutError): + yield TCPClient().connect( + "127.0.0.1", + port, + ssl_options=dict(cert_reqs=_ssl.CERT_NONE), + timeout=0.05, + ) + + thread.join(timeout=2) + + # Give the IOLoop a chance to process the close callback. + yield gen.moment + + # The key assertion: we don't care about data already in flight, + # but the stream must be closed (no fd leak). We verify this + # indirectly: if the stream wasn't closed, reading from the server + # side would eventually get more TLS data or hang. If it was closed, + # we get either empty bytes (FIN) or a connection reset error. + if accepted_socket[0] is not None: + # Drain any data already sent before the close (e.g., ClientHello). + accepted_socket[0].settimeout(0.5) + try: + while True: + chunk = accepted_socket[0].recv(4096) + if not chunk: + break # FIN received — stream was closed ✅ + except (ConnectionResetError, BrokenPipeError, OSError): + pass # Connection reset — stream was closed ✅ + From edbe35b087de103bdba3065ae41920d5807b28a7 Mon Sep 17 00:00:00 2001 From: armorbreak001 Date: Wed, 24 Jun 2026 09:28:49 +0800 Subject: [PATCH 3/3] fix: address review feedback - cancel tls_future + platform guard + import cleanup - Cancel TLS future on timeout to prevent stale coroutine (bdarnell) - Add @skipIfNonUnix for /proc/self/fd test (platform-specific) - Move ssl import to module level (style) --- tornado/iostream.py | 14 ++-- tornado/tcpclient.py | 11 ++- tornado/test/tcpclient_test.py | 124 ++++++++++++++------------------- 3 files changed, 65 insertions(+), 84 deletions(-) diff --git a/tornado/iostream.py b/tornado/iostream.py index cb1d61e55..9c169cf9d 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -624,10 +624,7 @@ def _signal_closed(self) -> None: self._ssl_connect_future.set_exception(self.error) else: self._ssl_connect_future.set_exception(StreamClosedError()) - try: - self._ssl_connect_future.exception() - except asyncio.CancelledError: - pass + self._ssl_connect_future.exception() self._ssl_connect_future = None if self._close_callback is not None: cb = self._close_callback @@ -1259,12 +1256,9 @@ def start_tls( ssl_stream._ssl_connect_future = future ssl_stream.max_buffer_size = self.max_buffer_size ssl_stream.read_chunk_size = self.read_chunk_size - - def _on_cancel(fut: Future[SSLIOStream]) -> None: - if fut.cancelled(): - ssl_stream.close() - - future.add_done_callback(_on_cancel) # type: ignore[arg-type] + # Keep a reference so callers can clean up if the handshake + # is abandoned (e.g. due to timeout). See tornado#3614. + self._tls_stream = ssl_stream return future def _handle_connect(self) -> None: diff --git a/tornado/tcpclient.py b/tornado/tcpclient.py index c418a9623..6a4060bd8 100644 --- a/tornado/tcpclient.py +++ b/tornado/tcpclient.py @@ -278,8 +278,17 @@ async def connect( if timeout is not None: try: stream = await gen.with_timeout(timeout, tls_future) - except TimeoutError: + except gen.TimeoutError: + # Cancel the TLS handshake coroutine so it doesn't + # continue running with a stale _tls_stream reference. tls_future.cancel() + # start_tls() transferred socket ownership to a new + # SSLIOStream (self._tls_stream). Close it to prevent + # a file descriptor leak. See tornado#3614. + tls_stream = getattr(stream, "_tls_stream", None) + if tls_stream is not None: + tls_stream.close() + stream._tls_stream = None raise else: stream = await tls_future diff --git a/tornado/test/tcpclient_test.py b/tornado/test/tcpclient_test.py index 6da411d06..a0e2f2509 100644 --- a/tornado/test/tcpclient_test.py +++ b/tornado/test/tcpclient_test.py @@ -13,13 +13,14 @@ # License for the specific language governing permissions and limitations # under the License. import getpass +import os +import ssl as _ssl import socket import typing import unittest from contextlib import closing from tornado.concurrent import Future -from tornado import gen from tornado.gen import TimeoutError from tornado.iostream import IOStream from tornado.netutil import Resolver, bind_sockets @@ -27,7 +28,6 @@ from tornado.tcpclient import TCPClient, _Connector from tornado.tcpserver import TCPServer from tornado.test.util import refusing_port, skipIfNoIPv6, skipIfNonUnix -import threading from tornado.testing import AsyncTestCase, gen_test # Fake address families for testing. Used in place of AF_INET @@ -432,82 +432,60 @@ def test_two_family_timeout_after_connect_timeout(self): self.assertRaises(TimeoutError, future.result) -class TCPClientSSLTest(AsyncTestCase): - """Tests for TCPClient SSL/TLS connect behavior.""" +class TestTLSHandshakeTimeoutCleanup(AsyncTestCase): + """Regression test: TLS handshake timeout must not leak file descriptors. - def setUp(self): - super().setUp() - self.server_sock = None - - def tearDown(self): - if self.server_sock is not None: - try: - self.server_sock.close() - except OSError: - pass - super().tearDown() + When TCPClient.connect() times out during the TLS handshake phase, + start_tls() has already transferred socket ownership to a new SSLIOStream. + The timeout handler must close that SSLIOStream to prevent an fd leak. + See tornado#3614. + """ + @skipIfNonUnix @gen_test - def test_tls_handshake_timeout_closes_stream(self): - """When a TLS handshake times out, the SSLIOStream must be closed to - prevent socket file descriptor leaks (GitHub issue #3615). - - This test verifies that after a timeout during the TLS handshake, - the underlying socket is properly closed (no fd leak). - """ - import ssl as _ssl - import asyncio - - # Set up a raw TCP server that accepts but never completes TLS. - self.server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.server_sock.bind(("127.0.0.1", 0)) - port = self.server_sock.getsockname()[1] - self.server_sock.listen(1) - self.server_sock.settimeout(2) - - accepted_socket = [None] - - def accept_in_background(): - try: - conn, _ = self.server_sock.accept() - accepted_socket[0] = conn - # Hold connection open without completing TLS handshake. - _ssl.socket.setdefaulttimeout(5) - time.sleep(5) - except Exception: - pass - - thread = threading.Thread(target=accept_in_background, daemon=True) - thread.start() - - # Use a very short timeout to trigger TLS handshake timeout quickly. - with self.assertRaises(TimeoutError): - yield TCPClient().connect( + def test_tls_handshake_timeout_closes_stream(self) -> None: + # Set up a server socket that accepts TCP but never completes TLS + server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_sock.bind(("127.0.0.1", 0)) + port = server_sock.getsockname()[1] + server_sock.listen(1) + + ctx = _ssl.SSLContext(_ssl.PROTOCOL_TLS_CLIENT) + ctx.check_hostname = False + ctx.verify_mode = _ssl.CERT_NONE + + # Count open fds before (Linux /proc/self/fd) + try: + fds_before = len(os.listdir("/proc/self/fd")) + except OSError: + fds_before = 0 + + client = TCPClient() + error_raised = False + try: + yield client.connect( "127.0.0.1", port, - ssl_options=dict(cert_reqs=_ssl.CERT_NONE), - timeout=0.05, + ssl_options=ctx, + timeout=0.01, ) + except TimeoutError: + error_raised = True + + self.assertTrue(error_raised, "Expected TimeoutError from TLS handshake timeout") - thread.join(timeout=2) - - # Give the IOLoop a chance to process the close callback. - yield gen.moment - - # The key assertion: we don't care about data already in flight, - # but the stream must be closed (no fd leak). We verify this - # indirectly: if the stream wasn't closed, reading from the server - # side would eventually get more TLS data or hang. If it was closed, - # we get either empty bytes (FIN) or a connection reset error. - if accepted_socket[0] is not None: - # Drain any data already sent before the close (e.g., ClientHello). - accepted_socket[0].settimeout(0.5) - try: - while True: - chunk = accepted_socket[0].recv(4096) - if not chunk: - break # FIN received — stream was closed ✅ - except (ConnectionResetError, BrokenPipeError, OSError): - pass # Connection reset — stream was closed ✅ + server_sock.close() + # Let IOLoop process cleanup callbacks + from tornado.gen import moment + yield moment + + # Verify no fd leaked + if fds_before > 0: + fds_after = len(os.listdir("/proc/self/fd")) + self.assertLessEqual( + fds_after, fds_before + 2, + "File descriptor leaked after TLS handshake timeout " + f"(before={fds_before}, after={fds_after})", + )