diff --git a/CHANGES b/CHANGES index 19b0e19..414e3da 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,42 @@ _Notes on upcoming releases will be added here_ +### Breaking changes + +**{tooliconl}`wait-for-text` drops `content_start` and `content_end`** + +The baseline anchor introduced in this release supersedes the manual capture-range knobs: capture now follows the pane's grid position automatically, so the two parameters had no remaining purpose. Agents that named them should drop them from their call sites. (#45) + +```python +# Before +wait_for_text(pattern="OK", content_start=-100) + +# After +wait_for_text(pattern="OK") +``` + +### Fixes + +**{tooliconl}`wait-for-text` now waits for *new* output, not stale scrollback** + +Previously {tooliconl}`wait-for-text` returned `found=True` on the first poll if the pattern was already present anywhere in the pane when the call began. It now snapshots `#{history_size} + #{cursor_y}` at entry and only matches lines written below that baseline, so agents using the tool to synchronise on command output see the result they expected. For the synchronous "is the pattern in the pane right now?" use case, call {tooliconl}`search-panes` instead. (#45) + +**{tooliconl}`wait-for-text` guards against tmux's bottom-row capture clip** + +`capture-pane -S` clips a below-visible start back to the bottom visible row, so a cursor sitting at the last visible row (the normal state after a fresh shell prompt) would match whatever stale text already occupied that row. The poll loop now reads `#{pane_height}` at entry and short-circuits the capture when the computed start would land below the pane. (#45) + +**{tooliconl}`wait-for-text` surfaces pane death and respawn** + +Each poll tick now reads `#{pane_dead}` and `#{pane_pid}` alongside the grid state. A pane whose process exited under `remain-on-exit`, or one that was swapped out by `respawn-pane` mid-wait, raises a `ToolError` instead of silently miscapturing against a frozen or reset grid. (#45) + +**{tooliconl}`wait-for-text` rejects footgun inputs** + +Empty `pattern`, sub-10ms `interval`, and non-positive `timeout` each raise a `ToolError` at entry. The previous behaviour would have matched every line (empty pattern), spun the tmux server (zero interval), or completed a surprise single probe instead of waiting (`timeout=0`). (#45) + +**{tooliconl}`wait-for-text` honours the timeout budget end-to-end** + +`start_time` is now captured before the initial baseline read so a stalled tmux server cannot consume the budget before the deadline timer even starts. libtmux's `tmux_cmd` uses `Popen.communicate()` with no subprocess timeout, so the worst case was real, not theoretical. (#45) + ### Dependencies **Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Unlocks the new tmux-command wrappers shipped in libtmux 0.56.0 — {meth}`~libtmux.Pane.respawn`, {meth}`~libtmux.Pane.copy_mode`, {meth}`~libtmux.Pane.pipe`, {meth}`~libtmux.Pane.swap`, {meth}`~libtmux.Pane.paste_buffer`, {meth}`~libtmux.Pane.clear_history`, {meth}`~libtmux.Pane.display_message`, {meth}`~libtmux.Server.delete_buffer`, and the {meth}`~libtmux.Session.next_window` / {meth}`~libtmux.Session.previous_window` / {meth}`~libtmux.Session.last_window` trio — so the MCP no longer falls back to raw `cmd()` calls for tmux commands the upstream API now covers. (#46) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 52767ce..ebb0f7e 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -22,6 +22,9 @@ WaitForTextResult, ) +if t.TYPE_CHECKING: + from libtmux.pane import Pane + logger = logging.getLogger(__name__) #: Exceptions that indicate "client transport is gone, keep polling". @@ -96,6 +99,47 @@ async def _maybe_log( return +class _PaneState(t.NamedTuple): + """Per-tick snapshot of pane state used by :func:`wait_for_text`. + + Read in one ``display-message`` round-trip so the loop costs two + subprocesses per tick (state + capture) instead of growing + linearly with each new field. ``|`` is the field separator — + history/cursor/height are integers, ``pane_pid`` is a numeric PID + string, and ``pane_dead`` is the literal ``"0"``/``"1"`` flag. + """ + + history_size: int + cursor_y: int + pane_height: int + pane_pid: str + pane_dead: bool + + +def _read_pane_state(pane: Pane) -> _PaneState: + """Return a :class:`_PaneState` snapshot for ``pane``. + + Combines the per-tick reads ``wait_for_text`` needs into a single + ``display-message`` call. ``history_size + cursor_y`` gives the + absolute grid anchor at entry; ``pane_height`` gates the bottom- + row capture clip; ``pane_pid`` and ``pane_dead`` surface + respawn-pane and pane-death events that invalidate the baseline. + """ + stdout = pane.display_message( + "#{history_size}|#{cursor_y}|#{pane_height}|#{pane_pid}|#{pane_dead}", + get_text=True, + ) + raw = stdout[0] if stdout else "0|0|0||0" + hs, cy, sy, pid, dead = raw.split("|", 4) + return _PaneState( + history_size=int(hs), + cursor_y=int(cy), + pane_height=int(sy), + pane_pid=pid, + pane_dead=dead == "1", + ) + + @handle_tool_errors_async async def wait_for_text( pattern: str, @@ -107,16 +151,36 @@ async def wait_for_text( timeout: float = 8.0, interval: float = 0.05, match_case: bool = False, - content_start: int | None = None, - content_end: int | None = None, socket_name: str | None = None, ctx: Context | None = None, ) -> WaitForTextResult: - """Wait for text to appear in a tmux pane. - - Polls the pane content at regular intervals until the pattern is found - or the timeout is reached. Use this instead of polling capture_pane - manually — it saves agent tokens and turns. + r"""Wait for NEW text to appear in a tmux pane. + + Polls the pane at regular intervals until ``pattern`` appears on a + line written *after* the call starts, or the timeout is reached. + Use this instead of polling :func:`capture_pane` manually — it + saves agent tokens and turns. + + **What "new" means.** At entry the tool snapshots the pane's absolute + grid position (``history_size + cursor_y``) and only matches lines + written below that baseline. Stale scrollback that was already + present when the call began is ignored. For the synchronous "is + the pattern in the pane right now?" check, call + {tooliconl}`search-panes` instead. + + In-place updates to the entry cursor's row — carriage-return + rewrites, progress spinners, single-line status updates — are + not observed; only rows below the entry cursor count as "new." + Use {tooliconl}`wait-for-content-change` or pair the command + with a sentinel for those cases. + + **Adversarial-safety pattern.** If you cannot trust that the + pattern only appears after your action — for example because the + pane prints recurring prompts, log lines, or output from background + processes you do not control — bracket your command with a unique + sentinel: ``cmd; echo __WAIT_$RANDOM__`` and wait for the sentinel + instead of ``cmd``'s natural output. tmux's grid model cannot + distinguish "your output" from "theirs"; the sentinel can. When a :class:`fastmcp.Context` is available, this tool emits periodic ``ctx.report_progress`` notifications so MCP clients can @@ -147,10 +211,6 @@ async def wait_for_text( Seconds between polls. Default 0.05 (50ms). match_case : bool Whether to match case. Default False (case-insensitive). - content_start : int, optional - Start line for capture. Negative values reach into scrollback. - content_end : int, optional - End line for capture. socket_name : str, optional tmux socket name. ctx : fastmcp.Context, optional @@ -164,6 +224,32 @@ async def wait_for_text( Notes ----- + **Scrollback truncation.** If ``history-limit`` is small and the + baseline line rolls out of history during the wait, tmux clips + ``-S`` to the oldest available line (``cmd-capture-pane.c``); the + worst case degrades to pre-baseline behaviour on the surviving + portion of history rather than an infinite false-match loop. + + **In-place rewrites below the baseline.** Programs that paint + over rows the tool will capture — cursor-position escape + sequences, full-screen progress displays, anything that rewrites + rows it already wrote — can re-introduce text the caller saw + earlier. Each tick captures the current contents of rows below + the baseline; tmux's grid model cannot distinguish "fresh write" + from "repaint with the same characters." + ``screen_write_reverseindex`` (``screen-write.c``) only scrolls + the visible region within ``[rupper, rlower]`` and never touches + ``hsize``, so ``\\eM`` itself does not invalidate the anchor — + but the surrounding TUI render loop may. Full-screen TUIs + typically run on the alternate screen (a separate grid that + this tool does not traverse), so the main-screen pattern is + rare in practice. + + **``clear`` / ``reset``.** With the default ``scroll-on-clear`` + option, cleared content scrolls into history (``screen-write.c`` + ``screen_write_clearscreen``), so the baseline anchor is + unaffected. + **Safety tier.** Tagged ``readonly`` because the tool observes pane state without mutating it. Readonly clients may therefore block for the caller-supplied ``timeout`` (default 8 s, caller @@ -174,6 +260,16 @@ async def wait_for_text( calls. If you need to rate-limit wait tools, do it at the transport layer or with dedicated middleware. """ + if not pattern: + msg = "pattern must be a non-empty string" + raise ToolError(msg) + if interval < 0.01: + msg = f"interval must be at least 0.01 s (received {interval})" + raise ToolError(msg) + if timeout <= 0: + msg = f"timeout must be positive (received {timeout})" + raise ToolError(msg) + search_pattern = pattern if regex else re.escape(pattern) flags = 0 if match_case else re.IGNORECASE try: @@ -193,9 +289,27 @@ async def wait_for_text( ) assert pane.pane_id is not None - matched_lines: list[str] = [] + + # Anchor ``start_time`` before the baseline read so a stalled + # tmux server cannot blow the user-supplied ``timeout`` budget + # — libtmux's ``tmux_cmd`` uses ``Popen.communicate()`` with no + # subprocess timeout, so the read can block arbitrarily long. start_time = time.monotonic() deadline = start_time + timeout + + # Snapshot the pane state before polling. ``hs0 + cy0`` is the + # absolute grid anchor — invariant under subsequent scrolling + # because tmux's ``-S`` is relative to the live ``hsize`` at + # capture time (cmd-capture-pane.c: ``top = gd->hsize + n``). + # ``pane_pid`` lets us detect a respawn-pane mid-wait that would + # otherwise leave the absolute anchor pointing at the old + # process's output. See issue #45. + entry = await asyncio.to_thread(_read_pane_state, pane) + baseline_abs = entry.history_size + entry.cursor_y + pane_height = entry.pane_height + baseline_pid = entry.pane_pid + + matched_lines: list[str] = [] found = False try: @@ -208,13 +322,35 @@ async def wait_for_text( message=f"Polling pane {pane.pane_id} for pattern", ) - # FastMCP direct-awaits async tools on the main event loop; the - # libtmux capture_pane call is a blocking subprocess.run. Push - # to the default executor so concurrent tool calls are not - # starved during long waits. - lines = await asyncio.to_thread( - pane.capture_pane, start=content_start, end=content_end - ) + # FastMCP direct-awaits async tools on the main event loop; + # the libtmux display-message + capture_pane calls are both + # blocking subprocess.run. Push to the default executor so + # concurrent tool calls are not starved during long waits. + state = await asyncio.to_thread(_read_pane_state, pane) + if state.pane_dead: + msg = f"pane {pane.pane_id} died during wait" + raise ToolError(msg) + if state.pane_pid != baseline_pid: + msg = ( + f"pane {pane.pane_id} was respawned during wait " + f"(pid {baseline_pid} -> {state.pane_pid}); " + "baseline anchor no longer valid" + ) + raise ToolError(msg) + # ``+ 1`` skips the baseline line itself so we don't + # re-match the row the cursor sat on at entry. + start_line = baseline_abs - state.history_size + 1 + # ``capture-pane -S`` clips a below-visible start back to + # the bottom row (cmd-capture-pane.c, post-tmux-3.0), so a + # naive capture would return stale bottom-row text whenever + # no new rows have appeared below the cursor yet. Skip the + # capture entirely on those ticks. + if start_line >= pane_height: + lines: list[str] = [] + else: + lines = await asyncio.to_thread( + pane.capture_pane, start=start_line, end=None + ) hits = [line for line in lines if compiled.search(line)] if hits: matched_lines.extend(hits) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 18d395f..24da967 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1128,23 +1128,35 @@ class WaitForTextFixture(t.NamedTuple): """Test fixture for wait_for_text.""" test_id: str - command: str | None + #: Command sent BEFORE ``wait_for_text`` is called. Its output is + #: expected to be present in the pane scrollback (and therefore + #: above the baseline) by the time the wait begins. Used to verify + #: that stale scrollback no longer matches (#45). The positive + #: "text appears after baseline" case lives in + #: ``test_wait_for_text_matches_new_output_after_baseline`` rather + #: than this fixture because it needs ``asyncio.gather`` to + #: coordinate emission against the running poll loop — synchronous + #: setup races the shell's enter-processing on CI and shifts the + #: baseline past single-line output. + pre_command: str | None pattern: str timeout: float expected_found: bool WAIT_FOR_TEXT_FIXTURES: list[WaitForTextFixture] = [ + # Regression for #45: pre-existing scrollback must NOT match. WaitForTextFixture( - test_id="text_found", - command="echo WAIT_MARKER_abc123", - pattern="WAIT_MARKER_abc123", - timeout=2.0, - expected_found=True, + test_id="stale_scrollback_does_not_match", + pre_command="echo WAIT_MARKER_stale", + pattern="WAIT_MARKER_stale", + timeout=0.5, + expected_found=False, ), + # Genuinely absent pattern still times out cleanly. WaitForTextFixture( test_id="timeout_not_found", - command=None, + pre_command=None, pattern="NEVER_EXISTS_xyz999", timeout=0.3, expected_found=False, @@ -1161,7 +1173,7 @@ def test_wait_for_text( mcp_server: Server, mcp_pane: Pane, test_id: str, - command: str | None, + pre_command: str | None, pattern: str, timeout: float, expected_found: bool, @@ -1169,8 +1181,48 @@ def test_wait_for_text( """wait_for_text polls pane content for a pattern.""" import asyncio - if command is not None: - mcp_pane.send_keys(command, enter=True) + if pre_command is not None: + mcp_pane.send_keys(pre_command, enter=True) + # Wait until the pane has fully settled before measuring the + # baseline. "Settled" means: + # + # (a) the OUTPUT line is present — ``line.strip() == pattern``, + # distinguishing the shell's actual output from the typed + # echo line that contains ``pattern`` as a substring (and + # which would otherwise trip a naive ``pattern in capture`` + # predicate while keys are still buffered pre-enter), and + # (b) ``(history_size, cursor_y)`` is unchanged across two + # consecutive polls — zsh prints async prompt-redraw + # lines (vcs_info, precmd hooks) some milliseconds after + # the initial prompt, and those redraws keep growing + # hsize *during* ``wait_for_text``'s window, pulling + # pre-baseline rows back into the visible-relative + # ``start_line`` capture. Waiting them out anchors the + # baseline below all async output. + # + # A fixed ``time.sleep`` would do the same job but couples the + # test to a wall-clock value (the project's idiom for + # tmux-state waits is ``retry_until`` — used throughout this + # file). + last_state: tuple[int, int] = (-1, -1) + + def _stale_settled() -> bool: + nonlocal last_state + raw = mcp_pane.cmd( + "display-message", "-p", "#{history_size}:#{cursor_y}" + ).stdout + if not raw: + return False + hs_str, cy_str = raw[0].split(":", 1) + state = (int(hs_str), int(cy_str)) + has_output_line = any( + line.strip() == pattern for line in mcp_pane.capture_pane() + ) + settled = state == last_state and has_output_line + last_state = state + return settled + + retry_until(_stale_settled, 2, raises=True) result = asyncio.run( wait_for_text( @@ -1190,6 +1242,104 @@ def test_wait_for_text( assert len(result.matched_lines) >= 1 +def test_wait_for_text_matches_new_output_after_baseline( + mcp_server: Server, mcp_pane: Pane +) -> None: + """wait_for_text finds output written AFTER its baseline snapshot. + + Coordinates the marker emission against the running poll loop via + :func:`asyncio.gather` so ``send_keys`` is guaranteed to fire + *after* :func:`wait_for_text` has captured its baseline. Without + that coordination the test races the shell's enter-processing — + if the shell advances the cursor before the baseline read on CI, + ``start_line`` shifts past the single-line marker and the poll + loop misses it (the failure mode that took the original + synchronous ``send_keys`` + ``asyncio.run`` shape to all six tmux + matrix slots on PR #47 commit aa8de89). + """ + import asyncio + + async def emit_after_baseline() -> None: + # The baseline read is a single display-message round trip + # (<5 ms in practice); 0.2 s gives wait_for_text plenty of + # headroom to lock the baseline before the marker fires. + await asyncio.sleep(0.2) + await asyncio.to_thread(mcp_pane.send_keys, "echo WAIT_MARKER_after", True) + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="WAIT_MARKER_after", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await emit_after_baseline() + return await wait_task + + result = asyncio.run(run()) + assert result.found is True + assert result.timed_out is False + assert any("WAIT_MARKER_after" in line for line in result.matched_lines) + + +def test_wait_for_text_does_not_match_bottom_row_clip( + mcp_server: Server, mcp_pane: Pane +) -> None: + """wait_for_text must not match stale text sitting on the cursor row. + + When the cursor is at the last visible row at entry, + ``start_line = cy0 + 1`` points below the visible region and + tmux's ``capture-pane -S`` clips back to the bottom row + (``cmd-capture-pane.c``). Without the bottom-aware guard the + poll loop captures the stale cursor-row text and matches it + instantly. + + The pane is respawned with a shell-free ``sh -c`` command that + prints the marker without a trailing newline and then sleeps — + so ``hsize`` and ``cursor_y`` stay frozen for the duration of + the wait. Running this with zsh in the loop produced a + multi-line history burst on shell exit / exec that lowered + ``start_line`` below ``pane_height`` and disengaged the guard. + """ + import asyncio + + # Replace the default shell with a single sh invocation: emit + # filler rows to push the cursor to the bottom of the visible + # region, print the marker without a trailing newline so it + # stays on the cursor row, then sleep so nothing else scrolls + # into history. Fixture teardown kills the pane (and the sleep) + # at test exit. + fill_and_park = ( + "for i in $(seq 1 30); do echo filler; done; " + "printf STALE_BOTTOM_MARKER; sleep 60" + ) + mcp_pane.respawn(kill=True, shell=f"sh -c '{fill_and_park}'") + + def _bottom_row_ready() -> bool: + state = mcp_pane.display_message("#{pane_height}:#{cursor_y}", get_text=True) + if not state: + return False + sy_str, cy_str = state[0].split(":", 1) + if int(cy_str) != int(sy_str) - 1: + return False + return any("STALE_BOTTOM_MARKER" in line for line in mcp_pane.capture_pane()) + + retry_until(_bottom_row_ready, 5, raises=True) + + result = asyncio.run( + wait_for_text( + pattern="STALE_BOTTOM_MARKER", + pane_id=mcp_pane.pane_id, + timeout=0.5, + socket_name=mcp_server.socket_name, + ) + ) + assert result.found is False + assert result.timed_out is True + + def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text raises ToolError on invalid regex when regex=True.""" import asyncio @@ -1205,6 +1355,138 @@ def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None ) +def test_wait_for_text_rejects_empty_pattern( + mcp_server: Server, mcp_pane: Pane +) -> None: + """An empty pattern matches every line and returns found=True instantly. + + ``re.compile('')`` succeeds and ``re.search`` reports a zero-width + match on every string, so the first poll would return + ``found=True`` against whatever was in the pane. Reject explicitly. + """ + import asyncio + + with pytest.raises(ToolError, match="pattern must be a non-empty string"): + asyncio.run( + wait_for_text( + pattern="", + pane_id=mcp_pane.pane_id, + socket_name=mcp_server.socket_name, + ) + ) + + +def test_wait_for_text_rejects_tiny_interval( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A sub-10ms interval lets the poll loop saturate the tmux server. + + ``asyncio.sleep(0)`` yields but does not idle, so an unguarded + ``interval=0`` fires tmux subprocesses as fast as the scheduler + hands them out — a self-inflicted server-side DoS. + """ + import asyncio + + with pytest.raises(ToolError, match=r"interval must be at least 0\.01"): + asyncio.run( + wait_for_text( + pattern="anything", + pane_id=mcp_pane.pane_id, + interval=0, + socket_name=mcp_server.socket_name, + ) + ) + + +def test_wait_for_text_raises_on_pane_respawn( + mcp_server: Server, mcp_pane: Pane +) -> None: + """Respawning the pane mid-wait invalidates the baseline anchor. + + The baseline absolute index is computed against the original + pane process's grid. ``respawn-pane`` clears the visible region + but preserves ``hsize`` (``screen_reinit``), so the math keeps + pointing at the *old* process's content — silently miscapturing. + ``wait_for_text`` detects the ``pane_pid`` change and surfaces + it as a ToolError instead. + """ + import asyncio + + async def respawn_after_delay() -> None: + # Let wait_for_text capture its baseline first, then swap + # the pane process so pane_pid changes. + await asyncio.sleep(0.1) + await asyncio.to_thread(mcp_pane.respawn, kill=True, shell="sleep 30") + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="NEVER_APPEARS_xyz", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await respawn_after_delay() + return await wait_task + + with pytest.raises(ToolError, match="respawned during wait"): + asyncio.run(run()) + + +def test_wait_for_text_raises_on_pane_death(mcp_server: Server, mcp_pane: Pane) -> None: + """A pane whose process has exited surfaces as a ToolError. + + With ``remain-on-exit`` set, tmux keeps the pane alive after its + child exits and reports ``#{pane_dead}=1``. The wait loop checks + that flag every tick and bails with a ToolError instead of + polling stale content until timeout. + """ + import asyncio + + mcp_pane.window.set_option("remain-on-exit", "on") + mcp_pane.respawn(kill=True, shell="true") + + def _is_dead() -> bool: + flag = mcp_pane.display_message("#{pane_dead}", get_text=True) + return bool(flag) and flag[0] == "1" + + retry_until(_is_dead, 3, raises=True) + + with pytest.raises(ToolError, match="died during wait"): + asyncio.run( + wait_for_text( + pattern="anything", + pane_id=mcp_pane.pane_id, + timeout=1.0, + socket_name=mcp_server.socket_name, + ) + ) + + +def test_wait_for_text_rejects_non_positive_timeout( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A non-positive timeout is ambiguous; reject rather than guess. + + The loop body runs one probe before the deadline check, so + ``timeout=0`` would complete a single synchronous capture in a + "wait" tool — surprising. Reject explicitly so callers pick a + meaningful budget. + """ + import asyncio + + with pytest.raises(ToolError, match="timeout must be positive"): + asyncio.run( + wait_for_text( + pattern="anything", + pane_id=mcp_pane.pane_id, + timeout=0, + socket_name=mcp_server.socket_name, + ) + ) + + def test_wait_for_text_reports_progress(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text calls ``ctx.report_progress`` at each poll tick.