Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/pty-idle-grace-period.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@aoagents/ao-web": patch
---

Defer killing idle terminal PTYs by 30s in the mux server so that quick reconnects (tab reload, sleep/wake, network blip) reuse the existing PTY instead of allocating a fresh one. macOS never recycles ptmx slot numbers within a boot, so churning PTYs across these events used to drain `kern.tty.ptmx_max` (511) over weeks of dashboard uptime. (#1718)
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,13 @@ describe("mux terminal I/O", () => {
it("receives terminal data after open", async () => {
const ws = await connectMux();

ws.send(
JSON.stringify({ ch: "terminal", id: "test-session", tmuxName: TEST_SESSION, type: "open" }),
);
// Use a unique terminal id so we exercise a fresh tmux attach. The shared
// "test-session" id used by earlier tests in this file would land in the
// post-#1718 PTY-grace path (PTY reused, no fresh init-data flow), and
// the prior test closes its WS too quickly for the buffer to populate —
// so a grace-path replay would deliver an empty buffer and time out.
const id = `test-data-flow-${Date.now()}`;
ws.send(JSON.stringify({ ch: "terminal", id, tmuxName: TEST_SESSION, type: "open" }));
await waitForMessage(ws, (m) => m.ch === "terminal" && m.type === "opened");

// tmux sends terminal init sequences on attach — wait for any data
Expand Down
116 changes: 116 additions & 0 deletions packages/web/server/__tests__/mux-websocket.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,119 @@ describe("TerminalManager.open — tmux target args (regression for #1714)", ()
expect(args).toEqual(["attach-session", "-t", "=ao-177"]);
});
});

describe("TerminalManager — PTY idle grace period (issue #1718)", () => {
// Stable mock fns shared across spawns within a single test, reset per test.
// The unit under test relies on `pty.kill()` being called exactly once on
// grace expiry, so kill needs a stable identity to count against.
let ptyKill: ReturnType<typeof vi.fn>;
let ptyOnData: ReturnType<typeof vi.fn>;
let ptyOnExit: ReturnType<typeof vi.fn>;
let manager: InstanceType<typeof TerminalManager>;

beforeEach(() => {
vi.useFakeTimers();
mockSpawn.mockReset();
mockPtySpawn.mockReset();

ptyKill = vi.fn();
ptyOnData = vi.fn();
ptyOnExit = vi.fn();

mockSpawn.mockImplementation(() => new EventEmitter());
mockPtySpawn.mockImplementation(() => ({
onData: ptyOnData,
onExit: ptyOnExit,
write: vi.fn(),
resize: vi.fn(),
kill: ptyKill,
}));

manager = new TerminalManager("/usr/bin/tmux");
});

afterEach(() => {
vi.clearAllTimers();
vi.useRealTimers();
});

it("does not kill the PTY immediately when the last subscriber unsubscribes", () => {
const unsub = manager.subscribe("ao-177", undefined, vi.fn());
expect(mockPtySpawn).toHaveBeenCalledTimes(1);

unsub();

// PTY must remain alive during the grace window — killing here would
// burn a fresh ptmx slot on the next reconnect (issue #1718).
expect(ptyKill).not.toHaveBeenCalled();
});

it("kills the PTY after the grace period expires with no reconnect", () => {
const unsub = manager.subscribe("ao-177", undefined, vi.fn());
unsub();

// Just before the window closes — still alive.
vi.advanceTimersByTime(29_999);
expect(ptyKill).not.toHaveBeenCalled();

// Window closes — kill fires.
vi.advanceTimersByTime(1);
expect(ptyKill).toHaveBeenCalledTimes(1);
});

it("preserves the PTY when a new subscriber arrives within the grace window", () => {
const unsub1 = manager.subscribe("ao-177", undefined, vi.fn());
expect(mockPtySpawn).toHaveBeenCalledTimes(1);
unsub1();

// A reconnect lands halfway through the grace window.
vi.advanceTimersByTime(15_000);
const unsub2 = manager.subscribe("ao-177", undefined, vi.fn());

// No new PTY allocated and the original is still alive — reconnect
// reused the existing slot, which is the whole point of the fix.
expect(mockPtySpawn).toHaveBeenCalledTimes(1);
expect(ptyKill).not.toHaveBeenCalled();

// After the original timer's deadline passes, the cancelled timer must
// not retroactively kill the still-subscribed PTY.
vi.advanceTimersByTime(60_000);
expect(ptyKill).not.toHaveBeenCalled();

unsub2();
});

it("only kills once when last subscriber leaves and grace expires", () => {
const unsub1 = manager.subscribe("ao-177", undefined, vi.fn());
const unsub2 = manager.subscribe("ao-177", undefined, vi.fn());

unsub1();
// First unsubscribe is not the last subscriber — no timer scheduled.
vi.advanceTimersByTime(60_000);
expect(ptyKill).not.toHaveBeenCalled();

unsub2();
// Now we are at zero subscribers — schedule the grace timer.
vi.advanceTimersByTime(30_000);
expect(ptyKill).toHaveBeenCalledTimes(1);
});

it("buffer is preserved across an unsubscribe/resubscribe within grace", () => {
const cb = vi.fn();
const unsub = manager.subscribe("ao-177", undefined, cb);

// Simulate the PTY emitting some output by invoking the captured handler.
const onDataHandler = ptyOnData.mock.calls[0]?.[0] as (data: string) => void;
onDataHandler("hello");

unsub();
vi.advanceTimersByTime(10_000);

// The terminal entry must still exist — buffered output is still there.
expect(manager.getBuffer("ao-177")).toBe("hello");

// After full grace expiry, the entry is cleaned up.
vi.advanceTimersByTime(20_001);
expect(manager.getBuffer("ao-177")).toBe("");
});
});
52 changes: 41 additions & 11 deletions packages/web/server/mux-websocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,15 @@ interface ManagedTerminal {
* reachable for up to 5 s after teardown.
*/
resetTimer?: ReturnType<typeof setTimeout>;
/**
* Pending kill scheduled when the last subscriber leaves. macOS never
* recycles ptmx slot numbers within a boot, so churning PTYs across
* tab reloads / sleep-wake / network blips burns slots and eventually
* exhausts kern.tty.ptmx_max=511 (issue #1718). A grace window lets a
* quick reconnect reuse the existing PTY instead of allocating a fresh
* one.
*/
idleGraceTimer: ReturnType<typeof setTimeout> | null;
}

const RING_BUFFER_MAX = 50 * 1024; // 50KB max per terminal
Expand All @@ -217,6 +226,7 @@ const MAX_REATTACH_ATTEMPTS = 3;
* crashes hours later gets a fresh retry budget.
*/
const REATTACH_RESET_GRACE_MS = 5_000;
const PTY_IDLE_GRACE_MS = 30_000; // delay before killing an unsubscribed PTY (issue #1718)

/**
* TerminalManager manages PTY processes independently of WebSocket connections.
Expand Down Expand Up @@ -265,6 +275,7 @@ export class TerminalManager {
buffer: [],
bufferBytes: 0,
reattachAttempts: 0,
idleGraceTimer: null,
};
this.terminals.set(key, terminal);
}
Expand Down Expand Up @@ -444,21 +455,40 @@ export class TerminalManager {
terminal.subscribers.add(callback);
if (onExit) terminal.exitCallbacks.add(onExit);

// A subscriber arrived in time — cancel any pending idle-grace kill
// (e.g. tab reload landed within the grace window after a previous
// unsubscribe). The PTY is preserved; no new ptmx slot is allocated.
if (terminal.idleGraceTimer) {
clearTimeout(terminal.idleGraceTimer);
terminal.idleGraceTimer = null;
}

// Return unsubscribe function
return () => {
terminal.subscribers.delete(callback);
if (onExit) terminal.exitCallbacks.delete(onExit);
// Kill PTY and clean up when the last subscriber leaves
if (terminal.subscribers.size === 0) {
if (terminal.resetTimer) {
clearTimeout(terminal.resetTimer);
terminal.resetTimer = undefined;
}
if (terminal.pty) {
terminal.pty.kill();
terminal.pty = null;
}
this.terminals.delete(key);
// Defer PTY kill when the last subscriber leaves. macOS burns a ptmx
// slot per allocation for the lifetime of a boot (issue #1718), so
// killing immediately on every disconnect drains kern.tty.ptmx_max.
// A grace window lets a quick reconnect reuse the existing PTY.
// The reattach-counter resetTimer (#1640) is cleared at kill time so
// its closure doesn't keep the evicted terminal reachable.
if (terminal.subscribers.size === 0 && terminal.idleGraceTimer === null) {
terminal.idleGraceTimer = setTimeout(() => {
terminal.idleGraceTimer = null;
// Re-check: a subscriber may have arrived during the window.
if (terminal.subscribers.size > 0) return;
if (terminal.resetTimer) {
clearTimeout(terminal.resetTimer);
terminal.resetTimer = undefined;
}
if (terminal.pty) {
terminal.pty.kill();
terminal.pty = null;
}
this.terminals.delete(key);
}, PTY_IDLE_GRACE_MS);
terminal.idleGraceTimer.unref();
}
};
Comment thread
harshitsinghbhandari marked this conversation as resolved.
}
Expand Down
Loading