From d2f578cbb4ee47b8db1c1e79741b91c019d74eef Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 11 May 2026 14:24:42 +0100 Subject: [PATCH] fix: retire timed-out codex app-server clients --- CHANGELOG.md | 1 + .../codex/src/app-server/run-attempt.test.ts | 70 ++++++++++++++++--- .../codex/src/app-server/run-attempt.ts | 50 ++++++++++++- 3 files changed, 110 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b698be3241..5dd467014e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai - Agents: add per-agent `tools.message.actions.allow` overrides so sandboxed/public agents can expose and enforce send-only message tools. - Agents: omit the sandbox workspace marker from compact command progress previews while keeping internal sandbox diagnostics unchanged. - Agents: widen progress draft command preview lines by 50% so Discord inline tool updates preserve more useful command context. +- Codex app-server: retire timed-out app-server clients after bounded turn interrupts so Discord agents do not reuse a CPU-spinning Codex process after an attempt timeout. - Build: upgrade workspace package management to pnpm 11 and keep Docker, install, update, and release workflows on the pnpm 11 config surface. (#79414) Thanks @altaywtf. - Build: align Telegram QA workflows and git source installs with the pnpm 11 workspace build allowlist surface. (#80588) Thanks @altaywtf. - Models: add provider-level `localService` startup for on-demand local model servers before OpenAI-compatible requests, including one-shot model probes. diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts index 35beca0e880..c6463b2683b 100644 --- a/extensions/codex/src/app-server/run-attempt.test.ts +++ b/extensions/codex/src/app-server/run-attempt.test.ts @@ -1231,15 +1231,65 @@ describe("runCodexAppServerAttempt", () => { ); await vi.waitFor( () => - expect(request).toHaveBeenCalledWith("turn/interrupt", { - threadId: "thread-1", - turnId: "turn-1", - }), + expect(request).toHaveBeenCalledWith( + "turn/interrupt", + { + threadId: "thread-1", + turnId: "turn-1", + }, + { timeoutMs: 5_000 }, + ), { interval: 1 }, ); expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false); }); + it("closes the app-server client when the active turn exceeds the attempt timeout", async () => { + const close = vi.fn(); + const request = vi.fn(async (method: string) => { + if (method === "thread/start") { + return threadStartResult("thread-1"); + } + if (method === "turn/start") { + return turnStartResult("turn-1", "inProgress"); + } + if (method === "turn/interrupt") { + return new Promise(() => undefined); + } + return {}; + }); + __testing.setCodexAppServerClientFactoryForTests( + async () => + ({ + request, + close, + addNotificationHandler: () => () => undefined, + addRequestHandler: () => () => undefined, + }) as never, + ); + const params = createParams( + path.join(tempDir, "session.jsonl"), + path.join(tempDir, "workspace"), + ); + params.timeoutMs = 100; + + const result = await runCodexAppServerAttempt(params); + + expect(result.aborted).toBe(true); + expect(result.timedOut).toBe(true); + expect(result.promptError).toBe("codex app-server attempt timed out"); + expect(request).toHaveBeenCalledWith( + "turn/interrupt", + { + threadId: "thread-1", + turnId: "turn-1", + }, + { timeoutMs: 5_000 }, + ); + expect(close).toHaveBeenCalledTimes(1); + expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false); + }); + it("does not count account rate-limit updates as turn completion activity", async () => { let notify: (notification: CodexServerNotification) => Promise = async () => undefined; let handleRequest: @@ -1540,10 +1590,14 @@ describe("runCodexAppServerAttempt", () => { ); await vi.waitFor( () => - expect(harness.request).toHaveBeenCalledWith("turn/interrupt", { - threadId: "thread-1", - turnId: "turn-1", - }), + expect(harness.request).toHaveBeenCalledWith( + "turn/interrupt", + { + threadId: "thread-1", + turnId: "turn-1", + }, + { timeoutMs: 5_000 }, + ), { interval: 1 }, ); expect(queueActiveRunMessageForTest("session-1", "after silent turn")).toBe(false); diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts index 4d9ec66a736..162bfbf18cb 100644 --- a/extensions/codex/src/app-server/run-attempt.ts +++ b/extensions/codex/src/app-server/run-attempt.ts @@ -141,6 +141,7 @@ const CODEX_DYNAMIC_TOOL_MAX_TIMEOUT_MS = 600_000; const CODEX_DYNAMIC_IMAGE_TOOL_TIMEOUT_MS = 60_000; const CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS = 3; const CODEX_APP_SERVER_STARTUP_TIMEOUT_FLOOR_MS = 100; +const CODEX_APP_SERVER_INTERRUPT_TIMEOUT_MS = 5_000; const CODEX_USAGE_LIMIT_RATE_LIMIT_REFRESH_TIMEOUT_MS = 5_000; const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000; const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000; @@ -1499,10 +1500,19 @@ export async function runCodexAppServerAttempt( ); const abortListener = () => { + const shouldRetireClient = timedOut; interruptCodexTurnBestEffort(client, { threadId: thread.threadId, turnId: activeTurnId, + timeoutMs: shouldRetireClient ? CODEX_APP_SERVER_INTERRUPT_TIMEOUT_MS : undefined, }); + if (shouldRetireClient) { + retireCodexAppServerClientAfterTimedOutTurn(client, { + threadId: thread.threadId, + turnId: activeTurnId, + reason: String(runAbortController.signal.reason ?? "timeout"), + }); + } resolveCompletion?.(); }; runAbortController.signal.addEventListener("abort", abortListener, { once: true }); @@ -1977,13 +1987,47 @@ function interruptCodexTurnBestEffort( params: { threadId: string; turnId: string; + timeoutMs?: number; }, ): void { - void Promise.resolve() - .then(() => client.request("turn/interrupt", params)) - .catch((error: unknown) => { + const requestOptions = + params.timeoutMs && Number.isFinite(params.timeoutMs) && params.timeoutMs > 0 + ? { timeoutMs: params.timeoutMs } + : undefined; + const requestParams = { threadId: params.threadId, turnId: params.turnId }; + try { + const interrupt = requestOptions + ? client.request("turn/interrupt", requestParams, requestOptions) + : client.request("turn/interrupt", requestParams); + void Promise.resolve(interrupt).catch((error: unknown) => { embeddedAgentLog.debug("codex app-server turn interrupt failed during abort", { error }); }); + } catch (error) { + embeddedAgentLog.debug("codex app-server turn interrupt failed during abort", { error }); + } +} + +function retireCodexAppServerClientAfterTimedOutTurn( + client: CodexAppServerClient, + params: { + threadId: string; + turnId: string; + reason: string; + }, +): void { + const clearedSharedClient = clearSharedCodexAppServerClientIfCurrent(client); + if (!clearedSharedClient) { + const close = (client as { close?: () => void }).close; + if (typeof close === "function") { + close.call(client); + } + } + embeddedAgentLog.warn("codex app-server client retired after timed-out turn", { + threadId: params.threadId, + turnId: params.turnId, + reason: params.reason, + clearedSharedClient, + }); } type DynamicToolBuildParams = {