fix: retire timed-out codex app-server clients

This commit is contained in:
Peter Steinberger
2026-05-11 14:24:42 +01:00
parent fecf18d277
commit d2f578cbb4
3 changed files with 110 additions and 11 deletions

View File

@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
- Agents: add per-agent `tools.message.actions.allow` overrides so sandboxed/public agents can expose and enforce send-only message tools.
- Agents: omit the sandbox workspace marker from compact command progress previews while keeping internal sandbox diagnostics unchanged.
- Agents: widen progress draft command preview lines by 50% so Discord inline tool updates preserve more useful command context.
- Codex app-server: retire timed-out app-server clients after bounded turn interrupts so Discord agents do not reuse a CPU-spinning Codex process after an attempt timeout.
- Build: upgrade workspace package management to pnpm 11 and keep Docker, install, update, and release workflows on the pnpm 11 config surface. (#79414) Thanks @altaywtf.
- Build: align Telegram QA workflows and git source installs with the pnpm 11 workspace build allowlist surface. (#80588) Thanks @altaywtf.
- Models: add provider-level `localService` startup for on-demand local model servers before OpenAI-compatible requests, including one-shot model probes.

View File

@@ -1231,15 +1231,65 @@ describe("runCodexAppServerAttempt", () => {
);
await vi.waitFor(
() =>
expect(request).toHaveBeenCalledWith("turn/interrupt", {
expect(request).toHaveBeenCalledWith(
"turn/interrupt",
{
threadId: "thread-1",
turnId: "turn-1",
}),
},
{ timeoutMs: 5_000 },
),
{ interval: 1 },
);
expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
it("closes the app-server client when the active turn exceeds the attempt timeout", async () => {
  // Fake client: thread/turn start succeed, but `turn/interrupt` never settles,
  // so the attempt can only finish through its own timeout handling.
  const closeSpy = vi.fn();
  const request = vi.fn(async (method: string) => {
    switch (method) {
      case "thread/start":
        return threadStartResult("thread-1");
      case "turn/start":
        return turnStartResult("turn-1", "inProgress");
      case "turn/interrupt":
        return new Promise<never>(() => undefined);
      default:
        return {};
    }
  });
  const noopUnsubscribe = () => () => undefined;
  __testing.setCodexAppServerClientFactoryForTests(
    async () =>
      ({
        request,
        close: closeSpy,
        addNotificationHandler: noopUnsubscribe,
        addRequestHandler: noopUnsubscribe,
      }) as never,
  );

  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  // Short attempt timeout so the in-progress turn is guaranteed to exceed it.
  params.timeoutMs = 100;

  const result = await runCodexAppServerAttempt(params);

  // The attempt reports itself as aborted by a timeout.
  expect(result.aborted).toBe(true);
  expect(result.timedOut).toBe(true);
  expect(result.promptError).toBe("codex app-server attempt timed out");

  // The interrupt is issued with the bounded request timeout.
  const interruptedTurn = { threadId: "thread-1", turnId: "turn-1" };
  expect(request).toHaveBeenCalledWith("turn/interrupt", interruptedTurn, { timeoutMs: 5_000 });

  // The timed-out client is closed exactly once, and the run no longer accepts queued messages.
  expect(closeSpy).toHaveBeenCalledTimes(1);
  expect(queueActiveRunMessageForTest("session-1", "after timeout")).toBe(false);
});
it("does not count account rate-limit updates as turn completion activity", async () => {
let notify: (notification: CodexServerNotification) => Promise<void> = async () => undefined;
let handleRequest:
@@ -1540,10 +1590,14 @@ describe("runCodexAppServerAttempt", () => {
);
await vi.waitFor(
() =>
expect(harness.request).toHaveBeenCalledWith("turn/interrupt", {
expect(harness.request).toHaveBeenCalledWith(
"turn/interrupt",
{
threadId: "thread-1",
turnId: "turn-1",
}),
},
{ timeoutMs: 5_000 },
),
{ interval: 1 },
);
expect(queueActiveRunMessageForTest("session-1", "after silent turn")).toBe(false);

View File

@@ -141,6 +141,7 @@ const CODEX_DYNAMIC_TOOL_MAX_TIMEOUT_MS = 600_000;
const CODEX_DYNAMIC_IMAGE_TOOL_TIMEOUT_MS = 60_000;
const CODEX_APP_SERVER_STARTUP_CONNECTION_CLOSE_MAX_ATTEMPTS = 3;
const CODEX_APP_SERVER_STARTUP_TIMEOUT_FLOOR_MS = 100;
// Request timeout (ms) passed to the `turn/interrupt` call when a turn is aborted
// because the attempt timed out and its client is about to be retired.
const CODEX_APP_SERVER_INTERRUPT_TIMEOUT_MS = 5_000;
const CODEX_USAGE_LIMIT_RATE_LIMIT_REFRESH_TIMEOUT_MS = 5_000;
const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000;
// 30 minutes, expressed in milliseconds.
const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000;
@@ -1499,10 +1500,19 @@ export async function runCodexAppServerAttempt(
);
// Abort handler for the active turn: sends a best-effort `turn/interrupt`, retires the
// client when the abort was caused by the attempt timeout, then releases the completion wait.
const abortListener = () => {
  // Read the flag once so the interrupt timeout and the retirement decision agree.
  const retireAfterInterrupt = timedOut;
  const turnRef = { threadId: thread.threadId, turnId: activeTurnId };
  // Only a timed-out attempt bounds the interrupt request; other aborts pass no timeout.
  const interruptTimeoutMs = retireAfterInterrupt
    ? CODEX_APP_SERVER_INTERRUPT_TIMEOUT_MS
    : undefined;
  interruptCodexTurnBestEffort(client, { ...turnRef, timeoutMs: interruptTimeoutMs });
  if (retireAfterInterrupt) {
    const reason = String(runAbortController.signal.reason ?? "timeout");
    retireCodexAppServerClientAfterTimedOutTurn(client, { ...turnRef, reason });
  }
  resolveCompletion?.();
};
runAbortController.signal.addEventListener("abort", abortListener, { once: true });
@@ -1977,13 +1987,47 @@ function interruptCodexTurnBestEffort(
params: {
threadId: string;
turnId: string;
timeoutMs?: number;
},
): void {
void Promise.resolve()
.then(() => client.request("turn/interrupt", params))
.catch((error: unknown) => {
const requestOptions =
params.timeoutMs && Number.isFinite(params.timeoutMs) && params.timeoutMs > 0
? { timeoutMs: params.timeoutMs }
: undefined;
const requestParams = { threadId: params.threadId, turnId: params.turnId };
try {
const interrupt = requestOptions
? client.request("turn/interrupt", requestParams, requestOptions)
: client.request("turn/interrupt", requestParams);
void Promise.resolve(interrupt).catch((error: unknown) => {
embeddedAgentLog.debug("codex app-server turn interrupt failed during abort", { error });
});
} catch (error) {
embeddedAgentLog.debug("codex app-server turn interrupt failed during abort", { error });
}
}
/**
 * Retires an app-server client whose turn exceeded the attempt timeout so it is not reused.
 *
 * The client is first offered to `clearSharedCodexAppServerClientIfCurrent`; when that
 * reports the client was not the current shared one, the client's own `close()` is invoked
 * directly (if it has one). A warning describing the retirement is always logged.
 *
 * This function never throws because of a failing `close()`: it is invoked synchronously
 * from the abort listener, which must still go on to resolve the pending turn completion.
 *
 * @param client - The client that served the timed-out turn.
 * @param params - Identifiers of the timed-out turn plus the abort reason, used for logging.
 */
function retireCodexAppServerClientAfterTimedOutTurn(
  client: CodexAppServerClient,
  params: {
    threadId: string;
    turnId: string;
    reason: string;
  },
): void {
  // NOTE(review): presumably the shared-client path disposes of the client itself when it
  // returns a truthy value — confirm against clearSharedCodexAppServerClientIfCurrent.
  const clearedSharedClient = clearSharedCodexAppServerClientIfCurrent(client);
  if (!clearedSharedClient) {
    // Test doubles and partial clients may not expose close(), hence the defensive lookup.
    const close = (client as { close?: () => void }).close;
    if (typeof close === "function") {
      try {
        close.call(client);
      } catch (error: unknown) {
        // Best effort: a failing close must not escape the abort listener, otherwise the
        // caller never reaches its completion resolution and the retirement goes unlogged.
        embeddedAgentLog.warn("codex app-server client close failed after timed-out turn", {
          threadId: params.threadId,
          turnId: params.turnId,
          error,
        });
      }
    }
  }
  embeddedAgentLog.warn("codex app-server client retired after timed-out turn", {
    threadId: params.threadId,
    turnId: params.turnId,
    reason: params.reason,
    clearedSharedClient,
  });
}
type DynamicToolBuildParams = {