diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index 8dabc8eeb24..0e5b261362f 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -694,6 +694,56 @@ describe("qa mock openai server", () => { expect(payload.output?.[0]?.content?.[0]?.text).toContain("Status: complete"); }); + it("uses argument-scoped tool call ids for repeated tool names", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const prompt = + "Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status."; + + const first = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + stream: false, + model: "gpt-5.5", + input: [{ role: "user", content: [{ type: "input_text", text: prompt }] }], + }), + }); + const firstPayload = (await first.json()) as { + output?: Array<{ call_id?: string }>; + }; + + const second = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + stream: false, + model: "gpt-5.5", + input: [ + { role: "user", content: [{ type: "input_text", text: prompt }] }, + { + type: "function_call_output", + output: + "# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n", + }, + ], + }), + }); + const secondPayload = (await second.json()) as { + output?: Array<{ call_id?: string }>; + }; + + expect(firstPayload.output?.[0]?.call_id).toMatch(/^call_mock_read_/); + expect(secondPayload.output?.[0]?.call_id).toMatch(/^call_mock_read_/); + expect(firstPayload.output?.[0]?.call_id).not.toBe(secondPayload.output?.[0]?.call_id); + }); + it("continues repo-contract followthrough when a retry user item follows tool output", async () => { const server = await startQaMockOpenAiServer({ host: "127.0.0.1", @@ -731,6 +781,91 @@ describe("qa mock openai server", () => { expect(await response.text()).toContain('"arguments":"{\\"path\\":\\"SOUL.md\\"}"'); }); + it("continues repo-contract followthrough from structured tool output", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const prompt = + "Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status."; + + const response = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + stream: true, + model: "gpt-5.5", + input: [ + { role: "user", content: [{ type: "input_text", text: prompt }] }, + { + type: "function_call_output", + output: [ + { + type: "output_text", + text: "# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n", + }, + ], + }, + { + role: "user", + content: [{ type: "input_text", text: "Continue after compaction." }], + }, + ], + }), + }); + + expect(response.status).toBe(200); + expect(await response.text()).toContain('"arguments":"{\\"path\\":\\"SOUL.md\\"}"'); + }); + + it("advances repo-contract followthrough when transcript text is newer than extracted tool output", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const prompt = + "Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status."; + + const response = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + stream: true, + model: "gpt-5.5", + input: [ + { role: "user", content: [{ type: "input_text", text: prompt }] }, + { + type: "function_call_output", + output: + "# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n", + }, + { + role: "user", + content: [ + { + type: "input_text", + text: "# Execution style\n\nStay brief, honest, and action-first.\n", + }, + ], + }, + ], + }), + }); + + expect(response.status).toBe(200); + expect(await response.text()).toContain( + '"arguments":"{\\"path\\":\\"FOLLOWTHROUGH_INPUT.md\\"}"', + ); + }); + it("drives the compaction retry mutating tool parity flow", async () => { const server = await startQaMockOpenAiServer({ host: "127.0.0.1", @@ -1259,6 +1394,66 @@ describe("qa mock openai server", () => { expect(threadMemorySummary.status).toBe(200); expect(JSON.stringify(await threadMemorySummary.json())).toContain("ORBIT-22"); + const structuredThreadMemorySummary = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: false, + instructions: + "@openclaw Thread memory check: what is the hidden thread codename stored only in memory? Use memory tools first and reply only in this thread.", + input: [ + { + type: "function_call_output", + output: { + text: "Thread-hidden codename: ORBIT-22.", + }, + }, + { + role: "user", + content: [ + { + type: "input_text", + text: "Protocol note: acknowledged. Continue with the QA scenario plan.", + }, + ], + }, + ], + }), + }); + expect(structuredThreadMemorySummary.status).toBe(200); + expect(JSON.stringify(await structuredThreadMemorySummary.json())).toContain("ORBIT-22"); + + const systemFallbackThreadMemorySummary = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: false, + input: [ + { + role: "system", + content: "## /workspace/MEMORY.md\nThread-hidden codename: ORBIT-22.", + }, + makeUserInput( + "@openclaw Thread memory check: what is the hidden thread codename stored only in memory? Use memory tools first and reply only in this thread.", + ), + { + type: "function_call_output", + output: JSON.stringify({ + results: [], + unavailable: true, + error: "database is not open", + }), + }, + ], + }), + }); + expect(systemFallbackThreadMemorySummary.status).toBe(200); + expect(JSON.stringify(await systemFallbackThreadMemorySummary.json())).toContain("ORBIT-22"); + const memoryFollowup = await fetch(`${server.baseUrl}/v1/responses`, { method: "POST", headers: { diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index ec2e50afa39..d6cc24553fc 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -1,3 +1,4 @@ +import { createHash } from "node:crypto"; import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; import { setTimeout as sleep } from "node:timers/promises"; import { escapeRegExp } from "openclaw/plugin-sdk/text-runtime"; @@ -177,6 +178,7 @@ type MockScenarioState = { const MOCK_OPENAI_MAX_BODY_BYTES = 16 * 1024 * 1024; const MOCK_OPENAI_BODY_TIMEOUT_MS = 30_000; +const MOCK_OPENAI_DEBUG_REQUEST_LIMIT = 200; function readBody(req: IncomingMessage): Promise { return readRequestBodyWithLimit(req, { @@ -292,17 +294,74 @@ function isToolOutputContinuationText(text: string) { ); } +function stringifyFunctionCallOutput(output: unknown): string { + if (typeof output === "string") { + return output; + } + if (Array.isArray(output)) { + return output + .map((entry) => { + if (typeof entry === "string") { + return entry; + } + if (!entry || typeof entry !== "object") { + return ""; + } + const record = entry as Record; + if (typeof record.text === "string") { + return record.text; + } + if (typeof record.output_text === "string") { + return record.output_text; + } + if (typeof record.content === "string") { + return record.content; + } + return ""; + }) + .filter(Boolean) + .join("\n"); + } + if (output && typeof output === "object") { + const record = output as Record; + if (typeof record.text === "string") { + return record.text; + } + if (typeof record.output_text === "string") { + return record.output_text; + } + if (typeof record.content === "string") { + return record.content; + } + try { + return JSON.stringify(output); + } catch { + return ""; + } + } + return ""; +} + +function extractFunctionCallOutputText(item: ResponsesInputItem) { + if (item.type !== "function_call_output") { + return ""; + } + return stringifyFunctionCallOutput(item.output); +} + function extractToolOutput(input: ResponsesInputItem[]) { const lastUserIndex = findLastUserIndex(input); for (let index = input.length - 1; index > lastUserIndex; index -= 1) { const item = input[index]; - if (item.type === "function_call_output" && typeof item.output === "string" && item.output) { - return item.output; + const output = extractFunctionCallOutputText(item); + if (output) { + return output; } } for (let index = input.length - 1; index >= 0; index -= 1) { const item = input[index]; - if (item.type === "function_call_output" && typeof item.output === "string" && item.output) { + const output = extractFunctionCallOutputText(item); + if (output) { const laterUserTexts = input .slice(index + 1) .filter((laterItem) => laterItem.role === "user" && Array.isArray(laterItem.content)) @@ -312,7 +371,7 @@ function extractToolOutput(input: ResponsesInputItem[]) { laterUserTexts.length > 0 && laterUserTexts.every((text) => isToolOutputContinuationText(text)) ) { - return item.output; + return output; } continue; } @@ -320,6 +379,17 @@ function extractToolOutput(input: ResponsesInputItem[]) { return ""; } +function extractLatestToolOutput(input: ResponsesInputItem[]) { + for (let index = input.length - 1; index >= 0; index -= 1) { + const item = input[index]; + const output = extractFunctionCallOutputText(item); + if (output) { + return output; + } + } + return ""; +} + function extractInputText(content: unknown[]): string { return content .filter( @@ -348,6 +418,27 @@ function extractAllUserTexts(input: ResponsesInputItem[]) { return texts; } +function extractSystemInputText(input: ResponsesInputItem[]) { + const texts: string[] = []; + for (const item of input) { + if (item.role !== "system") { + continue; + } + if (typeof item.content === "string" && item.content.trim()) { + texts.push(item.content.trim()); + continue; + } + if (!Array.isArray(item.content)) { + continue; + } + const text = extractInputText(item.content); + if (text) { + texts.push(text); + } + } + return texts.join("\n"); +} + function extractAllInputTexts(input: ResponsesInputItem[]) { const texts: string[] = []; for (const item of input) { @@ -469,14 +560,21 @@ function readTargetFromPrompt(prompt: string) { } function buildToolCallEventsWithArgs(name: string, args: Record): StreamEvent[] { - const callId = `call_mock_${name}_1`; const serialized = JSON.stringify(args); + const callSuffix = createHash("sha1") + .update(name) + .update("\0") + .update(serialized) + .digest("hex") + .slice(0, 10); + const callId = `call_mock_${name}_${callSuffix}`; + const itemId = `fc_mock_${name}_${callSuffix}`; return [ { type: "response.output_item.added", item: { type: "function_call", - id: `fc_mock_${name}_1`, + id: itemId, call_id: callId, name, arguments: "", @@ -487,7 +585,7 @@ function buildToolCallEventsWithArgs(name: string, args: Record type: "response.output_item.done", item: { type: "function_call", - id: `fc_mock_${name}_1`, + id: itemId, call_id: callId, name, arguments: serialized, @@ -496,12 +594,12 @@ function buildToolCallEventsWithArgs(name: string, args: Record { type: "response.completed", response: { - id: `resp_mock_${name}_1`, + id: `resp_mock_${name}_${callSuffix}`, status: "completed", output: [ { type: "function_call", - id: `fc_mock_${name}_1`, + id: itemId, call_id: callId, name, arguments: serialized, @@ -745,7 +843,14 @@ function buildAssistantText( ) { const prompt = extractLastUserText(input); const toolOutput = extractToolOutput(input); - const toolJson = parseToolOutputJson(toolOutput); + const scenarioToolOutput = + toolOutput || + (/thread memory check|session memory ranking check|memory tools check|repo contract followthrough check/i.test( + extractAllRequestTexts(input, body), + ) + ? extractLatestToolOutput(input) + : ""); + const toolJson = parseToolOutputJson(scenarioToolOutput); const userTexts = extractAllUserTexts(input); const allInputText = extractAllRequestTexts(input, body); const rememberedFact = extractRememberedFact(userTexts); @@ -755,8 +860,8 @@ function buildAssistantText( ? toolJson.text : Array.isArray(toolJson?.results) ? JSON.stringify(toolJson.results) - : toolOutput; - const orbitCode = extractOrbitCode(memorySnippet); + : scenarioToolOutput; + const orbitCode = extractOrbitCode(memorySnippet) ?? extractOrbitCode(allInputText); const mediaPath = /MEDIA:([^\n]+)/.exec(toolOutput)?.[1]?.trim(); const exactReplyDirective = extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText); @@ -816,10 +921,11 @@ function buildAssistantText( if (/tool continuity check/i.test(prompt) && toolOutput) { return `Protocol note: model switch handoff confirmed on ${model || "the requested model"}. QA mission from QA_KICKOFF_TASK.md still applies: understand this OpenClaw repo from source + docs before acting.`; } - if (toolOutput && /repo contract followthrough check/i.test(allInputText)) { + if ((toolOutput || allInputText) && /repo contract followthrough check/i.test(allInputText)) { + const repoEvidenceText = [scenarioToolOutput, allInputText].filter(Boolean).join("\n"); if ( - /successfully (?:wrote|created|updated|replaced)/i.test(toolOutput) || - /status:\s*complete/i.test(toolOutput) + /successfully (?:wrote|created|updated|replaced)/i.test(repoEvidenceText) || + /status:\s*complete/i.test(repoEvidenceText) ) { return [ "Read: AGENT.md, SOUL.md, FOLLOWTHROUGH_INPUT.md", @@ -1250,8 +1356,15 @@ async function buildResponsesPayload( const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : []; const prompt = extractLastUserText(input); const toolOutput = extractToolOutput(input); - const toolJson = parseToolOutputJson(toolOutput); const allInputText = extractAllRequestTexts(input, body); + const scenarioToolOutput = + toolOutput || + (/thread memory check|session memory ranking check|memory tools check|repo contract followthrough check/i.test( + allInputText, + ) + ? extractLatestToolOutput(input) + : ""); + const toolJson = parseToolOutputJson(scenarioToolOutput); const exactReplyDirective = extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText); const exactMarkerDirective = @@ -1311,7 +1424,7 @@ async function buildResponsesPayload( return buildAssistantEvents("BETA-OK"); } if (QA_REASONING_ONLY_RECOVERY_PROMPT_RE.test(allInputText)) { - if (!toolOutput) { + if (!scenarioToolOutput) { return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" }); } if (!hasReasoningOnlyRetryInstruction) { @@ -1323,7 +1436,7 @@ async function buildResponsesPayload( return buildAssistantEvents("REASONING-RECOVERED-OK"); } if (QA_REASONING_ONLY_SIDE_EFFECT_PROMPT_RE.test(allInputText)) { - if (!toolOutput) { + if (!scenarioToolOutput) { return buildToolCallEventsWithArgs("write", { path: "reasoning-only-side-effect.txt", content: "side effects already happened\n", @@ -1649,7 +1762,7 @@ async function buildResponsesPayload( return buildAssistantEvents("NONE"); } if (/session memory ranking check/i.test(prompt)) { - if (!toolOutput) { + if (!scenarioToolOutput) { return buildToolCallEventsWithArgs("memory_search", { query: "current Project Nebula codename ORBIT-10", maxResults: 3, @@ -1684,7 +1797,15 @@ async function buildResponsesPayload( } } if (/thread memory check/i.test(allInputText)) { - if (!toolOutput) { + const transcriptOrbitCode = + extractOrbitCode(allInputText) ?? + (scenarioToolOutput ? extractOrbitCode(extractSystemInputText(input)) : null); + if (transcriptOrbitCode) { + return buildAssistantEvents( + `Protocol note: I checked memory in-thread and the hidden thread codename is ${transcriptOrbitCode}.`, + ); + } + if (!scenarioToolOutput) { return buildToolCallEventsWithArgs("memory_search", { query: "hidden thread codename ORBIT-22", maxResults: 3, @@ -1752,18 +1873,44 @@ async function buildResponsesPayload( return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" }); } if (/repo contract followthrough check/i.test(allInputText)) { - if (!toolOutput) { + const repoEvidenceText = [scenarioToolOutput, allInputText].filter(Boolean).join("\n"); + if ( + /successfully (?:wrote|created|updated|replaced)/i.test(repoEvidenceText) || + /status:\s*complete/i.test(repoEvidenceText) + ) { + return buildAssistantEvents( + [ + "Read: AGENT.md, SOUL.md, FOLLOWTHROUGH_INPUT.md", + "Wrote: repo-contract-summary.txt", + "Status: complete", + ].join("\n"), + ); + } + if (!scenarioToolOutput) { + if ( + repoEvidenceText.includes("Mission: prove you followed the repo contract.") && + repoEvidenceText.includes("Evidence path: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md") + ) { + return buildToolCallEventsWithArgs("write", { + path: "repo-contract-summary.txt", + content: [ + "Mission: prove you followed the repo contract.", + "Evidence: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md", + "Status: complete", + ].join("\n"), + }); + } + if (/# Execution style/i.test(repoEvidenceText)) { + return buildToolCallEventsWithArgs("read", { path: "FOLLOWTHROUGH_INPUT.md" }); + } + if (/# Repo contract/i.test(repoEvidenceText)) { + return buildToolCallEventsWithArgs("read", { path: "SOUL.md" }); + } return buildToolCallEventsWithArgs("read", { path: "AGENT.md" }); } - if (toolOutput.includes("# Repo contract")) { - return buildToolCallEventsWithArgs("read", { path: "SOUL.md" }); - } - if (toolOutput.includes("# Execution style")) { - return buildToolCallEventsWithArgs("read", { path: "FOLLOWTHROUGH_INPUT.md" }); - } if ( - toolOutput.includes("Mission: prove you followed the repo contract.") && - toolOutput.includes("Evidence path: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md") + repoEvidenceText.includes("Mission: prove you followed the repo contract.") && + repoEvidenceText.includes("Evidence path: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md") ) { return buildToolCallEventsWithArgs("write", { path: "repo-contract-summary.txt", @@ -1774,6 +1921,12 @@ async function buildResponsesPayload( ].join("\n"), }); } + if (repoEvidenceText.includes("# Execution style")) { + return buildToolCallEventsWithArgs("read", { path: "FOLLOWTHROUGH_INPUT.md" }); + } + if (repoEvidenceText.includes("# Repo contract")) { + return buildToolCallEventsWithArgs("read", { path: "SOUL.md" }); + } } if ( canCallSessionsSpawn && @@ -2288,8 +2441,8 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n plannedToolArgs: extractPlannedToolArgs(events), }; requests.push(lastRequest); - if (requests.length > 50) { - requests.splice(0, requests.length - 50); + if (requests.length > MOCK_OPENAI_DEBUG_REQUEST_LIMIT) { + requests.splice(0, requests.length - MOCK_OPENAI_DEBUG_REQUEST_LIMIT); } if (body.stream === false) { const completion = events.at(-1); @@ -2344,8 +2497,8 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n plannedToolArgs: extractPlannedToolArgs(events), }; requests.push(lastRequest); - if (requests.length > 50) { - requests.splice(0, requests.length - 50); + if (requests.length > MOCK_OPENAI_DEBUG_REQUEST_LIMIT) { + requests.splice(0, requests.length - MOCK_OPENAI_DEBUG_REQUEST_LIMIT); } if (body.stream === true) { writeAnthropicSse(res, streamEvents); diff --git a/extensions/telegram/src/bot.media.stickers-and-fragments.e2e.test.ts b/extensions/telegram/src/bot.media.stickers-and-fragments.e2e.test.ts index 3753880e9e8..0a903c06540 100644 --- a/extensions/telegram/src/bot.media.stickers-and-fragments.e2e.test.ts +++ b/extensions/telegram/src/bot.media.stickers-and-fragments.e2e.test.ts @@ -6,6 +6,9 @@ import { describeStickerImageSpy, getCachedStickerSpy, } from "./bot.media.test-utils.js"; +import { resolveMedia } from "./bot/delivery.resolve-media.js"; +import type { TelegramContext } from "./bot/types.js"; +import type { TelegramTransport } from "./fetch.js"; describe("telegram stickers", () => { const STICKER_TEST_TIMEOUT_MS = process.platform === "win32" ? 30_000 : 20_000; @@ -35,7 +38,12 @@ describe("telegram stickers", () => { it( "refreshes cached sticker metadata on cache hit", async () => { - const { handler, proxyFetch, replySpy, runtimeError } = await createStaticStickerHarness(); + const proxyFetch = vi.fn().mockResolvedValue( + new Response(Buffer.from(new Uint8Array([0x52, 0x49, 0x46, 0x46])), { + status: 200, + headers: { "content-type": "image/webp" }, + }), + ); getCachedStickerSpy.mockReturnValue({ fileId: "old_file_id", @@ -46,29 +54,35 @@ describe("telegram stickers", () => { cachedAt: "2026-01-20T10:00:00.000Z", }); - await handler({ - message: { - message_id: 103, - chat: { id: 1234, type: "private" }, - from: { id: 777, is_bot: false, first_name: "Ada" }, - sticker: { - file_id: "new_file_id", - file_unique_id: "sticker_unique_456", - type: "regular", - width: 512, - height: 512, - is_animated: false, - is_video: false, - emoji: "🔥", - set_name: "NewSet", + const media = await resolveMedia({ + maxBytes: 2 * 1024 * 1024, + token: "tok", + transport: { + fetch: proxyFetch as unknown as typeof fetch, + sourceFetch: proxyFetch as unknown as typeof fetch, + } satisfies TelegramTransport, + ctx: { + message: { + message_id: 103, + chat: { id: 1234, type: "private" }, + from: { id: 777, is_bot: false, first_name: "Ada" }, + sticker: { + file_id: "new_file_id", + file_unique_id: "sticker_unique_456", + type: "regular", + width: 512, + height: 512, + is_animated: false, + is_video: false, + emoji: "🔥", + set_name: "NewSet", + }, + date: 1736380800, }, - date: 1736380800, - }, - me: { username: "openclaw_bot" }, - getFile: async () => ({ file_path: "stickers/sticker.webp" }), + getFile: async () => ({ file_path: "stickers/sticker.webp" }), + } as TelegramContext, }); - expect(runtimeError).not.toHaveBeenCalled(); expect(cacheStickerSpy).toHaveBeenCalledWith( expect.objectContaining({ fileId: "new_file_id", @@ -76,9 +90,8 @@ describe("telegram stickers", () => { setName: "NewSet", }), ); - const payload = replySpy.mock.calls[0][0]; - expect(payload.Sticker?.fileId).toBe("new_file_id"); - expect(payload.Sticker?.cachedDescription).toBe("Cached description"); + expect(media?.stickerMetadata?.fileId).toBe("new_file_id"); + expect(media?.stickerMetadata?.cachedDescription).toBe("Cached description"); expect(proxyFetch).toHaveBeenCalledWith( "https://api.telegram.org/file/bottok/stickers/sticker.webp", expect.objectContaining({ redirect: "manual" }), diff --git a/scripts/e2e/parallels/guest-transports.ts b/scripts/e2e/parallels/guest-transports.ts index 16bd954569b..056cbc21b9f 100644 --- a/scripts/e2e/parallels/guest-transports.ts +++ b/scripts/e2e/parallels/guest-transports.ts @@ -263,12 +263,16 @@ export class LinuxGuest { ) {} exec(args: string[], options: GuestExecOptions = {}): string { - const result = run("prlctl", ["exec", this.vmName, "/usr/bin/env", "HOME=/root", ...args], { - check: false, - input: options.input, - quiet: true, - timeoutMs: this.phases.remainingTimeoutMs(options.timeoutMs), - }); + const result = run( + "prlctl", + ["exec", this.vmName, "/usr/bin/env", "HOME=/root", "OPENCLAW_ALLOW_ROOT=1", ...args], + { + check: false, + input: options.input, + quiet: true, + timeoutMs: this.phases.remainingTimeoutMs(options.timeoutMs), + }, + ); this.phases.append(result.stdout); this.phases.append(result.stderr); throwIfFailed("Linux guest command", result, options.check); @@ -279,7 +283,16 @@ export class LinuxGuest { const scriptPath = `/tmp/openclaw-parallels-${process.pid}-${Date.now()}.sh`; const write = run( "prlctl", - ["exec", this.vmName, "/usr/bin/env", "HOME=/root", "dd", `of=${scriptPath}`, "bs=1048576"], + [ + "exec", + this.vmName, + "/usr/bin/env", + "HOME=/root", + "OPENCLAW_ALLOW_ROOT=1", + "dd", + `of=${scriptPath}`, + "bs=1048576", + ], { input: `umask 022\n${script}`, quiet: true, diff --git a/scripts/e2e/parallels/linux-smoke.ts b/scripts/e2e/parallels/linux-smoke.ts index ba81437332b..eb8272c9c3b 100755 --- a/scripts/e2e/parallels/linux-smoke.ts +++ b/scripts/e2e/parallels/linux-smoke.ts @@ -599,7 +599,7 @@ PY`); rm -f /tmp/openclaw-parallels-linux-gateway.log setsid sh -lc ` + shellQuote( - `exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json${bonjourEnv} ${this.auth.apiKeyEnv}=${shellQuote( + `exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_ALLOW_ROOT=1${bonjourEnv} ${this.auth.apiKeyEnv}=${shellQuote( this.auth.apiKeyValue, )} openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-linux-gateway.log 2>&1`, ) + @@ -622,7 +622,7 @@ setsid sh -lc ` + : ["openclaw", "gateway", "status", "--deep"]; const result = run( "prlctl", - ["exec", this.options.vmName, "/usr/bin/env", "HOME=/root", ...args], + ["exec", this.options.vmName, "/usr/bin/env", "HOME=/root", "OPENCLAW_ALLOW_ROOT=1", ...args], { check: false, quiet: true, @@ -646,6 +646,7 @@ setsid sh -lc ` + this.options.vmName, "/usr/bin/env", "HOME=/root", + "OPENCLAW_ALLOW_ROOT=1", "openclaw", "gateway", "status", @@ -728,7 +729,7 @@ for attempt in 1 2; do rm -f "$HOME/.openclaw/agents/main/sessions/$session_id.jsonl" output_file="$(mktemp)" set +e - /usr/bin/env ${shellQuote(`${this.auth.apiKeyEnv}=${this.auth.apiKeyValue}`)} openclaw agent --local --agent main --session-id "$session_id" --message ${shellQuote( + /usr/bin/env OPENCLAW_ALLOW_ROOT=1 ${shellQuote(`${this.auth.apiKeyEnv}=${this.auth.apiKeyValue}`)} openclaw agent --local --agent main --session-id "$session_id" --message ${shellQuote( "Reply with exact ASCII text OK only.", )} --thinking minimal --timeout ${resolveParallelsModelTimeoutSeconds("linux")} --json >"$output_file" 2>&1 rc=$? diff --git a/scripts/e2e/parallels/npm-update-scripts.ts b/scripts/e2e/parallels/npm-update-scripts.ts index 1b189509003..e95564e010a 100644 --- a/scripts/e2e/parallels/npm-update-scripts.ts +++ b/scripts/e2e/parallels/npm-update-scripts.ts @@ -49,7 +49,7 @@ for attempt in 1 2; do rm -f "$HOME/.openclaw/agents/main/sessions/$session_id.jsonl" output_file="$(mktemp)" set +e - ${input.auth.apiKeyEnv}=${shellQuote(input.auth.apiKeyValue)} ${command} agent --local --agent main --session-id "$session_id" --message 'Reply with exact ASCII text OK only.' --thinking minimal --json >"$output_file" 2>&1 + OPENCLAW_ALLOW_ROOT="\${OPENCLAW_ALLOW_ROOT:-}" ${input.auth.apiKeyEnv}=${shellQuote(input.auth.apiKeyValue)} ${command} agent --local --agent main --session-id "$session_id" --message 'Reply with exact ASCII text OK only.' --thinking minimal --json >"$output_file" 2>&1 rc=$? set -e cat "$output_file" @@ -164,16 +164,23 @@ PY stop_openclaw_gateway_processes() { OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 /opt/homebrew/bin/openclaw gateway stop || true pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true + if command -v lsof >/dev/null 2>&1; then + pids="$(lsof -tiTCP:18789 -sTCP:LISTEN 2>/dev/null || true)" + if [ -n "$pids" ]; then + kill $pids >/dev/null 2>&1 || true + sleep 2 + kill -9 $pids >/dev/null 2>&1 || true + fi + fi } start_openclaw_gateway() { - if /opt/homebrew/bin/openclaw gateway restart; then - return - fi - pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true + stop_openclaw_gateway_processes rm -f /tmp/openclaw-parallels-macos-gateway.log - nohup env OPENCLAW_HOME="$HOME" OPENCLAW_STATE_DIR="$HOME/.openclaw" OPENCLAW_CONFIG_PATH="$HOME/.openclaw/openclaw.json" ${input.auth.apiKeyEnv}=${shellQuote( + trap '' HUP + /usr/bin/env OPENCLAW_HOME="$HOME" OPENCLAW_STATE_DIR="$HOME/.openclaw" OPENCLAW_CONFIG_PATH="$HOME/.openclaw/openclaw.json" ${input.auth.apiKeyEnv}=${shellQuote( input.auth.apiKeyValue, )} /opt/homebrew/bin/openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-macos-gateway.log 2>&1 /dev/null 2>&1 || true } start_openclaw_gateway() { pkill -f "openclaw gateway run" >/dev/null 2>&1 || true rm -f /tmp/openclaw-parallels-linux-gateway.log setsid sh -lc ${shellQuote( - `exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_DISABLE_BONJOUR=1 ${input.auth.apiKeyEnv}=${shellQuote( + `exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_DISABLE_BONJOUR=1 OPENCLAW_ALLOW_ROOT=1 ${input.auth.apiKeyEnv}=${shellQuote( input.auth.apiKeyValue, )} openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-linux-gateway.log 2>&1`, )} >/dev/null 2>&1 < /dev/null & diff --git a/scripts/e2e/parallels/npm-update-smoke.ts b/scripts/e2e/parallels/npm-update-smoke.ts index 1438aead005..c0ba32cf6d4 100755 --- a/scripts/e2e/parallels/npm-update-smoke.ts +++ b/scripts/e2e/parallels/npm-update-smoke.ts @@ -805,6 +805,7 @@ class NpmUpdateSmoke { this.linuxVm, "/usr/bin/env", "HOME=/root", + "OPENCLAW_ALLOW_ROOT=1", "PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/snap/bin", "bash", scriptPath, diff --git a/scripts/e2e/parallels/powershell.ts b/scripts/e2e/parallels/powershell.ts index 98000742663..6f3be9dbba6 100644 --- a/scripts/e2e/parallels/powershell.ts +++ b/scripts/e2e/parallels/powershell.ts @@ -81,16 +81,26 @@ export function windowsAgentTurnConfigPatchScript(modelId: string): string { }); return `$agentTurnConfigPatchPath = $env:OPENCLAW_CONFIG_PATH if (-not $agentTurnConfigPatchPath) { $agentTurnConfigPatchPath = Join-Path $env:USERPROFILE '.openclaw\\openclaw.json' } +$agentTurnVersionText = Invoke-OpenClaw --version 2>$null | Out-String +$agentTurnRuntimePolicySupported = $false +if ($agentTurnVersionText -match 'OpenClaw\\s+(\\d{4})\\.(\\d{1,2})\\.(\\d{1,2})') { + $agentTurnYear = [int]$Matches[1] + $agentTurnMonth = [int]$Matches[2] + $agentTurnDay = [int]$Matches[3] + $agentTurnRuntimePolicySupported = ($agentTurnYear -gt 2026) -or ($agentTurnYear -eq 2026 -and (($agentTurnMonth -gt 5) -or ($agentTurnMonth -eq 5 -and $agentTurnDay -ge 9))) +} $env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATCH = @' ${payloadJson} '@ $env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATH = $agentTurnConfigPatchPath +$env:OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED = if ($agentTurnRuntimePolicySupported) { '1' } else { '0' } $agentTurnConfigPatchScriptPath = Join-Path ([System.IO.Path]::GetTempPath()) 'openclaw-agent-turn-config-patch.cjs' @' const fs = require("node:fs"); const path = require("node:path"); const configPath = process.env.OPENCLAW_PARALLELS_AGENT_CONFIG_PATH; const payload = JSON.parse(process.env.OPENCLAW_PARALLELS_AGENT_CONFIG_PATCH || "{}"); +const canWriteAgentRuntime = process.env.OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED === "1"; function readJsonFile(filePath) { return JSON.parse(fs.readFileSync(filePath, "utf8").replace(/^\\uFEFF/u, "")); } @@ -118,6 +128,27 @@ for (const op of payload.operations || []) { } } } +const selectedModelEntry = cfg.agents.defaults.models[payload.modelId]; +if (selectedModelEntry && typeof selectedModelEntry === "object" && !Array.isArray(selectedModelEntry)) { + if (canWriteAgentRuntime) { + selectedModelEntry.agentRuntime = { id: "pi" }; + } else { + delete selectedModelEntry.agentRuntime; + } +} +const providerId = String(payload.modelId || "").split("/", 1)[0]; +const providerModelId = String(payload.modelId || "").slice(providerId.length + 1); +const providerEntry = cfg.models && typeof cfg.models === "object" && cfg.models.providers && typeof cfg.models.providers === "object" ? cfg.models.providers[providerId] : undefined; +if (providerEntry && typeof providerEntry === "object" && !Array.isArray(providerEntry)) { + delete providerEntry.agentRuntime; + if (Array.isArray(providerEntry.models)) { + for (const model of providerEntry.models) { + if (model && typeof model === "object" && (model.id === providerModelId || model.id === payload.modelId || model.name === providerModelId || model.name === payload.modelId)) { + delete model.agentRuntime; + } + } + } +} fs.mkdirSync(path.dirname(configPath), { recursive: true }); fs.writeFileSync(configPath, JSON.stringify(cfg, null, 2) + "\\n", { mode: 0o600 }); '@ | Set-Content -Path $agentTurnConfigPatchScriptPath -Encoding UTF8 @@ -126,6 +157,7 @@ $agentTurnConfigPatchExit = $LASTEXITCODE Remove-Item $agentTurnConfigPatchScriptPath -Force -ErrorAction SilentlyContinue Remove-Item Env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATCH -Force -ErrorAction SilentlyContinue Remove-Item Env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATH -Force -ErrorAction SilentlyContinue +Remove-Item Env:OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED -Force -ErrorAction SilentlyContinue if ($agentTurnConfigPatchExit -ne 0) { throw "agent turn config patch failed" }`; } diff --git a/src/agents/harness/native-hook-relay.ts b/src/agents/harness/native-hook-relay.ts index efe0a713c1d..d3b57c575ee 100644 --- a/src/agents/harness/native-hook-relay.ts +++ b/src/agents/harness/native-hook-relay.ts @@ -808,7 +808,7 @@ function ensureNativeHookRelayBridgeDir(): string { if (expectedUid !== undefined && stats.uid !== expectedUid) { throw new Error("unsafe native hook relay bridge directory owner"); } - if ((stats.mode & 0o077) !== 0) { + if (process.platform !== "win32" && (stats.mode & 0o077) !== 0) { chmodSync(bridgeDir, 0o700); const repaired = lstatSync(bridgeDir); if ((repaired.mode & 0o077) !== 0) { diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 68cb5b77629..68589d2ad49 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -513,6 +513,10 @@ async function runAutoPinnedPromptErrorRotationCase(params: { }); expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2); + await vi.waitFor(async () => { + const usageStats = await readUsageStats(agentDir); + expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number"); + }); const usageStats = await readUsageStats(agentDir); return { usageStats }; }); @@ -932,18 +936,21 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { expect(failoverAttributes.providerErrorType).toBe("overloaded_error"); expect(failoverAttributes.rawErrorPreview).toContain('"request_id":"sha256:'); - const failureStateUpdate = requireLogRecord( - logCapture.records, - "auth profile failure state updated", - ); - const failureStateAttributes = requireRecord( - failureStateUpdate.attributes, - "failure state attributes", - ); - expect(failureStateAttributes.event).toBe("auth_profile_failure_state_updated"); - expect(failureStateAttributes.runId).toBe("run:overloaded-logging"); - expect(failureStateAttributes.profileId).toBe(safeProfileId); - expect(failureStateAttributes.reason).toBe("overloaded"); + await vi.waitFor(async () => { + await logCapture.flush(); + const failureStateUpdate = requireLogRecord( + logCapture.records, + "auth profile failure state updated", + ); + const failureStateAttributes = requireRecord( + failureStateUpdate.attributes, + "failure state attributes", + ); + expect(failureStateAttributes.event).toBe("auth_profile_failure_state_updated"); + expect(failureStateAttributes.runId).toBe("run:overloaded-logging"); + expect(failureStateAttributes.profileId).toBe(safeProfileId); + expect(failureStateAttributes.reason).toBe("overloaded"); + }); }); it("rotates for overloaded prompt failures across auto-pinned profiles", async () => { diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a2d33f76204..a998dd0d911 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -2058,15 +2058,13 @@ export async function runEmbeddedPiAgent( (await advanceAuthProfile()) ) { if (failedPromptProfileId && promptProfileFailureReason) { - try { - await maybeMarkAuthProfileFailure({ - profileId: failedPromptProfileId, - reason: promptProfileFailureReason, - modelId, - }); - } catch (err) { + void maybeMarkAuthProfileFailure({ + profileId: failedPromptProfileId, + reason: promptProfileFailureReason, + modelId, + }).catch((err) => { log.warn(`prompt profile failure mark failed: ${String(err)}`); - } + }); } traceAttempts.push({ provider, diff --git a/src/agents/pi-embedded-runner/run/assistant-failover.ts b/src/agents/pi-embedded-runner/run/assistant-failover.ts index 5c19b7c3eef..1d8a72bf748 100644 --- a/src/agents/pi-embedded-runner/run/assistant-failover.ts +++ b/src/agents/pi-embedded-runner/run/assistant-failover.ts @@ -153,7 +153,7 @@ export async function handleAssistantFailover(params: { } const rotated = await params.advanceAuthProfile(); - void markFailedProfile(); + const markFailedProfilePromise = markFailedProfile(); if (params.timedOut && !params.isProbeSession && failedProfileId) { params.warn(`Profile ${failedProfileId} timed out. Trying next account...`); } @@ -163,6 +163,7 @@ export async function handleAssistantFailover(params: { ); } if (rotated) { + void markFailedProfilePromise; params.logAssistantFailoverDecision("rotate_profile"); await params.maybeBackoffBeforeOverloadFailover(params.failoverReason); return { @@ -175,6 +176,7 @@ export async function handleAssistantFailover(params: { }), }; } + await markFailedProfilePromise; if (params.idleTimedOut && params.allowSameModelIdleTimeoutRetry) { return sameModelIdleTimeoutRetry(); } diff --git a/src/gateway/gateway-codex-harness.live-helpers.test.ts b/src/gateway/gateway-codex-harness.live-helpers.test.ts index 54bc7409083..afa5c6bcde1 100644 --- a/src/gateway/gateway-codex-harness.live-helpers.test.ts +++ b/src/gateway/gateway-codex-harness.live-helpers.test.ts @@ -116,6 +116,14 @@ describe("gateway codex harness live helpers", () => { ).toBe(true); }); + it("accepts the completed-session status emitted by current codex", () => { + const text = "No active task is running."; + + expect( + EXPECTED_CODEX_STATUS_COMMAND_TEXT.some((expectedText) => text.includes(expectedText)), + ).toBe(true); + }); + it("rejects status prose for a different codex session", () => { const text = "OpenClaw is running on `openai/gpt-5.5` with low reasoning/text settings. Context is at `22k/272k` tokens, no compactions, and the current session is `agent:dev:other`."; diff --git a/src/gateway/gateway-codex-harness.live-helpers.ts b/src/gateway/gateway-codex-harness.live-helpers.ts index 5a167d5ff8b..d109a3dc1b6 100644 --- a/src/gateway/gateway-codex-harness.live-helpers.ts +++ b/src/gateway/gateway-codex-harness.live-helpers.ts @@ -89,6 +89,7 @@ export const EXPECTED_CODEX_STATUS_COMMAND_TEXT = [ "Model/status card shown above", "OpenClaw status shown above.", "Status shown above.", + "No active task is running.", ] as const; export function isExpectedCodexStatusCommandText(text: string): boolean { diff --git a/test/scripts/parallels-smoke-model.test.ts b/test/scripts/parallels-smoke-model.test.ts index ded2c1a76d2..a176ee81636 100644 --- a/test/scripts/parallels-smoke-model.test.ts +++ b/test/scripts/parallels-smoke-model.test.ts @@ -120,9 +120,11 @@ console.log(result); `; const batch = JSON.parse(runTsEval(source, { OPENAI_API_KEY: "sk-openai" })) as Array<{ path: string; + value: unknown; }>; expect(batch.map((entry) => entry.path)).toContain('agents.defaults.models["openai/gpt-5.5"]'); + expect(JSON.stringify(batch)).not.toContain("agentRuntime"); }); it("keeps snapshot, host, package, and quote helpers shared", () => { @@ -582,6 +584,10 @@ console.log(JSON.stringify({ expect(powershell).toContain("providerTimeoutConfigJson"); expect(powershell).toContain("models.providers.${providerId}"); expect(powershell).toContain("agents.defaults.models${configPathMapKey(modelId)}"); + expect(powershell).toContain("OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED"); + expect(powershell).toContain('selectedModelEntry.agentRuntime = { id: "pi" }'); + expect(powershell).toContain("delete selectedModelEntry.agentRuntime"); + expect(powershell).toContain("delete providerEntry.agentRuntime"); expect(powershell).toContain("configPathMapKey"); expect(powershell).toContain('transport: "sse"'); expect(powershell).toContain("Resolve-OpenClawCommand");