fix(release): stabilize full validation gates

This commit is contained in:
Peter Steinberger
2026-05-09 16:18:09 +01:00
parent bcb4c8d597
commit 195e721211
15 changed files with 535 additions and 97 deletions

View File

@@ -694,6 +694,56 @@ describe("qa mock openai server", () => {
expect(payload.output?.[0]?.content?.[0]?.text).toContain("Status: complete");
});
// Regression: repeated calls to the same mock tool must not reuse one
// hard-coded call id. Call ids are derived from the tool name plus the
// serialized arguments, so two different "read" steps get distinct ids.
it("uses argument-scoped tool call ids for repeated tool names", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
});
// Ensure the mock server is torn down even if an assertion throws.
cleanups.push(async () => {
await server.stop();
});
const prompt =
"Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status.";
// First turn: prompt only — the mock plans its first "read" tool call.
const first = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
stream: false,
model: "gpt-5.5",
input: [{ role: "user", content: [{ type: "input_text", text: prompt }] }],
}),
});
const firstPayload = (await first.json()) as {
output?: Array<{ call_id?: string }>;
};
// Second turn: same prompt plus tool output — the mock advances to the
// next "read" step, whose arguments differ from the first call's.
const second = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
stream: false,
model: "gpt-5.5",
input: [
{ role: "user", content: [{ type: "input_text", text: prompt }] },
{
type: "function_call_output",
output:
"# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n",
},
],
}),
});
const secondPayload = (await second.json()) as {
output?: Array<{ call_id?: string }>;
};
// Both turns issue a "read" tool call, but with different arguments, so
// their call ids share the prefix while the hashed suffix differs.
expect(firstPayload.output?.[0]?.call_id).toMatch(/^call_mock_read_/);
expect(secondPayload.output?.[0]?.call_id).toMatch(/^call_mock_read_/);
expect(firstPayload.output?.[0]?.call_id).not.toBe(secondPayload.output?.[0]?.call_id);
});
it("continues repo-contract followthrough when a retry user item follows tool output", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
@@ -731,6 +781,91 @@ describe("qa mock openai server", () => {
expect(await response.text()).toContain('"arguments":"{\\"path\\":\\"SOUL.md\\"}"');
});
// Regression: structured (array-of-content-parts) tool output must be
// flattened and honored, so the repo-contract flow still advances even when
// a "Continue after compaction." user retry follows the tool result.
it("continues repo-contract followthrough from structured tool output", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
});
// Ensure the mock server is torn down even if an assertion throws.
cleanups.push(async () => {
await server.stop();
});
const prompt =
"Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status.";
const response = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
stream: true,
model: "gpt-5.5",
input: [
{ role: "user", content: [{ type: "input_text", text: prompt }] },
// Tool output delivered as content parts rather than a plain string.
{
type: "function_call_output",
output: [
{
type: "output_text",
text: "# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n",
},
],
},
// Retry-style user message appended after the tool result.
{
role: "user",
content: [{ type: "input_text", text: "Continue after compaction." }],
},
],
}),
});
expect(response.status).toBe(200);
// Having seen the "# Repo contract" output (AGENT.md already read), the
// mock's next planned step is reading SOUL.md.
expect(await response.text()).toContain('"arguments":"{\\"path\\":\\"SOUL.md\\"}"');
});
// Regression: when the newest evidence is transcript text from a user item
// (here the "# Execution style" note) rather than the extracted tool output,
// the flow must advance to the next contract step instead of repeating one.
it("advances repo-contract followthrough when transcript text is newer than extracted tool output", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
});
// Ensure the mock server is torn down even if an assertion throws.
cleanups.push(async () => {
await server.stop();
});
const prompt =
"Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status.";
const response = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
stream: true,
model: "gpt-5.5",
input: [
{ role: "user", content: [{ type: "input_text", text: prompt }] },
// Older evidence: the AGENT.md contents as a tool output string.
{
type: "function_call_output",
output:
"# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n",
},
// Newer evidence: SOUL.md contents surfaced as user transcript text.
{
role: "user",
content: [
{
type: "input_text",
text: "# Execution style\n\nStay brief, honest, and action-first.\n",
},
],
},
],
}),
});
expect(response.status).toBe(200);
// "# Execution style" means SOUL.md was already read, so the next planned
// step is reading FOLLOWTHROUGH_INPUT.md — not re-reading SOUL.md.
expect(await response.text()).toContain(
'"arguments":"{\\"path\\":\\"FOLLOWTHROUGH_INPUT.md\\"}"',
);
});
it("drives the compaction retry mutating tool parity flow", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
@@ -1259,6 +1394,66 @@ describe("qa mock openai server", () => {
expect(threadMemorySummary.status).toBe(200);
expect(JSON.stringify(await threadMemorySummary.json())).toContain("ORBIT-22");
const structuredThreadMemorySummary = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: false,
instructions:
"@openclaw Thread memory check: what is the hidden thread codename stored only in memory? Use memory tools first and reply only in this thread.",
input: [
{
type: "function_call_output",
output: {
text: "Thread-hidden codename: ORBIT-22.",
},
},
{
role: "user",
content: [
{
type: "input_text",
text: "Protocol note: acknowledged. Continue with the QA scenario plan.",
},
],
},
],
}),
});
expect(structuredThreadMemorySummary.status).toBe(200);
expect(JSON.stringify(await structuredThreadMemorySummary.json())).toContain("ORBIT-22");
const systemFallbackThreadMemorySummary = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: false,
input: [
{
role: "system",
content: "## /workspace/MEMORY.md\nThread-hidden codename: ORBIT-22.",
},
makeUserInput(
"@openclaw Thread memory check: what is the hidden thread codename stored only in memory? Use memory tools first and reply only in this thread.",
),
{
type: "function_call_output",
output: JSON.stringify({
results: [],
unavailable: true,
error: "database is not open",
}),
},
],
}),
});
expect(systemFallbackThreadMemorySummary.status).toBe(200);
expect(JSON.stringify(await systemFallbackThreadMemorySummary.json())).toContain("ORBIT-22");
const memoryFollowup = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {

View File

@@ -1,3 +1,4 @@
import { createHash } from "node:crypto";
import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
import { setTimeout as sleep } from "node:timers/promises";
import { escapeRegExp } from "openclaw/plugin-sdk/text-runtime";
@@ -177,6 +178,7 @@ type MockScenarioState = {
const MOCK_OPENAI_MAX_BODY_BYTES = 16 * 1024 * 1024;
const MOCK_OPENAI_BODY_TIMEOUT_MS = 30_000;
const MOCK_OPENAI_DEBUG_REQUEST_LIMIT = 200;
function readBody(req: IncomingMessage): Promise<string> {
return readRequestBodyWithLimit(req, {
@@ -292,17 +294,74 @@ function isToolOutputContinuationText(text: string) {
);
}
/**
 * Normalizes a function_call_output payload into plain text.
 *
 * Accepts the shapes tool output may arrive in:
 * - a plain string (returned as-is),
 * - an array of strings and/or content parts (string-bearing entries joined
 *   with newlines; empty or unrecognized entries are dropped),
 * - a single object carrying a `text`, `output_text`, or `content` string
 *   field (checked in that priority order).
 * A single object with none of those fields falls back to its JSON
 * serialization so scenario matchers can still inspect it; anything
 * unserializable (e.g. circular) or unrecognized yields "".
 */
function stringifyFunctionCallOutput(output: unknown): string {
  // Shared field lookup for content-part-like objects. Returns undefined when
  // no string-bearing field is present, so callers can distinguish "no field"
  // from an intentionally empty string value.
  const recordText = (value: object): string | undefined => {
    const record = value as Record<string, unknown>;
    for (const key of ["text", "output_text", "content"] as const) {
      const candidate = record[key];
      if (typeof candidate === "string") {
        return candidate;
      }
    }
    return undefined;
  };
  if (typeof output === "string") {
    return output;
  }
  if (Array.isArray(output)) {
    return output
      .map((entry) => {
        if (typeof entry === "string") {
          return entry;
        }
        if (!entry || typeof entry !== "object") {
          return "";
        }
        return recordText(entry) ?? "";
      })
      .filter(Boolean)
      .join("\n");
  }
  if (output && typeof output === "object") {
    const text = recordText(output);
    if (text !== undefined) {
      return text;
    }
    // Unknown object shape: surface its JSON rather than discarding it.
    try {
      return JSON.stringify(output);
    } catch {
      return "";
    }
  }
  return "";
}
/**
 * Returns the flattened text of a function_call_output input item, or "" for
 * any other item type.
 */
function extractFunctionCallOutputText(item: ResponsesInputItem) {
  return item.type === "function_call_output"
    ? stringifyFunctionCallOutput(item.output)
    : "";
}
function extractToolOutput(input: ResponsesInputItem[]) {
const lastUserIndex = findLastUserIndex(input);
for (let index = input.length - 1; index > lastUserIndex; index -= 1) {
const item = input[index];
if (item.type === "function_call_output" && typeof item.output === "string" && item.output) {
return item.output;
const output = extractFunctionCallOutputText(item);
if (output) {
return output;
}
}
for (let index = input.length - 1; index >= 0; index -= 1) {
const item = input[index];
if (item.type === "function_call_output" && typeof item.output === "string" && item.output) {
const output = extractFunctionCallOutputText(item);
if (output) {
const laterUserTexts = input
.slice(index + 1)
.filter((laterItem) => laterItem.role === "user" && Array.isArray(laterItem.content))
@@ -312,7 +371,7 @@ function extractToolOutput(input: ResponsesInputItem[]) {
laterUserTexts.length > 0 &&
laterUserTexts.every((text) => isToolOutputContinuationText(text))
) {
return item.output;
return output;
}
continue;
}
@@ -320,6 +379,17 @@ function extractToolOutput(input: ResponsesInputItem[]) {
return "";
}
/**
 * Scans the conversation from newest to oldest and returns the text of the
 * most recent function_call_output item, ignoring where later user messages
 * sit. Returns "" when no tool output is present.
 */
function extractLatestToolOutput(input: ResponsesInputItem[]) {
  // Copy before reversing so the caller's array is never mutated.
  for (const item of [...input].reverse()) {
    const text = extractFunctionCallOutputText(item);
    if (text) {
      return text;
    }
  }
  return "";
}
function extractInputText(content: unknown[]): string {
return content
.filter(
@@ -348,6 +418,27 @@ function extractAllUserTexts(input: ResponsesInputItem[]) {
return texts;
}
/**
 * Collects the text of every system-role input item, joined with newlines.
 * String content is trimmed; array content is flattened via extractInputText.
 * Non-system items and items with no usable text are skipped.
 */
function extractSystemInputText(input: ResponsesInputItem[]) {
  const pieces = input
    .filter((item) => item.role === "system")
    .map((item) => {
      if (typeof item.content === "string") {
        return item.content.trim();
      }
      return Array.isArray(item.content) ? extractInputText(item.content) : "";
    })
    .filter(Boolean);
  return pieces.join("\n");
}
function extractAllInputTexts(input: ResponsesInputItem[]) {
const texts: string[] = [];
for (const item of input) {
@@ -469,14 +560,21 @@ function readTargetFromPrompt(prompt: string) {
}
function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>): StreamEvent[] {
const callId = `call_mock_${name}_1`;
const serialized = JSON.stringify(args);
const callSuffix = createHash("sha1")
.update(name)
.update("\0")
.update(serialized)
.digest("hex")
.slice(0, 10);
const callId = `call_mock_${name}_${callSuffix}`;
const itemId = `fc_mock_${name}_${callSuffix}`;
return [
{
type: "response.output_item.added",
item: {
type: "function_call",
id: `fc_mock_${name}_1`,
id: itemId,
call_id: callId,
name,
arguments: "",
@@ -487,7 +585,7 @@ function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>
type: "response.output_item.done",
item: {
type: "function_call",
id: `fc_mock_${name}_1`,
id: itemId,
call_id: callId,
name,
arguments: serialized,
@@ -496,12 +594,12 @@ function buildToolCallEventsWithArgs(name: string, args: Record<string, unknown>
{
type: "response.completed",
response: {
id: `resp_mock_${name}_1`,
id: `resp_mock_${name}_${callSuffix}`,
status: "completed",
output: [
{
type: "function_call",
id: `fc_mock_${name}_1`,
id: itemId,
call_id: callId,
name,
arguments: serialized,
@@ -745,7 +843,14 @@ function buildAssistantText(
) {
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
const toolJson = parseToolOutputJson(toolOutput);
const scenarioToolOutput =
toolOutput ||
(/thread memory check|session memory ranking check|memory tools check|repo contract followthrough check/i.test(
extractAllRequestTexts(input, body),
)
? extractLatestToolOutput(input)
: "");
const toolJson = parseToolOutputJson(scenarioToolOutput);
const userTexts = extractAllUserTexts(input);
const allInputText = extractAllRequestTexts(input, body);
const rememberedFact = extractRememberedFact(userTexts);
@@ -755,8 +860,8 @@ function buildAssistantText(
? toolJson.text
: Array.isArray(toolJson?.results)
? JSON.stringify(toolJson.results)
: toolOutput;
const orbitCode = extractOrbitCode(memorySnippet);
: scenarioToolOutput;
const orbitCode = extractOrbitCode(memorySnippet) ?? extractOrbitCode(allInputText);
const mediaPath = /MEDIA:([^\n]+)/.exec(toolOutput)?.[1]?.trim();
const exactReplyDirective =
extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText);
@@ -816,10 +921,11 @@ function buildAssistantText(
if (/tool continuity check/i.test(prompt) && toolOutput) {
return `Protocol note: model switch handoff confirmed on ${model || "the requested model"}. QA mission from QA_KICKOFF_TASK.md still applies: understand this OpenClaw repo from source + docs before acting.`;
}
if (toolOutput && /repo contract followthrough check/i.test(allInputText)) {
if ((toolOutput || allInputText) && /repo contract followthrough check/i.test(allInputText)) {
const repoEvidenceText = [scenarioToolOutput, allInputText].filter(Boolean).join("\n");
if (
/successfully (?:wrote|created|updated|replaced)/i.test(toolOutput) ||
/status:\s*complete/i.test(toolOutput)
/successfully (?:wrote|created|updated|replaced)/i.test(repoEvidenceText) ||
/status:\s*complete/i.test(repoEvidenceText)
) {
return [
"Read: AGENT.md, SOUL.md, FOLLOWTHROUGH_INPUT.md",
@@ -1250,8 +1356,15 @@ async function buildResponsesPayload(
const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
const prompt = extractLastUserText(input);
const toolOutput = extractToolOutput(input);
const toolJson = parseToolOutputJson(toolOutput);
const allInputText = extractAllRequestTexts(input, body);
const scenarioToolOutput =
toolOutput ||
(/thread memory check|session memory ranking check|memory tools check|repo contract followthrough check/i.test(
allInputText,
)
? extractLatestToolOutput(input)
: "");
const toolJson = parseToolOutputJson(scenarioToolOutput);
const exactReplyDirective =
extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText);
const exactMarkerDirective =
@@ -1311,7 +1424,7 @@ async function buildResponsesPayload(
return buildAssistantEvents("BETA-OK");
}
if (QA_REASONING_ONLY_RECOVERY_PROMPT_RE.test(allInputText)) {
if (!toolOutput) {
if (!scenarioToolOutput) {
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
}
if (!hasReasoningOnlyRetryInstruction) {
@@ -1323,7 +1436,7 @@ async function buildResponsesPayload(
return buildAssistantEvents("REASONING-RECOVERED-OK");
}
if (QA_REASONING_ONLY_SIDE_EFFECT_PROMPT_RE.test(allInputText)) {
if (!toolOutput) {
if (!scenarioToolOutput) {
return buildToolCallEventsWithArgs("write", {
path: "reasoning-only-side-effect.txt",
content: "side effects already happened\n",
@@ -1649,7 +1762,7 @@ async function buildResponsesPayload(
return buildAssistantEvents("NONE");
}
if (/session memory ranking check/i.test(prompt)) {
if (!toolOutput) {
if (!scenarioToolOutput) {
return buildToolCallEventsWithArgs("memory_search", {
query: "current Project Nebula codename ORBIT-10",
maxResults: 3,
@@ -1684,7 +1797,15 @@ async function buildResponsesPayload(
}
}
if (/thread memory check/i.test(allInputText)) {
if (!toolOutput) {
const transcriptOrbitCode =
extractOrbitCode(allInputText) ??
(scenarioToolOutput ? extractOrbitCode(extractSystemInputText(input)) : null);
if (transcriptOrbitCode) {
return buildAssistantEvents(
`Protocol note: I checked memory in-thread and the hidden thread codename is ${transcriptOrbitCode}.`,
);
}
if (!scenarioToolOutput) {
return buildToolCallEventsWithArgs("memory_search", {
query: "hidden thread codename ORBIT-22",
maxResults: 3,
@@ -1752,18 +1873,44 @@ async function buildResponsesPayload(
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
}
if (/repo contract followthrough check/i.test(allInputText)) {
if (!toolOutput) {
const repoEvidenceText = [scenarioToolOutput, allInputText].filter(Boolean).join("\n");
if (
/successfully (?:wrote|created|updated|replaced)/i.test(repoEvidenceText) ||
/status:\s*complete/i.test(repoEvidenceText)
) {
return buildAssistantEvents(
[
"Read: AGENT.md, SOUL.md, FOLLOWTHROUGH_INPUT.md",
"Wrote: repo-contract-summary.txt",
"Status: complete",
].join("\n"),
);
}
if (!scenarioToolOutput) {
if (
repoEvidenceText.includes("Mission: prove you followed the repo contract.") &&
repoEvidenceText.includes("Evidence path: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md")
) {
return buildToolCallEventsWithArgs("write", {
path: "repo-contract-summary.txt",
content: [
"Mission: prove you followed the repo contract.",
"Evidence: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md",
"Status: complete",
].join("\n"),
});
}
if (/# Execution style/i.test(repoEvidenceText)) {
return buildToolCallEventsWithArgs("read", { path: "FOLLOWTHROUGH_INPUT.md" });
}
if (/# Repo contract/i.test(repoEvidenceText)) {
return buildToolCallEventsWithArgs("read", { path: "SOUL.md" });
}
return buildToolCallEventsWithArgs("read", { path: "AGENT.md" });
}
if (toolOutput.includes("# Repo contract")) {
return buildToolCallEventsWithArgs("read", { path: "SOUL.md" });
}
if (toolOutput.includes("# Execution style")) {
return buildToolCallEventsWithArgs("read", { path: "FOLLOWTHROUGH_INPUT.md" });
}
if (
toolOutput.includes("Mission: prove you followed the repo contract.") &&
toolOutput.includes("Evidence path: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md")
repoEvidenceText.includes("Mission: prove you followed the repo contract.") &&
repoEvidenceText.includes("Evidence path: AGENT.md -> SOUL.md -> FOLLOWTHROUGH_INPUT.md")
) {
return buildToolCallEventsWithArgs("write", {
path: "repo-contract-summary.txt",
@@ -1774,6 +1921,12 @@ async function buildResponsesPayload(
].join("\n"),
});
}
if (repoEvidenceText.includes("# Execution style")) {
return buildToolCallEventsWithArgs("read", { path: "FOLLOWTHROUGH_INPUT.md" });
}
if (repoEvidenceText.includes("# Repo contract")) {
return buildToolCallEventsWithArgs("read", { path: "SOUL.md" });
}
}
if (
canCallSessionsSpawn &&
@@ -2288,8 +2441,8 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
plannedToolArgs: extractPlannedToolArgs(events),
};
requests.push(lastRequest);
if (requests.length > 50) {
requests.splice(0, requests.length - 50);
if (requests.length > MOCK_OPENAI_DEBUG_REQUEST_LIMIT) {
requests.splice(0, requests.length - MOCK_OPENAI_DEBUG_REQUEST_LIMIT);
}
if (body.stream === false) {
const completion = events.at(-1);
@@ -2344,8 +2497,8 @@ export async function startQaMockOpenAiServer(params?: { host?: string; port?: n
plannedToolArgs: extractPlannedToolArgs(events),
};
requests.push(lastRequest);
if (requests.length > 50) {
requests.splice(0, requests.length - 50);
if (requests.length > MOCK_OPENAI_DEBUG_REQUEST_LIMIT) {
requests.splice(0, requests.length - MOCK_OPENAI_DEBUG_REQUEST_LIMIT);
}
if (body.stream === true) {
writeAnthropicSse(res, streamEvents);

View File

@@ -6,6 +6,9 @@ import {
describeStickerImageSpy,
getCachedStickerSpy,
} from "./bot.media.test-utils.js";
import { resolveMedia } from "./bot/delivery.resolve-media.js";
import type { TelegramContext } from "./bot/types.js";
import type { TelegramTransport } from "./fetch.js";
describe("telegram stickers", () => {
const STICKER_TEST_TIMEOUT_MS = process.platform === "win32" ? 30_000 : 20_000;
@@ -35,7 +38,12 @@ describe("telegram stickers", () => {
it(
"refreshes cached sticker metadata on cache hit",
async () => {
const { handler, proxyFetch, replySpy, runtimeError } = await createStaticStickerHarness();
const proxyFetch = vi.fn().mockResolvedValue(
new Response(Buffer.from(new Uint8Array([0x52, 0x49, 0x46, 0x46])), {
status: 200,
headers: { "content-type": "image/webp" },
}),
);
getCachedStickerSpy.mockReturnValue({
fileId: "old_file_id",
@@ -46,29 +54,35 @@ describe("telegram stickers", () => {
cachedAt: "2026-01-20T10:00:00.000Z",
});
await handler({
message: {
message_id: 103,
chat: { id: 1234, type: "private" },
from: { id: 777, is_bot: false, first_name: "Ada" },
sticker: {
file_id: "new_file_id",
file_unique_id: "sticker_unique_456",
type: "regular",
width: 512,
height: 512,
is_animated: false,
is_video: false,
emoji: "🔥",
set_name: "NewSet",
const media = await resolveMedia({
maxBytes: 2 * 1024 * 1024,
token: "tok",
transport: {
fetch: proxyFetch as unknown as typeof fetch,
sourceFetch: proxyFetch as unknown as typeof fetch,
} satisfies TelegramTransport,
ctx: {
message: {
message_id: 103,
chat: { id: 1234, type: "private" },
from: { id: 777, is_bot: false, first_name: "Ada" },
sticker: {
file_id: "new_file_id",
file_unique_id: "sticker_unique_456",
type: "regular",
width: 512,
height: 512,
is_animated: false,
is_video: false,
emoji: "🔥",
set_name: "NewSet",
},
date: 1736380800,
},
date: 1736380800,
},
me: { username: "openclaw_bot" },
getFile: async () => ({ file_path: "stickers/sticker.webp" }),
getFile: async () => ({ file_path: "stickers/sticker.webp" }),
} as TelegramContext,
});
expect(runtimeError).not.toHaveBeenCalled();
expect(cacheStickerSpy).toHaveBeenCalledWith(
expect.objectContaining({
fileId: "new_file_id",
@@ -76,9 +90,8 @@ describe("telegram stickers", () => {
setName: "NewSet",
}),
);
const payload = replySpy.mock.calls[0][0];
expect(payload.Sticker?.fileId).toBe("new_file_id");
expect(payload.Sticker?.cachedDescription).toBe("Cached description");
expect(media?.stickerMetadata?.fileId).toBe("new_file_id");
expect(media?.stickerMetadata?.cachedDescription).toBe("Cached description");
expect(proxyFetch).toHaveBeenCalledWith(
"https://api.telegram.org/file/bottok/stickers/sticker.webp",
expect.objectContaining({ redirect: "manual" }),

View File

@@ -263,12 +263,16 @@ export class LinuxGuest {
) {}
exec(args: string[], options: GuestExecOptions = {}): string {
const result = run("prlctl", ["exec", this.vmName, "/usr/bin/env", "HOME=/root", ...args], {
check: false,
input: options.input,
quiet: true,
timeoutMs: this.phases.remainingTimeoutMs(options.timeoutMs),
});
const result = run(
"prlctl",
["exec", this.vmName, "/usr/bin/env", "HOME=/root", "OPENCLAW_ALLOW_ROOT=1", ...args],
{
check: false,
input: options.input,
quiet: true,
timeoutMs: this.phases.remainingTimeoutMs(options.timeoutMs),
},
);
this.phases.append(result.stdout);
this.phases.append(result.stderr);
throwIfFailed("Linux guest command", result, options.check);
@@ -279,7 +283,16 @@ export class LinuxGuest {
const scriptPath = `/tmp/openclaw-parallels-${process.pid}-${Date.now()}.sh`;
const write = run(
"prlctl",
["exec", this.vmName, "/usr/bin/env", "HOME=/root", "dd", `of=${scriptPath}`, "bs=1048576"],
[
"exec",
this.vmName,
"/usr/bin/env",
"HOME=/root",
"OPENCLAW_ALLOW_ROOT=1",
"dd",
`of=${scriptPath}`,
"bs=1048576",
],
{
input: `umask 022\n${script}`,
quiet: true,

View File

@@ -599,7 +599,7 @@ PY`);
rm -f /tmp/openclaw-parallels-linux-gateway.log
setsid sh -lc ` +
shellQuote(
`exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json${bonjourEnv} ${this.auth.apiKeyEnv}=${shellQuote(
`exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_ALLOW_ROOT=1${bonjourEnv} ${this.auth.apiKeyEnv}=${shellQuote(
this.auth.apiKeyValue,
)} openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-linux-gateway.log 2>&1`,
) +
@@ -622,7 +622,7 @@ setsid sh -lc ` +
: ["openclaw", "gateway", "status", "--deep"];
const result = run(
"prlctl",
["exec", this.options.vmName, "/usr/bin/env", "HOME=/root", ...args],
["exec", this.options.vmName, "/usr/bin/env", "HOME=/root", "OPENCLAW_ALLOW_ROOT=1", ...args],
{
check: false,
quiet: true,
@@ -646,6 +646,7 @@ setsid sh -lc ` +
this.options.vmName,
"/usr/bin/env",
"HOME=/root",
"OPENCLAW_ALLOW_ROOT=1",
"openclaw",
"gateway",
"status",
@@ -728,7 +729,7 @@ for attempt in 1 2; do
rm -f "$HOME/.openclaw/agents/main/sessions/$session_id.jsonl"
output_file="$(mktemp)"
set +e
/usr/bin/env ${shellQuote(`${this.auth.apiKeyEnv}=${this.auth.apiKeyValue}`)} openclaw agent --local --agent main --session-id "$session_id" --message ${shellQuote(
/usr/bin/env OPENCLAW_ALLOW_ROOT=1 ${shellQuote(`${this.auth.apiKeyEnv}=${this.auth.apiKeyValue}`)} openclaw agent --local --agent main --session-id "$session_id" --message ${shellQuote(
"Reply with exact ASCII text OK only.",
)} --thinking minimal --timeout ${resolveParallelsModelTimeoutSeconds("linux")} --json >"$output_file" 2>&1
rc=$?

View File

@@ -49,7 +49,7 @@ for attempt in 1 2; do
rm -f "$HOME/.openclaw/agents/main/sessions/$session_id.jsonl"
output_file="$(mktemp)"
set +e
${input.auth.apiKeyEnv}=${shellQuote(input.auth.apiKeyValue)} ${command} agent --local --agent main --session-id "$session_id" --message 'Reply with exact ASCII text OK only.' --thinking minimal --json >"$output_file" 2>&1
OPENCLAW_ALLOW_ROOT="\${OPENCLAW_ALLOW_ROOT:-}" ${input.auth.apiKeyEnv}=${shellQuote(input.auth.apiKeyValue)} ${command} agent --local --agent main --session-id "$session_id" --message 'Reply with exact ASCII text OK only.' --thinking minimal --json >"$output_file" 2>&1
rc=$?
set -e
cat "$output_file"
@@ -164,16 +164,23 @@ PY
stop_openclaw_gateway_processes() {
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 /opt/homebrew/bin/openclaw gateway stop || true
pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true
if command -v lsof >/dev/null 2>&1; then
pids="$(lsof -tiTCP:18789 -sTCP:LISTEN 2>/dev/null || true)"
if [ -n "$pids" ]; then
kill $pids >/dev/null 2>&1 || true
sleep 2
kill -9 $pids >/dev/null 2>&1 || true
fi
fi
}
start_openclaw_gateway() {
if /opt/homebrew/bin/openclaw gateway restart; then
return
fi
pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true
stop_openclaw_gateway_processes
rm -f /tmp/openclaw-parallels-macos-gateway.log
nohup env OPENCLAW_HOME="$HOME" OPENCLAW_STATE_DIR="$HOME/.openclaw" OPENCLAW_CONFIG_PATH="$HOME/.openclaw/openclaw.json" ${input.auth.apiKeyEnv}=${shellQuote(
trap '' HUP
/usr/bin/env OPENCLAW_HOME="$HOME" OPENCLAW_STATE_DIR="$HOME/.openclaw" OPENCLAW_CONFIG_PATH="$HOME/.openclaw/openclaw.json" ${input.auth.apiKeyEnv}=${shellQuote(
input.auth.apiKeyValue,
)} /opt/homebrew/bin/openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-macos-gateway.log 2>&1 </dev/null &
sleep 1
}
wait_for_gateway() {
deadline=$((SECONDS + 240))
@@ -251,6 +258,7 @@ ${windowsAssertAgentOkScript(input)}`;
export function linuxUpdateScript(input: NpmUpdateScriptInput): string {
return String.raw`set -euo pipefail
export PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/snap/bin
export OPENCLAW_ALLOW_ROOT=1
scrub_future_plugin_entries() {
node - <<'JS'
const fs = require("node:fs");
@@ -273,14 +281,14 @@ fs.writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n");
JS
}
stop_openclaw_gateway_processes() {
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 openclaw gateway stop || true
OPENCLAW_DISABLE_BUNDLED_PLUGINS=1 OPENCLAW_ALLOW_ROOT=1 openclaw gateway stop || true
pkill -f 'openclaw.*gateway' >/dev/null 2>&1 || true
}
start_openclaw_gateway() {
pkill -f "openclaw gateway run" >/dev/null 2>&1 || true
rm -f /tmp/openclaw-parallels-linux-gateway.log
setsid sh -lc ${shellQuote(
`exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_DISABLE_BONJOUR=1 ${input.auth.apiKeyEnv}=${shellQuote(
`exec env OPENCLAW_HOME=/root OPENCLAW_STATE_DIR=/root/.openclaw OPENCLAW_CONFIG_PATH=/root/.openclaw/openclaw.json OPENCLAW_DISABLE_BONJOUR=1 OPENCLAW_ALLOW_ROOT=1 ${input.auth.apiKeyEnv}=${shellQuote(
input.auth.apiKeyValue,
)} openclaw gateway run --bind loopback --port 18789 --force >/tmp/openclaw-parallels-linux-gateway.log 2>&1`,
)} >/dev/null 2>&1 < /dev/null &

View File

@@ -805,6 +805,7 @@ class NpmUpdateSmoke {
this.linuxVm,
"/usr/bin/env",
"HOME=/root",
"OPENCLAW_ALLOW_ROOT=1",
"PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/snap/bin",
"bash",
scriptPath,

View File

@@ -81,16 +81,26 @@ export function windowsAgentTurnConfigPatchScript(modelId: string): string {
});
return `$agentTurnConfigPatchPath = $env:OPENCLAW_CONFIG_PATH
if (-not $agentTurnConfigPatchPath) { $agentTurnConfigPatchPath = Join-Path $env:USERPROFILE '.openclaw\\openclaw.json' }
$agentTurnVersionText = Invoke-OpenClaw --version 2>$null | Out-String
$agentTurnRuntimePolicySupported = $false
if ($agentTurnVersionText -match 'OpenClaw\\s+(\\d{4})\\.(\\d{1,2})\\.(\\d{1,2})') {
$agentTurnYear = [int]$Matches[1]
$agentTurnMonth = [int]$Matches[2]
$agentTurnDay = [int]$Matches[3]
$agentTurnRuntimePolicySupported = ($agentTurnYear -gt 2026) -or ($agentTurnYear -eq 2026 -and (($agentTurnMonth -gt 5) -or ($agentTurnMonth -eq 5 -and $agentTurnDay -ge 9)))
}
$env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATCH = @'
${payloadJson}
'@
$env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATH = $agentTurnConfigPatchPath
$env:OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED = if ($agentTurnRuntimePolicySupported) { '1' } else { '0' }
$agentTurnConfigPatchScriptPath = Join-Path ([System.IO.Path]::GetTempPath()) 'openclaw-agent-turn-config-patch.cjs'
@'
const fs = require("node:fs");
const path = require("node:path");
const configPath = process.env.OPENCLAW_PARALLELS_AGENT_CONFIG_PATH;
const payload = JSON.parse(process.env.OPENCLAW_PARALLELS_AGENT_CONFIG_PATCH || "{}");
const canWriteAgentRuntime = process.env.OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED === "1";
function readJsonFile(filePath) {
return JSON.parse(fs.readFileSync(filePath, "utf8").replace(/^\\uFEFF/u, ""));
}
@@ -118,6 +128,27 @@ for (const op of payload.operations || []) {
}
}
}
const selectedModelEntry = cfg.agents.defaults.models[payload.modelId];
if (selectedModelEntry && typeof selectedModelEntry === "object" && !Array.isArray(selectedModelEntry)) {
if (canWriteAgentRuntime) {
selectedModelEntry.agentRuntime = { id: "pi" };
} else {
delete selectedModelEntry.agentRuntime;
}
}
const providerId = String(payload.modelId || "").split("/", 1)[0];
const providerModelId = String(payload.modelId || "").slice(providerId.length + 1);
const providerEntry = cfg.models && typeof cfg.models === "object" && cfg.models.providers && typeof cfg.models.providers === "object" ? cfg.models.providers[providerId] : undefined;
if (providerEntry && typeof providerEntry === "object" && !Array.isArray(providerEntry)) {
delete providerEntry.agentRuntime;
if (Array.isArray(providerEntry.models)) {
for (const model of providerEntry.models) {
if (model && typeof model === "object" && (model.id === providerModelId || model.id === payload.modelId || model.name === providerModelId || model.name === payload.modelId)) {
delete model.agentRuntime;
}
}
}
}
fs.mkdirSync(path.dirname(configPath), { recursive: true });
fs.writeFileSync(configPath, JSON.stringify(cfg, null, 2) + "\\n", { mode: 0o600 });
'@ | Set-Content -Path $agentTurnConfigPatchScriptPath -Encoding UTF8
@@ -126,6 +157,7 @@ $agentTurnConfigPatchExit = $LASTEXITCODE
Remove-Item $agentTurnConfigPatchScriptPath -Force -ErrorAction SilentlyContinue
Remove-Item Env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATCH -Force -ErrorAction SilentlyContinue
Remove-Item Env:OPENCLAW_PARALLELS_AGENT_CONFIG_PATH -Force -ErrorAction SilentlyContinue
Remove-Item Env:OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED -Force -ErrorAction SilentlyContinue
if ($agentTurnConfigPatchExit -ne 0) { throw "agent turn config patch failed" }`;
}

View File

@@ -808,7 +808,7 @@ function ensureNativeHookRelayBridgeDir(): string {
if (expectedUid !== undefined && stats.uid !== expectedUid) {
throw new Error("unsafe native hook relay bridge directory owner");
}
if ((stats.mode & 0o077) !== 0) {
if (process.platform !== "win32" && (stats.mode & 0o077) !== 0) {
chmodSync(bridgeDir, 0o700);
const repaired = lstatSync(bridgeDir);
if ((repaired.mode & 0o077) !== 0) {

View File

@@ -513,6 +513,10 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
});
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
await vi.waitFor(async () => {
const usageStats = await readUsageStats(agentDir);
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
});
const usageStats = await readUsageStats(agentDir);
return { usageStats };
});
@@ -932,18 +936,21 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
expect(failoverAttributes.providerErrorType).toBe("overloaded_error");
expect(failoverAttributes.rawErrorPreview).toContain('"request_id":"sha256:');
const failureStateUpdate = requireLogRecord(
logCapture.records,
"auth profile failure state updated",
);
const failureStateAttributes = requireRecord(
failureStateUpdate.attributes,
"failure state attributes",
);
expect(failureStateAttributes.event).toBe("auth_profile_failure_state_updated");
expect(failureStateAttributes.runId).toBe("run:overloaded-logging");
expect(failureStateAttributes.profileId).toBe(safeProfileId);
expect(failureStateAttributes.reason).toBe("overloaded");
await vi.waitFor(async () => {
await logCapture.flush();
const failureStateUpdate = requireLogRecord(
logCapture.records,
"auth profile failure state updated",
);
const failureStateAttributes = requireRecord(
failureStateUpdate.attributes,
"failure state attributes",
);
expect(failureStateAttributes.event).toBe("auth_profile_failure_state_updated");
expect(failureStateAttributes.runId).toBe("run:overloaded-logging");
expect(failureStateAttributes.profileId).toBe(safeProfileId);
expect(failureStateAttributes.reason).toBe("overloaded");
});
});
it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {

View File

@@ -2058,15 +2058,13 @@ export async function runEmbeddedPiAgent(
(await advanceAuthProfile())
) {
if (failedPromptProfileId && promptProfileFailureReason) {
try {
await maybeMarkAuthProfileFailure({
profileId: failedPromptProfileId,
reason: promptProfileFailureReason,
modelId,
});
} catch (err) {
void maybeMarkAuthProfileFailure({
profileId: failedPromptProfileId,
reason: promptProfileFailureReason,
modelId,
}).catch((err) => {
log.warn(`prompt profile failure mark failed: ${String(err)}`);
}
});
}
traceAttempts.push({
provider,

View File

@@ -153,7 +153,7 @@ export async function handleAssistantFailover(params: {
}
const rotated = await params.advanceAuthProfile();
void markFailedProfile();
const markFailedProfilePromise = markFailedProfile();
if (params.timedOut && !params.isProbeSession && failedProfileId) {
params.warn(`Profile ${failedProfileId} timed out. Trying next account...`);
}
@@ -163,6 +163,7 @@ export async function handleAssistantFailover(params: {
);
}
if (rotated) {
void markFailedProfilePromise;
params.logAssistantFailoverDecision("rotate_profile");
await params.maybeBackoffBeforeOverloadFailover(params.failoverReason);
return {
@@ -175,6 +176,7 @@ export async function handleAssistantFailover(params: {
}),
};
}
await markFailedProfilePromise;
if (params.idleTimedOut && params.allowSameModelIdleTimeoutRetry) {
return sameModelIdleTimeoutRetry();
}

View File

@@ -116,6 +116,14 @@ describe("gateway codex harness live helpers", () => {
).toBe(true);
});
it("accepts the completed-session status emitted by current codex", () => {
const text = "No active task is running.";
expect(
EXPECTED_CODEX_STATUS_COMMAND_TEXT.some((expectedText) => text.includes(expectedText)),
).toBe(true);
});
it("rejects status prose for a different codex session", () => {
const text =
"OpenClaw is running on `openai/gpt-5.5` with low reasoning/text settings. Context is at `22k/272k` tokens, no compactions, and the current session is `agent:dev:other`.";

View File

@@ -89,6 +89,7 @@ export const EXPECTED_CODEX_STATUS_COMMAND_TEXT = [
"Model/status card shown above",
"OpenClaw status shown above.",
"Status shown above.",
"No active task is running.",
] as const;
export function isExpectedCodexStatusCommandText(text: string): boolean {

View File

@@ -120,9 +120,11 @@ console.log(result);
`;
const batch = JSON.parse(runTsEval(source, { OPENAI_API_KEY: "sk-openai" })) as Array<{
path: string;
value: unknown;
}>;
expect(batch.map((entry) => entry.path)).toContain('agents.defaults.models["openai/gpt-5.5"]');
expect(JSON.stringify(batch)).not.toContain("agentRuntime");
});
it("keeps snapshot, host, package, and quote helpers shared", () => {
@@ -582,6 +584,10 @@ console.log(JSON.stringify({
expect(powershell).toContain("providerTimeoutConfigJson");
expect(powershell).toContain("models.providers.${providerId}");
expect(powershell).toContain("agents.defaults.models${configPathMapKey(modelId)}");
expect(powershell).toContain("OPENCLAW_PARALLELS_AGENT_RUNTIME_POLICY_SUPPORTED");
expect(powershell).toContain('selectedModelEntry.agentRuntime = { id: "pi" }');
expect(powershell).toContain("delete selectedModelEntry.agentRuntime");
expect(powershell).toContain("delete providerEntry.agentRuntime");
expect(powershell).toContain("configPathMapKey");
expect(powershell).toContain('transport: "sse"');
expect(powershell).toContain("Resolve-OpenClawCommand");