diff --git a/src/agents/pi-embedded-runner/run/payloads.errors.test.ts b/src/agents/pi-embedded-runner/run/payloads.errors.test.ts index e7ff7cc00c4..054958e1f36 100644 --- a/src/agents/pi-embedded-runner/run/payloads.errors.test.ts +++ b/src/agents/pi-embedded-runner/run/payloads.errors.test.ts @@ -378,6 +378,23 @@ describe("buildEmbeddedRunPayloads", () => { expect(payloads[1]?.text).not.toContain("missing"); }); + it("shows exec tool errors when assistant output claims success", () => { + const payloads = buildPayloads({ + assistantTexts: ["The script is ready to use and saved in your workspace."], + lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, + lastToolError: { + toolName: "exec", + error: "/bin/bash: line 1: python: command not found", + }, + }); + + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toBe("The script is ready to use and saved in your workspace."); + expect(payloads[1]?.isError).toBe(true); + expect(payloads[1]?.text).toContain("Exec"); + expect(payloads[1]?.text).not.toContain("python: command not found"); + }); + it("shows mutating tool errors when assistant output does not acknowledge the failure", () => { const payloads = buildPayloads({ assistantTexts: ["No issues found. The update is complete."], @@ -435,6 +452,17 @@ describe("buildEmbeddedRunPayloads", () => { expectSinglePayloadSummary(payloads, { text }); }); + it("suppresses exec warnings when assistant output explicitly acknowledges the command failure", () => { + const text = "I couldn't run the command because python was not found."; + const payloads = buildPayloads({ + assistantTexts: [text], + lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, + lastToolError: { toolName: "exec", error: "/bin/bash: line 1: python: command not found" }, + }); + + expectSinglePayloadSummary(payloads, { text }); + }); + it("does not treat session_status read failures as mutating when explicitly flagged", () => { const payloads = buildPayloads({ assistantTexts: ["Status loaded."], diff --git a/src/agents/pi-embedded-runner/run/payloads.test.ts b/src/agents/pi-embedded-runner/run/payloads.test.ts index 587ef82ef9f..236c33f6a96 100644 --- a/src/agents/pi-embedded-runner/run/payloads.test.ts +++ b/src/agents/pi-embedded-runner/run/payloads.test.ts @@ -88,11 +88,28 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => { expectSinglePayloadText(payloads, "Fixed."); }); - it("suppresses exec tool errors when verbose mode is off", () => { - expectNoPayloads({ + it("surfaces concise exec tool errors when verbose mode is off", () => { + const payloads = buildPayloads({ lastToolError: { toolName: "exec", error: "command failed" }, verboseLevel: "off", }); + + expectSingleToolErrorPayload(payloads, { + title: "Exec", + absentDetail: "command failed", + }); + }); + + it("surfaces concise bash tool errors when verbose mode is off", () => { + const payloads = buildPayloads({ + lastToolError: { toolName: "bash", error: "command failed" }, + verboseLevel: "off", + }); + + expectSingleToolErrorPayload(payloads, { + title: "Bash", + absentDetail: "command failed", + }); }); it("surfaces exec tool errors for cron sessions even when verbose mode is off", () => { @@ -132,12 +149,17 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => { }); }); - it("keeps non-timeout exec tool errors suppressed for cron sessions when verbose mode is off", () => { - expectNoPayloads({ + it("surfaces non-timeout exec tool errors for cron sessions without raw details", () => { + const payloads = buildPayloads({ lastToolError: { toolName: "exec", error: "Command not found" }, sessionKey: "agent:main:cron:job-1", verboseLevel: "off", }); + + expectSingleToolErrorPayload(payloads, { + title: "Exec", + absentDetail: "Command not found", + }); }); it("shows exec tool errors when verbose mode is on", () => { diff --git a/src/agents/pi-embedded-runner/run/payloads.ts b/src/agents/pi-embedded-runner/run/payloads.ts index ad7e014ebef..fa9387f7b53 100644 --- a/src/agents/pi-embedded-runner/run/payloads.ts +++ b/src/agents/pi-embedded-runner/run/payloads.ts @@ -24,10 +24,7 @@ import { normalizeTextForComparison, } from "../../pi-embedded-helpers.js"; import type { ToolResultFormat } from "../../pi-embedded-subscribe.shared-types.js"; -import { - extractAssistantThinking, - extractAssistantVisibleText, -} from "../../pi-embedded-utils.js"; +import { extractAssistantThinking, extractAssistantVisibleText } from "../../pi-embedded-utils.js"; import { isExecLikeToolName, type ToolErrorSummary } from "../../tool-error-summary.js"; import { isLikelyMutatingToolName } from "../../tool-mutation.js"; @@ -48,7 +45,7 @@ const RECOVERABLE_TOOL_ERROR_KEYWORDS = [ ] as const; const MUTATING_FAILURE_ACTION_PATTERN = - "(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|tool|action|operation)"; + "(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|run|execute|execution|command|script|shell|bash|exec|tool|action|operation)"; const MUTATING_FAILURE_INABILITY_PATTERN = new RegExp( `\\b(?:couldn't|could not|can't|cannot|unable to|am unable to|wasn't able to|was not able to|were unable to)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`, @@ -143,9 +140,6 @@ function resolveToolErrorWarningPolicy(params: { if (params.suppressToolErrorWarnings) { return { showWarning: false, includeDetails }; } - if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) { - return { showWarning: false, includeDetails }; - } // sessions_send timeouts and errors are transient inter-session communication // issues — the message may still have been delivered. Suppress warnings to // prevent raw error text from leaking into the chat surface (#23989). @@ -160,6 +154,9 @@ function resolveToolErrorWarningPolicy(params: { includeDetails, }; } + if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) { + return { showWarning: false, includeDetails }; + } if (params.suppressToolErrors) { return { showWarning: false, includeDetails }; }