fix(agents): surface exec failures after claimed success

This commit is contained in:
Jayesh Betala
2026-05-10 04:04:05 +05:30
committed by Peter Steinberger
parent 06f15b6f9a
commit 658a30b42f
3 changed files with 59 additions and 12 deletions

View File

@@ -378,6 +378,23 @@ describe("buildEmbeddedRunPayloads", () => {
expect(payloads[1]?.text).not.toContain("missing");
});
it("shows exec tool errors when assistant output claims success", () => {
  // Scenario: the assistant's final text reports success even though the
  // most recent exec call failed.
  const claimedSuccess = "The script is ready to use and saved in your workspace.";
  const result = buildPayloads({
    assistantTexts: [claimedSuccess],
    lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
    lastToolError: {
      toolName: "exec",
      error: "/bin/bash: line 1: python: command not found",
    },
  });

  // Expect the assistant text first, followed by a separate error payload.
  expect(result).toHaveLength(2);
  const [assistantPayload, warningPayload] = result;
  expect(assistantPayload?.text).toBe(claimedSuccess);
  expect(warningPayload?.isError).toBe(true);
  // The warning names the tool but must not echo the raw shell error text.
  expect(warningPayload?.text).toContain("Exec");
  expect(warningPayload?.text).not.toContain("python: command not found");
});
it("shows mutating tool errors when assistant output does not acknowledge the failure", () => {
const payloads = buildPayloads({
assistantTexts: ["No issues found. The update is complete."],
@@ -435,6 +452,17 @@ describe("buildEmbeddedRunPayloads", () => {
expectSinglePayloadSummary(payloads, { text });
});
it("suppresses exec warnings when assistant output explicitly acknowledges the command failure", () => {
  // The assistant text itself admits that the command could not be run, so
  // only that text is expected in the output (no additional warning payload).
  const acknowledgement = "I couldn't run the command because python was not found.";
  const execFailure = {
    toolName: "exec",
    error: "/bin/bash: line 1: python: command not found",
  };

  const result = buildPayloads({
    assistantTexts: [acknowledgement],
    lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
    lastToolError: execFailure,
  });

  expectSinglePayloadSummary(result, { text: acknowledgement });
});
it("does not treat session_status read failures as mutating when explicitly flagged", () => {
const payloads = buildPayloads({
assistantTexts: ["Status loaded."],

View File

@@ -88,11 +88,28 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
expectSinglePayloadText(payloads, "Fixed.");
});
it("suppresses exec tool errors when verbose mode is off", () => {
expectNoPayloads({
it("surfaces concise exec tool errors when verbose mode is off", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "exec", error: "command failed" },
verboseLevel: "off",
});
expectSingleToolErrorPayload(payloads, {
title: "Exec",
absentDetail: "command failed",
});
});
it("surfaces concise bash tool errors when verbose mode is off", () => {
  // With verbose output disabled, a failed bash call should still produce a
  // single error payload titled "Bash" that omits the raw error detail.
  const bashFailure = { toolName: "bash", error: "command failed" };
  const expectedWarning = {
    title: "Bash",
    absentDetail: "command failed",
  };

  const result = buildPayloads({
    lastToolError: bashFailure,
    verboseLevel: "off",
  });

  expectSingleToolErrorPayload(result, expectedWarning);
});
it("surfaces exec tool errors for cron sessions even when verbose mode is off", () => {
@@ -132,12 +149,17 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
});
});
it("keeps non-timeout exec tool errors suppressed for cron sessions when verbose mode is off", () => {
expectNoPayloads({
it("surfaces non-timeout exec tool errors for cron sessions without raw details", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "exec", error: "Command not found" },
sessionKey: "agent:main:cron:job-1",
verboseLevel: "off",
});
expectSingleToolErrorPayload(payloads, {
title: "Exec",
absentDetail: "Command not found",
});
});
it("shows exec tool errors when verbose mode is on", () => {

View File

@@ -24,10 +24,7 @@ import {
normalizeTextForComparison,
} from "../../pi-embedded-helpers.js";
import type { ToolResultFormat } from "../../pi-embedded-subscribe.shared-types.js";
import {
extractAssistantThinking,
extractAssistantVisibleText,
} from "../../pi-embedded-utils.js";
import { extractAssistantThinking, extractAssistantVisibleText } from "../../pi-embedded-utils.js";
import { isExecLikeToolName, type ToolErrorSummary } from "../../tool-error-summary.js";
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
@@ -48,7 +45,7 @@ const RECOVERABLE_TOOL_ERROR_KEYWORDS = [
] as const;
const MUTATING_FAILURE_ACTION_PATTERN =
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|tool|action|operation)";
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|run|execute|execution|command|script|shell|bash|exec|tool|action|operation)";
const MUTATING_FAILURE_INABILITY_PATTERN = new RegExp(
`\\b(?:couldn't|could not|can't|cannot|unable to|am unable to|wasn't able to|was not able to|were unable to)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`,
@@ -143,9 +140,6 @@ function resolveToolErrorWarningPolicy(params: {
if (params.suppressToolErrorWarnings) {
return { showWarning: false, includeDetails };
}
if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) {
return { showWarning: false, includeDetails };
}
// sessions_send timeouts and errors are transient inter-session communication
// issues — the message may still have been delivered. Suppress warnings to
// prevent raw error text from leaking into the chat surface (#23989).
@@ -160,6 +154,9 @@ function resolveToolErrorWarningPolicy(params: {
includeDetails,
};
}
if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) {
return { showWarning: false, includeDetails };
}
if (params.suppressToolErrors) {
return { showWarning: false, includeDetails };
}