mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-13 23:56:07 +00:00
fix(agents): surface exec failures after claimed success
This commit is contained in:
committed by
Peter Steinberger
parent
06f15b6f9a
commit
658a30b42f
@@ -378,6 +378,23 @@ describe("buildEmbeddedRunPayloads", () => {
|
||||
expect(payloads[1]?.text).not.toContain("missing");
|
||||
});
|
||||
|
||||
// Regression guard: an exec failure must still be reported even when the
// assistant's final message reads like a success report.
it("shows exec tool errors when assistant output claims success", () => {
  const claimedSuccess = "The script is ready to use and saved in your workspace.";
  const shellError = "/bin/bash: line 1: python: command not found";

  const result = buildPayloads({
    assistantTexts: [claimedSuccess],
    lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
    lastToolError: { toolName: "exec", error: shellError },
  });

  // The assistant text comes first, followed by exactly one more payload.
  expect(result).toHaveLength(2);
  expect(result[0]?.text).toBe(claimedSuccess);

  // The second payload is flagged as an error and mentions "Exec", but its
  // text must not contain the raw "command not found" shell output.
  const warning = result[1];
  expect(warning?.isError).toBe(true);
  expect(warning?.text).toContain("Exec");
  expect(warning?.text).not.toContain("python: command not found");
});
|
||||
|
||||
it("shows mutating tool errors when assistant output does not acknowledge the failure", () => {
|
||||
const payloads = buildPayloads({
|
||||
assistantTexts: ["No issues found. The update is complete."],
|
||||
@@ -435,6 +452,17 @@ describe("buildEmbeddedRunPayloads", () => {
|
||||
expectSinglePayloadSummary(payloads, { text });
|
||||
});
|
||||
|
||||
// Here the assistant's own reply already states that the command could not be
// run, so the assistant text is the only summary handed to the assertion helper.
it("suppresses exec warnings when assistant output explicitly acknowledges the command failure", () => {
  const acknowledgement = "I couldn't run the command because python was not found.";

  const result = buildPayloads({
    assistantTexts: [acknowledgement],
    lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
    lastToolError: {
      toolName: "exec",
      error: "/bin/bash: line 1: python: command not found",
    },
  });

  // NOTE(review): expectSinglePayloadSummary is defined outside this view;
  // presumably it asserts that exactly one payload with this text exists.
  expectSinglePayloadSummary(result, { text: acknowledgement });
});
|
||||
|
||||
it("does not treat session_status read failures as mutating when explicitly flagged", () => {
|
||||
const payloads = buildPayloads({
|
||||
assistantTexts: ["Status loaded."],
|
||||
|
||||
@@ -88,11 +88,28 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
|
||||
expectSinglePayloadText(payloads, "Fixed.");
|
||||
});
|
||||
|
||||
it("suppresses exec tool errors when verbose mode is off", () => {
|
||||
expectNoPayloads({
|
||||
// Builds payloads for a failed "exec" call with verbose mode off and checks
// the result through the shared tool-error assertion helper.
it("surfaces concise exec tool errors when verbose mode is off", () => {
  const rawError = "command failed";

  const result = buildPayloads({
    lastToolError: { toolName: "exec", error: rawError },
    verboseLevel: "off",
  });

  // NOTE(review): the helper lives outside this view; judging by its option
  // names it expects one payload titled "Exec" that omits the raw error text.
  expectSingleToolErrorPayload(result, { title: "Exec", absentDetail: rawError });
});
|
||||
|
||||
// Same scenario as the exec case, but the failing tool is named "bash".
it("surfaces concise bash tool errors when verbose mode is off", () => {
  const rawError = "command failed";

  const result = buildPayloads({
    lastToolError: { toolName: "bash", error: rawError },
    verboseLevel: "off",
  });

  // NOTE(review): the helper lives outside this view; judging by its option
  // names it expects one payload titled "Bash" that omits the raw error text.
  expectSingleToolErrorPayload(result, { title: "Bash", absentDetail: rawError });
});
|
||||
|
||||
it("surfaces exec tool errors for cron sessions even when verbose mode is off", () => {
|
||||
@@ -132,12 +149,17 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps non-timeout exec tool errors suppressed for cron sessions when verbose mode is off", () => {
|
||||
expectNoPayloads({
|
||||
// Cron-session variant: a non-timeout exec failure with verbose mode off is
// checked through the shared tool-error assertion helper.
it("surfaces non-timeout exec tool errors for cron sessions without raw details", () => {
  const rawError = "Command not found";

  const result = buildPayloads({
    lastToolError: { toolName: "exec", error: rawError },
    sessionKey: "agent:main:cron:job-1",
    verboseLevel: "off",
  });

  // NOTE(review): the helper lives outside this view; judging by its option
  // names it expects one payload titled "Exec" that omits the raw error text.
  expectSingleToolErrorPayload(result, { title: "Exec", absentDetail: rawError });
});
|
||||
|
||||
it("shows exec tool errors when verbose mode is on", () => {
|
||||
|
||||
@@ -24,10 +24,7 @@ import {
|
||||
normalizeTextForComparison,
|
||||
} from "../../pi-embedded-helpers.js";
|
||||
import type { ToolResultFormat } from "../../pi-embedded-subscribe.shared-types.js";
|
||||
import {
|
||||
extractAssistantThinking,
|
||||
extractAssistantVisibleText,
|
||||
} from "../../pi-embedded-utils.js";
|
||||
import { extractAssistantThinking, extractAssistantVisibleText } from "../../pi-embedded-utils.js";
|
||||
import { isExecLikeToolName, type ToolErrorSummary } from "../../tool-error-summary.js";
|
||||
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
|
||||
|
||||
@@ -48,7 +45,7 @@ const RECOVERABLE_TOOL_ERROR_KEYWORDS = [
|
||||
] as const;
|
||||
|
||||
const MUTATING_FAILURE_ACTION_PATTERN =
|
||||
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|tool|action|operation)";
|
||||
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|run|execute|execution|command|script|shell|bash|exec|tool|action|operation)";
|
||||
|
||||
const MUTATING_FAILURE_INABILITY_PATTERN = new RegExp(
|
||||
`\\b(?:couldn't|could not|can't|cannot|unable to|am unable to|wasn't able to|was not able to|were unable to)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`,
|
||||
@@ -143,9 +140,6 @@ function resolveToolErrorWarningPolicy(params: {
|
||||
if (params.suppressToolErrorWarnings) {
|
||||
return { showWarning: false, includeDetails };
|
||||
}
|
||||
if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) {
|
||||
return { showWarning: false, includeDetails };
|
||||
}
|
||||
// sessions_send timeouts and errors are transient inter-session communication
|
||||
// issues — the message may still have been delivered. Suppress warnings to
|
||||
// prevent raw error text from leaking into the chat surface (#23989).
|
||||
@@ -160,6 +154,9 @@ function resolveToolErrorWarningPolicy(params: {
|
||||
includeDetails,
|
||||
};
|
||||
}
|
||||
if (isExecLikeToolName(params.lastToolError.toolName) && !includeDetails) {
|
||||
return { showWarning: false, includeDetails };
|
||||
}
|
||||
if (params.suppressToolErrors) {
|
||||
return { showWarning: false, includeDetails };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user