From a71ad1204431bb38f5237faa8b5f7052b2d61065 Mon Sep 17 00:00:00 2001 From: Mariano Belinky Date: Wed, 8 Apr 2026 23:40:34 +0200 Subject: [PATCH] Memory/dreaming: harden grounded backfill follow-ups --- CHANGELOG.md | 1 + docs/cli/memory.md | 4 ++ docs/concepts/dreaming.md | 9 +++ extensions/memory-core/src/cli.runtime.ts | 17 +++++- extensions/memory-core/src/cli.test.ts | 59 +++++++++++++++++++ .../src/dreaming-narrative.test.ts | 27 +++++++++ .../memory-core/src/dreaming-narrative.ts | 46 +++++++++++++-- extensions/memory-core/src/rem-evidence.ts | 22 ++++++- src/gateway/server-methods/doctor.test.ts | 35 ++++++++++- src/gateway/server-methods/doctor.ts | 33 ++++++++++- 10 files changed, 239 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2743a59cf10..a3722ec34e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai - Agents/timeouts: make the LLM idle timeout inherit `agents.defaults.timeoutSeconds` when configured, disable the unconfigured idle watchdog for cron runs, and point idle-timeout errors at `agents.defaults.llm.idleTimeoutSeconds`. Thanks @drvoss. - Agents/failover: classify Z.ai vendor code `1311` as billing and `1113` as auth, including long wrapped `1311` payloads, so these errors stop falling through to generic failover handling. (#49552) Thanks @1bcMax. - QQBot/media-tags: support HTML entity-encoded angle brackets (`<`/`>`), URL slashes in attributes, and self-closing media tags so upstream `` payloads are correctly parsed and normalized. (#60493) Thanks @ylc0919. +- Memory/dreaming: harden grounded backfill inputs and diary writes by preserving source-day labels, rejecting missing or symlinked targets cleanly, normalizing diary headings in gateway backfills, and tightening claim splitting plus diary source metadata. Thanks @mbelinky. - Memory/dreaming: accept embedded heartbeat trigger tokens so light and REM dreaming still run when runtime wrappers include extra heartbeat text. - QA/live auth: fail fast when live QA scenarios hit classified auth or runtime failure replies, including raw scenario wait paths, and sanitize missing-key guidance so gateway auth problems surface as actionable errors instead of timeouts. (#63333) Thanks @shakkernerd. - Plugins/onboarding auth choices: prevent untrusted workspace plugins from colliding with bundled provider auth-choice ids during non-interactive onboarding, so bundled provider setup keeps operator secrets out of untrusted workspace plugin handlers unless those plugins are explicitly trusted. (#62368) Thanks @pgondhi987. diff --git a/docs/cli/memory.md b/docs/cli/memory.md index 0815b455d43..0e2d8bbd08d 100644 --- a/docs/cli/memory.md +++ b/docs/cli/memory.md @@ -167,4 +167,8 @@ Notes: - If effectively active memory remote API key fields are configured as SecretRefs, the command resolves those values from the active gateway snapshot. If gateway is unavailable, the command fails fast. - Gateway version skew note: this command path requires a gateway that supports `secrets.resolve`; older gateways return an unknown-method error. - Tune scheduled sweep cadence with `dreaming.frequency`. Deep promotion policy is otherwise internal; use CLI flags on `memory promote` when you need one-off manual overrides. +- `memory rem-harness --path --grounded` previews grounded `What Happened`, `Reflections`, and `Possible Lasting Updates` from historical daily notes without writing anything. +- `memory rem-backfill --path ` writes reversible grounded diary entries into `DREAMS.md` for UI review. +- `memory rem-backfill --path --stage-short-term` also seeds grounded durable candidates into the live short-term promotion store so the normal deep phase can rank them. +- `memory rem-backfill --rollback` removes previously written grounded diary entries, and `memory rem-backfill --rollback-short-term` removes previously staged grounded short-term candidates. - See [Dreaming](/concepts/dreaming) for full phase descriptions and configuration reference. diff --git a/docs/concepts/dreaming.md b/docs/concepts/dreaming.md index b642c2d6abd..3a733a86784 100644 --- a/docs/concepts/dreaming.md +++ b/docs/concepts/dreaming.md @@ -81,6 +81,15 @@ subagent turn (using the default runtime model) and appends a short diary entry. This diary is for human reading in the Dreams UI, not a promotion source. +There is also a grounded historical backfill lane for review and recovery work: + +- `memory rem-harness --path ... --grounded` previews grounded diary output from historical `YYYY-MM-DD.md` notes. +- `memory rem-backfill --path ...` writes reversible grounded diary entries into `DREAMS.md`. +- `memory rem-backfill --path ... --stage-short-term` stages grounded durable candidates into the same short-term evidence store the normal deep phase already uses. +- `memory rem-backfill --rollback` and `--rollback-short-term` remove those staged backfill artifacts without touching ordinary diary entries or live short-term recall. + +The Control UI exposes the same diary backfill/reset flow so you can inspect results in the Dreams scene before deciding whether the grounded candidates deserve promotion. + ## Deep ranking signals Deep ranking uses six weighted base signals plus phase reinforcement: diff --git a/extensions/memory-core/src/cli.runtime.ts b/extensions/memory-core/src/cli.runtime.ts index 46f82374504..102e1b413f2 100644 --- a/extensions/memory-core/src/cli.runtime.ts +++ b/extensions/memory-core/src/cli.runtime.ts @@ -122,7 +122,15 @@ const DAILY_MEMORY_FILE_NAME_RE = /^(\d{4}-\d{2}-\d{2})\.md$/; async function listHistoricalDailyFiles(inputPath: string): Promise { const resolvedPath = path.resolve(inputPath); - const stat = await fs.stat(resolvedPath); + let stat; + try { + stat = await fs.stat(resolvedPath); + } catch (err) { + if ((err as NodeJS.ErrnoException | undefined)?.code === "ENOENT") { + return []; + } + throw err; + } if (stat.isFile()) { return DAILY_MEMORY_FILE_NAME_RE.test(path.basename(resolvedPath)) ? [resolvedPath] : []; } @@ -1734,6 +1742,11 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) { workspaceDir: scratchDir, inputPaths: workspaceSourceFiles, }); + const sourcePathByDay = new Map( + sourceFiles + .map((sourcePath) => [extractIsoDayFromPath(sourcePath), sourcePath] as const) + .filter((entry): entry is [string, string] => Boolean(entry[0])), + ); const entries = grounded.files .map((file) => { const isoDay = extractIsoDayFromPath(file.path); @@ -1742,7 +1755,7 @@ export async function runMemoryRemBackfill(opts: MemoryRemBackfillOptions) { } return { isoDay, - sourcePath: file.path, + sourcePath: sourcePathByDay.get(isoDay) ?? file.path, bodyLines: groundedMarkdownToDiaryLines(file.renderedMarkdown), }; }) diff --git a/extensions/memory-core/src/cli.test.ts b/extensions/memory-core/src/cli.test.ts index 14f654d28ef..b77623c6ea1 100644 --- a/extensions/memory-core/src/cli.test.ts +++ b/extensions/memory-core/src/cli.test.ts @@ -1066,6 +1066,7 @@ describe("memory cli", () => { const dreams = await fs.readFile(path.join(workspaceDir, "DREAMS.md"), "utf-8"); expect(dreams).toContain("openclaw:dreaming:backfill-entry"); + expect(dreams).toContain(`source=${historyPath}`); expect(dreams).toContain("January 1, 2025"); expect(dreams).toContain("What Happened"); expect(dreams).toContain("Possible Lasting Updates"); @@ -1074,6 +1075,26 @@ describe("memory cli", () => { }); }); + it("treats a missing historical path as a controlled empty-source error", async () => { + await withTempWorkspace(async (workspaceDir) => { + const close = vi.fn(async () => {}); + mockManager({ + status: () => makeMemoryStatus({ workspaceDir }), + close, + }); + + const errors = spyRuntimeErrors(defaultRuntime); + await runMemoryCli(["rem-backfill", "--path", path.join(workspaceDir, "missing-history")]); + + expect( + errors.mock.calls.some((call) => + String(call[0]).includes("found no YYYY-MM-DD.md files"), + ), + ).toBe(true); + expect(close).toHaveBeenCalled(); + }); + }); + it("stages grounded durable candidates into the live short-term store", async () => { await withTempWorkspace(async (workspaceDir) => { const historyDir = path.join(workspaceDir, "history"); @@ -1301,6 +1322,44 @@ describe("memory cli", () => { }); }); + it("does not split hyphenated words into malformed grounded candidates", async () => { + await withTempWorkspace(async (workspaceDir) => { + const historyDir = path.join(workspaceDir, "history"); + await fs.mkdir(historyDir, { recursive: true }); + const historyPath = path.join(historyDir, "2025-02-20.md"); + await fs.writeFile( + historyPath, + [ + "## Preferences Learned", + "- Use long-term plans, avoid reactive task switching.", + "- A self-aware workflow note should stay intact.", + ].join("\n") + "\n", + "utf-8", + ); + + const close = vi.fn(async () => {}); + mockManager({ + status: () => makeMemoryStatus({ workspaceDir }), + close, + }); + + const writeJson = spyRuntimeJson(defaultRuntime); + await runMemoryCli(["rem-harness", "--json", "--grounded", "--path", historyPath]); + + const payload = firstWrittenJsonArg<{ + grounded?: { + files?: Array<{ + renderedMarkdown?: string; + }>; + } | null; + }>(writeJson); + const rendered = payload?.grounded?.files?.[0]?.renderedMarkdown ?? ""; + expect(rendered).not.toContain("Use long- term plans"); + expect(rendered).not.toContain("A self- aware workflow note"); + expect(close).toHaveBeenCalled(); + }); + }); + it("rolls back grounded rem backfill entries from DREAMS.md", async () => { await withTempWorkspace(async (workspaceDir) => { const dreamsPath = path.join(workspaceDir, "DREAMS.md"); diff --git a/extensions/memory-core/src/dreaming-narrative.test.ts b/extensions/memory-core/src/dreaming-narrative.test.ts index 74ed10c234c..7f832fe372c 100644 --- a/extensions/memory-core/src/dreaming-narrative.test.ts +++ b/extensions/memory-core/src/dreaming-narrative.test.ts @@ -126,6 +126,10 @@ describe("backfill diary entries", () => { expect(formatBackfillDiaryDate("2026-01-01", "UTC")).toBe("January 1, 2026"); }); + it("preserves the iso day label in high-positive-offset timezones", () => { + expect(formatBackfillDiaryDate("2026-01-01", "Pacific/Kiritimati")).toBe("January 1, 2026"); + }); + it("builds a marked backfill diary entry", () => { const entry = buildBackfillDiaryEntry({ isoDay: "2026-01-01", @@ -201,6 +205,29 @@ describe("backfill diary entries", () => { expect(content).toContain("Keep this real dream."); expect(content).not.toContain("Remove this backfill."); }); + + it("refuses to overwrite a symlinked DREAMS.md during backfill writes", async () => { + const workspaceDir = await createTempWorkspace("openclaw-dreaming-backfill-"); + const targetPath = path.join(workspaceDir, "outside.txt"); + const dreamsPath = path.join(workspaceDir, "DREAMS.md"); + await fs.writeFile(targetPath, "outside\n", "utf-8"); + await fs.symlink(targetPath, dreamsPath); + + await expect( + writeBackfillDiaryEntries({ + workspaceDir, + timezone: "UTC", + entries: [ + { + isoDay: "2026-01-01", + sourcePath: "memory/2026-01-01.md", + bodyLines: ["What Happened", "1. First pass."], + }, + ], + }), + ).rejects.toThrow("Refusing to write symlinked DREAMS.md"); + await expect(fs.readFile(targetPath, "utf-8")).resolves.toBe("outside\n"); + }); }); describe("appendNarrativeEntry", () => { diff --git a/extensions/memory-core/src/dreaming-narrative.ts b/extensions/memory-core/src/dreaming-narrative.ts index 82c237295a6..5ada4b5809a 100644 --- a/extensions/memory-core/src/dreaming-narrative.ts +++ b/extensions/memory-core/src/dreaming-narrative.ts @@ -240,17 +240,53 @@ function stripBackfillDiaryBlocks(existing: string): { updated: string; removed: }; } -export function formatBackfillDiaryDate(isoDay: string, timezone?: string): string { +export function formatBackfillDiaryDate(isoDay: string, _timezone?: string): string { + const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(isoDay); + if (!match) { + return isoDay; + } + const [, year, month, day] = match; const opts: Intl.DateTimeFormatOptions = { - timeZone: timezone ?? "UTC", + // Preserve the source iso day exactly; backfill labels should not drift by timezone. + timeZone: "UTC", year: "numeric", month: "long", day: "numeric", }; - const epochMs = Date.parse(`${isoDay}T12:00:00Z`); + const epochMs = Date.UTC(Number(year), Number(month) - 1, Number(day), 12); return new Intl.DateTimeFormat("en-US", opts).format(new Date(epochMs)); } +async function assertSafeDreamsPath(dreamsPath: string): Promise { + const stat = await fs.lstat(dreamsPath).catch((err: NodeJS.ErrnoException) => { + if (err.code === "ENOENT") { + return null; + } + throw err; + }); + if (!stat) { + return; + } + if (stat.isSymbolicLink()) { + throw new Error("Refusing to write symlinked DREAMS.md"); + } + if (!stat.isFile()) { + throw new Error("Refusing to write non-file DREAMS.md"); + } +} + +async function writeDreamsFileAtomic(dreamsPath: string, content: string): Promise { + await assertSafeDreamsPath(dreamsPath); + const tempPath = `${dreamsPath}.${process.pid}.${Date.now()}.tmp`; + await fs.writeFile(tempPath, content, { encoding: "utf-8", flag: "wx" }); + try { + await fs.rename(tempPath, dreamsPath); + } catch (err) { + await fs.rm(tempPath, { force: true }).catch(() => {}); + throw err; + } +} + export function buildBackfillDiaryEntry(params: { isoDay: string; bodyLines: string[]; @@ -295,7 +331,7 @@ export async function writeBackfillDiaryEntries(params: { ), ]; const updated = replaceDiaryContent(stripped.updated, joinDiaryBlocks(nextBlocks)); - await fs.writeFile(dreamsPath, updated, "utf-8"); + await writeDreamsFileAtomic(dreamsPath, updated); return { dreamsPath, written: params.entries.length, @@ -311,7 +347,7 @@ export async function removeBackfillDiaryEntries(params: { const stripped = stripBackfillDiaryBlocks(existing); if (stripped.removed > 0 || existing.length > 0) { await fs.mkdir(path.dirname(dreamsPath), { recursive: true }); - await fs.writeFile(dreamsPath, stripped.updated, "utf-8"); + await writeDreamsFileAtomic(dreamsPath, stripped.updated); } return { dreamsPath, diff --git a/extensions/memory-core/src/rem-evidence.ts b/extensions/memory-core/src/rem-evidence.ts index 37c3babae9b..c5f54b2e703 100644 --- a/extensions/memory-core/src/rem-evidence.ts +++ b/extensions/memory-core/src/rem-evidence.ts @@ -56,6 +56,9 @@ const REM_TIME_PREFIX_RE = /^\d{1,2}:\d{2}\s*-\s*/; const REM_CODE_FENCE_RE = /^\s*```/; const REM_TABLE_RE = /^\s*\|.*\|\s*$/; const REM_TABLE_DIVIDER_RE = /^\s*\|?[\s:-]+\|[\s|:-]*$/; +const MAX_GROUNDED_REM_FILES = 512; +const MAX_GROUNDED_REM_FILE_BYTES = 1_000_000; +const GROUNDED_REM_SKIPPED_DIRS = new Set([".git", "node_modules"]); const REM_SUMMARY_FACT_LIMIT = 4; const REM_SUMMARY_REFLECTION_LIMIT = 4; const REM_SUMMARY_MEMORY_LIMIT = 3; @@ -605,7 +608,7 @@ function splitTopLevelClauses(text: string, delimiter: string): string[] { } function splitSubjectLeadClaim(text: string): string[] { - const match = /^(?.+?(?:—|–|-))\s*(?.+)$/u.exec(text); + const match = /^(?.+?(?:—|–|\s-\s))\s*(?.+)$/u.exec(text); if (!match?.groups) { return [text]; } @@ -1018,16 +1021,29 @@ function previewGroundedRemForFile(params: { async function collectMarkdownFiles(inputPaths: string[]): Promise { const found = new Set(); async function walk(targetPath: string): Promise { + if (found.size >= MAX_GROUNDED_REM_FILES) { + return; + } const resolved = path.resolve(targetPath); - const stat = await fs.stat(resolved); + const stat = await fs.lstat(resolved); + if (stat.isSymbolicLink()) { + return; + } if (stat.isDirectory()) { const entries = await fs.readdir(resolved, { withFileTypes: true }); for (const entry of entries) { + if (entry.isDirectory() && GROUNDED_REM_SKIPPED_DIRS.has(entry.name)) { + continue; + } await walk(path.join(resolved, entry.name)); } return; } - if (stat.isFile() && resolved.toLowerCase().endsWith(".md")) { + if ( + stat.isFile() && + stat.size <= MAX_GROUNDED_REM_FILE_BYTES && + resolved.toLowerCase().endsWith(".md") + ) { found.add(resolved); } } diff --git a/src/gateway/server-methods/doctor.test.ts b/src/gateway/server-methods/doctor.test.ts index a27b946b207..65eaa535d79 100644 --- a/src/gateway/server-methods/doctor.test.ts +++ b/src/gateway/server-methods/doctor.test.ts @@ -810,7 +810,15 @@ describe("doctor.memory.dreamDiary", () => { workspaceDir, inputPaths: [path.join(workspaceDir, "memory", "2026-02-19.md")], }); - expect(writeBackfillDiaryEntries).toHaveBeenCalled(); + expect(writeBackfillDiaryEntries).toHaveBeenCalledWith( + expect.objectContaining({ + entries: [ + expect.objectContaining({ + bodyLines: expect.arrayContaining(["What Happened", "1. Bunji — partner"]), + }), + ], + }), + ); expect(respond).toHaveBeenCalledWith( true, expect.objectContaining({ @@ -827,6 +835,31 @@ describe("doctor.memory.dreamDiary", () => { } }); + it("no-ops backfill when the workspace has no daily memory files", async () => { + const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "doctor-dream-diary-empty-")); + resolveAgentWorkspaceDir.mockReturnValue(workspaceDir); + const respond = vi.fn(); + + try { + await invokeDoctorMemoryBackfillDreamDiary(respond); + expect(previewGroundedRemMarkdown).not.toHaveBeenCalled(); + expect(writeBackfillDiaryEntries).not.toHaveBeenCalled(); + expect(respond).toHaveBeenCalledWith( + true, + expect.objectContaining({ + agentId: "main", + action: "backfill", + scannedFiles: 0, + written: 0, + replaced: 0, + }), + undefined, + ); + } finally { + await fs.rm(workspaceDir, { recursive: true, force: true }); + } + }); + it("resets only backfilled dream diary entries", async () => { const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "doctor-dream-diary-reset-")); await fs.writeFile(path.join(workspaceDir, "DREAMS.md"), "# Dream Diary\n", "utf-8"); diff --git a/src/gateway/server-methods/doctor.ts b/src/gateway/server-methods/doctor.ts index af7657a27b8..e98e78802c7 100644 --- a/src/gateway/server-methods/doctor.ts +++ b/src/gateway/server-methods/doctor.ts @@ -141,7 +141,7 @@ function extractIsoDayFromPath(filePath: string): string | null { function groundedMarkdownToDiaryLines(markdown: string): string[] { return markdown .split("\n") - .map((line) => line.trimEnd()) + .map((line) => line.replace(/^##\s+/, "").trimEnd()) .filter((line, index, lines) => line.length > 0 || (index > 0 && lines[index - 1]?.length > 0)); } @@ -241,6 +241,18 @@ function normalizeMemoryPath(rawPath: string): string { return rawPath.replaceAll("\\", "/").replace(/^\.\//, ""); } +function normalizeMemoryPathForWorkspace(workspaceDir: string, rawPath: string): string { + const normalized = normalizeMemoryPath(rawPath); + const workspaceNormalized = normalizeMemoryPath(workspaceDir); + if ( + path.isAbsolute(rawPath) && + normalized.startsWith(`${workspaceNormalized}/`) + ) { + return normalized.slice(workspaceNormalized.length + 1); + } + return normalized; +} + function isShortTermMemoryPath(filePath: string): boolean { const normalized = normalizeMemoryPath(filePath); if (/(?:^|\/)memory\/(\d{4})-(\d{2})-(\d{2})\.md$/.test(normalized)) { @@ -403,14 +415,15 @@ async function loadDreamingStoreStats( const dailyCount = toNonNegativeInt(entry.dailyCount); const groundedCount = toNonNegativeInt(entry.groundedCount); const totalEntrySignalCount = recallCount + dailyCount + groundedCount; + const normalizedEntryPath = normalizeMemoryPathForWorkspace(workspaceDir, entryPath); const snippet = normalizeTrimmedString(entry.snippet) ?? normalizeTrimmedString(entry.summary) ?? - normalizeMemoryPath(entryPath); + normalizedEntryPath; const lastRecalledAt = normalizeTrimmedString(entry.lastRecalledAt); const detail: DoctorMemoryDreamingEntryPayload = { key: entryKey, - path: normalizeMemoryPath(entryPath), + path: normalizedEntryPath, startLine: range.startLine, endLine: Math.max(range.startLine, range.endLine), snippet, @@ -865,6 +878,20 @@ export const doctorHandlers: GatewayRequestHandlers = { const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId); const memoryDir = path.join(workspaceDir, "memory"); const sourceFiles = await listWorkspaceDailyFiles(memoryDir); + if (sourceFiles.length === 0) { + const dreamDiary = await readDreamDiary(workspaceDir); + const payload: DoctorMemoryDreamDiaryActionPayload = { + agentId, + path: dreamDiary.path, + action: "backfill", + found: dreamDiary.found, + scannedFiles: 0, + written: 0, + replaced: 0, + }; + respond(true, payload, undefined); + return; + } const grounded = await previewGroundedRemMarkdown({ workspaceDir, inputPaths: sourceFiles,