mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-13 23:56:07 +00:00
Closes #78649. Adds opt-in inbound iMessage catchup that recovers messages landing in chat.db while the gateway is offline (crash, restart, mac sleep). Mirrors the design of the retired BlueBubbles catchup, adapted for the imsg JSON-RPC chats.list + messages.history fetch path. - Schema: new channels.imessage.catchup block with enabled / maxAgeMinutes (1..720) / perRunLimit (1..500) / firstRunLookbackMinutes (1..720) / maxFailureRetries (1..1000). Disabled by default — opt-in. - Cursor + replay loop (extensions/imessage/src/monitor/catchup.ts): per-account state under <openclawStateDir>/imessage/catchup/. Walks rows oldest-first, advances on success/give-up, holds at failed.rowid - 1 when a failure is below maxFailureRetries (cannot leapfrog held failures even when later rows in the same batch succeed). Watermark floor for parse-rejected rows. - Bridge (extensions/imessage/src/monitor/catchup-bridge.ts): live chats.list + per-chat messages.history fetch adapter; dispatch adapter routes through the live handleMessageNow path so allowlists / group policy / dedupe / echo cache behave identically on replayed and live messages. Watermark clamped to last dispatched rowid when the cap truncates. - Monitor wiring (extensions/imessage/src/monitor/monitor-provider.ts): catchup runs once between watch.subscribe and the live dispatch loop when enabled. Bypasses the inbound debouncer for serial per-row dispatch. - Echo-cache TTL bumped 2 min → 12 h so own outbound rows from before a gap are not re-fed as inbound on replay. - Generated bundled-channel-config-metadata.generated.ts so the runtime AJV schema accepts the new catchup block. - Docs: new "Catching up after gateway downtime" section + BlueBubbles migration parity update. 
Tests: 322/322 in extensions/imessage/, including 5 regression tests covering the cursor-leapfrog, parse-rejected stall, watermark vs held failure, and cap-truncation-cursor-floor edge cases that codex (gpt-5.4) and clawsweeper (gpt-5.5) found during review. Live-tested end-to-end against the running gateway: replayed=1 fetchedCount=1, agent reply observed, cursor persisted at the test row's exact rowid. Co-authored-by: Omar Shahine <10343873+omarshahine@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
459 lines
15 KiB
TypeScript
459 lines
15 KiB
TypeScript
import fs from "node:fs";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
|
import {
|
|
capFailureRetriesMap,
|
|
loadIMessageCatchupCursor,
|
|
performIMessageCatchup,
|
|
resolveCatchupConfig,
|
|
saveIMessageCatchupCursor,
|
|
type CatchupDispatchFn,
|
|
type CatchupFetchFn,
|
|
type IMessageCatchupRow,
|
|
} from "./catchup.js";
|
|
|
|
let tempStateDir: string;
|
|
let priorStateDir: string | undefined;
|
|
|
|
// Suite setup: create a throwaway directory under the OS temp dir and point
// OPENCLAW_STATE_DIR at it, remembering the previous value for afterAll.
beforeAll(() => {
  const scratchPrefix = path.join(os.tmpdir(), "openclaw-imsg-catchup-");
  tempStateDir = fs.mkdtempSync(scratchPrefix);
  priorStateDir = process.env.OPENCLAW_STATE_DIR;
  process.env.OPENCLAW_STATE_DIR = tempStateDir;
});
|
|
|
|
// Suite teardown: put OPENCLAW_STATE_DIR back exactly as it was (unset if it
// was unset before), then remove the scratch directory and its contents.
afterAll(() => {
  if (priorStateDir !== undefined) {
    process.env.OPENCLAW_STATE_DIR = priorStateDir;
  } else {
    delete process.env.OPENCLAW_STATE_DIR;
  }
  fs.rmSync(tempStateDir, { recursive: true, force: true });
});
|
|
|
|
// Each test starts from a clean slate: delete the per-account cursor directory
// under the temp state dir. `force: true` makes a missing directory a no-op.
beforeEach(() => {
  const cursorDir = path.join(tempStateDir, "imessage", "catchup");
  fs.rmSync(cursorDir, { recursive: true, force: true });
});
|
|
|
|
// Covers config normalisation: defaults for missing input, and clamping of
// out-of-range values to the bounds asserted below.
describe("resolveCatchupConfig", () => {
  it("falls back to defaults when raw is undefined", () => {
    expect(resolveCatchupConfig(undefined)).toMatchObject({
      enabled: false,
      maxAgeMinutes: 120,
      perRunLimit: 50,
      firstRunLookbackMinutes: 30,
      maxFailureRetries: 10,
    });
  });

  it("clamps over-limit input to the documented ceiling", () => {
    const resolved = resolveCatchupConfig({
      enabled: true,
      maxAgeMinutes: 99_999,
      perRunLimit: 10_000,
      maxFailureRetries: 50_000,
    });

    // Subset match: firstRunLookbackMinutes is intentionally not asserted here.
    expect(resolved).toMatchObject({
      enabled: true,
      maxAgeMinutes: 720,
      perRunLimit: 500,
      maxFailureRetries: 1000,
    });
  });

  it("clamps zero / negative input to 1", () => {
    const resolved = resolveCatchupConfig({
      maxAgeMinutes: 0,
      perRunLimit: -10,
      firstRunLookbackMinutes: -1,
      maxFailureRetries: 0,
    });

    expect(resolved).toMatchObject({
      maxAgeMinutes: 1,
      perRunLimit: 1,
      firstRunLookbackMinutes: 1,
      maxFailureRetries: 1,
    });
  });
});
|
|
|
|
// Persistence round-trips for the per-account catchup cursor. State is wiped
// by the file-level beforeEach, so every test begins with no cursor on disk.
describe("loadIMessageCatchupCursor / saveIMessageCatchupCursor", () => {
  const accountId = "primary";
  const seenMs = 1_700_000_000_000;
  const seenRowid = 42;

  it("returns null when no cursor exists", async () => {
    expect(await loadIMessageCatchupCursor(accountId)).toBeNull();
  });

  it("round-trips a cursor without failureRetries", async () => {
    await saveIMessageCatchupCursor(accountId, {
      lastSeenMs: seenMs,
      lastSeenRowid: seenRowid,
    });

    const loaded = await loadIMessageCatchupCursor(accountId);

    expect(loaded).not.toBeNull();
    expect(loaded?.lastSeenMs).toBe(seenMs);
    expect(loaded?.lastSeenRowid).toBe(seenRowid);
    expect(loaded?.failureRetries).toBeUndefined();
  });

  it("round-trips a cursor with failureRetries", async () => {
    await saveIMessageCatchupCursor(accountId, {
      lastSeenMs: seenMs,
      lastSeenRowid: seenRowid,
      failureRetries: { "GUID-A": 3 },
    });

    const loaded = await loadIMessageCatchupCursor(accountId);

    expect(loaded?.failureRetries).toEqual({ "GUID-A": 3 });
  });

  it("drops malformed failureRetries entries on load", async () => {
    // One valid count plus a negative and a NaN count; only the valid entry
    // is expected to come back from load.
    const mixedRetries: Record<string, number> = {
      "GUID-A": 3,
      "GUID-B": -1,
      "GUID-C": Number.NaN,
    };
    await saveIMessageCatchupCursor(accountId, {
      lastSeenMs: seenMs,
      lastSeenRowid: seenRowid,
      failureRetries: mixedRetries,
    });

    const loaded = await loadIMessageCatchupCursor(accountId);

    expect(loaded?.failureRetries).toEqual({ "GUID-A": 3 });
  });

  it("isolates state per accountId", async () => {
    await saveIMessageCatchupCursor("a", { lastSeenMs: 100, lastSeenRowid: 1 });
    await saveIMessageCatchupCursor("b", { lastSeenMs: 200, lastSeenRowid: 2 });

    const cursorA = await loadIMessageCatchupCursor("a");
    expect(cursorA?.lastSeenRowid).toBe(1);

    const cursorB = await loadIMessageCatchupCursor("b");
    expect(cursorB?.lastSeenRowid).toBe(2);
  });
});
|
|
|
|
// Bounding of the persisted retry map: untouched when small enough, trimmed
// to the highest-count entries when it exceeds the cap.
describe("capFailureRetriesMap", () => {
  it("is identity below the cap", () => {
    const retries = { a: 1, b: 2 };

    const result = capFailureRetriesMap(retries, 10);

    expect(result).toEqual({ a: 1, b: 2 });
  });

  it("keeps the highest counts when over the cap", () => {
    const retries = { a: 1, b: 9, c: 5, d: 9 };

    const survivors = Object.keys(capFailureRetriesMap(retries, 2));

    // b and d share the top count (9), so with a cap of 2 they are the two
    // survivors; keys are sorted so the assertion does not depend on order.
    survivors.sort();
    expect(survivors).toEqual(["b", "d"]);
  });
});
|
|
|
|
// End-to-end behaviour of the catchup replay loop against injected fetch /
// dispatch doubles: cursor advance, skip rules, retry bookkeeping, the
// held-failure clamp, the fetch high-watermark floor, and fetch failure.
describe("performIMessageCatchup", () => {
  const config = resolveCatchupConfig({ enabled: true });
  const now = 1_700_001_000_000; // arbitrary fixed clock

  /** Builds an inbound row dated one minute before `now`; `overrides` win. */
  function row(overrides: Partial<IMessageCatchupRow>): IMessageCatchupRow {
    return {
      guid: "GUID-X",
      rowid: 1,
      date: now - 60_000,
      isFromMe: false,
      ...overrides,
    };
  }

  /** Fetch double that always resolves successfully with the given rows. */
  function fetchOf(rows: IMessageCatchupRow[]): CatchupFetchFn {
    return vi.fn(async () => ({ resolved: true, rows }));
  }

  /** Dispatch double that reports success for every row. */
  function alwaysOk(): CatchupDispatchFn {
    return vi.fn(async () => ({ ok: true }));
  }

  it("replays every fresh inbound row through dispatch and advances the cursor", async () => {
    const dispatch = alwaysOk();
    const fetch = fetchOf([
      row({ guid: "A", rowid: 10, date: now - 30_000 }),
      row({ guid: "B", rowid: 11, date: now - 20_000 }),
    ]);

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.querySucceeded).toBe(true);
    expect(summary.replayed).toBe(2);
    expect(summary.failed).toBe(0);
    expect(summary.cursorAfter.lastSeenRowid).toBe(11);
    expect(dispatch).toHaveBeenCalledTimes(2);

    // The advanced cursor must also have been persisted.
    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.lastSeenRowid).toBe(11);
  });

  it("skips is_from_me rows but still advances the cursor past them", async () => {
    const dispatch = alwaysOk();
    const fetch = fetchOf([
      row({ guid: "A", rowid: 10, isFromMe: true }),
      row({ guid: "B", rowid: 11, isFromMe: false }),
    ]);

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.skippedFromMe).toBe(1);
    expect(summary.replayed).toBe(1);
    expect(summary.cursorAfter.lastSeenRowid).toBe(11);
    expect(dispatch).toHaveBeenCalledTimes(1);
  });

  it("drops rows older than the maxAgeMinutes ceiling and advances past them", async () => {
    const tightConfig = resolveCatchupConfig({ enabled: true, maxAgeMinutes: 1 });
    const dispatch = alwaysOk();
    const fetch = fetchOf([
      row({ guid: "OLD", rowid: 10, date: now - 10 * 60_000 }), // 10 min old, > 1 min ceiling
      row({ guid: "NEW", rowid: 11, date: now - 30_000 }),
    ]);

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config: tightConfig,
      now,
      fetch,
      dispatch,
    });

    expect(summary.skippedPreCursor).toBe(1);
    expect(summary.replayed).toBe(1);
    expect(summary.cursorAfter.lastSeenRowid).toBe(11);
  });

  it("holds the cursor on the failing row while count < maxFailureRetries", async () => {
    const dispatch = vi.fn<CatchupDispatchFn>(async () => ({ ok: false }));
    const fetch = fetchOf([row({ guid: "A", rowid: 10 })]);

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.failed).toBe(1);
    expect(summary.givenUp).toBe(0);
    // Cursor clamps to `failed.rowid - 1` (== 9), strictly below the held
    // failure, so the next pass refetches row 10 — and never leapfrogs it.
    expect(summary.cursorAfter.lastSeenRowid).toBe(9);

    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.failureRetries?.A).toBe(1);
    expect(persisted?.lastSeenRowid).toBe(9);
  });

  it("crosses the maxFailureRetries ceiling, gives up, and advances past the wedged row", async () => {
    const tightConfig = resolveCatchupConfig({ enabled: true, maxFailureRetries: 2 });
    const dispatch = vi.fn<CatchupDispatchFn>(async () => ({ ok: false }));
    const firstFetch = fetchOf([row({ guid: "A", rowid: 10 })]);

    // First pass: count goes 0 → 1, cursor held below the failed row.
    // The clamp is `failed.rowid - 1` (== 9), not the prior cursor (0), so
    // the next pass refetches row 10 without re-walking older history.
    await performIMessageCatchup({
      accountId: "primary",
      config: tightConfig,
      now,
      fetch: firstFetch,
      dispatch,
    });
    const afterFirstPass = await loadIMessageCatchupCursor("primary");
    expect(afterFirstPass?.lastSeenRowid).toBe(9);

    // Second pass: count goes 1 → 2 (== ceiling), give up, cursor advances.
    const secondFetch = fetchOf([row({ guid: "A", rowid: 10 })]);
    const summary = await performIMessageCatchup({
      accountId: "primary",
      config: tightConfig,
      now,
      fetch: secondFetch,
      dispatch,
    });

    expect(summary.givenUp).toBe(1);
    expect(summary.cursorAfter.lastSeenRowid).toBe(10);

    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.failureRetries?.A).toBe(2);
  });

  it("treats already-given-up rows as skippedGivenUp without dispatching", async () => {
    // Seed a retry count (99) far above the default maxFailureRetries.
    await saveIMessageCatchupCursor("primary", {
      lastSeenMs: now - 60_000,
      lastSeenRowid: 0,
      failureRetries: { "WEDGED-1": 99 },
    });

    const dispatch = alwaysOk();
    const fetch = fetchOf([row({ guid: "WEDGED-1", rowid: 5 }), row({ guid: "FRESH", rowid: 6 })]);

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.skippedGivenUp).toBe(1);
    expect(summary.replayed).toBe(1);
    expect(dispatch).toHaveBeenCalledTimes(1);
  });

  it("removes a guid from the retry map after a successful dispatch", async () => {
    await saveIMessageCatchupCursor("primary", {
      lastSeenMs: now - 60_000,
      lastSeenRowid: 0,
      failureRetries: { RETRYING: 1 },
    });

    const dispatch = alwaysOk();
    const fetch = fetchOf([row({ guid: "RETRYING", rowid: 5 })]);

    await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    // The only retry entry was cleared, so no failureRetries map is loaded.
    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.failureRetries).toBeUndefined();
  });

  it("does NOT leapfrog a held failure when a later row in the same batch succeeds", async () => {
    // Regression for #78649 cursor-leapfrog bug. Prior to the fix the loop
    // advanced lastSeenRowid on every successful row, so a held failure at
    // rowid 10 followed by a success at rowid 11 would persist the cursor
    // at 11 — and the next pass would filter row 10 out via `row.rowid <=
    // sinceRowid` and never retry it. With the fix in place the cursor is
    // clamped to `earliestHeldFailureRow.rowid - 1` (== 9) so the next pass
    // refetches row 10.
    //
    // The callback parameter is deliberately not called `row`, so it does not
    // shadow the `row()` fixture helper used to build the fetched rows below.
    const dispatch = vi.fn<CatchupDispatchFn>(async (candidate) => {
      if (candidate.guid === "A") {
        return { ok: false };
      }
      return { ok: true };
    });
    const fetch = fetchOf([
      row({ guid: "A", rowid: 10, date: now - 40_000 }),
      row({ guid: "B", rowid: 11, date: now - 30_000 }),
    ]);

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.failed).toBe(1);
    expect(summary.replayed).toBe(1);
    expect(summary.givenUp).toBe(0);
    // Cursor must not leapfrog the held failure at rowid 10. The persisted
    // cursor lands at rowid 9 so the next pass refetches row 10.
    expect(summary.cursorAfter.lastSeenRowid).toBe(9);
    // Both rows were attempted; the mock's own call tracking replaces a
    // hand-maintained counter.
    expect(dispatch).toHaveBeenCalledTimes(2);

    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.lastSeenRowid).toBe(9);
    expect(persisted?.failureRetries?.A).toBe(1);
  });

  it("advances the cursor past parser-rejected rows via the fetch high-watermark", async () => {
    // Regression: without a high-watermark from the fetcher, an unparseable
    // row never reaches the loop, so the cursor never advances past it and
    // the next pass re-fetches and re-drops the same broken row forever.
    // The bridge probes raw `id` / `created_at` per row and emits a
    // `highWatermarkRowid` / `highWatermarkMs` floor so the loop can advance
    // the cursor even when every fetched row fails the payload parser.
    const dispatch = vi.fn<CatchupDispatchFn>(async () => ({ ok: true }));
    const fetch: CatchupFetchFn = vi.fn(async () => ({
      resolved: true,
      rows: [],
      highWatermarkRowid: 42,
      highWatermarkMs: now - 5_000,
    }));

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.querySucceeded).toBe(true);
    expect(summary.replayed).toBe(0);
    expect(summary.fetchedCount).toBe(0);
    expect(summary.cursorAfter.lastSeenRowid).toBe(42);
    expect(dispatch).not.toHaveBeenCalled();

    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.lastSeenRowid).toBe(42);
    expect(persisted?.lastSeenMs).toBe(now - 5_000);
  });

  it("does not let the high-watermark leapfrog a held failure", async () => {
    // The fetcher's watermark is a floor for cursor advance, but a held
    // failure must still clamp the cursor below the failed row even when
    // the fetcher reports a higher watermark.
    const dispatch = vi.fn<CatchupDispatchFn>(async () => ({ ok: false }));
    const fetch: CatchupFetchFn = vi.fn(async () => ({
      resolved: true,
      rows: [row({ guid: "A", rowid: 10, date: now - 40_000 })],
      highWatermarkRowid: 99,
      highWatermarkMs: now - 100,
    }));

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
    });

    expect(summary.failed).toBe(1);
    // Even though the fetcher reports watermark=99, the held failure at
    // rowid 10 clamps the cursor at 9.
    expect(summary.cursorAfter.lastSeenRowid).toBe(9);
  });

  it("returns querySucceeded=false and preserves the cursor on fetch failure", async () => {
    await saveIMessageCatchupCursor("primary", { lastSeenMs: now - 60_000, lastSeenRowid: 7 });
    const dispatch = alwaysOk();
    const fetch = vi.fn<CatchupFetchFn>(async () => {
      throw new Error("imsg rpc closed");
    });
    const warn = vi.fn();

    const summary = await performIMessageCatchup({
      accountId: "primary",
      config,
      now,
      fetch,
      dispatch,
      warn,
    });

    expect(summary.querySucceeded).toBe(false);
    expect(summary.replayed).toBe(0);
    expect(dispatch).not.toHaveBeenCalled();
    expect(warn).toHaveBeenCalledWith(expect.stringMatching(/fetch failed/));

    // The cursor saved before the run must be untouched.
    const persisted = await loadIMessageCatchupCursor("primary");
    expect(persisted?.lastSeenRowid).toBe(7);
  });
});
|