diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07419cd52c4..4b94b56fba3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -593,6 +593,7 @@ jobs: RUN_CHANNELS: ${{ needs.preflight.outputs.run_checks }} RUN_CORE_SUPPORT_BOUNDARY: ${{ needs.preflight.outputs.run_checks_node_core_dist }} RUN_GATEWAY_WATCH: ${{ needs.preflight.outputs.run_check_additional }} + OPENCLAW_RUN_PROMPT_SNAPSHOTS: ${{ needs.preflight.outputs.run_prompt_snapshots }} shell: bash run: | set -uo pipefail diff --git a/docs/ci.md b/docs/ci.md index d46eee95643..6a8eb9b4a28 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -56,7 +56,7 @@ Scope logic lives in `scripts/ci-changed-scope.mjs` and is covered by unit tests - **CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract helper/test-routing edits** use a fast Node-only manifest path: `preflight`, security, and a single `checks-fast-core` task. That path skips build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the change is limited to the routing or helper surfaces the fast task exercises directly. - **Windows Node checks** are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes. 
-The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, core unit fast/support lanes run separately, core runtime infra is split between state and process/config shards, auto-reply runs as balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/server configs are split across chat/auth/model/http-plugin/runtime/startup lanes instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard list is striped across four matrix shards, each running selected independent guards concurrently and printing per-check timings. The expensive Codex happy-path prompt snapshot drift check runs for manual CI and for prompt-affecting changes only, so normal unrelated Node changes do not wait behind cold prompt snapshot generation while prompt drift is still pinned to the PR that caused it. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built. 
+The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, core unit fast/support lanes run separately, core runtime infra is split between state and process/config shards, auto-reply runs as balanced workers (with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards), and agentic gateway/server configs are split across chat/auth/model/http-plugin/runtime/startup lanes instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard list is striped across four matrix shards, each running selected independent guards concurrently and printing per-check timings. The expensive Codex happy-path prompt snapshot drift check runs for manual CI and for prompt-affecting changes only, so normal unrelated Node changes do not wait behind cold prompt snapshot generation while prompt drift is still pinned to the PR that caused it; the same gate, surfaced in CI as the `OPENCLAW_RUN_PROMPT_SNAPSHOTS` environment variable, also skips the prompt snapshot Vitest cases inside the built-artifact core support-boundary shard. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built. Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest` and then builds the Play debug APK. 
The third-party flavor has no separate source set or manifest; its unit-test lane still compiles the flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push. diff --git a/test/scripts/prompt-snapshots.test.ts b/test/scripts/prompt-snapshots.test.ts index ceae8720e0a..d1a48e3be09 100644 --- a/test/scripts/prompt-snapshots.test.ts +++ b/test/scripts/prompt-snapshots.test.ts @@ -2,20 +2,22 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import { describe, expect, it } from "vitest"; -import { - createFormattedPromptSnapshotFiles, - deleteStalePromptSnapshotFiles, -} from "../../scripts/generate-prompt-snapshots.js"; import { defaultCatalogPathCandidates, findDefaultCatalogPath, renderCodexModelInstructions, runCodexModelPromptFixtureSync, } from "../../scripts/sync-codex-model-prompt-fixture.js"; -import { - CODEX_MODEL_PROMPT_FIXTURE_DIR, - CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR, -} from "../helpers/agents/happy-path-prompt-snapshots.js"; + +const promptSnapshotsEnabled = process.env.OPENCLAW_RUN_PROMPT_SNAPSHOTS !== "false"; +const promptSnapshotIt = promptSnapshotsEnabled ? 
it : it.skip; +const CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR = + "test/fixtures/agents/prompt-snapshots/codex-runtime-happy-path"; +const CODEX_MODEL_PROMPT_FIXTURE_DIR = "test/fixtures/agents/prompt-snapshots/codex-model-catalog"; + +async function loadPromptSnapshotGenerator() { + return await import("../../scripts/generate-prompt-snapshots.js"); +} function requireGeneratedSnapshot( generated: Array<{ path: string; content: string }>, @@ -38,7 +40,8 @@ function renderedPromptSection(content: string, heading: string, nextHeading: st } describe("happy path prompt snapshots", () => { - it("matches the committed Codex prompt snapshot artifacts", async () => { + promptSnapshotIt("matches the committed Codex prompt snapshot artifacts", async () => { + const { createFormattedPromptSnapshotFiles } = await loadPromptSnapshotGenerator(); const generated = await createFormattedPromptSnapshotFiles(); const expectedPaths = new Set(generated.map((file) => file.path)); for (const file of generated) { @@ -51,7 +54,8 @@ describe("happy path prompt snapshots", () => { expect(committed.toSorted()).toEqual([...expectedPaths].toSorted()); }); - it("deletes stale generated snapshot artifacts", async () => { + promptSnapshotIt("deletes stale generated snapshot artifacts", async () => { + const { deleteStalePromptSnapshotFiles } = await loadPromptSnapshotGenerator(); const root = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-prompt-snapshot-stale-")); try { const snapshotDir = path.join(root, CODEX_RUNTIME_HAPPY_PATH_PROMPT_SNAPSHOT_DIR); @@ -73,7 +77,8 @@ describe("happy path prompt snapshots", () => { } }); - it("renders the Codex model-bound prompt layers", async () => { + promptSnapshotIt("renders the Codex model-bound prompt layers", async () => { + const { createFormattedPromptSnapshotFiles } = await loadPromptSnapshotGenerator(); const generated = await createFormattedPromptSnapshotFiles(); const telegram = requireGeneratedSnapshot(generated, 
"telegram-direct-codex-message-tool.md"); @@ -94,7 +99,8 @@ describe("happy path prompt snapshots", () => { expect(telegram).toContain("### Tools: Dynamic Tool Catalog"); }); - it("keeps heartbeat guidance in heartbeat collaboration mode only", async () => { + promptSnapshotIt("keeps heartbeat guidance in heartbeat collaboration mode only", async () => { + const { createFormattedPromptSnapshotFiles } = await loadPromptSnapshotGenerator(); const generated = await createFormattedPromptSnapshotFiles(); const direct = requireGeneratedSnapshot(generated, "telegram-direct-codex-message-tool.md"); const group = requireGeneratedSnapshot(generated, "discord-group-codex-message-tool.md");