test(cli): add response-time contract for CLI surfaces

2026-05-13 15:47:28 +00:00 · 2026-05-10 23:05:55 +08:00
parent b5633698e2
commit 97283f0a2e
5 changed files with 526 additions and 101 deletions
--- a/package.json
+++ b/package.json
@@ -1667,6 +1667,7 @@
    "test:sectriage": "OPENCLAW_GATEWAY_PROJECT_SHARDS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.gateway.config.ts && node scripts/run-vitest.mjs run --config test/vitest/vitest.unit.config.ts --exclude src/daemon/launchd.integration.test.ts --exclude src/process/exec.test.ts",
    "test:serial": "OPENCLAW_TEST_PROJECTS_SERIAL=1 OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/test-projects.mjs",
    "test:stability:gateway": "OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.gateway.config.ts src/gateway/gateway-stability.test.ts && OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.logging.config.ts src/logging/diagnostic-stability-bundle.test.ts && OPENCLAW_VITEST_MAX_WORKERS=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.infra.config.ts src/infra/fatal-error-hooks.test.ts",
+    "test:cli-response:contract": "node scripts/build-all.mjs cliStartup && node scripts/test-cli-startup-bench-budget.mjs --preset response --runs 1 --warmup 0 --timeout-ms 10000 --skip-baseline",
    "test:startup:bench": "node --import tsx scripts/bench-cli-startup.ts",
    "test:startup:bench:check": "node scripts/test-cli-startup-bench-budget.mjs",
    "test:startup:bench:save": "node --import tsx scripts/bench-cli-startup.ts --preset all --runs 5 --warmup 1 --output .artifacts/cli-startup-bench-all.json",
--- a/scripts/bench-cli-startup.ts
+++ b/scripts/bench-cli-startup.ts
@@ -1,4 +1,4 @@
-import { spawnSync } from "node:child_process";
+import { spawn } from "node:child_process";
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import os from "node:os";
 import path from "node:path";
@@ -8,10 +8,13 @@ type CommandCase = {
  name: string;
  args: string[];
  presets: readonly string[];
+  firstOutputBudgetMs?: number;
+  exitBudgetMs?: number;
 };

 type Sample = {
  ms: number;
+  firstOutputMs: number | null;
  maxRssMb: number | null;
  exitCode: number | null;
  signal: string | null;
@@ -30,6 +33,7 @@ type SummaryStats = {
 type CaseSummary = {
  sampleCount: number;
  durationMs: SummaryStats;
+  firstOutputMs: SummaryStats | null;
  maxRssMb: SummaryStats | null;
  exitSummary: string;
 };
@@ -40,6 +44,10 @@ type SuiteResult = {
    id: string;
    name: string;
    args: string[];
+    contract: {
+      firstOutputBudgetMs: number | null;
+      exitBudgetMs: number | null;
+    } | null;
    samples: Sample[];
    summary: CaseSummary;
  }>;
@@ -65,8 +73,198 @@ const DEFAULT_ENTRY = "openclaw.mjs";
 const MAX_RSS_MARKER = "__OPENCLAW_MAX_RSS_KB__=";

 const COMMAND_CASES: readonly CommandCase[] = [
-  { id: "version", name: "--version", args: ["--version"], presets: ["startup"] },
-  { id: "help", name: "--help", args: ["--help"], presets: ["startup"] },
+  {
+    id: "version",
+    name: "--version",
+    args: ["--version"],
+    presets: ["startup", "response"],
+    firstOutputBudgetMs: 1_000,
+    exitBudgetMs: 2_000,
+  },
+  {
+    id: "help",
+    name: "--help",
+    args: ["--help"],
+    presets: ["startup", "response"],
+    firstOutputBudgetMs: 1_000,
+    exitBudgetMs: 2_000,
+  },
+  {
+    id: "onboardHelp",
+    name: "onboard --help",
+    args: ["onboard", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "setupHelp",
+    name: "setup --help",
+    args: ["setup", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "configureHelp",
+    name: "configure --help",
+    args: ["configure", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "channelsAddHelp",
+    name: "channels add --help",
+    args: ["channels", "add", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "doctorHelp",
+    name: "doctor --help",
+    args: ["doctor", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "modelsHelp",
+    name: "models --help",
+    args: ["models", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "pluginsHelp",
+    name: "plugins --help",
+    args: ["plugins", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "gatewayHelp",
+    name: "gateway --help",
+    args: ["gateway", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "agentsHelp",
+    name: "agents --help",
+    args: ["agents", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 3_500,
+    exitBudgetMs: 8_000,
+  },
+  {
+    id: "sessionsHelp",
+    name: "sessions --help",
+    args: ["sessions", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "tasksHelp",
+    name: "tasks --help",
+    args: ["tasks", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "messageHelp",
+    name: "message --help",
+    args: ["message", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "pairingHelp",
+    name: "pairing --help",
+    args: ["pairing", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "authHelp",
+    name: "auth --help",
+    args: ["auth", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "configHelp",
+    name: "config --help",
+    args: ["config", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "secretsHelp",
+    name: "secrets --help",
+    args: ["secrets", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "skillsHelp",
+    name: "skills --help",
+    args: ["skills", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "nodesHelp",
+    name: "nodes --help",
+    args: ["nodes", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 3_500,
+    exitBudgetMs: 8_000,
+  },
+  {
+    id: "directoryHelp",
+    name: "directory --help",
+    args: ["directory", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "sandboxHelp",
+    name: "sandbox --help",
+    args: ["sandbox", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
+  {
+    id: "browserHelp",
+    name: "browser --help",
+    args: ["browser", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 1_500,
+    exitBudgetMs: 3_000,
+  },
+  {
+    id: "webhooksHelp",
+    name: "webhooks --help",
+    args: ["webhooks", "--help"],
+    presets: ["response"],
+    firstOutputBudgetMs: 2_500,
+    exitBudgetMs: 6_000,
+  },
  { id: "health", name: "health", args: ["health"], presets: ["startup", "real"] },
  { id: "healthJson", name: "health --json", args: ["health", "--json"], presets: ["startup"] },
  {
@@ -175,7 +373,7 @@ function parsePresets(raw: string | undefined): string[] {
    .map((value) => value.trim())
    .filter(Boolean);
  if (values.includes("all")) {
-    return ["startup", "real"];
+    return ["startup", "real", "response"];
  }
  return values.length > 0 ? values : ["startup"];
 }
@@ -233,12 +431,16 @@ function summarizeNumbers(values: number[]): SummaryStats {

 function summarizeSamples(samples: Sample[]): CaseSummary {
  const durations = summarizeNumbers(samples.map((sample) => sample.ms));
+  const firstOutputValues = samples
+    .map((sample) => sample.firstOutputMs)
+    .filter((value): value is number => typeof value === "number" && Number.isFinite(value));
  const rssValues = samples
    .map((sample) => sample.maxRssMb)
    .filter((value): value is number => typeof value === "number" && Number.isFinite(value));
  return {
    sampleCount: samples.length,
    durationMs: durations,
+    firstOutputMs: firstOutputValues.length > 0 ? summarizeNumbers(firstOutputValues) : null,
    maxRssMb: rssValues.length > 0 ? summarizeNumbers(rssValues) : null,
    exitSummary: collectExitSummary(samples),
  };
@@ -300,7 +502,135 @@ function buildCpuOrHeapFlags(options: { cpuProfDir?: string; heapProfDir?: strin
  return flags;
 }

-function runCase(params: {
+/**
+ * Appends `chunk` to `current` and keeps only the trailing `maxLength` characters.
+ *
+ * @param current Text accumulated so far.
+ * @param chunk Newly received data; converted with `String(chunk)`.
+ * @param maxLength Maximum number of characters to retain.
+ * @returns The combined text, trimmed from the front when it exceeds `maxLength`.
+ */
+function appendLimited(current: string, chunk: Buffer | string, maxLength: number): string {
+  const combined = `${current}${String(chunk)}`;
+  const overflow = combined.length - maxLength;
+  if (overflow > 0) {
+    // Drop the oldest characters so the most recent output survives.
+    return combined.slice(overflow);
+  }
+  return combined;
+}
+
+/**
+ * Runs the CLI entry once in a child Node process and records timing for that sample.
+ *
+ * The child is started with HOME, USERPROFILE and the OPENCLAW_* path variables pointing at
+ * a freshly created temporary directory, which is removed again in `finally`. Besides the
+ * total wall time, the sample records the delay until the first stdout/stderr chunk.
+ *
+ * @param params.entry CLI entry file passed to `node`.
+ * @param params.commandCase Case whose `args` are appended after the entry.
+ * @param params.timeoutMs Wall-clock limit; the child receives SIGTERM when it elapses and
+ *   SIGKILL one second later if it has not closed by then.
+ * @param params.cpuProfDir Optional directory forwarded to `buildCpuOrHeapFlags`.
+ * @param params.heapProfDir Optional directory forwarded to `buildCpuOrHeapFlags`.
+ * @param params.rssHookPath Module preloaded with `--import`; the peak-RSS value is parsed
+ *   from the child's stderr with `parseMaxRssMb`.
+ * @returns The recorded sample. A spawn failure does not reject; it resolves with
+ *   `exitCode: null` and the error message appended to the captured stderr.
+ */
+async function runSample(params: {
+  entry: string;
+  commandCase: CommandCase;
+  timeoutMs: number;
+  cpuProfDir?: string;
+  heapProfDir?: string;
+  rssHookPath: string;
+}): Promise<Sample> {
+  const runRoot = mkdtempSync(path.join(os.tmpdir(), "openclaw-cli-bench-home-"));
+  const stateDir = path.join(runRoot, ".openclaw");
+  const configPath = path.join(stateDir, "openclaw.json");
+  const nodeArgs = [
+    "--import",
+    params.rssHookPath,
+    ...buildCpuOrHeapFlags({
+      cpuProfDir: params.cpuProfDir,
+      heapProfDir: params.heapProfDir,
+    }),
+    params.entry,
+    ...params.commandCase.args,
+  ];
+  const started = process.hrtime.bigint();
+  let firstOutputMs: number | null = null;
+  let stdout = "";
+  let stderr = "";
+  let settled = false;
+  const maxOutputLength = 32 * 1024 * 1024;
+
+  try {
+    return await new Promise<Sample>((resolve) => {
+      const proc = spawn(process.execPath, nodeArgs, {
+        cwd: process.cwd(),
+        env: {
+          ...process.env,
+          HOME: runRoot,
+          USERPROFILE: runRoot,
+          OPENCLAW_HOME: runRoot,
+          OPENCLAW_STATE_DIR: stateDir,
+          OPENCLAW_CONFIG_PATH: configPath,
+          OPENCLAW_HIDE_BANNER: "1",
+          NO_COLOR: "1",
+          FORCE_COLOR: "0",
+        },
+        stdio: ["ignore", "pipe", "pipe"],
+      });
+      // Decode on the stream so a multi-byte UTF-8 character split across two chunks is
+      // not turned into replacement characters by a per-chunk String() conversion.
+      proc.stdout?.setEncoding("utf8");
+      proc.stderr?.setEncoding("utf8");
+
+      // Resolves the sample exactly once; "error" and "close" can both fire for one child.
+      const finish = (sample: Omit<Sample, "ms" | "firstOutputMs" | "maxRssMb">) => {
+        if (settled) {
+          return;
+        }
+        settled = true;
+        const ms = Number(process.hrtime.bigint() - started) / 1e6;
+        resolve({
+          ms,
+          firstOutputMs,
+          maxRssMb: parseMaxRssMb(stderr),
+          ...sample,
+        });
+      };
+
+      const markFirstOutput = () => {
+        if (firstOutputMs == null) {
+          firstOutputMs = Number(process.hrtime.bigint() - started) / 1e6;
+        }
+      };
+
+      // stderr also carries the benchmark's own peak-RSS report (lines containing
+      // MAX_RSS_MARKER, presumably written by the preloaded RSS hook). Those lines and
+      // blank lines are not CLI output, so they must not count as "first output";
+      // otherwise a command that prints nothing would still satisfy the contract.
+      const hasCliStderr = (chunk: Buffer | string): boolean =>
+        String(chunk)
+          .split("\n")
+          .some((line) => line.trim().length > 0 && !line.includes(MAX_RSS_MARKER));
+
+      let killTimer: ReturnType<typeof setTimeout> | undefined;
+      const timeout = setTimeout(() => {
+        try {
+          proc.kill("SIGTERM");
+        } catch {
+          // Best-effort timeout cleanup.
+        }
+        // Escalate if the child ignores SIGTERM.
+        killTimer = setTimeout(() => {
+          try {
+            proc.kill("SIGKILL");
+          } catch {
+            // Best-effort timeout cleanup.
+          }
+        }, 1_000);
+        killTimer.unref?.();
+      }, params.timeoutMs);
+      timeout.unref?.();
+
+      // Cancels both the timeout and a pending SIGKILL escalation once the child is gone,
+      // so no signal is sent after it has already closed.
+      const clearTimers = () => {
+        clearTimeout(timeout);
+        clearTimeout(killTimer);
+      };
+
+      proc.stdout?.on("data", (chunk) => {
+        markFirstOutput();
+        stdout = appendLimited(stdout, chunk, maxOutputLength);
+      });
+      proc.stderr?.on("data", (chunk) => {
+        if (hasCliStderr(chunk)) {
+          markFirstOutput();
+        }
+        stderr = appendLimited(stderr, chunk, maxOutputLength);
+      });
+      proc.once("error", (error) => {
+        clearTimers();
+        stderr = appendLimited(
+          stderr,
+          error instanceof Error ? error.message : String(error),
+          maxOutputLength,
+        );
+        finish({
+          exitCode: null,
+          signal: null,
+          stdoutTail: tailLines(stdout, 20),
+          stderrTail: tailLines(stderr, 20),
+        });
+      });
+      proc.once("close", (code, signal) => {
+        clearTimers();
+        finish({
+          exitCode: code,
+          signal,
+          // Output tails are only attached for unclean exits.
+          ...(code === 0 && signal == null
+            ? {}
+            : {
+                stdoutTail: tailLines(stdout, 20),
+                stderrTail: tailLines(stderr, 20),
+              }),
+        });
+      });
+    });
+  } finally {
+    rmSync(runRoot, { recursive: true, force: true });
+  }
+}
+
+async function runCase(params: {
  entry: string;
  commandCase: CommandCase;
  runs: number;
@@ -309,48 +639,15 @@ function runCase(params: {
  cpuProfDir?: string;
  heapProfDir?: string;
  rssHookPath: string;
-}): Sample[] {
+}): Promise<Sample[]> {
  const samples: Sample[] = [];
  const totalRuns = params.warmup + params.runs;
  for (let i = 0; i < totalRuns; i += 1) {
-    const nodeArgs = [
-      "--import",
-      params.rssHookPath,
-      ...buildCpuOrHeapFlags({
-        cpuProfDir: params.cpuProfDir,
-        heapProfDir: params.heapProfDir,
-      }),
-      params.entry,
-      ...params.commandCase.args,
-    ];
-    const started = process.hrtime.bigint();
-    const proc = spawnSync(process.execPath, nodeArgs, {
-      cwd: process.cwd(),
-      env: {
-        ...process.env,
-        OPENCLAW_HIDE_BANNER: "1",
-      },
-      stdio: ["ignore", "pipe", "pipe"],
-      encoding: "utf8",
-      timeout: params.timeoutMs,
-      maxBuffer: 32 * 1024 * 1024,
-    });
-    const ms = Number(process.hrtime.bigint() - started) / 1e6;
+    const sample = await runSample(params);
    if (i < params.warmup) {
      continue;
    }
-    samples.push({
-      ms,
-      maxRssMb: parseMaxRssMb(proc.stderr ?? ""),
-      exitCode: proc.status,
-      signal: proc.signal,
-      ...(proc.status === 0
-        ? {}
-        : {
-            stdoutTail: tailLines(proc.stdout ?? "", 20),
-            stderrTail: tailLines(proc.stderr ?? "", 20),
-          }),
-    });
+    samples.push(sample);
  }
  return samples;
 }
@@ -362,17 +659,23 @@ function tailLines(value: string, maxLines: number): string {
 function printSuite(result: SuiteResult): void {
  console.log(`Entry: ${result.entry}`);
  for (const commandCase of result.cases) {
-    const { durationMs, maxRssMb, exitSummary } = commandCase.summary;
+    const { durationMs, firstOutputMs, maxRssMb, exitSummary } = commandCase.summary;
    const rssSummary =
      maxRssMb == null
        ? "rss=n/a"
        : `rss(avg=${formatMb(maxRssMb.avg)} p50=${formatMb(maxRssMb.p50)} p95=${formatMb(maxRssMb.p95)})`;
+    const firstOutputSummary =
+      firstOutputMs == null
+        ? "first-output=n/a"
+        : `first-output(avg=${formatMs(firstOutputMs.avg)} p50=${formatMs(
+            firstOutputMs.p50,
+          )} p95=${formatMs(firstOutputMs.p95)})`;
    console.log(
      `${commandCase.name.padEnd(24)} avg=${formatMs(durationMs.avg)} p50=${formatMs(
        durationMs.p50,
      )} p95=${formatMs(durationMs.p95)} min=${formatMs(durationMs.min)} max=${formatMs(
        durationMs.max,
-      )} ${rssSummary} exits=[${exitSummary}]`,
+      )} ${firstOutputSummary} ${rssSummary} exits=[${exitSummary}]`,
    );
  }
  console.log("");
@@ -404,13 +707,14 @@ function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
  }
 }

-function buildSuiteResult(params: {
+async function buildSuiteResult(params: {
  entry: string;
  options: CliOptions;
  rssHookPath: string;
-}): SuiteResult {
-  const cases = params.options.cases.map((commandCase) => {
-    const samples = runCase({
+}): Promise<SuiteResult> {
+  const cases = [];
+  for (const commandCase of params.options.cases) {
+    const samples = await runCase({
      entry: params.entry,
      commandCase,
      runs: params.options.runs,
@@ -420,14 +724,21 @@ function buildSuiteResult(params: {
      heapProfDir: params.options.heapProfDir,
      rssHookPath: params.rssHookPath,
    });
-    return {
+    cases.push({
      id: commandCase.id,
      name: commandCase.name,
      args: commandCase.args,
+      contract:
+        commandCase.firstOutputBudgetMs != null || commandCase.exitBudgetMs != null
+          ? {
+              firstOutputBudgetMs: commandCase.firstOutputBudgetMs ?? null,
+              exitBudgetMs: commandCase.exitBudgetMs ?? null,
+            }
+          : null,
      samples,
      summary: summarizeSamples(samples),
-    };
-  });
+    });
+  }
  return {
    entry: params.entry,
    cases,
@@ -461,7 +772,8 @@ Usage:
  pnpm tsx scripts/bench-cli-startup.ts [options]

 Options:
-  --preset <startup|real|all>  Command preset to run (default: startup)
+  --preset <startup|real|response|all>
+                               Command preset to run (default: startup)
  --case <id>                  Specific case id to run; repeatable
  --entry <path>               Primary entry file (default: openclaw.mjs)
  --entry-secondary <path>     Secondary entry file for avg delta comparison
@@ -489,13 +801,13 @@ async function main(): Promise<void> {
  const tmpDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-cli-bench-"));
  const rssHookPath = buildRssHook(tmpDir);
  try {
-    const primary = buildSuiteResult({
+    const primary = await buildSuiteResult({
      entry: options.entryPrimary,
      options,
      rssHookPath,
    });
    const secondary = options.entrySecondary
-      ? buildSuiteResult({
+      ? await buildSuiteResult({
          entry: options.entrySecondary,
          options,
          rssHookPath,
--- a/scripts/build-all.mjs
+++ b/scripts/build-all.mjs
@@ -118,6 +118,15 @@ export const BUILD_ALL_PROFILES = {
    "build-stamp",
    "runtime-postbuild-stamp",
  ],
+  cliStartup: [
+    "tsdown",
+    "check-cli-bootstrap-imports",
+    "runtime-postbuild",
+    "build-stamp",
+    "runtime-postbuild-stamp",
+    "write-cli-startup-metadata",
+    "write-cli-compat",
+  ],
 };

 export function resolveBuildAllSteps(profile = "full") {
--- a/scripts/test-cli-startup-bench-budget.mjs
+++ b/scripts/test-cli-startup-bench-budget.mjs
@@ -1,6 +1,13 @@
 import { spawnSync } from "node:child_process";
 import fs from "node:fs";
-import { floatFlag, intFlag, parseFlagArgs, readEnvNumber, stringFlag } from "./lib/arg-utils.mjs";
+import {
+  booleanFlag,
+  floatFlag,
+  intFlag,
+  parseFlagArgs,
+  readEnvNumber,
+  stringFlag,
+} from "./lib/arg-utils.mjs";
 import { readJsonFile } from "./test-report-utils.mjs";

 const CLI_STARTUP_BENCH_FIXTURE_PATH = "test/fixtures/cli-startup-bench.json";
@@ -30,7 +37,10 @@ if (process.argv.slice(2).includes("--help")) {
      "  --timeout-ms <ms>             Per-run timeout (default: 30000)",
      "  --max-duration-regression-pct <n>",
      "                                Fail if avg duration regresses more than this percent",
+      "  --max-first-output-regression-pct <n>",
+      "                                Fail if avg first-output time regresses more than this percent",
      "  --max-rss-regression-pct <n>  Fail if avg RSS regresses more than this percent",
+      "  --skip-baseline               Skip fixture regression checks and enforce case contracts only",
      "  --help                        Show this help text",
      "",
      "Example:",
@@ -52,7 +62,10 @@ const opts = parseFlagArgs(
    timeoutMs: 30_000,
    maxDurationRegressionPct:
      readEnvNumber("OPENCLAW_STARTUP_BENCH_MAX_DURATION_REGRESSION_PCT") ?? 20,
+    maxFirstOutputRegressionPct:
+      readEnvNumber("OPENCLAW_STARTUP_BENCH_MAX_FIRST_OUTPUT_REGRESSION_PCT") ?? 20,
    maxRssRegressionPct: readEnvNumber("OPENCLAW_STARTUP_BENCH_MAX_RSS_REGRESSION_PCT") ?? 20,
+    skipBaseline: false,
  },
  [
    stringFlag("--baseline", "baseline"),
@@ -63,7 +76,9 @@ const opts = parseFlagArgs(
    intFlag("--warmup", "warmup", { min: 0 }),
    intFlag("--timeout-ms", "timeoutMs", { min: 1 }),
    floatFlag("--max-duration-regression-pct", "maxDurationRegressionPct", { min: 0 }),
+    floatFlag("--max-first-output-regression-pct", "maxFirstOutputRegressionPct", { min: 0 }),
    floatFlag("--max-rss-regression-pct", "maxRssRegressionPct", { min: 0 }),
+    booleanFlag("--skip-baseline", "skipBaseline"),
  ],
 );

@@ -112,57 +127,133 @@ const currentCases = indexCases(current);

 let failed = false;

-for (const [id, baselineCase] of baselineCases) {
-  const currentCase = currentCases.get(id);
-  if (!currentCase) {
-    console.error(`[test-cli-startup-bench-budget] missing current case ${String(id)}`);
-    failed = true;
+/**
+ * Checks one averaged metric against its baseline and reports a regression on stderr.
+ *
+ * The check is skipped (returns false) unless both values are finite numbers and the
+ * baseline is positive, so a case that lacks the metric never fails on it.
+ *
+ * @param {object} check
+ * @param {string} check.caseName Case name used in the message.
+ * @param {string} check.metricLabel Metric wording used in the message, e.g. "duration".
+ * @param {unknown} check.baseline Baseline average.
+ * @param {unknown} check.current Current average.
+ * @param {number} check.maxRegressionPct Allowed increase over the baseline, in percent.
+ * @param {(value: number) => string} check.format Formatter for the reported values.
+ * @returns {boolean} True when a regression was reported.
+ */
+function exceedsRegressionBudget({
+  caseName,
+  metricLabel,
+  baseline,
+  current,
+  maxRegressionPct,
+  format,
+}) {
+  if (!Number.isFinite(baseline) || !Number.isFinite(current) || !(baseline > 0)) {
+    return false;
+  }
+  const allowed = baseline * (1 + maxRegressionPct / 100);
+  if (!(current > allowed)) {
+    return false;
+  }
+  console.error(
+    `[test-cli-startup-bench-budget] ${caseName} avg ${metricLabel} ${format(
+      current,
+    )} exceeded ${format(allowed)} (baseline ${format(baseline)}, +${String(
+      maxRegressionPct,
+    )}%).`,
+  );
+  return true;
+}
+
+// Fixture regression checks: compare every baseline case against the current run unless
+// --skip-baseline was passed.
+if (!opts.skipBaseline) {
+  for (const [id, baselineCase] of baselineCases) {
+    const currentCase = currentCases.get(id);
+    if (!currentCase) {
+      console.error(`[test-cli-startup-bench-budget] missing current case ${String(id)}`);
+      failed = true;
+      continue;
+    }
+
+    const baselineDuration = baselineCase.summary?.durationMs?.avg;
+    const currentDuration = currentCase.summary?.durationMs?.avg;
+    if (
+      exceedsRegressionBudget({
+        caseName: baselineCase.name,
+        metricLabel: "duration",
+        baseline: baselineDuration,
+        current: currentDuration,
+        maxRegressionPct: opts.maxDurationRegressionPct,
+        format: formatMs,
+      })
+    ) {
+      failed = true;
+    }
+
+    const baselineFirstOutput = baselineCase.summary?.firstOutputMs?.avg;
+    const currentFirstOutput = currentCase.summary?.firstOutputMs?.avg;
+    if (
+      exceedsRegressionBudget({
+        caseName: baselineCase.name,
+        metricLabel: "first output",
+        baseline: baselineFirstOutput,
+        current: currentFirstOutput,
+        maxRegressionPct: opts.maxFirstOutputRegressionPct,
+        format: formatMs,
+      })
+    ) {
+      failed = true;
+    }
+
+    const baselineRss = baselineCase.summary?.maxRssMb?.avg;
+    const currentRss = currentCase.summary?.maxRssMb?.avg;
+    if (
+      exceedsRegressionBudget({
+        caseName: baselineCase.name,
+        metricLabel: "RSS",
+        baseline: baselineRss,
+        current: currentRss,
+        maxRegressionPct: opts.maxRssRegressionPct,
+        format: formatMb,
+      })
+    ) {
+      failed = true;
+    }
+
+    // Per-case summary line, printed whether or not a regression was reported.
+    console.log(
+      `[test-cli-startup-bench-budget] ${baselineCase.name} duration=${formatMs(
+        currentDuration,
+      )} baseline=${formatMs(baselineDuration)} firstOutput=${
+        Number.isFinite(currentFirstOutput) ? formatMs(currentFirstOutput) : "n/a"
+      } baselineFirstOutput=${
+        Number.isFinite(baselineFirstOutput) ? formatMs(baselineFirstOutput) : "n/a"
+      } rss=${
+        Number.isFinite(currentRss) ? formatMb(currentRss) : "n/a"
+      } baselineRss=${Number.isFinite(baselineRss) ? formatMb(baselineRss) : "n/a"}`,
+    );
+  }
+}
+
+// Response contract checks: every current case that declares a contract must exit cleanly,
+// produce its first output within firstOutputBudgetMs and finish within exitBudgetMs.
+// Worst-case (max) values are compared, so a single slow sample fails the case.
+for (const currentCase of currentCases.values()) {
+  const contract = currentCase.contract;
+  if (!contract) {
    continue;
  }

-  const baselineDuration = baselineCase.summary?.durationMs?.avg;
-  const currentDuration = currentCase.summary?.durationMs?.avg;
-  if (
-    Number.isFinite(baselineDuration) &&
-    Number.isFinite(currentDuration) &&
-    baselineDuration > 0
-  ) {
-    const allowedDuration = baselineDuration * (1 + opts.maxDurationRegressionPct / 100);
-    if (currentDuration > allowedDuration) {
-      console.error(
-        `[test-cli-startup-bench-budget] ${baselineCase.name} avg duration ${formatMs(
-          currentDuration,
-        )} exceeded ${formatMs(allowedDuration)} (baseline ${formatMs(
-          baselineDuration,
-        )}, +${String(opts.maxDurationRegressionPct)}%).`,
-      );
-      failed = true;
-    }
-  }
-
-  const baselineRss = baselineCase.summary?.maxRssMb?.avg;
-  const currentRss = currentCase.summary?.maxRssMb?.avg;
-  if (Number.isFinite(baselineRss) && Number.isFinite(currentRss) && baselineRss > 0) {
-    const allowedRss = baselineRss * (1 + opts.maxRssRegressionPct / 100);
-    if (currentRss > allowedRss) {
-      console.error(
-        `[test-cli-startup-bench-budget] ${baselineCase.name} avg RSS ${formatMb(
-          currentRss,
-        )} exceeded ${formatMb(allowedRss)} (baseline ${formatMb(
-          baselineRss,
-        )}, +${String(opts.maxRssRegressionPct)}%).`,
-      );
-      failed = true;
-    }
-  }
-
-  console.log(
-    `[test-cli-startup-bench-budget] ${baselineCase.name} duration=${formatMs(
-      currentDuration,
-    )} baseline=${formatMs(baselineDuration)} rss=${
-      Number.isFinite(currentRss) ? formatMb(currentRss) : "n/a"
-    } baselineRss=${Number.isFinite(baselineRss) ? formatMb(baselineRss) : "n/a"}`,
+  const badSample = (currentCase.samples ?? []).find(
+    (sample) => sample.exitCode !== 0 || sample.signal != null,
  );
+  if (badSample) {
+    console.error(
+      `[test-cli-startup-bench-budget] ${currentCase.name} exited ${String(
+        badSample.signal ?? badSample.exitCode,
+      )}; response contract requires a clean exit.`,
+    );
+    failed = true;
+  }
+
+  const firstOutputBudgetMs = contract.firstOutputBudgetMs;
+  const firstOutputMax = currentCase.summary?.firstOutputMs?.max;
+  if (Number.isFinite(firstOutputBudgetMs)) {
+    // The benchmark summary aggregates firstOutputMs only over samples that produced
+    // output, so a silent sample would be hidden behind the others; check the raw
+    // samples as well.
+    const hasSilentSample = (currentCase.samples ?? []).some(
+      (sample) => !Number.isFinite(sample.firstOutputMs),
+    );
+    if (hasSilentSample || !Number.isFinite(firstOutputMax)) {
+      console.error(
+        `[test-cli-startup-bench-budget] ${currentCase.name} produced no stdout/stderr before exit; response contract requires first output within ${formatMs(
+          firstOutputBudgetMs,
+        )}.`,
+      );
+      failed = true;
+    }
+    if (Number.isFinite(firstOutputMax) && firstOutputMax > firstOutputBudgetMs) {
+      console.error(
+        `[test-cli-startup-bench-budget] ${currentCase.name} first output ${formatMs(
+          firstOutputMax,
+        )} exceeded contract ${formatMs(firstOutputBudgetMs)}.`,
+      );
+      failed = true;
+    }
+  }
+
+  const exitBudgetMs = contract.exitBudgetMs;
+  const durationMax = currentCase.summary?.durationMs?.max;
+  if (Number.isFinite(exitBudgetMs) && Number.isFinite(durationMax) && durationMax > exitBudgetMs) {
+    console.error(
+      `[test-cli-startup-bench-budget] ${currentCase.name} exit ${formatMs(
+        durationMax,
+      )} exceeded contract ${formatMs(exitBudgetMs)}.`,
+    );
+    failed = true;
+  }
 }

 if (failed) {
--- a/test/scripts/build-all.test.ts
+++ b/test/scripts/build-all.test.ts
@@ -162,6 +162,18 @@ describe("resolveBuildAllSteps", () => {
    ]);
  });

+  it("uses a CLI startup profile without generated plugin assets", () => {
+    // The profile is pinned step by step so any added or reordered step is caught here.
+    const expectedLabels = [
+      "tsdown",
+      "check-cli-bootstrap-imports",
+      "runtime-postbuild",
+      "build-stamp",
+      "runtime-postbuild-stamp",
+      "write-cli-startup-metadata",
+      "write-cli-compat",
+    ];
+    const actualLabels = resolveBuildAllSteps("cliStartup").map(({ label }) => label);
+    expect(actualLabels).toEqual(expectedLabels);
+  });
+
  it("writes the runtime postbuild stamp after the build stamp", () => {
    const labels = resolveBuildAllSteps("full").map((step) => step.label);
    expect(labels).toContain("runtime-postbuild");