ci: add Matrix QA profiles

This commit is contained in:
Peter Steinberger
2026-04-27 05:43:10 +01:00
parent 382e03a2d8
commit 6987132aed
23 changed files with 446 additions and 48 deletions

View File

@@ -62,6 +62,24 @@ scenario through qa-channel, decodes the emitted protobuf spans, and verifies
the exported trace names and privacy contract. It does not require Opik,
Langfuse, or external collector credentials.
## Matrix live profiles
`pnpm openclaw qa matrix` defaults to the full `all` profile. Use explicit
profiles for faster CI/release proof:
```bash
OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS=3000 \
pnpm openclaw qa matrix --profile fast --fail-fast
```
- `fast`: release-critical transport contract, excluding generated image and
deep E2EE recovery inventory.
- `transport`, `media`, `e2ee-smoke`, `e2ee-deep`, `e2ee-cli`: sharded full
Matrix coverage.
- `QA-Lab - All Lanes` uses explicit `fast` Matrix on scheduled runs. Manual
dispatch keeps `matrix_profile=all` as the default and can shard full Matrix
with `matrix_profile=all` and `matrix_shards=true`.
## QA credentials and 1Password
- Use `op` only inside `tmux` for QA secret lookup in this repo.

View File

@@ -110,7 +110,7 @@ dispatches:
- manual `CI` for the full normal CI graph
- `OpenClaw Release Checks` for install smoke, cross-OS release checks, live and
E2E checks, Docker release-path suites, OpenWebUI, QA Lab, Matrix, and
E2E checks, Docker release-path suites, OpenWebUI, QA Lab, fast Matrix, and
Telegram release lanes
- optional post-publish Telegram E2E when a package spec is supplied
@@ -175,6 +175,23 @@ gh workflow run openclaw-release-checks.yml \
-f mode=both
```
### QA Lab Matrix Profiles
`pnpm openclaw qa matrix` defaults to `--profile all`. Do not assume the CLI
default is the fast release path. Use explicit profiles:
- `--profile fast --fail-fast`: release-critical Matrix transport contract
- `--profile transport|media|e2ee-smoke|e2ee-deep|e2ee-cli`: sharded full
Matrix proof
- `OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS=3000`: CI-friendly no-reply quiet
window when paired with fast or sharded gates
`QA-Lab - All Lanes` uses explicit fast Matrix on scheduled runs; manual
dispatch keeps `matrix_profile=all` as the default and can shard full Matrix
with `matrix_profile=all` and `matrix_shards=true`. `OpenClaw Release Checks`
uses explicit fast Matrix; run the sharded all-lanes workflow when release
investigation needs full Matrix media/E2EE inventory.
### Reusable Live/E2E Checks
`OpenClaw Live And E2E Checks (Reusable)`

View File

@@ -355,6 +355,7 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS: "3000"
run: |
set -euo pipefail
@@ -367,7 +368,9 @@ jobs:
--provider-mode live-frontier \
--model "${OPENCLAW_CI_OPENAI_MODEL}" \
--alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \
--fast
--profile fast \
--fast \
--fail-fast
- name: Upload Matrix QA artifacts
if: always()

View File

@@ -18,6 +18,27 @@ on:
description: Optional comma-separated Discord scenario ids
required: false
type: string
# Manual-dispatch selector for the Matrix QA profile. The default stays `all`;
# the live Matrix job substitutes `fast` when the run is not a workflow_dispatch.
matrix_profile:
description: Matrix QA profile for the live Matrix lane
required: false
default: all
type: choice
options:
- fast
- all
- transport
- media
- e2ee-smoke
- e2ee-deep
- e2ee-cli
# String-valued toggle ("true"/"false"). The job conditions compare it against
# the literal 'true', and it only takes effect together with matrix_profile=all.
matrix_shards:
description: Shard matrix_profile=all into parallel Matrix profile jobs
required: false
default: "false"
type: choice
options:
- "false"
- "true"
permissions:
contents: read
@@ -199,6 +220,7 @@ jobs:
run_live_matrix:
name: Run Matrix live QA lane
needs: [authorize_actor, validate_selected_ref]
if: ${{ !(github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all' && inputs.matrix_shards == 'true') }}
runs-on: blacksmith-32vcpu-ubuntu-2404
timeout-minutes: 60
environment: qa-live-shared
@@ -236,7 +258,9 @@ jobs:
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
INPUT_MATRIX_PROFILE: ${{ github.event_name == 'workflow_dispatch' && inputs.matrix_profile || 'fast' }}
OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS: "3000"
run: |
set -euo pipefail
@@ -249,7 +273,9 @@ jobs:
--provider-mode live-frontier \
--model "${OPENCLAW_CI_OPENAI_MODEL}" \
--alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \
--fast
--profile "${INPUT_MATRIX_PROFILE}" \
--fast \
--fail-fast
- name: Upload Matrix QA artifacts
if: always()
@@ -260,6 +286,83 @@ jobs:
retention-days: 14
if-no-files-found: warn
# Sharded variant of the live Matrix lane: one job per shard profile so the
# full catalog runs in parallel. Only active for manual dispatch with
# matrix_profile=all and matrix_shards=true; otherwise run_live_matrix runs.
run_live_matrix_sharded:
  name: Run Matrix live QA lane (${{ matrix.profile }})
  needs: [authorize_actor, validate_selected_ref]
  if: ${{ github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all' && inputs.matrix_shards == 'true' }}
  runs-on: blacksmith-32vcpu-ubuntu-2404
  timeout-minutes: 60
  environment: qa-live-shared
  strategy:
    # Let every shard finish so one failing profile does not hide the others.
    fail-fast: false
    matrix:
      profile:
        - transport
        - media
        - e2ee-smoke
        - e2ee-deep
        - e2ee-cli
  steps:
    - name: Checkout selected ref
      uses: actions/checkout@v6
      with:
        ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
        fetch-depth: 1

    - name: Setup Node environment
      uses: ./.github/actions/setup-node-env
      with:
        node-version: ${{ env.NODE_VERSION }}
        pnpm-version: ${{ env.PNPM_VERSION }}
        install-bun: "true"

    - name: Validate required QA credential env
      env:
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      shell: bash
      run: |
        set -euo pipefail
        if [[ -z "${OPENAI_API_KEY:-}" ]]; then
          echo "Missing required OPENAI_API_KEY." >&2
          exit 1
        fi

    - name: Build private QA runtime
      run: pnpm build

    - name: Run Matrix live lane shard
      id: run_lane
      shell: bash
      env:
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        # Passed through env (like INPUT_MATRIX_PROFILE in run_live_matrix) so
        # the script body never interpolates a workflow expression directly.
        MATRIX_PROFILE: ${{ matrix.profile }}
        OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
        OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS: "3000"
      run: |
        set -euo pipefail

        # Per-shard output directory keeps artifacts from parallel jobs apart.
        output_dir=".artifacts/qa-e2e/matrix-live-${MATRIX_PROFILE}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
        echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT"

        pnpm openclaw qa matrix \
          --repo-root . \
          --output-dir "${output_dir}" \
          --provider-mode live-frontier \
          --model "${OPENCLAW_CI_OPENAI_MODEL}" \
          --alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \
          --profile "${MATRIX_PROFILE}" \
          --fast \
          --fail-fast

    - name: Upload Matrix QA shard artifacts
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: qa-live-matrix-${{ matrix.profile }}-${{ github.run_id }}-${{ github.run_attempt }}
        path: ${{ steps.run_lane.outputs.output_dir }}
        retention-days: 14
        if-no-files-found: warn
run_live_telegram:
name: Run Telegram live QA lane with Convex leases
needs: [authorize_actor, validate_selected_ref]

View File

@@ -145,9 +145,13 @@ QA Lab has dedicated CI lanes outside the main smart-scoped workflow. The
builds the private QA runtime and compares the mock GPT-5.5 and Opus 4.6
agentic packs. The `QA-Lab - All Lanes` workflow runs nightly on `main` and on
manual dispatch; it fans out the mock parity gate, live Matrix lane, and live
Telegram lane as parallel jobs. The live jobs use the `qa-live-shared`
environment, and the Telegram lane uses Convex leases. `OpenClaw Release
Checks` also runs the same QA Lab lanes before release approval.
Telegram and Discord lanes as parallel jobs. The live jobs use the
`qa-live-shared` environment, and Telegram/Discord use Convex leases. Matrix
uses `--profile fast --fail-fast` for scheduled and release gates while the CLI
default and manual workflow input remain `all`; manual all-lanes dispatch can
shard full Matrix coverage into `transport`, `media`, `e2ee-smoke`,
`e2ee-deep`, and `e2ee-cli` jobs. `OpenClaw Release Checks` also runs the
release-critical QA Lab lanes before release approval.
The `Duplicate PRs After Merge` workflow is a manual maintainer workflow for
post-land duplicate cleanup. It defaults to dry-run and only closes explicitly

View File

@@ -73,7 +73,7 @@ instrumentation.
For a transport-real Matrix smoke lane, run:
```bash
pnpm openclaw qa matrix
pnpm openclaw qa matrix --profile fast --fail-fast
```
That lane provisions a disposable Tuwunel homeserver in Docker, registers
@@ -84,9 +84,15 @@ the child config scoped to the transport under test, so Matrix runs without
a combined stdout/stderr log into the selected Matrix QA output directory. To
capture the outer `scripts/run-node.mjs` build/launcher output too, set
`OPENCLAW_RUN_NODE_OUTPUT_LOG=<path>` to a repo-local log file.
Matrix progress is printed by default. `OPENCLAW_QA_MATRIX_TIMEOUT_MS` bounds
the full run, and `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` bounds cleanup so a
stuck Docker teardown reports the exact recovery command instead of hanging.
Matrix progress is printed by default. The CLI default profile is `all`, so
plain `pnpm openclaw qa matrix` still runs the full catalog. Use `--profile
fast` for the release-critical transport contract, or shard full coverage with
`transport`, `media`, `e2ee-smoke`, `e2ee-deep`, and `e2ee-cli`. `--fail-fast`
stops after the first failed scenario when you want a release gate instead of a
full inventory. `OPENCLAW_QA_MATRIX_TIMEOUT_MS` bounds the full run,
`OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS` can shorten no-reply quiet windows for
CI, and `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` bounds cleanup so a stuck
Docker teardown reports the exact recovery command instead of hanging.
For a transport-real Telegram smoke lane, run:

View File

@@ -92,9 +92,13 @@ These commands sit beside the main test suites when you need QA-lab realism:
CI runs QA Lab in dedicated workflows. `Parity gate` runs on matching PRs and
from manual dispatch with mock providers. `QA-Lab - All Lanes` runs nightly on
`main` and from manual dispatch with the mock parity gate, live Matrix lane, and
Convex-managed live Telegram lane as parallel jobs. `OpenClaw Release Checks`
runs the same lanes before release approval.
`main` and from manual dispatch with the mock parity gate, live Matrix lane,
Convex-managed live Telegram lane, and Convex-managed live Discord lane as
parallel jobs. Scheduled QA and release checks pass Matrix `--profile fast`
explicitly, while the Matrix CLI default and the manual workflow input default
both remain
`all`; manual dispatch can shard `all` into `transport`, `media`, `e2ee-smoke`,
`e2ee-deep`, and `e2ee-cli` jobs. `OpenClaw Release Checks` runs parity plus
the fast Matrix and Telegram lanes before release approval.
- `pnpm openclaw qa suite`
- Runs repo-backed QA scenarios directly on the host.
@@ -248,10 +252,11 @@ gh workflow run package-acceptance.yml --ref main \
- Repo checkouts load the bundled runner directly; no separate plugin install
step is needed.
- Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport.
- Defaults to `--profile all`. Use `--profile fast --fail-fast` for release-critical transport proof, or `--profile transport|media|e2ee-smoke|e2ee-deep|e2ee-cli` when sharding the full catalog.
- Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image.
- Matrix does not expose shared credential-source flags because the lane provisions disposable users locally.
- Writes a Matrix QA report, summary, observed-events artifact, and combined stdout/stderr output log under `.artifacts/qa-e2e/...`.
- Emits progress by default and enforces a hard run timeout with `OPENCLAW_QA_MATRIX_TIMEOUT_MS` (default 30 minutes). Cleanup is bounded by `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` and failures include the recovery `docker compose ... down --remove-orphans` command.
- Emits progress by default and enforces a hard run timeout with `OPENCLAW_QA_MATRIX_TIMEOUT_MS` (default 30 minutes). `OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS` sets how long negative (no-reply) checks wait in silence before passing (default 8000 ms, never longer than the scenario timeout), and cleanup is bounded by `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` with failures including the recovery `docker compose ... down --remove-orphans` command.
- `pnpm openclaw qa telegram`
- Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env.
- Requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, `OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and `OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`. The group id must be the numeric Telegram chat id.
@@ -267,10 +272,11 @@ Live transport lanes share one standard contract so new transports do not drift:
`qa-channel` remains the broad synthetic QA suite and is not part of the live
transport coverage matrix.
| Lane | Canary | Mention gating | Allowlist block | Top-level reply | Restart resume | Thread follow-up | Thread isolation | Reaction observation | Help command |
| -------- | ------ | -------------- | --------------- | --------------- | -------------- | ---------------- | ---------------- | -------------------- | ------------ |
| Matrix | x | x | x | x | x | x | x | x | |
| Telegram | x | | | | | | | | x |
| Lane | Canary | Mention gating | Allowlist block | Top-level reply | Restart resume | Thread follow-up | Thread isolation | Reaction observation | Help command | Native command registration |
| -------- | ------ | -------------- | --------------- | --------------- | -------------- | ---------------- | ---------------- | -------------------- | ------------ | --------------------------- |
| Matrix | x | x | x | x | x | x | x | x | | |
| Telegram | x | x | | | | | | | x | |
| Discord | x | x | | | | | | | | x |
### Shared Telegram credentials via Convex (v1)

View File

@@ -137,9 +137,12 @@ the maintainer-only release runbook.
- Run `pnpm release:check` before every tagged release
- Release checks now run in a separate manual workflow:
`OpenClaw Release Checks`
- `OpenClaw Release Checks` also runs the QA Lab mock parity gate plus the live
Matrix and Telegram QA lanes before release approval. The live lanes use the
`qa-live-shared` environment; Telegram also uses Convex CI credential leases.
- `OpenClaw Release Checks` also runs the QA Lab mock parity gate plus the fast
live Matrix profile and Telegram QA lane before release approval. The live
lanes use the `qa-live-shared` environment; Telegram also uses Convex CI
credential leases. Run the manual `QA-Lab - All Lanes` workflow with
`matrix_profile=all` and `matrix_shards=true` when you want full Matrix
transport, media, and E2EE inventory in parallel.
- Cross-OS install and upgrade runtime validation is dispatched from the
private caller workflow
`openclaw/releases-private/.github/workflows/openclaw-cross-os-release-checks.yml`,
@@ -338,13 +341,14 @@ Release QA Lab coverage includes:
- mock parity gate comparing the OpenAI candidate lane against the Opus 4.6
baseline using the agentic parity pack
- live Matrix QA lane using the `qa-live-shared` environment
- fast live Matrix QA profile using the `qa-live-shared` environment
- live Telegram QA lane using Convex CI credential leases
- `pnpm qa:otel:smoke` when release telemetry needs explicit local proof
Use this box to answer "does the release behave correctly in QA scenarios and
live channel flows?" Keep the artifact URLs for parity, Matrix, and Telegram
lanes when approving the release.
lanes when approving the release. Full Matrix coverage remains available as a
manual sharded QA-Lab run rather than the default release-critical lane.
### Package

View File

@@ -67,6 +67,8 @@ describe("matrix qa cli registration", () => {
"--alt-model",
"--scenario",
"--fast",
"--profile",
"--fail-fast",
"--sut-account",
]),
);

View File

@@ -56,6 +56,9 @@ export const matrixQaCliRegistration: LiveTransportQaCliRegistration =
commandName: "matrix",
description: "Run the Docker-backed Matrix live QA lane against a disposable homeserver",
outputDirHelp: "Matrix QA artifact directory",
profileHelp:
"Matrix QA profile: all, fast, transport, media, e2ee-smoke, e2ee-deep, or e2ee-cli (default: all)",
failFastHelp: "Stop after the first failed Matrix check or scenario",
scenarioHelp: "Run only the named Matrix QA scenario (repeatable)",
sutAccountHelp: "Temporary Matrix account id inside the QA gateway config",
run: runQaMatrix,

View File

@@ -491,11 +491,14 @@ describe("matrix live qa runtime", () => {
expect(report).toContain("observed events: /tmp/observed.json");
});
it("keeps Matrix scenario execution in catalog order across config changes", () => {
it("groups Matrix scenario execution by gateway config while preserving tail scenarios", () => {
const scenarios = liveTesting.findMatrixQaScenarios([
"matrix-thread-follow-up",
"matrix-e2ee-cli-encryption-setup-multi-account",
"matrix-thread-isolation",
"matrix-e2ee-cli-setup-then-gateway-reply",
"matrix-e2ee-cli-self-verification",
"matrix-e2ee-wrong-account-recovery-key",
]);
expect(
@@ -503,9 +506,12 @@ describe("matrix live qa runtime", () => {
.scheduleMatrixQaScenariosInCatalogOrder(scenarios)
.map(({ scenario }) => scenario.id),
).toEqual([
"matrix-thread-follow-up",
"matrix-thread-isolation",
"matrix-e2ee-cli-self-verification",
"matrix-e2ee-cli-encryption-setup-multi-account",
"matrix-e2ee-cli-setup-then-gateway-reply",
"matrix-e2ee-cli-self-verification",
"matrix-e2ee-wrong-account-recovery-key",
]);
});

View File

@@ -61,6 +61,8 @@ function buildMatrixQaGatewayConfigKey(overrides?: MatrixQaConfigOverrides) {
return JSON.stringify(overrides ?? null);
}
// Scenario ids the scheduler pulls out of config grouping and appends after
// every grouped scenario, regardless of where they appear in the input.
const MATRIX_QA_EXECUTION_TAIL_SCENARIO_IDS = new Set(["matrix-e2ee-wrong-account-recovery-key"]);
type MatrixQaScenarioResult = {
artifacts?: MatrixQaScenarioArtifacts;
details: string;
@@ -313,7 +315,27 @@ function buildMatrixQaScenarioResult(params: {
/**
 * Orders the selected scenarios for execution.
 *
 * Despite the historical name, the result is not strict catalog order:
 * scenarios are bucketed by their gateway config key so that scenarios sharing
 * the same `configOverrides` run back to back. Buckets appear in the order
 * their first scenario was seen, and scenarios keep their relative order
 * inside a bucket. Ids listed in `MATRIX_QA_EXECUTION_TAIL_SCENARIO_IDS` skip
 * bucketing and are appended last, in input order.
 *
 * @param scenarios Scenarios to schedule, in selection order.
 * @returns One entry per scenario, each carrying its index in `scenarios`.
 */
function scheduleMatrixQaScenariosInCatalogOrder(
  scenarios: readonly (typeof MATRIX_QA_SCENARIOS)[number][],
): MatrixQaScheduledScenario[] {
  // Map iteration follows insertion order, which keeps buckets in
  // first-seen order without a separate index table.
  const groupsByConfigKey = new Map<string, MatrixQaScheduledScenario[]>();
  const tailEntries: MatrixQaScheduledScenario[] = [];

  scenarios.forEach((scenario, originalIndex) => {
    const entry: MatrixQaScheduledScenario = { originalIndex, scenario };
    if (MATRIX_QA_EXECUTION_TAIL_SCENARIO_IDS.has(scenario.id)) {
      tailEntries.push(entry);
      return;
    }
    const key = buildMatrixQaGatewayConfigKey(scenario.configOverrides);
    const group = groupsByConfigKey.get(key);
    if (group === undefined) {
      groupsByConfigKey.set(key, [entry]);
    } else {
      group.push(entry);
    }
  });

  return [...[...groupsByConfigKey.values()].flat(), ...tailEntries];
}
function getMatrixQaScenarioRestartReadyTimeoutMs(scenario: { timeoutMs: number }): number {
@@ -498,8 +520,10 @@ async function startMatrixQaLiveLaneGateway(params: {
export async function runMatrixQaLive(params: {
fastMode?: boolean;
failFast?: boolean;
outputDir?: string;
primaryModel?: string;
profile?: string;
providerMode?: QaProviderModeInput;
repoRoot?: string;
scenarioIds?: string[];
@@ -518,7 +542,7 @@ export async function runMatrixQaLive(params: {
alternateModel: params.alternateModel,
});
const sutAccountId = params.sutAccountId?.trim() || "sut";
const scenarios = findMatrixQaScenarios(params.scenarioIds);
const scenarios = findMatrixQaScenarios(params.scenarioIds, params.profile);
const runSuffix = randomUUID().slice(0, 8);
const topology = buildMatrixQaTopologyForScenarios({
defaultRoomName: `OpenClaw Matrix QA ${runSuffix}`,
@@ -531,7 +555,7 @@ export async function runMatrixQaLive(params: {
const runStartedAtMs = Date.now();
const runDeadline = createMatrixQaRunDeadline();
writeMatrixQaProgress(
`suite start scenarios=${scenarios.length} provider=${providerMode} output=${outputDir} timeout=${formatMatrixQaDurationMs(runDeadline.timeoutMs)}`,
`suite start scenarios=${scenarios.length} profile=${params.profile?.trim() || "all"} provider=${providerMode} output=${outputDir} timeout=${formatMatrixQaDurationMs(runDeadline.timeoutMs)}`,
);
const { durationMs: harnessBootMs, result: harness } = await measureMatrixQaStep(() =>
@@ -895,6 +919,10 @@ export async function runMatrixQaLive(params: {
status: "fail",
});
writeMatrixQaProgress(`scenario fail ${scenario.id} ${formatErrorMessage(error)}`);
if (params.failFast) {
writeMatrixQaProgress("fail-fast stop");
break;
}
}
}
}

View File

@@ -96,6 +96,15 @@ export type MatrixQaScenarioDefinition = LiveTransportScenarioDefinition<MatrixQ
topology?: MatrixQaTopologySpec;
};
/**
 * Named scenario selections for the Matrix QA lane.
 *
 * - `all`: every scenario in the catalog.
 * - `fast`: a fixed short list of scenario ids.
 * - `transport`: catalog ids that are neither E2EE-prefixed nor in the media list.
 * - `media`: a fixed list of media scenario ids.
 * - `e2ee-smoke`: a fixed list of E2EE scenario ids.
 * - `e2ee-cli`: catalog ids starting with `matrix-e2ee-cli-`.
 * - `e2ee-deep`: remaining E2EE ids not covered by `e2ee-cli`, `media`, or `e2ee-smoke`.
 */
export type MatrixQaProfile =
| "all"
| "e2ee-cli"
| "e2ee-deep"
| "e2ee-smoke"
| "fast"
| "media"
| "transport";
export const MATRIX_QA_BLOCK_ROOM_KEY = "block";
export const MATRIX_QA_DRIVER_DM_ROOM_KEY = "driver-dm";
export const MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY = "driver-dm-shared";
@@ -907,14 +916,117 @@ export const MATRIX_QA_STANDARD_SCENARIO_IDS = collectLiveTransportStandardScena
scenarios: MATRIX_QA_SCENARIOS,
});
export function findMatrixQaScenarios(ids?: string[]) {
/**
 * Every accepted profile name. Used to validate user-supplied profiles and
 * joined, in this order, into the "expected one of" part of the error message.
 */
export const MATRIX_QA_PROFILE_NAMES: readonly MatrixQaProfile[] = [
"all",
"fast",
"transport",
"media",
"e2ee-smoke",
"e2ee-deep",
"e2ee-cli",
] as const;
// Scenario ids returned for the `fast` profile. `satisfies` checks each entry
// against MatrixQaScenarioId so a renamed scenario fails type-checking here.
const MATRIX_QA_FAST_PROFILE_SCENARIO_IDS = [
"matrix-thread-follow-up",
"matrix-thread-isolation",
"matrix-top-level-reply-shape",
"matrix-reaction-notification",
"matrix-restart-resume",
"matrix-mention-gating",
"matrix-allowlist-block",
"matrix-e2ee-basic-reply",
] satisfies MatrixQaScenarioId[];
// Scenario ids returned for the `media` profile. The same ids are excluded
// from the `transport` and `e2ee-deep` selections so shards do not overlap.
const MATRIX_QA_MEDIA_PROFILE_SCENARIO_IDS = [
"matrix-room-image-understanding-attachment",
"matrix-room-generated-image-delivery",
"matrix-media-type-coverage",
"matrix-attachment-only-ignored",
"matrix-unsupported-media-safe",
"matrix-e2ee-media-image",
] satisfies MatrixQaScenarioId[];
// Scenario ids returned for the `e2ee-smoke` profile. The same ids are
// excluded from the `e2ee-deep` selection.
const MATRIX_QA_E2EE_SMOKE_PROFILE_SCENARIO_IDS = [
"matrix-e2ee-basic-reply",
"matrix-e2ee-thread-follow-up",
"matrix-e2ee-bootstrap-success",
"matrix-e2ee-recovery-key-lifecycle",
"matrix-e2ee-recovery-owner-verification-required",
"matrix-e2ee-restart-resume",
"matrix-e2ee-artifact-redaction",
"matrix-e2ee-key-bootstrap-failure",
] satisfies MatrixQaScenarioId[];
/** Type guard: true when the scenario id carries the `matrix-e2ee-` prefix. */
function isMatrixQaE2eeScenarioId(id: MatrixQaScenarioId): id is MatrixQaE2eeScenarioId {
  const e2eePrefix = "matrix-e2ee-";
  return id.startsWith(e2eePrefix);
}
/** True when the scenario id carries the `matrix-e2ee-cli-` prefix. */
function isMatrixQaCliE2eeScenarioId(id: MatrixQaScenarioId) {
  const cliE2eePrefix = "matrix-e2ee-cli-";
  return id.startsWith(cliE2eePrefix);
}
/** Copies a list of scenario ids into a Set for membership checks. */
function buildMatrixQaScenarioIdSet(ids: readonly MatrixQaScenarioId[]) {
  const idSet = new Set<MatrixQaScenarioId>(ids);
  return idSet;
}
/**
 * Resolves a user-supplied profile string to a known profile name.
 *
 * Input is trimmed and lower-cased; a missing, empty, or whitespace-only value
 * resolves to `"all"`.
 *
 * @param profile Raw profile text, if any.
 * @returns The matching entry of `MATRIX_QA_PROFILE_NAMES`.
 * @throws Error naming the raw input and the accepted profiles when nothing matches.
 */
function normalizeMatrixQaProfile(profile?: string): MatrixQaProfile {
  const candidate = profile?.trim().toLowerCase() || "all";
  const known = MATRIX_QA_PROFILE_NAMES.find((name) => name === candidate);
  if (known === undefined) {
    throw new Error(
      `unknown Matrix QA profile "${profile}"; expected one of: ${MATRIX_QA_PROFILE_NAMES.join(", ")}`,
    );
  }
  return known;
}
/**
 * Lists the scenario ids selected by a profile.
 *
 * Fixed-list profiles (`fast`, `media`, `e2ee-smoke`) return a copy of their
 * list. The derived profiles (`transport`, `e2ee-cli`, `e2ee-deep`) filter the
 * catalog and therefore come back in catalog order.
 *
 * @param profile Already-normalized profile name.
 * @returns Scenario ids for that profile.
 */
function getMatrixQaProfileScenarioIds(profile: MatrixQaProfile): MatrixQaScenarioId[] {
  const catalogIds = MATRIX_QA_SCENARIOS.map((scenario) => scenario.id);
  const mediaShard = buildMatrixQaScenarioIdSet(MATRIX_QA_MEDIA_PROFILE_SCENARIO_IDS);
  const smokeShard = buildMatrixQaScenarioIdSet(MATRIX_QA_E2EE_SMOKE_PROFILE_SCENARIO_IDS);

  if (profile === "all") {
    return catalogIds;
  }
  if (profile === "fast") {
    return [...MATRIX_QA_FAST_PROFILE_SCENARIO_IDS];
  }
  if (profile === "transport") {
    // Everything that is neither E2EE nor claimed by the media shard.
    return catalogIds.filter((id) => !(isMatrixQaE2eeScenarioId(id) || mediaShard.has(id)));
  }
  if (profile === "media") {
    return [...MATRIX_QA_MEDIA_PROFILE_SCENARIO_IDS];
  }
  if (profile === "e2ee-smoke") {
    return [...MATRIX_QA_E2EE_SMOKE_PROFILE_SCENARIO_IDS];
  }
  if (profile === "e2ee-cli") {
    return catalogIds.filter(isMatrixQaCliE2eeScenarioId);
  }
  if (profile === "e2ee-deep") {
    // E2EE ids left over once the cli, media, and smoke shards took theirs.
    return catalogIds.filter(
      (id) =>
        isMatrixQaE2eeScenarioId(id) &&
        !(isMatrixQaCliE2eeScenarioId(id) || mediaShard.has(id) || smokeShard.has(id)),
    );
  }

  // Compile-time exhaustiveness guard: adding a profile without a branch fails here.
  const unhandledProfile: never = profile;
  return unhandledProfile;
}
/**
 * Selects Matrix QA scenarios by explicit ids or, failing that, by profile.
 *
 * A non-empty `ids` list wins over the profile. The profile is still
 * normalized first, so an unknown profile name throws even when ids are given.
 *
 * @param ids Optional explicit scenario ids.
 * @param profile Optional profile name; missing or blank means `all`.
 * @returns Whatever `selectLiveTransportScenarios` returns for the chosen ids.
 * @throws Error when `profile` is not a known profile name.
 */
export function findMatrixQaScenarios(ids?: string[], profile?: string) {
  const normalizedProfile = normalizeMatrixQaProfile(profile);
  const selectedIds =
    ids && ids.length > 0 ? ids : getMatrixQaProfileScenarioIds(normalizedProfile);
  return selectLiveTransportScenarios({
    // Single `ids` entry: the stale shorthand `ids,` duplicated this key.
    ids: selectedIds,
    laneLabel: "Matrix",
    scenarios: MATRIX_QA_SCENARIOS,
  });
}
export const __matrixQaProfileTesting = {
getMatrixQaProfileScenarioIds,
normalizeMatrixQaProfile,
};
export function buildMatrixQaTopologyForScenarios(params: {
defaultRoomName: string;
scenarios: MatrixQaScenarioDefinition[];

View File

@@ -12,8 +12,8 @@ import {
buildMatrixReplyArtifact,
buildMatrixReplyDetails,
createMatrixQaScenarioClient,
NO_REPLY_WINDOW_MS,
advanceMatrixQaActorCursor,
resolveMatrixQaNoReplyWindowMs,
runConfigurableTopLevelScenario,
type MatrixQaScenarioContext,
} from "./scenario-runtime-shared.js";
@@ -98,7 +98,7 @@ async function runDmSharedSessionFlow(params: {
event.body.includes("channels.matrix.dm.sessionScope"),
roomId: secondRoomId,
since: noticeSince,
timeoutMs: Math.min(NO_REPLY_WINDOW_MS, params.context.timeoutMs),
timeoutMs: resolveMatrixQaNoReplyWindowMs(params.context.timeoutMs),
}),
]);

View File

@@ -50,7 +50,7 @@ import {
buildMentionPrompt,
doesMatrixQaReplyBodyMatchToken,
isMatrixQaExactMarkerReply,
NO_REPLY_WINDOW_MS,
resolveMatrixQaNoReplyWindowMs,
type MatrixQaScenarioContext,
} from "./scenario-runtime-shared.js";
import type { MatrixQaReplyArtifact, MatrixQaScenarioExecution } from "./scenario-types.js";
@@ -3310,14 +3310,14 @@ export async function runMatrixQaE2eeVerificationNoticeNoTriggerScenario(
token,
}),
roomId,
timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
});
if (result.matched) {
throw new Error(`unexpected E2EE verification-notice reply: ${result.event.eventId}`);
}
return {
artifacts: {
expectedNoReplyWindowMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
expectedNoReplyWindowMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
noticeEventId,
roomKey,
roomId,
@@ -3326,7 +3326,7 @@ export async function runMatrixQaE2eeVerificationNoticeNoTriggerScenario(
`encrypted room key: ${roomKey}`,
`encrypted room id: ${roomId}`,
`verification notice event: ${noticeEventId}`,
`waited ${Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs)}ms with no SUT reply`,
`waited ${resolveMatrixQaNoReplyWindowMs(context.timeoutMs)}ms with no SUT reply`,
].join("\n"),
};
},

View File

@@ -12,8 +12,8 @@ import {
isMatrixQaExactMarkerReply,
assertTopLevelReplyArtifact,
advanceMatrixQaActorCursor,
NO_REPLY_WINDOW_MS,
primeMatrixQaDriverScenarioClient,
resolveMatrixQaNoReplyWindowMs,
runAssertedDriverTopLevelScenario,
type MatrixQaScenarioContext,
} from "./scenario-runtime-shared.js";
@@ -254,7 +254,7 @@ async function assertNoRestartReplayDuplicate(params: {
token: params.replayToken,
}),
roomId: params.roomId,
timeoutMs: Math.min(NO_REPLY_WINDOW_MS, params.context.timeoutMs),
timeoutMs: resolveMatrixQaNoReplyWindowMs(params.context.timeoutMs),
});
if (duplicate.matched) {
throw new Error(
@@ -313,7 +313,7 @@ export async function runRestartReplayDedupeScenario(context: MatrixQaScenarioCo
return {
artifacts: {
duplicateWindowMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
duplicateWindowMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
firstDriverEventId: replayDriverEventId,
firstReply,
firstToken: replayToken,
@@ -328,7 +328,7 @@ export async function runRestartReplayDedupeScenario(context: MatrixQaScenarioCo
"restart signal: SIGUSR1",
`first driver event: ${replayDriverEventId}`,
...buildMatrixReplyDetails("first reply", firstReply),
`duplicate replay window: ${Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs)}ms`,
`duplicate replay window: ${resolveMatrixQaNoReplyWindowMs(context.timeoutMs)}ms`,
`fresh post-restart driver event: ${postRestart.driverEventId}`,
...buildMatrixReplyDetails("fresh reply", postRestart.reply),
].join("\n"),
@@ -401,7 +401,7 @@ export async function runStaleSyncReplayDedupeScenario(context: MatrixQaScenario
return {
artifacts: {
dedupeCommitObserved: true,
duplicateWindowMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
duplicateWindowMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
firstDriverEventId: replayDriverEventId,
firstReply,
firstToken: replayToken,
@@ -418,7 +418,7 @@ export async function runStaleSyncReplayDedupeScenario(context: MatrixQaScenario
`stale sync cursor: ${staleCursor}`,
`first driver event: ${replayDriverEventId}`,
...buildMatrixReplyDetails("first reply", firstReply),
`duplicate replay window: ${Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs)}ms`,
`duplicate replay window: ${resolveMatrixQaNoReplyWindowMs(context.timeoutMs)}ms`,
`fresh post-restart driver event: ${postRestart.driverEventId}`,
...buildMatrixReplyDetails("fresh reply", postRestart.reply),
].join("\n"),

View File

@@ -24,9 +24,9 @@ import {
createMatrixQaScenarioClient,
isMatrixQaExactMarkerReply,
isMatrixQaMessageLikeKind,
NO_REPLY_WINDOW_MS,
primeMatrixQaActorCursor,
primeMatrixQaDriverScenarioClient,
resolveMatrixQaNoReplyWindowMs,
runAssertedDriverTopLevelScenario,
runConfigurableTopLevelScenario,
runDriverTopLevelMentionScenario,
@@ -530,7 +530,7 @@ export async function runAllowlistHotReloadScenario(context: MatrixQaScenarioCon
sutUserId: context.sutUserId,
token: blockedToken,
}),
timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
token: blockedToken,
});
@@ -767,7 +767,7 @@ export async function runMembershipLossScenario(context: MatrixQaScenarioContext
syncState: context.syncState,
syncStreams: context.syncStreams,
sutUserId: context.sutUserId,
timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
token: noReplyToken,
});

View File

@@ -56,6 +56,14 @@ export type MatrixQaScenarioContext = {
};
export const NO_REPLY_WINDOW_MS = 8_000;
const NO_REPLY_WINDOW_ENV = "OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS";

/**
 * Picks the quiet window, in milliseconds, for no-reply checks.
 *
 * Starts from `NO_REPLY_WINDOW_MS`. When `OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS`
 * is set to a finite number of at least 1, its floored value is used instead;
 * any other value is ignored. The result is capped at `timeoutMs`.
 *
 * @param timeoutMs Upper bound for the window.
 * @returns The smaller of the chosen window and `timeoutMs`.
 */
export function resolveMatrixQaNoReplyWindowMs(timeoutMs: number) {
  const override = process.env[NO_REPLY_WINDOW_ENV];
  let windowMs = NO_REPLY_WINDOW_MS;
  if (override !== undefined) {
    const requested = Number(override);
    if (Number.isFinite(requested) && requested >= 1) {
      windowMs = Math.floor(requested);
    }
  }
  return Math.min(windowMs, timeoutMs);
}
export function buildMentionPrompt(sutUserId: string, token: string) {
return `${sutUserId} reply with only this exact marker: ${token}`;
@@ -316,7 +324,7 @@ export async function assertNoSutReplyWindow(params: {
unexpectedLines?: string[];
unexpectedMessage: string;
}) {
const noReplyWindowMs = Math.min(NO_REPLY_WINDOW_MS, params.context.timeoutMs);
const noReplyWindowMs = resolveMatrixQaNoReplyWindowMs(params.context.timeoutMs);
const result = await params.client.waitForOptionalRoomEvent({
observedEvents: params.context.observedEvents,
predicate: (event) =>

View File

@@ -94,8 +94,8 @@ import {
buildMatrixReplyArtifact,
buildMatrixReplyDetails,
buildMentionPrompt,
NO_REPLY_WINDOW_MS,
readMatrixQaSyncCursor,
resolveMatrixQaNoReplyWindowMs,
runNoReplyExpectedScenario,
runTopologyScopedTopLevelScenario,
writeMatrixQaSyncCursor,
@@ -167,7 +167,7 @@ async function runMultiActorOrderingScenario(context: MatrixQaScenarioContext) {
body: buildMentionPrompt(context.sutUserId, blockedToken),
mentionUserIds: [context.sutUserId],
context,
timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs),
timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs),
token: blockedToken,
});
const accepted = await runDriverTopologyScopedScenario({

View File

@@ -256,6 +256,54 @@ describe("matrix live qa scenarios", () => {
}
});
it("keeps the Matrix CLI default profile on the full catalog", () => {
  // Omitting the profile must select exactly what the explicit "all" profile selects.
  const defaultIds = scenarioTesting.findMatrixQaScenarios().map(({ id }) => id);
  const explicitAllIds = scenarioTesting
    .findMatrixQaScenarios(undefined, "all")
    .map(({ id }) => id);
  expect(explicitAllIds).toEqual(defaultIds);
});
it("selects the fast release-critical Matrix profile without media or deep E2EE inventory", () => {
  // Pins both the membership and the order of the fast profile.
  const expectedFastIds = [
    "matrix-thread-follow-up",
    "matrix-thread-isolation",
    "matrix-top-level-reply-shape",
    "matrix-reaction-notification",
    "matrix-restart-resume",
    "matrix-mention-gating",
    "matrix-allowlist-block",
    "matrix-e2ee-basic-reply",
  ];
  const selectedIds = scenarioTesting
    .findMatrixQaScenarios(undefined, "fast")
    .map(({ id }) => id);
  expect(selectedIds).toEqual(expectedFastIds);
});
it("keeps the full Matrix shard profiles exhaustive and disjoint", () => {
  const idsForProfile = (profile?: string) =>
    scenarioTesting.findMatrixQaScenarios(undefined, profile).map(({ id }) => id);
  const catalogIds = idsForProfile();
  const shardProfiles = ["transport", "media", "e2ee-smoke", "e2ee-deep", "e2ee-cli"];
  const shardedIds = shardProfiles.flatMap((profile) => idsForProfile(profile));

  // Disjoint: no scenario id is claimed by two shards.
  expect(new Set(shardedIds).size).toBe(shardedIds.length);
  // Exhaustive: together the shards cover the whole catalog.
  expect(shardedIds.toSorted()).toEqual(catalogIds.toSorted());
});
it("lets explicit Matrix scenario ids override the selected profile", () => {
  // This id is requested explicitly alongside the "fast" profile; the test
  // expects exactly this one scenario back.
  const requestedIds = ["matrix-room-generated-image-delivery"];
  const selected = scenarioTesting.findMatrixQaScenarios(requestedIds, "fast");
  const selectedIds = selected.map(({ id }) => id);
  expect(selectedIds).toEqual(["matrix-room-generated-image-delivery"]);
});
it("fails when the Matrix profile is unknown", () => {
  // Deferred call so `expect` can observe the throw for an unrecognised profile name.
  const selectUnknownProfile = () =>
    scenarioTesting.findMatrixQaScenarios(undefined, "speedy");
  expect(selectUnknownProfile).toThrow('unknown Matrix QA profile "speedy"');
});
it("uses the repo-wide exact marker prompt shape for Matrix mentions", () => {
expect(
scenarioTesting.buildMentionPrompt("@sut:matrix-qa.test", "MATRIX_QA_CANARY_TOKEN"),

View File

@@ -3,6 +3,7 @@ import {
MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY,
MATRIX_QA_E2EE_ROOM_KEY,
MATRIX_QA_MEDIA_ROOM_KEY,
MATRIX_QA_PROFILE_NAMES,
MATRIX_QA_MEMBERSHIP_ROOM_KEY,
MATRIX_QA_SCENARIOS,
MATRIX_QA_SECONDARY_ROOM_KEY,
@@ -13,6 +14,8 @@ import {
resolveMatrixQaScenarioRoomId,
type MatrixQaScenarioDefinition,
type MatrixQaScenarioId,
type MatrixQaProfile,
__matrixQaProfileTesting,
} from "./scenario-catalog.js";
import {
buildMatrixReplyArtifact,
@@ -34,6 +37,7 @@ import type {
export type { MatrixQaScenarioDefinition, MatrixQaScenarioId };
export {
MATRIX_QA_PROFILE_NAMES,
MATRIX_QA_SCENARIOS,
MATRIX_QA_STANDARD_SCENARIO_IDS,
buildMatrixReplyArtifact,
@@ -46,6 +50,7 @@ export {
runMatrixQaCanary,
runMatrixQaScenario,
};
export type { MatrixQaProfile };
export type {
MatrixQaCanaryArtifact,
MatrixQaReplyArtifact,
@@ -61,6 +66,7 @@ export const __testing = {
MATRIX_QA_E2EE_ROOM_KEY,
MATRIX_QA_MEDIA_ROOM_KEY,
MATRIX_QA_MEMBERSHIP_ROOM_KEY,
MATRIX_QA_PROFILE_NAMES,
MATRIX_QA_SECONDARY_ROOM_KEY,
MATRIX_QA_STANDARD_SCENARIO_IDS,
buildMatrixQaE2eeScenarioRoomKey,
@@ -69,6 +75,8 @@ export const __testing = {
buildMatrixReplyArtifact,
buildMentionPrompt,
findMatrixQaScenarios,
getMatrixQaProfileScenarioIds: __matrixQaProfileTesting.getMatrixQaProfileScenarioIds,
normalizeMatrixQaProfile: __matrixQaProfileTesting.normalizeMatrixQaProfile,
readMatrixQaSyncCursor,
resolveMatrixQaScenarioRoomId,
writeMatrixQaSyncCursor,

View File

@@ -27,6 +27,8 @@ export function resolveLiveTransportQaRunOptions(
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
fastMode: opts.fastMode,
failFast: opts.failFast,
profile: opts.profile?.trim(),
scenarioIds: opts.scenarioIds,
sutAccountId: opts.sutAccountId,
credentialSource: opts.credentialSource?.trim(),

View File

@@ -9,6 +9,8 @@ export type LiveTransportQaCommandOptions = {
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
failFast?: boolean;
profile?: string;
scenarioIds?: string[];
sutAccountId?: string;
credentialSource?: string;
@@ -23,6 +25,8 @@ type LiveTransportQaCommanderOptions = {
altModel?: string;
scenario?: string[];
fast?: boolean;
failFast?: boolean;
profile?: string;
sutAccount?: string;
credentialSource?: string;
credentialRole?: string;
@@ -56,6 +60,8 @@ export function mapLiveTransportQaCommanderOptions(
primaryModel: opts.model,
alternateModel: opts.altModel,
fastMode: opts.fast,
failFast: opts.failFast,
profile: opts.profile,
scenarioIds: opts.scenario,
sutAccountId: opts.sutAccount,
credentialSource: opts.credentialSource,
@@ -69,6 +75,8 @@ export function registerLiveTransportQaCli(params: {
credentialOptions?: LiveTransportQaCredentialCliOptions;
description: string;
outputDirHelp: string;
profileHelp?: string;
failFastHelp?: string;
scenarioHelp: string;
sutAccountHelp: string;
run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
@@ -89,6 +97,14 @@ export function registerLiveTransportQaCli(params: {
.option("--fast", "Enable provider fast mode where supported", false)
.option("--sut-account <id>", params.sutAccountHelp, "sut");
if (params.profileHelp) {
command.option("--profile <profile>", params.profileHelp);
}
if (params.failFastHelp) {
command.option("--fail-fast", params.failFastHelp, false);
}
if (params.credentialOptions) {
command.option(
"--credential-source <source>",
@@ -110,6 +126,8 @@ export function createLiveTransportQaCliRegistration(params: {
credentialOptions?: LiveTransportQaCredentialCliOptions;
description: string;
outputDirHelp: string;
profileHelp?: string;
failFastHelp?: string;
scenarioHelp: string;
sutAccountHelp: string;
run: (opts: LiveTransportQaCommandOptions) => Promise<void>;
@@ -123,6 +141,8 @@ export function createLiveTransportQaCliRegistration(params: {
credentialOptions: params.credentialOptions,
description: params.description,
outputDirHelp: params.outputDirHelp,
profileHelp: params.profileHelp,
failFastHelp: params.failFastHelp,
scenarioHelp: params.scenarioHelp,
sutAccountHelp: params.sutAccountHelp,
run: params.run,