ci(mantis): allow fork telegram proof

This commit is contained in:
Ayaan Zaidi
2026-05-11 17:27:32 +05:30
parent e6b0b37e3f
commit d4c7519989
4 changed files with 104 additions and 5 deletions

View File

@@ -24,6 +24,7 @@ Inputs are provided as environment variables:
- `BASELINE_SHA`
- `CANDIDATE_REF`
- `CANDIDATE_SHA`
- `MANTIS_CANDIDATE_TRUST`
- `MANTIS_OUTPUT_DIR`
- `MANTIS_INSTRUCTIONS`
- `CRABBOX_PROVIDER`
@@ -44,6 +45,12 @@ Required workflow:
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
install and build each worktree with the repo's normal `pnpm` commands.
If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the
candidate worktree as untrusted fork code: do not pass GitHub, OpenAI,
Crabbox, Convex, or other workflow secrets into candidate install, build, or
runtime commands. The candidate SUT may receive only the proof runner's
short-lived Telegram bot token, generated local config/state paths, and mock
model key needed for this isolated proof.
5. In each worktree, run the real-user Telegram Crabbox proof flow from the
skill with `$OPENCLAW_TELEGRAM_USER_PROOF_CMD`; do not run
`pnpm qa:telegram-user:crabbox` directly. The proof command comes from the

View File

@@ -35,6 +35,11 @@ on:
description: Optional existing Crabbox desktop lease id or slug to reuse
required: false
type: string
allow_fork_candidate:
description: Allow a fork PR head candidate when pr_number points at that PR
required: false
default: false
type: boolean
permissions:
contents: write
@@ -95,6 +100,7 @@ jobs:
needs: authorize_actor
runs-on: ubuntu-24.04
outputs:
allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }}
baseline_ref: ${{ steps.resolve.outputs.baseline_ref }}
candidate_ref: ${{ steps.resolve.outputs.candidate_ref }}
crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }}
@@ -119,6 +125,10 @@ jobs:
if (eventName === "workflow_dispatch") {
const inputs = context.payload.inputs ?? {};
setOutput("should_run", "true");
setOutput(
"allow_fork_candidate",
String(inputs.allow_fork_candidate) === "true" ? "true" : "false",
);
setOutput("baseline_ref", inputs.baseline_ref || "main");
setOutput("candidate_ref", inputs.candidate_ref || "main");
setOutput("pr_number", inputs.pr_number || "");
@@ -150,6 +160,7 @@ jobs:
if (!requested) {
core.notice("Comment mentioned Mantis but did not request Telegram desktop proof.");
setOutput("should_run", "false");
setOutput("allow_fork_candidate", "false");
setOutput("baseline_ref", "");
setOutput("candidate_ref", "");
setOutput("pr_number", "");
@@ -192,8 +203,10 @@ jobs:
rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase())
? rawCandidate
: mergedCandidate || pr.head.sha;
const allowForkCandidate = /\bfork[-_]ok\b/i.test(body);
setOutput("should_run", "true");
setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false");
setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main");
setOutput("candidate_ref", candidate);
setOutput("pr_number", String(issue.number));
@@ -217,6 +230,7 @@ jobs:
outputs:
baseline_revision: ${{ steps.validate.outputs.baseline_revision }}
candidate_revision: ${{ steps.validate.outputs.candidate_revision }}
candidate_trust: ${{ steps.validate.outputs.candidate_trust }}
steps:
- name: Checkout harness ref
uses: actions/checkout@v6
@@ -227,6 +241,7 @@ jobs:
- name: Validate refs are trusted
id: validate
env:
ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }}
BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }}
CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }}
GH_TOKEN: ${{ github.token }}
@@ -264,25 +279,43 @@ jobs:
)"
if [[ "$pr_head_count" != "0" ]]; then
reason="open-pr-head"
elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then
local fork_pr_head_count
fork_pr_head_count="$(
gh api \
-H "Accept: application/vnd.github+json" \
"repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
--jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end'
)"
if [[ "$fork_pr_head_count" == "1" ]]; then
reason="maintainer-approved-fork-pr-head"
fi
fi
fi
if [[ -z "$reason" ]]; then
echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." >&2
echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2
exit 1
fi
printf '%s\n' "$revision"
printf '%s\t%s\n' "$revision" "$reason"
}
baseline_revision="$(validate_ref baseline "$BASELINE_REF")"
baseline_trust="${baseline_revision#*$'\t'}"
baseline_revision="${baseline_revision%%$'\t'*}"
candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")"
candidate_trust="${candidate_revision#*$'\t'}"
candidate_revision="${candidate_revision%%$'\t'*}"
echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT"
echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT"
echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT"
{
echo "baseline: \`${BASELINE_REF}\`"
echo "baseline SHA: \`${baseline_revision}\`"
echo "baseline trust: \`${baseline_trust}\`"
echo "candidate: \`${CANDIDATE_REF}\`"
echo "candidate SHA: \`${candidate_revision}\`"
echo "candidate trust: \`${candidate_trust}\`"
} >> "$GITHUB_STEP_SUMMARY"
run_telegram_desktop_proof:
@@ -375,7 +408,7 @@ jobs:
printf '%s\n' 'Defaults env_keep += "CODEX_HOME CODEX_INTERNAL_ORIGINATOR_OVERRIDE"'
printf '%s\n' 'Defaults env_keep += "BASELINE_REF BASELINE_SHA CANDIDATE_REF CANDIDATE_SHA"'
printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"'
printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_CANDIDATE_TRUST MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"'
} | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null
@@ -406,6 +439,7 @@ jobs:
CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }}
CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
GH_TOKEN: ${{ github.token }}
MANTIS_CANDIDATE_TRUST: ${{ needs.validate_refs.outputs.candidate_trust }}
MANTIS_INSTRUCTIONS: ${{ needs.resolve_request.outputs.instructions }}
MANTIS_OUTPUT_DIR: ${{ env.MANTIS_OUTPUT_DIR }}
MANTIS_PR_NUMBER: ${{ needs.resolve_request.outputs.pr_number }}

View File

@@ -417,9 +417,40 @@ function optionalString(source: JsonObject, key: string) {
return typeof value === "string" && value.trim() ? value.trim() : undefined;
}
/**
 * Builds a base environment for spawned child processes from an explicit
 * allowlist of variable names.
 *
 * Only the names listed below are read from `process.env`; a name whose value
 * is unset or an empty string is left out of the result entirely.
 *
 * @returns A fresh environment object holding just the allowlisted variables
 *   that currently have a non-empty value.
 */
function childProcessBaseEnv() {
  const allowedNames = [
    "CI",
    "COREPACK_HOME",
    "FORCE_COLOR",
    "HOME",
    "LANG",
    "LC_ALL",
    "NODE_OPTIONS",
    "OPENCLAW_BUILD_PRIVATE_QA",
    "OPENCLAW_ENABLE_PRIVATE_QA_CLI",
    "PATH",
    "PNPM_HOME",
    "SHELL",
    "TEMP",
    "TMP",
    "TMPDIR",
    "USER",
    "XDG_CACHE_HOME",
    "XDG_CONFIG_HOME",
  ];
  const env: NodeJS.ProcessEnv = {};
  allowedNames.forEach((name) => {
    const current = process.env[name];
    // Unset and empty-string values are both skipped rather than forwarded.
    if (!current) {
      return;
    }
    env[name] = current;
  });
  return env;
}
function mockServerEnv(params: { mockPort: number; mockResponseText: string; requestLog: string }) {
return {
...process.env,
...childProcessBaseEnv(),
MOCK_PORT: String(params.mockPort),
MOCK_REQUEST_LOG: params.requestLog,
SUCCESS_MARKER: params.mockResponseText,
@@ -428,7 +459,7 @@ function mockServerEnv(params: { mockPort: number; mockResponseText: string; req
function gatewayEnv(params: { configPath: string; stateDir: string; sutToken: string }) {
return {
...process.env,
...childProcessBaseEnv(),
OPENAI_API_KEY: "sk-openclaw-e2e-mock",
OPENCLAW_CONFIG_PATH: params.configPath,
OPENCLAW_STATE_DIR: params.stateDir,

View File

@@ -107,12 +107,31 @@ describe("Mantis Telegram Desktop proof workflow", () => {
expect(prepare.run).toContain(
"OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD",
);
expect(prepare.run).toContain("MANTIS_CANDIDATE_TRUST");
const prompt = readFileSync(PROMPT, "utf8");
expect(prompt).toContain("$OPENCLAW_TELEGRAM_USER_PROOF_CMD");
expect(prompt).toContain("do not run\n `pnpm qa:telegram-user:crabbox` directly");
});
// Text-level guard for the fork opt-in path: asserts that the workflow file,
// the agent step's env, and the prompt all still contain the expected markers.
it("requires explicit maintainer fork approval before accepting fork PR heads", () => {
// Raw workflow text; the checks below are plain substring matches.
const workflowText = readFileSync(WORKFLOW, "utf8");
expect(workflowText).toContain("@openclaw-mantis");
// Source text of the comment opt-in pattern (matches fork-ok / fork_ok).
expect(workflowText).toContain("fork[-_]ok");
expect(workflowText).toContain("ALLOW_FORK_CANDIDATE");
expect(workflowText).toContain("maintainer-approved-fork-pr-head");
// Fragment of the jq filter that compares the PR head repo with this repository.
expect(workflowText).toContain(".head.repo.full_name !=");
// The agent step must receive the trust value from the validate_refs job output.
const agent = workflowStep("Run Codex Mantis Telegram agent");
expect(agent.env?.MANTIS_CANDIDATE_TRUST).toBe(
"${{ needs.validate_refs.outputs.candidate_trust }}",
);
// The prompt must mention the variable and the untrusted-fork handling wording.
const prompt = readFileSync(PROMPT, "utf8");
expect(prompt).toContain("MANTIS_CANDIDATE_TRUST");
expect(prompt).toContain("untrusted fork code");
});
it("checks the Telegram user driver before leasing credentials", () => {
const proofScript = readFileSync(PROOF_SCRIPT, "utf8");
const startSession = proofScript.slice(
@@ -132,4 +151,12 @@ describe("Mantis Telegram Desktop proof workflow", () => {
defaultProof.indexOf("leaseCredential({ localRoot, opts, root })"),
);
});
// Text-level guard on the proof script source: the allowlist helper must be
// defined and spread, and the two listed `...process.env` spreads must be gone.
it("does not pass the full workflow environment into the local Telegram SUT", () => {
const proofScript = readFileSync(PROOF_SCRIPT, "utf8");
expect(proofScript).toContain("function childProcessBaseEnv()");
expect(proofScript).toContain("...childProcessBaseEnv()");
// Literal substring checks: these only match a `...process.env,` spread that is
// immediately followed by the named key with exactly this whitespace.
expect(proofScript).not.toContain("...process.env,\n OPENAI_API_KEY");
expect(proofScript).not.toContain("...process.env,\n MOCK_PORT");
});
});