ci(mantis): derive telegram proof refs from pr

This commit is contained in:
Ayaan Zaidi
2026-05-11 17:49:45 +05:30
parent bf5202b056
commit 663206aac4
3 changed files with 73 additions and 160 deletions

View File

@@ -35,9 +35,7 @@ Required workflow:
  1. Read `.agents/skills/telegram-crabbox-e2e-proof/SKILL.md`.
  2. Inspect the PR with `gh pr view "$MANTIS_PR_NUMBER"` and
- `gh pr diff "$MANTIS_PR_NUMBER"` when `MANTIS_PR_NUMBER` is set. If the run
- came from workflow dispatch without a PR number, inspect
- `BASELINE_SHA..CANDIDATE_SHA`.
+ `gh pr diff "$MANTIS_PR_NUMBER"`.
  3. Decide what Telegram message, mock model response, command, callback, button,
  media, or sequence best proves the PR. Use `MANTIS_INSTRUCTIONS` as extra
  maintainer guidance, not as a replacement for reading the PR.
@@ -45,7 +43,7 @@ Required workflow:
  `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and
  `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
  install and build each worktree with the repo's normal `pnpm` commands.
- If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the
+ If `MANTIS_CANDIDATE_TRUST` is `fork-pr-head`, treat the
  candidate worktree as untrusted fork code: do not pass GitHub, OpenAI,
  Crabbox, Convex, or other workflow secrets into candidate install, build, or
  runtime commands. The candidate SUT may receive only the proof runner's

View File

@@ -5,19 +5,9 @@ on:
  types: [created]
  workflow_dispatch:
  inputs:
- baseline_ref:
- description: Ref, tag, or SHA to capture as the before GIF
- required: true
- default: main
- type: string
- candidate_ref:
- description: Ref, tag, or SHA to capture as the after GIF
- required: true
- default: main
- type: string
  pr_number:
- description: Optional PR number to receive the QA evidence comment
- required: false
+ description: PR number to capture
+ required: true
  type: string
  instructions:
  description: Optional freeform proof instructions for the agent
@@ -35,11 +25,6 @@ on:
  description: Optional existing Crabbox desktop lease id or slug to reuse
  required: false
  type: string
- allow_fork_candidate:
- description: Allow a fork PR head candidate when pr_number points at that PR
- required: false
- default: false
- type: boolean
  permissions:
  contents: write
@@ -47,7 +32,7 @@ permissions:
  pull-requests: write
  concurrency:
- group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || inputs.candidate_ref || github.run_id }}-${{ github.run_attempt }}
+ group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || github.run_id }}-${{ github.run_attempt }}
  cancel-in-progress: false
  env:
@@ -68,6 +53,7 @@ jobs:
  (
  github.event_name == 'issue_comment' &&
  github.event.issue.pull_request &&
+ contains(github.event.issue.labels.*.name, 'mantis: telegram-visible-proof') &&
  (
  contains(github.event.comment.body, '@openclaw-mantis') ||
  contains(github.event.comment.body, '/openclaw-mantis')
@@ -100,7 +86,6 @@ jobs:
  needs: authorize_actor
  runs-on: ubuntu-24.04
  outputs:
- allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }}
  baseline_ref: ${{ steps.resolve.outputs.baseline_ref }}
  candidate_ref: ${{ steps.resolve.outputs.candidate_ref }}
  crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }}
@@ -108,7 +93,6 @@ jobs:
  lease_id: ${{ steps.resolve.outputs.lease_id }}
  pr_number: ${{ steps.resolve.outputs.pr_number }}
  request_source: ${{ steps.resolve.outputs.request_source }}
- should_run: ${{ steps.resolve.outputs.should_run }}
  steps:
  - name: Resolve refs and target PR
  id: resolve
@@ -122,52 +106,11 @@ jobs:
  core.info(`${name}=${value ?? ""}`);
  }
- if (eventName === "workflow_dispatch") {
- const inputs = context.payload.inputs ?? {};
- setOutput("should_run", "true");
- setOutput(
- "allow_fork_candidate",
- String(inputs.allow_fork_candidate) === "true" ? "true" : "false",
- );
- setOutput("baseline_ref", inputs.baseline_ref || "main");
- setOutput("candidate_ref", inputs.candidate_ref || "main");
- setOutput("pr_number", inputs.pr_number || "");
- setOutput("instructions", inputs.instructions || "");
- setOutput("crabbox_provider", inputs.crabbox_provider || "aws");
- setOutput("lease_id", inputs.crabbox_lease_id || "");
- setOutput("request_source", "workflow_dispatch");
- return;
- }
- if (eventName !== "issue_comment") {
- core.setFailed(`Unsupported event: ${eventName}`);
- return;
- }
- const issue = context.payload.issue;
- const body = context.payload.comment?.body ?? "";
- if (!issue?.pull_request) {
- core.setFailed("Mantis issue_comment trigger requires a pull request comment.");
- return;
- }
- const normalized = body.toLowerCase();
- const requested =
- (normalized.includes("@openclaw-mantis") || normalized.includes("/openclaw-mantis")) &&
- normalized.includes("telegram") &&
- (normalized.includes("desktop") || normalized.includes("native")) &&
- normalized.includes("proof");
- if (!requested) {
- core.notice("Comment mentioned Mantis but did not request Telegram desktop proof.");
- setOutput("should_run", "false");
- setOutput("allow_fork_candidate", "false");
- setOutput("baseline_ref", "");
- setOutput("candidate_ref", "");
- setOutput("pr_number", "");
- setOutput("instructions", "");
- setOutput("crabbox_provider", "");
- setOutput("lease_id", "");
- setOutput("request_source", "unsupported_issue_comment");
+ const inputs = context.payload.inputs ?? {};
+ const prNumber =
+ eventName === "workflow_dispatch" ? inputs.pr_number : String(context.payload.issue?.number ?? "");
+ if (!prNumber) {
+ core.setFailed("Mantis Telegram desktop proof requires a pull request.");
  return;
  }
@@ -175,57 +118,35 @@ jobs:
  const { data: pr } = await github.rest.pulls.get({
  owner,
  repo,
- pull_number: issue.number,
+ pull_number: Number(prNumber),
  });
- let mergedBaseline = "";
- let mergedCandidate = "";
- if (pr.merged) {
- const { data: commits } = await github.rest.pulls.listCommits({
- owner,
- repo,
- pull_number: issue.number,
- per_page: 100,
- });
- mergedCandidate = pr.merge_commit_sha || commits.at(-1)?.sha || "";
- mergedBaseline = mergedCandidate && commits.length > 0 ? `${mergedCandidate}~${commits.length}` : "";
- }
- const baselineMatch = body.match(/(?:baseline|base)[\s:=]+([^\s`]+)/i);
- const candidateMatch = body.match(/(?:candidate|head)[\s:=]+([^\s`]+)/i);
- const providerMatch = body.match(/(?:provider|crabbox_provider)[\s:=]+([^\s`]+)/i);
- const leaseMatch = body.match(/(?:lease|lease_id|crabbox_lease_id)[\s:=]+([^\s`]+)/i);
- const provider = providerMatch?.[1] || "aws";
+ const body = eventName === "workflow_dispatch" ? inputs.instructions || "" : context.payload.comment?.body || "";
+ const provider = inputs.crabbox_provider || "aws";
  if (!["aws", "hetzner"].includes(provider)) {
  core.setFailed(`Unsupported Crabbox provider for Mantis Telegram desktop proof: ${provider}`);
  return;
  }
- const rawCandidate = candidateMatch?.[1];
- const candidate =
- rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase())
- ? rawCandidate
- : mergedCandidate || pr.head.sha;
- const allowForkCandidate = /\bfork[-_]ok\b/i.test(body);
- setOutput("should_run", "true");
- setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false");
- setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main");
- setOutput("candidate_ref", candidate);
- setOutput("pr_number", String(issue.number));
+ setOutput("baseline_ref", pr.base.sha);
+ setOutput("candidate_ref", pr.head.sha);
+ setOutput("pr_number", String(pr.number));
  setOutput("instructions", body);
  setOutput("crabbox_provider", provider);
- setOutput("lease_id", leaseMatch?.[1] || "");
- setOutput("request_source", "issue_comment");
- await github.rest.reactions.createForIssueComment({
- owner,
- repo,
- comment_id: context.payload.comment.id,
- content: "eyes",
- }).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`));
+ setOutput("lease_id", inputs.crabbox_lease_id || "");
+ setOutput("request_source", eventName);
+ if (eventName === "issue_comment") {
+ await github.rest.reactions.createForIssueComment({
+ owner,
+ repo,
+ comment_id: context.payload.comment.id,
+ content: "eyes",
+ }).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`));
+ }
  validate_refs:
  name: Validate selected refs
  needs: resolve_request
- if: ${{ needs.resolve_request.outputs.should_run == 'true' }}
  runs-on: ubuntu-24.04
  outputs:
  baseline_revision: ${{ steps.validate.outputs.baseline_revision }}
@@ -241,7 +162,6 @@ jobs:
  - name: Validate refs are trusted
  id: validate
  env:
- ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }}
  BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }}
  CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }}
  GH_TOKEN: ${{ github.token }}
@@ -255,64 +175,48 @@ jobs:
  git fetch --no-tags origin "+refs/pull/${PR_NUMBER}/head:refs/remotes/origin/pr/${PR_NUMBER}" || true
  fi
- validate_ref() {
- local label="$1"
+ resolve_commit() {
  local input_ref="$2"
  local revision=""
- local reason=""
  if ! revision="$(git rev-parse --verify "${input_ref}^{commit}" 2>/dev/null)"; then
- echo "${label} ref '${input_ref}' is not available in the workflow checkout." >&2
+ echo "$1 ref '${input_ref}' is not available in the workflow checkout." >&2
  exit 1
  fi
- if git merge-base --is-ancestor "$revision" refs/remotes/origin/main; then
- reason="main-ancestor"
- elif git tag --points-at "$revision" | grep -Eq '^v'; then
- reason="release-tag"
- else
- local pr_head_count
- pr_head_count="$(
- gh api \
- -H "Accept: application/vnd.github+json" \
- "repos/${GITHUB_REPOSITORY}/commits/${revision}/pulls" \
- --jq '[.[] | select(.state == "open" and .head.repo.full_name == "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'")] | length'
- )"
- if [[ "$pr_head_count" != "0" ]]; then
- reason="open-pr-head"
- elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then
- local fork_pr_head_count
- fork_pr_head_count="$(
- gh api \
- -H "Accept: application/vnd.github+json" \
- "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
- --jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end'
- )"
- if [[ "$fork_pr_head_count" == "1" ]]; then
- reason="maintainer-approved-fork-pr-head"
- fi
- fi
- fi
- if [[ -z "$reason" ]]; then
- echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2
- exit 1
- fi
- printf '%s\t%s\n' "$revision" "$reason"
+ printf '%s\n' "$revision"
  }
- baseline_revision="$(validate_ref baseline "$BASELINE_REF")"
- baseline_trust="${baseline_revision#*$'\t'}"
- baseline_revision="${baseline_revision%%$'\t'*}"
- candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")"
- candidate_trust="${candidate_revision#*$'\t'}"
- candidate_revision="${candidate_revision%%$'\t'*}"
+ baseline_revision="$(resolve_commit baseline "$BASELINE_REF")"
+ candidate_revision="$(resolve_commit candidate "$CANDIDATE_REF")"
+ if ! git merge-base --is-ancestor "$baseline_revision" refs/remotes/origin/main; then
+ echo "baseline ref '${BASELINE_REF}' resolved to ${baseline_revision}, which is not on main." >&2
+ exit 1
+ fi
+ pr_head="$(
+ gh api \
+ -H "Accept: application/vnd.github+json" \
+ "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
+ --jq '{state, head_sha: .head.sha, head_repo: .head.repo.full_name}'
+ )"
+ pr_state="$(jq -r '.state' <<<"$pr_head")"
+ pr_head_sha="$(jq -r '.head_sha' <<<"$pr_head")"
+ pr_head_repo="$(jq -r '.head_repo' <<<"$pr_head")"
+ if [[ "$pr_state" != "open" || "$candidate_revision" != "$pr_head_sha" ]]; then
+ echo "candidate ref '${CANDIDATE_REF}' resolved to ${candidate_revision}, which is not the open PR head." >&2
+ exit 1
+ fi
+ candidate_trust="open-pr-head"
+ if [[ "$pr_head_repo" != "$GITHUB_REPOSITORY" ]]; then
+ candidate_trust="fork-pr-head"
+ fi
  echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT"
  echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT"
  echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT"
  {
  echo "baseline: \`${BASELINE_REF}\`"
  echo "baseline SHA: \`${baseline_revision}\`"
- echo "baseline trust: \`${baseline_trust}\`"
+ echo "baseline trust: \`main-ancestor\`"
  echo "candidate: \`${CANDIDATE_REF}\`"
  echo "candidate SHA: \`${candidate_revision}\`"
  echo "candidate trust: \`${candidate_trust}\`"
@@ -321,7 +225,6 @@ jobs:
  run_telegram_desktop_proof:
  name: Run agentic native Telegram proof
  needs: [resolve_request, validate_refs]
- if: ${{ needs.resolve_request.outputs.should_run == 'true' }}
  runs-on: blacksmith-16vcpu-ubuntu-2404
  timeout-minutes: 360
  environment: qa-live-shared

View File

@@ -62,6 +62,7 @@ describe("Mantis Telegram Desktop proof workflow", () => {
  const workflow = readFileSync(WORKFLOW, "utf8");
  expect(workflow).toContain("@openclaw-mantis");
  expect(workflow).toContain("/openclaw-mantis");
+ expect(workflow).toContain("mantis: telegram-visible-proof");
  expect(workflow).not.toContain("@Mantis");
  expect(workflow).not.toContain("@mantis");
  expect(workflow).not.toContain('"/mantis"');
@@ -114,13 +115,23 @@ describe("Mantis Telegram Desktop proof workflow", () => {
  expect(prompt).toContain("do not run\n `pnpm qa:telegram-user:crabbox` directly");
  });
- it("requires explicit maintainer fork approval before accepting fork PR heads", () => {
+ it("derives refs from the PR instead of parsing comment prose", () => {
  const workflowText = readFileSync(WORKFLOW, "utf8");
- expect(workflowText).toContain("@openclaw-mantis");
- expect(workflowText).toContain("fork[-_]ok");
- expect(workflowText).toContain("ALLOW_FORK_CANDIDATE");
- expect(workflowText).toContain("maintainer-approved-fork-pr-head");
- expect(workflowText).toContain(".head.repo.full_name !=");
+ expect(workflowText).toContain('setOutput("baseline_ref", pr.base.sha)');
+ expect(workflowText).toContain('setOutput("candidate_ref", pr.head.sha)');
+ expect(workflowText).not.toContain("body.match");
+ expect(workflowText).not.toContain("baselineMatch");
+ expect(workflowText).not.toContain("candidateMatch");
+ expect(workflowText).not.toContain("leaseMatch");
+ expect(workflowText).not.toContain("fork-ok");
+ expect(workflowText).not.toContain("allow_fork_candidate");
+ });
+ it("trusts the open PR head and marks fork heads for sandboxed handling", () => {
+ const workflowText = readFileSync(WORKFLOW, "utf8");
+ expect(workflowText).toContain("repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}");
+ expect(workflowText).toContain('candidate_trust="fork-pr-head"');
+ expect(workflowText).toContain('pr_head_repo" != "$GITHUB_REPOSITORY"');
  const agent = workflowStep("Run Codex Mantis Telegram agent");
  expect(agent.env?.MANTIS_CANDIDATE_TRUST).toBe(
@@ -129,6 +140,7 @@ describe("Mantis Telegram Desktop proof workflow", () => {
  const prompt = readFileSync(PROMPT, "utf8");
  expect(prompt).toContain("MANTIS_CANDIDATE_TRUST");
+ expect(prompt).toContain("fork-pr-head");
  expect(prompt).toContain("untrusted fork code");
  });