ci(mantis): derive telegram proof refs from pr

This commit is contained in:
Ayaan Zaidi
2026-05-11 17:49:45 +05:30
parent bf5202b056
commit 663206aac4
3 changed files with 73 additions and 160 deletions

View File

@@ -35,9 +35,7 @@ Required workflow:
1. Read `.agents/skills/telegram-crabbox-e2e-proof/SKILL.md`.
2. Inspect the PR with `gh pr view "$MANTIS_PR_NUMBER"` and
`gh pr diff "$MANTIS_PR_NUMBER"` when `MANTIS_PR_NUMBER` is set. If the run
came from workflow dispatch without a PR number, inspect
`BASELINE_SHA..CANDIDATE_SHA`.
`gh pr diff "$MANTIS_PR_NUMBER"`.
3. Decide what Telegram message, mock model response, command, callback, button,
media, or sequence best proves the PR. Use `MANTIS_INSTRUCTIONS` as extra
maintainer guidance, not as a replacement for reading the PR.
@@ -45,7 +43,7 @@ Required workflow:
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
install and build each worktree with the repo's normal `pnpm` commands.
If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the
If `MANTIS_CANDIDATE_TRUST` is `fork-pr-head`, treat the
candidate worktree as untrusted fork code: do not pass GitHub, OpenAI,
Crabbox, Convex, or other workflow secrets into candidate install, build, or
runtime commands. The candidate SUT may receive only the proof runner's

View File

@@ -5,19 +5,9 @@ on:
types: [created]
workflow_dispatch:
inputs:
baseline_ref:
description: Ref, tag, or SHA to capture as the before GIF
required: true
default: main
type: string
candidate_ref:
description: Ref, tag, or SHA to capture as the after GIF
required: true
default: main
type: string
pr_number:
description: Optional PR number to receive the QA evidence comment
required: false
description: PR number to capture
required: true
type: string
instructions:
description: Optional freeform proof instructions for the agent
@@ -35,11 +25,6 @@ on:
description: Optional existing Crabbox desktop lease id or slug to reuse
required: false
type: string
allow_fork_candidate:
description: Allow a fork PR head candidate when pr_number points at that PR
required: false
default: false
type: boolean
permissions:
contents: write
@@ -47,7 +32,7 @@ permissions:
pull-requests: write
concurrency:
group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || inputs.candidate_ref || github.run_id }}-${{ github.run_attempt }}
group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || github.run_id }}-${{ github.run_attempt }}
cancel-in-progress: false
env:
@@ -68,6 +53,7 @@ jobs:
(
github.event_name == 'issue_comment' &&
github.event.issue.pull_request &&
contains(github.event.issue.labels.*.name, 'mantis: telegram-visible-proof') &&
(
contains(github.event.comment.body, '@openclaw-mantis') ||
contains(github.event.comment.body, '/openclaw-mantis')
@@ -100,7 +86,6 @@ jobs:
needs: authorize_actor
runs-on: ubuntu-24.04
outputs:
allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }}
baseline_ref: ${{ steps.resolve.outputs.baseline_ref }}
candidate_ref: ${{ steps.resolve.outputs.candidate_ref }}
crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }}
@@ -108,7 +93,6 @@ jobs:
lease_id: ${{ steps.resolve.outputs.lease_id }}
pr_number: ${{ steps.resolve.outputs.pr_number }}
request_source: ${{ steps.resolve.outputs.request_source }}
should_run: ${{ steps.resolve.outputs.should_run }}
steps:
- name: Resolve refs and target PR
id: resolve
@@ -122,52 +106,11 @@ jobs:
core.info(`${name}=${value ?? ""}`);
}
if (eventName === "workflow_dispatch") {
const inputs = context.payload.inputs ?? {};
setOutput("should_run", "true");
setOutput(
"allow_fork_candidate",
String(inputs.allow_fork_candidate) === "true" ? "true" : "false",
);
setOutput("baseline_ref", inputs.baseline_ref || "main");
setOutput("candidate_ref", inputs.candidate_ref || "main");
setOutput("pr_number", inputs.pr_number || "");
setOutput("instructions", inputs.instructions || "");
setOutput("crabbox_provider", inputs.crabbox_provider || "aws");
setOutput("lease_id", inputs.crabbox_lease_id || "");
setOutput("request_source", "workflow_dispatch");
return;
}
if (eventName !== "issue_comment") {
core.setFailed(`Unsupported event: ${eventName}`);
return;
}
const issue = context.payload.issue;
const body = context.payload.comment?.body ?? "";
if (!issue?.pull_request) {
core.setFailed("Mantis issue_comment trigger requires a pull request comment.");
return;
}
const normalized = body.toLowerCase();
const requested =
(normalized.includes("@openclaw-mantis") || normalized.includes("/openclaw-mantis")) &&
normalized.includes("telegram") &&
(normalized.includes("desktop") || normalized.includes("native")) &&
normalized.includes("proof");
if (!requested) {
core.notice("Comment mentioned Mantis but did not request Telegram desktop proof.");
setOutput("should_run", "false");
setOutput("allow_fork_candidate", "false");
setOutput("baseline_ref", "");
setOutput("candidate_ref", "");
setOutput("pr_number", "");
setOutput("instructions", "");
setOutput("crabbox_provider", "");
setOutput("lease_id", "");
setOutput("request_source", "unsupported_issue_comment");
// Work out which pull request this run targets.
// workflow_dispatch supplies it through the pr_number input; any other event
// falls back to the number of the issue carried in the event payload
// (empty string when the payload has no issue).
const inputs = context.payload.inputs ?? {};
const prNumber =
eventName === "workflow_dispatch" ? inputs.pr_number : String(context.payload.issue?.number ?? "");
// Without a PR number there is nothing to derive refs from, so fail the step.
if (!prNumber) {
core.setFailed("Mantis Telegram desktop proof requires a pull request.");
return;
}
@@ -175,57 +118,35 @@ jobs:
const { data: pr } = await github.rest.pulls.get({
owner,
repo,
pull_number: issue.number,
pull_number: Number(prNumber),
});
let mergedBaseline = "";
let mergedCandidate = "";
if (pr.merged) {
const { data: commits } = await github.rest.pulls.listCommits({
owner,
repo,
pull_number: issue.number,
per_page: 100,
});
mergedCandidate = pr.merge_commit_sha || commits.at(-1)?.sha || "";
mergedBaseline = mergedCandidate && commits.length > 0 ? `${mergedCandidate}~${commits.length}` : "";
}
const baselineMatch = body.match(/(?:baseline|base)[\s:=]+([^\s`]+)/i);
const candidateMatch = body.match(/(?:candidate|head)[\s:=]+([^\s`]+)/i);
const providerMatch = body.match(/(?:provider|crabbox_provider)[\s:=]+([^\s`]+)/i);
const leaseMatch = body.match(/(?:lease|lease_id|crabbox_lease_id)[\s:=]+([^\s`]+)/i);
const provider = providerMatch?.[1] || "aws";
// Free-form text for this run: the instructions input for workflow_dispatch,
// otherwise the full text of the triggering comment (empty string if neither exists).
const body = eventName === "workflow_dispatch" ? inputs.instructions || "" : context.payload.comment?.body || "";
// The Crabbox provider is read from the crabbox_provider input and defaults to
// "aws" when that input is absent or empty.
const provider = inputs.crabbox_provider || "aws";
// Only the two providers listed here are accepted; anything else fails the step.
if (!["aws", "hetzner"].includes(provider)) {
core.setFailed(`Unsupported Crabbox provider for Mantis Telegram desktop proof: ${provider}`);
return;
}
const rawCandidate = candidateMatch?.[1];
const candidate =
rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase())
? rawCandidate
: mergedCandidate || pr.head.sha;
const allowForkCandidate = /\bfork[-_]ok\b/i.test(body);
setOutput("should_run", "true");
setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false");
setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main");
setOutput("candidate_ref", candidate);
setOutput("pr_number", String(issue.number));
setOutput("baseline_ref", pr.base.sha);
setOutput("candidate_ref", pr.head.sha);
setOutput("pr_number", String(pr.number));
setOutput("instructions", body);
setOutput("crabbox_provider", provider);
setOutput("lease_id", leaseMatch?.[1] || "");
setOutput("request_source", "issue_comment");
setOutput("lease_id", inputs.crabbox_lease_id || "");
setOutput("request_source", eventName);
await github.rest.reactions.createForIssueComment({
owner,
repo,
comment_id: context.payload.comment.id,
content: "eyes",
}).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`));
// Acknowledge the triggering comment with an "eyes" reaction.
// This is limited to issue_comment events because the call needs
// context.payload.comment.id. The reaction is best-effort: a failure is
// logged as a warning rather than failing the step.
if (eventName === "issue_comment") {
await github.rest.reactions.createForIssueComment({
owner,
repo,
comment_id: context.payload.comment.id,
content: "eyes",
}).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`));
}
validate_refs:
name: Validate selected refs
needs: resolve_request
if: ${{ needs.resolve_request.outputs.should_run == 'true' }}
runs-on: ubuntu-24.04
outputs:
baseline_revision: ${{ steps.validate.outputs.baseline_revision }}
@@ -241,7 +162,6 @@ jobs:
- name: Validate refs are trusted
id: validate
env:
ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }}
BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }}
CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }}
GH_TOKEN: ${{ github.token }}
@@ -255,64 +175,48 @@ jobs:
git fetch --no-tags origin "+refs/pull/${PR_NUMBER}/head:refs/remotes/origin/pr/${PR_NUMBER}" || true
fi
validate_ref() {
local label="$1"
resolve_commit() {
local input_ref="$2"
local revision=""
local reason=""
if ! revision="$(git rev-parse --verify "${input_ref}^{commit}" 2>/dev/null)"; then
echo "${label} ref '${input_ref}' is not available in the workflow checkout." >&2
echo "$1 ref '${input_ref}' is not available in the workflow checkout." >&2
exit 1
fi
if git merge-base --is-ancestor "$revision" refs/remotes/origin/main; then
reason="main-ancestor"
elif git tag --points-at "$revision" | grep -Eq '^v'; then
reason="release-tag"
else
local pr_head_count
pr_head_count="$(
gh api \
-H "Accept: application/vnd.github+json" \
"repos/${GITHUB_REPOSITORY}/commits/${revision}/pulls" \
--jq '[.[] | select(.state == "open" and .head.repo.full_name == "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'")] | length'
)"
if [[ "$pr_head_count" != "0" ]]; then
reason="open-pr-head"
elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then
local fork_pr_head_count
fork_pr_head_count="$(
gh api \
-H "Accept: application/vnd.github+json" \
"repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
--jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end'
)"
if [[ "$fork_pr_head_count" == "1" ]]; then
reason="maintainer-approved-fork-pr-head"
fi
fi
fi
if [[ -z "$reason" ]]; then
echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2
exit 1
fi
printf '%s\t%s\n' "$revision" "$reason"
printf '%s\n' "$revision"
}
baseline_revision="$(validate_ref baseline "$BASELINE_REF")"
baseline_trust="${baseline_revision#*$'\t'}"
baseline_revision="${baseline_revision%%$'\t'*}"
candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")"
candidate_trust="${candidate_revision#*$'\t'}"
candidate_revision="${candidate_revision%%$'\t'*}"
# Resolve both requested refs to commit SHAs using the resolve_commit helper.
baseline_revision="$(resolve_commit baseline "$BASELINE_REF")"
candidate_revision="$(resolve_commit candidate "$CANDIDATE_REF")"
# Baseline rule: the commit must be an ancestor of origin/main.
if ! git merge-base --is-ancestor "$baseline_revision" refs/remotes/origin/main; then
echo "baseline ref '${BASELINE_REF}' resolved to ${baseline_revision}, which is not on main." >&2
exit 1
fi
# Ask the GitHub API for the PR's current state, head SHA, and head repository,
# reduced to one small JSON object that the jq calls below pick apart.
pr_head="$(
gh api \
-H "Accept: application/vnd.github+json" \
"repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
--jq '{state, head_sha: .head.sha, head_repo: .head.repo.full_name}'
)"
pr_state="$(jq -r '.state' <<<"$pr_head")"
pr_head_sha="$(jq -r '.head_sha' <<<"$pr_head")"
pr_head_repo="$(jq -r '.head_repo' <<<"$pr_head")"
# Candidate rule: the PR must be open and the candidate must be exactly its
# current head commit.
if [[ "$pr_state" != "open" || "$candidate_revision" != "$pr_head_sha" ]]; then
echo "candidate ref '${CANDIDATE_REF}' resolved to ${candidate_revision}, which is not the open PR head." >&2
exit 1
fi
# Trust label for the candidate: "open-pr-head" by default, switched to
# "fork-pr-head" when the PR head lives in a repository other than this one.
candidate_trust="open-pr-head"
if [[ "$pr_head_repo" != "$GITHUB_REPOSITORY" ]]; then
candidate_trust="fork-pr-head"
fi
echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT"
echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT"
echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT"
{
echo "baseline: \`${BASELINE_REF}\`"
echo "baseline SHA: \`${baseline_revision}\`"
echo "baseline trust: \`${baseline_trust}\`"
echo "baseline trust: \`main-ancestor\`"
echo "candidate: \`${CANDIDATE_REF}\`"
echo "candidate SHA: \`${candidate_revision}\`"
echo "candidate trust: \`${candidate_trust}\`"
@@ -321,7 +225,6 @@ jobs:
run_telegram_desktop_proof:
name: Run agentic native Telegram proof
needs: [resolve_request, validate_refs]
if: ${{ needs.resolve_request.outputs.should_run == 'true' }}
runs-on: blacksmith-16vcpu-ubuntu-2404
timeout-minutes: 360
environment: qa-live-shared