From 0b8ee4616df1e8330fe797a1938356a6478a61c2 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Wed, 13 May 2026 15:20:27 +0100
Subject: [PATCH] fix(github-copilot): support Gemini image understanding

Fixes Copilot image understanding by exchanging OAuth tokens for Copilot API tokens, routing Copilot Gemini image requests through Chat Completions, and sending the prompt in user content with Copilot vision headers.

Real behavior proof:
- Old Responses route with real Copilot key reproduced `400 model gemini-3.1-pro-preview does not support Responses API`.
- Fixed route with the same real Copilot key returned `Cat`.
- Final CLI live smoke returned `ok: true` and `text: Cat` for `github-copilot/gemini-3.1-pro-preview`.

Verification:
- pnpm test src/media-understanding/image.test.ts extensions/github-copilot/models.test.ts extensions/github-copilot/stream.test.ts src/agents/pi-hooks/compaction-safeguard.test.ts -- --reporter=verbose
- pnpm check:changed via Blacksmith Testbox tbx_01krgt56pqmft8txekt017wke6, Actions run https://github.com/openclaw/openclaw/actions/runs/25803926150, exit 0.

Refs #80393, #80442.
Co-authored-by: Yang Haoyu <150496764+afunnyhy@users.noreply.github.com> --- CHANGELOG.md | 1 + extensions/github-copilot/embeddings.ts | 4 +- extensions/github-copilot/model-metadata.ts | 33 +++- extensions/github-copilot/models-defaults.ts | 9 +- extensions/github-copilot/models.test.ts | 50 ++++++ extensions/github-copilot/models.ts | 20 ++- extensions/github-copilot/stream.test.ts | 37 ++++ extensions/github-copilot/stream.ts | 25 ++- extensions/github-copilot/usage.ts | 5 +- .../pi-hooks/compaction-safeguard.test.ts | 2 +- src/media-understanding/image.test.ts | 158 ++++++++++++++++++ src/media-understanding/image.ts | 73 ++++++-- src/plugin-sdk/provider-auth.ts | 4 +- 13 files changed, 388 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2a6ac75bb2..9448bd29cc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- GitHub Copilot: exchange OAuth tokens for Copilot API tokens on image understanding requests and route Gemini image payloads through Chat Completions, fixing Copilot Gemini image descriptions. (#80393, #80442) Thanks @afunnyhy. - Gateway: hide pending Node pairing commands, capabilities, and permissions until approval, and refresh the live approved surface when pairings change. (#80741) Thanks @samzong. - Plugins/Feishu/WhatsApp/Line: enforce inbound media size caps while reading download streams, avoiding full buffering of oversized attachments. (#81044, #81050) Thanks @samzong. - Config: serialize and retry semantic config mutations centrally, so concurrent commands can rebase safe changes instead of clobbering or hand-rolling command-local retry loops. 
(#76601) diff --git a/extensions/github-copilot/embeddings.ts b/extensions/github-copilot/embeddings.ts index 0bd1401c4b1..f0c661d5225 100644 --- a/extensions/github-copilot/embeddings.ts +++ b/extensions/github-copilot/embeddings.ts @@ -5,6 +5,7 @@ import { type MemoryEmbeddingProvider, type MemoryEmbeddingProviderAdapter, } from "openclaw/plugin-sdk/memory-core-host-engine-embeddings"; +import { buildCopilotIdeHeaders } from "openclaw/plugin-sdk/provider-auth"; import { resolveConfiguredSecretInputString } from "openclaw/plugin-sdk/secret-input-runtime"; import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime"; import { resolveFirstGithubToken } from "./auth.js"; @@ -23,8 +24,7 @@ const PREFERRED_MODELS = [ const COPILOT_HEADERS_STATIC: Record = { "Content-Type": "application/json", - "Editor-Version": "vscode/1.96.2", - "User-Agent": "GitHubCopilotChat/0.26.7", + ...buildCopilotIdeHeaders(), }; function buildSsrfPolicy(baseUrl: string): SsrFPolicy | undefined { diff --git a/extensions/github-copilot/model-metadata.ts b/extensions/github-copilot/model-metadata.ts index 6c7c74a84de..5d2f919bf0b 100644 --- a/extensions/github-copilot/model-metadata.ts +++ b/extensions/github-copilot/model-metadata.ts @@ -1,6 +1,15 @@ import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime"; +type CopilotRuntimeApi = "anthropic-messages" | "openai-completions" | "openai-responses"; + +const COPILOT_CHAT_COMPLETIONS_COMPAT: ModelDefinitionConfig["compat"] = { + supportsStore: false, + supportsDeveloperRole: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", +}; + const STATIC_MODEL_OVERRIDES = new Map>([ [ "gpt-5.5", @@ -13,12 +22,26 @@ const STATIC_MODEL_OVERRIDES = new Map>([ ], ]); -export function resolveCopilotTransportApi( +function isCopilotGeminiModelId(modelId: string): boolean { + return 
/(?:^|[-_.])gemini(?:$|[-_.])/.test(modelId); +} + +export function resolveCopilotTransportApi(modelId: string): CopilotRuntimeApi { + const normalized = normalizeOptionalLowercaseString(modelId) ?? ""; + if (normalized.includes("claude")) { + return "anthropic-messages"; + } + if (isCopilotGeminiModelId(normalized)) { + return "openai-completions"; + } + return "openai-responses"; +} + +export function resolveCopilotModelCompat( modelId: string, -): "anthropic-messages" | "openai-responses" { - return (normalizeOptionalLowercaseString(modelId) ?? "").includes("claude") - ? "anthropic-messages" - : "openai-responses"; +): ModelDefinitionConfig["compat"] | undefined { + const normalized = normalizeOptionalLowercaseString(modelId) ?? ""; + return isCopilotGeminiModelId(normalized) ? { ...COPILOT_CHAT_COMPLETIONS_COMPAT } : undefined; } export function resolveStaticCopilotModelOverride( diff --git a/extensions/github-copilot/models-defaults.ts b/extensions/github-copilot/models-defaults.ts index ad8d6f515f4..faed5e68b4b 100644 --- a/extensions/github-copilot/models-defaults.ts +++ b/extensions/github-copilot/models-defaults.ts @@ -1,5 +1,9 @@ import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared"; -import { resolveCopilotTransportApi, resolveStaticCopilotModelOverride } from "./model-metadata.js"; +import { + resolveCopilotModelCompat, + resolveCopilotTransportApi, + resolveStaticCopilotModelOverride, +} from "./model-metadata.js"; const DEFAULT_CONTEXT_WINDOW = 128_000; const DEFAULT_MAX_TOKENS = 8192; @@ -42,6 +46,7 @@ export function buildCopilotModelDefinition(modelId: string): ModelDefinitionCon throw new Error("Model id required"); } const staticOverride = resolveStaticCopilotModelOverride(id); + const compat = staticOverride?.compat ?? resolveCopilotModelCompat(id); return { id, name: staticOverride?.name ?? 
id, @@ -51,6 +56,6 @@ export function buildCopilotModelDefinition(modelId: string): ModelDefinitionCon cost: staticOverride?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: staticOverride?.contextWindow ?? DEFAULT_CONTEXT_WINDOW, maxTokens: staticOverride?.maxTokens ?? DEFAULT_MAX_TOKENS, - ...(staticOverride?.compat ? { compat: staticOverride.compat } : {}), + ...(compat ? { compat } : {}), }; } diff --git a/extensions/github-copilot/models.test.ts b/extensions/github-copilot/models.test.ts index a039876c57c..35a584cf094 100644 --- a/extensions/github-copilot/models.test.ts +++ b/extensions/github-copilot/models.test.ts @@ -113,6 +113,17 @@ describe("github-copilot model defaults", () => { expect(def.api).toBe("openai-responses"); }); + it("routes Gemini models through Chat Completions with Copilot compat flags", () => { + const def = buildCopilotModelDefinition("gemini-3.1-pro-preview"); + expect(def.api).toBe("openai-completions"); + expect(def.compat).toEqual({ + supportsStore: false, + supportsDeveloperRole: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }); + }); + it("throws on empty model id", () => { expect(() => buildCopilotModelDefinition("")).toThrow("Model id required"); expect(() => buildCopilotModelDefinition(" ")).toThrow("Model id required"); @@ -225,6 +236,17 @@ describe("resolveCopilotForwardCompatModel", () => { expect((result as unknown as Record).input).toEqual(["text", "image"]); }); + it("creates synthetic Gemini models with Chat Completions compatibility", () => { + const result = requireResolvedModel(createMockCtx("gemini-3.1-pro-preview")); + expect((result as unknown as Record).api).toBe("openai-completions"); + expect((result as unknown as Record).compat).toEqual({ + supportsStore: false, + supportsDeveloperRole: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }); + }); + it("infers reasoning=true for o1/o3 model IDs", () => { for (const id of ["o1", 
"o3", "o3-mini", "o1-preview"]) { const ctx = createMockCtx(id); @@ -443,6 +465,24 @@ describe("fetchCopilotModelCatalog", () => { }, }, }, + { + id: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro Preview", + object: "model", + vendor: "Google", + capabilities: { + type: "chat", + limits: { + max_context_window_tokens: 1_000_000, + max_output_tokens: 65_536, + }, + supports: { + vision: true, + tool_calls: true, + streaming: true, + }, + }, + }, { id: "claude-opus-4.7-1m-internal", name: "Claude Opus 4.7 (1M context)(Internal only)", @@ -501,6 +541,7 @@ describe("fetchCopilotModelCatalog", () => { expect(out.map((m) => m.id)).toEqual([ "gpt-5.5", "gpt-5.3-codex", + "gemini-3.1-pro-preview", "claude-opus-4.7-1m-internal", ]); @@ -521,6 +562,15 @@ describe("fetchCopilotModelCatalog", () => { expect(codex?.reasoning).toBe(true); expect(codex?.contextWindow).toBe(400000); + const gemini = out.find((m) => m.id === "gemini-3.1-pro-preview"); + expect(gemini?.api).toBe("openai-completions"); + expect(gemini?.compat).toEqual({ + supportsStore: false, + supportsDeveloperRole: false, + supportsUsageInStreaming: false, + maxTokensField: "max_tokens", + }); + const opus1m = out.find((m) => m.id === "claude-opus-4.7-1m-internal"); expect(opus1m?.api).toBe("anthropic-messages"); expect(opus1m?.contextWindow).toBe(1_000_000); diff --git a/extensions/github-copilot/models.ts b/extensions/github-copilot/models.ts index 3370e4c92da..3e07d91aabd 100644 --- a/extensions/github-copilot/models.ts +++ b/extensions/github-copilot/models.ts @@ -2,10 +2,15 @@ import type { ProviderResolveDynamicModelContext, ProviderRuntimeModel, } from "openclaw/plugin-sdk/core"; +import { buildCopilotIdeHeaders, COPILOT_INTEGRATION_ID } from "openclaw/plugin-sdk/provider-auth"; import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeModelCompat } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeOptionalLowercaseString } from 
"openclaw/plugin-sdk/string-coerce-runtime"; -import { resolveCopilotTransportApi, resolveStaticCopilotModelOverride } from "./model-metadata.js"; +import { + resolveCopilotModelCompat, + resolveCopilotTransportApi, + resolveStaticCopilotModelOverride, +} from "./model-metadata.js"; export const PROVIDER_ID = "github-copilot"; const CODEX_FORWARD_COMPAT_TARGET_IDS = new Set(["gpt-5.4", "gpt-5.3-codex"]); @@ -57,6 +62,7 @@ export function resolveCopilotForwardCompatModel( const staticOverride = resolveStaticCopilotModelOverride(lowerModelId); if (staticOverride) { + const compat = staticOverride.compat ?? resolveCopilotModelCompat(trimmedModelId); return normalizeModelCompat({ id: trimmedModelId, name: staticOverride.name ?? trimmedModelId, @@ -67,7 +73,7 @@ export function resolveCopilotForwardCompatModel( cost: staticOverride.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: staticOverride.contextWindow ?? DEFAULT_CONTEXT_WINDOW, maxTokens: staticOverride.maxTokens ?? DEFAULT_MAX_TOKENS, - ...(staticOverride.compat ? { compat: staticOverride.compat } : {}), + ...(compat ? { compat } : {}), } as ProviderRuntimeModel); } @@ -77,6 +83,7 @@ export function resolveCopilotForwardCompatModel( // by simply adding them to agents.defaults.models in openclaw.json — no // code change required. const reasoning = /^o[13](\b|$)/.test(lowerModelId) || isCopilotCodexModelId(lowerModelId); + const compat = resolveCopilotModelCompat(trimmedModelId); return normalizeModelCompat({ id: trimmedModelId, name: trimmedModelId, @@ -89,6 +96,7 @@ export function resolveCopilotForwardCompatModel( cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: DEFAULT_CONTEXT_WINDOW, maxTokens: DEFAULT_MAX_TOKENS, + ...(compat ? 
{ compat } : {}), } as ProviderRuntimeModel); } @@ -126,7 +134,7 @@ const COPILOT_ROUTER_ID_PREFIX = "accounts/"; function resolveCopilotApiForVendor( vendor: string | undefined, modelId: string, -): "anthropic-messages" | "openai-responses" { +): "anthropic-messages" | "openai-completions" | "openai-responses" { if (vendor && vendor.toLowerCase() === "anthropic") { return "anthropic-messages"; } @@ -167,6 +175,7 @@ function mapCopilotApiModelToDefinition( typeof limits?.max_output_tokens === "number" && limits.max_output_tokens > 0 ? limits.max_output_tokens : DEFAULT_MAX_TOKENS; + const compat = resolveCopilotModelCompat(id); const definition: ModelDefinitionConfig = { id, @@ -177,6 +186,7 @@ function mapCopilotApiModelToDefinition( cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow, maxTokens, + ...(compat ? { compat } : {}), }; return definition; } @@ -224,8 +234,8 @@ export async function fetchCopilotModelCatalog( headers: { Accept: "application/json", Authorization: `Bearer ${params.copilotApiToken}`, - "Editor-Version": "vscode/1.96.2", - "Copilot-Integration-Id": "vscode-chat", + ...buildCopilotIdeHeaders(), + "Copilot-Integration-Id": COPILOT_INTEGRATION_ID, }, signal: params.signal ?? 
controller?.signal, }); diff --git a/extensions/github-copilot/stream.test.ts b/extensions/github-copilot/stream.test.ts index 3c206ffdaba..2d0b6537b92 100644 --- a/extensions/github-copilot/stream.test.ts +++ b/extensions/github-copilot/stream.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it, vi } from "vitest"; import { buildCopilotDynamicHeaders } from "./stream.js"; import { wrapCopilotAnthropicStream, + wrapCopilotOpenAICompletionsStream, wrapCopilotOpenAIResponsesStream, wrapCopilotProviderStream, } from "./stream.js"; @@ -204,6 +205,42 @@ describe("wrapCopilotAnthropicStream", () => { ); }); + it("adds Copilot headers for Chat Completions models", () => { + const baseStreamFn = vi.fn(() => ({ async *[Symbol.asyncIterator]() {} }) as never); + const wrapped = requireStreamFn(wrapCopilotOpenAICompletionsStream(baseStreamFn)); + const messages = [ + { + role: "user", + content: [ + { type: "text", text: "look" }, + { type: "image", data: "abc", mimeType: "image/png" }, + ], + }, + ] as Parameters[0]["messages"]; + const expectedCopilotHeaders = buildCopilotDynamicHeaders({ + messages, + hasImages: true, + }); + + void wrapped( + { + provider: "github-copilot", + api: "openai-completions", + id: "gemini-3.1-pro-preview", + } as never, + { messages } as never, + { headers: { "X-Test": "1" } }, + ); + + const options = requireFirstStreamOptions(baseStreamFn, "Copilot Chat Completions stream"); + expect(options).toEqual({ + headers: { + ...expectedCopilotHeaders, + "X-Test": "1", + }, + }); + }); + it("adapts provider stream context without changing wrapper behavior", () => { const baseStreamFn = vi.fn(() => ({ async *[Symbol.asyncIterator]() {} }) as never); diff --git a/extensions/github-copilot/stream.ts b/extensions/github-copilot/stream.ts index 93a82b627d7..78981436a0b 100644 --- a/extensions/github-copilot/stream.ts +++ b/extensions/github-copilot/stream.ts @@ -131,6 +131,27 @@ export function wrapCopilotOpenAIResponsesStream( }; } -export function 
wrapCopilotProviderStream(ctx: ProviderWrapStreamFnContext): StreamFn | undefined { - return wrapCopilotOpenAIResponsesStream(wrapCopilotAnthropicStream(ctx.streamFn)); +export function wrapCopilotOpenAICompletionsStream( + baseStreamFn: StreamFn | undefined, +): StreamFn | undefined { + if (!baseStreamFn) { + return undefined; + } + const underlying = baseStreamFn; + return (model, context, options) => { + if (model.provider !== "github-copilot" || model.api !== "openai-completions") { + return underlying(model, context, options); + } + + return underlying(model, context, { + ...options, + headers: buildCopilotRequestHeaders(context, options?.headers), + }); + }; +} + +export function wrapCopilotProviderStream(ctx: ProviderWrapStreamFnContext): StreamFn | undefined { + return wrapCopilotOpenAICompletionsStream( + wrapCopilotOpenAIResponsesStream(wrapCopilotAnthropicStream(ctx.streamFn)), + ); } diff --git a/extensions/github-copilot/usage.ts b/extensions/github-copilot/usage.ts index 1e13717c9ea..7efec13b2c2 100644 --- a/extensions/github-copilot/usage.ts +++ b/extensions/github-copilot/usage.ts @@ -1,3 +1,4 @@ +import { buildCopilotIdeHeaders } from "openclaw/plugin-sdk/provider-auth"; import { buildUsageHttpErrorSnapshot, fetchJson, @@ -25,9 +26,7 @@ export async function fetchCopilotUsage( { headers: { Authorization: `token ${token}`, - "Editor-Version": "vscode/1.96.2", - "User-Agent": "GitHubCopilotChat/0.26.7", - "X-Github-Api-Version": "2025-04-01", + ...buildCopilotIdeHeaders({ includeApiVersion: true }), }, }, timeoutMs, diff --git a/src/agents/pi-hooks/compaction-safeguard.test.ts b/src/agents/pi-hooks/compaction-safeguard.test.ts index 7844e00c39c..703d1efd123 100644 --- a/src/agents/pi-hooks/compaction-safeguard.test.ts +++ b/src/agents/pi-hooks/compaction-safeguard.test.ts @@ -1429,7 +1429,7 @@ describe("compaction-safeguard recent-turn preservation", () => { expect(summaryCall.headers?.["Copilot-Integration-Id"]).toBe("vscode-chat"); 
expect(summaryCall.headers?.["Editor-Plugin-Version"]).toBe("copilot-chat/0.35.0"); expect(summaryCall.headers?.["Openai-Organization"]).toBe("github-copilot"); - expect(summaryCall.headers?.["User-Agent"]).toBe("GitHubCopilotChat/0.26.7"); + expect(summaryCall.headers?.["User-Agent"]).toBe("GitHubCopilotChat/0.35.0"); expect(summaryCall.headers?.["X-Test"]).toBe("1"); expect(summaryCall.headers?.["x-initiator"]).toBe("user"); }); diff --git a/src/media-understanding/image.test.ts b/src/media-understanding/image.test.ts index 4f7b9a84a1a..5d72b67df73 100644 --- a/src/media-understanding/image.test.ts +++ b/src/media-understanding/image.test.ts @@ -21,6 +21,7 @@ const hoisted = vi.hoisted(() => ({ prepareProviderDynamicModelMock: vi.fn(async () => {}), resolveModelAsyncMock: vi.fn(), resolveModelWithRegistryMock: vi.fn(), + resolveCopilotApiTokenMock: vi.fn(), })); const { completeMock, @@ -35,6 +36,7 @@ const { prepareProviderDynamicModelMock, resolveModelAsyncMock, resolveModelWithRegistryMock, + resolveCopilotApiTokenMock, } = hoisted; type ResolveModelWithRegistryTestParams = { @@ -118,6 +120,15 @@ vi.mock("../agents/pi-embedded-runner/model.js", () => ({ resolveModelAsync: resolveModelAsyncMock, })); +vi.mock("../plugin-sdk/provider-auth.js", () => ({ + buildCopilotIdeHeaders: () => ({ + "Editor-Version": "vscode/1.107.0", + "User-Agent": "GitHubCopilotChat/0.35.0", + }), + COPILOT_INTEGRATION_ID: "vscode-chat", + resolveCopilotApiToken: resolveCopilotApiTokenMock, +})); + const { describeImageWithModel } = await import("./image.js"); describe("describeImageWithModel", () => { @@ -171,6 +182,12 @@ describe("describeImageWithModel", () => { return { authStorage, model, modelRegistry }; }, ); + resolveCopilotApiTokenMock.mockResolvedValue({ + token: "copilot-api-token", + expiresAt: Date.now() + 60_000, + source: "test", + baseUrl: "https://api.githubcopilot.com", + }); }); function getApiKeyForModelCall(index = 0): AuthRequestCall { @@ -776,4 +793,145 @@ 
describe("describeImageWithModel", () => { expect(authRequest?.profileId).toBe("google:default"); expect(setRuntimeApiKeyMock).toHaveBeenCalledWith("google", "oauth-test"); }); + + it("places image prompt in user content for github-copilot provider", async () => { + const providerStreamResult = { + role: "assistant", + api: "openai-completions", + provider: "github-copilot", + model: "gemini-3.1-pro-preview", + stopReason: "stop", + timestamp: Date.now(), + content: [{ type: "text", text: "A solid red square." }], + }; + const providerStreamFn = vi.fn(() => ({ + result: vi.fn(async () => providerStreamResult), + })); + registerProviderStreamForModelMock.mockReturnValueOnce(providerStreamFn); + discoverModelsMock.mockReturnValue({ + find: vi.fn(() => ({ + provider: "github-copilot", + id: "gemini-3.1-pro-preview", + input: ["text", "image"], + api: "openai-completions", + baseUrl: "https://stale.example.test", + })), + }); + + await describeImageWithModel({ + cfg: {}, + agentDir: "/tmp/openclaw-agent", + provider: "github-copilot", + model: "gemini-3.1-pro-preview", + buffer: Buffer.from("png-bytes"), + fileName: "image.png", + mime: "image/png", + prompt: "Describe the image.", + timeoutMs: 1000, + }); + + expect(completeMock).not.toHaveBeenCalled(); + expect(providerStreamFn).toHaveBeenCalledOnce(); + expect(resolveCopilotApiTokenMock).toHaveBeenCalledWith({ + githubToken: "oauth-test", + }); + expect(setRuntimeApiKeyMock).toHaveBeenCalledWith("github-copilot", "copilot-api-token"); + const [completionModel, context, options] = providerStreamFn.mock.calls[0] as unknown as [ + { baseUrl?: string }, + { systemPrompt?: string; messages?: Array<{ role: string; content: unknown[] }> }, + { apiKey?: string; headers?: Record }, + ]; + expect(completionModel.baseUrl).toBe("https://api.githubcopilot.com"); + expect(options.apiKey).toBe("copilot-api-token"); + expect(options.headers).toMatchObject({ + "Copilot-Integration-Id": "vscode-chat", + "Copilot-Vision-Request": 
"true", + "Editor-Version": "vscode/1.107.0", + "User-Agent": "GitHubCopilotChat/0.35.0", + }); + expect(context.systemPrompt).toBeUndefined(); + const userMessage = context.messages?.find((m) => m.role === "user"); + expect(userMessage).toBeDefined(); + const contentTypes = userMessage!.content.map((block) => (block as { type: string }).type); + expect(contentTypes).toContain("text"); + expect(contentTypes).toContain("image"); + }); + + it("fails github-copilot image runtime setup when token exchange fails", async () => { + discoverModelsMock.mockReturnValue({ + find: vi.fn(() => ({ + provider: "github-copilot", + id: "gemini-3.1-pro-preview", + input: ["text", "image"], + api: "openai-completions", + baseUrl: "https://api.githubcopilot.com", + })), + }); + resolveCopilotApiTokenMock.mockRejectedValueOnce( + new Error("Copilot token exchange failed: HTTP 401"), + ); + + await expect( + describeImageWithModel({ + cfg: {}, + agentDir: "/tmp/openclaw-agent", + provider: "github-copilot", + model: "gemini-3.1-pro-preview", + buffer: Buffer.from("png-bytes"), + fileName: "image.png", + mime: "image/png", + prompt: "Describe the image.", + timeoutMs: 1000, + }), + ).rejects.toThrow("Copilot token exchange failed: HTTP 401"); + + expect(setRuntimeApiKeyMock).not.toHaveBeenCalledWith("github-copilot", "oauth-test"); + expect(completeMock).not.toHaveBeenCalled(); + }); + + it("does not place image prompt in user content for non-copilot providers", async () => { + discoverModelsMock.mockReturnValue({ + find: vi.fn(() => ({ + provider: "openai", + id: "gpt-4o", + input: ["text", "image"], + api: "openai-responses", + baseUrl: "https://api.openai.com/v1", + })), + }); + completeMock.mockResolvedValue({ + role: "assistant", + api: "openai-responses", + provider: "openai", + model: "gpt-4o", + stopReason: "stop", + timestamp: Date.now(), + content: [{ type: "text", text: "A solid red square." 
}], + }); + + await describeImageWithModel({ + cfg: {}, + agentDir: "/tmp/openclaw-agent", + provider: "openai", + model: "gpt-4o", + buffer: Buffer.from("png-bytes"), + fileName: "image.png", + mime: "image/png", + prompt: "Describe the image.", + timeoutMs: 1000, + }); + + expect(completeMock).toHaveBeenCalledOnce(); + const [, context] = completeMock.mock.calls[0] as [ + unknown, + { systemPrompt?: string; messages?: Array<{ role: string; content: unknown[] }> }, + ]; + // Non-Copilot providers keep prompt in system message, images in user message + expect(context.systemPrompt).toBe("Describe the image."); + const userMessage = context.messages?.find((m) => m.role === "user"); + expect(userMessage).toBeDefined(); + const contentTypes = userMessage!.content.map((block) => (block as { type: string }).type); + expect(contentTypes).not.toContain("text"); + expect(contentTypes).toContain("image"); + }); }); diff --git a/src/media-understanding/image.ts b/src/media-understanding/image.ts index 3a18ab02081..d77b76729b8 100644 --- a/src/media-understanding/image.ts +++ b/src/media-understanding/image.ts @@ -1,4 +1,10 @@ -import type { Api, Context, Model, ProviderStreamOptions } from "@earendil-works/pi-ai"; +import type { + Api, + AssistantMessage, + Context, + Model, + ProviderStreamOptions, +} from "@earendil-works/pi-ai"; import { complete } from "@earendil-works/pi-ai"; import { isMinimaxVlmModel, minimaxUnderstandImage } from "../agents/minimax-vlm.js"; import { @@ -15,6 +21,11 @@ import { coerceImageAssistantText, hasImageReasoningOnlyResponse, } from "../agents/tools/image-tool.helpers.js"; +import { + buildCopilotIdeHeaders, + COPILOT_INTEGRATION_ID, + resolveCopilotApiToken, +} from "../plugin-sdk/provider-auth.js"; import type { ImageDescriptionRequest, ImageDescriptionResult, @@ -143,7 +154,8 @@ async function resolveImageRuntime(params: { allowBundledStaticCatalogFallback: true, }, ); - const { authStorage, model } = resolved; + const { authStorage } = 
resolved; + let { model } = resolved; if (!model) { throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`); } @@ -168,7 +180,20 @@ async function resolveImageRuntime(params: { preferredProfile: params.preferredProfile, store: params.authStore, }); - const apiKey = requireApiKey(apiKeyInfo, model.provider); + let apiKey = requireApiKey(apiKeyInfo, model.provider); + // Image tool bypasses prepareRuntimeAuth — exchange OAuth token for + // a short-lived Copilot API token so the integrator scope (vscode-chat) + // matches what runtime chat requests send. + if (model.provider === "github-copilot") { + const copilotToken = await resolveCopilotApiToken({ + githubToken: apiKey, + }); + apiKey = copilotToken.token; + const runtimeBaseUrl = copilotToken.baseUrl?.trim(); + if (runtimeBaseUrl) { + model = { ...model, baseUrl: runtimeBaseUrl }; + } + } authStorage.setRuntimeApiKey(model.provider, apiKey); return { apiKey, model }; } @@ -200,6 +225,13 @@ function buildImageContext( } function shouldPlaceImagePromptInUserContent(model: Model): boolean { + // GitHub Copilot models (including Gemini 3.1 Pro Preview) require the + // prompt text to be in the user message alongside the image. Placing it + // in a separate system message produces "Request must contain at least + // one non-empty message" (400). 
+ if (model.provider === "github-copilot") { + return true; + } const capabilities = resolveProviderRequestCapabilities({ provider: model.provider, api: model.api, @@ -213,6 +245,19 @@ function shouldPlaceImagePromptInUserContent(model: Model): boolean { ); } +function buildImageRequestHeaders(model: Model): Record | undefined { + if (model.provider !== "github-copilot") { + return undefined; + } + return { + ...buildCopilotIdeHeaders(), + "Copilot-Integration-Id": COPILOT_INTEGRATION_ID, + "Openai-Organization": "github-copilot", + "x-initiator": "user", + "Copilot-Vision-Request": "true", + }; +} + async function describeImagesWithMinimax(params: { apiKey: string; modelId: string; @@ -354,7 +399,7 @@ async function describeImagesWithModelInternal( }); } - registerProviderStreamForModel({ + const providerStreamFn = registerProviderStreamForModel({ model, cfg: params.cfg, agentDir: params.agentDir, @@ -368,16 +413,22 @@ async function describeImagesWithModelInternal( const completeImage = async (onPayload?: ProviderStreamOptions["onPayload"]) => { const payloadHandler = composeImageDescriptionPayloadHandlers(onPayload, options.onPayload); const timeoutMs = resolveImageDescriptionTimeoutMs(params.timeoutMs, startedAtMs); + const headers = buildImageRequestHeaders(model); + const streamOptions = { + apiKey, + maxTokens, + signal: controller.signal, + ...(timeoutMs !== undefined ? { timeoutMs } : {}), + ...(headers ? { headers } : {}), + ...(payloadHandler ? { onPayload: payloadHandler } : {}), + }; + const task: Promise = providerStreamFn + ? (async () => await (await providerStreamFn(model, context, streamOptions)).result())() + : complete(model, context, streamOptions); return await withImageDescriptionTimeout({ controller, timeoutMs, - task: complete(model, context, { - apiKey, - maxTokens, - signal: controller.signal, - ...(timeoutMs !== undefined ? { timeoutMs } : {}), - ...(payloadHandler ? 
{ onPayload: payloadHandler } : {}), - }), + task, }); }; diff --git a/src/plugin-sdk/provider-auth.ts b/src/plugin-sdk/provider-auth.ts index 7e4a17faa48..3aa4c0272f3 100644 --- a/src/plugin-sdk/provider-auth.ts +++ b/src/plugin-sdk/provider-auth.ts @@ -95,9 +95,9 @@ export { const COPILOT_TOKEN_URL = "https://api.github.com/copilot_internal/v2/token"; /** @deprecated GitHub Copilot provider-owned helper; do not use from third-party plugins. */ -export const COPILOT_EDITOR_VERSION = "vscode/1.96.2"; +export const COPILOT_EDITOR_VERSION = "vscode/1.107.0"; /** @deprecated GitHub Copilot provider-owned helper; do not use from third-party plugins. */ -export const COPILOT_USER_AGENT = "GitHubCopilotChat/0.26.7"; +export const COPILOT_USER_AGENT = "GitHubCopilotChat/0.35.0"; /** @deprecated GitHub Copilot provider-owned helper; do not use from third-party plugins. */ export const COPILOT_EDITOR_PLUGIN_VERSION = "copilot-chat/0.35.0"; /** @deprecated GitHub Copilot provider-owned helper; do not use from third-party plugins. */