diff --git a/extensions/openai/video-generation-provider.test.ts b/extensions/openai/video-generation-provider.test.ts index 19474609999..dcda2af6800 100644 --- a/extensions/openai/video-generation-provider.test.ts +++ b/extensions/openai/video-generation-provider.test.ts @@ -3,12 +3,14 @@ import { buildOpenAIVideoGenerationProvider } from "./video-generation-provider. const { resolveApiKeyForProviderMock, + postJsonRequestMock, postTranscriptionRequestMock, fetchWithTimeoutMock, assertOkOrThrowHttpErrorMock, resolveProviderHttpRequestConfigMock, } = vi.hoisted(() => ({ resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "openai-key" })), + postJsonRequestMock: vi.fn(), postTranscriptionRequestMock: vi.fn(), fetchWithTimeoutMock: vi.fn(), assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), @@ -27,6 +29,7 @@ vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ vi.mock("openclaw/plugin-sdk/provider-http", () => ({ assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, fetchWithTimeout: fetchWithTimeoutMock, + postJsonRequest: postJsonRequestMock, postTranscriptionRequest: postTranscriptionRequestMock, resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, })); @@ -34,14 +37,15 @@ vi.mock("openclaw/plugin-sdk/provider-http", () => ({ describe("openai video generation provider", () => { afterEach(() => { resolveApiKeyForProviderMock.mockClear(); + postJsonRequestMock.mockReset(); postTranscriptionRequestMock.mockReset(); fetchWithTimeoutMock.mockReset(); assertOkOrThrowHttpErrorMock.mockClear(); resolveProviderHttpRequestConfigMock.mockClear(); }); - it("creates, polls, and downloads a Sora video", async () => { - postTranscriptionRequestMock.mockResolvedValue({ + it("uses JSON for text-only Sora requests", async () => { + postJsonRequestMock.mockResolvedValue({ response: { json: async () => ({ id: "vid_123", @@ -75,11 +79,12 @@ describe("openai video generation provider", () => { durationSeconds: 4, }); - expect(postTranscriptionRequestMock).toHaveBeenCalledWith( + expect(postJsonRequestMock).toHaveBeenCalledWith( expect.objectContaining({ url: "https://api.openai.com/v1/videos", }), ); + expect(postTranscriptionRequestMock).not.toHaveBeenCalled(); expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( 1, "https://api.openai.com/v1/videos/vid_123", @@ -97,6 +102,48 @@ describe("openai video generation provider", () => { ); }); + it("uses multipart when a reference asset is present", async () => { + postTranscriptionRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + id: "vid_456", + model: "sora-2", + status: "queued", + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock + .mockResolvedValueOnce({ + json: async () => ({ + id: "vid_456", + model: "sora-2", + status: "completed", + }), + }) + .mockResolvedValueOnce({ + headers: new Headers({ "content-type": "video/mp4" }), + arrayBuffer: async () => Buffer.from("mp4-bytes"), + }); + + const provider = buildOpenAIVideoGenerationProvider(); + await provider.generateVideo({ + provider: "openai", + model: "sora-2", + prompt: "Animate this frame", + cfg: {}, + inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], + }); + + expect(postJsonRequestMock).not.toHaveBeenCalled(); + expect(postTranscriptionRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://api.openai.com/v1/videos", + body: expect.any(FormData), + }), + ); + }); + it("rejects multiple reference assets", async () => { const provider = buildOpenAIVideoGenerationProvider(); diff --git a/extensions/openai/video-generation-provider.ts b/extensions/openai/video-generation-provider.ts index 9b302c1535c..6ef9f8834fe 100644 --- a/extensions/openai/video-generation-provider.ts +++ b/extensions/openai/video-generation-provider.ts @@ -3,6 +3,7 @@ import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runt import { assertOkOrThrowHttpError, fetchWithTimeout, + postJsonRequest, postTranscriptionRequest, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; @@ -220,37 +221,57 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { transport: "http", }); - const form = new FormData(); - form.set("prompt", req.prompt); - form.set("model", req.model?.trim() || DEFAULT_OPENAI_VIDEO_MODEL); + const model = req.model?.trim() || DEFAULT_OPENAI_VIDEO_MODEL; const seconds = resolveDurationSeconds(req.durationSeconds); - if (seconds) { - form.set("seconds", seconds); - } const size = resolveSize({ size: req.size, aspectRatio: req.aspectRatio, resolution: req.resolution, }); - if (size) { - form.set("size", size); - } const referenceAsset = resolveReferenceAsset(req); - if (referenceAsset) { - form.set("input_reference", referenceAsset); - } - - const multipartHeaders = new Headers(headers); - multipartHeaders.delete("Content-Type"); - const { response, release } = await postTranscriptionRequest({ - url: `${baseUrl}/videos`, - headers: multipartHeaders, - body: form, - timeoutMs: req.timeoutMs, - fetchFn, - allowPrivateNetwork, - dispatcherPolicy, - }); + const requestResult = referenceAsset + ? await (() => { + const form = new FormData(); + form.set("prompt", req.prompt); + form.set("model", model); + if (seconds) { + form.set("seconds", seconds); + } + if (size) { + form.set("size", size); + } + form.set("input_reference", referenceAsset); + const multipartHeaders = new Headers(headers); + multipartHeaders.delete("Content-Type"); + return postTranscriptionRequest({ + url: `${baseUrl}/videos`, + headers: multipartHeaders, + body: form, + timeoutMs: req.timeoutMs, + fetchFn, + allowPrivateNetwork, + dispatcherPolicy, + }); + })() + : await (() => { + const jsonHeaders = new Headers(headers); + jsonHeaders.set("Content-Type", "application/json"); + return postJsonRequest({ + url: `${baseUrl}/videos`, + headers: jsonHeaders, + body: { + prompt: req.prompt, + model, + ...(seconds ? { seconds } : {}), + ...(size ? { size } : {}), + }, + timeoutMs: req.timeoutMs, + fetchFn, + allowPrivateNetwork, + dispatcherPolicy, + }); + })(); + const { response, release } = requestResult; try { await assertOkOrThrowHttpError(response, "OpenAI video generation failed"); @@ -275,7 +296,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { }); return { videos: [video], - model: completed.model ?? submitted.model ?? req.model ?? DEFAULT_OPENAI_VIDEO_MODEL, + model: completed.model ?? submitted.model ?? model, metadata: { videoId, status: completed.status, diff --git a/extensions/qwen/video-generation-provider.test.ts b/extensions/qwen/video-generation-provider.test.ts index 5ccb35035f7..1b62b93e4fe 100644 --- a/extensions/qwen/video-generation-provider.test.ts +++ b/extensions/qwen/video-generation-provider.test.ts @@ -124,4 +124,62 @@ describe("qwen video generation provider", () => { ); expect(postJsonRequestMock).not.toHaveBeenCalled(); }); + + it("preserves dedicated coding endpoints for dedicated API keys", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + request_id: "req-2", + output: { + task_id: "task-2", + }, + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock + .mockResolvedValueOnce({ + json: async () => ({ + output: { + task_status: "SUCCEEDED", + results: [{ video_url: "https://example.com/out.mp4" }], + }, + }), + headers: new Headers(), + }) + .mockResolvedValueOnce({ + arrayBuffer: async () => Buffer.from("mp4-bytes"), + headers: new Headers({ "content-type": "video/mp4" }), + }); + + const provider = buildQwenVideoGenerationProvider(); + await provider.generateVideo({ + provider: "qwen", + model: "wan2.6-t2v", + prompt: "animate this shot", + cfg: { + models: { + providers: { + qwen: { + baseUrl: "https://coding-intl.dashscope.aliyuncs.com/v1", + models: [], + }, + }, + }, + }, + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://coding-intl.dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis", + }), + ); + expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( + 1, + "https://coding-intl.dashscope.aliyuncs.com/api/v1/tasks/task-2", + expect.objectContaining({ method: "GET" }), + 120000, + fetch, + ); + }); }); diff --git a/extensions/qwen/video-generation-provider.ts b/extensions/qwen/video-generation-provider.ts index 207bb0b087c..53ab5713802 100644 --- a/extensions/qwen/video-generation-provider.ts +++ b/extensions/qwen/video-generation-provider.ts @@ -52,26 +52,26 @@ function resolveQwenVideoBaseUrl(req: VideoGenerationRequest): string { return DEFAULT_QWEN_VIDEO_BASE_URL; } try { - const url = new URL(direct); - if (url.hostname === "coding-intl.dashscope.aliyuncs.com") { - return "https://dashscope-intl.aliyuncs.com"; - } - if (url.hostname === "coding.dashscope.aliyuncs.com") { - return "https://dashscope.aliyuncs.com"; - } - if (url.hostname === "dashscope-intl.aliyuncs.com") { - return "https://dashscope-intl.aliyuncs.com"; - } - if (url.hostname === "dashscope.aliyuncs.com") { - return "https://dashscope.aliyuncs.com"; - } - return url.origin; + return new URL(direct).toString(); } catch { return DEFAULT_QWEN_VIDEO_BASE_URL; } } function resolveDashscopeAigcApiBaseUrl(baseUrl: string): string { + try { + const url = new URL(baseUrl); + if ( + url.hostname === "coding-intl.dashscope.aliyuncs.com" || + url.hostname === "coding.dashscope.aliyuncs.com" || + url.hostname === "dashscope-intl.aliyuncs.com" || + url.hostname === "dashscope.aliyuncs.com" + ) { + return url.origin; + } + } catch { + // Fall through to legacy prefix handling for non-URL strings. + } if (baseUrl.startsWith(QWEN_STANDARD_CN_BASE_URL)) { return "https://dashscope.aliyuncs.com"; }