diff --git a/CHANGELOG.md b/CHANGELOG.md index 16024c9a3a7..958a81e9089 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ Docs: https://docs.openclaw.ai ### Changes +- Providers/fal: route GPT Image 2 and Nano Banana 2 reference-image edit requests to `/edit` with `image_urls` array, enforce NB2 edit geometry using `aspect_ratio` and `resolution` params, lift fal edit mode input-image caps to 10 for GPT Image 2 and 14 for Nano Banana 2, and allow aspect-ratio hints for those models' edit requests (Flux edits still reject aspect-ratio overrides). (#77295) Thanks @leoge007. + - Build: enable additional low-churn oxlint rules for promise, TypeScript, and runtime footgun checks. - Build: enable stricter Vitest lint rules for focused, disabled, conditional, hook, matcher, and expectation hazards. - Build: pin explicit oxfmt defaults in the shared formatter config to keep formatting behavior stable across upgrades. diff --git a/docs/providers/fal.md b/docs/providers/fal.md index 3a444f05cbd..d86fd3161ed 100644 --- a/docs/providers/fal.md +++ b/docs/providers/fal.md @@ -43,17 +43,19 @@ OpenClaw ships a bundled `fal` provider for hosted image and video generation. The bundled `fal` image-generation provider defaults to `fal/fal-ai/flux/dev`. -| Capability | Value | -| -------------- | -------------------------- | -| Max images | 4 per request | -| Edit mode | Enabled, 1 reference image | -| Size overrides | Supported | -| Aspect ratio | Supported | -| Resolution | Supported | -| Output format | `png` or `jpeg` | +| Capability | Value | +| -------------- | ----------------------------------------------------------- | +| Max images | 4 per request | +| Edit mode | Flux: 1 reference image; GPT Image 2: 10; Nano Banana 2: 14 | +| Size overrides | Supported | +| Aspect ratio | Supported for generate and GPT Image 2/Nano Banana 2 edit | +| Resolution | Supported | +| Output format | `png` or `jpeg` | -The fal image edit endpoint does **not** support `aspectRatio` overrides. 
+Flux image-to-image requests do **not** support `aspectRatio` overrides. GPT +Image 2 and Nano Banana 2 edit requests use fal's `/edit` endpoint and accept +aspect-ratio hints. Use `outputFormat: "png"` when you want PNG output. fal does not declare an diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md index b9249320133..f77fffce4c6 100644 --- a/docs/tools/image-generation.md +++ b/docs/tools/image-generation.md @@ -90,7 +90,7 @@ backend emits it. | ---------- | --------------------------------------- | ---------------------------------- | ----------------------------------------------------- | | ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud | | DeepInfra | `black-forest-labs/FLUX-1-schnell` | Yes (1 image) | `DEEPINFRA_API_KEY` | -| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` | +| fal | `fal-ai/flux/dev` | Yes (model-specific limits) | `FAL_KEY` | | Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | | LiteLLM | `gpt-image-2` | Yes (up to 5 input images) | `LITELLM_API_KEY` | | MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) | @@ -107,13 +107,13 @@ Use `action: "list"` to inspect available providers and models at runtime: ## Provider capabilities -| Capability | ComfyUI | DeepInfra | fal | Google | MiniMax | OpenAI | Vydra | xAI | -| --------------------- | ------------------ | --------- | ----------------- | -------------- | --------------------- | -------------- | ----- | -------------- | -| Generate (max count) | Workflow-defined | 4 | 4 | 4 | 9 | 4 | 1 | 4 | -| Edit / reference | 1 image (workflow) | 1 image | 1 image | Up to 5 images | 1 image (subject ref) | Up to 5 images | - | Up to 5 images | -| Size control | - | ✓ | ✓ | ✓ | - | Up to 4K | - | - | -| Aspect ratio | - | - | ✓ (generate only) | ✓ | ✓ | - | - | ✓ | -| Resolution (1K/2K/4K) | - | - | ✓ | ✓ | - | - | - | 1K, 2K | 
+| Capability | ComfyUI | DeepInfra | fal | Google | MiniMax | OpenAI | Vydra | xAI | +| --------------------- | ------------------ | --------- | ------------------------- | -------------- | --------------------- | -------------- | ----- | -------------- | +| Generate (max count) | Workflow-defined | 4 | 4 | 4 | 9 | 4 | 1 | 4 | +| Edit / reference | 1 image (workflow) | 1 image | Flux: 1; GPT: 10; NB2: 14 | Up to 5 images | 1 image (subject ref) | Up to 5 images | - | Up to 5 images | +| Size control | - | ✓ | ✓ | ✓ | - | Up to 4K | - | - | +| Aspect ratio | - | - | ✓ | ✓ | ✓ | - | - | ✓ | +| Resolution (1K/2K/4K) | - | - | ✓ | ✓ | - | - | - | 1K, 2K | ## Tool parameters @@ -241,7 +241,9 @@ reference images. Pass a reference image path or URL: ``` OpenAI, OpenRouter, Google, and xAI support up to 5 reference images via the -`images` parameter. fal, MiniMax, and ComfyUI support 1. +`images` parameter. fal supports 1 reference image for Flux image-to-image, up +to 10 for GPT Image 2 edits, and up to 14 for Nano Banana 2 edits. MiniMax and +ComfyUI support 1. 
## Provider deep dives diff --git a/extensions/fal/image-generation-provider.test.ts b/extensions/fal/image-generation-provider.test.ts index 9d95b1be655..a1e02d6d4bd 100644 --- a/extensions/fal/image-generation-provider.test.ts +++ b/extensions/fal/image-generation-provider.test.ts @@ -171,6 +171,275 @@ describe("fal image-generation provider", () => { }); }); + it("routes GPT Image 2 edits through /edit with image_urls", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-test-key", + source: "env", + mode: "api-key", + }); + _setFalFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + images: [{ url: "https://v3.fal.media/files/example/gpt-edited.png" }], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("gpt-edited-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildFalImageGenerationProvider(); + await provider.generateImage({ + provider: "fal", + model: "openai/gpt-image-2", + prompt: "combine these references", + cfg: {}, + aspectRatio: "16:9", + inputImages: [ + { buffer: Buffer.from("first"), mimeType: "image/png" }, + { buffer: Buffer.from("second"), mimeType: "image/jpeg" }, + ], + }); + + expectFalJsonPost({ + call: 1, + url: "https://fal.run/openai/gpt-image-2/edit", + body: { + prompt: "combine these references", + image_size: "landscape_16_9", + num_images: 1, + output_format: "png", + image_urls: [ + `data:image/png;base64,${Buffer.from("first").toString("base64")}`, + `data:image/jpeg;base64,${Buffer.from("second").toString("base64")}`, + ], + }, + }); + }); + + it("allows GPT Image 2 edits up to 10 reference images", async () => { + vi.spyOn(providerAuth, 
"resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-test-key", + source: "env", + mode: "api-key", + }); + _setFalFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + images: [{ url: "https://v3.fal.media/files/example/gpt-edited.png" }], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("gpt-edited-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const inputImages = Array.from({ length: 10 }, (_, index) => ({ + buffer: Buffer.from(`ref-${index + 1}`), + mimeType: "image/png", + })); + + const provider = buildFalImageGenerationProvider(); + await provider.generateImage({ + provider: "fal", + model: "openai/gpt-image-2", + prompt: "combine all references", + cfg: {}, + inputImages, + }); + + expectFalJsonPost({ + call: 1, + url: "https://fal.run/openai/gpt-image-2/edit", + body: { + prompt: "combine all references", + num_images: 1, + output_format: "png", + image_urls: inputImages.map( + (image) => `data:image/png;base64,${image.buffer.toString("base64")}`, + ), + }, + }); + }); + + it("rejects GPT Image 2 edits above 10 reference images", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-test-key", + source: "env", + mode: "api-key", + }); + _setFalFetchGuardForTesting(fetchWithSsrFGuardMock); + + const provider = buildFalImageGenerationProvider(); + await expect( + provider.generateImage({ + provider: "fal", + model: "openai/gpt-image-2", + prompt: "too many references", + cfg: {}, + inputImages: Array.from({ length: 11 }, () => ({ + buffer: Buffer.from("ref"), + mimeType: "image/png", + })), + }), + ).rejects.toThrow("fal GPT Image edit supports at most 10 reference images"); + 
expect(fetchWithSsrFGuardMock).not.toHaveBeenCalled(); + }); + + it("routes Nano Banana 2 edits through /edit with NB2 geometry", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-test-key", + source: "env", + mode: "api-key", + }); + _setFalFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + images: [{ url: "https://v3.fal.media/files/example/nb2-edited.png" }], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("nb2-edited-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildFalImageGenerationProvider(); + await provider.generateImage({ + provider: "fal", + model: "fal-ai/nano-banana-2", + prompt: "blend these references", + cfg: {}, + aspectRatio: "9:16", + resolution: "2K", + inputImages: [ + { buffer: Buffer.from("first"), mimeType: "image/png" }, + { buffer: Buffer.from("second"), mimeType: "image/png" }, + ], + }); + + expectFalJsonPost({ + call: 1, + url: "https://fal.run/fal-ai/nano-banana-2/edit", + body: { + prompt: "blend these references", + aspect_ratio: "9:16", + resolution: "2K", + num_images: 1, + output_format: "png", + image_urls: [ + `data:image/png;base64,${Buffer.from("first").toString("base64")}`, + `data:image/png;base64,${Buffer.from("second").toString("base64")}`, + ], + }, + }); + }); + + it("rejects Nano Banana 2 edits above 14 reference images", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-test-key", + source: "env", + mode: "api-key", + }); + _setFalFetchGuardForTesting(fetchWithSsrFGuardMock); + + const provider = buildFalImageGenerationProvider(); + await expect( + provider.generateImage({ + provider: "fal", + 
model: "fal-ai/nano-banana-2", + prompt: "too many references", + cfg: {}, + inputImages: Array.from({ length: 15 }, () => ({ + buffer: Buffer.from("ref"), + mimeType: "image/png", + })), + }), + ).rejects.toThrow("fal Nano Banana edit supports at most 14 reference images"); + expect(fetchWithSsrFGuardMock).not.toHaveBeenCalled(); + }); + + it("preserves exact custom Fal edit endpoints", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-test-key", + source: "env", + mode: "api-key", + }); + _setFalFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + images: [{ url: "https://v3.fal.media/files/example/custom-edit.png" }], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("custom-edit-data"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildFalImageGenerationProvider(); + await provider.generateImage({ + provider: "fal", + model: "fal-ai/custom/edit", + prompt: "edit through custom endpoint", + cfg: {}, + inputImages: [{ buffer: Buffer.from("source-image"), mimeType: "image/png" }], + }); + + expectFalJsonPost({ + call: 1, + url: "https://fal.run/fal-ai/custom/edit", + body: { + prompt: "edit through custom endpoint", + num_images: 1, + output_format: "png", + image_url: `data:image/png;base64,${Buffer.from("source-image").toString("base64")}`, + }, + }); + }); + it("maps aspect ratio for text generation without forcing a square default", async () => { vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ apiKey: "fal-test-key", @@ -270,7 +539,7 @@ describe("fal image-generation provider", () => { }); }); - it("rejects multi-image edit requests for now", async () => { + it("rejects 
multi-image for Flux edit", async () => { vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ apiKey: "fal-test-key", source: "env", @@ -292,7 +561,7 @@ describe("fal image-generation provider", () => { ).rejects.toThrow("at most one reference image"); }); - it("rejects aspect ratio overrides for the current edit endpoint", async () => { + it("rejects aspect ratio for Flux edit", async () => { vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ apiKey: "fal-test-key", source: "env", diff --git a/extensions/fal/image-generation-provider.ts b/extensions/fal/image-generation-provider.ts index 6910f49fc35..e8a6f5b5642 100644 --- a/extensions/fal/image-generation-provider.ts +++ b/extensions/fal/image-generation-provider.ts @@ -26,6 +26,8 @@ const DEFAULT_FAL_BASE_URL = "https://fal.run"; const DEFAULT_FAL_IMAGE_MODEL = "fal-ai/flux/dev"; const DEFAULT_FAL_EDIT_SUBPATH = "image-to-image"; const DEFAULT_OUTPUT_FORMAT = "png"; +const GPT_IMAGE_EDIT_MAX_INPUT_IMAGES = 10; +const NANO_BANANA_EDIT_MAX_INPUT_IMAGES = 14; const FAL_OUTPUT_FORMATS = ["png", "jpeg"] as const; const FAL_SUPPORTED_SIZES = [ "1024x1024", @@ -97,12 +99,16 @@ function ensureFalModelPath(model: string | undefined, hasInputImages: boolean): return trimmed; } if ( - trimmed.endsWith(`/${DEFAULT_FAL_EDIT_SUBPATH}`) || trimmed.endsWith("/edit") || + trimmed.endsWith(`/${DEFAULT_FAL_EDIT_SUBPATH}`) || trimmed.includes("/image-to-image/") ) { return trimmed; } + // GPT Image 2 and Nano Banana 2 use /edit; Flux uses /image-to-image. 
+ if (trimmed.startsWith("openai/gpt-image-") || trimmed.startsWith("fal-ai/nano-banana-")) { + return `${trimmed}/edit`; + } return `${trimmed}/${DEFAULT_FAL_EDIT_SUBPATH}`; } @@ -196,7 +202,10 @@ function resolveFalImageSize(params: { const normalizedAspectRatio = params.aspectRatio?.trim(); if (normalizedAspectRatio && params.hasInputImages) { - throw new Error("fal image edit endpoint does not support aspectRatio overrides"); + return ( + aspectRatioToEnum(normalizedAspectRatio) ?? + aspectRatioToDimensions(normalizedAspectRatio, 1024) + ); } const edge = mapResolutionToEdge(params.resolution); @@ -268,9 +277,9 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider { edit: { enabled: true, maxCount: 4, - maxInputImages: 1, + maxInputImages: GPT_IMAGE_EDIT_MAX_INPUT_IMAGES, supportsSize: true, - supportsAspectRatio: false, + supportsAspectRatio: true, supportsResolution: true, }, geometry: { @@ -292,11 +301,8 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider { if (!auth.apiKey) { throw new Error("fal API key missing"); } - if ((req.inputImages?.length ?? 0) > 1) { - throw new Error("fal image generation currently supports at most one reference image"); - } - - const hasInputImages = (req.inputImages?.length ?? 0) > 0; + const inputImageCount = req.inputImages?.length ?? 
0; + const hasInputImages = inputImageCount > 0; const imageSize = resolveFalImageSize({ size: req.size, resolution: req.resolution, @@ -304,6 +310,39 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider { hasInputImages, }); const model = ensureFalModelPath(req.model, hasInputImages); + + const isGptImageEditModel = model.startsWith("openai/gpt-image-"); + const isNanoBananaEditModel = model.startsWith("fal-ai/nano-banana-"); + if ( + hasInputImages && + isGptImageEditModel && + inputImageCount > GPT_IMAGE_EDIT_MAX_INPUT_IMAGES + ) { + throw new Error( + `fal GPT Image edit supports at most ${GPT_IMAGE_EDIT_MAX_INPUT_IMAGES} reference images (requested ${inputImageCount})`, + ); + } + if ( + hasInputImages && + isNanoBananaEditModel && + inputImageCount > NANO_BANANA_EDIT_MAX_INPUT_IMAGES + ) { + throw new Error( + `fal Nano Banana edit supports at most ${NANO_BANANA_EDIT_MAX_INPUT_IMAGES} reference images (requested ${inputImageCount})`, + ); + } + + // Flux/custom edit endpoints use the singular image_url contract. + if (hasInputImages && !isGptImageEditModel && !isNanoBananaEditModel) { + if (inputImageCount > 1) { + throw new Error( + "fal flux image generation currently supports at most one reference image", + ); + } + if (req.aspectRatio) { + throw new Error("fal flux image edit endpoint does not support aspectRatio overrides"); + } + } const explicitBaseUrl = req.cfg?.models?.providers?.fal?.baseUrl?.trim(); const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = resolveProviderHttpRequestConfig({ @@ -325,7 +364,17 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider { output_format: req.outputFormat ?? 
DEFAULT_OUTPUT_FORMAT, }; if (imageSize !== undefined) { - requestBody.image_size = imageSize; + // NB2 edit uses its own geometry schema; GPT Image 2 and Flux use image_size + if (model.startsWith("fal-ai/nano-banana-") && hasInputImages) { + if (req.aspectRatio) { + requestBody.aspect_ratio = req.aspectRatio; + } + if (req.resolution) { + requestBody.resolution = req.resolution; + } + } else { + requestBody.image_size = imageSize; + } } if (hasInputImages) { @@ -333,7 +382,12 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider { if (!input) { throw new Error("fal image edit request missing reference image"); } - requestBody.image_url = toImageDataUrl(input); + // GPT Image 2 and NB2 use image_urls (array); Flux uses image_url (singular) + if (isGptImageEditModel || isNanoBananaEditModel) { + requestBody.image_urls = req.inputImages!.map((img) => toImageDataUrl(img)); + } else { + requestBody.image_url = toImageDataUrl(input); + } } const { response, release } = await falFetchGuard({ url: `${baseUrl}/${model}`,