fix(google): retry stalled Gemini first response (#79668)

* fix(google): retry stalled gemini first response

* docs(changelog): note gemini first-response retry

* fix(google): clear gemini first-response deadline
This commit is contained in:
Josh Avant
2026-05-09 00:52:44 -05:00
committed by GitHub
parent b972e207cf
commit 3af81481b4
6 changed files with 495 additions and 62 deletions

View File

@@ -247,6 +247,7 @@ Docs: https://docs.openclaw.ai
- Agents/Codex: auto-enable the Codex harness plugin for one-shot OpenAI model overrides so `openclaw agent --local --model openai/...` does not fail with an unregistered `codex` harness.
- Gateway/live tests: avoid full model-registry enumeration for explicit provider-qualified live model filters, preventing `.profile` OpenAI gateway profile runs from hanging before provider dispatch.
- Gateway/status: surface CLI and gateway runtime versions, warn about stale PATH/global wrappers when they differ, and add stale-wrapper checks to the newer-config warning. Refs #79091. Thanks @RamaAditya49 and @sallyom.
- Google/Gemini: retry stalled Gemini 3 preview direct API-key streams with a lean first-response payload and share Gemini tool-schema cleanup across direct Google and Gemini CLI providers, so main sessions with coding tools can recover before the LLM idle watchdog fires. (#79668) Thanks @joshavant.
- Providers: preserve non-OK `text/event-stream` response bodies so provider HTTP errors keep their JSON detail instead of collapsing to generic streaming failures. Fixes #78180.
- Gateway/auth: make explicit `trusted-proxy` mode fail closed instead of accepting local password fallback credentials after trusted-proxy identity checks fail. Fixes #78684.
- Active memory: treat Google Chat `spaces/...` conversation ids as scoped targets instead of runnable channel names so recall runs no longer fail bundled-plugin dirName validation. Fixes #78918.

View File

@@ -5,7 +5,6 @@ import type {
} from "openclaw/plugin-sdk/plugin-entry";
import { buildOauthProviderAuthResult } from "openclaw/plugin-sdk/provider-auth-result";
import type { ProviderPlugin } from "openclaw/plugin-sdk/provider-model-shared";
import { buildProviderToolCompatFamilyHooks } from "openclaw/plugin-sdk/provider-tools";
import { fetchGeminiUsage } from "openclaw/plugin-sdk/provider-usage";
import { formatGoogleOauthApiKey, parseGoogleUsageToken } from "./oauth-token-shared.js";
import { GOOGLE_GEMINI_PROVIDER_HOOKS } from "./provider-hooks.js";
@@ -21,11 +20,6 @@ const ENV_VARS = [
"GEMINI_CLI_OAUTH_CLIENT_SECRET",
] as const;
const GOOGLE_GEMINI_CLI_PROVIDER_HOOKS = {
...GOOGLE_GEMINI_PROVIDER_HOOKS,
...buildProviderToolCompatFamilyHooks("gemini"),
};
// Fetch a usage snapshot for the Gemini CLI provider by delegating to the
// shared Gemini usage fetcher, tagged with this provider's id.
async function fetchGeminiCliUsage(ctx: ProviderFetchUsageSnapshotContext) {
  const { token, timeoutMs, fetchFn } = ctx;
  return await fetchGeminiUsage(token, timeoutMs, fetchFn, PROVIDER_ID);
}
@@ -125,7 +119,7 @@ export function buildGoogleGeminiCliProvider(): ProviderPlugin {
providerId: PROVIDER_ID,
ctx,
}),
...GOOGLE_GEMINI_CLI_PROVIDER_HOOKS,
...GOOGLE_GEMINI_PROVIDER_HOOKS,
isModernModelRef: ({ modelId }) => isModernGoogleModel(modelId),
formatApiKey: (cred) => formatGoogleOauthApiKey(cred),
resolveUsageAuth: async (ctx) => {

View File

@@ -94,51 +94,54 @@ describe("google provider plugin hooks", () => {
expect(customEntries[0]?.customType).toBe("google-turn-ordering-bootstrap");
});
it("owns Gemini CLI tool schema normalization", async () => {
it("owns Gemini tool schema normalization for direct and CLI providers", async () => {
const { providers } = await registerProviderPlugin({
plugin: googleProviderPlugin,
id: "google",
name: "Google Provider",
});
const provider = requireRegisteredProvider(providers, "google-gemini-cli");
const providerIds = ["google", "google-gemini-cli"] as const;
const [tool] =
provider.normalizeToolSchemas?.({
provider: "google-gemini-cli",
tools: [
{
name: "write_file",
description: "Write a file",
parameters: {
type: "object",
additionalProperties: false,
properties: {
path: { type: "string", pattern: "^src/" },
for (const providerId of providerIds) {
const provider = requireRegisteredProvider(providers, providerId);
const [tool] =
provider.normalizeToolSchemas?.({
provider: providerId,
tools: [
{
name: "write_file",
description: "Write a file",
parameters: {
type: "object",
additionalProperties: false,
properties: {
path: { type: "string", pattern: "^src/" },
},
},
},
},
],
} as never) ?? [];
],
} as never) ?? [];
expect(tool).toMatchObject({
name: "write_file",
parameters: {
type: "object",
properties: {
path: { type: "string" },
expect(tool).toMatchObject({
name: "write_file",
parameters: {
type: "object",
properties: {
path: { type: "string" },
},
},
},
});
expect(tool?.parameters).not.toHaveProperty("additionalProperties");
expect(
(tool?.parameters as { properties?: { path?: Record<string, unknown> } })?.properties?.path,
).not.toHaveProperty("pattern");
expect(
provider.inspectToolSchemas?.({
provider: "google-gemini-cli",
tools: [tool],
} as never),
).toStrictEqual([]);
});
expect(tool?.parameters).not.toHaveProperty("additionalProperties");
expect(
(tool?.parameters as { properties?: { path?: Record<string, unknown> } })?.properties?.path,
).not.toHaveProperty("pattern");
expect(
provider.inspectToolSchemas?.({
provider: providerId,
tools: [tool],
} as never),
).toEqual([]);
}
});
it("wires google-thinking stream hooks for direct and Gemini CLI providers", async () => {

View File

@@ -3,12 +3,14 @@ import type {
ProviderThinkingProfile,
} from "openclaw/plugin-sdk/core";
import { buildProviderReplayFamilyHooks } from "openclaw/plugin-sdk/provider-model-shared";
import { buildProviderToolCompatFamilyHooks } from "openclaw/plugin-sdk/provider-tools";
import { createGoogleThinkingStreamWrapper, isGoogleGemini3ProModel } from "./thinking-api.js";
export const GOOGLE_GEMINI_PROVIDER_HOOKS = {
...buildProviderReplayFamilyHooks({
family: "google-gemini",
}),
...buildProviderToolCompatFamilyHooks("gemini"),
resolveThinkingProfile: ({ modelId }: ProviderDefaultThinkingPolicyContext) =>
({
levels: isGoogleGemini3ProModel(modelId)

View File

@@ -15,6 +15,7 @@ vi.mock("openclaw/plugin-sdk/provider-transport-runtime", async (importOriginal)
}));
let buildGoogleGenerativeAiParams: typeof import("./transport-stream.js").buildGoogleGenerativeAiParams;
let buildGoogleGemini3FirstResponseRetryParams: typeof import("./transport-stream.js").buildGoogleGemini3FirstResponseRetryParams;
let createGoogleGenerativeAiTransportStreamFn: typeof import("./transport-stream.js").createGoogleGenerativeAiTransportStreamFn;
let createGoogleVertexTransportStreamFn: typeof import("./transport-stream.js").createGoogleVertexTransportStreamFn;
let hasGoogleVertexAuthorizedUserAdcSync: typeof import("./vertex-adc.js").hasGoogleVertexAuthorizedUserAdcSync;
@@ -85,10 +86,40 @@ function buildSseResponse(events: unknown[]): Response {
});
}
// Build a two-event SSE Response: the first event is enqueued immediately and
// the second (plus the [DONE] sentinel) only after `delayMs`, so tests can
// observe a mid-stream pause between chunks.
function buildDelayedSecondSseResponse(params: {
  first: unknown;
  second: unknown;
  delayMs: number;
}): Response {
  const encoder = new TextEncoder();
  const immediatePayload = `data: ${JSON.stringify(params.first)}\n\n`;
  const delayedPayload = `data: ${JSON.stringify(params.second)}\n\ndata: [DONE]\n\n`;
  let pendingTimer: ReturnType<typeof setTimeout> | undefined;
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(encoder.encode(immediatePayload));
      pendingTimer = setTimeout(() => {
        controller.enqueue(encoder.encode(delayedPayload));
        controller.close();
      }, params.delayMs);
    },
    cancel() {
      // If the consumer abandons the stream early, drop the pending enqueue.
      if (pendingTimer !== undefined) {
        clearTimeout(pendingTimer);
      }
    },
  });
  return new Response(stream, {
    status: 200,
    headers: { "content-type": "text/event-stream" },
  });
}
describe("google transport stream", () => {
beforeAll(async () => {
({
buildGoogleGenerativeAiParams,
buildGoogleGemini3FirstResponseRetryParams,
createGoogleGenerativeAiTransportStreamFn,
createGoogleVertexTransportStreamFn,
} = await import("./transport-stream.js"));
@@ -248,6 +279,124 @@ describe("google transport stream", () => {
});
});
// The lean retry payload must drop thought streaming and floor the thinking
// level while leaving the rest of the request intact.
it("builds a lean Gemini 3 first-response retry payload", () => {
  const previewModel = buildGeminiModel({
    id: "gemini-3.1-pro-preview",
    name: "Gemini 3.1 Pro Preview",
  });
  const leanPayload = buildGoogleGemini3FirstResponseRetryParams({
    model: previewModel,
    request: {
      contents: [{ role: "user", parts: [{ text: "hello" }] }],
      generationConfig: {
        thinkingConfig: {
          includeThoughts: true,
          thinkingLevel: "HIGH",
        },
      },
    },
  });
  expect(leanPayload?.generationConfig).toEqual({
    thinkingConfig: {
      thinkingLevel: "LOW",
    },
  });
});
// Regression test for Gemini 3 first-response stall recovery: the first fetch
// attempt never yields a chunk, so after the (env-shortened) deadline the
// transport must abort it and re-send a lean retry payload.
it("retries Gemini 3 requests with lean thinking when the first attempt has no first response", async () => {
  // Shrink the default deadline so the test completes quickly.
  vi.stubEnv("OPENCLAW_GOOGLE_GEMINI_FIRST_RESPONSE_RETRY_MS", "10");
  guardedFetchMock
    // Attempt 1: a never-resolving fetch that only settles when the retry
    // deadline aborts its signal.
    .mockImplementationOnce(
      (_url: string, init?: RequestInit) =>
        new Promise<Response>((_resolve, reject) => {
          init?.signal?.addEventListener("abort", () => {
            reject(init.signal?.reason ?? new Error("aborted"));
          });
        }),
    )
    // Attempt 2: the retry succeeds immediately with a terminal chunk.
    .mockResolvedValueOnce(
      buildSseResponse([
        {
          candidates: [{ content: { parts: [{ text: "recovered" }] }, finishReason: "STOP" }],
        },
      ]),
    );
  const model = buildGeminiModel({
    id: "gemini-3.1-pro-preview",
    name: "Gemini 3.1 Pro Preview",
  });
  const streamFn = createGoogleGenerativeAiTransportStreamFn();
  const stream = await Promise.resolve(
    streamFn(
      model,
      {
        messages: [{ role: "user", content: "hello", timestamp: 0 }],
        tools: [
          {
            name: "lookup",
            description: "Look up a value",
            parameters: {
              type: "object",
              properties: { q: { type: "string" } },
            },
          },
        ],
      } as never,
      { reasoning: "high" } as never,
    ),
  );
  const result = await stream.result();
  // The caller only ever sees the recovered retry output.
  expect(result.content).toEqual([{ type: "text", text: "recovered" }]);
  expect(guardedFetchMock).toHaveBeenCalledTimes(2);
  const firstBody = JSON.parse(guardedFetchMock.mock.calls[0]?.[1]?.body as string);
  const retryBody = JSON.parse(guardedFetchMock.mock.calls[1]?.[1]?.body as string);
  // The original payload keeps the requested high thinking with thoughts...
  expect(firstBody.generationConfig.thinkingConfig).toMatchObject({
    includeThoughts: true,
    thinkingLevel: "HIGH",
  });
  // ...while the retry drops thoughts and floors the thinking level.
  expect(retryBody.generationConfig.thinkingConfig).toEqual({
    thinkingLevel: "LOW",
  });
  // The lean retry must not strip tools — only thinking is reduced.
  expect(retryBody.tools).toEqual(firstBody.tools);
});
// Happy-path guard: when the first chunk arrives before the retry deadline,
// the single stream must keep running to completion without a second fetch.
it("keeps streaming after the first Gemini 3 chunk arrives before the retry deadline", async () => {
  vi.stubEnv("OPENCLAW_GOOGLE_GEMINI_FIRST_RESPONSE_RETRY_MS", "10");
  // The first chunk arrives immediately; the second only after the 10ms
  // deadline has passed — proving the deadline is cleared on first response.
  guardedFetchMock.mockResolvedValueOnce(
    buildDelayedSecondSseResponse({
      first: {
        candidates: [{ content: { parts: [{ text: "first " }] } }],
      },
      second: {
        candidates: [{ content: { parts: [{ text: "second" }] }, finishReason: "STOP" }],
      },
      delayMs: 25,
    }),
  );
  const model = buildGeminiModel({
    id: "gemini-3.1-pro-preview",
    name: "Gemini 3.1 Pro Preview",
  });
  const streamFn = createGoogleGenerativeAiTransportStreamFn();
  const stream = await Promise.resolve(
    streamFn(
      model,
      {
        messages: [{ role: "user", content: "hello", timestamp: 0 }],
      } as never,
      { reasoning: "high" } as never,
    ),
  );
  const result = await stream.result();
  // Both chunks are concatenated and no retry request was issued.
  expect(result.content).toEqual([{ type: "text", text: "first second" }]);
  expect(guardedFetchMock).toHaveBeenCalledTimes(1);
});
it("uses bearer auth when the Google api key is an OAuth JSON payload", async () => {
guardedFetchMock.mockResolvedValueOnce(buildSseResponse([]));

View File

@@ -74,6 +74,9 @@ type GoogleGenerateContentRequest = {
toolConfig?: Record<string, unknown>;
};
// Default deadline for a Gemini 3 stream's first response before the lean
// retry kicks in; tunable (0 disables) through the env var named below.
const GOOGLE_GEMINI3_FIRST_RESPONSE_RETRY_DEFAULT_MS = 45_000;
const GOOGLE_GEMINI3_FIRST_RESPONSE_RETRY_ENV = "OPENCLAW_GOOGLE_GEMINI_FIRST_RESPONSE_RETRY_MS";
type GoogleTransportContentBlock =
| { type: "text"; text: string; textSignature?: string }
| { type: "thinking"; thinking: string; thinkingSignature?: string }
@@ -646,6 +649,283 @@ function buildGoogleTransportRequestUrl(
: buildGoogleGenerativeAiRequestUrl(model);
}
// True when the configured base URL targets Google's official Generative AI
// endpoint. An absent/empty base URL means the default endpoint is in use;
// an unparseable URL is treated as unofficial.
function isOfficialGoogleGenerativeAiBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl) {
    return true;
  }
  let hostname: string;
  try {
    hostname = new URL(baseUrl).hostname;
  } catch {
    return false;
  }
  return hostname === "generativelanguage.googleapis.com";
}
// Resolve the first-response retry deadline in milliseconds. Operators can
// tune (or disable with 0) the deadline via the env var; anything unset,
// blank, non-numeric, or negative falls back to the default.
function resolveGoogleGemini3FirstResponseRetryMs(env = process.env): number {
  const configured = env[GOOGLE_GEMINI3_FIRST_RESPONSE_RETRY_ENV];
  if (configured === undefined || configured.trim() === "") {
    return GOOGLE_GEMINI3_FIRST_RESPONSE_RETRY_DEFAULT_MS;
  }
  const value = Number(configured);
  return Number.isFinite(value) && value >= 0
    ? Math.floor(value)
    : GOOGLE_GEMINI3_FIRST_RESPONSE_RETRY_DEFAULT_MS;
}
// Stall recovery is scoped to direct Generative AI calls against the official
// endpoint, and only for Gemini 3 Pro/Flash models.
function shouldRetryGoogleGemini3FirstResponse(params: {
  kind: GoogleTransportApi;
  model: GoogleTransportModel;
}): boolean {
  const { kind, model } = params;
  if (kind !== "google-generative-ai" || !isOfficialGoogleGenerativeAiBaseUrl(model.baseUrl)) {
    return false;
  }
  return isGoogleGemini3ProModel(model.id) || isGoogleGemini3FlashModel(model.id);
}
// Map a Gemini 3 model to the smallest thinking level used for the lean
// retry: Pro's floor is "LOW", Flash goes one step lower to "MINIMAL", and
// anything else gets no retry level at all.
function resolveGoogleGemini3RetryThinkingLevel(modelId: string): GoogleThinkingLevel | undefined {
  if (isGoogleGemini3ProModel(modelId)) {
    return "LOW";
  }
  return isGoogleGemini3FlashModel(modelId) ? "MINIMAL" : undefined;
}
// Deep-copy a generateContent request via a JSON round-trip. The request is
// JSON-serializable by construction (it is sent as a fetch body), so this
// yields a fully independent clone the retry path can mutate safely.
function cloneGoogleGenerateContentRequest(
  params: GoogleGenerateContentRequest,
): GoogleGenerateContentRequest {
  const serialized = JSON.stringify(params);
  return JSON.parse(serialized) as GoogleGenerateContentRequest;
}
// Derive the lean retry payload used after a zero-output stall: a deep copy
// of the original request whose thinking configuration is reduced to the
// model's smallest native level. Returns undefined for models that have no
// retry thinking level (no lean retry is possible for them).
export function buildGoogleGemini3FirstResponseRetryParams(params: {
  model: GoogleTransportModel;
  request: GoogleGenerateContentRequest;
}): GoogleGenerateContentRequest | undefined {
  const retryThinkingLevel = resolveGoogleGemini3RetryThinkingLevel(params.model.id);
  if (!retryThinkingLevel) {
    return undefined;
  }
  const leanRequest = cloneGoogleGenerateContentRequest(params.request);
  const existingGenerationConfig =
    leanRequest.generationConfig && typeof leanRequest.generationConfig === "object"
      ? leanRequest.generationConfig
      : {};
  const reducedThinkingConfig =
    existingGenerationConfig.thinkingConfig &&
    typeof existingGenerationConfig.thinkingConfig === "object"
      ? { ...(existingGenerationConfig.thinkingConfig as Record<string, unknown>) }
      : {};
  // Gemini 3 defaults to dynamic high thinking when the request omits an
  // explicit level. On a zero-output stall, retry with the smallest supported
  // native level and suppress thought streaming so the recovery call prioritizes
  // producing a visible first token.
  delete reducedThinkingConfig.thinkingBudget;
  delete reducedThinkingConfig.includeThoughts;
  reducedThinkingConfig.thinkingLevel = retryThinkingLevel;
  existingGenerationConfig.thinkingConfig = reducedThinkingConfig;
  leanRequest.generationConfig = existingGenerationConfig;
  return leanRequest;
}
// Derive a child AbortSignal that fires when either the optional parent
// aborts (forwarding its reason) or a first-response deadline elapses, and
// expose whether the deadline was what fired. `clearDeadline` cancels only
// the timer; `cleanup` additionally detaches the parent listener.
function createChildSignal(parent: AbortSignal | undefined, timeoutMs: number) {
  const child = new AbortController();
  let deadlineFired = false;
  let deadlineTimer: ReturnType<typeof setTimeout> | undefined;
  const propagateParentAbort = () => {
    child.abort(parent?.reason);
  };
  if (parent?.aborted) {
    // Parent already aborted: mirror it immediately.
    propagateParentAbort();
  } else if (parent) {
    parent.addEventListener("abort", propagateParentAbort, { once: true });
  }
  if (timeoutMs > 0) {
    deadlineTimer = setTimeout(() => {
      deadlineFired = true;
      child.abort(new Error("Google Gemini first response retry deadline reached"));
    }, timeoutMs);
    // Never keep the process alive just for this deadline.
    deadlineTimer.unref?.();
  }
  const clearDeadline = () => {
    if (deadlineTimer !== undefined) {
      clearTimeout(deadlineTimer);
      deadlineTimer = undefined;
    }
  };
  return {
    signal: child.signal,
    timedOut: () => deadlineFired,
    clearDeadline,
    cleanup: () => {
      clearDeadline();
      parent?.removeEventListener("abort", propagateParentAbort);
    },
  };
}
// Re-expose a bare AsyncIterator as an AsyncGenerator, guaranteeing that the
// optional cleanup and iterator.return both run exactly once when consumption
// stops for any reason (exhaustion, break, or a thrown error).
function iteratorToAsyncGenerator<T>(
  iterator: AsyncIterator<T>,
  cleanup?: () => void,
): AsyncGenerator<T> {
  async function* drain(): AsyncGenerator<T> {
    try {
      let step = await iterator.next();
      while (!step.done) {
        yield step.value;
        step = await iterator.next();
      }
    } finally {
      cleanup?.();
      await iterator.return?.();
    }
  }
  return drain();
}
// Outcome of one streaming attempt against a Google SSE endpoint:
// - "ready": the connection opened; `firstChunk` holds the first parsed event
//   when one arrived before the iterator was handed off (absent when the
//   stream ended with zero events), and `chunks` yields the remainder.
// - "timeout": the first-response deadline elapsed before any event arrived.
type GoogleSseAttempt =
  | {
      type: "ready";
      firstChunk?: GoogleSseChunk;
      chunks: AsyncGenerator<GoogleSseChunk>;
    }
  | { type: "timeout" };
// Open one SSE request attempt and wait for its first parsed chunk. When
// firstResponseTimeoutMs > 0, that wait is bounded by a child abort signal;
// hitting the deadline (without a parent abort) yields a "timeout" result
// instead of throwing so the caller can retry with a leaner payload.
async function openGoogleSseAttempt(params: {
  guardedFetch: ReturnType<typeof buildGuardedModelFetch>;
  url: string;
  headers: Record<string, string>;
  request: GoogleGenerateContentRequest;
  parentSignal?: AbortSignal;
  firstResponseTimeoutMs: number;
  errorPrefix: string;
}): Promise<GoogleSseAttempt> {
  // Only wrap the parent signal when a deadline is actually requested.
  const attemptSignal =
    params.firstResponseTimeoutMs > 0
      ? createChildSignal(params.parentSignal, params.firstResponseTimeoutMs)
      : undefined;
  const signal = attemptSignal?.signal ?? params.parentSignal;
  try {
    const response = await params.guardedFetch(params.url, {
      method: "POST",
      headers: params.headers,
      body: JSON.stringify(params.request),
      signal,
    });
    if (!response.ok) {
      throw await createProviderHttpError(response, params.errorPrefix);
    }
    const chunks = parseGoogleSseChunks(response, signal);
    const iterator = chunks[Symbol.asyncIterator]();
    // Block until the first event (or the deadline) — this is the stall probe.
    const first = await iterator.next();
    // A first response arrived: the deadline no longer applies, but the child
    // signal stays attached so later parent aborts still cancel the stream.
    attemptSignal?.clearDeadline();
    if (first.done) {
      // Stream ended with zero events; still "ready" — the caller gets an
      // empty chunk stream rather than a retry.
      return {
        type: "ready",
        chunks: iteratorToAsyncGenerator(iterator, attemptSignal?.cleanup),
      };
    }
    return {
      type: "ready",
      firstChunk: first.value,
      chunks: iteratorToAsyncGenerator(iterator, attemptSignal?.cleanup),
    };
  } catch (error) {
    attemptSignal?.cleanup();
    // Distinguish our own deadline abort from a caller-initiated abort: only
    // the former becomes a retryable "timeout" outcome.
    if (attemptSignal?.timedOut() && !params.parentSignal?.aborted) {
      return { type: "timeout" };
    }
    throw error;
  }
}
// Open the SSE chunk stream for a Google transport request, applying Gemini 3
// first-response stall recovery when eligible: if the first attempt produces
// no event before the retry deadline, the request is re-sent once with the
// lean payload from buildGoogleGemini3FirstResponseRetryParams.
//
// Fix over previous version: the plain (no-deadline, no-retry) fetch path was
// duplicated verbatim in two branches; it is now a single local helper, so the
// request shape and error handling cannot drift between those branches.
async function openGoogleSseChunks(params: {
  kind: GoogleTransportApi;
  model: GoogleTransportModel;
  options: GoogleTransportOptions | undefined;
  guardedFetch: ReturnType<typeof buildGuardedModelFetch>;
  url: string;
  headers: Record<string, string>;
  request: GoogleGenerateContentRequest;
}): Promise<Extract<GoogleSseAttempt, { type: "ready" }>> {
  const errorPrefix =
    params.kind === "google-vertex"
      ? "Google Vertex AI API error"
      : "Google Generative AI API error";
  // Plain request path shared by every branch that opts out of stall recovery.
  const openDirect = async (): Promise<Extract<GoogleSseAttempt, { type: "ready" }>> => {
    const response = await params.guardedFetch(params.url, {
      method: "POST",
      headers: params.headers,
      body: JSON.stringify(params.request),
      signal: params.options?.signal,
    });
    if (!response.ok) {
      throw await createProviderHttpError(response, errorPrefix);
    }
    return {
      type: "ready",
      chunks: parseGoogleSseChunks(response, params.options?.signal),
    };
  };
  if (!shouldRetryGoogleGemini3FirstResponse({ kind: params.kind, model: params.model })) {
    return await openDirect();
  }
  // A zero/disabled deadline, or a model without a lean retry payload, also
  // falls back to the plain path.
  const retryMs = resolveGoogleGemini3FirstResponseRetryMs();
  const retryRequest =
    retryMs > 0
      ? buildGoogleGemini3FirstResponseRetryParams({
          model: params.model,
          request: params.request,
        })
      : undefined;
  if (!retryRequest) {
    return await openDirect();
  }
  // First attempt: original payload, bounded by the first-response deadline.
  const firstAttempt = await openGoogleSseAttempt({
    guardedFetch: params.guardedFetch,
    url: params.url,
    headers: params.headers,
    request: params.request,
    parentSignal: params.options?.signal,
    firstResponseTimeoutMs: retryMs,
    errorPrefix,
  });
  if (firstAttempt.type === "ready") {
    return firstAttempt;
  }
  // Retry attempt: lean payload with no deadline — a second stall surfaces to
  // the caller's own watchdog/abort handling instead of looping here.
  const retryAttempt = await openGoogleSseAttempt({
    guardedFetch: params.guardedFetch,
    url: params.url,
    headers: params.headers,
    request: retryRequest,
    parentSignal: params.options?.signal,
    firstResponseTimeoutMs: 0,
    errorPrefix,
  });
  if (retryAttempt.type === "timeout") {
    // Unreachable: a timeout requires firstResponseTimeoutMs > 0.
    throw new Error("Google Gemini first response retry timed out unexpectedly");
  }
  return retryAttempt;
}
async function buildGoogleTransportHeaders(params: {
kind: GoogleTransportApi;
model: GoogleTransportModel;
@@ -782,29 +1062,33 @@ function createGoogleTransportStreamFn(kind: GoogleTransportApi): StreamFn {
if (nextParams !== undefined) {
params = nextParams as GoogleGenerateContentRequest;
}
const response = await guardedFetch(buildGoogleTransportRequestUrl(kind, model, options), {
method: "POST",
headers: await buildGoogleTransportHeaders({
kind,
model,
apiKey,
optionHeaders: options?.headers,
fetchImpl: (options as { fetch?: typeof fetch } | undefined)?.fetch,
}),
body: JSON.stringify(params),
signal: options?.signal,
const requestUrl = buildGoogleTransportRequestUrl(kind, model, options);
const requestHeaders = await buildGoogleTransportHeaders({
kind,
model,
apiKey,
optionHeaders: options?.headers,
fetchImpl: (options as { fetch?: typeof fetch } | undefined)?.fetch,
});
const sse = await openGoogleSseChunks({
kind,
model,
options,
guardedFetch,
url: requestUrl,
headers: requestHeaders,
request: params,
});
if (!response.ok) {
throw await createProviderHttpError(
response,
kind === "google-vertex"
? "Google Vertex AI API error"
: "Google Generative AI API error",
);
}
stream.push({ type: "start", partial: output as never });
let currentBlockIndex = -1;
for await (const chunk of parseGoogleSseChunks(response, options?.signal)) {
const chunks =
sse.firstChunk === undefined
? sse.chunks
: (async function* (firstChunk: GoogleSseChunk) {
yield firstChunk;
yield* sse.chunks;
})(sse.firstChunk);
for await (const chunk of chunks) {
output.responseId ||= chunk.responseId;
updateUsage(output, model, chunk);
const candidate = chunk.candidates?.[0];