diff --git a/apps/agent/entrypoints/options/ai-settings/NewProviderDialog.tsx b/apps/agent/entrypoints/options/ai-settings/NewProviderDialog.tsx index c294b510..c981988a 100644 --- a/apps/agent/entrypoints/options/ai-settings/NewProviderDialog.tsx +++ b/apps/agent/entrypoints/options/ai-settings/NewProviderDialog.tsx @@ -245,8 +245,10 @@ export const NewProviderDialog: FC = ({ setIsCustomModel(false) } - // Auto-fill context window when model changes + // Auto-fill context window when model changes (only for new providers) useEffect(() => { + if (initialValues?.id) return + if (watchedModelId && watchedModelId !== 'custom') { const contextLength = getModelContextLength( watchedType as ProviderType, @@ -256,7 +258,7 @@ export const NewProviderDialog: FC = ({ form.setValue('contextWindow', contextLength) } } - }, [watchedModelId, watchedType, form]) + }, [watchedModelId, watchedType, form, initialValues?.id]) // Handle model selection (including custom option) const handleModelChange = (value: string) => { diff --git a/apps/agent/entrypoints/sidepanel/index/useChatSession.ts b/apps/agent/entrypoints/sidepanel/index/useChatSession.ts index 95a9c168..1decc7cd 100644 --- a/apps/agent/entrypoints/sidepanel/index/useChatSession.ts +++ b/apps/agent/entrypoints/sidepanel/index/useChatSession.ts @@ -261,6 +261,8 @@ export const useChatSession = () => { conversationId: conversationIdRef.current, model: provider?.modelId ?? 'default', mode: currentMode, + contextWindowSize: provider?.contextWindow, + temperature: provider?.temperature, // Azure-specific resourceName: provider?.resourceName, // Bedrock-specific diff --git a/apps/agent/lib/schedules/getChatServerResponse.ts b/apps/agent/lib/schedules/getChatServerResponse.ts index d080b269..8b71a8ad 100644 --- a/apps/agent/lib/schedules/getChatServerResponse.ts +++ b/apps/agent/lib/schedules/getChatServerResponse.ts @@ -78,6 +78,8 @@ export async function getChatServerResponse( conversationId, model: provider?.modelId ?? 'default', mode: request.mode ?? 'agent', + contextWindowSize: provider?.contextWindow, + temperature: provider?.temperature, resourceName: provider?.resourceName, accessKeyId: provider?.accessKeyId, secretAccessKey: provider?.secretAccessKey, diff --git a/apps/server/src/agent/agent/gemini-agent.ts b/apps/server/src/agent/agent/gemini-agent.ts index d5323be3..49ffd5e7 100644 --- a/apps/server/src/agent/agent/gemini-agent.ts +++ b/apps/server/src/agent/agent/gemini-agent.ts @@ -49,16 +49,22 @@ export class GeminiAgent { // Calculate compression threshold based on context window size const contextWindow = config.contextWindowSize ?? AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW + + // Hybrid compression: ensure minimum headroom while capping ratio for large contexts + const headroomBasedRatio = + (contextWindow - AGENT_LIMITS.COMPRESSION_MIN_HEADROOM) / contextWindow + const compressionRatio = Math.min( + AGENT_LIMITS.COMPRESSION_MAX_RATIO, + Math.max(AGENT_LIMITS.COMPRESSION_MIN_RATIO, headroomBasedRatio), + ) const compressionThreshold = - (AGENT_LIMITS.DEFAULT_COMPRESSION_RATIO * contextWindow) / - AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW + (compressionRatio * contextWindow) / AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW logger.info('Compression config', { contextWindow, + compressionRatio, compressionThreshold, - compressesAtTokens: Math.floor( - AGENT_LIMITS.DEFAULT_COMPRESSION_RATIO * contextWindow, - ), + compressesAtTokens: Math.floor(compressionRatio * contextWindow), }) logger.debug('MCP servers config', { diff --git a/packages/shared/src/constants/limits.ts b/packages/shared/src/constants/limits.ts index 343dd3e5..c721e4fe 100644 --- a/packages/shared/src/constants/limits.ts +++ b/packages/shared/src/constants/limits.ts @@ -15,7 +15,10 @@ export const RATE_LIMITS = { export const AGENT_LIMITS = { MAX_TURNS: 100, DEFAULT_CONTEXT_WINDOW: 1_000_000, - DEFAULT_COMPRESSION_RATIO: 0.75, + // Compression settings - hybrid approach with minimum headroom + COMPRESSION_MIN_HEADROOM: 10_000, // Always leave at least 10K tokens for tool responses + COMPRESSION_MAX_RATIO: 0.75, // Never wait longer than 75% for large models + COMPRESSION_MIN_RATIO: 0.4, // Never compress too early (before 40%) } as const export const PAGINATION = {