fix: context window size (#171)

2026-05-17 02:25:57 +00:00 · 2026-01-07 17:13:00 +05:30
parent 9c6bccd46e
commit 2ac655b69e
5 changed files with 23 additions and 8 deletions
--- a/apps/agent/entrypoints/options/ai-settings/NewProviderDialog.tsx
+++ b/apps/agent/entrypoints/options/ai-settings/NewProviderDialog.tsx
@@ -245,8 +245,10 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
    setIsCustomModel(false)
  }

-  // Auto-fill context window when model changes
+  // Auto-fill context window when model changes (only for new providers)
  useEffect(() => {
+    if (initialValues?.id) return
+
    if (watchedModelId && watchedModelId !== 'custom') {
      const contextLength = getModelContextLength(
        watchedType as ProviderType,
@@ -256,7 +258,7 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
        form.setValue('contextWindow', contextLength)
      }
    }
-  }, [watchedModelId, watchedType, form])
+  }, [watchedModelId, watchedType, form, initialValues?.id])

  // Handle model selection (including custom option)
  const handleModelChange = (value: string) => {
--- a/apps/agent/entrypoints/sidepanel/index/useChatSession.ts
+++ b/apps/agent/entrypoints/sidepanel/index/useChatSession.ts
@@ -261,6 +261,8 @@ export const useChatSession = () => {
            conversationId: conversationIdRef.current,
            model: provider?.modelId ?? 'default',
            mode: currentMode,
+            contextWindowSize: provider?.contextWindow,
+            temperature: provider?.temperature,
            // Azure-specific
            resourceName: provider?.resourceName,
            // Bedrock-specific
--- a/apps/agent/lib/schedules/getChatServerResponse.ts
+++ b/apps/agent/lib/schedules/getChatServerResponse.ts
@@ -78,6 +78,8 @@ export async function getChatServerResponse(
      conversationId,
      model: provider?.modelId ?? 'default',
      mode: request.mode ?? 'agent',
+      contextWindowSize: provider?.contextWindow,
+      temperature: provider?.temperature,
      resourceName: provider?.resourceName,
      accessKeyId: provider?.accessKeyId,
      secretAccessKey: provider?.secretAccessKey,
--- a/apps/server/src/agent/agent/gemini-agent.ts
+++ b/apps/server/src/agent/agent/gemini-agent.ts
@@ -49,16 +49,22 @@ export class GeminiAgent {
    // Calculate compression threshold based on context window size
    const contextWindow =
      config.contextWindowSize ?? AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW
+
+    // Hybrid compression: aim for COMPRESSION_MIN_HEADROOM free tokens, clamping the ratio to [COMPRESSION_MIN_RATIO, COMPRESSION_MAX_RATIO]
+    const headroomBasedRatio =
+      (contextWindow - AGENT_LIMITS.COMPRESSION_MIN_HEADROOM) / contextWindow
+    const compressionRatio = Math.min(
+      AGENT_LIMITS.COMPRESSION_MAX_RATIO,
+      Math.max(AGENT_LIMITS.COMPRESSION_MIN_RATIO, headroomBasedRatio),
+    )
    const compressionThreshold =
-      (AGENT_LIMITS.DEFAULT_COMPRESSION_RATIO * contextWindow) /
-      AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW
+      (compressionRatio * contextWindow) / AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW

    logger.info('Compression config', {
      contextWindow,
+      compressionRatio,
      compressionThreshold,
-      compressesAtTokens: Math.floor(
-        AGENT_LIMITS.DEFAULT_COMPRESSION_RATIO * contextWindow,
-      ),
+      compressesAtTokens: Math.floor(compressionRatio * contextWindow),
    })

    logger.debug('MCP servers config', {
--- a/packages/shared/src/constants/limits.ts
+++ b/packages/shared/src/constants/limits.ts
@@ -15,7 +15,10 @@ export const RATE_LIMITS = {
 export const AGENT_LIMITS = {
  MAX_TURNS: 100,
  DEFAULT_CONTEXT_WINDOW: 1_000_000,
-  DEFAULT_COMPRESSION_RATIO: 0.75,
+  // Compression settings - hybrid approach with minimum headroom
+  COMPRESSION_MIN_HEADROOM: 10_000, // Target headroom: 10K tokens for tool responses (COMPRESSION_MIN_RATIO wins on windows too small to afford it)
+  COMPRESSION_MAX_RATIO: 0.75, // Compress by 75% of the context window at the latest, even for large models
+  COMPRESSION_MIN_RATIO: 0.4, // Never compress too early (before 40%)
 } as const

 export const PAGINATION = {