Compare commits

...

2 Commits

Author SHA1 Message Date
Nikhil Sonti
123adf0457 docs: document BROWSEROS_AI_SDK_DEVTOOLS in .env.example
Surfaces the opt-in dev flag so contributors know it exists. Captures
every LLM call to .devtools/generations.json for post-hoc inspection.
2026-04-15 11:41:56 -07:00
Nikhil Sonti
9fc15637b5 refactor: remove redundant context-overflow middleware
The middleware caught provider overflow errors and re-tried with a
naive prompt truncation, but its `nonSystem.slice()` had no awareness
of tool_use/tool_result pairing — a cut between an assistant tool_use
and the matching tool_result produces an orphaned tool_use that
providers reject with a different error.

Compaction (`createCompactionPrepareStep`) already handles this safely:
`findSafeSplitPoint` walks past tool messages to preserve pair
integrity, and the pipeline (strip binary → prune → reduce outputs →
LLM summarize → sliding window) handles every overflow path before
the request leaves the agent.

Drops 426 lines: the middleware itself, its wiring in ai-sdk-agent,
and the matching test block + helpers in compaction.test.ts.
2026-04-15 11:13:24 -07:00
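
The pairing-safe split the commit message attributes to `findSafeSplitPoint` can be illustrated with a small, hedged sketch. The message shape and helper below are hypothetical stand-ins rather than the compaction module's actual types or implementation; they only show the invariant that a cut must never leave a tool_result in the kept tail while its matching tool_use falls on the dropped side.

```ts
// Hedged sketch: simplified stand-in types, not the real compaction code.
type SketchMessage =
  | { role: 'system' | 'user' | 'assistant'; text: string }
  | { role: 'tool'; toolCallId: string } // carries a tool_result

// `proposed` is the index of the first message to keep; everything before it
// is summarized or dropped. Walk forward past any tool_result messages at the
// head of the kept tail so tool_use/tool_result pairs stay on the same side.
function findSafeSplitPointSketch(
  messages: SketchMessage[],
  proposed: number,
): number {
  let split = Math.min(Math.max(proposed, 0), messages.length)
  while (split < messages.length && messages[split].role === 'tool') {
    split++
  }
  return split
}
```

The removed middleware's character-budget `slice()` had no such guard, which is why its retry could trade an overflow error for a malformed-prompt error.
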
4 changed files with 10 additions and 426 deletions

.env.example

@@ -23,5 +23,8 @@ SENTRY_DSN=
NODE_ENV=development
LOG_LEVEL=info
+# Debug — captures every LLM call to .devtools/generations.json (view with `npx @ai-sdk/devtools`)
+# BROWSEROS_AI_SDK_DEVTOOLS=true
# Testing
BROWSEROS_TEST_HEADLESS=false
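
The flag lands in the agent as `config.aiSdkDevtoolsEnabled` (see the ai-sdk-agent.ts diff below). As a hedged sketch of how that plumbing could look: the env parsing, the helper name, and the `@ai-sdk/devtools` import path are assumptions, while `wrapLanguageModel`, `devToolsMiddleware()`, and the V3 types appear in the diff itself.

```ts
// Hedged wiring sketch; only the identifiers visible in the diff are known.
import { wrapLanguageModel } from 'ai'
import type { LanguageModelV3, LanguageModelV3Middleware } from '@ai-sdk/provider'
import { devToolsMiddleware } from '@ai-sdk/devtools' // assumed import path

// Opt-in only: contributors flip BROWSEROS_AI_SDK_DEVTOOLS=true locally.
const aiSdkDevtoolsEnabled = process.env.BROWSEROS_AI_SDK_DEVTOOLS === 'true'

function maybeWrapWithDevtools(rawModel: LanguageModelV3): LanguageModelV3 {
  if (!aiSdkDevtoolsEnabled) return rawModel
  return wrapLanguageModel({
    model: rawModel,
    middleware: devToolsMiddleware() as LanguageModelV3Middleware,
  }) as LanguageModelV3
}
```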

ai-sdk-agent.ts

@@ -32,7 +32,6 @@ import { buildMemoryToolSet } from '../tools/memory/build-toolset'
import type { ToolRegistry } from '../tools/tool-registry'
import { CHAT_MODE_ALLOWED_TOOLS } from './chat-mode'
import { createCompactionPrepareStep, type StepWithUsage } from './compaction'
-import { createContextOverflowMiddleware } from './context-overflow-middleware'
import { buildMcpServerSpecs, createMcpClients } from './mcp-builder'
import {
getMessageNormalizationOptions,
@@ -74,7 +73,6 @@ export class AiSdkAgent {
config.resolvedConfig.contextWindowSize ??
AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW
// Build language model with middleware stack
const rawModel = createLanguageModel(config.resolvedConfig)
const isV3Model =
typeof rawModel === 'object' &&
@@ -83,25 +81,16 @@ export class AiSdkAgent {
rawModel.specificationVersion === 'v3'
let model = rawModel
-if (isV3Model) {
-// Always apply context overflow protection
+if (isV3Model && config.aiSdkDevtoolsEnabled) {
model = wrapLanguageModel({
model: rawModel as LanguageModelV3,
-middleware: createContextOverflowMiddleware(contextWindow),
+middleware: devToolsMiddleware() as LanguageModelV3Middleware,
})
+logger.info('AI SDK DevTools middleware enabled', {
+conversationId: config.resolvedConfig.conversationId,
+provider: config.resolvedConfig.provider,
+model: config.resolvedConfig.model,
+})
-// Optionally add AI SDK DevTools tracing (dev-only)
-if (config.aiSdkDevtoolsEnabled) {
-model = wrapLanguageModel({
-model: model as LanguageModelV3,
-middleware: devToolsMiddleware() as LanguageModelV3Middleware,
-})
-logger.info('AI SDK DevTools middleware enabled', {
-conversationId: config.resolvedConfig.conversationId,
-provider: config.resolvedConfig.provider,
-model: config.resolvedConfig.model,
-})
-}
}
// Build browser tools from the unified tool registry

context-overflow-middleware.ts (deleted)

@@ -1,116 +0,0 @@
import type {
LanguageModelV3CallOptions,
LanguageModelV3Message,
LanguageModelV3Middleware,
LanguageModelV3Prompt,
} from '@ai-sdk/provider'
import { logger } from '../lib/logger'
/**
* Provider-specific regex patterns for context overflow errors.
* Adapted from Pi coding agent's overflow detection.
*
* @see https://github.com/badlogic/pi-mono/blob/main/packages/ai/src/utils/overflow.ts
*/
const OVERFLOW_PATTERNS: RegExp[] = [
/prompt is too long/i, // Anthropic
/input is too long for requested model/i, // Amazon Bedrock
/exceeds the context window/i, // OpenAI (Completions & Responses API)
/input token count.*exceeds the maximum/i, // Google (Gemini)
/maximum prompt length is \d+/i, // xAI (Grok)
/reduce the length of the messages/i, // Groq
/maximum context length is \d+ tokens/i, // OpenRouter (all backends)
/exceeds the limit of \d+/i, // GitHub Copilot
/exceeds the available context size/i, // llama.cpp server
/greater than the context length/i, // LM Studio
/context window exceeds limit/i, // MiniMax
/exceeded model token limit/i, // Kimi For Coding
/too large for model with \d+ maximum context length/i, // Mistral
/model_context_window_exceeded/i, // z.ai non-standard finish_reason
/context[_ ]length[_ ]exceeded/i, // Generic fallback
/too many tokens/i, // Generic fallback
/token limit exceeded/i, // Generic fallback
]
export function isContextOverflowError(error: unknown): boolean {
if (!(error instanceof Error)) return false
const msg = error.message
return OVERFLOW_PATTERNS.some((p) => p.test(msg))
}
function truncatePrompt(
prompt: LanguageModelV3Prompt,
contextWindow: number,
): LanguageModelV3Prompt {
const systemMessages: LanguageModelV3Message[] = []
const nonSystem: LanguageModelV3Message[] = []
for (const m of prompt) {
if (m.role === 'system') systemMessages.push(m)
else nonSystem.push(m)
}
// Target 60% of context window to leave headroom
const targetChars = contextWindow * 4 * 0.6
let totalChars = 0
let keepFrom = nonSystem.length
for (let i = nonSystem.length - 1; i >= 0; i--) {
totalChars += JSON.stringify(nonSystem[i].content).length
if (totalChars > targetChars) break
keepFrom = i
}
// Always keep at least the most recent non-system message
if (keepFrom >= nonSystem.length && nonSystem.length > 0) {
keepFrom = nonSystem.length - 1
}
const kept: LanguageModelV3Prompt = [
...systemMessages,
...nonSystem.slice(keepFrom),
]
logger.warn('Emergency prompt truncation', {
original: prompt.length,
kept: kept.length,
dropped: prompt.length - kept.length,
})
return kept
}
export function createContextOverflowMiddleware(
contextWindow: number,
): LanguageModelV3Middleware {
return {
specificationVersion: 'v3',
wrapGenerate: async ({ doGenerate, params }) => {
try {
return await doGenerate()
} catch (error) {
if (!isContextOverflowError(error)) throw error
logger.warn(
'Context overflow detected in doGenerate, truncating and retrying',
)
;(params as LanguageModelV3CallOptions).prompt = truncatePrompt(
params.prompt,
contextWindow,
)
return await doGenerate()
}
},
wrapStream: async ({ doStream, params }) => {
try {
return await doStream()
} catch (error) {
if (!isContextOverflowError(error)) throw error
logger.warn(
'Context overflow detected in doStream, truncating and retrying',
)
;(params as LanguageModelV3CallOptions).prompt = truncatePrompt(
params.prompt,
contextWindow,
)
return await doStream()
}
},
}
}
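
To make the first paragraph of the commit message concrete, here is a hedged repro of the hazard in the `nonSystem.slice(keepFrom)` above. The shapes and values are invented stand-ins, not the @ai-sdk/provider message types.

```ts
// Hedged illustration: invented stand-in shapes, not real provider types.
type SketchMessage =
  | { role: 'user'; text: string }
  | { role: 'assistant_tool_use'; toolCallId: string } // assistant tool_use
  | { role: 'tool_result'; toolCallId: string }        // matching tool_result

const nonSystem: SketchMessage[] = [
  { role: 'user', text: 'open example.com' },
  { role: 'assistant_tool_use', toolCallId: 'call_1' },
  { role: 'tool_result', toolCallId: 'call_1' },
  { role: 'user', text: 'now summarize the page' },
]

// truncatePrompt picks keepFrom purely by character budget. If the budget
// lands keepFrom on index 2, the kept tail starts with the tool_result while
// the tool_use at index 1 is dropped: an orphaned pair that the provider
// rejects with a different, non-overflow error, so the retry fails anyway.
const keepFrom = 2
const kept = nonSystem.slice(keepFrom)
// kept[0] is the tool_result with no matching tool_use left in the prompt.
```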

compaction.test.ts

@@ -1,13 +1,4 @@
import { describe, expect, it } from 'bun:test'
import type {
LanguageModelV3,
LanguageModelV3CallOptions,
LanguageModelV3GenerateResult,
LanguageModelV3Prompt,
LanguageModelV3StreamPart,
LanguageModelV3StreamResult,
LanguageModelV3Usage,
} from '@ai-sdk/provider'
import { AGENT_LIMITS } from '@browseros/shared/constants/limits'
import { LLM_PROVIDERS } from '@browseros/shared/schemas/llm'
import type { ModelMessage, ToolResultPart } from 'ai'
@@ -29,10 +20,6 @@ import {
buildTurnPrefixPrompt,
messagesToTranscript,
} from '../../src/agent/compaction/prompt'
import {
createContextOverflowMiddleware,
isContextOverflowError,
} from '../../src/agent/context-overflow-middleware'
import {
getMessageNormalizationOptions,
normalizeMessagesForModel,
@@ -121,18 +108,6 @@ function assistantMsg(text: string): ModelMessage {
return { role: 'assistant', content: text }
}
function systemPrompt(text: string): LanguageModelV3Prompt[number] {
return { role: 'system', content: text }
}
function userPrompt(text: string): LanguageModelV3Prompt[number] {
return { role: 'user', content: [{ type: 'text', text }] }
}
function assistantPrompt(text: string): LanguageModelV3Prompt[number] {
return { role: 'assistant', content: [{ type: 'text', text }] }
}
function assistantToolCall(
toolName: string,
input: Record<string, unknown>,
@@ -209,92 +184,6 @@ function userMsgWithImage(text: string): ModelMessage {
}
}
function createCallOptions(
prompt: LanguageModelV3Prompt,
): LanguageModelV3CallOptions {
return { prompt }
}
function createUsage(): LanguageModelV3Usage {
return {
inputTokens: {
total: 0,
noCache: 0,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 0,
text: 0,
reasoning: undefined,
},
}
}
function createTextResult(text: string): LanguageModelV3GenerateResult {
return {
content: [{ type: 'text', text }],
finishReason: { unified: 'stop', raw: 'stop' },
usage: createUsage(),
warnings: [],
}
}
function createStreamResult(): LanguageModelV3StreamResult {
return {
stream: new ReadableStream<LanguageModelV3StreamPart>(),
}
}
function isSystemPrompt(
message: LanguageModelV3Prompt[number],
): message is Extract<LanguageModelV3Prompt[number], { role: 'system' }> {
return message.role === 'system'
}
const mockLanguageModel: LanguageModelV3 = {
specificationVersion: 'v3',
provider: 'test-provider',
modelId: 'test-model',
supportedUrls: {},
doGenerate: async () => createTextResult('unused'),
doStream: async () => createStreamResult(),
}
async function runWrappedGenerate(
middleware: ReturnType<typeof createContextOverflowMiddleware>,
params: LanguageModelV3CallOptions,
doGenerate: () => Promise<LanguageModelV3GenerateResult>,
): Promise<LanguageModelV3GenerateResult> {
const wrapGenerate = middleware.wrapGenerate
if (!wrapGenerate) {
throw new Error('wrapGenerate is unavailable')
}
return await wrapGenerate({
doGenerate,
doStream: async () => createStreamResult(),
model: mockLanguageModel,
params,
})
}
async function runWrappedStream(
middleware: ReturnType<typeof createContextOverflowMiddleware>,
params: LanguageModelV3CallOptions,
doStream: () => Promise<LanguageModelV3StreamResult>,
): Promise<LanguageModelV3StreamResult> {
const wrapStream = middleware.wrapStream
if (!wrapStream) {
throw new Error('wrapStream is unavailable')
}
return await wrapStream({
doGenerate: async () => createTextResult('unused'),
doStream,
model: mockLanguageModel,
params,
})
}
function repeat(char: string, count: number): string {
return char.repeat(count)
}
@@ -1372,184 +1261,3 @@ describe('getCurrentTokenCount — Pi-style additive', () => {
expect(result).toBe(50_000)
})
})
// ---------------------------------------------------------------------------
// Context overflow middleware
// ---------------------------------------------------------------------------
describe('createContextOverflowMiddleware', () => {
it('passes through when model succeeds', async () => {
const middleware = createContextOverflowMiddleware(200_000)
const mockResult = createTextResult('hello')
const params = createCallOptions([
systemPrompt('You are helpful'),
userPrompt('hi'),
])
const result = await runWrappedGenerate(
middleware,
params,
async () => mockResult,
)
expect(result).toBe(mockResult)
})
it('rethrows non-context errors', async () => {
const middleware = createContextOverflowMiddleware(200_000)
const params = createCallOptions([userPrompt('hi')])
await expect(
runWrappedGenerate(middleware, params, async () => {
throw new Error('network timeout')
}),
).rejects.toThrow('network timeout')
})
it('truncates and retries on context_length error', async () => {
const middleware = createContextOverflowMiddleware(200_000)
let callCount = 0
const mockResult = createTextResult('success after truncation')
const params = createCallOptions([
systemPrompt('system prompt'),
userPrompt('old message 1'),
assistantPrompt('old response 1'),
userPrompt('old message 2'),
assistantPrompt('old response 2'),
userPrompt('recent message'),
])
const result = await runWrappedGenerate(middleware, params, async () => {
callCount++
if (callCount === 1) {
throw new Error('context_length_exceeded')
}
return mockResult
})
expect(callCount).toBe(2)
expect(result).toBe(mockResult)
// System message should be preserved
expect(params.prompt.some((message) => message.role === 'system')).toBe(
true,
)
// Prompt should be shorter after truncation
expect(params.prompt.length).toBeLessThanOrEqual(6)
})
it('preserves system messages during truncation', async () => {
const middleware = createContextOverflowMiddleware(10_000)
const mockResult = createTextResult('ok')
let truncatedPrompt: LanguageModelV3Prompt = []
const params = createCallOptions([
systemPrompt('important system prompt'),
userPrompt('a'.repeat(50_000)),
assistantPrompt('b'.repeat(50_000)),
userPrompt('recent'),
])
await runWrappedGenerate(middleware, params, async () => {
if (truncatedPrompt.length === 0) {
truncatedPrompt = [...params.prompt]
throw new Error('maximum context length exceeded')
}
truncatedPrompt = [...params.prompt]
return mockResult
})
const systemMsgs = truncatedPrompt.filter(isSystemPrompt)
expect(systemMsgs.length).toBe(1)
expect(systemMsgs[0].content).toBe('important system prompt')
})
it('handles wrapStream the same way', async () => {
const middleware = createContextOverflowMiddleware(200_000)
let callCount = 0
const mockResult = createStreamResult()
const params = createCallOptions([
systemPrompt('system'),
userPrompt('message'),
])
const result = await runWrappedStream(middleware, params, async () => {
callCount++
if (callCount === 1) {
throw new Error('token limit exceeded')
}
return mockResult
})
expect(callCount).toBe(2)
expect(result).toBe(mockResult)
})
it('detects provider-specific context overflow errors', async () => {
const middleware = createContextOverflowMiddleware(200_000)
const errorMessages = [
'context_length_exceeded', // Generic
'prompt is too long: 213462 tokens > 200000 maximum', // Anthropic
'Your input exceeds the context window of this model', // OpenAI
'The input token count (1196265) exceeds the maximum number of tokens allowed', // Google
"This model's maximum prompt length is 131072 but the request contains 537812 tokens", // xAI
'Please reduce the length of the messages or completion', // Groq
'maximum context length is 128000 tokens', // OpenRouter
'token limit exceeded', // Generic
'too many tokens', // Generic
'exceeded model token limit', // Kimi
'input is too long for requested model', // Amazon Bedrock
]
for (const errMsg of errorMessages) {
let callCount = 0
const mockResult = createTextResult('ok')
const params = createCallOptions([userPrompt('hi')])
await runWrappedGenerate(middleware, params, async () => {
callCount++
if (callCount === 1) throw new Error(errMsg)
return mockResult
})
expect(callCount).toBe(2)
}
})
it('does not false-positive on unrelated errors', () => {
const unrelatedErrors = [
'URL is too long',
'Invalid max_tokens: must be between 1 and 4096',
'session token is too long',
'file name is too long',
'network timeout',
'rate limit exceeded',
]
for (const errMsg of unrelatedErrors) {
expect(isContextOverflowError(new Error(errMsg))).toBe(false)
}
})
it('keeps at least the last non-system message when it exceeds target', async () => {
const middleware = createContextOverflowMiddleware(1_000)
const mockResult = createTextResult('ok')
let truncatedPrompt: LanguageModelV3Prompt = []
const params = createCallOptions([
systemPrompt('system'),
userPrompt('x'.repeat(100_000)),
])
await runWrappedGenerate(middleware, params, async () => {
if (truncatedPrompt.length === 0) {
truncatedPrompt = [...params.prompt]
throw new Error('context_length_exceeded')
}
truncatedPrompt = [...params.prompt]
return mockResult
})
// Must keep system + at least the last user message (not empty)
expect(truncatedPrompt.length).toBe(2)
expect(truncatedPrompt[0].role).toBe('system')
expect(truncatedPrompt[1].role).toBe('user')
})
})