Mirror of https://github.com/browseros-ai/BrowserOS.git, synced 2026-05-14 08:03:58 +00:00

Compare commits: fix/setup...fix/middle (2 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 123adf0457 | |
| | 9fc15637b5 | |
```diff
@@ -23,5 +23,8 @@ SENTRY_DSN=
 NODE_ENV=development
 LOG_LEVEL=info
 
+# Debug — captures every LLM call to .devtools/generations.json (view with `npx @ai-sdk/devtools`)
+# BROWSEROS_AI_SDK_DEVTOOLS=true
+
 # Testing
 BROWSEROS_TEST_HEADLESS=false
```
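The new flag is opt-in and ships commented out. A minimal sketch of how a process might gate tracing on it, assuming the flag is read verbatim from `process.env` (the actual parsing in BrowserOS may differ):

```ts
// Sketch only: assumes a plain string comparison against 'true';
// BrowserOS may parse this flag differently.
const aiSdkDevtoolsEnabled = process.env.BROWSEROS_AI_SDK_DEVTOOLS === 'true'

if (aiSdkDevtoolsEnabled) {
  // Per the comment in the env file, generations land in
  // .devtools/generations.json and can be viewed with `npx @ai-sdk/devtools`.
  console.log('AI SDK DevTools tracing enabled')
}
```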
```diff
@@ -32,7 +32,6 @@ import { buildMemoryToolSet } from '../tools/memory/build-toolset'
 import type { ToolRegistry } from '../tools/tool-registry'
 import { CHAT_MODE_ALLOWED_TOOLS } from './chat-mode'
 import { createCompactionPrepareStep, type StepWithUsage } from './compaction'
-import { createContextOverflowMiddleware } from './context-overflow-middleware'
 import { buildMcpServerSpecs, createMcpClients } from './mcp-builder'
 import {
   getMessageNormalizationOptions,
@@ -74,7 +73,6 @@ export class AiSdkAgent {
       config.resolvedConfig.contextWindowSize ??
       AGENT_LIMITS.DEFAULT_CONTEXT_WINDOW
 
     // Build language model with middleware stack
     const rawModel = createLanguageModel(config.resolvedConfig)
     const isV3Model =
       typeof rawModel === 'object' &&
@@ -83,25 +81,16 @@
       rawModel.specificationVersion === 'v3'
 
     let model = rawModel
-    if (isV3Model) {
-      // Always apply context overflow protection
+    if (isV3Model && config.aiSdkDevtoolsEnabled) {
       model = wrapLanguageModel({
         model: rawModel as LanguageModelV3,
-        middleware: createContextOverflowMiddleware(contextWindow),
+        middleware: devToolsMiddleware() as LanguageModelV3Middleware,
       })
+      logger.info('AI SDK DevTools middleware enabled', {
+        conversationId: config.resolvedConfig.conversationId,
+        provider: config.resolvedConfig.provider,
+        model: config.resolvedConfig.model,
+      })
-
-      // Optionally add AI SDK DevTools tracing (dev-only)
-      if (config.aiSdkDevtoolsEnabled) {
-        model = wrapLanguageModel({
-          model: model as LanguageModelV3,
-          middleware: devToolsMiddleware() as LanguageModelV3Middleware,
-        })
-        logger.info('AI SDK DevTools middleware enabled', {
-          conversationId: config.resolvedConfig.conversationId,
-          provider: config.resolvedConfig.provider,
-          model: config.resolvedConfig.model,
-        })
-      }
-    }
     }
 
     // Build browser tools from the unified tool registry
```
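After this hunk, the agent wraps the raw model at most once: the DevTools tracer is applied only when `config.aiSdkDevtoolsEnabled` is set. In the removed wiring, overflow protection sat innermost and the tracer wrapped it, stacked through repeated `wrapLanguageModel` calls. A minimal sketch of that stacking pattern, using `wrapLanguageModel` as it appears in the diff (the `withMiddleware` helper is illustrative, not BrowserOS code, and the casts mirror the diff's own style):

```ts
import { wrapLanguageModel } from 'ai'
import type {
  LanguageModelV3,
  LanguageModelV3Middleware,
} from '@ai-sdk/provider'

// Hypothetical helper: folds middlewares over a base model, first entry
// innermost. Each wrapLanguageModel call returns a model whose doGenerate
// and doStream pass through the given middleware.
function withMiddleware(
  base: LanguageModelV3,
  middlewares: LanguageModelV3Middleware[],
): LanguageModelV3 {
  return middlewares.reduce(
    (model, middleware) =>
      wrapLanguageModel({ model, middleware }) as LanguageModelV3,
    base,
  )
}
```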
Deleted file (src/agent/context-overflow-middleware.ts, per the import paths above):

```diff
@@ -1,116 +0,0 @@
-import type {
-  LanguageModelV3CallOptions,
-  LanguageModelV3Message,
-  LanguageModelV3Middleware,
-  LanguageModelV3Prompt,
-} from '@ai-sdk/provider'
-import { logger } from '../lib/logger'
-
-/**
- * Provider-specific regex patterns for context overflow errors.
- * Adapted from Pi coding agent's overflow detection.
- *
- * @see https://github.com/badlogic/pi-mono/blob/main/packages/ai/src/utils/overflow.ts
- */
-const OVERFLOW_PATTERNS: RegExp[] = [
-  /prompt is too long/i, // Anthropic
-  /input is too long for requested model/i, // Amazon Bedrock
-  /exceeds the context window/i, // OpenAI (Completions & Responses API)
-  /input token count.*exceeds the maximum/i, // Google (Gemini)
-  /maximum prompt length is \d+/i, // xAI (Grok)
-  /reduce the length of the messages/i, // Groq
-  /maximum context length is \d+ tokens/i, // OpenRouter (all backends)
-  /exceeds the limit of \d+/i, // GitHub Copilot
-  /exceeds the available context size/i, // llama.cpp server
-  /greater than the context length/i, // LM Studio
-  /context window exceeds limit/i, // MiniMax
-  /exceeded model token limit/i, // Kimi For Coding
-  /too large for model with \d+ maximum context length/i, // Mistral
-  /model_context_window_exceeded/i, // z.ai non-standard finish_reason
-  /context[_ ]length[_ ]exceeded/i, // Generic fallback
-  /too many tokens/i, // Generic fallback
-  /token limit exceeded/i, // Generic fallback
-]
-
-export function isContextOverflowError(error: unknown): boolean {
-  if (!(error instanceof Error)) return false
-  const msg = error.message
-  return OVERFLOW_PATTERNS.some((p) => p.test(msg))
-}
-
-function truncatePrompt(
-  prompt: LanguageModelV3Prompt,
-  contextWindow: number,
-): LanguageModelV3Prompt {
-  const systemMessages: LanguageModelV3Message[] = []
-  const nonSystem: LanguageModelV3Message[] = []
-  for (const m of prompt) {
-    if (m.role === 'system') systemMessages.push(m)
-    else nonSystem.push(m)
-  }
-
-  // Target 60% of context window to leave headroom
-  const targetChars = contextWindow * 4 * 0.6
-  let totalChars = 0
-  let keepFrom = nonSystem.length
-
-  for (let i = nonSystem.length - 1; i >= 0; i--) {
-    totalChars += JSON.stringify(nonSystem[i].content).length
-    if (totalChars > targetChars) break
-    keepFrom = i
-  }
-
-  // Always keep at least the most recent non-system message
-  if (keepFrom >= nonSystem.length && nonSystem.length > 0) {
-    keepFrom = nonSystem.length - 1
-  }
-
-  const kept: LanguageModelV3Prompt = [
-    ...systemMessages,
-    ...nonSystem.slice(keepFrom),
-  ]
-  logger.warn('Emergency prompt truncation', {
-    original: prompt.length,
-    kept: kept.length,
-    dropped: prompt.length - kept.length,
-  })
-  return kept
-}
-
-export function createContextOverflowMiddleware(
-  contextWindow: number,
-): LanguageModelV3Middleware {
-  return {
-    specificationVersion: 'v3',
-    wrapGenerate: async ({ doGenerate, params }) => {
-      try {
-        return await doGenerate()
-      } catch (error) {
-        if (!isContextOverflowError(error)) throw error
-        logger.warn(
-          'Context overflow detected in doGenerate, truncating and retrying',
-        )
-        ;(params as LanguageModelV3CallOptions).prompt = truncatePrompt(
-          params.prompt,
-          contextWindow,
-        )
-        return await doGenerate()
-      }
-    },
-    wrapStream: async ({ doStream, params }) => {
-      try {
-        return await doStream()
-      } catch (error) {
-        if (!isContextOverflowError(error)) throw error
-        logger.warn(
-          'Context overflow detected in doStream, truncating and retrying',
-        )
-        ;(params as LanguageModelV3CallOptions).prompt = truncatePrompt(
-          params.prompt,
-          contextWindow,
-        )
-        return await doStream()
-      }
-    },
-  }
-}
```
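For reference, the deleted `truncatePrompt` budgeted characters rather than tokens: roughly 4 characters per token, keeping 60% of the window as the target. A worked example of that arithmetic (values chosen for illustration):

```ts
// Worked example of the truncation budget from truncatePrompt above.
const contextWindow = 200_000 // tokens, e.g. an Anthropic-class model
const charsPerToken = 4 // rough heuristic used by the middleware
const headroom = 0.6 // keep 60% of the window, leaving 40% free

const targetChars = contextWindow * charsPerToken * headroom
console.log(targetChars) // 480000: messages are kept newest-first until this budget is hit
```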
```diff
@@ -1,13 +1,4 @@
 import { describe, expect, it } from 'bun:test'
-import type {
-  LanguageModelV3,
-  LanguageModelV3CallOptions,
-  LanguageModelV3GenerateResult,
-  LanguageModelV3Prompt,
-  LanguageModelV3StreamPart,
-  LanguageModelV3StreamResult,
-  LanguageModelV3Usage,
-} from '@ai-sdk/provider'
 import { AGENT_LIMITS } from '@browseros/shared/constants/limits'
 import { LLM_PROVIDERS } from '@browseros/shared/schemas/llm'
 import type { ModelMessage, ToolResultPart } from 'ai'
@@ -29,10 +20,6 @@ import {
   buildTurnPrefixPrompt,
   messagesToTranscript,
 } from '../../src/agent/compaction/prompt'
-import {
-  createContextOverflowMiddleware,
-  isContextOverflowError,
-} from '../../src/agent/context-overflow-middleware'
 import {
   getMessageNormalizationOptions,
   normalizeMessagesForModel,
@@ -121,18 +108,6 @@ function assistantMsg(text: string): ModelMessage {
   return { role: 'assistant', content: text }
 }
 
-function systemPrompt(text: string): LanguageModelV3Prompt[number] {
-  return { role: 'system', content: text }
-}
-
-function userPrompt(text: string): LanguageModelV3Prompt[number] {
-  return { role: 'user', content: [{ type: 'text', text }] }
-}
-
-function assistantPrompt(text: string): LanguageModelV3Prompt[number] {
-  return { role: 'assistant', content: [{ type: 'text', text }] }
-}
-
 function assistantToolCall(
   toolName: string,
   input: Record<string, unknown>,
@@ -209,92 +184,6 @@ function userMsgWithImage(text: string): ModelMessage {
   }
 }
 
-function createCallOptions(
-  prompt: LanguageModelV3Prompt,
-): LanguageModelV3CallOptions {
-  return { prompt }
-}
-
-function createUsage(): LanguageModelV3Usage {
-  return {
-    inputTokens: {
-      total: 0,
-      noCache: 0,
-      cacheRead: undefined,
-      cacheWrite: undefined,
-    },
-    outputTokens: {
-      total: 0,
-      text: 0,
-      reasoning: undefined,
-    },
-  }
-}
-
-function createTextResult(text: string): LanguageModelV3GenerateResult {
-  return {
-    content: [{ type: 'text', text }],
-    finishReason: { unified: 'stop', raw: 'stop' },
-    usage: createUsage(),
-    warnings: [],
-  }
-}
-
-function createStreamResult(): LanguageModelV3StreamResult {
-  return {
-    stream: new ReadableStream<LanguageModelV3StreamPart>(),
-  }
-}
-
-function isSystemPrompt(
-  message: LanguageModelV3Prompt[number],
-): message is Extract<LanguageModelV3Prompt[number], { role: 'system' }> {
-  return message.role === 'system'
-}
-
-const mockLanguageModel: LanguageModelV3 = {
-  specificationVersion: 'v3',
-  provider: 'test-provider',
-  modelId: 'test-model',
-  supportedUrls: {},
-  doGenerate: async () => createTextResult('unused'),
-  doStream: async () => createStreamResult(),
-}
-
-async function runWrappedGenerate(
-  middleware: ReturnType<typeof createContextOverflowMiddleware>,
-  params: LanguageModelV3CallOptions,
-  doGenerate: () => Promise<LanguageModelV3GenerateResult>,
-): Promise<LanguageModelV3GenerateResult> {
-  const wrapGenerate = middleware.wrapGenerate
-  if (!wrapGenerate) {
-    throw new Error('wrapGenerate is unavailable')
-  }
-  return await wrapGenerate({
-    doGenerate,
-    doStream: async () => createStreamResult(),
-    model: mockLanguageModel,
-    params,
-  })
-}
-
-async function runWrappedStream(
-  middleware: ReturnType<typeof createContextOverflowMiddleware>,
-  params: LanguageModelV3CallOptions,
-  doStream: () => Promise<LanguageModelV3StreamResult>,
-): Promise<LanguageModelV3StreamResult> {
-  const wrapStream = middleware.wrapStream
-  if (!wrapStream) {
-    throw new Error('wrapStream is unavailable')
-  }
-  return await wrapStream({
-    doGenerate: async () => createTextResult('unused'),
-    doStream,
-    model: mockLanguageModel,
-    params,
-  })
-}
-
 function repeat(char: string, count: number): string {
   return char.repeat(count)
 }
@@ -1372,184 +1261,3 @@ describe('getCurrentTokenCount — Pi-style additive', () => {
     expect(result).toBe(50_000)
   })
 })
-
-// ---------------------------------------------------------------------------
-// Context overflow middleware
-// ---------------------------------------------------------------------------
-
-describe('createContextOverflowMiddleware', () => {
-  it('passes through when model succeeds', async () => {
-    const middleware = createContextOverflowMiddleware(200_000)
-    const mockResult = createTextResult('hello')
-    const params = createCallOptions([
-      systemPrompt('You are helpful'),
-      userPrompt('hi'),
-    ])
-
-    const result = await runWrappedGenerate(
-      middleware,
-      params,
-      async () => mockResult,
-    )
-
-    expect(result).toBe(mockResult)
-  })
-
-  it('rethrows non-context errors', async () => {
-    const middleware = createContextOverflowMiddleware(200_000)
-    const params = createCallOptions([userPrompt('hi')])
-
-    await expect(
-      runWrappedGenerate(middleware, params, async () => {
-        throw new Error('network timeout')
-      }),
-    ).rejects.toThrow('network timeout')
-  })
-
-  it('truncates and retries on context_length error', async () => {
-    const middleware = createContextOverflowMiddleware(200_000)
-    let callCount = 0
-    const mockResult = createTextResult('success after truncation')
-    const params = createCallOptions([
-      systemPrompt('system prompt'),
-      userPrompt('old message 1'),
-      assistantPrompt('old response 1'),
-      userPrompt('old message 2'),
-      assistantPrompt('old response 2'),
-      userPrompt('recent message'),
-    ])
-
-    const result = await runWrappedGenerate(middleware, params, async () => {
-      callCount++
-      if (callCount === 1) {
-        throw new Error('context_length_exceeded')
-      }
-      return mockResult
-    })
-
-    expect(callCount).toBe(2)
-    expect(result).toBe(mockResult)
-    // System message should be preserved
-    expect(params.prompt.some((message) => message.role === 'system')).toBe(
-      true,
-    )
-    // Prompt should be shorter after truncation
-    expect(params.prompt.length).toBeLessThanOrEqual(6)
-  })
-
-  it('preserves system messages during truncation', async () => {
-    const middleware = createContextOverflowMiddleware(10_000)
-    const mockResult = createTextResult('ok')
-    let truncatedPrompt: LanguageModelV3Prompt = []
-    const params = createCallOptions([
-      systemPrompt('important system prompt'),
-      userPrompt('a'.repeat(50_000)),
-      assistantPrompt('b'.repeat(50_000)),
-      userPrompt('recent'),
-    ])
-
-    await runWrappedGenerate(middleware, params, async () => {
-      if (truncatedPrompt.length === 0) {
-        truncatedPrompt = [...params.prompt]
-        throw new Error('maximum context length exceeded')
-      }
-      truncatedPrompt = [...params.prompt]
-      return mockResult
-    })
-
-    const systemMsgs = truncatedPrompt.filter(isSystemPrompt)
-    expect(systemMsgs.length).toBe(1)
-    expect(systemMsgs[0].content).toBe('important system prompt')
-  })
-
-  it('handles wrapStream the same way', async () => {
-    const middleware = createContextOverflowMiddleware(200_000)
-    let callCount = 0
-    const mockResult = createStreamResult()
-    const params = createCallOptions([
-      systemPrompt('system'),
-      userPrompt('message'),
-    ])
-
-    const result = await runWrappedStream(middleware, params, async () => {
-      callCount++
-      if (callCount === 1) {
-        throw new Error('token limit exceeded')
-      }
-      return mockResult
-    })
-
-    expect(callCount).toBe(2)
-    expect(result).toBe(mockResult)
-  })
-
-  it('detects provider-specific context overflow errors', async () => {
-    const middleware = createContextOverflowMiddleware(200_000)
-    const errorMessages = [
-      'context_length_exceeded', // Generic
-      'prompt is too long: 213462 tokens > 200000 maximum', // Anthropic
-      'Your input exceeds the context window of this model', // OpenAI
-      'The input token count (1196265) exceeds the maximum number of tokens allowed', // Google
-      "This model's maximum prompt length is 131072 but the request contains 537812 tokens", // xAI
-      'Please reduce the length of the messages or completion', // Groq
-      'maximum context length is 128000 tokens', // OpenRouter
-      'token limit exceeded', // Generic
-      'too many tokens', // Generic
-      'exceeded model token limit', // Kimi
-      'input is too long for requested model', // Amazon Bedrock
-    ]
-
-    for (const errMsg of errorMessages) {
-      let callCount = 0
-      const mockResult = createTextResult('ok')
-      const params = createCallOptions([userPrompt('hi')])
-
-      await runWrappedGenerate(middleware, params, async () => {
-        callCount++
-        if (callCount === 1) throw new Error(errMsg)
-        return mockResult
-      })
-
-      expect(callCount).toBe(2)
-    }
-  })
-
-  it('does not false-positive on unrelated errors', () => {
-    const unrelatedErrors = [
-      'URL is too long',
-      'Invalid max_tokens: must be between 1 and 4096',
-      'session token is too long',
-      'file name is too long',
-      'network timeout',
-      'rate limit exceeded',
-    ]
-
-    for (const errMsg of unrelatedErrors) {
-      expect(isContextOverflowError(new Error(errMsg))).toBe(false)
-    }
-  })
-
-  it('keeps at least the last non-system message when it exceeds target', async () => {
-    const middleware = createContextOverflowMiddleware(1_000)
-    const mockResult = createTextResult('ok')
-    let truncatedPrompt: LanguageModelV3Prompt = []
-    const params = createCallOptions([
-      systemPrompt('system'),
-      userPrompt('x'.repeat(100_000)),
-    ])
-
-    await runWrappedGenerate(middleware, params, async () => {
-      if (truncatedPrompt.length === 0) {
-        truncatedPrompt = [...params.prompt]
-        throw new Error('context_length_exceeded')
-      }
-      truncatedPrompt = [...params.prompt]
-      return mockResult
-    })
-
-    // Must keep system + at least the last user message (not empty)
-    expect(truncatedPrompt.length).toBe(2)
-    expect(truncatedPrompt[0].role).toBe('system')
-    expect(truncatedPrompt[1].role).toBe('user')
-  })
-})
```