feat: use hidden pages for scheduled tasks (#624)

* feat: use hidden pages for scheduled tasks

* refactor: rework 0331-use_hidden_pages_for_scheduled_tasks based on feedback
This commit is contained in:
Nikhil
2026-03-31 16:02:47 -07:00
committed by GitHub
parent 565ce18eba
commit 2bb432b0f2
10 changed files with 385 additions and 54 deletions

View File

@@ -209,7 +209,7 @@ export class AiSdkAgent {
userSystemPrompt: config.resolvedConfig.userSystemPrompt,
exclude: excludeSections,
isScheduledTask: config.resolvedConfig.isScheduledTask,
scheduledTaskWindowId: config.browserContext?.windowId,
scheduledTaskPageId: config.browserContext?.activeTab?.pageId,
workspaceDir: config.resolvedConfig.workingDir,
soulContent,
isSoulBootstrap: isBootstrap,

View File

@@ -49,7 +49,7 @@ You do not have a filesystem workspace in this session. Return all results direc
// Mode-aware framing
if (options?.isScheduledTask) {
role +=
'\n\nYou are running as a scheduled background task in a dedicated hidden browser window. Complete the task autonomously and report results.'
'\n\nYou are running as a scheduled background task on a system-managed hidden page. Complete the task autonomously and report results.'
} else if (options?.chatMode) {
role +=
'\n\nYou are in read-only chat mode. You can observe pages but cannot interact with them, modify files, or store memories.'
@@ -238,7 +238,7 @@ When a task requires working on multiple pages simultaneously:
7. **Never force-switch the user's active tab.** If you need user interaction on a background tab (e.g., login, CAPTCHA), tell the user which tab needs attention and let them switch manually.
8. **Never navigate the user's current tab** during a multi-tab task. The current tab is the user's anchor — use it only for reading (snapshots, content extraction). All navigation should happen on background tabs.
**Do NOT use \`create_hidden_window\` or \`new_hidden_page\` for user-requested tasks.** Hidden windows are invisible to the user and cannot be screenshotted. Use \`new_page\` (background mode) instead — tabs appear in the user's tab strip and can be inspected. Reserve hidden windows for automated/scheduled runs only.`
**Do NOT use \`create_hidden_window\` or \`new_hidden_page\` for user-requested tasks.** Hidden pages are invisible to the user and do not appear in the user's tab strip. Use \`new_page\` (background mode) instead — tabs appear in the user's tab strip and can be inspected. Reserve hidden pages for automated/scheduled runs only.`
if (!isNewTab) {
executionContent += `
@@ -661,22 +661,24 @@ function getUserContext(
if (options?.isScheduledTask) {
pageCtx +=
'\nYou are running as a **scheduled background task** in a dedicated hidden browser window.'
'\nYou are running as a **scheduled background task** on a system-managed hidden page.'
}
pageCtx +=
'\n\n**CRITICAL RULES:**\n1. **Do NOT call `get_active_page` or `list_pages` to find your starting page.** Use the **page ID from the Browser Context** directly.'
if (options?.isScheduledTask) {
const windowRef = options.scheduledTaskWindowId
? `\`windowId: ${options.scheduledTaskWindowId}\``
: 'the `windowId` from the Browser Context'
pageCtx += `\n2. **Always pass ${windowRef}** when calling \`new_page\` or \`new_hidden_page\`. Never omit the \`windowId\` parameter.`
const pageRef = options.scheduledTaskPageId
? `\`${options.scheduledTaskPageId}\``
: 'the page ID from the Browser Context'
pageCtx += `\n2. **Use starting page ID ${pageRef} directly.** For additional browsing, prefer \`new_hidden_page\` so the work stays invisible to the user.`
pageCtx +=
'\n3. **Do NOT close your dedicated hidden window** (via `close_window`). It is managed by the system and will be cleaned up automatically.'
'\n3. **Do NOT close your starting hidden page** (via `close_page` on that page ID). It is managed by the system and will be cleaned up automatically.'
pageCtx +=
'\n4. **Do NOT create new windows** (via `create_window` or `create_hidden_window`). Use your existing hidden window for all pages.'
pageCtx += '\n5. Complete the task end-to-end and report results.'
'\n4. **Do NOT create new windows** (via `create_window` or `create_hidden_window`). Use hidden pages instead.'
pageCtx +=
'\n5. **Close extra hidden pages when you are done with them** unless you explicitly reveal them with `show_page`.'
pageCtx += '\n6. Complete the task end-to-end and report results.'
}
pageCtx += '\n</page_context>'
@@ -737,7 +739,7 @@ export interface BuildSystemPromptOptions {
userSystemPrompt?: string
exclude?: string[]
isScheduledTask?: boolean
scheduledTaskWindowId?: number
scheduledTaskPageId?: number
workspaceDir?: string
soulContent?: string
isSoulBootstrap?: boolean

View File

@@ -4,8 +4,8 @@ import type { AiSdkAgent } from './ai-sdk-agent'
export interface AgentSession {
agent: AiSdkAgent
hiddenWindowId?: number
/** Browser context scoped to the hidden window (scheduled tasks only) */
hiddenPageId?: number
/** Browser context scoped to the scheduled hidden page. */
browserContext?: BrowserContext
/** MCP server names used when the session was created, for change detection. */
mcpServerKey?: string

View File

@@ -146,34 +146,51 @@ export class ChatService {
if (!session) {
isNewSession = true
let hiddenWindowId: number | undefined
let hiddenPageId: number | undefined
let browserContext = await this.resolvePageIds(request.browserContext)
if (request.isScheduledTask) {
try {
const win = await this.deps.browser.createWindow({ hidden: true })
hiddenWindowId = win.windowId
const pageId = await this.deps.browser.newPage('about:blank', {
windowId: hiddenWindowId,
hiddenPageId = await this.deps.browser.newPage('about:blank', {
hidden: true,
background: true,
})
let hiddenWindowId: number | undefined
try {
const hiddenPage = (await this.deps.browser.listPages()).find(
(page) => page.pageId === hiddenPageId,
)
hiddenWindowId = hiddenPage?.windowId
} catch (error) {
logger.warn('Failed to look up hidden page metadata', {
conversationId: request.conversationId,
pageId: hiddenPageId,
error: error instanceof Error ? error.message : String(error),
})
}
browserContext = {
...browserContext,
windowId: hiddenWindowId,
selectedTabs: undefined,
tabs: undefined,
activeTab: {
id: pageId,
pageId,
id: hiddenPageId,
pageId: hiddenPageId,
url: 'about:blank',
title: 'Scheduled Task',
},
}
logger.info('Created hidden window for scheduled task', {
logger.info('Created hidden page for scheduled task', {
conversationId: request.conversationId,
pageId: hiddenPageId,
windowId: hiddenWindowId,
pageId,
})
} catch (error) {
logger.warn('Failed to create hidden window, using default', {
error: error instanceof Error ? error.message : String(error),
})
logger.warn(
'Failed to create hidden page, using default browser context',
{
error: error instanceof Error ? error.message : String(error),
},
)
}
}
@@ -188,7 +205,7 @@ export class ChatService {
})
session = {
agent,
hiddenWindowId,
hiddenPageId,
browserContext,
mcpServerKey,
workingDir: request.userWorkingDir,
@@ -245,10 +262,10 @@ export class ChatService {
totalMessages: messages.length,
})
if (session?.hiddenWindowId) {
const windowId = session.hiddenWindowId
session.hiddenWindowId = undefined
this.closeHiddenWindow(windowId, request.conversationId)
if (session?.hiddenPageId) {
const pageId = session.hiddenPageId
session.hiddenPageId = undefined
this.closeHiddenPage(pageId, request.conversationId)
}
},
})
@@ -258,10 +275,10 @@ export class ChatService {
conversationId: string,
): Promise<{ deleted: boolean; sessionCount: number }> {
const session = this.deps.sessionStore.get(conversationId)
if (session?.hiddenWindowId) {
const windowId = session.hiddenWindowId
session.hiddenWindowId = undefined
this.closeHiddenWindow(windowId, conversationId)
if (session?.hiddenPageId) {
const pageId = session.hiddenPageId
session.hiddenPageId = undefined
this.closeHiddenPage(pageId, conversationId)
}
const deleted = await this.deps.sessionStore.delete(conversationId)
return { deleted, sessionCount: this.deps.sessionStore.count() }
@@ -309,10 +326,10 @@ export class ChatService {
}
}
private closeHiddenWindow(windowId: number, conversationId: string): void {
this.deps.browser.closeWindow(windowId).catch((error) => {
logger.warn('Failed to close hidden window', {
windowId,
private closeHiddenPage(pageId: number, conversationId: string): void {
this.deps.browser.closePage(pageId).catch((error) => {
logger.warn('Failed to close hidden page', {
pageId,
conversationId,
error: error instanceof Error ? error.message : String(error),
})
@@ -329,7 +346,10 @@ export class ChatService {
await session.agent.dispose()
this.deps.sessionStore.remove(request.conversationId)
const browserContext = await this.resolvePageIds(request.browserContext)
const browserContext = agentConfig.isScheduledTask
? (session.browserContext ??
(await this.resolvePageIds(request.browserContext)))
: await this.resolvePageIds(request.browserContext)
const agent = await AiSdkAgent.create({
resolvedConfig: agentConfig,
browser: this.deps.browser,
@@ -341,6 +361,7 @@ export class ChatService {
})
const newSession: AgentSession = {
agent,
hiddenPageId: session.hiddenPageId,
browserContext,
mcpServerKey,
workingDir: request.userWorkingDir,

View File

@@ -173,7 +173,7 @@ export const new_page = defineTool({
export const new_hidden_page = defineTool({
name: 'new_hidden_page',
description:
'Open a new hidden page (tab) and navigate to a URL. Hidden pages are not visible to the user and useful for background data fetching or automation. Note: take_screenshot is not supported on hidden tabs — use show_page first to make it visible.',
'Open a new hidden page (tab) and navigate to a URL. Hidden pages are not visible to the user and useful for background data fetching or automation.',
input: z.object({
url: z.string().describe('URL to open'),
windowId: z.number().optional().describe('Window ID to create tab in'),
@@ -206,7 +206,7 @@ export const new_hidden_page = defineTool({
export const show_page = defineTool({
name: 'show_page',
description:
'Restore a hidden page back into a visible browser window. Use after new_hidden_page when you need to make the page visible (e.g. for screenshots). Errors if the page is already visible.',
'Restore a hidden page back into a visible browser window. Use after new_hidden_page when you want the user to inspect or interact with it. Errors if the page is already visible.',
input: z.object({
page: pageParam,
windowId: z

View File

@@ -79,7 +79,7 @@ export const create_window = defineTool({
export const create_hidden_window = defineTool({
name: 'create_hidden_window',
description:
'Create a new hidden browser window. Hidden windows are not visible to the user and useful for background automation. Note: take_screenshot is not supported on hidden windows.',
'Create a new hidden browser window. Hidden windows are not visible to the user and useful for background automation.',
input: z.object({}),
output: z.object({
window: windowInfoSchema,

View File

@@ -86,7 +86,7 @@ function buildScheduled(overrides?: Partial<BuildSystemPromptOptions>): string {
return buildSystemPrompt({
isScheduledTask: true,
workspaceDir: '/tmp/scheduled',
scheduledTaskWindowId: 42,
scheduledTaskPageId: 42,
exclude: ['nudges'],
...overrides,
})
@@ -258,7 +258,7 @@ describe('workspace gating (P11)', () => {
// from subtle cues (missing sections, restricted tools), which is unreliable.
//
// - Regular: no extra framing (default behavior)
// - Scheduled: must know it's autonomous, in a hidden window, no user interaction
// - Scheduled: must know it's autonomous, on a hidden page, no user interaction
// - Chat: must know it's read-only, cannot click/fill/write
//
// If mode framing breaks, scheduled tasks may try to ask the user questions,
@@ -310,20 +310,21 @@ describe('mode-aware framing', () => {
expect(prompt).not.toContain('<page_context>')
})
it('scheduled task includes windowId in page context', () => {
const prompt = buildScheduled({ scheduledTaskWindowId: 99 })
expect(prompt).toContain('windowId: 99')
it('scheduled task includes starting pageId in page context', () => {
const prompt = buildScheduled({ scheduledTaskPageId: 99 })
expect(prompt).toContain('starting page ID `99`')
})
it('scheduled task without windowId uses Browser Context reference', () => {
const prompt = buildScheduled({ scheduledTaskWindowId: undefined })
expect(prompt).toContain('the `windowId` from the Browser Context')
it('scheduled task without pageId uses Browser Context reference', () => {
const prompt = buildScheduled({ scheduledTaskPageId: undefined })
expect(prompt).toContain('the page ID from the Browser Context')
})
it('scheduled task includes hidden window management rules', () => {
it('scheduled task includes hidden page management rules', () => {
const prompt = buildScheduled()
expect(prompt).toContain('Do NOT close your dedicated hidden window')
expect(prompt).toContain('Do NOT close your starting hidden page')
expect(prompt).toContain('Do NOT create new windows')
expect(prompt).toContain('Close extra hidden pages')
})
})
@@ -1060,11 +1061,12 @@ describe('execution section', () => {
it('prohibits hidden windows for user tasks', () => {
// Why: Run 2 used create_hidden_window instead of background tabs.
// Hidden windows are invisible to users and can't be screenshotted.
// Hidden pages are invisible to users, so user-requested work must stay on visible tabs.
const prompt = buildRegular()
expect(prompt).toContain('Do NOT use')
expect(prompt).toContain('create_hidden_window')
expect(prompt).toContain('new_hidden_page')
expect(prompt).not.toContain('cannot be screenshotted')
})
it('includes tab retry discipline', () => {

View File

@@ -0,0 +1,291 @@
import { describe, expect, it, mock } from 'bun:test'
/** Minimal chat message shape kept by the fake agent in this test file. */
interface MockMessage {
id: string
role: 'user' | 'assistant'
// Only text parts are modelled here.
parts: Array<{ type: 'text'; text: string }>
}
/**
 * Shape of the fake agent that the mocked `AiSdkAgent.create` hands back.
 * NOTE(review): presumably mirrors the subset of the real agent that
 * ChatService touches — confirm against the production class.
 */
interface MockAgent {
toolLoopAgent: object
toolNames: Set<string>
messages: MockMessage[]
appendUserMessage(text: string): void
dispose(): Promise<void>
}
/** Entry held by the in-memory session store used in these tests. */
interface StoredSession {
agent: MockAgent
// Set when a test seeds a session that tracks a scheduled-task hidden page.
hiddenPageId?: number
}
/**
 * Options received by the mocked `createAgentUIStreamResponse`.
 * Only `onFinish` is typed because it is the only option these tests invoke.
 */
interface StreamResponseOptions {
onFinish(args: { messages: MockMessage[] }): Promise<void>
}
// Per-test knobs read by the spies below: each test assigns these before
// calling into ChatService. They are module-level, so values persist across
// tests until reassigned.
let agentToReturn: MockAgent | undefined
let streamResponseHandler:
| ((options: StreamResponseOptions) => Promise<Response>)
| undefined
/**
 * Spy standing in for `AiSdkAgent.create`.
 * Resolves to whatever agent the current test stored in `agentToReturn`, and
 * rejects with a descriptive error (including the config) when none was set.
 */
const createAgentSpy = mock(async (config: unknown) => {
  if (agentToReturn) return agentToReturn
  throw new Error(`No mock agent configured for ${JSON.stringify(config)}`)
})
/**
 * Spy standing in for `createAgentUIStreamResponse` from the `ai` package.
 * Delegates to the handler the current test installed in
 * `streamResponseHandler`; rejects if no handler was installed.
 */
const createAgentUIStreamResponseSpy = mock(
  async (options: StreamResponseOptions) => {
    const handler = streamResponseHandler
    if (!handler) {
      throw new Error('No stream response handler configured')
    }
    const response = await handler(options)
    return response
  },
)
/** Spy standing in for `resolveLLMConfig`; always resolves to a fixed fake config. */
const resolveLLMConfigSpy = mock(async () => {
  // A fresh object is built on every call, matching a real resolver's per-call result.
  return {
    provider: 'openai',
    model: 'gpt-5',
    apiKey: 'test-key',
  }
})
// Swap the modules ChatService depends on for the spies declared above.
// NOTE(review): presumably these registrations must run before ChatService is
// loaded for the mocks to take effect — confirm against Bun's mock.module docs.
mock.module('ai', () => ({
createAgentUIStreamResponse: createAgentUIStreamResponseSpy,
}))
// Route agent construction to createAgentSpy.
mock.module('../../../src/agent/ai-sdk-agent', () => ({
AiSdkAgent: {
create: createAgentSpy,
},
}))
// Route LLM config resolution to the fixed fake config.
mock.module('../../../src/lib/clients/llm/config', () => ({
resolveLLMConfig: resolveLLMConfigSpy,
}))
// Silence logging: info/warn/debug become no-op mocks.
mock.module('../../../src/lib/logger', () => ({
logger: {
info: mock(() => {}),
warn: mock(() => {}),
debug: mock(() => {}),
},
}))
// Loaded via dynamic import, after the mock.module calls, rather than a static
// import at the top of the file.
const { ChatService } = await import('../../../src/api/services/chat-service')
/**
 * Builds an in-memory session store keyed by conversation ID.
 *
 * - `get` / `set` read and write entries.
 * - `remove` drops an entry without disposing its agent and reports whether
 *   one existed.
 * - `delete` disposes the entry's agent first, then drops it; resolves to
 *   `false` when there was nothing to delete.
 * - `count` reports how many sessions are currently stored.
 */
function createSessionStore() {
  const byConversation = new Map<string, StoredSession>()

  const get = (conversationId: string) => byConversation.get(conversationId)

  const set = (conversationId: string, session: StoredSession) => {
    byConversation.set(conversationId, session)
  }

  const remove = (conversationId: string) =>
    byConversation.delete(conversationId)

  const disposeAndDelete = async (conversationId: string) => {
    const existing = byConversation.get(conversationId)
    if (!existing) return false
    // Dispose before removal so a failing dispose leaves the entry in place.
    await existing.agent.dispose()
    byConversation.delete(conversationId)
    return true
  }

  const count = () => byConversation.size

  return { get, set, remove, delete: disposeAndDelete, count }
}
/**
 * Creates a fake agent for ChatService tests.
 * It records appended user messages in `messages` (always with id `user-1`)
 * and exposes `dispose` as a mock so tests can assert on disposal.
 */
function createFakeAgent() {
  const messages: MockMessage[] = []

  const appendUserMessage = (text: string) => {
    const userMessage: MockMessage = {
      id: 'user-1',
      role: 'user',
      parts: [{ type: 'text', text }],
    }
    messages.push(userMessage)
  }

  return {
    toolLoopAgent: {},
    toolNames: new Set<string>(),
    messages,
    appendUserMessage,
    dispose: mock(async () => {}),
  }
}
describe('ChatService scheduled task hidden page lifecycle', () => {
// Happy path: a scheduled-task message opens a hidden page through newPage
// (never through createWindow), the agent is created with a browser context
// scoped to that page, and the page is closed once the stream finishes.
it('creates and cleans up a hidden page without creating a hidden window', async () => {
const fakeAgent = createFakeAgent()
agentToReturn = fakeAgent
// Call onFinish right away to simulate the stream completing.
streamResponseHandler = async ({ onFinish }) => {
await onFinish({ messages: fakeAgent.messages })
return new Response('ok')
}
const browser = {
newPage: mock(async () => 77),
// Reports the hidden page as living in window 11, which the context should adopt.
listPages: mock(async () => [
{
pageId: 77,
windowId: 11,
},
]),
closePage: mock(async () => {}),
createWindow: mock(async () => ({ windowId: 11 })),
closeWindow: mock(async () => {}),
resolveTabIds: mock(async () => new Map<number, number>()),
}
const sessionStore = createSessionStore()
const service = new ChatService({
sessionStore: sessionStore as never,
klavisClient: {} as never,
browser: browser as never,
registry: {} as never,
})
// The request carries a user-facing context (window 9, tab 3, a selected tab)
// that the assertions below expect to be replaced by the hidden page's context.
await service.processMessage(
{
conversationId: crypto.randomUUID(),
message: 'Run the scheduled task',
isScheduledTask: true,
mode: 'agent',
origin: 'sidepanel',
browserContext: {
windowId: 9,
activeTab: {
id: 3,
url: 'https://example.com',
title: 'Example',
},
selectedTabs: [{ id: 4 }],
enabledMcpServers: ['slack'],
},
} as never,
new AbortController().signal,
)
// The hidden page is created directly, with no windowId argument.
expect(browser.newPage).toHaveBeenCalledWith('about:blank', {
hidden: true,
background: true,
})
expect(browser.createWindow).not.toHaveBeenCalled()
// Cleanup closes the page, not a window.
expect(browser.closePage).toHaveBeenCalledWith(77)
expect(browser.closeWindow).not.toHaveBeenCalled()
// Inspect the config passed to the most recent AiSdkAgent.create call.
const createArgs = createAgentSpy.mock.calls.at(-1)?.[0] as {
browserContext?: {
windowId?: number
selectedTabs?: unknown[]
activeTab?: {
id: number
pageId: number
url: string
title: string
}
enabledMcpServers?: string[]
}
}
expect(createArgs.browserContext?.windowId).toBe(11)
expect(createArgs.browserContext?.selectedTabs).toBeUndefined()
expect(createArgs.browserContext?.activeTab).toEqual({
id: 77,
pageId: 77,
url: 'about:blank',
title: 'Scheduled Task',
})
// Fields unrelated to tabs/windows are carried over from the request.
expect(createArgs.browserContext?.enabledMcpServers).toEqual(['slack'])
})
// Deleting a session that tracks a hidden page must close that page, and the
// store's delete must dispose the agent.
it('deleteSession closes the tracked hidden page', async () => {
const fakeAgent = createFakeAgent()
const sessionStore = createSessionStore()
const browser = {
closePage: mock(async () => {}),
}
const conversationId = crypto.randomUUID()
// Seed the store directly instead of going through processMessage.
sessionStore.set(conversationId, {
agent: fakeAgent,
hiddenPageId: 33,
})
const service = new ChatService({
sessionStore: sessionStore as never,
klavisClient: {} as never,
browser: browser as never,
registry: {} as never,
})
const result = await service.deleteSession(conversationId)
expect(result).toEqual({ deleted: true, sessionCount: 0 })
expect(browser.closePage).toHaveBeenCalledWith(33)
expect(fakeAgent.dispose).toHaveBeenCalledTimes(1)
})
// A failing listPages lookup must not abandon the hidden page: the agent still
// gets a context pointing at it (with no windowId), and the page is still
// closed when the stream finishes.
it('keeps the scheduled hidden page context when metadata lookup fails', async () => {
const fakeAgent = createFakeAgent()
agentToReturn = fakeAgent
// Call onFinish right away to simulate the stream completing.
streamResponseHandler = async ({ onFinish }) => {
await onFinish({ messages: fakeAgent.messages })
return new Response('ok')
}
const browser = {
newPage: mock(async () => 88),
// Simulate the window-metadata lookup failing after the page was created.
listPages: mock(async () => {
throw new Error('CDP lookup failed')
}),
closePage: mock(async () => {}),
resolveTabIds: mock(async () => new Map<number, number>()),
}
const sessionStore = createSessionStore()
const service = new ChatService({
sessionStore: sessionStore as never,
klavisClient: {} as never,
browser: browser as never,
registry: {} as never,
})
// The request context has no windowId, so none can leak in from the request.
await service.processMessage(
{
conversationId: crypto.randomUUID(),
message: 'Run the scheduled task',
isScheduledTask: true,
mode: 'agent',
origin: 'sidepanel',
browserContext: {
activeTab: {
id: 3,
url: 'https://example.com',
title: 'Example',
},
},
} as never,
new AbortController().signal,
)
// Inspect the config passed to the most recent AiSdkAgent.create call.
const createArgs = createAgentSpy.mock.calls.at(-1)?.[0] as {
browserContext?: {
windowId?: number
activeTab?: {
id: number
pageId: number
url: string
title: string
}
}
}
expect(createArgs.browserContext?.windowId).toBeUndefined()
expect(createArgs.browserContext?.activeTab).toEqual({
id: 88,
pageId: 88,
url: 'about:blank',
title: 'Scheduled Task',
})
expect(browser.closePage).toHaveBeenCalledWith(88)
})
})

View File

@@ -29,6 +29,13 @@ function structuredOf<T>(result: { structuredContent?: unknown }): T {
}
describe('navigation tools', () => {
// Regression guard on description wording only: the hidden-page tools must not
// tell the model that screenshots are unsupported or that show_page exists for
// screenshots.
it('hidden-page tool descriptions do not claim screenshots are unsupported', () => {
assert.ok(
!new_hidden_page.description.includes('take_screenshot is not supported'),
)
assert.ok(!show_page.description.includes('for screenshots'))
})
it('list_pages returns at least one page', async () => {
await withBrowser(async ({ execute }) => {
const result = await execute(list_pages, {})

View File

@@ -24,6 +24,14 @@ function structuredOf<T>(result: { structuredContent?: unknown }): T {
}
describe('window tools', () => {
// Regression guard on description wording only: create_hidden_window must not
// tell the model that take_screenshot is unsupported.
it('create_hidden_window description does not claim screenshots are unsupported', () => {
assert.ok(
!create_hidden_window.description.includes(
'take_screenshot is not supported',
),
)
})
it('list_windows returns at least one window', async () => {
await withBrowser(async ({ execute }) => {
const result = await execute(list_windows, {})