feat: improved multi tab agent workflow (#507)

* feat: updated multitab workflow

* fix: updated prompt with fix for test cases

* fix: active agent glow

* fix: review comments
This commit is contained in:
Dani Akash
2026-03-20 18:31:36 +05:30
committed by GitHub
parent 11d15d079f
commit 2b4fdf1aad
6 changed files with 155 additions and 36 deletions

View File

@@ -19,6 +19,10 @@ function extractTabId(toolPart: ToolUIPart | null): number | undefined {
return input?.tabId
}
/**
 * Fire-and-forget delivery of a glow message to a single tab.
 *
 * @param tabId - ID of the tab that should receive the message.
 * @param message - Glow payload forwarded unchanged to `chrome.tabs.sendMessage`.
 */
function sendGlow(tabId: number, message: GlowMessage): void {
  const delivery = chrome.tabs.sendMessage(tabId, message)
  // Best-effort: a rejected send is swallowed so it never surfaces as an
  // unhandled rejection.
  void delivery.catch(() => undefined)
}
export const useNotifyActiveTab = ({
messages,
status,
@@ -28,7 +32,10 @@ export const useNotifyActiveTab = ({
status: ChatStatus
conversationId: string
}) => {
const lastTabIdRef = useRef<number | null>(null)
// Track the single tab currently glowing
const activeTabIdRef = useRef<number | null>(null)
// Track all tabs that have been glowed during this stream (for cleanup)
const allGlowedTabsRef = useRef<Set<number>>(new Set())
const lastMessage = messages?.[messages.length - 1]
@@ -41,27 +48,35 @@ export const useNotifyActiveTab = ({
useEffect(() => {
const isStreaming = status === 'streaming'
const previousTabId = lastTabIdRef.current
if (!isStreaming) {
if (previousTabId) {
// Deactivate ALL tabs that were glowed during this stream
const allGlowed = allGlowedTabsRef.current
if (allGlowed.size > 0) {
const deactivate = async () => {
// Capture tab IDs before any async work to avoid race with clear()
const tabIds = Array.from(allGlowed)
allGlowed.clear()
const alreadyShown = await firstRunConfettiShownStorage.getValue()
const deactivateMessage: GlowMessage = {
conversationId,
isActive: false,
showConfetti: !alreadyShown,
let showConfetti = !alreadyShown
for (const tabId of tabIds) {
sendGlow(tabId, {
conversationId,
isActive: false,
showConfetti,
})
showConfetti = false
}
chrome.tabs
.sendMessage(previousTabId, deactivateMessage)
.catch(() => {})
if (!alreadyShown) {
await firstRunConfettiShownStorage.setValue(true)
}
}
deactivate()
lastTabIdRef.current = null
}
activeTabIdRef.current = null
return
}
@@ -70,34 +85,41 @@ export const useNotifyActiveTab = ({
let cancelled = false
const activate = async () => {
let targetTabId = toolTabId ?? previousTabId ?? undefined
let targetTabId = toolTabId ?? undefined
if (!targetTabId) {
const tabs = await chrome.tabs.query({
active: true,
currentWindow: true,
})
targetTabId = tabs[0]?.id
// Fallback: use the currently active tab, or query browser
if (activeTabIdRef.current) {
targetTabId = activeTabIdRef.current
} else {
const tabs = await chrome.tabs.query({
active: true,
currentWindow: true,
})
targetTabId = tabs[0]?.id
}
}
if (cancelled || !targetTabId) return
const previousTabId = activeTabIdRef.current
// If the agent moved to a different tab, deactivate the previous one
if (previousTabId && previousTabId !== targetTabId) {
const deactivateMessage: GlowMessage = {
sendGlow(previousTabId, {
conversationId,
isActive: false,
}
chrome.tabs
.sendMessage(previousTabId, deactivateMessage)
.catch(() => {})
})
}
const activateMessage: GlowMessage = {
// Activate glow on the target tab
sendGlow(targetTabId, {
conversationId,
isActive: true,
}
chrome.tabs.sendMessage(targetTabId, activateMessage).catch(() => {})
lastTabIdRef.current = targetTabId
})
activeTabIdRef.current = targetTabId
allGlowedTabsRef.current.add(targetTabId)
}
activate()

View File

@@ -55,6 +55,7 @@ export default defineConfig({
permissions: [
'topSites',
'tabs',
'tabGroups',
'storage',
'sidePanel',
'browserOS',

View File

@@ -203,7 +203,28 @@ function getExecution(
- Don't ask permission for routine steps. Act, then report.
- Do not refuse by default, attempt tasks even when outcomes are uncertain.
- For ambiguous/unclear requests, ask one targeted clarifying question.
- Stay on the current page. Only open new tabs when the user explicitly asks.
- Stay on the current page for single-page tasks. Use \`navigate_page\` to move within one tab.
### Multi-tab workflow
When a task requires working on multiple pages simultaneously:
1. **Inform the user** that you're creating background tabs for the task.
2. **Open new tabs in background** using \`new_page\` (opens in background by default) — never steal focus from the user's current tab.
3. **IMMEDIATELY create a tab group** using \`group_tabs\` with a descriptive title — do this right after opening the tabs, before any other work. Include the user's current tab in the group. Every multi-tab task MUST have a tab group.
4. **Work on background tabs** — all tools (click, fill, navigate, snapshot) work on background tabs via their page ID.
5. **Narrate progress in chat** — keep the user informed: "Checking Vercel pricing... Now checking Netlify..."
6. **Report results in chat** — summarize findings so the user doesn't need to switch tabs. Leave tabs open for the user to browse later.
7. **Never force-switch the user's active tab.** If you need user interaction on a background tab (e.g., login, CAPTCHA), tell the user which tab needs attention and let them switch manually.
8. **Never navigate the user's current tab** during a multi-tab task. The current tab is the user's anchor — use it only for reading (snapshots, content extraction). All navigation should happen on background tabs.
**Do NOT use \`create_hidden_window\` or \`new_hidden_page\` for user-requested tasks.** Hidden windows are invisible to the user and cannot be screenshotted. Use \`new_page\` (background mode) instead — tabs appear in the user's tab strip and can be inspected. Reserve hidden windows for automated/scheduled runs only.
For single-page lookups (e.g., "go to X and read Y"), use \`navigate_page\` on the current tab. Only create new tabs when the task requires multiple pages open simultaneously.
### Tab retry discipline
When a background tab fails (404, wrong content, unexpected redirect):
- **Navigate the existing tab** to the correct URL with \`navigate_page\` — do NOT open a new tab for retries.
- If you must abandon a tab, close it with \`close_page\` before opening a replacement.
- Never let orphan tabs accumulate — each task should end with only the tabs that contain useful content.
### Observe → Act → Verify
- **Before acting**: Take a snapshot to get interactive element IDs.
@@ -247,6 +268,14 @@ function getToolSelection(): string {
- Prefer \`fill\` over \`press_key\` for text input. Use \`press_key\` for keyboard shortcuts (Enter, Escape, Tab, Ctrl+A, etc.).
- Prefer clicking links over \`navigate_page\` when the link is visible. Use \`navigate_page\` for direct URL access, back/forward, or reload.
### Navigation: single-tab vs multi-tab
| Task | Approach |
|------|----------|
| Look up one page | \`navigate_page\` on current tab |
| Research across multiple sites | \`new_page\` (background) for each site + \`group_tabs\` |
| Compare two pages side by side | \`new_page\` (background) × 2 + \`group_tabs\` |
| User says "open a new tab" | \`new_page\` (background) — don't steal focus |
### Connected apps: Strata vs browser
When an app is Connected, prefer Strata tools over browser automation. Strata is faster, more reliable, and works without navigating away from the user's current page.
</tool_selection>`
@@ -351,7 +380,12 @@ function getErrorRecovery(
### Strata errors
- Authentication error → call \`suggest_app_connection\` for re-auth (STOP and wait)
- Action not found → try \`search_documentation\`, then fall back to browser automation
- Partial failure → report what succeeded and what didn't`
- Partial failure → report what succeeded and what didn't
### Retry budget
- If a site isn't cooperating after 3-4 attempts (form not filling, redirects, geo-blocks), stop trying.
- Report what you've found so far and explain what didn't work: "Kayak kept defaulting to your local city. Here are the Google Flights results instead."
- Don't exhaust 10+ tool calls on a single failing site — the user's time matters more than completeness.`
if (hasWorkspace) {
recovery += `
@@ -528,6 +562,12 @@ Default: do not narrate routine, low-risk tool calls (just call the tool).
Narrate only when it helps: multi-step plans, complex navigation, or when the user explicitly asked for explanation.
Keep narration brief. "Searching for flights..." then tool call — not "I will now search for flights by calling the search tool."
Execute independent tool calls in parallel when possible.
When working on background tabs, always narrate progress so the user knows what's happening:
- "Opening a background tab to check Yahoo News headlines..."
- "Found 5 headlines on Yahoo News. Now checking Reuters..."
- "Done! Here's what I found across all sources:"
This is essential because the user can't see the background tabs — chat is their only window into your work.
</tool_call_style>
- Be concise: 1-2 lines for status updates and action confirmations.

View File

@@ -121,14 +121,17 @@ export const navigate_page = defineTool({
export const new_page = defineTool({
name: 'new_page',
description: 'Open a new page (tab) and navigate to a URL',
description:
'Open a new page (tab) and navigate to a URL. Opens in background by default to keep the user on their current page. Use group_tabs to organize related tabs.',
input: z.object({
url: z.string().describe('URL to open'),
hidden: z.boolean().default(false).describe('Create as hidden tab'),
background: z
.boolean()
.default(false)
.describe('Open in background without activating'),
.default(true)
.describe(
'Open in background without stealing focus. Set to false only when user needs to see the tab immediately.',
),
windowId: z.number().optional().describe('Window ID to create tab in'),
}),
output: z.object({
@@ -140,8 +143,8 @@ export const new_page = defineTool({
}),
handler: async (args, ctx, response) => {
const pageId = await ctx.browser.newPage(args.url, {
hidden: args.hidden || undefined,
background: args.background || undefined,
hidden: args.hidden ? true : undefined,
background: args.background === false ? false : true,
windowId: args.windowId,
})
response.text(`Opened new page: ${args.url}\nPage ID: ${pageId}`)

View File

@@ -101,7 +101,13 @@ export const get_page_content = defineTool({
extension: 'md',
content: text,
})
response.text(`Saved page content to ${path}`)
// Return the content inline (capped at the inline limit) so the agent can
// work immediately, plus the file path for optional deep reading.
const inlineLimit = TOOL_LIMITS.INLINE_PAGE_CONTENT_MAX_CHARS
const truncated = text.slice(0, inlineLimit)
response.text(truncated)
// Only claim truncation when characters were actually dropped; previously the
// "[Content truncated …]" note was emitted even for pages that fit entirely.
const wasTruncated = truncated.length < text.length
response.text(
  wasTruncated
    ? `\n\n[Content truncated at ${inlineLimit} chars. Full content (${text.length} chars) saved to: ${path}]`
    : `\n\n[Full content (${text.length} chars) saved to: ${path}]`,
)
response.data({
path,
contentLength: text.length,

View File

@@ -1032,9 +1032,56 @@ describe('execution section', () => {
expect(prompt).toContain('500')
})
it('includes new-tab restriction', () => {
it('includes multi-tab workflow guidance', () => {
// Why: The agent must know how to handle multi-tab tasks — open background
// tabs, create tab groups, narrate progress, and never steal user focus.
const prompt = buildRegular()
expect(prompt).toContain('Only open new tabs when the user explicitly asks')
expect(prompt).toContain('Multi-tab workflow')
expect(prompt).toContain('background')
expect(prompt).toContain('group_tabs')
expect(prompt).toContain('Never force-switch')
})
it('enforces mandatory tab group creation', () => {
  // Why: in Run 7 the agent opened background tabs but never grouped them,
  // so the prompt has to word tab groups as a requirement, not a suggestion.
  const requiredPhrases = [
    'IMMEDIATELY create a tab group',
    'MUST have a tab group',
  ]
  const prompt = buildRegular()
  for (const phrase of requiredPhrases) {
    expect(prompt).toContain(phrase)
  }
})
it('prohibits navigating user current tab during multi-tab', () => {
  // Why: in Run 7 the agent clicked a link on the user's current tab and
  // navigated away from their starting page. That tab must stay read-only.
  const requiredPhrases = ['Never navigate the user', 'anchor']
  const prompt = buildRegular()
  for (const phrase of requiredPhrases) {
    expect(prompt).toContain(phrase)
  }
})
it('prohibits hidden windows for user tasks', () => {
  // Why: Run 2 reached for create_hidden_window instead of background tabs.
  // Hidden windows are invisible to users and cannot be screenshotted.
  const requiredPhrases = [
    'Do NOT use',
    'create_hidden_window',
    'new_hidden_page',
  ]
  const prompt = buildRegular()
  for (const phrase of requiredPhrases) {
    expect(prompt).toContain(phrase)
  }
})
it('includes tab retry discipline', () => {
  // Why: Run 7 ended up with 7+ tabs for a 3-article task because each retry
  // opened a fresh tab rather than navigating an existing one.
  const requiredPhrases = [
    'Tab retry discipline',
    'Navigate the existing tab',
    'close_page',
  ]
  const prompt = buildRegular()
  for (const phrase of requiredPhrases) {
    expect(prompt).toContain(phrase)
  }
})
it('includes retry budget for failing sites', () => {
  // Why: Run 8 burned 15+ tool calls fighting Kayak's geo-detection. The
  // agent should stop after 3-4 attempts and report partial results.
  const requiredPhrases = ['Retry budget', '3-4 attempts']
  const prompt = buildRegular()
  for (const phrase of requiredPhrases) {
    expect(prompt).toContain(phrase)
  }
})
})