feat: improved multi tab agent workflow (#507)

* feat: updated multitab workflow
* fix: updated prompt with fix for test cases
* fix: active agent glow
* fix: review comments
2026-05-13 23:53:25 +00:00 · 2026-03-20 18:31:36 +05:30
parent 11d15d079f
commit 2b4fdf1aad
6 changed files with 155 additions and 36 deletions
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/useNotifyActiveTab.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/useNotifyActiveTab.tsx
@@ -19,6 +19,10 @@ function extractTabId(toolPart: ToolUIPart | null): number | undefined {
  return input?.tabId
 }

+function sendGlow(tabId: number, message: GlowMessage): void { // fire-and-forget: posts `message` to tab `tabId` and returns nothing
+  chrome.tabs.sendMessage(tabId, message).catch(() => {}) // rejection is deliberately discarded (presumably the tab may be closed or have no listener — confirm)
+}
+
 export const useNotifyActiveTab = ({
  messages,
  status,
@@ -28,7 +32,10 @@ export const useNotifyActiveTab = ({
  status: ChatStatus
  conversationId: string
 }) => {
-  const lastTabIdRef = useRef<number | null>(null)
+  // Track the single tab currently glowing
+  const activeTabIdRef = useRef<number | null>(null)
+  // Track all tabs that have been glowed during this stream (for cleanup)
+  const allGlowedTabsRef = useRef<Set<number>>(new Set())

  const lastMessage = messages?.[messages.length - 1]

@@ -41,27 +48,35 @@ export const useNotifyActiveTab = ({

  useEffect(() => {
    const isStreaming = status === 'streaming'
-    const previousTabId = lastTabIdRef.current

    if (!isStreaming) {
-      if (previousTabId) {
+      // Deactivate ALL tabs that were glowed during this stream
+      const allGlowed = allGlowedTabsRef.current
+      if (allGlowed.size > 0) {
        const deactivate = async () => {
+          // Capture tab IDs before any async work to avoid race with clear()
+          const tabIds = Array.from(allGlowed)
+          allGlowed.clear()
+
          const alreadyShown = await firstRunConfettiShownStorage.getValue()
-          const deactivateMessage: GlowMessage = {
-            conversationId,
-            isActive: false,
-            showConfetti: !alreadyShown,
+          let showConfetti = !alreadyShown
+
+          for (const tabId of tabIds) {
+            sendGlow(tabId, {
+              conversationId,
+              isActive: false,
+              showConfetti,
+            })
+            showConfetti = false
          }
-          chrome.tabs
-            .sendMessage(previousTabId, deactivateMessage)
-            .catch(() => {})
+
          if (!alreadyShown) {
            await firstRunConfettiShownStorage.setValue(true)
          }
        }
        deactivate()
-        lastTabIdRef.current = null
      }
+      activeTabIdRef.current = null
      return
    }

@@ -70,34 +85,41 @@ export const useNotifyActiveTab = ({
    let cancelled = false

    const activate = async () => {
-      let targetTabId = toolTabId ?? previousTabId ?? undefined
+      let targetTabId = toolTabId ?? undefined

      if (!targetTabId) {
-        const tabs = await chrome.tabs.query({
-          active: true,
-          currentWindow: true,
-        })
-        targetTabId = tabs[0]?.id
+        // Fallback: use the currently active tab, or query browser
+        if (activeTabIdRef.current) {
+          targetTabId = activeTabIdRef.current
+        } else {
+          const tabs = await chrome.tabs.query({
+            active: true,
+            currentWindow: true,
+          })
+          targetTabId = tabs[0]?.id
+        }
      }

      if (cancelled || !targetTabId) return

+      const previousTabId = activeTabIdRef.current
+
+      // If the agent moved to a different tab, deactivate the previous one
      if (previousTabId && previousTabId !== targetTabId) {
-        const deactivateMessage: GlowMessage = {
+        sendGlow(previousTabId, {
          conversationId,
          isActive: false,
-        }
-        chrome.tabs
-          .sendMessage(previousTabId, deactivateMessage)
-          .catch(() => {})
+        })
      }

-      const activateMessage: GlowMessage = {
+      // Activate glow on the target tab
+      sendGlow(targetTabId, {
        conversationId,
        isActive: true,
-      }
-      chrome.tabs.sendMessage(targetTabId, activateMessage).catch(() => {})
-      lastTabIdRef.current = targetTabId
+      })
+
+      activeTabIdRef.current = targetTabId
+      allGlowedTabsRef.current.add(targetTabId)
    }

    activate()
--- a/packages/browseros-agent/apps/agent/wxt.config.ts
+++ b/packages/browseros-agent/apps/agent/wxt.config.ts
@@ -55,6 +55,7 @@ export default defineConfig({
    permissions: [
      'topSites',
      'tabs',
+      'tabGroups',
      'storage',
      'sidePanel',
      'browserOS',
--- a/packages/browseros-agent/apps/server/src/agent/prompt.ts
+++ b/packages/browseros-agent/apps/server/src/agent/prompt.ts
@@ -203,7 +203,28 @@ function getExecution(
 - Don't ask permission for routine steps. Act, then report.
 - Do not refuse by default, attempt tasks even when outcomes are uncertain.
 - For ambiguous/unclear requests, ask one targeted clarifying question.
- Stay on the current page. Only open new tabs when the user explicitly asks.
+- Stay on the current page for single-page tasks. Use \`navigate_page\` to move within one tab.
+
+### Multi-tab workflow
+When a task requires working on multiple pages simultaneously:
+1. **Inform the user** that you're creating background tabs for the task.
+2. **Open new tabs in background** using \`new_page\` (opens in background by default) — never steal focus from the user's current tab.
+3. **IMMEDIATELY create a tab group** using \`group_tabs\` with a descriptive title — do this right after opening the tabs, before any other work. Include the user's current tab in the group. Every multi-tab task MUST have a tab group.
+4. **Work on background tabs** — all tools (click, fill, navigate, snapshot) work on background tabs via their page ID.
+5. **Narrate progress in chat** — keep the user informed: "Checking Vercel pricing... Now checking Netlify..."
+6. **Report results in chat** — summarize findings so the user doesn't need to switch tabs. Leave tabs open for the user to browse later.
+7. **Never force-switch the user's active tab.** If you need user interaction on a background tab (e.g., login, CAPTCHA), tell the user which tab needs attention and let them switch manually.
+8. **Never navigate the user's current tab** during a multi-tab task. The current tab is the user's anchor — use it only for reading (snapshots, content extraction). All navigation should happen on background tabs.
+
+**Do NOT use \`create_hidden_window\` or \`new_hidden_page\` for user-requested tasks.** Hidden windows are invisible to the user and cannot be screenshotted. Use \`new_page\` (background mode) instead — tabs appear in the user's tab strip and can be inspected. Reserve hidden windows for automated/scheduled runs only.
+
+For single-page lookups (e.g., "go to X and read Y"), use \`navigate_page\` on the current tab. Only create new tabs when the task requires multiple pages open simultaneously.
+
+### Tab retry discipline
+When a background tab fails (404, wrong content, unexpected redirect):
+- **Navigate the existing tab** to the correct URL with \`navigate_page\` — do NOT open a new tab for retries.
+- If you must abandon a tab, close it with \`close_page\` before opening a replacement.
+- Never let orphan tabs accumulate — each task should end with only the tabs that contain useful content.

 ### Observe → Act → Verify
 - **Before acting**: Take a snapshot to get interactive element IDs.
@@ -247,6 +268,14 @@ function getToolSelection(): string {
 - Prefer \`fill\` over \`press_key\` for text input. Use \`press_key\` for keyboard shortcuts (Enter, Escape, Tab, Ctrl+A, etc.).
 - Prefer clicking links over \`navigate_page\` when the link is visible. Use \`navigate_page\` for direct URL access, back/forward, or reload.

+### Navigation: single-tab vs multi-tab
+| Task | Approach |
+|------|----------|
+| Look up one page | \`navigate_page\` on current tab |
+| Research across multiple sites | \`new_page\` (background) for each site + \`group_tabs\` |
+| Compare two pages side by side | \`new_page\` (background) × 2 + \`group_tabs\` |
+| User says "open a new tab" | \`new_page\` (background) — don't steal focus |
+
 ### Connected apps: Strata vs browser
 When an app is Connected, prefer Strata tools over browser automation. Strata is faster, more reliable, and works without navigating away from the user's current page.
 </tool_selection>`
@@ -351,7 +380,12 @@ function getErrorRecovery(
 ### Strata errors
 - Authentication error → call \`suggest_app_connection\` for re-auth (STOP and wait)
 - Action not found → try \`search_documentation\`, then fall back to browser automation
- Partial failure → report what succeeded and what didn't`
+- Partial failure → report what succeeded and what didn't
+
+### Retry budget
+- If a site isn't cooperating after 3-4 attempts (form not filling, redirects, geo-blocks), stop trying.
+- Report what you've found so far and explain what didn't work: "Kayak kept defaulting to your local city. Here are the Google Flights results instead."
+- Don't exhaust 10+ tool calls on a single failing site — the user's time matters more than completeness.`

  if (hasWorkspace) {
    recovery += `
@@ -528,6 +562,12 @@ Default: do not narrate routine, low-risk tool calls (just call the tool).
 Narrate only when it helps: multi-step plans, complex navigation, or when the user explicitly asked for explanation.
 Keep narration brief. "Searching for flights..." then tool call — not "I will now search for flights by calling the search tool."
 Execute independent tool calls in parallel when possible.
+
+When working on background tabs, always narrate progress so the user knows what's happening:
+- "Opening a background tab to check Yahoo News headlines..."
+- "Found 5 headlines on Yahoo News. Now checking Reuters..."
+- "Done! Here's what I found across all sources:"
+This is essential because the user can't see the background tabs — chat is their only window into your work.
 </tool_call_style>

 - Be concise: 1-2 lines for status updates and action confirmations.
--- a/packages/browseros-agent/apps/server/src/tools/navigation.ts
+++ b/packages/browseros-agent/apps/server/src/tools/navigation.ts
@@ -121,14 +121,17 @@ export const navigate_page = defineTool({

 export const new_page = defineTool({
  name: 'new_page',
-  description: 'Open a new page (tab) and navigate to a URL',
+  description:
+    'Open a new page (tab) and navigate to a URL. Opens in background by default to keep the user on their current page. Use group_tabs to organize related tabs.',
  input: z.object({
    url: z.string().describe('URL to open'),
    hidden: z.boolean().default(false).describe('Create as hidden tab'),
    background: z
      .boolean()
-      .default(false)
-      .describe('Open in background without activating'),
+      .default(true)
+      .describe(
+        'Open in background without stealing focus. Set to false only when user needs to see the tab immediately.',
+      ),
    windowId: z.number().optional().describe('Window ID to create tab in'),
  }),
  output: z.object({
@@ -140,8 +143,8 @@ export const new_page = defineTool({
  }),
  handler: async (args, ctx, response) => {
    const pageId = await ctx.browser.newPage(args.url, {
-      hidden: args.hidden || undefined,
-      background: args.background || undefined,
+      hidden: args.hidden ? true : undefined,
+      background: args.background === false ? false : true,
      windowId: args.windowId,
    })
    response.text(`Opened new page: ${args.url}\nPage ID: ${pageId}`)
--- a/packages/browseros-agent/apps/server/src/tools/snapshot.ts
+++ b/packages/browseros-agent/apps/server/src/tools/snapshot.ts
@@ -101,7 +101,13 @@ export const get_page_content = defineTool({
        extension: 'md',
        content: text,
      })
-      response.text(`Saved page content to ${path}`)
+      // Return truncated content inline so the agent can work immediately,
+      // plus the file path for optional deep reading
+      const truncated = text.slice(0, TOOL_LIMITS.INLINE_PAGE_CONTENT_MAX_CHARS)
+      response.text(truncated)
+      response.text(
+        `\n\n[Content truncated at ${TOOL_LIMITS.INLINE_PAGE_CONTENT_MAX_CHARS} chars. Full content (${text.length} chars) saved to: ${path}]`,
+      )
      response.data({
        path,
        contentLength: text.length,
--- a/packages/browseros-agent/apps/server/tests/agent/prompt.test.ts
+++ b/packages/browseros-agent/apps/server/tests/agent/prompt.test.ts
@@ -1032,9 +1032,56 @@ describe('execution section', () => {
    expect(prompt).toContain('500')
  })

-  it('includes new-tab restriction', () => {
+  it('includes multi-tab workflow guidance', () => {
+    // Why: The agent must know how to handle multi-tab tasks — open background
+    // tabs, create tab groups, narrate progress, and never steal user focus.
    const prompt = buildRegular()
-    expect(prompt).toContain('Only open new tabs when the user explicitly asks')
+    expect(prompt).toContain('Multi-tab workflow')
+    expect(prompt).toContain('background')
+    expect(prompt).toContain('group_tabs')
+    expect(prompt).toContain('Never force-switch')
+  })
+
+  it('enforces mandatory tab group creation', () => {
+    // Why: Run 7 showed the agent opening background tabs without creating
+    // a tab group. The prompt must make tab groups mandatory, not optional.
+    const prompt = buildRegular()
+    expect(prompt).toContain('IMMEDIATELY create a tab group')
+    expect(prompt).toContain('MUST have a tab group')
+  })
+
+  it('prohibits navigating user current tab during multi-tab', () => {
+    // Why: Run 7 showed the agent clicking a link on the user's current tab,
+    // navigating away from their starting page. The current tab must be read-only.
+    const prompt = buildRegular()
+    expect(prompt).toContain('Never navigate the user')
+    expect(prompt).toContain('anchor')
+  })
+
+  it('prohibits hidden windows for user tasks', () => {
+    // Why: Run 2 used create_hidden_window instead of background tabs.
+    // Hidden windows are invisible to users and can't be screenshotted.
+    const prompt = buildRegular()
+    expect(prompt).toContain('Do NOT use')
+    expect(prompt).toContain('create_hidden_window')
+    expect(prompt).toContain('new_hidden_page')
+  })
+
+  it('includes tab retry discipline', () => {
+    // Why: Run 7 opened 7+ tabs for a 3-article task because retries
+    // created new tabs instead of navigating existing ones.
+    const prompt = buildRegular()
+    expect(prompt).toContain('Tab retry discipline')
+    expect(prompt).toContain('Navigate the existing tab')
+    expect(prompt).toContain('close_page')
+  })
+
+  it('includes retry budget for failing sites', () => {
+    // Why: Run 8 spent 15+ tool calls fighting Kayak's geo-detection.
+    // The agent should give up after 3-4 attempts and report partial results.
+    const prompt = buildRegular()
+    expect(prompt).toContain('Retry budget')
+    expect(prompt).toContain('3-4 attempts')
  })
 })