mirror of
https://github.com/pocketpaw/pocketpaw.git
synced 2026-05-21 01:04:57 +00:00
* feat(fleet): emit agent.spawned + fleet.* events to journal on install The fleet installer now takes an optional `journal` (and optional `actor`) and writes a correlated event trio per run: `fleet.install.started` at entry, one canonical `agent.spawned` per soul created, and a `fleet.installed` summary at the end. All three events share a single correlation_id and carry the fleet's declared scopes, so downstream projections and tailers can stitch a run back together. The journal parameter is opt-in and defaults to None. Existing call sites (the standalone CLI path, unit tests) keep working untouched. When a run fails at the soul step, only `fleet.install.started` lands — no `agent.spawned` and no terminal `fleet.installed` — so a UI tailing the journal can tell a partial run apart from a clean one without inspecting the install report. Emission is wrapped in try/except and logged at warning level. A broken journal must not translate into a broken install. Unblocks paw-enterprise's InstallFleetPanel (which tails `fleet.*` and `agent.spawned` to render live install progress) and closes the observability gap opened by the installer merge in #940. * fix(deps): promote soul-protocol from extra to base dep + bump to >=0.3.1 Runtime code (ee.fleet.installer) now imports from soul_protocol.spec.journal and soul_protocol.engine.journal at module load. These imports cannot be guarded or lazy — the installer signature exposes Journal as a parameter type. Previously soul-protocol lived only in the [soul], [all], and [dev] extras, so CI (which installs --dev) still couldn't resolve the import. 
Promote to base dependencies at soul-protocol[engine]>=0.3.1 so: - pip install pocketpaw just works; the journal APIs are available - CI matrix on Python 3.11/3.12/3.13 can collect test modules that import soul_protocol - The 0.3.1 floor guarantees Journal + EventEntry + Actor are present (all shipped in that release) Also regenerated uv.lock to pick up soul-protocol 0.3.1 from the upstream v0.3.1 release. * test: fix two tests exposed by soul-protocol becoming a base dep Promoting soul-protocol from an optional extra to a base dependency unmasked two pre-existing test issues that only surfaced when soul is actually importable in CI: 1. test_integration_headless.py::TestToolBridgeCompleteness _instantiate_all_tools strips remember/recall/forget when a soul manager is active (soul_remember/soul_recall supersede them). A prior test in the suite left a SoulManager globally installed, so the "memory tools present" assertions failed non-deterministically based on test order. Add an autouse _reset_manager fixture to the class, mirroring the pattern from test_soul_v024_smoke.py. 2. test_soul_manager.py::test_get_tools_returns_six Hard-coded exact-match assertion on a set of six tool names. soul-protocol v0.3.1 exposes nine (adds soul_forget, soul_core_memory, soul_context). Rename to test_get_tools_exposes_core_soul_tools and rewrite as a subset check so the core contract (the six names pocketpaw depends on) is still guarded while downstream can add tools freely. --------- Co-authored-by: Prakash-1 <prakash-1@Mac.lan>
650 lines
27 KiB
Python
650 lines
27 KiB
Python
# test_integration_headless.py — Integration tests for headless channel correctness.
|
|
# Created: 2026-03-11
|
|
#
|
|
# Catches regressions like the permission_mode hang bug (where headless channels
|
|
# hang because tool permissions require terminal interaction) and related issues.
|
|
#
|
|
# Covers:
|
|
# 1. Server startup — FastAPI app boots and health endpoint is reachable.
|
|
# 2. Tool bridge completeness — memory tools present for ALL backends.
|
|
# 3. Channel adapter tool access — bypassPermissions always set in SDK options.
|
|
# 4. Timeout guard — tool execution completes within 5 seconds (catches hangs).
# 5. Tool bridge pipeline — policy → registry → tool list, without the real SDKs.
|
|
#
|
|
# Tests marked @pytest.mark.integration require a running server or real external
|
|
# deps and are skipped in CI by default. Run locally with:
|
|
# uv run pytest tests/test_integration_headless.py -v
|
|
# uv run pytest tests/test_integration_headless.py -v -m integration # integration only
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import inspect
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Shared helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_settings(*, tool_profile: str = "full", bypass: bool = False) -> MagicMock:
|
|
"""Minimal mock Settings with safe defaults for headless tests."""
|
|
settings = MagicMock()
|
|
settings.bypass_permissions = bypass
|
|
settings.agent_backend = "claude_agent_sdk"
|
|
settings.anthropic_api_key = "sk-ant-test-key"
|
|
settings.claude_sdk_model = ""
|
|
settings.claude_sdk_max_turns = 0
|
|
settings.smart_routing_enabled = False
|
|
settings.tool_profile = tool_profile
|
|
settings.tools_allow = []
|
|
settings.tools_deny = []
|
|
settings.mcp_servers = {}
|
|
settings.claude_sdk_provider = "anthropic"
|
|
settings.ollama_base_url = "http://localhost:11434"
|
|
settings.openai_api_key = ""
|
|
settings.openai_base_url = ""
|
|
settings.openrouter_api_key = ""
|
|
settings.gemini_api_key = ""
|
|
settings.openai_agents_model = ""
|
|
settings.file_jail_path = "/tmp"
|
|
return settings
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 1. Server startup integration tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestServerStartup:
    """Verify the FastAPI app mounts cleanly and the health endpoint responds.

    Most tests use a minimal FastAPI instance with just the health router —
    same pattern as test_api_v1_health.py — to avoid import-time side effects
    from dashboard.py (Settings.load, CORS origin resolution, etc.). Only the
    test marked ``integration`` imports the full dashboard app.
    """

    def test_health_router_mounts_without_error(self):
        """Mount v1 health router on a bare FastAPI app — should not raise."""
        from fastapi import FastAPI

        from pocketpaw.api.v1.health import router

        app = FastAPI()
        # Should not raise during mount
        app.include_router(router, prefix="/api/v1")

        # Verify the expected routes are registered
        routes = {r.path for r in app.routes}
        assert "/api/v1/health" in routes
        assert "/api/v1/version" in routes

    def test_health_endpoint_returns_200(self):
        """GET /api/v1/health returns HTTP 200 with a health summary."""
        from unittest.mock import patch

        from fastapi import FastAPI
        from fastapi.testclient import TestClient

        from pocketpaw.api.v1.health import router

        app = FastAPI()
        app.include_router(router, prefix="/api/v1")
        client = TestClient(app)

        # Stand-in health engine so the endpoint has a summary to return.
        mock_engine = MagicMock()
        mock_engine.summary = {"status": "healthy", "check_count": 0, "issues": []}

        with patch("pocketpaw.health.get_health_engine", return_value=mock_engine):
            resp = client.get("/api/v1/health")

        assert resp.status_code == 200
        data = resp.json()
        # The endpoint returns a HealthSummary — must have a "status" field
        assert "status" in data

    def test_version_endpoint_returns_package_version(self):
        """GET /api/v1/version returns the installed pocketpaw version string."""
        from unittest.mock import patch

        from fastapi import FastAPI
        from fastapi.testclient import TestClient

        from pocketpaw.api.v1.health import router

        app = FastAPI()
        app.include_router(router, prefix="/api/v1")
        client = TestClient(app)

        mock_settings = MagicMock()
        mock_settings.agent_backend = "claude_agent_sdk"

        # Patch Settings.load so the endpoint reads the mock instead of real config.
        with patch("pocketpaw.config.Settings.load", return_value=mock_settings):
            resp = client.get("/api/v1/version")

        assert resp.status_code == 200
        data = resp.json()
        assert "version" in data
        assert "python" in data
        assert "agent_backend" in data

    def test_all_critical_v1_routers_mount_without_error(self):
        """mount_v1_routers() must not raise for Auth, Chat, Health, Sessions.

        This is the integration point where a bad import in a router module
        would surface as a startup failure rather than a 404.
        """
        from fastapi import FastAPI

        from pocketpaw.api.v1 import mount_v1_routers

        app = FastAPI()
        # Should not raise — critical routers are re-raised by mount_v1_routers
        mount_v1_routers(app)

        # Spot-check that key health routes are registered
        routes = {r.path for r in app.routes}
        assert "/api/v1/health" in routes
        assert "/api/v1/version" in routes

    @pytest.mark.integration
    def test_full_dashboard_app_health_endpoint(self):
        """Import the full dashboard app and hit /api/v1/health via TestClient.

        Marked @pytest.mark.integration — skipped in CI by default.
        Requires all dashboard dependencies to be installed.

        This is a plain synchronous test on purpose: the body awaits nothing
        and TestClient is a synchronous client, so no asyncio marker or event
        loop is required for it to be collected and run.
        """
        from unittest.mock import patch

        from fastapi.testclient import TestClient

        from pocketpaw.dashboard import app

        mock_engine = MagicMock()
        mock_engine.summary = {"status": "healthy", "check_count": 0, "issues": []}

        with patch("pocketpaw.health.get_health_engine", return_value=mock_engine):
            # raise_server_exceptions=False: a server-side error becomes a 500
            # response that the status assertion below rejects, not a raised exception.
            client = TestClient(app, raise_server_exceptions=False)
            resp = client.get("/api/v1/health")

        # Health endpoint should respond — even if status is "unknown" due to
        # limited env, it should not 500 or hang
        assert resp.status_code in (200, 401)  # 401 if auth middleware is active
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 2. Tool bridge completeness tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestToolBridgeCompleteness:
    """Verify memory tools (RememberTool, RecallTool, ForgetTool) are available
    for ALL agent backends, not just some.

    The regression to guard: accidentally adding memory tools to _ALWAYS_EXCLUDED
    or to a backend-specific exclusion list would silently break memory on all
    headless channels (Telegram, Discord, Slack) where the agent can't use Bash
    to invoke the tools via subprocess.
    """

    @pytest.fixture(autouse=True)
    def _reset_soul(self):
        """Reset the soul manager singleton before and after each test.

        `_instantiate_all_tools` strips `remember/recall/forget` when a soul
        manager is active (soul_remember/soul_recall supersede them). A prior
        test in the suite may leave a SoulManager installed globally; without
        this reset the memory-tool assertions below fail non-deterministically
        depending on test order. Mirrors the pattern in test_soul_v024_smoke.py.
        """
        from pocketpaw.soul.manager import _reset_manager

        _reset_manager()
        yield
        _reset_manager()

    # All backends that go through _instantiate_all_tools()
    _ALL_BACKENDS = [
        "openai_agents",
        "google_adk",
        "opencode",
        "codex_cli",
        "copilot_sdk",
        "claude_agent_sdk",  # Different exclusion rules — shell/fs excluded, not memory
    ]

    _MEMORY_TOOL_NAMES = {"remember", "recall", "forget"}

    def _get_tool_names(self, backend: str) -> set[str]:
        """Return the set of tool names that _instantiate_all_tools returns."""
        from pocketpaw.agents.tool_bridge import _instantiate_all_tools

        tools = _instantiate_all_tools(backend=backend)
        return {t.name for t in tools}

    @pytest.mark.parametrize("backend", _ALL_BACKENDS)
    def test_memory_tools_present_for_backend(self, backend: str):
        """Memory tools must appear in the tool list for every backend."""
        tool_names = self._get_tool_names(backend)
        missing = self._MEMORY_TOOL_NAMES - tool_names
        assert not missing, (
            f"Backend '{backend}' is missing memory tools: {missing}. "
            f"These tools are required for headless channels to save/recall facts."
        )

    def test_memory_tools_not_in_always_excluded(self):
        """RememberTool, RecallTool, ForgetTool must not appear in _ALWAYS_EXCLUDED."""
        from pocketpaw.agents.tool_bridge import _ALWAYS_EXCLUDED

        memory_class_names = {"RememberTool", "RecallTool", "ForgetTool"}
        accidentally_excluded = memory_class_names & _ALWAYS_EXCLUDED
        assert not accidentally_excluded, (
            f"Memory tools accidentally added to _ALWAYS_EXCLUDED: {accidentally_excluded}. "
            "This would break memory on ALL backends and ALL channels."
        )

    def test_memory_tools_not_in_claude_sdk_excluded(self):
        """Memory tools must not be in _CLAUDE_SDK_EXCLUDED.

        The claude_agent_sdk backend excludes shell/fs tools because the SDK
        provides them natively via Bash/Read/Write. Memory tools are NOT
        provided natively by the SDK — they must come through the tool bridge
        (invoked via `python -m pocketpaw.tools.cli`).
        """
        from pocketpaw.agents.tool_bridge import _CLAUDE_SDK_EXCLUDED

        memory_class_names = {"RememberTool", "RecallTool", "ForgetTool"}
        accidentally_excluded = memory_class_names & _CLAUDE_SDK_EXCLUDED
        assert not accidentally_excluded, (
            f"Memory tools accidentally added to _CLAUDE_SDK_EXCLUDED: {accidentally_excluded}. "
            "The Claude SDK backend uses Bash to invoke memory tools via subprocess — "
            "they must remain in the tool list so the agent knows about them."
        )

    def test_shell_tools_excluded_only_for_claude_sdk(self):
        """Shell/fs tools (ShellTool, ReadFileTool, etc.) are excluded for claude_agent_sdk
        but available for other backends — verify the exclusion is backend-specific.

        Only tool counts are compared, and the comparison is non-strict: the
        SDK backend must not expose MORE tools than openai_agents.
        """
        # For non-SDK backends, shell tools should be included
        openai_tools = self._get_tool_names("openai_agents")
        # For the SDK backend, shell tools are excluded (SDK provides Bash natively)
        sdk_tools = self._get_tool_names("claude_agent_sdk")

        # Exact shell/fs tool names are not checked here, so this is a count-based
        # sanity check: the SDK backend may have at most as many tools as openai_agents.
        assert len(sdk_tools) <= len(openai_tools), (
            "Expected claude_agent_sdk to have <= tools compared to openai_agents "
            "(shell/fs excluded from SDK backend)"
        )

    def test_remember_tool_has_correct_name(self):
        """RememberTool.name must be 'remember' — the name used in tool policy lookups."""
        tools = self._get_tool_names("openai_agents")
        assert "remember" in tools, "RememberTool not found by name 'remember'"
        assert "recall" in tools, "RecallTool not found by name 'recall'"
        assert "forget" in tools, "ForgetTool not found by name 'forget'"

    def test_tool_bridge_returns_non_empty_list_for_all_backends(self):
        """_instantiate_all_tools must return a non-empty tool list for every backend."""
        from pocketpaw.agents.tool_bridge import _instantiate_all_tools

        for backend in self._ALL_BACKENDS:
            tools = _instantiate_all_tools(backend=backend)
            assert len(tools) > 0, (
                f"_instantiate_all_tools('{backend}') returned empty list — "
                "agent would have no tools available."
            )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3. Channel adapter tool access / bypassPermissions tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestHeadlessChannelToolAccess:
    """Verify that headless channel contexts always get bypassPermissions.

    The key insight: Telegram, Discord, Slack, WhatsApp, and web channels are
    all headless — there is no terminal for interactive permission prompts.
    Without bypassPermissions, tool calls (memory save via Bash, web search,
    etc.) hang indefinitely.

    These tests are complementary to test_headless_permissions.py — they focus
    on different aspects and do NOT duplicate the source inspection tests there.
    """

    def test_permission_mode_is_unconditional_in_run_source(self):
        """bypassPermissions must be set unconditionally — not inside any if-block.

        This test specifically checks that the assignment is NOT gated on any
        settings attribute (like bypass_permissions, which defaults to False).

        Complements test_headless_permissions.py::test_no_conditional_bypass_in_options_build
        by also checking that the assignment line is not indented under a settings check.
        """
        from pocketpaw.agents.claude_sdk import ClaudeSDKBackend

        lines = inspect.getsource(ClaudeSDKBackend.run).split("\n")

        # Locate the assignment in a single pass so the line that is validated and
        # the index the backward walk starts from are guaranteed to be the same line.
        needles = ("permission_mode", "=", "bypassPermissions")
        permission_line_idx = next(
            (i for i, line in enumerate(lines) if all(k in line.strip() for k in needles)),
            None,
        )

        assert permission_line_idx is not None, (
            "Could not find 'permission_mode = ...' assignment with 'bypassPermissions' in run(). "
            "The permission bypass must be explicitly set."
        )

        # Walk backwards over at most the 10 preceding lines (index 0 included) to
        # find the most recent non-comment, non-empty line.
        window_start = max(0, permission_line_idx - 10)
        for raw in reversed(lines[window_start:permission_line_idx]):
            prev = raw.strip()
            if not prev or prev.startswith("#"):
                continue
            # If the preceding substantive line is an `if` that checks bypass_permissions,
            # the fix has regressed
            assert "bypass_permissions" not in prev or "if" not in prev, (
                f"permission_mode assignment appears to be inside a bypass_permissions guard. "
                f"Preceding line: {prev!r}"
            )
            break

    def test_bypass_permissions_false_does_not_gate_permission_mode(self):
        """When bypass_permissions=False (the default), the run() source must still
        contain the unconditional bypassPermissions assignment.

        This tests the exact failure mode from the original bug: the setting defaulted
        to False, which gated the permission_mode assignment and caused hangs.
        """
        from pocketpaw.agents.claude_sdk import ClaudeSDKBackend

        # Construct with bypass=False (the default / the bug scenario)
        backend = ClaudeSDKBackend(_make_settings(bypass=False))
        source = inspect.getsource(backend.run)

        # The source must not have the old conditional pattern
        assert "if self.settings.bypass_permissions:" not in source, (
            "Found 'if self.settings.bypass_permissions:' in run() — "
            "this is the regression that causes tool hangs on headless channels."
        )
        # The unconditional assignment must be present
        assert '"bypassPermissions"' in source or "'bypassPermissions'" in source, (
            "bypassPermissions string not found in run() source — permission mode is not being set."
        )

    @pytest.mark.parametrize(
        "tool_profile",
        ["full", "minimal", "coding"],
        ids=["full-profile", "minimal-profile", "coding-profile"],
    )
    def test_memory_tools_allowed_under_all_profiles(self, tool_profile: str):
        """Memory tools must pass ToolPolicy.is_tool_allowed() for every built-in profile.

        If a profile accidentally excludes memory tools, the agent can't save/recall
        facts on headless channels — silent data loss with no error message.
        """
        from pocketpaw.tools.policy import ToolPolicy

        policy = ToolPolicy(profile=tool_profile, allow=[], deny=[])

        for tool_name in ("remember", "recall", "forget"):
            allowed = policy.is_tool_allowed(tool_name)
            assert allowed, (
                f"Tool '{tool_name}' is blocked by profile '{tool_profile}'. "
                f"Memory tools must be available on headless channels."
            )

    def test_tool_policy_deny_list_can_block_memory_tools(self):
        """Sanity check: explicit deny list DOES block memory tools.

        This verifies the policy system works correctly — if an operator
        explicitly denies memory tools, the policy should honor that.
        """
        from pocketpaw.tools.policy import ToolPolicy

        policy = ToolPolicy(profile="full", allow=[], deny=["remember"])
        assert not policy.is_tool_allowed("remember"), (
            "Explicit deny list should block 'remember' tool."
        )
        # Other memory tools not denied should still pass
        assert policy.is_tool_allowed("recall")
        assert policy.is_tool_allowed("forget")

    def test_group_memory_in_deny_blocks_all_memory_tools(self):
        """group:memory in deny list blocks all three memory tools."""
        from pocketpaw.tools.policy import ToolPolicy

        policy = ToolPolicy(profile="full", allow=[], deny=["group:memory"])
        for tool_name in ("remember", "recall", "forget"):
            assert not policy.is_tool_allowed(tool_name), (
                f"group:memory deny should block '{tool_name}'"
            )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 4. Timeout guard tests — catch hangs like the permission bug
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestToolExecutionTimeout:
    """Every tool execution here must finish inside a 5-second budget.

    The permission hang bug showed up as a tool call that never returned.
    Each execution is wrapped in asyncio.wait_for() so that a hang turns into
    a deterministic test failure instead of a CI-level timeout.

    Only the in-process execution path is exercised; the subprocess path is
    covered by test_headless_permissions.py.
    """

    # Per-call time budget, in seconds, passed to asyncio.wait_for().
    _TIMEOUT_S = 5.0
    # Lookup that the memory tools are redirected through during these tests.
    _MANAGER_TARGET = "pocketpaw.tools.builtin.memory.get_memory_manager"

    @staticmethod
    def _make_isolated_manager(tmp_path):
        """Return a file-backed MemoryManager rooted under tmp_path for isolation."""
        from pocketpaw.memory.manager import MemoryManager

        storage_root = tmp_path / "memory"
        return MemoryManager(backend="file", base_path=storage_root)

    @pytest.mark.asyncio
    async def test_remember_tool_completes_within_timeout(self, tmp_path):
        """RememberTool.execute() must return within the 5-second budget."""
        from unittest.mock import patch

        from pocketpaw.tools.builtin.memory import RememberTool

        manager = self._make_isolated_manager(tmp_path)
        remember = RememberTool()

        with patch(self._MANAGER_TARGET, return_value=manager):
            pending = remember.execute(content="User name is Ade", tags=["personal"])
            result = await asyncio.wait_for(pending, timeout=self._TIMEOUT_S)

        assert result is not None, "RememberTool returned None"
        assert isinstance(result, (dict, str)), f"Unexpected result type: {type(result)}"

    @pytest.mark.asyncio
    async def test_recall_tool_completes_within_timeout(self, tmp_path):
        """RecallTool.execute() must return within the budget, even on an empty store."""
        from unittest.mock import patch

        from pocketpaw.tools.builtin.memory import RecallTool

        manager = self._make_isolated_manager(tmp_path)
        recall = RecallTool()

        with patch(self._MANAGER_TARGET, return_value=manager):
            pending = recall.execute(query="anything")
            result = await asyncio.wait_for(pending, timeout=self._TIMEOUT_S)

        assert result is not None

    @pytest.mark.asyncio
    async def test_forget_tool_completes_within_timeout(self, tmp_path):
        """ForgetTool.execute() must return within the 5-second budget."""
        from unittest.mock import patch

        from pocketpaw.tools.builtin.memory import ForgetTool

        manager = self._make_isolated_manager(tmp_path)
        forget = ForgetTool()

        with patch(self._MANAGER_TARGET, return_value=manager):
            pending = forget.execute(query="nonexistent")
            result = await asyncio.wait_for(pending, timeout=self._TIMEOUT_S)

        assert result is not None

    @pytest.mark.asyncio
    async def test_remember_recall_roundtrip_within_timeout(self, tmp_path):
        """Save then recall against one store; each step gets its own 5s budget."""
        from unittest.mock import patch

        from pocketpaw.tools.builtin.memory import RecallTool, RememberTool

        manager = self._make_isolated_manager(tmp_path)
        saver, finder = RememberTool(), RecallTool()

        with patch(self._MANAGER_TARGET, return_value=manager):
            save_result = await asyncio.wait_for(
                saver.execute(content="User name is Ade", tags=["personal"]),
                timeout=self._TIMEOUT_S,
            )
            recall_result = await asyncio.wait_for(
                finder.execute(query="Ade"),
                timeout=self._TIMEOUT_S,
            )

        assert save_result is not None
        assert recall_result is not None

    @pytest.mark.asyncio
    async def test_concurrent_tool_calls_complete_within_timeout(self, tmp_path):
        """Several tool calls issued together must all finish within 5 seconds.

        The permission hang bug was especially bad under concurrent load — a
        single blocked tool call could starve the entire event loop.
        """
        from unittest.mock import patch

        from pocketpaw.tools.builtin.memory import RecallTool, RememberTool

        manager = self._make_isolated_manager(tmp_path)
        saver, finder = RememberTool(), RecallTool()

        async def _burst():
            # Two writes and one read share a single patched manager.
            with patch(self._MANAGER_TARGET, return_value=manager):
                await asyncio.gather(
                    saver.execute(content="fact one", tags=[]),
                    saver.execute(content="fact two", tags=[]),
                    finder.execute(query="fact"),
                )

        try:
            await asyncio.wait_for(_burst(), timeout=self._TIMEOUT_S)
        except TimeoutError:
            pytest.fail(
                "Concurrent tool calls timed out after 5s — "
                "possible event loop starvation from a blocking tool call."
            )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 5. Tool bridge integration — full pipeline without real SDK
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestToolBridgePipelineIntegration:
    """End-to-end tool bridge pipeline tests: policy → registry → tool list.

    These tests verify the full path that agent backends take when building
    their tool lists, without requiring the actual OpenAI/ADK SDKs to be
    installed.
    """

    @pytest.fixture(autouse=True)
    def _reset_soul(self):
        """Reset the soul manager singleton before and after each test.

        `_instantiate_all_tools` strips `remember/recall/forget` when a soul
        manager is active, and the tests in this class assert that those three
        tools are returned. Without the reset, a SoulManager left installed by
        an earlier test would make these assertions depend on test order.
        """
        from pocketpaw.soul.manager import _reset_manager

        _reset_manager()
        yield
        _reset_manager()

    def test_instantiate_all_tools_full_profile_returns_memory_tools(self):
        """With profile='full', all memory tools are instantiated."""
        from pocketpaw.agents.tool_bridge import _instantiate_all_tools

        tools = _instantiate_all_tools(backend="openai_agents")
        names = {t.name for t in tools}

        assert "remember" in names
        assert "recall" in names
        assert "forget" in names

    def test_tool_definition_schema_is_valid_for_memory_tools(self):
        """Each memory tool's definition must have name, description, and parameters."""
        from pocketpaw.agents.tool_bridge import _instantiate_all_tools

        tools = {t.name: t for t in _instantiate_all_tools(backend="openai_agents")}

        for tool_name in ("remember", "recall", "forget"):
            assert tool_name in tools, f"Tool '{tool_name}' not found"
            tool = tools[tool_name]
            defn = tool.definition

            assert defn.name, f"Tool '{tool_name}' has empty name"
            assert defn.description, f"Tool '{tool_name}' has empty description"
            assert defn.parameters is not None, f"Tool '{tool_name}' has no parameters schema"
            assert "properties" in defn.parameters, (
                f"Tool '{tool_name}' parameters schema missing 'properties' key"
            )

    def test_tool_count_is_consistent_across_backends(self):
        """The number of tools for non-SDK backends should be equal.

        All non-claude_agent_sdk backends use the same exclusion set
        (_ALWAYS_EXCLUDED only), so they should return the same tool count.
        """
        from pocketpaw.agents.tool_bridge import _instantiate_all_tools

        non_sdk_backends = ["openai_agents", "google_adk", "opencode", "codex_cli", "copilot_sdk"]
        counts = {b: len(_instantiate_all_tools(backend=b)) for b in non_sdk_backends}

        # All non-SDK backends must return the same count
        unique_counts = set(counts.values())
        assert len(unique_counts) == 1, (
            f"Non-SDK backends returned different tool counts: {counts}. "
            "This means backend-specific exclusions were accidentally added."
        )

    def test_claude_sdk_backend_has_fewer_tools_than_others(self):
        """claude_agent_sdk excludes shell/fs tools — it must return fewer tools."""
        from pocketpaw.agents.tool_bridge import _instantiate_all_tools

        sdk_count = len(_instantiate_all_tools(backend="claude_agent_sdk"))
        openai_count = len(_instantiate_all_tools(backend="openai_agents"))

        assert sdk_count < openai_count, (
            f"Expected claude_agent_sdk ({sdk_count} tools) to have fewer tools than "
            f"openai_agents ({openai_count} tools). "
            "Shell/fs tools should be excluded from the SDK backend (provided natively)."
        )
|