mirror of
https://github.com/pocketpaw/pocketpaw.git
synced 2026-05-20 08:49:49 +00:00
Updated Tests to use Settings(_env_file=None) to prevent leakage from local .env files, and added monkeypatching for POCKETPAW_ env vars to ensure consistent expected defaults across environments.
342 lines
12 KiB
Python
342 lines
12 KiB
Python
"""
|
|
Tests for Agent backends, protocol, and router.
|
|
|
|
Updated for multi-SDK architecture:
|
|
- ClaudeSDKBackend (was ClaudeAgentSDKWrapper)
|
|
- Registry-based routing
|
|
- Removed: ExecutorProtocol, OrchestratorProtocol, pocketpaw_native, open_interpreter
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from pocketpaw.config import Settings
|
|
|
|
# =============================================================================
|
|
# PROTOCOL TESTS
|
|
# =============================================================================
|
|
|
|
|
|
class TestAgentProtocol:
|
|
"""Tests for the agent protocol module."""
|
|
|
|
def test_agent_event_creation(self):
|
|
from pocketpaw.agents.protocol import AgentEvent
|
|
|
|
event = AgentEvent(type="message", content="Hello")
|
|
assert event.type == "message"
|
|
assert event.content == "Hello"
|
|
assert event.metadata == {}
|
|
|
|
def test_agent_event_with_metadata(self):
|
|
from pocketpaw.agents.protocol import AgentEvent
|
|
|
|
event = AgentEvent(type="tool_use", content="Using Bash", metadata={"name": "Bash"})
|
|
assert event.metadata == {"name": "Bash"}
|
|
|
|
def test_agent_event_types(self):
|
|
from pocketpaw.agents.protocol import AgentEvent
|
|
|
|
types = ["message", "tool_use", "tool_result", "thinking", "error", "done"]
|
|
for event_type in types:
|
|
event = AgentEvent(type=event_type, content="test")
|
|
assert event.type == event_type
|
|
|
|
|
|
# =============================================================================
|
|
# CLAUDE AGENT SDK TESTS
|
|
# =============================================================================
|
|
|
|
|
|
class TestClaudeAgentSDK:
|
|
"""Tests for Claude Agent SDK backend."""
|
|
|
|
def test_sdk_class_importable(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
assert ClaudeSDKBackend is not None
|
|
|
|
def test_backward_compat_aliases(self):
|
|
from pocketpaw.agents.claude_sdk import (
|
|
ClaudeAgentSDK,
|
|
ClaudeAgentSDKWrapper,
|
|
ClaudeSDKBackend,
|
|
)
|
|
|
|
assert ClaudeAgentSDK is ClaudeSDKBackend
|
|
assert ClaudeAgentSDKWrapper is ClaudeSDKBackend
|
|
|
|
def test_sdk_initializes_without_error(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
settings = Settings()
|
|
backend = ClaudeSDKBackend(settings)
|
|
assert backend is not None
|
|
|
|
def test_info_static_method(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
info = ClaudeSDKBackend.info()
|
|
assert info.name == "claude_agent_sdk"
|
|
assert info.display_name == "Claude Agent SDK"
|
|
assert "Bash" in info.builtin_tools
|
|
assert info.tool_policy_map["Bash"] == "shell"
|
|
|
|
def test_dangerous_pattern_detection(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
settings = Settings()
|
|
sdk = ClaudeSDKBackend(settings)
|
|
|
|
assert sdk._is_dangerous_command("rm -rf /") is not None
|
|
assert sdk._is_dangerous_command("rm -rf ~") is not None
|
|
assert sdk._is_dangerous_command("sudo rm /important") is not None
|
|
assert sdk._is_dangerous_command("ls -la") is None
|
|
assert sdk._is_dangerous_command("cat file.txt") is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_dangerous_hook_fails_closed_on_exception(self):
|
|
"""Hook must block (not allow) commands when an internal error occurs.
|
|
|
|
Regression test for GH-852: a RuntimeError inside the hook previously
|
|
returned {} (allow). After the fix it must return a deny decision.
|
|
"""
|
|
from unittest.mock import patch
|
|
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
sdk = ClaudeSDKBackend(Settings())
|
|
|
|
input_data = {"tool_name": "Bash", "tool_input": {"command": "ls"}}
|
|
|
|
# Inject a RuntimeError into _is_dangerous_command so the outer
|
|
# except clause fires.
|
|
with patch.object(sdk, "_is_dangerous_command", side_effect=RuntimeError("boom")):
|
|
result = await sdk._block_dangerous_hook(input_data, None, None)
|
|
|
|
# Must deny, not allow
|
|
hook_output = result.get("hookSpecificOutput", {})
|
|
assert hook_output.get("permissionDecision") == "deny"
|
|
assert "internal error" in hook_output.get("permissionDecisionReason", "").lower()
|
|
|
|
def test_sdk_has_system_prompt(self):
|
|
from pocketpaw.agents.claude_sdk import _DEFAULT_IDENTITY
|
|
|
|
assert "PocketPaw" in _DEFAULT_IDENTITY
|
|
|
|
def test_default_instructions_exist(self):
|
|
from pocketpaw.bootstrap.default_provider import _DEFAULT_INSTRUCTIONS
|
|
|
|
assert len(_DEFAULT_INSTRUCTIONS) > 100
|
|
assert "PocketPaw Tools" in _DEFAULT_INSTRUCTIONS
|
|
|
|
def test_sdk_has_dangerous_patterns(self):
|
|
from pocketpaw.security.rails import DANGEROUS_SUBSTRINGS
|
|
|
|
assert isinstance(DANGEROUS_SUBSTRINGS, list)
|
|
assert "rm -rf /" in DANGEROUS_SUBSTRINGS
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_sdk_status(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
sdk = ClaudeSDKBackend(Settings())
|
|
status = await sdk.get_status()
|
|
assert status["backend"] == "claude_agent_sdk"
|
|
assert "available" in status
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_sdk_stop(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
sdk = ClaudeSDKBackend(Settings())
|
|
assert sdk._stop_flag is False
|
|
await sdk.stop()
|
|
assert sdk._stop_flag is True
|
|
|
|
def test_sdk_set_working_directory(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
sdk = ClaudeSDKBackend(Settings())
|
|
new_path = Path("/tmp")
|
|
sdk.set_working_directory(new_path)
|
|
assert sdk._cwd == new_path
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_sdk_run_without_sdk_installed(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
|
|
sdk = ClaudeSDKBackend(Settings())
|
|
sdk._sdk_available = False
|
|
|
|
events = []
|
|
async for event in sdk.run("test message"):
|
|
events.append(event)
|
|
|
|
assert len(events) == 1
|
|
assert events[0].type == "error"
|
|
assert "not found" in events[0].content.lower()
|
|
|
|
|
|
# =============================================================================
|
|
# ROUTER TESTS
|
|
# =============================================================================
|
|
|
|
|
|
class TestAgentRouter:
|
|
"""Tests for agent router (registry-based)."""
|
|
|
|
def test_router_importable(self):
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
assert AgentRouter is not None
|
|
|
|
def test_router_defaults_to_claude_agent_sdk(self, monkeypatch):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
monkeypatch.delenv("POCKETPAW_AGENT_BACKEND", raising=False)
|
|
settings = Settings(_env_file=None)
|
|
router = AgentRouter(settings)
|
|
assert router._backend is not None
|
|
assert isinstance(router._backend, ClaudeSDKBackend)
|
|
|
|
def test_router_selects_claude_agent_sdk(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
settings = Settings(agent_backend="claude_agent_sdk")
|
|
router = AgentRouter(settings)
|
|
assert isinstance(router._backend, ClaudeSDKBackend)
|
|
|
|
def test_router_legacy_pocketpaw_native_falls_back(self):
|
|
"""Legacy 'pocketpaw_native' should fall back to claude_agent_sdk."""
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
settings = Settings(agent_backend="pocketpaw_native")
|
|
router = AgentRouter(settings)
|
|
assert isinstance(router._backend, ClaudeSDKBackend)
|
|
|
|
def test_router_legacy_open_interpreter_falls_back(self):
|
|
"""Legacy 'open_interpreter' should fall back to claude_agent_sdk."""
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
settings = Settings(agent_backend="open_interpreter")
|
|
router = AgentRouter(settings)
|
|
assert isinstance(router._backend, ClaudeSDKBackend)
|
|
|
|
def test_router_falls_back_on_unknown(self):
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
settings = Settings(agent_backend="unknown_backend_xyz")
|
|
router = AgentRouter(settings)
|
|
assert isinstance(router._backend, ClaudeSDKBackend)
|
|
|
|
def test_router_get_backend_info(self, monkeypatch):
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
monkeypatch.delenv("POCKETPAW_AGENT_BACKEND", raising=False)
|
|
router = AgentRouter(Settings(_env_file=None))
|
|
info = router.get_backend_info()
|
|
assert info is not None
|
|
assert info.name == "claude_agent_sdk"
|
|
|
|
def test_router_get_backend_info_uses_fallback_backend(self):
|
|
"""When configured backend is invalid, info should reflect active fallback backend."""
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
router = AgentRouter(Settings(agent_backend="unknown_backend_xyz"))
|
|
info = router.get_backend_info()
|
|
assert info is not None
|
|
assert info.name == "claude_agent_sdk"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_router_has_run_method(self):
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
router = AgentRouter(Settings())
|
|
assert hasattr(router, "run")
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_router_has_stop_method(self):
|
|
from pocketpaw.agents.router import AgentRouter
|
|
|
|
router = AgentRouter(Settings())
|
|
assert hasattr(router, "stop")
|
|
await router.stop()
|
|
|
|
|
|
# =============================================================================
|
|
# CLAUDE SDK CLI AUTH BUG — REPRODUCTION TESTS
|
|
# =============================================================================
|
|
|
|
|
|
class TestClaudeSDKCliAuth:
|
|
"""Bug reproduction: PocketPaw requires API key even when Claude CLI is authenticated."""
|
|
|
|
def test_auto_resolve_no_key_gives_ollama(self, monkeypatch):
|
|
from pocketpaw.llm.client import resolve_llm_client
|
|
|
|
monkeypatch.delenv("POCKETPAW_LLM_PROVIDER", raising=False)
|
|
settings = Settings(_env_file=None)
|
|
llm = resolve_llm_client(settings)
|
|
assert llm.provider == "ollama"
|
|
|
|
def test_force_anthropic_no_key_returns_anthropic(self, monkeypatch):
|
|
from pocketpaw.llm.client import resolve_llm_client
|
|
|
|
monkeypatch.delenv("POCKETPAW_LLM_PROVIDER", raising=False)
|
|
settings = Settings(_env_file=None)
|
|
llm = resolve_llm_client(settings, force_provider="anthropic")
|
|
assert llm.provider == "anthropic"
|
|
assert llm.api_key is None
|
|
|
|
def test_no_key_anthropic_to_sdk_env_is_empty(self):
|
|
from pocketpaw.llm.client import LLMClient
|
|
|
|
llm = LLMClient(
|
|
provider="anthropic",
|
|
model="claude-sonnet-4-5-20250929",
|
|
api_key=None,
|
|
ollama_host="http://localhost:11434",
|
|
)
|
|
assert llm.to_sdk_env() == {}
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_claude_sdk_run_resolves_anthropic_not_ollama(self, monkeypatch):
|
|
"""run() should resolve to anthropic, not fall to ollama."""
|
|
from unittest.mock import patch
|
|
|
|
from pocketpaw.agents.claude_sdk import ClaudeSDKBackend
|
|
from pocketpaw.llm.client import resolve_llm_client as real_resolve
|
|
|
|
monkeypatch.delenv("POCKETPAW_LLM_PROVIDER", raising=False)
|
|
settings = Settings(
|
|
_env_file=None, agent_backend="claude_agent_sdk", smart_routing_enabled=False
|
|
)
|
|
with patch("shutil.which", return_value="/usr/bin/claude"):
|
|
sdk = ClaudeSDKBackend(settings)
|
|
|
|
resolved_providers: list[str] = []
|
|
|
|
def spy_resolve(s, **kwargs):
|
|
result = real_resolve(s, **kwargs)
|
|
resolved_providers.append(result.provider)
|
|
return result
|
|
|
|
def stop_execution(**kwargs):
|
|
raise RuntimeError("test_stop_before_sdk_query")
|
|
|
|
with patch("pocketpaw.llm.client.resolve_llm_client", side_effect=spy_resolve):
|
|
sdk._ClaudeAgentOptions = stop_execution
|
|
events = []
|
|
async for event in sdk.run("test"):
|
|
events.append(event)
|
|
|
|
assert len(resolved_providers) > 0
|
|
assert resolved_providers[0] == "anthropic"
|