mirror of
https://github.com/pocketpaw/pocketpaw.git
synced 2026-05-13 21:21:53 +00:00
* fix(codex_cli): increase subprocess buffer limit to prevent chunk parsing errors (#601) The asyncio StreamReader default buffer of 64 KiB was too small for large NDJSON events from Codex CLI (e.g., Playwright MCP tool results). Increased to 10 MiB and added graceful handling for LimitOverrunError. * fix(codex_cli): properly handle buffer overrun with error event and cleanup The LimitOverrunError/IncompleteReadError handler was silently swallowing the error without yielding any event to the caller or cleaning up self._process. Now yields an error + done event and resets process state.
877 lines
31 KiB
Python
877 lines
31 KiB
Python
"""Tests for Codex CLI backend — mocked (no real CLI needed)."""
|
|
|
|
import sys
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from pocketpaw.agents.backend import Capability
|
|
from pocketpaw.config import Settings
|
|
|
|
# Dotted name of the asyncio factory to patch: the backend spawns Codex with
# create_subprocess_shell on Windows and create_subprocess_exec elsewhere.
_SUBPROCESS_PATCH = "asyncio.create_subprocess_" + (
    "shell" if sys.platform == "win32" else "exec"
)
|
|
|
|
|
|
class TestCodexCLIInfo:
    """Checks on the static metadata returned by ``CodexCLIBackend.info()``."""

    def test_info_static(self):
        """Identity, capabilities and builtin tools are advertised."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        described = CodexCLIBackend.info()
        assert described.name == "codex_cli"
        assert described.display_name == "Codex CLI"

        for capability in (
            Capability.STREAMING,
            Capability.TOOLS,
            Capability.MCP,
            Capability.MULTI_TURN,
            Capability.CUSTOM_SYSTEM_PROMPT,
        ):
            assert capability in described.capabilities

        for tool in ("shell", "web_search"):
            assert tool in described.builtin_tools

    def test_tool_policy_map(self):
        """Each Codex tool name maps onto the expected policy name."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        described = CodexCLIBackend.info()
        expected = {
            "shell": "shell",
            "file_edit": "write_file",
            "web_search": "browser",
        }
        for tool, policy in expected.items():
            assert described.tool_policy_map[tool] == policy

    def test_required_keys(self):
        """The OpenAI API key and provider are declared."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        described = CodexCLIBackend.info()
        assert "openai_api_key" in described.required_keys
        assert "openai" in described.supported_providers
|
|
|
|
|
|
class TestCodexCLIInit:
    """Construction, run-without-CLI, stop and status checks with ``shutil.which`` patched."""

    @patch("shutil.which", return_value="/usr/bin/codex")
    def test_init(self, mock_which):
        """When ``which`` resolves a path, the CLI is flagged as available."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        assert CodexCLIBackend(Settings())._cli_available is True

    @patch("shutil.which", return_value=None)
    def test_init_without_cli(self, mock_which):
        """When ``which`` finds nothing, the CLI is flagged as unavailable."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        assert CodexCLIBackend(Settings())._cli_available is False

    @pytest.mark.asyncio
    @patch("shutil.which", return_value=None)
    async def test_run_without_cli(self, mock_which):
        """Running without the CLI yields an error event mentioning "not found"."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        events = [event async for event in backend.run("test")]

        error_texts = [e.content for e in events if e.type == "error"]
        assert error_texts
        assert any("not found" in text for text in error_texts)

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_stop(self, mock_which):
        """``stop()`` raises the backend's stop flag."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        await backend.stop()
        assert backend._stop_flag is True

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_get_status(self, mock_which):
        """The status dict names the backend, CLI availability and a model."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        report = await CodexCLIBackend(Settings()).get_status()
        assert report["backend"] == "codex_cli"
        assert report["cli_available"] is True
        assert "model" in report
|
|
|
|
|
|
class TestCodexCLIHelpers:
    """Checks on ``CodexCLIBackend._inject_history``."""

    def test_inject_history(self):
        """The base prompt, a history heading and both turns appear in the result."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        turns = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi!"},
        ]
        rendered = CodexCLIBackend._inject_history("Base prompt.", turns)

        for fragment in (
            "Base prompt.",
            "# Recent Conversation",
            "**User**: Hello",
            "**Assistant**: Hi!",
        ):
            assert fragment in rendered

    def test_inject_history_truncates(self):
        """A 600-character message is cut to 500 characters followed by an ellipsis."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        oversized = "x" * 600
        rendered = CodexCLIBackend._inject_history(
            "Base.", [{"role": "user", "content": oversized}]
        )

        assert "x" * 500 + "..." in rendered
        assert "x" * 501 not in rendered
|
|
|
|
|
|
class _AsyncLineIterator:
    """Async iterator yielding each given line as newline-terminated UTF-8 bytes."""

    def __init__(self, lines: list[str]):
        # Encode everything up front; a plain iterator tracks the read position.
        self._pending = iter([(text + "\n").encode("utf-8") for text in lines])

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            return next(self._pending)
        except StopIteration:
            # Translate exhaustion into the async-iteration protocol's signal.
            raise StopAsyncIteration from None
|
|
|
|
|
|
def _ev(data: dict) -> str:
    """Return *data* as compact JSON (no spaces after separators) for mock stdout."""
    from json import dumps

    return dumps(data, separators=(",", ":"))
|
|
|
|
|
|
def _make_mock_process(stdout_lines: list[str], returncode: int = 0) -> MagicMock:
    """Build a ``MagicMock`` standing in for an asyncio subprocess.

    Args:
        stdout_lines: Lines served, in order, by the mock's async ``stdout``.
        returncode: Exit status reported once ``wait()`` has been awaited.

    Returns:
        The mock process. Its ``returncode`` is ``None`` until ``wait()`` is
        awaited, bytes passed to ``stdin.write`` accumulate in
        ``stdin.written``, and ``stderr.read()`` resolves to ``b""``.
    """
    proc = MagicMock()
    proc.returncode = None  # looks "still running" until wait() is awaited
    proc.stdout = _AsyncLineIterator(stdout_lines)
    proc.stderr = AsyncMock()
    proc.stderr.read = AsyncMock(return_value=b"")

    # The prompt is piped via stdin, so record every write for later assertions.
    stdin = MagicMock()
    stdin.written = bytearray()

    def record_write(data):
        stdin.written.extend(data)

    stdin.write = record_write
    stdin.drain = AsyncMock()
    stdin.close = MagicMock()
    stdin.wait_closed = AsyncMock()
    proc.stdin = stdin

    async def wait():
        # Mirror asyncio's Process.wait(): set returncode and also return it,
        # so code using either the attribute or the awaited value agrees.
        proc.returncode = returncode
        return returncode

    proc.wait = wait
    return proc
|
|
|
|
|
|
class TestCodexCLIRun:
    """Feeds mocked NDJSON stdout to ``backend.run`` and checks the emitted events."""

    @staticmethod
    def _new_backend():
        """Construct a backend with default settings (import deferred to call time)."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        return CodexCLIBackend(Settings())

    @staticmethod
    async def _collect(backend, prompt, mock_proc):
        """Run *prompt* with the subprocess factory patched to return *mock_proc*."""
        with patch(_SUBPROCESS_PATCH, return_value=mock_proc):
            return [event async for event in backend.run(prompt)]

    @staticmethod
    def _of_type(events, kind):
        """Return the events whose ``type`` equals *kind*, in order."""
        return [e for e in events if e.type == kind]

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_agent_message(self, mock_which):
        """A completed ``agent_message`` item becomes a single ``message`` event."""
        backend = self._new_backend()
        payload = {"id": "item_1", "type": "agent_message", "text": "Hello from Codex!"}
        proc = _make_mock_process([_ev({"type": "item.completed", "item": payload})])

        events = await self._collect(backend, "Hi", proc)

        messages = self._of_type(events, "message")
        assert len(messages) == 1
        assert messages[0].content == "Hello from Codex!"

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_command_execution_started(self, mock_which):
        """A started ``command_execution`` item becomes a ``shell`` ``tool_use`` event."""
        backend = self._new_backend()
        payload = {
            "id": "item_1",
            "type": "command_execution",
            "command": "bash -lc ls",
            "status": "in_progress",
        }
        proc = _make_mock_process([_ev({"type": "item.started", "item": payload})])

        events = await self._collect(backend, "list files", proc)

        tool_uses = self._of_type(events, "tool_use")
        assert len(tool_uses) == 1
        assert tool_uses[0].metadata["name"] == "shell"
        assert "ls" in tool_uses[0].metadata["input"]["command"]

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_command_execution_completed(self, mock_which):
        """A completed ``command_execution`` item becomes a ``shell`` ``tool_result``."""
        backend = self._new_backend()
        payload = {
            "id": "item_1",
            "type": "command_execution",
            "output": "file1.txt\nfile2.txt",
        }
        proc = _make_mock_process([_ev({"type": "item.completed", "item": payload})])

        events = await self._collect(backend, "list files", proc)

        outcomes = self._of_type(events, "tool_result")
        assert len(outcomes) == 1
        assert outcomes[0].metadata["name"] == "shell"

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_file_change_started(self, mock_which):
        """A started ``file_change`` item becomes a ``file_edit`` ``tool_use`` event."""
        backend = self._new_backend()
        payload = {
            "id": "item_2",
            "type": "file_change",
            "filename": "main.py",
            "status": "in_progress",
        }
        proc = _make_mock_process([_ev({"type": "item.started", "item": payload})])

        events = await self._collect(backend, "edit file", proc)

        tool_uses = self._of_type(events, "tool_use")
        assert len(tool_uses) == 1
        assert tool_uses[0].metadata["name"] == "file_edit"
        assert "main.py" in tool_uses[0].content

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_file_change_completed(self, mock_which):
        """A completed ``file_change`` item becomes a ``tool_result`` naming the file."""
        backend = self._new_backend()
        payload = {"id": "item_2", "type": "file_change", "filename": "main.py"}
        proc = _make_mock_process([_ev({"type": "item.completed", "item": payload})])

        events = await self._collect(backend, "edit file", proc)

        outcomes = self._of_type(events, "tool_result")
        assert len(outcomes) == 1
        assert "main.py" in outcomes[0].content

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_web_search(self, mock_which):
        """A ``web_search`` start/complete pair gives one ``tool_use`` and one ``tool_result``."""
        backend = self._new_backend()
        begun = {"id": "item_3", "type": "web_search", "query": "python asyncio"}
        finished = {"id": "item_3", "type": "web_search", "output": "Results found"}
        proc = _make_mock_process(
            [
                _ev({"type": "item.started", "item": begun}),
                _ev({"type": "item.completed", "item": finished}),
            ]
        )

        events = await self._collect(backend, "search", proc)

        tool_uses = self._of_type(events, "tool_use")
        outcomes = self._of_type(events, "tool_result")
        assert len(tool_uses) == 1
        assert "asyncio" in tool_uses[0].content
        assert len(outcomes) == 1
        assert outcomes[0].metadata["name"] == "web_search"

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_mcp_tool_call(self, mock_which):
        """An ``mcp_tool_call`` pair carries the tool name and arguments through."""
        backend = self._new_backend()
        begun = {
            "id": "item_4",
            "type": "mcp_tool_call",
            "name": "my_tool",
            "arguments": {"key": "val"},
        }
        finished = {
            "id": "item_4",
            "type": "mcp_tool_call",
            "name": "my_tool",
            "output": "done",
        }
        proc = _make_mock_process(
            [
                _ev({"type": "item.started", "item": begun}),
                _ev({"type": "item.completed", "item": finished}),
            ]
        )

        events = await self._collect(backend, "use mcp", proc)

        tool_uses = self._of_type(events, "tool_use")
        outcomes = self._of_type(events, "tool_result")
        assert len(tool_uses) == 1
        assert tool_uses[0].metadata["name"] == "my_tool"
        assert tool_uses[0].metadata["input"] == {"key": "val"}
        assert len(outcomes) == 1

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_reasoning(self, mock_which):
        """A completed ``reasoning`` item becomes a single ``thinking`` event."""
        backend = self._new_backend()
        payload = {"id": "item_5", "type": "reasoning", "text": "Thinking about this..."}
        proc = _make_mock_process([_ev({"type": "item.completed", "item": payload})])

        events = await self._collect(backend, "think", proc)

        thoughts = self._of_type(events, "thinking")
        assert len(thoughts) == 1
        assert "Thinking" in thoughts[0].content

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_parses_turn_completed_usage(self, mock_which):
        """``turn.completed`` usage counters surface in a ``token_usage`` event."""
        backend = self._new_backend()
        counters = {
            "input_tokens": 100,
            "cached_input_tokens": 50,
            "output_tokens": 25,
        }
        proc = _make_mock_process([_ev({"type": "turn.completed", "usage": counters})])

        events = await self._collect(backend, "test", proc)

        usage_events = self._of_type(events, "token_usage")
        assert len(usage_events) == 1
        reported = usage_events[0].metadata
        assert reported["input_tokens"] == 100
        assert reported["output_tokens"] == 25
        assert reported["cached_input_tokens"] == 50

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_handles_error_event(self, mock_which):
        """An ``error`` line from the CLI becomes exactly one ``error`` event."""
        backend = self._new_backend()
        proc = _make_mock_process(
            [_ev({"type": "error", "message": "Rate limit exceeded"})]
        )

        events = await self._collect(backend, "test", proc)

        failures = self._of_type(events, "error")
        assert len(failures) == 1
        assert "Rate limit" in failures[0].content

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_handles_turn_failed(self, mock_which):
        """A ``turn.failed`` line becomes exactly one ``error`` event."""
        backend = self._new_backend()
        proc = _make_mock_process(
            [_ev({"type": "turn.failed", "message": "Model overloaded"})]
        )

        events = await self._collect(backend, "test", proc)

        failures = self._of_type(events, "error")
        assert len(failures) == 1
        assert "overloaded" in failures[0].content.lower()

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_handles_process_failure(self, mock_which):
        """Exit status 1 with stderr output and no stdout produces an ``error`` event."""
        backend = self._new_backend()
        proc = _make_mock_process([], returncode=1)
        proc.stderr = AsyncMock()
        proc.stderr.read = AsyncMock(return_value=b"fatal error")

        events = await self._collect(backend, "test", proc)

        failures = self._of_type(events, "error")
        assert len(failures) >= 1
        assert any("error" in e.content.lower() for e in failures)

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_skips_invalid_json(self, mock_which):
        """A non-JSON stdout line is ignored; the following valid event still parses."""
        backend = self._new_backend()
        payload = {"id": "item_1", "type": "agent_message", "text": "OK"}
        proc = _make_mock_process(
            [
                "not valid json",
                _ev({"type": "item.completed", "item": payload}),
            ]
        )

        events = await self._collect(backend, "test", proc)

        messages = self._of_type(events, "message")
        assert len(messages) == 1
        assert messages[0].content == "OK"

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_full_conversation_flow(self, mock_which):
        """End-to-end: thread start -> command -> message -> usage -> done."""
        backend = self._new_backend()
        command_begun = {
            "id": "i1",
            "type": "command_execution",
            "command": "bash -lc ls",
            "status": "in_progress",
        }
        command_finished = {"id": "i1", "type": "command_execution", "output": "README.md"}
        reply = {"id": "i2", "type": "agent_message", "text": "Has a README."}
        counters = {
            "input_tokens": 500,
            "cached_input_tokens": 400,
            "output_tokens": 50,
        }
        proc = _make_mock_process(
            [
                _ev({"type": "thread.started", "thread_id": "abc-123"}),
                _ev({"type": "turn.started"}),
                _ev({"type": "item.started", "item": command_begun}),
                _ev({"type": "item.completed", "item": command_finished}),
                _ev({"type": "item.completed", "item": reply}),
                _ev({"type": "turn.completed", "usage": counters}),
            ]
        )

        events = await self._collect(backend, "summarize", proc)

        seen = [e.type for e in events]
        for expected in ("tool_use", "tool_result", "message", "token_usage"):
            assert expected in seen
        assert seen[-1] == "done"
|
|
|
|
|
|
class TestCodexCLICrossBackend:
    """Checks how prompt, history and system prompt reach the Codex subprocess."""

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_history_seeded_on_new_session(self, mock_which):
        """History is injected into prompt for context portability."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        spawned = []  # mock processes handed out, in call order

        async def capture_exec(*args, **kwargs):
            proc = _make_mock_process([])
            spawned.append(proc)
            return proc

        history = [
            {"role": "user", "content": "From previous backend"},
            {"role": "assistant", "content": "I remember that context"},
        ]

        with patch(_SUBPROCESS_PATCH, side_effect=capture_exec):
            async for _ in backend.run(
                "Continue our chat",
                system_prompt="You are PocketPaw.",
                history=history,
                session_key="s1",
            ):
                pass

        assert spawned, "backend never spawned a subprocess"
        # The prompt is piped via stdin, so inspect what was written there.
        prompt_value = spawned[-1].stdin.written.decode("utf-8")
        assert "Recent Conversation" in prompt_value
        assert "From previous backend" in prompt_value

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_history_not_injected_when_empty(self, mock_which):
        """No history section when history is empty."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        spawned = []

        async def capture_exec(*args, **kwargs):
            proc = _make_mock_process([])
            spawned.append(proc)
            return proc

        with patch(_SUBPROCESS_PATCH, side_effect=capture_exec):
            async for _ in backend.run(
                "Hello",
                system_prompt="You are PocketPaw.",
                session_key="s1",
            ):
                pass

        assert spawned, "backend never spawned a subprocess"
        prompt_value = spawned[-1].stdin.written.decode("utf-8")
        assert "Recent Conversation" not in prompt_value

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_system_prompt_injected(self, mock_which):
        """System prompt is passed via model_instructions_file, not via stdin."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        system_prompt = "You are a helpful assistant."
        spawned = []  # (positional args, mock process) per spawn

        async def capture_exec(*args, **kwargs):
            proc = _make_mock_process([])
            spawned.append((args, proc))
            return proc

        with patch(_SUBPROCESS_PATCH, side_effect=capture_exec):
            async for _ in backend.run(
                "Hello",
                system_prompt=system_prompt,
                session_key="s1",
            ):
                pass

        assert spawned, "backend never spawned a subprocess"
        captured_cmd, captured_proc = spawned[-1]

        # System prompt is passed via -c model_instructions_file=<path>, not stdin
        if sys.platform == "win32":
            # create_subprocess_shell receives the whole command as one string
            assert "model_instructions_file=" in captured_cmd[0]
        else:
            instructions_args = [
                a for a in captured_cmd if "model_instructions_file=" in a
            ]
            assert instructions_args, "Expected model_instructions_file in command args"

        # Stdin should contain only the user message, not the system prompt
        prompt_value = captured_proc.stdin.written.decode("utf-8")
        assert "Hello" in prompt_value
        # Previously only the positive half of the claim above was checked.
        assert system_prompt not in prompt_value

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_uses_codex_exec_json_full_auto(self, mock_which):
        """Verify the subprocess command includes exec --json --full-auto."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        captured = []  # positional args of each spawn

        async def capture_exec(*args, **kwargs):
            captured.append(args)
            return _make_mock_process([])

        with patch(_SUBPROCESS_PATCH, side_effect=capture_exec):
            async for _ in backend.run("test"):
                pass

        assert captured, "backend never spawned a subprocess"
        captured_cmd = captured[-1]

        if sys.platform == "win32":
            # On Windows, create_subprocess_shell receives a single string
            cmd_str = captured_cmd[0]
            # Ensure "codex" appears as the binary, not as part of a model name
            assert cmd_str.split()[0].endswith("codex")
            for token in ("exec", "--json", "--full-auto", "--model"):
                assert token in cmd_str
        else:
            cmd_list = list(captured_cmd)
            assert "codex" in cmd_list[0]
            assert cmd_list[1] == "exec"
            for flag in ("--json", "--full-auto", "--model"):
                assert flag in cmd_list
            assert "-" in cmd_list  # prompt read from stdin
|
|
|
|
|
|
class TestCodexCLIValidation:
    """Model-name validation and stdin broken-pipe handling."""

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_rejects_malicious_model_name(self, mock_which):
        """Model names with shell metacharacters are rejected."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        settings = Settings()
        settings.codex_cli_model = 'gpt-4" & dir'
        backend = CodexCLIBackend(settings)

        events = [event async for event in backend.run("test")]

        errors = [e for e in events if e.type == "error"]
        assert len(errors) == 1
        assert "Invalid model name" in errors[0].content

    # The two regex checks below are pure and synchronous, so they need neither
    # the asyncio marker nor the shutil.which patch. Parametrizing reports every
    # offending name instead of stopping at the first failure.
    @pytest.mark.parametrize(
        "name",
        [
            "gpt-5.3-codex",
            "gpt-4o",
            "o3-mini",
            "claude-3.5-sonnet",
            "my_custom:latest",
        ],
    )
    def test_accepts_valid_model_names(self, name):
        """Standard model names pass validation."""
        from pocketpaw.agents.codex_cli import _MODEL_NAME_RE

        assert _MODEL_NAME_RE.match(name), f"{name!r} should be valid"

    @pytest.mark.parametrize(
        "name",
        [
            'gpt-4" & dir',
            "model; rm -rf /",
            "model$(whoami)",
            "model`id`",
            "model name with spaces",
        ],
    )
    def test_rejects_invalid_model_names(self, name):
        """Model names with dangerous characters are rejected."""
        from pocketpaw.agents.codex_cli import _MODEL_NAME_RE

        assert not _MODEL_NAME_RE.match(name), f"{name!r} should be invalid"

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_broken_pipe_handling(self, mock_which):
        """BrokenPipeError when Codex CLI crashes before reading stdin."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())

        # Built by hand rather than via _make_mock_process because stdin.write
        # has to raise instead of recording the data.
        mock_proc = MagicMock()
        mock_proc.returncode = None
        mock_proc.stdout = _AsyncLineIterator([])
        mock_proc.stderr = AsyncMock()
        mock_proc.stderr.read = AsyncMock(return_value=b"segfault")

        mock_stdin = MagicMock()
        mock_stdin.write = MagicMock(side_effect=BrokenPipeError("broken"))
        mock_stdin.drain = AsyncMock()
        mock_stdin.close = MagicMock()
        mock_stdin.wait_closed = AsyncMock()
        mock_proc.stdin = mock_stdin

        with patch(_SUBPROCESS_PATCH, return_value=mock_proc):
            events = [event async for event in backend.run("test")]

        errors = [e for e in events if e.type == "error"]
        assert len(errors) == 1
        assert "exited before reading" in errors[0].content
        assert "segfault" in errors[0].content
|
|
|
|
|
|
class TestCodexCLIBufferLimit:
    """Checks on the raised subprocess stream buffer limit and overrun handling."""

    def test_buffer_limit_constant(self):
        """The limit is 10 MiB, above the 64 KiB asyncio default."""
        from pocketpaw.agents.codex_cli import _SUBPROCESS_BUFFER_LIMIT

        # Must be larger than the asyncio default of 64 KiB
        assert _SUBPROCESS_BUFFER_LIMIT > 65536
        assert _SUBPROCESS_BUFFER_LIMIT == 10 * 1024 * 1024

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_subprocess_receives_buffer_limit(self, mock_which):
        """The subprocess factory is called with ``limit`` set to the constant."""
        from pocketpaw.agents.codex_cli import _SUBPROCESS_BUFFER_LIMIT, CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        seen_kwargs = {}

        async def capture_exec(*args, **kwargs):
            seen_kwargs.update(kwargs)
            return _make_mock_process([])

        with patch(_SUBPROCESS_PATCH, side_effect=capture_exec):
            async for _ in backend.run("test"):
                pass

        assert "limit" in seen_kwargs
        assert seen_kwargs["limit"] == _SUBPROCESS_BUFFER_LIMIT

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_handles_large_mcp_output(self, mock_which):
        """A 100 KiB MCP tool result line is parsed into a single ``tool_result``."""
        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())
        # 100 KiB of payload, larger than asyncio's default 64 KiB line limit
        payload = {
            "id": "item_mcp",
            "type": "mcp_tool_call",
            "name": "playwright_snapshot",
            "output": "x" * (100 * 1024),
        }
        mock_proc = _make_mock_process(
            [_ev({"type": "item.completed", "item": payload})]
        )

        with patch(_SUBPROCESS_PATCH, return_value=mock_proc):
            events = [event async for event in backend.run("browse page")]

        outcomes = [e for e in events if e.type == "tool_result"]
        assert len(outcomes) == 1
        assert outcomes[0].metadata["name"] == "playwright_snapshot"

    @pytest.mark.asyncio
    @patch("shutil.which", return_value="/usr/bin/codex")
    async def test_limit_overrun_recovers_gracefully(self, mock_which):
        """A ``LimitOverrunError`` from stdout yields one error, then ``done``, without raising."""
        import asyncio as _asyncio

        from pocketpaw.agents.codex_cli import CodexCLIBackend

        backend = CodexCLIBackend(Settings())

        class _AlwaysOverrun:
            """Async iterator whose first read raises ``LimitOverrunError``."""

            def __aiter__(self):
                return self

            async def __anext__(self):
                raise _asyncio.LimitOverrunError("chunk is longer than limit", 0)

        mock_proc = _make_mock_process([])
        mock_proc.stdout = _AlwaysOverrun()

        with patch(_SUBPROCESS_PATCH, return_value=mock_proc):
            events = [event async for event in backend.run("test")]

        # Should not crash; yields error + done instead of raising
        failures = [e for e in events if e.type == "error"]
        assert len(failures) == 1
        assert "buffer limit" in failures[0].content
        assert events[-1].type == "done"
|
|
|
|
|
|
class TestCodexCLIRegistry:
    """Checks that ``codex_cli`` is reachable through the backend registry."""

    def test_registered_in_backend_registry(self):
        """Looking up ``codex_cli`` returns the ``CodexCLIBackend`` class."""
        from pocketpaw.agents.registry import get_backend_class

        backend_cls = get_backend_class("codex_cli")
        assert backend_cls is not None
        assert backend_cls.__name__ == "CodexCLIBackend"

    def test_backend_info_via_registry(self):
        """Registry info for ``codex_cli`` carries the expected names."""
        from pocketpaw.agents.registry import get_backend_info

        described = get_backend_info("codex_cli")
        assert described is not None
        assert described.name == "codex_cli"
        assert described.display_name == "Codex CLI"

    def test_listed_in_backends(self):
        """``codex_cli`` appears in the registry's backend listing."""
        from pocketpaw.agents.registry import list_backends

        assert "codex_cli" in list_backends()
|