Add agent primitives: run_python, install_package, edit_file (#582)

* feat(tools): add run_python, install_package, and edit_file primitives

Three new builtin tools that close the gap between "agent that chats"
and "agent that builds things":

- run_python: sandboxed Python execution via subprocess (trust: elevated).
  Writes the code to a temp file in the file jail and runs it with
  sys.executable. Guardian AI reviews the code before execution, and the
  temp file is removed in a finally block.

- install_package: pip install with input validation and Guardian review
  (trust: elevated). Whitelist regex blocks shell injection. Subprocess
  runs without shell=True for defense-in-depth.

- edit_file: find-and-replace file editing (trust: standard). Closes the
  gap where policy.py referenced edit_file in group:fs but no tool existed.
  Supports unique match (default) and replace_all modes.

Wiring changes:
- __init__.py: lazy imports for all three tools
- policy.py: run_python added to group:shell, new group:packages with
  install_package, coding profile includes group:packages
- tool_bridge.py: EditFileTool excluded for claude_agent_sdk (has native Edit)

31 new tests across 3 test files, all passing.

Closes #581

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat(tools): add deliver_artifact tool for sending files to users

Standalone tool that lets the agent explicitly send any file (image,
video, audio, PDF) to the user through their current channel. Bridges
the gap between _media_result() (tool-internal) and agent-accessible
file delivery.

Uses the existing media pipeline: _media_result() -> AgentLoop extraction
-> OutboundMessage.media -> channel adapter _send_media_file().

9 new tests, all passing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: use get_running_loop, remove whitespace from regex, clean up em dashes

- Replace deprecated asyncio.get_event_loop() with get_running_loop()
  in python_exec.py and pip_install.py
- Remove \s from package spec regex (single package at a time, no spaces)
- Replace em dashes with hyphens/commas per project convention

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Rohit Kushwaha <rohitk290106@gmail.com>
Co-authored-by: Rohit Kushwaha <technicalrohit06@gmail.com>
This commit is contained in:
Prakash Dalai
2026-03-16 21:36:34 +05:30
committed by GitHub
parent e5d564264a
commit 18dd74a3a4
12 changed files with 1168 additions and 8 deletions

View File

@@ -7,9 +7,12 @@ Provides:
- get_tool_instructions_compact(): compact markdown for system-prompt injection
Backend-aware exclusion:
- claude_agent_sdk: shell/fs tools excluded (provided natively by CLI)
- All other backends: shell/fs tools included via the bridge
- claude_agent_sdk: shell/fs/edit tools excluded (provided natively by CLI)
- All other backends: shell/fs/edit tools included via the bridge
- BrowserTool/DesktopTool: always excluded (need special session state)
Changes:
- 2026-03-12: Added EditFileTool to _CLAUDE_SDK_EXCLUDED (has native Edit)
"""
from __future__ import annotations
@@ -28,7 +31,9 @@ logger = logging.getLogger(__name__)
_ALWAYS_EXCLUDED = frozenset({"BrowserTool", "DesktopTool"})
# Tools excluded only for claude_agent_sdk -- these are provided natively by the CLI.
_CLAUDE_SDK_EXCLUDED = frozenset({"ShellTool", "ReadFileTool", "WriteFileTool", "ListDirTool"})
_CLAUDE_SDK_EXCLUDED = frozenset({
"ShellTool", "ReadFileTool", "WriteFileTool", "ListDirTool", "EditFileTool",
})
def _instantiate_all_tools(backend: str = "claude_agent_sdk") -> list[BaseTool]:

View File

@@ -7,6 +7,7 @@
# - 2026-02-09: Added STT, Drive, Docs, Spotify, OCR, Reddit tools
# - 2026-02-09: Converted to lazy __getattr__ to avoid ImportError when optional deps missing
# - 2026-02-17: Added HealthCheckTool, ErrorLogTool, ConfigDoctorTool for health engine
# - 2026-03-12: Added EditFileTool, RunPythonTool, InstallPackageTool (issue #581)
import importlib as _importlib
@@ -16,6 +17,7 @@ _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
"ReadFileTool": (".filesystem", "ReadFileTool"),
"WriteFileTool": (".filesystem", "WriteFileTool"),
"ListDirTool": (".filesystem", "ListDirTool"),
"EditFileTool": (".filesystem", "EditFileTool"),
"BrowserTool": (".browser", "BrowserTool"),
"RememberTool": (".memory", "RememberTool"),
"RecallTool": (".memory", "RecallTool"),
@@ -67,6 +69,9 @@ _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
"OpenExplorerTool": (".explorer", "OpenExplorerTool"),
"DirectoryTreeTool": (".tree", "DirectoryTreeTool"),
"SystemInfoTool": (".sysinfo", "SystemInfoTool"),
"RunPythonTool": (".python_exec", "RunPythonTool"),
"InstallPackageTool": (".pip_install", "InstallPackageTool"),
"DeliverArtifactTool": (".deliver", "DeliverArtifactTool"),
"DiscordCLITool": (".discord", "DiscordCLITool"),
}

View File

@@ -0,0 +1,79 @@
# Artifact delivery tool - sends files back to the user via their channel.
# Created: 2026-03-12
import mimetypes
from pathlib import Path
from typing import Any
from pocketpaw.config import get_settings
from pocketpaw.tools.fetch import is_safe_path
from pocketpaw.tools.protocol import BaseTool
class DeliverArtifactTool(BaseTool):
    """Tool that hands a file inside the file jail back to the user."""

    @property
    def name(self) -> str:
        """Identifier the agent uses to call this tool."""
        return "deliver_artifact"

    @property
    def description(self) -> str:
        """Human-readable summary shown to the model."""
        return (
            "Send a file (image, video, audio, PDF, etc.) to the user through "
            "their current channel. Use after creating or downloading a file "
            "that the user should receive."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        """JSON-schema description of the accepted arguments."""
        path_schema = {
            "type": "string",
            "description": "Path to the file to deliver",
        }
        caption_schema = {
            "type": "string",
            "description": "Optional message to accompany the file",
            "default": "",
        }
        return {
            "type": "object",
            "properties": {"path": path_schema, "caption": caption_schema},
            "required": ["path"],
        }

    async def execute(self, path: str, caption: str = "") -> str:
        """Validate *path* and return a media result for it.

        The path must resolve inside the configured file jail, exist, be a
        regular file, and be at most 100MB. Any failure, including an
        unexpected exception, is reported through ``self._error``.
        """
        one_mb = 1024 * 1024
        try:
            target = Path(path).expanduser().resolve()

            # Jail check comes first so nothing outside it is even stat'ed.
            jail_root = get_settings().file_jail_path.resolve()
            if not is_safe_path(target, jail_root):
                return self._error(f"Access denied: {path} is outside allowed directory")

            if not target.exists():
                return self._error(f"File not found: {path}")
            if not target.is_file():
                return self._error(f"Not a file: {path}")

            n_bytes = target.stat().st_size
            if n_bytes > 100 * one_mb:
                return self._error(f"File too large: {n_bytes / (1024 * 1024):.1f}MB (max 100MB)")

            # The MIME type is only used for the human-readable summary line.
            mime = mimetypes.guess_type(str(target))[0]
            if n_bytes > one_mb:
                size_str = f"{n_bytes / (1024 * 1024):.1f}MB"
            else:
                size_str = f"{n_bytes / 1024:.1f}KB"
            info = f"Delivering {target.name} ({mime or 'unknown'}, {size_str})"

            message = f"{caption}\n{info}" if caption else info
            return self._media_result(str(target), message)
        except Exception as e:
            return self._error(str(e))

View File

@@ -1,5 +1,6 @@
# Filesystem tools.
# Filesystem tools - ReadFileTool, WriteFileTool, ListDirTool, EditFileTool.
# Created: 2026-02-02
# Modified: 2026-03-12 - Added EditFileTool for find-and-replace file editing
from pathlib import Path
@@ -181,3 +182,95 @@ class ListDirTool(BaseTool):
except Exception as e:
return self._error(str(e))
class EditFileTool(BaseTool):
    """Edit a file by replacing an exact string match with new content.

    By default the search text must occur exactly once in the file; with
    ``replace_all`` every occurrence is replaced. The target must resolve to
    a regular file inside the configured file jail.
    """

    @property
    def name(self) -> str:
        """Identifier the agent uses to call this tool."""
        return "edit_file"

    @property
    def description(self) -> str:
        """Human-readable summary shown to the model."""
        return (
            "Edit a file by replacing an exact string match with new content. "
            "The old_string must appear exactly once in the file for the edit to succeed, "
            "unless replace_all is set to true."
        )

    @property
    def trust_level(self) -> str:
        """Trust tier required to run this tool."""
        return "standard"

    @property
    def parameters(self) -> dict[str, Any]:
        """JSON-schema description of the accepted arguments."""
        return {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file to edit",
                },
                "old_string": {
                    "type": "string",
                    "description": "The exact text to find and replace",
                },
                "new_string": {
                    "type": "string",
                    "description": "The replacement text",
                },
                "replace_all": {
                    "type": "boolean",
                    "description": "Replace all occurrences instead of requiring uniqueness",
                    "default": False,
                },
            },
            "required": ["path", "old_string", "new_string"],
        }

    async def execute(
        self,
        path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> str:
        """Replace ``old_string`` with ``new_string`` in the file at ``path``.

        Returns a success message with the number of replacements, or an
        error string (via ``self._error``) when the path is outside the jail,
        missing, not a file, not UTF-8 text, when ``old_string`` is empty or
        absent, or when it is ambiguous and ``replace_all`` is false.
        """
        # An empty search string "matches" at every position: str.count()
        # reports len(content) + 1 hits and str.replace() would interleave
        # new_string between every character of the file. Refuse it outright.
        if old_string == "":
            return self._error("old_string must not be empty")

        try:
            file_path = Path(path).expanduser().resolve()

            # Security: check file jail
            jail = get_settings().file_jail_path.resolve()
            if not is_safe_path(file_path, jail):
                return self._error(f"Access denied: {path} is outside allowed directory")

            if not file_path.exists():
                return self._error(f"File not found: {path}")
            if not file_path.is_file():
                return self._error(f"Not a file: {path}")

            content = file_path.read_text(encoding="utf-8")
            count = content.count(old_string)

            if count == 0:
                return self._error("old_string not found in file")
            if not replace_all and count > 1:
                return self._error(
                    f"old_string appears {count} times. Provide more context to make it "
                    f"unique, or set replace_all=true"
                )

            # At this point either replace_all is set or there is exactly one
            # occurrence, so an unbounded replace is correct in both modes.
            new_content = content.replace(old_string, new_string)
            file_path.write_text(new_content, encoding="utf-8")

            replacements = count if replace_all else 1
            return f"Successfully made {replacements} replacement(s) in {path}"
        except UnicodeDecodeError:
            return self._error(f"Cannot read {path}: not a text file or wrong encoding")
        except Exception as e:
            return self._error(str(e))

View File

@@ -0,0 +1,117 @@
# Package installation tool - pip install with Guardian review.
# Created: 2026-03-12
import asyncio
import re
import subprocess
import sys
from typing import Any
from pocketpaw.security import get_guardian
from pocketpaw.tools.protocol import BaseTool
# Whitelist: only characters valid in a single pip package spec are allowed.
# Covers package names, extras (brackets), version specifiers, and version numbers.
# No whitespace: this tool installs one package at a time.
# Anything outside this set (semicolons, pipes, ampersands, backticks, dollar signs,
# parens, newlines, spaces) will fail the match and be rejected.
# The first character must be a letter or digit so the spec can never be parsed by
# pip as an option (e.g. "-rrequirements.txt", "-e.", "--pre").
# The pattern ends with \Z rather than $ because $ also matches just before a
# trailing newline, which would let "name\n" through.
_VALID_PACKAGE_SPEC_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\-\.\[\],~>=<!]*\Z")
class InstallPackageTool(BaseTool):
    """Install a Python package using pip with Guardian review.

    The package spec is validated against a character whitelist and must
    start with a letter or digit, so it cannot be interpreted by pip as a
    command-line option. It is then reviewed by the Guardian and passed to
    ``python -m pip install`` as a single argv element (no shell).
    """

    def __init__(self, timeout: int = 300):
        # Upper bound, in seconds, handed to subprocess.run for the pip call.
        self.timeout = timeout

    @property
    def name(self) -> str:
        """Identifier the agent uses to call this tool."""
        return "install_package"

    @property
    def description(self) -> str:
        """Human-readable summary shown to the model."""
        return (
            "Install a Python package using pip. Guardian AI reviews the package name "
            "before installation to prevent typosquatting and malicious packages."
        )

    @property
    def trust_level(self) -> str:
        """Trust tier required to run this tool."""
        return "elevated"

    @property
    def parameters(self) -> dict[str, Any]:
        """JSON-schema description of the accepted arguments."""
        return {
            "type": "object",
            "properties": {
                "package": {
                    "type": "string",
                    "description": (
                        "Package name with optional version specifier "
                        '(e.g. "requests", "paw-ytp>=0.1.0")'
                    ),
                },
                "upgrade": {
                    "type": "boolean",
                    "description": "Whether to use --upgrade flag",
                    "default": False,
                },
            },
            "required": ["package"],
        }

    def _is_valid_package_spec(self, package: str) -> bool:
        """Return True if *package* is a safe single-package spec.

        The whole string must consist of whitelisted characters and must
        begin with a letter or digit.
        """
        if not isinstance(package, str):
            return False
        # fullmatch (not match) so a trailing newline cannot slip past a "$"
        # anchor in the pattern.
        if _VALID_PACKAGE_SPEC_RE.fullmatch(package) is None:
            return False
        # A leading "-" (or other punctuation) would make pip treat the value
        # as an option such as -r/-e/--pre instead of a requirement. The
        # whitelist is ASCII-only, so isalnum() here means [a-zA-Z0-9].
        return package[0].isalnum()

    async def execute(self, package: str, upgrade: bool = False) -> str:
        """Install *package* via pip after validation and Guardian review.

        Returns pip's stdout on success, or an error string (via
        ``self._error``) if the spec is invalid, the Guardian rejects it,
        pip exits non-zero, or the install times out.
        """
        # 1. Validate package spec, reject shell metacharacters and options
        if not self._is_valid_package_spec(package):
            return self._error(
                f"Invalid package spec '{package}': contains disallowed characters. "
                "Only alphanumeric characters, hyphens, underscores, dots, brackets, "
                "and version specifiers are allowed, and the spec must start with "
                "a letter or digit."
            )

        # 2. Guardian AI review
        pip_command = f"pip install {package}"
        is_safe, reason = await get_guardian().check_command(pip_command)
        if not is_safe:
            return self._error(f"Package blocked by Guardian: {reason}")

        # 3. Build the subprocess command (argv list, never a shell string)
        cmd = [
            sys.executable,
            "-m",
            "pip",
            "install",
            package,
            "--no-input",
            "--disable-pip-version-check",
        ]
        if upgrade:
            cmd.append("--upgrade")

        try:
            # subprocess.run blocks, so it is pushed to the default executor
            # to keep the event loop responsive.
            result = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=self.timeout,
                ),
            )

            if result.returncode != 0:
                error_output = result.stderr.strip() or result.stdout.strip()
                return self._error(f"pip exited with code {result.returncode}:\n{error_output}")

            return result.stdout.strip() or "(pip produced no output)"
        except subprocess.TimeoutExpired:
            return self._error(f"pip install timed out after {self.timeout}s")
        except Exception as e:
            return self._error(str(e))

View File

@@ -0,0 +1,97 @@
# Python execution tool - sandboxed Python script runner.
# Created: 2026-03-12
import asyncio
import subprocess
import sys
import uuid
from typing import Any
from pocketpaw.config import get_settings
from pocketpaw.security import get_guardian
from pocketpaw.tools.protocol import BaseTool
class RunPythonTool(BaseTool):
    """Execute a Python script in a subprocess rooted in the file jail.

    The code is reviewed by the Guardian, written to a uniquely named temp
    script inside the jail directory, and run with the current interpreter
    using the jail as working directory. The isolation visible in this class
    is limited to that: a separate process and a jail working directory.
    """

    @property
    def name(self) -> str:
        """Identifier the agent uses to call this tool."""
        return "run_python"

    @property
    def description(self) -> str:
        """Human-readable summary shown to the model."""
        return (
            "Execute a Python script in a sandboxed subprocess and return its output. "
            "Use for data processing, file generation, calculations, or running installed packages."
        )

    @property
    def trust_level(self) -> str:
        """Trust tier required to run this tool."""
        return "elevated"

    @property
    def parameters(self) -> dict[str, Any]:
        """JSON-schema description of the accepted arguments."""
        return {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python code to execute",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Execution timeout in seconds (default: 120)",
                    "default": 120,
                },
            },
            "required": ["code"],
        }

    async def execute(self, code: str, timeout: int = 120) -> str:  # type: ignore[override]
        """Run *code* and return its combined output.

        Returns stdout, followed by a ``STDERR:`` section and an
        ``Exit code:`` line when applicable, or ``(no output)``. Guardian
        rejection, timeout, and any other failure (including failure to
        create the jail directory or write the script) are returned as error
        strings via ``self._error``.
        """
        # Guardian AI check on the code before execution
        is_safe, reason = await get_guardian().check_command(code)
        if not is_safe:
            return self._error(f"Code blocked by Guardian: {reason}")

        jail_path = get_settings().file_jail_path

        # Write code to a temp file in the jail so multiline scripts work cleanly.
        # The random suffix keeps concurrent runs from clobbering each other.
        script_name = f"_pocketpaw_run_{uuid.uuid4().hex}.py"
        script_path = jail_path / script_name

        try:
            # Inside the try so a filesystem failure becomes a tool error
            # instead of an exception escaping to the caller.
            jail_path.mkdir(parents=True, exist_ok=True)
            script_path.write_text(code, encoding="utf-8")

            # subprocess.run blocks, so it is pushed to the default executor.
            result = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: subprocess.run(
                    [sys.executable, str(script_path)],
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                    cwd=str(jail_path),
                ),
            )

            output = result.stdout
            if result.stderr:
                output += f"\nSTDERR:\n{result.stderr}"
            if result.returncode != 0:
                output += f"\nExit code: {result.returncode}"

            return output.strip() or "(no output)"
        except subprocess.TimeoutExpired:
            return self._error(f"Python script timed out after {timeout}s")
        except Exception as e:
            return self._error(str(e))
        finally:
            # Always clean up the temp script file. missing_ok avoids the
            # exists()/unlink() race; cleanup stays best-effort, but only
            # filesystem errors are ignored.
            try:
                script_path.unlink(missing_ok=True)
            except OSError:
                pass

View File

@@ -23,7 +23,8 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
TOOL_GROUPS: dict[str, list[str]] = {
"group:fs": ["read_file", "write_file", "edit_file", "list_dir", "directory_tree"],
"group:shell": ["shell"],
"group:shell": ["shell", "run_python"],
"group:packages": ["install_package"],
"group:browser": ["browser"],
"group:memory": ["remember", "recall", "forget"],
"group:desktop": ["desktop", "system_info"],
@@ -42,7 +43,7 @@ TOOL_GROUPS: dict[str, list[str]] = {
"spotify_playback",
"spotify_playlist",
],
"group:media": ["image_generate", "ocr"],
"group:media": ["image_generate", "ocr", "deliver_artifact"],
"group:translate": ["translate"],
"group:reddit": ["reddit_search", "reddit_read", "reddit_trending"],
"group:sessions": [
@@ -66,7 +67,7 @@ TOOL_PROFILES: dict[str, dict] = {
"allow": ["group:memory", "group:sessions", "group:explorer"],
},
"coding": {
"allow": ["group:fs", "group:shell", "group:memory", "group:explorer"],
"allow": ["group:fs", "group:shell", "group:packages", "group:memory", "group:explorer"],
},
"full": {}, # No restrictions — everything allowed
}

View File

@@ -0,0 +1,97 @@
# Tests for DeliverArtifactTool - file delivery to user channels.
# Created: 2026-03-12
from unittest.mock import patch
import pytest
from pocketpaw.config import Settings
from pocketpaw.tools.builtin.deliver import DeliverArtifactTool
@pytest.fixture
def mock_settings(tmp_path):
    """Make the deliver module see a Settings whose jail is tmp_path."""
    jailed = Settings(file_jail_path=tmp_path)
    patcher = patch("pocketpaw.tools.builtin.deliver.get_settings", return_value=jailed)
    with patcher:
        yield jailed
@pytest.fixture
def tool():
    """Provide a fresh DeliverArtifactTool for each test."""
    instance = DeliverArtifactTool()
    return instance
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_basic(tool, mock_settings, tmp_path):
    """Delivering an existing file returns the media tag and the file name."""
    f = tmp_path / "output.txt"
    f.write_text("hello")

    result = await tool.execute(path=str(f))

    assert f"<!-- media:{f} -->" in result
    assert "output.txt" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_with_caption(tool, mock_settings, tmp_path):
    """The caption is included in the result alongside the media tag."""
    f = tmp_path / "chart.png"
    f.write_bytes(b"\x89PNG" + b"\x00" * 100)

    result = await tool.execute(path=str(f), caption="Here's your chart")

    assert "Here's your chart" in result
    assert f"<!-- media:{f} -->" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_image_mime(tool, mock_settings, tmp_path):
    """A .jpg file is reported with the image/jpeg MIME type."""
    f = tmp_path / "photo.jpg"
    f.write_bytes(b"\xff\xd8\xff" + b"\x00" * 50)

    result = await tool.execute(path=str(f))

    assert "image/jpeg" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_video_mime(tool, mock_settings, tmp_path):
    """A .mp4 file is reported with the video/mp4 MIME type."""
    f = tmp_path / "clip.mp4"
    f.write_bytes(b"\x00" * 200)

    result = await tool.execute(path=str(f))

    assert "video/mp4" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_file_not_found(tool, mock_settings, tmp_path):
    """A path that does not exist inside the jail yields a 'not found' error."""
    result = await tool.execute(path=str(tmp_path / "nope.txt"))

    assert "Error" in result
    assert "not found" in result.lower()
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_not_a_file(tool, mock_settings, tmp_path):
    """Passing a directory instead of a file yields a 'Not a file' error."""
    d = tmp_path / "subdir"
    d.mkdir()

    result = await tool.execute(path=str(d))

    assert "Error" in result
    assert "Not a file" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_file_jail(tool, mock_settings, tmp_path):
    """A path outside the jail is refused with 'Access denied'."""
    result = await tool.execute(path="/etc/passwd")

    assert "Error" in result
    assert "Access denied" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_size_info(tool, mock_settings, tmp_path):
    """The result mentions a human-readable size and the file name."""
    f = tmp_path / "data.csv"
    f.write_text("a,b,c\n" * 500)

    result = await tool.execute(path=str(f))

    assert "KB" in result or "MB" in result
    assert "data.csv" in result
# Marked explicitly so the coroutine is run by pytest-asyncio regardless of
# the configured asyncio mode.
@pytest.mark.asyncio
async def test_deliver_definition(tool):
    """The tool definition exposes the expected name, trust level and schema."""
    defn = tool.definition

    assert defn.name == "deliver_artifact"
    assert defn.trust_level == "standard"
    assert "path" in defn.parameters["properties"]
    assert "caption" in defn.parameters["properties"]
    assert "path" in defn.parameters["required"]

175
tests/test_edit_file.py Normal file
View File

@@ -0,0 +1,175 @@
# Tests for EditFileTool - find-and-replace file editing.
# Created: 2026-03-12
from unittest.mock import patch
import pytest
from pocketpaw.config import Settings
from pocketpaw.tools.builtin.filesystem import EditFileTool
@pytest.fixture
def jail(tmp_path):
    """Expose pytest's tmp_path under the name the tests use for the file jail."""
    jail_dir = tmp_path
    return jail_dir
@pytest.fixture
def mock_settings(jail):
    """Make the filesystem module see a Settings whose jail is the temp dir."""
    jailed = Settings(file_jail_path=jail)
    patcher = patch("pocketpaw.tools.builtin.filesystem.get_settings", return_value=jailed)
    with patcher:
        yield jailed
@pytest.mark.asyncio
async def test_edit_file_basic(jail, mock_settings):
    """A single occurrence is replaced and the file content changes."""
    target = jail / "hello.txt"
    target.write_text("Hello World")

    outcome = await EditFileTool().execute(
        path=str(target), old_string="World", new_string="PocketPaw"
    )

    assert "replacement" in outcome
    assert target.read_text() == "Hello PocketPaw"
@pytest.mark.asyncio
async def test_edit_file_not_found(jail, mock_settings):
    """Editing a file that does not exist reports a 'not found' error."""
    missing = jail / "missing.txt"

    outcome = await EditFileTool().execute(
        path=str(missing), old_string="anything", new_string="replacement"
    )

    assert "Error:" in outcome
    assert "not found" in outcome.lower()
@pytest.mark.asyncio
async def test_edit_file_old_string_missing(jail, mock_settings):
    """Searching for text that is not in the file reports an error."""
    target = jail / "content.txt"
    target.write_text("The quick brown fox")

    outcome = await EditFileTool().execute(
        path=str(target), old_string="lazy dog", new_string="cat"
    )

    assert "Error:" in outcome
    assert "not found" in outcome
@pytest.mark.asyncio
async def test_edit_file_ambiguous(jail, mock_settings):
    """Three matches with replace_all=False is an error that mentions the count."""
    target = jail / "repeat.txt"
    target.write_text("foo bar foo baz foo")

    outcome = await EditFileTool().execute(
        path=str(target), old_string="foo", new_string="qux", replace_all=False
    )

    assert "Error:" in outcome
    assert "3" in outcome
@pytest.mark.asyncio
async def test_edit_file_replace_all(jail, mock_settings):
    """With replace_all=True every occurrence is rewritten and counted."""
    target = jail / "repeat.txt"
    target.write_text("foo bar foo baz foo")

    outcome = await EditFileTool().execute(
        path=str(target), old_string="foo", new_string="qux", replace_all=True
    )

    assert "Error:" not in outcome
    assert target.read_text() == "qux bar qux baz qux"
    assert "3" in outcome
@pytest.mark.asyncio
async def test_edit_file_multiline(jail, mock_settings):
    """A block spanning several lines can be replaced in one edit."""
    target = jail / "multi.txt"
    target.write_text("line one\nline two\nline three\n")

    outcome = await EditFileTool().execute(
        path=str(target),
        old_string="line one\nline two\n",
        new_string="replaced block\n",
    )

    assert "Error:" not in outcome
    assert target.read_text() == "replaced block\nline three\n"
@pytest.mark.asyncio
async def test_edit_file_empty_new_string(jail, mock_settings):
    """An empty replacement deletes the matched text."""
    target = jail / "delete.txt"
    target.write_text("keep this DELETE that")

    outcome = await EditFileTool().execute(
        path=str(target), old_string=" DELETE", new_string=""
    )

    assert "Error:" not in outcome
    assert target.read_text() == "keep this that"
@pytest.mark.asyncio
async def test_edit_file_file_jail(jail, mock_settings):
    """A file that lives outside the jail is refused and left untouched."""
    outsider = jail.parent / "outside_secret.txt"
    outsider.write_text("sensitive data")

    outcome = await EditFileTool().execute(
        path=str(outsider), old_string="sensitive", new_string="redacted"
    )

    assert "Access denied" in outcome
    # The refused edit must not have modified the file.
    assert outsider.read_text() == "sensitive data"
@pytest.mark.asyncio
async def test_edit_file_preserves_rest(jail, mock_settings):
    """Only the matched text changes; everything around it survives."""
    target = jail / "partial.txt"
    target.write_text("alpha beta gamma delta")

    await EditFileTool().execute(path=str(target), old_string="beta", new_string="BETA")

    after = target.read_text()
    assert "alpha" in after
    assert "BETA" in after
    assert "gamma" in after
    assert "delta" in after
    assert "beta" not in after
@pytest.mark.asyncio
async def test_edit_file_definition(mock_settings):
    """The definition carries the expected name, trust level and schema."""
    definition = EditFileTool().definition

    assert definition.name == "edit_file"
    assert definition.trust_level == "standard"

    required_args = definition.parameters.get("required", [])
    assert "path" in required_args
    assert "old_string" in required_args
    assert "new_string" in required_args

    schema_props = definition.parameters.get("properties", {})
    assert "replace_all" in schema_props
    assert schema_props["replace_all"]["type"] == "boolean"

View File

@@ -0,0 +1,271 @@
# Tests for InstallPackageTool - pip install with Guardian review.
# Created: 2026-03-12
import subprocess
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@pytest.fixture
def mock_guardian():
    """Guardian double whose check_command approves everything."""
    approve = AsyncMock(return_value=(True, "Looks safe"))
    return MagicMock(check_command=approve)
@pytest.fixture
def successful_pip_result():
    """Stand-in for the subprocess result of a pip run that succeeded."""
    return MagicMock(
        returncode=0,
        stdout="Successfully installed requests-2.31.0",
        stderr="",
    )
# ---------------------------------------------------------------------------
# Happy path
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_install_package_basic(mock_guardian, successful_pip_result):
    """On success the tool returns pip's stdout without an error prefix."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    run_patch = patch("subprocess.run", return_value=successful_pip_result)
    with guardian_patch, run_patch:
        output = await InstallPackageTool().execute(package="requests")

    assert "Successfully installed" in output
    assert "Error" not in output
@pytest.mark.asyncio
async def test_install_package_with_version(mock_guardian):
    """A version specifier reaches pip's argv exactly as given."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    seen_cmds: list[list[str]] = []

    def fake_run(cmd, **kwargs):
        # Record the argv and pretend pip succeeded.
        seen_cmds.append(cmd)
        return MagicMock(
            returncode=0,
            stdout="Successfully installed requests-2.31.0",
            stderr="",
        )

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    run_patch = patch("subprocess.run", side_effect=fake_run)
    with guardian_patch, run_patch:
        await InstallPackageTool().execute(package="requests>=2.28.0")

    assert len(seen_cmds) == 1
    argv = seen_cmds[0]
    assert "requests>=2.28.0" in argv
@pytest.mark.asyncio
async def test_install_package_with_extras(mock_guardian):
    """Bracketed extras such as pocketpaw[soul] pass validation and reach pip."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    seen_cmds: list[list[str]] = []

    def fake_run(cmd, **kwargs):
        # Record the argv and pretend pip succeeded.
        seen_cmds.append(cmd)
        return MagicMock(
            returncode=0,
            stdout="Successfully installed pocketpaw-0.4.4",
            stderr="",
        )

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    run_patch = patch("subprocess.run", side_effect=fake_run)
    with guardian_patch, run_patch:
        output = await InstallPackageTool().execute(package="pocketpaw[soul]")

    assert "Error" not in output
    argv = seen_cmds[0]
    assert "pocketpaw[soul]" in argv
@pytest.mark.asyncio
async def test_install_package_upgrade(mock_guardian):
    """Passing upgrade=True puts --upgrade on pip's command line."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    seen_cmds: list[list[str]] = []

    def fake_run(cmd, **kwargs):
        # Record the argv and pretend pip succeeded.
        seen_cmds.append(cmd)
        return MagicMock(
            returncode=0,
            stdout="Successfully installed pip-24.0",
            stderr="",
        )

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    run_patch = patch("subprocess.run", side_effect=fake_run)
    with guardian_patch, run_patch:
        await InstallPackageTool().execute(package="pip", upgrade=True)

    argv = seen_cmds[0]
    assert "--upgrade" in argv
# ---------------------------------------------------------------------------
# Shell injection blocking
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_install_package_shell_injection_semicolon(mock_guardian):
    """A semicolon in the spec is rejected before the Guardian is consulted."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    with guardian_patch:
        output = await InstallPackageTool().execute(package="foo; rm -rf /")

    assert output.startswith("Error:")
    # Validation runs first, so the Guardian must not have been asked.
    mock_guardian.check_command.assert_not_called()
@pytest.mark.asyncio
async def test_install_package_shell_injection_pipe(mock_guardian):
    """A pipe in the spec is rejected by input validation."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    with guardian_patch:
        output = await InstallPackageTool().execute(package="foo | cat /etc/passwd")

    assert output.startswith("Error:")
    mock_guardian.check_command.assert_not_called()
@pytest.mark.asyncio
async def test_install_package_shell_injection_backtick(mock_guardian):
    """Backtick command substitution in the spec is rejected."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    with guardian_patch:
        output = await InstallPackageTool().execute(package="foo`whoami`")

    assert output.startswith("Error:")
    mock_guardian.check_command.assert_not_called()
# ---------------------------------------------------------------------------
# Guardian blocking
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_install_package_guardian_block():
    """A Guardian rejection aborts the install before pip is started."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    verdict = (False, "suspicious package, possible typosquatting")
    rejecting_guardian = MagicMock(check_command=AsyncMock(return_value=verdict))

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=rejecting_guardian
    )
    with guardian_patch, patch("subprocess.run") as run_spy:
        # "reqeusts" is a deliberate misspelling of "requests".
        output = await InstallPackageTool().execute(package="reqeusts")

    assert output.startswith("Error:")
    assert "Guardian" in output
    # A blocked package must never reach pip.
    run_spy.assert_not_called()
# ---------------------------------------------------------------------------
# Error conditions
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_install_package_timeout(mock_guardian):
    """A pip run that exceeds the timeout is reported as a clean error."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    def always_times_out(cmd, **kwargs):
        raise subprocess.TimeoutExpired(cmd=cmd, timeout=300)

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    run_patch = patch("subprocess.run", side_effect=always_times_out)
    with guardian_patch, run_patch:
        installer = InstallPackageTool(timeout=300)
        output = await installer.execute(package="some-large-package")

    assert output.startswith("Error:")
    assert "timed out" in output
@pytest.mark.asyncio
async def test_install_package_pip_failure(mock_guardian):
    """A non-zero pip exit becomes an error that carries pip's stderr."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    failed_run = MagicMock(
        returncode=1,
        stdout="",
        stderr="ERROR: Could not find a version that satisfies the requirement nosuchpkg",
    )

    guardian_patch = patch(
        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
    )
    run_patch = patch("subprocess.run", return_value=failed_run)
    with guardian_patch, run_patch:
        output = await InstallPackageTool().execute(package="nosuchpkg")

    assert output.startswith("Error:")
    assert "Could not find" in output
# ---------------------------------------------------------------------------
# Tool definition / metadata
# ---------------------------------------------------------------------------
def test_install_package_definition():
    """The definition carries the expected name, trust level and schema."""
    from pocketpaw.tools.builtin.pip_install import InstallPackageTool

    definition = InstallPackageTool().definition

    assert definition.name == "install_package"
    assert definition.trust_level == "elevated"

    schema_props = definition.parameters["properties"]
    assert "package" in schema_props
    assert "upgrade" in schema_props

    required_args = definition.parameters["required"]
    assert "package" in required_args
    assert "upgrade" not in required_args

    # The upgrade flag is optional and off unless requested.
    assert schema_props["upgrade"]["default"] is False

220
tests/test_run_python.py Normal file
View File

@@ -0,0 +1,220 @@
# Tests for RunPythonTool - sandboxed Python execution.
# Created: 2026-03-12
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@pytest.fixture
def mock_guardian_safe():
    """Provide a guardian double whose async check_command always yields (True, "")."""
    return MagicMock(check_command=AsyncMock(return_value=(True, "")))
@pytest.fixture
def jail(tmp_path):
    """Expose pytest's per-test tmp_path as the directory used for the file jail."""
    jail_dir = tmp_path
    return jail_dir
@pytest.fixture
def mock_settings(jail):
    """Provide a settings double whose file_jail_path is the `jail` fixture directory."""
    return MagicMock(file_jail_path=jail)
# ---------------------------------------------------------------------------
# Happy-path tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_run_python_basic(mock_guardian_safe, mock_settings):
    """Running a single print statement must put the printed text in the result."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        output = await runner.execute(code='print("hello")')

    assert "hello" in output
@pytest.mark.asyncio
async def test_run_python_multiline(mock_guardian_safe, mock_settings):
    """A three-line script that imports math must run and print its computed value."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    script = "import math\nresult = math.sqrt(9)\nprint(f'sqrt={result}')"

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        output = await runner.execute(code=script)

    assert "sqrt=3.0" in output
@pytest.mark.asyncio
async def test_run_python_stderr(mock_guardian_safe, mock_settings):
    """Text written to stderr must show up under a STDERR marker in the result."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    script = "import sys\nsys.stderr.write('boom\\n')"

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        output = await runner.execute(code=script)

    assert "STDERR" in output
    assert "boom" in output
@pytest.mark.asyncio
async def test_run_python_exit_code(mock_guardian_safe, mock_settings):
    """A script ending in sys.exit(1) must report 'Exit code: 1' in the result."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    script = "import sys\nsys.exit(1)"

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        output = await runner.execute(code=script)

    assert "Exit code: 1" in output
@pytest.mark.asyncio
async def test_run_python_timeout(mock_guardian_safe, mock_settings):
    """A never-ending loop run with timeout=1 must yield a 'timed out' message."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    # Busy loop that can only end by being interrupted from outside.
    script = "while True: pass"

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        output = await runner.execute(code=script, timeout=1)

    # Case-insensitive so the exact capitalisation of the message is not pinned.
    assert "timed out" in output.lower()
@pytest.mark.asyncio
async def test_run_python_syntax_error(mock_guardian_safe, mock_settings):
    """Invalid Python should produce a SyntaxError in the tool's output.

    The interpreter reports the parse failure on stderr and exits non-zero;
    the test expects that stderr text to be carried into the result.
    """
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    code = "def broken(:"  # deliberate syntax error

    with (
        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
    ):
        tool = RunPythonTool()
        result = await tool.execute(code=code)

    # Require the specific exception name. The former
    # `"SyntaxError" in result or "Error" in result` collapsed to
    # `"Error" in result` (a substring of "SyntaxError"), so any unrelated
    # error message would have satisfied it.
    assert "SyntaxError" in result
# ---------------------------------------------------------------------------
# Security tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_run_python_guardian_block(mock_settings):
    """A guardian verdict of (False, reason) must lead to a 'blocked' result."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    # Guardian double that rejects every command it is asked about.
    denying_guardian = MagicMock(
        check_command=AsyncMock(return_value=(False, "blocked by policy")),
    )

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=denying_guardian,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        output = await runner.execute(code='print("hello")')

    assert "blocked" in output.lower()
# ---------------------------------------------------------------------------
# File system / isolation tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_run_python_file_creation(mock_guardian_safe, mock_settings, jail):
    """A file the script writes via a relative path must end up inside the jail."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    script = "with open('output.txt', 'w') as f:\n f.write('created')"

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        await runner.execute(code=script)

    written = jail / "output.txt"
    assert written.exists()
    assert written.read_text() == "created"
@pytest.mark.asyncio
async def test_run_python_cleanup(mock_guardian_safe, mock_settings, jail):
    """After execution, no temporary script file may be left behind in the jail."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    guardian_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_guardian",
        return_value=mock_guardian_safe,
    )
    settings_patch = patch(
        "pocketpaw.tools.builtin.python_exec.get_settings",
        return_value=mock_settings,
    )

    with guardian_patch, settings_patch:
        runner = RunPythonTool()
        await runner.execute(code='print("cleanup test")')

    # Anything still matching the temp-script name pattern counts as a leak.
    leftover = list(jail.glob("_pocketpaw_run_*.py"))
    assert not leftover, f"Temp script files not cleaned up: {leftover}"
# ---------------------------------------------------------------------------
# Definition / metadata tests
# ---------------------------------------------------------------------------
def test_run_python_definition():
    """Check the tool's advertised name, trust level, and parameter schema."""
    from pocketpaw.tools.builtin.python_exec import RunPythonTool

    definition = RunPythonTool().definition

    assert definition.name == "run_python"
    assert definition.trust_level == "elevated"

    # Both parameters must be declared in the schema.
    properties = definition.parameters["properties"]
    assert "code" in properties
    assert "timeout" in properties

    # `code` is mandatory; `timeout` must stay optional.
    assert "code" in definition.parameters["required"]
    assert "timeout" not in definition.parameters.get("required", [])

View File

@@ -65,7 +65,7 @@ class TestGroupExpansion:
def test_expand_multiple_groups(self):
    """Expanding two groups at once yields the union of both groups' tool names."""
    result = ToolPolicy._expand_names(["group:shell", "group:memory"])
    # Only the expectation that includes run_python is kept. The earlier
    # assertion without it contradicted this one, so the two could never
    # both pass on the same result.
    assert result == {"shell", "run_python", "remember", "recall", "forget"}
def test_expand_mixed_groups_and_names(self):
result = ToolPolicy._expand_names(["group:memory", "custom_tool"])