Add agent primitives: run_python, install_package, edit_file (#582)

* feat(tools): add run_python, install_package, and edit_file primitives

  Three new builtin tools that close the gap between "agent that chats" and "agent that builds things":

  - run_python: sandboxed Python execution via subprocess (trust: elevated). Writes code to a temp file in the file jail, runs with sys.executable, Guardian AI reviews before execution, cleanup in finally block.
  - install_package: pip install with input validation and Guardian review (trust: elevated). Whitelist regex blocks shell injection. Subprocess runs without shell=True for defense-in-depth.
  - edit_file: find-and-replace file editing (trust: standard). Closes the gap where policy.py referenced edit_file in group:fs but no tool existed. Supports unique match (default) and replace_all modes.

  Wiring changes:

  - __init__.py: lazy imports for all three tools
  - policy.py: run_python added to group:shell, new group:packages with install_package, coding profile includes group:packages
  - tool_bridge.py: EditFileTool excluded for claude_agent_sdk (has native Edit)

  31 new tests across 3 test files, all passing.

  Closes #581

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat(tools): add deliver_artifact tool for sending files to users

  Standalone tool that lets the agent explicitly send any file (image, video, audio, PDF) to the user through their current channel. Bridges the gap between _media_result() (tool-internal) and agent-accessible file delivery.

  Uses the existing media pipeline: _media_result() -> AgentLoop extraction -> OutboundMessage.media -> channel adapter _send_media_file().

  9 new tests, all passing.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: use get_running_loop, remove whitespace from regex, clean up em dashes

  - Replace deprecated asyncio.get_event_loop() with get_running_loop() in python_exec.py and pip_install.py
  - Remove \s from package spec regex (single package at a time, no spaces)
  - Replace em dashes with hyphens/commas per project convention

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Rohit Kushwaha <rohitk290106@gmail.com>
Co-authored-by: Rohit Kushwaha <technicalrohit06@gmail.com>
2026-05-13 21:21:53 +00:00 · 2026-03-16 21:36:34 +05:30
parent e5d564264a
commit 18dd74a3a4
12 changed files with 1168 additions and 8 deletions
--- a/src/pocketpaw/agents/tool_bridge.py
+++ b/src/pocketpaw/agents/tool_bridge.py
@@ -7,9 +7,12 @@ Provides:
 - get_tool_instructions_compact(): compact markdown for system-prompt injection

 Backend-aware exclusion:
- claude_agent_sdk: shell/fs tools excluded (provided natively by CLI)
- All other backends: shell/fs tools included via the bridge
+- claude_agent_sdk: shell/fs/edit tools excluded (provided natively by CLI)
+- All other backends: shell/fs/edit tools included via the bridge
 - BrowserTool/DesktopTool: always excluded (need special session state)
+
+Changes:
+- 2026-03-12: Added EditFileTool to _CLAUDE_SDK_EXCLUDED (has native Edit)
 """

 from __future__ import annotations
@@ -28,7 +31,9 @@ logger = logging.getLogger(__name__)
 _ALWAYS_EXCLUDED = frozenset({"BrowserTool", "DesktopTool"})

 # Tools excluded only for claude_agent_sdk -- these are provided natively by the CLI.
-_CLAUDE_SDK_EXCLUDED = frozenset({"ShellTool", "ReadFileTool", "WriteFileTool", "ListDirTool"})
+_CLAUDE_SDK_EXCLUDED = frozenset({
+    "ShellTool", "ReadFileTool", "WriteFileTool", "ListDirTool", "EditFileTool",
+})


 def _instantiate_all_tools(backend: str = "claude_agent_sdk") -> list[BaseTool]:
--- a/src/pocketpaw/tools/builtin/init.py
+++ b/src/pocketpaw/tools/builtin/init.py
@@ -7,6 +7,7 @@
 #   - 2026-02-09: Added STT, Drive, Docs, Spotify, OCR, Reddit tools
 #   - 2026-02-09: Converted to lazy __getattr__ to avoid ImportError when optional deps missing
 #   - 2026-02-17: Added HealthCheckTool, ErrorLogTool, ConfigDoctorTool for health engine
+#   - 2026-03-12: Added EditFileTool, RunPythonTool, InstallPackageTool (issue #581)

 import importlib as _importlib

@@ -16,6 +17,7 @@ _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    "ReadFileTool": (".filesystem", "ReadFileTool"),
    "WriteFileTool": (".filesystem", "WriteFileTool"),
    "ListDirTool": (".filesystem", "ListDirTool"),
+    "EditFileTool": (".filesystem", "EditFileTool"),
    "BrowserTool": (".browser", "BrowserTool"),
    "RememberTool": (".memory", "RememberTool"),
    "RecallTool": (".memory", "RecallTool"),
@@ -67,6 +69,9 @@ _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
    "OpenExplorerTool": (".explorer", "OpenExplorerTool"),
    "DirectoryTreeTool": (".tree", "DirectoryTreeTool"),
    "SystemInfoTool": (".sysinfo", "SystemInfoTool"),
+    "RunPythonTool": (".python_exec", "RunPythonTool"),
+    "InstallPackageTool": (".pip_install", "InstallPackageTool"),
+    "DeliverArtifactTool": (".deliver", "DeliverArtifactTool"),
    "DiscordCLITool": (".discord", "DiscordCLITool"),
 }

--- a/src/pocketpaw/tools/builtin/deliver.py
+++ b/src/pocketpaw/tools/builtin/deliver.py
@@ -0,0 +1,79 @@
+# Artifact delivery tool - sends files back to the user via their channel.
+# Created: 2026-03-12
+
+import mimetypes
+from pathlib import Path
+from typing import Any
+
+from pocketpaw.config import get_settings
+from pocketpaw.tools.fetch import is_safe_path
+from pocketpaw.tools.protocol import BaseTool
+
+
+class DeliverArtifactTool(BaseTool):
+    """Tool that hands a file inside the file jail back to the user as media.
+
+    All validation lives in ``execute``; the hand-off itself is whatever
+    ``self._media_result`` produces (defined on the base class, not shown here).
+    """
+
+    @property
+    def name(self) -> str:
+        """Identifier the tool is exposed under."""
+        return "deliver_artifact"
+
+    @property
+    def description(self) -> str:
+        """Short usage summary shown alongside the tool name."""
+        return (
+            "Send a file (image, video, audio, PDF, etc.) to the user through "
+            "their current channel. Use after creating or downloading a file "
+            "that the user should receive."
+        )
+
+    @property
+    def parameters(self) -> dict[str, Any]:
+        """JSON-schema style description of the accepted arguments."""
+        path_schema = {
+            "type": "string",
+            "description": "Path to the file to deliver",
+        }
+        caption_schema = {
+            "type": "string",
+            "description": "Optional message to accompany the file",
+            "default": "",
+        }
+        return {
+            "type": "object",
+            "properties": {"path": path_schema, "caption": caption_schema},
+            "required": ["path"],
+        }
+
+    async def execute(self, path: str, caption: str = "") -> str:
+        """Validate ``path`` and return a media result for it.
+
+        Args:
+            path: Location of the file to send; ``~`` is expanded and the
+                path is resolved before any check.
+            caption: Optional text placed on the line above the file summary.
+
+        Returns:
+            The value of ``self._media_result(...)`` on success, otherwise the
+            value of ``self._error(...)`` describing why delivery was refused.
+        """
+        one_mib = 1024 * 1024
+        max_bytes = 100 * one_mib
+
+        try:
+            target = Path(path).expanduser().resolve()
+
+            # Refuse anything that resolves outside the configured jail.
+            jail_root = get_settings().file_jail_path.resolve()
+            if not is_safe_path(target, jail_root):
+                return self._error(f"Access denied: {path} is outside allowed directory")
+
+            if not target.exists():
+                return self._error(f"File not found: {path}")
+
+            if not target.is_file():
+                return self._error(f"Not a file: {path}")
+
+            # Hard cap of 100MB on deliverable files.
+            n_bytes = target.stat().st_size
+            if n_bytes > max_bytes:
+                return self._error(f"File too large: {n_bytes / one_mib:.1f}MB (max 100MB)")
+
+            # MIME type is guessed from the file name only; it is used purely
+            # for the human-readable summary line.
+            guessed_mime, _ = mimetypes.guess_type(str(target))
+            if n_bytes > one_mib:
+                readable_size = f"{n_bytes / one_mib:.1f}MB"
+            else:
+                readable_size = f"{n_bytes / 1024:.1f}KB"
+            summary = f"Delivering {target.name} ({guessed_mime or 'unknown'}, {readable_size})"
+
+            message = f"{caption}\n{summary}" if caption else summary
+            return self._media_result(str(target), message)
+
+        except Exception as e:
+            return self._error(str(e))
--- a/src/pocketpaw/tools/builtin/filesystem.py
+++ b/src/pocketpaw/tools/builtin/filesystem.py
@@ -1,5 +1,6 @@
-# Filesystem tools.
+# Filesystem tools - ReadFileTool, WriteFileTool, ListDirTool, EditFileTool.
 # Created: 2026-02-02
+# Modified: 2026-03-12 - Added EditFileTool for find-and-replace file editing


 from pathlib import Path
@@ -181,3 +182,95 @@ class ListDirTool(BaseTool):

        except Exception as e:
            return self._error(str(e))
+
+
+class EditFileTool(BaseTool):
+    """Edit a file by replacing an exact string match with new content.
+
+    The file is read and written as UTF-8 text. By default the search string
+    must occur exactly once; ``replace_all`` lifts that requirement.
+    """
+
+    @property
+    def name(self) -> str:
+        """Identifier the tool is exposed under."""
+        return "edit_file"
+
+    @property
+    def description(self) -> str:
+        """Short usage summary shown alongside the tool name."""
+        return (
+            "Edit a file by replacing an exact string match with new content. "
+            "The old_string must appear exactly once in the file for the edit to succeed, "
+            "unless replace_all is set to true."
+        )
+
+    @property
+    def trust_level(self) -> str:
+        """Trust tier reported for this tool."""
+        return "standard"
+
+    @property
+    def parameters(self) -> dict[str, Any]:
+        """JSON-schema style description of the accepted arguments."""
+        return {
+            "type": "object",
+            "properties": {
+                "path": {
+                    "type": "string",
+                    "description": "Path to the file to edit",
+                },
+                "old_string": {
+                    "type": "string",
+                    "description": "The exact text to find and replace",
+                },
+                "new_string": {
+                    "type": "string",
+                    "description": "The replacement text",
+                },
+                "replace_all": {
+                    "type": "boolean",
+                    "description": "Replace all occurrences instead of requiring uniqueness",
+                    "default": False,
+                },
+            },
+            "required": ["path", "old_string", "new_string"],
+        }
+
+    async def execute(
+        self,
+        path: str,
+        old_string: str,
+        new_string: str,
+        replace_all: bool = False,
+    ) -> str:
+        """Replace ``old_string`` with ``new_string`` inside the file at ``path``.
+
+        Args:
+            path: File to edit; ``~`` is expanded and the path is resolved
+                before the jail check.
+            old_string: Exact, non-empty text to search for.
+            new_string: Text that takes its place (may be empty to delete).
+            replace_all: When true, every occurrence is replaced; otherwise
+                more than one occurrence is reported as an error.
+
+        Returns:
+            A success message with the number of replacements, or the value
+            of ``self._error(...)`` describing why nothing was changed.
+        """
+        try:
+            # An empty search string "matches" between every character:
+            # str.count("") is len(content) + 1 and str.replace("", x) inserts
+            # x at each of those positions, which would wreck the file when
+            # replace_all is set. Reject it before touching the filesystem.
+            if old_string == "":
+                return self._error("old_string must not be empty")
+
+            file_path = Path(path).expanduser().resolve()
+
+            # Security: check file jail
+            jail = get_settings().file_jail_path.resolve()
+            if not is_safe_path(file_path, jail):
+                return self._error(f"Access denied: {path} is outside allowed directory")
+
+            if not file_path.exists():
+                return self._error(f"File not found: {path}")
+
+            if not file_path.is_file():
+                return self._error(f"Not a file: {path}")
+
+            content = file_path.read_text(encoding="utf-8")
+
+            count = content.count(old_string)
+
+            if count == 0:
+                return self._error("old_string not found in file")
+
+            if not replace_all and count > 1:
+                return self._error(
+                    f"old_string appears {count} times. Provide more context to make it "
+                    f"unique, or set replace_all=true"
+                )
+
+            # At this point either replace_all is set or count == 1, so an
+            # unbounded replace touches exactly ``count`` occurrences.
+            new_content = content.replace(old_string, new_string)
+            file_path.write_text(new_content, encoding="utf-8")
+
+            return f"Successfully made {count} replacement(s) in {path}"
+
+        except UnicodeDecodeError:
+            return self._error(f"Cannot read {path}: not a text file or wrong encoding")
+        except Exception as e:
+            return self._error(str(e))
--- a/src/pocketpaw/tools/builtin/pip_install.py
+++ b/src/pocketpaw/tools/builtin/pip_install.py
@@ -0,0 +1,117 @@
+# Package installation tool - pip install with Guardian review.
+# Created: 2026-03-12
+
+import asyncio
+import re
+import subprocess
+import sys
+from typing import Any
+
+from pocketpaw.security import get_guardian
+from pocketpaw.tools.protocol import BaseTool
+
+# Whitelist: only characters valid in a single pip package spec are allowed.
+# Covers package names, extras (brackets), version specifiers, and version numbers.
+# No whitespace: this tool installs one package at a time.
+# Anything outside this set (semicolons, pipes, ampersands, backticks, dollar signs,
+# parens, newlines, spaces) will fail the match and be rejected.
+#
+# Two details matter for safety:
+#   - The first character must be alphanumeric. Otherwise values such as "--pre",
+#     "-e." or "-rrequirements.txt" pass the character whitelist and are parsed by
+#     pip as command-line options instead of as a package name.
+#   - The pattern ends with \Z rather than $, because $ also matches just before a
+#     trailing newline and would let "requests\n" through.
+_VALID_PACKAGE_SPEC_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\-\.\[\],~>=<!]*\Z")
+
+
+class InstallPackageTool(BaseTool):
+    """Install a Python package using pip with Guardian review.
+
+    The package spec is validated against ``_VALID_PACKAGE_SPEC_RE``, then the
+    equivalent ``pip install`` command line is submitted to the Guardian, and
+    only then is pip run as a subprocess of the current interpreter.
+    """
+
+    def __init__(self, timeout: int = 300):
+        """Store the subprocess timeout (seconds) applied to each pip run."""
+        self.timeout = timeout
+
+    @property
+    def name(self) -> str:
+        """Identifier the tool is exposed under."""
+        return "install_package"
+
+    @property
+    def description(self) -> str:
+        """Short usage summary shown alongside the tool name."""
+        return (
+            "Install a Python package using pip. Guardian AI reviews the package name "
+            "before installation to prevent typosquatting and malicious packages."
+        )
+
+    @property
+    def trust_level(self) -> str:
+        """Trust tier reported for this tool."""
+        return "elevated"
+
+    @property
+    def parameters(self) -> dict[str, Any]:
+        """JSON-schema style description of the accepted arguments."""
+        return {
+            "type": "object",
+            "properties": {
+                "package": {
+                    "type": "string",
+                    "description": (
+                        "Package name with optional version specifier "
+                        '(e.g. "requests", "paw-ytp>=0.1.0")'
+                    ),
+                },
+                "upgrade": {
+                    "type": "boolean",
+                    "description": "Whether to use --upgrade flag",
+                    "default": False,
+                },
+            },
+            "required": ["package"],
+        }
+
+    def _is_valid_package_spec(self, package: str) -> bool:
+        """Return True if ``package`` is a single, option-free pip requirement.
+
+        The whole string must match the whitelist (``fullmatch``), so neither
+        a leading dash nor a trailing newline can slip through. Non-string
+        input is rejected instead of raising ``TypeError``.
+        """
+        if not isinstance(package, str):
+            return False
+        return _VALID_PACKAGE_SPEC_RE.fullmatch(package) is not None
+
+    async def execute(self, package: str, upgrade: bool = False) -> str:
+        """Install ``package`` via pip after validation and Guardian review.
+
+        Args:
+            package: One requirement spec, e.g. ``"requests"`` or
+                ``"paw-ytp>=0.1.0"``.
+            upgrade: When true, ``--upgrade`` is appended to the pip command.
+
+        Returns:
+            pip's stdout on success (or a placeholder when pip printed
+            nothing), otherwise the value of ``self._error(...)``.
+        """
+
+        # 1. Validate package spec, reject shell metacharacters and pip options
+        if not self._is_valid_package_spec(package):
+            return self._error(
+                f"Invalid package spec '{package}': contains disallowed characters. "
+                "Only alphanumeric characters, hyphens, underscores, dots, brackets, "
+                "and version specifiers are allowed. "
+                "The spec must start with a letter or digit."
+            )
+
+        # 2. Guardian AI review
+        pip_command = f"pip install {package}"
+        is_safe, reason = await get_guardian().check_command(pip_command)
+        if not is_safe:
+            return self._error(f"Package blocked by Guardian: {reason}")
+
+        # 3. Build the subprocess command (argument list, never a shell string)
+        cmd = [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            package,
+            "--no-input",
+            "--disable-pip-version-check",
+        ]
+        if upgrade:
+            cmd.append("--upgrade")
+
+        try:
+            # subprocess.run blocks, so it is pushed to the default executor
+            # to keep the event loop responsive while pip works.
+            result = await asyncio.get_running_loop().run_in_executor(
+                None,
+                lambda: subprocess.run(
+                    cmd,
+                    capture_output=True,
+                    text=True,
+                    timeout=self.timeout,
+                ),
+            )
+
+            if result.returncode != 0:
+                error_output = result.stderr.strip() or result.stdout.strip()
+                return self._error(f"pip exited with code {result.returncode}:\n{error_output}")
+
+            return result.stdout.strip() or "(pip produced no output)"
+
+        except subprocess.TimeoutExpired:
+            return self._error(f"pip install timed out after {self.timeout}s")
+        except Exception as e:
+            return self._error(str(e))
--- a/src/pocketpaw/tools/builtin/python_exec.py
+++ b/src/pocketpaw/tools/builtin/python_exec.py
@@ -0,0 +1,97 @@
+# Python execution tool - sandboxed Python script runner.
+# Created: 2026-03-12
+
+import asyncio
+import subprocess
+import sys
+import uuid
+from typing import Any
+
+from pocketpaw.config import get_settings
+from pocketpaw.security import get_guardian
+from pocketpaw.tools.protocol import BaseTool
+
+
+class RunPythonTool(BaseTool):
+    """Execute a Python script in a subprocess rooted in the file jail.
+
+    NOTE(review): the isolation visible in this class is a separate
+    interpreter process whose working directory is the jail, preceded by a
+    Guardian review of the code. Confirm whether any further sandboxing is
+    applied elsewhere before relying on the word "sandboxed".
+    """
+
+    @property
+    def name(self) -> str:
+        """Identifier the tool is exposed under."""
+        return "run_python"
+
+    @property
+    def description(self) -> str:
+        """Short usage summary shown alongside the tool name."""
+        return (
+            "Execute a Python script in a sandboxed subprocess and return its output. "
+            "Use for data processing, file generation, calculations, or running installed packages."
+        )
+
+    @property
+    def trust_level(self) -> str:
+        """Trust tier reported for this tool."""
+        return "elevated"
+
+    @property
+    def parameters(self) -> dict[str, Any]:
+        """JSON-schema style description of the accepted arguments."""
+        return {
+            "type": "object",
+            "properties": {
+                "code": {
+                    "type": "string",
+                    "description": "Python code to execute",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Execution timeout in seconds (default: 120)",
+                    "default": 120,
+                },
+            },
+            "required": ["code"],
+        }
+
+    async def execute(self, code: str, timeout: int = 120) -> str:  # type: ignore[override]
+        """Run ``code`` with the current interpreter and return its output.
+
+        Args:
+            code: Python source to execute; it is written to a uniquely named
+                temporary script inside the file jail.
+            timeout: Seconds after which the subprocess is abandoned.
+
+        Returns:
+            Captured stdout, followed by stderr and a non-zero exit code when
+            present, or ``"(no output)"``. Any failure (Guardian rejection,
+            jail setup, timeout, other exceptions) is returned as the value
+            of ``self._error(...)`` rather than raised.
+        """
+        # Guardian AI check on the code before execution
+        is_safe, reason = await get_guardian().check_command(code)
+        if not is_safe:
+            return self._error(f"Code blocked by Guardian: {reason}")
+
+        # Stays None until the script location is known, so the cleanup in
+        # ``finally`` is skipped when jail setup itself failed.
+        script_path = None
+
+        try:
+            # Jail setup is inside the try block so that a failure here (for
+            # example a permission error from mkdir) is reported through
+            # _error like every other failure instead of escaping as a raise.
+            jail_path = get_settings().file_jail_path
+            jail_path.mkdir(parents=True, exist_ok=True)
+
+            # Write code to a temp file in the jail so multiline scripts work cleanly
+            script_path = jail_path / f"_pocketpaw_run_{uuid.uuid4().hex}.py"
+            script_path.write_text(code, encoding="utf-8")
+
+            # subprocess.run blocks, so it is pushed to the default executor.
+            result = await asyncio.get_running_loop().run_in_executor(
+                None,
+                lambda: subprocess.run(
+                    [sys.executable, str(script_path)],
+                    capture_output=True,
+                    text=True,
+                    timeout=timeout,
+                    cwd=str(jail_path),
+                ),
+            )
+
+            output = result.stdout
+            if result.stderr:
+                output += f"\nSTDERR:\n{result.stderr}"
+            if result.returncode != 0:
+                output += f"\nExit code: {result.returncode}"
+
+            return output.strip() or "(no output)"
+
+        except subprocess.TimeoutExpired:
+            return self._error(f"Python script timed out after {timeout}s")
+        except Exception as e:
+            return self._error(str(e))
+        finally:
+            # Always clean up the temp script file. Removal is best-effort:
+            # only filesystem errors are ignored, and a script that was never
+            # created (or is already gone) is not an error.
+            if script_path is not None:
+                try:
+                    script_path.unlink(missing_ok=True)
+                except OSError:
+                    pass
--- a/src/pocketpaw/tools/policy.py
+++ b/src/pocketpaw/tools/policy.py
@@ -23,7 +23,8 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 TOOL_GROUPS: dict[str, list[str]] = {
    "group:fs": ["read_file", "write_file", "edit_file", "list_dir", "directory_tree"],
-    "group:shell": ["shell"],
+    "group:shell": ["shell", "run_python"],
+    "group:packages": ["install_package"],
    "group:browser": ["browser"],
    "group:memory": ["remember", "recall", "forget"],
    "group:desktop": ["desktop", "system_info"],
@@ -42,7 +43,7 @@ TOOL_GROUPS: dict[str, list[str]] = {
        "spotify_playback",
        "spotify_playlist",
    ],
-    "group:media": ["image_generate", "ocr"],
+    "group:media": ["image_generate", "ocr", "deliver_artifact"],
    "group:translate": ["translate"],
    "group:reddit": ["reddit_search", "reddit_read", "reddit_trending"],
    "group:sessions": [
@@ -66,7 +67,7 @@ TOOL_PROFILES: dict[str, dict] = {
        "allow": ["group:memory", "group:sessions", "group:explorer"],
    },
    "coding": {
-        "allow": ["group:fs", "group:shell", "group:memory", "group:explorer"],
+        "allow": ["group:fs", "group:shell", "group:packages", "group:memory", "group:explorer"],
    },
    "full": {},  # No restrictions — everything allowed
 }
--- a/tests/test_deliver_artifact.py
+++ b/tests/test_deliver_artifact.py
@@ -0,0 +1,97 @@
+# Tests for DeliverArtifactTool - file delivery to user channels.
+# Created: 2026-03-12
+
+from unittest.mock import patch
+
+import pytest
+
+from pocketpaw.config import Settings
+from pocketpaw.tools.builtin.deliver import DeliverArtifactTool
+
+
+@pytest.fixture
+def mock_settings(tmp_path):
+    """Patch deliver.get_settings so the file jail is the per-test temp dir."""
+    settings = Settings(file_jail_path=tmp_path)
+    with patch("pocketpaw.tools.builtin.deliver.get_settings", return_value=settings):
+        yield settings
+
+
+@pytest.fixture
+def tool():
+    """Fresh DeliverArtifactTool instance for each test."""
+    return DeliverArtifactTool()
+
+
+# Every coroutine test below carries an explicit asyncio marker, matching the
+# other test modules added alongside this one, so the tests do not depend on
+# pytest-asyncio running in auto mode.
+
+
+@pytest.mark.asyncio
+async def test_deliver_basic(tool, mock_settings, tmp_path):
+    """Deliver an existing file returns media tag."""
+    f = tmp_path / "output.txt"
+    f.write_text("hello")
+    result = await tool.execute(path=str(f))
+    assert f"<!-- media:{f} -->" in result
+    assert "output.txt" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_with_caption(tool, mock_settings, tmp_path):
+    """Caption appears in the result."""
+    f = tmp_path / "chart.png"
+    f.write_bytes(b"\x89PNG" + b"\x00" * 100)
+    result = await tool.execute(path=str(f), caption="Here's your chart")
+    assert "Here's your chart" in result
+    assert f"<!-- media:{f} -->" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_image_mime(tool, mock_settings, tmp_path):
+    """Image files report correct mime type."""
+    f = tmp_path / "photo.jpg"
+    f.write_bytes(b"\xff\xd8\xff" + b"\x00" * 50)
+    result = await tool.execute(path=str(f))
+    assert "image/jpeg" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_video_mime(tool, mock_settings, tmp_path):
+    """Video files report correct mime type."""
+    f = tmp_path / "clip.mp4"
+    f.write_bytes(b"\x00" * 200)
+    result = await tool.execute(path=str(f))
+    assert "video/mp4" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_file_not_found(tool, mock_settings, tmp_path):
+    """Non-existent file returns error."""
+    result = await tool.execute(path=str(tmp_path / "nope.txt"))
+    assert "Error" in result
+    assert "not found" in result.lower()
+
+
+@pytest.mark.asyncio
+async def test_deliver_not_a_file(tool, mock_settings, tmp_path):
+    """Directory path returns error."""
+    d = tmp_path / "subdir"
+    d.mkdir()
+    result = await tool.execute(path=str(d))
+    assert "Error" in result
+    assert "Not a file" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_file_jail(tool, mock_settings, tmp_path):
+    """Path outside jail is blocked."""
+    result = await tool.execute(path="/etc/passwd")
+    assert "Error" in result
+    assert "Access denied" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_size_info(tool, mock_settings, tmp_path):
+    """Result includes file size information."""
+    f = tmp_path / "data.csv"
+    f.write_text("a,b,c\n" * 500)
+    result = await tool.execute(path=str(f))
+    assert "KB" in result or "MB" in result
+    assert "data.csv" in result
+
+
+@pytest.mark.asyncio
+async def test_deliver_definition(tool):
+    """Tool definition has correct metadata."""
+    defn = tool.definition
+    assert defn.name == "deliver_artifact"
+    assert defn.trust_level == "standard"
+    assert "path" in defn.parameters["properties"]
+    assert "caption" in defn.parameters["properties"]
+    assert "path" in defn.parameters["required"]
--- a/tests/test_edit_file.py
+++ b/tests/test_edit_file.py
@@ -0,0 +1,175 @@
+# Tests for EditFileTool - find-and-replace file editing.
+# Created: 2026-03-12
+
+from unittest.mock import patch
+
+import pytest
+
+from pocketpaw.config import Settings
+from pocketpaw.tools.builtin.filesystem import EditFileTool
+
+
+@pytest.fixture
+def jail(tmp_path):
+    """Expose pytest's per-test temporary directory as the file jail root."""
+    jail_root = tmp_path
+    return jail_root
+
+
+@pytest.fixture
+def mock_settings(jail):
+    """Yield Settings rooted at the temp jail while filesystem.get_settings is patched."""
+    jailed_settings = Settings(file_jail_path=jail)
+    settings_patch = patch(
+        "pocketpaw.tools.builtin.filesystem.get_settings",
+        return_value=jailed_settings,
+    )
+    with settings_patch:
+        yield jailed_settings
+
+
+@pytest.mark.asyncio
+async def test_edit_file_basic(jail, mock_settings):
+    """A single unique match is swapped for the new text on disk."""
+    target = jail / "hello.txt"
+    target.write_text("Hello World")
+
+    outcome = await EditFileTool().execute(
+        path=str(target),
+        old_string="World",
+        new_string="PocketPaw",
+    )
+
+    assert "replacement" in outcome
+    assert target.read_text() == "Hello PocketPaw"
+
+
+@pytest.mark.asyncio
+async def test_edit_file_not_found(jail, mock_settings):
+    """A path that does not exist yields a 'not found' error."""
+    absent = jail / "missing.txt"
+
+    outcome = await EditFileTool().execute(
+        path=str(absent),
+        old_string="anything",
+        new_string="replacement",
+    )
+
+    assert "Error:" in outcome
+    assert "not found" in outcome.lower()
+
+
+@pytest.mark.asyncio
+async def test_edit_file_old_string_missing(jail, mock_settings):
+    """Searching for text the file does not contain is reported as an error."""
+    target = jail / "content.txt"
+    target.write_text("The quick brown fox")
+
+    outcome = await EditFileTool().execute(
+        path=str(target),
+        old_string="lazy dog",
+        new_string="cat",
+    )
+
+    assert "Error:" in outcome
+    assert "not found" in outcome
+
+
+@pytest.mark.asyncio
+async def test_edit_file_ambiguous(jail, mock_settings):
+    """Three matches without replace_all is an error that mentions the count."""
+    target = jail / "repeat.txt"
+    target.write_text("foo bar foo baz foo")
+
+    outcome = await EditFileTool().execute(
+        path=str(target),
+        old_string="foo",
+        new_string="qux",
+        replace_all=False,
+    )
+
+    assert "Error:" in outcome
+    assert "3" in outcome
+
+
+@pytest.mark.asyncio
+async def test_edit_file_replace_all(jail, mock_settings):
+    """With replace_all=True every occurrence is rewritten and counted."""
+    target = jail / "repeat.txt"
+    target.write_text("foo bar foo baz foo")
+
+    outcome = await EditFileTool().execute(
+        path=str(target),
+        old_string="foo",
+        new_string="qux",
+        replace_all=True,
+    )
+
+    assert "Error:" not in outcome
+    assert target.read_text() == "qux bar qux baz qux"
+    assert "3" in outcome
+
+
+@pytest.mark.asyncio
+async def test_edit_file_multiline(jail, mock_settings):
+    """A search string spanning several lines is replaced as one block."""
+    target = jail / "multi.txt"
+    target.write_text("line one\nline two\nline three\n")
+
+    outcome = await EditFileTool().execute(
+        path=str(target),
+        old_string="line one\nline two\n",
+        new_string="replaced block\n",
+    )
+
+    assert "Error:" not in outcome
+    assert target.read_text() == "replaced block\nline three\n"
+
+
+@pytest.mark.asyncio
+async def test_edit_file_empty_new_string(jail, mock_settings):
+    """An empty replacement removes the matched text from the file."""
+    target = jail / "delete.txt"
+    target.write_text("keep this DELETE that")
+
+    outcome = await EditFileTool().execute(
+        path=str(target),
+        old_string=" DELETE",
+        new_string="",
+    )
+
+    assert "Error:" not in outcome
+    assert target.read_text() == "keep this that"
+
+
+@pytest.mark.asyncio
+async def test_edit_file_file_jail(jail, mock_settings):
+    """A file one level above the jail is refused and left untouched."""
+    secret_text = "sensitive data"
+    escaped = jail.parent / "outside_secret.txt"
+    escaped.write_text(secret_text)
+
+    outcome = await EditFileTool().execute(
+        path=str(escaped),
+        old_string="sensitive",
+        new_string="redacted",
+    )
+
+    assert "Access denied" in outcome
+    # The refused edit must not have modified the file.
+    assert escaped.read_text() == secret_text
+
+
+@pytest.mark.asyncio
+async def test_edit_file_preserves_rest(jail, mock_settings):
+    """Only the matched word changes; the surrounding words survive."""
+    target = jail / "partial.txt"
+    target.write_text("alpha beta gamma delta")
+
+    await EditFileTool().execute(path=str(target), old_string="beta", new_string="BETA")
+
+    after = target.read_text()
+    for expected_word in ("alpha", "BETA", "gamma", "delta"):
+        assert expected_word in after
+    assert "beta" not in after
+
+
+@pytest.mark.asyncio
+async def test_edit_file_definition(mock_settings):
+    """The exported definition carries the expected name, trust level and schema."""
+    definition = EditFileTool().definition
+
+    assert definition.name == "edit_file"
+    assert definition.trust_level == "standard"
+
+    required_params = definition.parameters.get("required", [])
+    for param in ("path", "old_string", "new_string"):
+        assert param in required_params
+
+    schema_props = definition.parameters.get("properties", {})
+    assert "replace_all" in schema_props
+    assert schema_props["replace_all"]["type"] == "boolean"
--- a/tests/test_install_package.py
+++ b/tests/test_install_package.py
@@ -0,0 +1,271 @@
+# Tests for InstallPackageTool - pip install with Guardian review.
+# Created: 2026-03-12
+
+import subprocess
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture
+def mock_guardian():
+    """Provide a Guardian double whose async check_command always approves."""
+    approve = AsyncMock(return_value=(True, "Looks safe"))
+    stub = MagicMock(check_command=approve)
+    return stub
+
+
+@pytest.fixture
+def successful_pip_result():
+    """Stand-in for the result object of a pip run that exited with code 0."""
+    return MagicMock(
+        returncode=0,
+        stdout="Successfully installed requests-2.31.0",
+        stderr="",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Happy path
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_install_package_basic(mock_guardian, successful_pip_result):
+    """With Guardian approving and pip exiting 0, the tool reports pip's stdout."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    # Guardian and subprocess.run are both replaced with test doubles.
+    guardian_patch = patch(
+        "pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian
+    )
+    with guardian_patch, patch("subprocess.run", return_value=successful_pip_result):
+        output = await InstallPackageTool().execute(package="requests")
+
+    assert "Error" not in output
+    assert "Successfully installed" in output
+
+
+@pytest.mark.asyncio
+async def test_install_package_with_version(mock_guardian):
+    """A pinned spec such as requests>=2.28.0 reaches the pip argv verbatim."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    seen_argvs: list[list[str]] = []
+
+    def fake_run(cmd, **kwargs):
+        # Record the argv, then hand back a successful-looking result.
+        seen_argvs.append(cmd)
+        return MagicMock(
+            returncode=0,
+            stdout="Successfully installed requests-2.31.0",
+            stderr="",
+        )
+
+    with (
+        patch("subprocess.run", side_effect=fake_run),
+        patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian),
+    ):
+        await InstallPackageTool().execute(package="requests>=2.28.0")
+
+    assert len(seen_argvs) == 1
+    argv = seen_argvs[0]
+    assert "requests>=2.28.0" in argv
+
+
+@pytest.mark.asyncio
+async def test_install_package_with_extras(mock_guardian):
+    """An extras spec (pocketpaw[soul]) passes validation and reaches pip as-is."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    seen_argvs: list[list[str]] = []
+
+    def fake_run(cmd, **kwargs):
+        # Record the argv, then hand back a successful-looking result.
+        seen_argvs.append(cmd)
+        return MagicMock(
+            returncode=0,
+            stdout="Successfully installed pocketpaw-0.4.4",
+            stderr="",
+        )
+
+    with (
+        patch("subprocess.run", side_effect=fake_run),
+        patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian),
+    ):
+        output = await InstallPackageTool().execute(package="pocketpaw[soul]")
+
+    assert "Error" not in output
+    argv = seen_argvs[0]
+    assert "pocketpaw[soul]" in argv
+
+
+@pytest.mark.asyncio
+async def test_install_package_upgrade(mock_guardian):
+    """Passing upgrade=True puts the --upgrade flag into the pip argv."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    seen_argvs: list[list[str]] = []
+
+    def fake_run(cmd, **kwargs):
+        # Record the argv, then hand back a successful-looking result.
+        seen_argvs.append(cmd)
+        return MagicMock(
+            returncode=0,
+            stdout="Successfully installed pip-24.0",
+            stderr="",
+        )
+
+    with (
+        patch("subprocess.run", side_effect=fake_run),
+        patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian),
+    ):
+        await InstallPackageTool().execute(package="pip", upgrade=True)
+
+    argv = seen_argvs[0]
+    assert "--upgrade" in argv
+
+
+# ---------------------------------------------------------------------------
+# Shell injection blocking
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_install_package_shell_injection_semicolon(mock_guardian):
+    """A spec containing ';' is refused by validation, ahead of Guardian review."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    malicious_spec = "foo; rm -rf /"
+    with patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian):
+        outcome = await InstallPackageTool().execute(package=malicious_spec)
+
+    # Rejection must come from input validation, so Guardian is never consulted.
+    mock_guardian.check_command.assert_not_called()
+    assert outcome.startswith("Error:")
+
+
+@pytest.mark.asyncio
+async def test_install_package_shell_injection_pipe(mock_guardian):
+    """A spec containing a shell pipe is refused before Guardian is consulted."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    malicious_spec = "foo | cat /etc/passwd"
+    with patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian):
+        outcome = await InstallPackageTool().execute(package=malicious_spec)
+
+    mock_guardian.check_command.assert_not_called()
+    assert outcome.startswith("Error:")
+
+
+@pytest.mark.asyncio
+async def test_install_package_shell_injection_backtick(mock_guardian):
+    """A spec with backtick command substitution is refused before Guardian is consulted."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    malicious_spec = "foo`whoami`"
+    with patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian):
+        outcome = await InstallPackageTool().execute(package=malicious_spec)
+
+    mock_guardian.check_command.assert_not_called()
+    assert outcome.startswith("Error:")
+
+
+# ---------------------------------------------------------------------------
+# Guardian blocking
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_install_package_guardian_block():
+    """A Guardian veto yields a Guardian-attributed error and pip is never launched."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    reason = "suspicious package, possible typosquatting"
+    veto = AsyncMock(return_value=(False, reason))
+    denying_guardian = MagicMock(check_command=veto)
+
+    with (
+        patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=denying_guardian),
+        patch("subprocess.run") as pip_run,
+    ):
+        installer = InstallPackageTool()
+        # The package name below is a deliberate typo.
+        outcome = await installer.execute(package="reqeusts")
+
+    # The subprocess layer must stay untouched once Guardian says no.
+    pip_run.assert_not_called()
+    assert outcome.startswith("Error:")
+    assert "Guardian" in outcome
+
+
+# ---------------------------------------------------------------------------
+# Error conditions
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_install_package_timeout(mock_guardian):
+    """If the pip subprocess times out, the tool answers with a 'timed out' error."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    def expire_immediately(cmd, **kwargs):
+        raise subprocess.TimeoutExpired(cmd=cmd, timeout=300)
+
+    with (
+        patch("subprocess.run", side_effect=expire_immediately),
+        patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian),
+    ):
+        installer = InstallPackageTool(timeout=300)
+        outcome = await installer.execute(package="some-large-package")
+
+    assert "timed out" in outcome
+    assert outcome.startswith("Error:")
+
+
+@pytest.mark.asyncio
+async def test_install_package_pip_failure(mock_guardian):
+    """When pip exits non-zero, the tool returns an error that carries pip's stderr."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    failed_run = MagicMock(
+        returncode=1,
+        stdout="",
+        stderr="ERROR: Could not find a version that satisfies the requirement nosuchpkg",
+    )
+
+    with (
+        patch("subprocess.run", return_value=failed_run),
+        patch("pocketpaw.tools.builtin.pip_install.get_guardian", return_value=mock_guardian),
+    ):
+        outcome = await InstallPackageTool().execute(package="nosuchpkg")
+
+    assert "Could not find" in outcome
+    assert outcome.startswith("Error:")
+
+
+# ---------------------------------------------------------------------------
+# Tool definition / metadata
+# ---------------------------------------------------------------------------
+
+
+def test_install_package_definition():
+    """The install_package schema: elevated trust, `package` required, `upgrade` optional."""
+    from pocketpaw.tools.builtin.pip_install import InstallPackageTool
+
+    spec = InstallPackageTool().definition
+    schema = spec.parameters
+
+    assert spec.trust_level == "elevated"
+    assert spec.name == "install_package"
+
+    # `package` is mandatory; `upgrade` is opt-in.
+    mandatory = schema["required"]
+    assert "package" in mandatory
+    assert "upgrade" not in mandatory
+
+    fields = schema["properties"]
+    assert "package" in fields
+    assert "upgrade" in fields
+    # Leaving `upgrade` out must mean "do not upgrade".
+    assert fields["upgrade"]["default"] is False
--- a/tests/test_run_python.py
+++ b/tests/test_run_python.py
@@ -0,0 +1,220 @@
+# Tests for RunPythonTool - sandboxed Python execution.
+# Created: 2026-03-12
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture
+def mock_guardian_safe():
+    """Guardian double whose async check_command approves with an empty reason."""
+    approve = AsyncMock(return_value=(True, ""))
+    stub = MagicMock(check_command=approve)
+    return stub
+
+
+@pytest.fixture
+def jail(tmp_path):
+    """Expose pytest's per-test tmp_path as the directory that acts as the file jail."""
+    return tmp_path
+
+
+@pytest.fixture
+def mock_settings(jail):
+    """Settings double whose file_jail_path is the jail fixture's directory."""
+    # Only file_jail_path is pinned; other attributes stay auto-created mocks.
+    fake_settings = MagicMock(file_jail_path=jail)
+    return fake_settings
+
+
+# ---------------------------------------------------------------------------
+# Happy-path tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_run_python_basic(mock_guardian_safe, mock_settings):
+    """Running a one-line print of hello returns output that contains 'hello'."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    snippet = 'print("hello")'
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        output = await RunPythonTool().execute(code=snippet)
+
+    assert "hello" in output
+
+
+@pytest.mark.asyncio
+async def test_run_python_multiline(mock_guardian_safe, mock_settings):
+    """A several-line script that imports from the stdlib runs and prints its result."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    # Three statements: import, compute, print.
+    script = "import math\nresult = math.sqrt(9)\nprint(f'sqrt={result}')"
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        output = await RunPythonTool().execute(code=script)
+
+    assert "sqrt=3.0" in output
+
+
+@pytest.mark.asyncio
+async def test_run_python_stderr(mock_guardian_safe, mock_settings):
+    """Text written to stderr shows up in the output under a STDERR marker."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    script = "import sys\nsys.stderr.write('boom\\n')"
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        runner = RunPythonTool()
+        output = await runner.execute(code=script)
+
+    assert "boom" in output
+    assert "STDERR" in output
+
+
+@pytest.mark.asyncio
+async def test_run_python_exit_code(mock_guardian_safe, mock_settings):
+    """A script ending in sys.exit(1) is reported with 'Exit code: 1'."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    # The script does nothing except exit with status 1.
+    script = "import sys\nsys.exit(1)"
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        output = await RunPythonTool().execute(code=script)
+
+    assert "Exit code: 1" in output
+
+
+@pytest.mark.asyncio
+async def test_run_python_timeout(mock_guardian_safe, mock_settings):
+    """A never-ending loop run with timeout=1 comes back as a timed-out message."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    busy_loop = "while True: pass"
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        runner = RunPythonTool()
+        outcome = await runner.execute(code=busy_loop, timeout=1)
+
+    assert "timed out" in outcome.lower()
+
+
+@pytest.mark.asyncio
+async def test_run_python_syntax_error(mock_guardian_safe, mock_settings):
+    """Invalid Python should surface the interpreter's SyntaxError in the tool output."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    code = "def broken(:"  # deliberate syntax error
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+    ):
+        tool = RunPythonTool()
+        result = await tool.execute(code=code)
+
+    # Match "SyntaxError" alone: a fallback on "Error" would pass for any failure message.
+    assert "SyntaxError" in result
+
+
+# ---------------------------------------------------------------------------
+# Security tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_run_python_guardian_block(mock_settings):
+    """When Guardian answers (False, reason), the tool output reports the block."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    veto = AsyncMock(return_value=(False, "blocked by policy"))
+    denying_guardian = MagicMock(check_command=veto)
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=denying_guardian),
+    ):
+        runner = RunPythonTool()
+        outcome = await runner.execute(code='print("hello")')
+
+    assert "blocked" in outcome.lower()
+
+
+# ---------------------------------------------------------------------------
+# File system / isolation tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_run_python_file_creation(mock_guardian_safe, mock_settings, jail):
+    """A file the script writes via a relative path ends up inside the jail directory."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    script = "with open('output.txt', 'w') as f:\n    f.write('created')"
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        await RunPythonTool().execute(code=script)
+
+    artifact = jail / "output.txt"
+    assert artifact.exists()
+    assert artifact.read_text() == "created"
+
+
+@pytest.mark.asyncio
+async def test_run_python_cleanup(mock_guardian_safe, mock_settings, jail):
+    """After a run finishes, no temporary script file is left behind in the jail."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    with (
+        patch("pocketpaw.tools.builtin.python_exec.get_settings", return_value=mock_settings),
+        patch("pocketpaw.tools.builtin.python_exec.get_guardian", return_value=mock_guardian_safe),
+    ):
+        runner = RunPythonTool()
+        await runner.execute(code='print("cleanup test")')
+
+    # Any file matching the _pocketpaw_run_*.py pattern counts as a leak.
+    leftover = list(jail.glob("_pocketpaw_run_*.py"))
+    assert leftover == [], f"Temp script files not cleaned up: {leftover}"
+
+
+# ---------------------------------------------------------------------------
+# Definition / metadata tests
+# ---------------------------------------------------------------------------
+
+
+def test_run_python_definition():
+    """The run_python schema: elevated trust, `code` required, `timeout` optional."""
+    from pocketpaw.tools.builtin.python_exec import RunPythonTool
+
+    spec = RunPythonTool().definition
+    schema = spec.parameters
+
+    assert spec.trust_level == "elevated"
+    assert spec.name == "run_python"
+
+    fields = schema["properties"]
+    for expected in ("code", "timeout"):
+        assert expected in fields
+    assert "code" in schema["required"]
+    assert "timeout" not in schema.get("required", [])
--- a/tests/test_tool_policy.py
+++ b/tests/test_tool_policy.py
@@ -65,7 +65,7 @@ class TestGroupExpansion:

    def test_expand_multiple_groups(self):
        result = ToolPolicy._expand_names(["group:shell", "group:memory"])
-        assert result == {"shell", "remember", "recall", "forget"}
+        assert result == {"shell", "run_python", "remember", "recall", "forget"}

    def test_expand_mixed_groups_and_names(self):
        result = ToolPolicy._expand_names(["group:memory", "custom_tool"])