async def parse_goal(user_input: str) -> GoalAnalysis:
    """Analyze a natural-language goal with a one-off GoalParser.

    Convenience wrapper: builds a fresh ``GoalParser`` and awaits its
    ``parse()`` coroutine.

    Args:
        user_input: Natural language goal description.

    Returns:
        GoalAnalysis with domain, complexity, roles, and clarifications.
    """
    analysis = await GoalParser().parse(user_input)
    return analysis
class ParseGoalRequest(BaseModel):
    """Payload accepted by POST /parse-goal."""

    # Required free-text goal; validation bounds the length to 10..5000 characters.
    description: str = Field(
        ...,
        description="Natural language goal description",
        min_length=10,
        max_length=5000,
    )
@router.post("/parse-goal")
async def parse_goal(request: ParseGoalRequest) -> dict[str, Any]:
    """Analyze a user's goal and return structured analysis.

    Returns domain detection, complexity estimation, AI/human roles,
    and clarification questions. This is a preview step — the user
    can review the analysis before starting planning.

    Args:
        request: Validated body carrying the goal ``description``.

    Returns:
        ``{"success": True, "goal_analysis": <GoalAnalysis.to_dict()>}``.

    Raises:
        HTTPException: 502 when the parser raises RuntimeError (its documented
            signal that the LLM failed to produce output); 500 for any other
            failure. The original exception is chained as ``__cause__``.
    """
    from pocketpaw.deep_work.goal_parser import GoalParser

    try:
        analysis = await GoalParser().parse(request.description)
        return {"success": True, "goal_analysis": analysis.to_dict()}
    except RuntimeError as e:
        raise HTTPException(status_code=502, detail=str(e)) from e
    except Exception as e:
        # logger.exception already attaches the traceback and exception text.
        logger.exception("Goal parsing failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
@@ -86,6 +126,7 @@ async def start_deep_work(request: StartDeepWorkRequest) -> dict[str, Any]: project.id, request.description, research_depth=request.research_depth, + goal_analysis=request.goal_analysis, ) except Exception as e: logger.exception(f"Background planning failed for {project.id}: {e}") @@ -129,6 +170,9 @@ async def get_plan(project_id: str) -> dict[str, Any]: project_dict = _enrich_project_dict(project.to_dict()) + # Include goal analysis from project metadata (if available) + goal_analysis = project.metadata.get("goal_analysis") + return { "project": project_dict, "tasks": [t.to_dict() for t in tasks], @@ -136,6 +180,7 @@ async def get_plan(project_id: str) -> dict[str, Any]: "prd": prd, "execution_levels": execution_levels, "task_level_map": task_level_map, + "goal_analysis": goal_analysis, } diff --git a/src/pocketpaw/deep_work/goal_parser.py b/src/pocketpaw/deep_work/goal_parser.py new file mode 100644 index 00000000..847d9f1a --- /dev/null +++ b/src/pocketpaw/deep_work/goal_parser.py @@ -0,0 +1,275 @@ +# Deep Work Goal Parser — structured goal analysis via LLM. +# Created: 2026-02-18 +# +# First primitive in the Deep Work pipeline. Takes messy human input +# and produces a structured GoalAnalysis: domain detection, complexity +# estimation, AI/human role identification, and clarification questions. +# +# Public API: +# GoalAnalysis — dataclass with parsed goal structure +# GoalParser.parse(user_input) -> GoalAnalysis +# GoalParser.parse_raw(raw_json) -> GoalAnalysis (for testing) + +import json +import logging +import re +from dataclasses import dataclass, field +from typing import Any + +logger = logging.getLogger(__name__) + +# Regex to strip markdown code fences (```json ... ``` or ``` ... 
# Regex to strip markdown code fences (```json ... ``` or ``` ... ```)
_CODE_FENCE_RE = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)

# Valid domain values
VALID_DOMAINS = frozenset({"code", "business", "creative", "education", "events", "home", "hybrid"})

# Valid complexity values
VALID_COMPLEXITIES = frozenset({"S", "M", "L", "XL"})

# Valid research depth values
VALID_RESEARCH_DEPTHS = frozenset({"none", "quick", "standard", "deep"})

# Minimum number of phases enforced per complexity level
_MIN_PHASES_BY_COMPLEXITY = {"S": 1, "M": 1, "L": 2, "XL": 3}

# Caps applied to list fields returned by the LLM
_MAX_SUB_DOMAINS = 6
_MAX_CLARIFICATIONS = 4

# Default confidence, also used when the LLM returns an unusable value
_DEFAULT_CONFIDENCE = 0.7

# Human-readable label for each domain
_DOMAIN_LABELS = {
    "code": "Software & Code",
    "business": "Business & Strategy",
    "creative": "Creative & Content",
    "education": "Learning & Education",
    "events": "Events & Logistics",
    "home": "Home & Physical",
    "hybrid": "Multi-Domain",
}


@dataclass
class GoalAnalysis:
    """Structured analysis of a user's project goal.

    Produced by GoalParser as the first step in the Deep Work pipeline.
    Informs research depth, planner context, and frontend display.

    Attributes:
        goal: Clear one-sentence restatement of the user's goal.
        domain: Primary domain (code, business, creative, education, events, home, hybrid).
        sub_domains: Specific sub-domains (e.g. "web-development", "react").
        complexity: Estimated complexity (S, M, L, XL).
        estimated_phases: Number of expected project phases (1-10).
        ai_capabilities: What AI can do for this project.
        human_requirements: What the human must do (AI cannot).
        constraints_detected: Budget, timeline, or technical constraints found in input.
        clarifications_needed: Questions to ask before planning.
        suggested_research_depth: Recommended research depth (none/quick/standard/deep).
        confidence: Parser confidence in the analysis (0.0 to 1.0).
    """

    goal: str = ""
    domain: str = "code"
    sub_domains: list[str] = field(default_factory=list)
    complexity: str = "M"
    estimated_phases: int = 1
    ai_capabilities: list[str] = field(default_factory=list)
    human_requirements: list[str] = field(default_factory=list)
    constraints_detected: list[str] = field(default_factory=list)
    clarifications_needed: list[str] = field(default_factory=list)
    suggested_research_depth: str = "standard"
    confidence: float = _DEFAULT_CONFIDENCE

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "goal": self.goal,
            "domain": self.domain,
            "sub_domains": self.sub_domains,
            "complexity": self.complexity,
            "estimated_phases": self.estimated_phases,
            "ai_capabilities": self.ai_capabilities,
            "human_requirements": self.human_requirements,
            "constraints_detected": self.constraints_detected,
            "clarifications_needed": self.clarifications_needed,
            "suggested_research_depth": self.suggested_research_depth,
            "confidence": self.confidence,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "GoalAnalysis":
        """Create from a dictionary, normalizing every field.

        The input is typically untrusted LLM output, so each field is
        validated on its own: a value of the wrong type falls back to that
        field's default instead of raising, which keeps one bad field from
        discarding the rest of the analysis.

        Args:
            data: Mapping with (a subset of) the keys produced by ``to_dict``.

        Returns:
            A GoalAnalysis whose enum-like fields are valid, whose list
            fields contain only non-blank strings, and whose numeric fields
            are clamped to their documented ranges.
        """
        # Sanitize before measuring so a non-list value cannot break len().
        clarifications = _sanitize_str_list(data.get("clarifications_needed", []))
        if len(clarifications) > _MAX_CLARIFICATIONS:
            logger.warning(
                "Clarifications truncated from %d to %d",
                len(clarifications),
                _MAX_CLARIFICATIONS,
            )

        raw_goal = data.get("goal", "")
        goal = raw_goal.strip() if isinstance(raw_goal, str) else ""

        complexity = _validate_complexity(data.get("complexity", "M"))
        estimated_phases = int(_clamp(data.get("estimated_phases", 1), 1, 10))
        # Enforce minimum phases for high complexity
        estimated_phases = max(estimated_phases, _MIN_PHASES_BY_COMPLEXITY.get(complexity, 1))

        return cls(
            goal=goal,
            domain=_validate_domain(data.get("domain", "code")),
            sub_domains=_sanitize_str_list(data.get("sub_domains", []))[:_MAX_SUB_DOMAINS],
            complexity=complexity,
            estimated_phases=estimated_phases,
            ai_capabilities=_sanitize_str_list(data.get("ai_capabilities", [])),
            human_requirements=_sanitize_str_list(data.get("human_requirements", [])),
            constraints_detected=_sanitize_str_list(data.get("constraints_detected", [])),
            clarifications_needed=clarifications[:_MAX_CLARIFICATIONS],
            suggested_research_depth=_validate_research_depth(
                data.get("suggested_research_depth", "standard")
            ),
            confidence=_clamp(
                data.get("confidence", _DEFAULT_CONFIDENCE), 0.0, 1.0, default=_DEFAULT_CONFIDENCE
            ),
        )

    @property
    def needs_clarification(self) -> bool:
        """Whether the goal needs clarification before planning."""
        return bool(self.clarifications_needed)

    @property
    def domain_label(self) -> str:
        """Human-readable domain label."""
        return _DOMAIN_LABELS.get(self.domain, self.domain.title())


class GoalParser:
    """Parses user goals into structured GoalAnalysis via LLM.

    Uses AgentRouter to run the GOAL_PARSE_PROMPT and parse the
    structured JSON response into a GoalAnalysis dataclass.
    """

    async def parse(self, user_input: str) -> GoalAnalysis:
        """Parse a user's goal description into structured analysis.

        Args:
            user_input: Natural language goal description.

        Returns:
            GoalAnalysis with domain, complexity, roles, and clarifications.

        Raises:
            RuntimeError: If the LLM fails to produce valid output.
        """
        from pocketpaw.deep_work.prompts import GOAL_PARSE_PROMPT

        prompt = self._build_prompt(GOAL_PARSE_PROMPT, user_input)
        raw_output = await self._run_prompt(prompt)

        analysis = self.parse_raw(raw_output)
        if not analysis.goal:
            # Fallback: use input as goal if LLM didn't restate it
            analysis.goal = user_input[:200]

        logger.info(
            "Goal parsed for '%.50s': domain=%s complexity=%s confidence=%.2f clarifications=%d",
            user_input,
            analysis.domain,
            analysis.complexity,
            analysis.confidence,
            len(analysis.clarifications_needed),
        )
        return analysis

    @staticmethod
    def _build_prompt(template: str, user_input: str) -> str:
        """Substitute ``user_input`` into a ``str.format`` template.

        The input is passed through untouched: ``str.format`` only parses
        the template, never the substituted values, so braces typed by the
        user cannot act as format fields. Doubling them beforehand would
        leave literal ``{{``/``}}`` in the prompt sent to the LLM.
        """
        return template.format(user_input=user_input)

    def parse_raw(self, raw: str) -> GoalAnalysis:
        """Parse raw LLM JSON output into a GoalAnalysis.

        Handles markdown code fences and returns a default GoalAnalysis
        on parse failure.

        Args:
            raw: Raw JSON string (possibly with markdown code fences).

        Returns:
            Parsed GoalAnalysis, or default analysis on failure.
        """
        cleaned = self._strip_code_fences(raw)
        try:
            data = json.loads(cleaned)
            if isinstance(data, dict):
                return GoalAnalysis.from_dict(data)
            logger.warning("Goal parse JSON is not an object: %s", type(data).__name__)
            return GoalAnalysis()
        except (json.JSONDecodeError, TypeError) as e:
            logger.warning("Failed to parse goal analysis JSON: %s\nRaw: %s", e, raw[:200])
            return GoalAnalysis()

    async def _run_prompt(self, prompt: str) -> str:
        """Run a prompt through AgentRouter and collect message chunks.

        Returns:
            The concatenated content of all "message" events.

        Raises:
            RuntimeError: If no message content was produced — either
                because the router yielded only error events or because
                it yielded nothing usable at all.
        """
        from pocketpaw.agents.router import AgentRouter
        from pocketpaw.config import get_settings

        router = AgentRouter(get_settings())
        output_parts: list[str] = []
        errors: list[str] = []

        async for event in router.run(prompt):
            if event.type == "message":
                content = event.content or ""
                if content:
                    output_parts.append(content)
            elif event.type == "error":
                error_content = event.content or "Unknown error"
                errors.append(error_content)
                logger.error("LLM error during goal parsing: %s", error_content)

        if not output_parts:
            if errors:
                raise RuntimeError(f"LLM error during goal parsing: {errors[0]}")
            raise RuntimeError("LLM produced empty response during goal parsing")

        return "".join(output_parts)

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return the contents of the first fenced block, or the stripped text."""
        match = _CODE_FENCE_RE.search(text)
        if match:
            return match.group(1).strip()
        return text.strip()


# ============================================================================
# Validation helpers
# ============================================================================


def _normalize_choice(
    value: Any, valid: frozenset[str], fallback: str, *, upper: bool = False
) -> str:
    """Return the case-normalized ``value`` if it is in ``valid``, else ``fallback``.

    Non-string values (``None``, numbers, lists — all possible in LLM JSON)
    are treated as invalid rather than raising AttributeError.
    """
    if not isinstance(value, str):
        return fallback
    stripped = value.strip()
    normalized = stripped.upper() if upper else stripped.lower()
    return normalized if normalized in valid else fallback


def _validate_domain(value: Any) -> str:
    """Validate and normalize domain value; anything invalid becomes "hybrid"."""
    return _normalize_choice(value, VALID_DOMAINS, "hybrid")


def _validate_complexity(value: Any) -> str:
    """Validate and normalize complexity value; anything invalid becomes "M"."""
    return _normalize_choice(value, VALID_COMPLEXITIES, "M", upper=True)


def _validate_research_depth(value: Any) -> str:
    """Validate and normalize research depth value; anything invalid becomes "standard"."""
    return _normalize_choice(value, VALID_RESEARCH_DEPTHS, "standard")


def _sanitize_str_list(items: Any) -> list[str]:
    """Coerce a list's items to strings, dropping ``None`` and blank entries.

    Returns an empty list when ``items`` is not a list.
    """
    if not isinstance(items, list):
        return []
    return [str(item) for item in items if item is not None and str(item).strip()]


def _clamp(value, minimum, maximum, default=None):
    """Clamp a numeric value between ``minimum`` and ``maximum``.

    Args:
        value: Anything ``float()`` accepts.
        minimum: Lower bound (inclusive).
        maximum: Upper bound (inclusive).
        default: Returned when ``value`` is not a usable number (wrong type,
            unparsable string, NaN, or an int too large for a float).
            Defaults to ``minimum``.

    Returns:
        The clamped number, or the fallback described above.
    """
    fallback = minimum if default is None else default
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return fallback
    if number != number:  # NaN compares unequal to itself
        return fallback
    return max(minimum, min(maximum, number))
# -# Four-phase planning prompts: +# Prompt templates: +# GOAL_PARSE_PROMPT — structured goal analysis (domain, complexity, roles) # RESEARCH_PROMPT — domain research (standard depth) # RESEARCH_PROMPT_QUICK — minimal research (skip web search) # RESEARCH_PROMPT_DEEP — thorough research (extensive web search) @@ -11,6 +14,56 @@ # TASK_BREAKDOWN_PROMPT — task decomposition to JSON # TEAM_ASSEMBLY_PROMPT — team recommendation to JSON +GOAL_PARSE_PROMPT = """\ +You are an expert project analyst. Analyze the user's goal and produce a \ +structured JSON assessment. This is the first step before planning — you need \ +to understand WHAT the user wants, WHICH domain it falls into, HOW complex \ +it is, and WHAT needs clarification. + +USER INPUT: +{user_input} + +Analyze the input and output ONLY a valid JSON object (no commentary). \ +You may wrap it in ```json fences. The JSON must have exactly these fields: + +{{ + "goal": "A clear, one-sentence restatement of the user's goal", + "domain": "One of: code, business, creative, education, events, home, hybrid", + "sub_domains": ["Array of specific sub-domains, e.g. 
'web-development', 'react', 'aws'"], + "complexity": "One of: S, M, L, XL", + "estimated_phases": 1-10, + "ai_capabilities": ["What AI can do for this project — be specific"], + "human_requirements": ["What the human MUST do — things AI cannot"], + "constraints_detected": ["Any budget, timeline, or technical constraints mentioned"], + "clarifications_needed": ["Questions to ask BEFORE planning — only if truly ambiguous"], + "suggested_research_depth": "One of: none, quick, standard, deep", + "confidence": 0.0 to 1.0 +}} + +DOMAIN DEFINITIONS: +- code: Software development, APIs, apps, websites, scripts, data pipelines +- business: Market research, business plans, accounting, legal, marketing strategy +- creative: Writing, design, music, video, art, content creation +- education: Learning plans, courses, study guides, skill development +- events: Weddings, conferences, parties, travel planning, logistics +- home: Renovation, moving, organization, DIY projects, gardening +- hybrid: Projects spanning multiple domains (set sub_domains to clarify) + +COMPLEXITY RULES: +- S: Single deliverable, < 1 hour, no dependencies +- M: 2-5 tasks, 1-4 hours, minimal dependencies +- L: 5-15 tasks, days to weeks, multiple phases and dependencies +- XL: 15+ tasks, weeks to months, multiple phases, team needed + +CLARIFICATION RULES: +- Only ask if the answer would CHANGE the plan significantly +- Maximum 4 clarification questions +- Skip clarifications for obvious or standard approaches +- Never ask about things you can reasonably assume + +Keep confidence between 0.5 (very vague input) and 1.0 (crystal clear goal). +""" + RESEARCH_PROMPT_QUICK = """\ You are a senior technical researcher. Based ONLY on your existing knowledge \ (no web searches needed), provide brief research notes for the project below. 
diff --git a/src/pocketpaw/deep_work/session.py b/src/pocketpaw/deep_work/session.py index 36e13f41..f969043d 100644 --- a/src/pocketpaw/deep_work/session.py +++ b/src/pocketpaw/deep_work/session.py @@ -1,14 +1,18 @@ # Deep Work Session — project lifecycle orchestrator. # Created: 2026-02-12 +# Updated: 2026-02-18 — Integrated GoalParser as first step in planning pipeline. +# Goal analysis stored in project.metadata["goal_analysis"]. Suggested research +# depth from GoalParser used when research_depth="auto". # Updated: 2026-02-17 — Record planning errors to health engine ErrorStore. # Updated: 2026-02-12 — Added executor integration for pause/stop, made # planner/scheduler/human_router optional with sensible defaults, # improved _assign_tasks_to_agents to use key_to_id mapping. # Added research_depth parameter to start() for controlling planner depth. # -# Ties together the Planner, DependencyScheduler, MCTaskExecutor, and -# HumanTaskRouter into a single class that manages a Deep Work project -# from user input through planning, approval, execution, and completion. +# Ties together GoalParser, Planner, DependencyScheduler, MCTaskExecutor, +# and HumanTaskRouter into a single class that manages a Deep Work project +# from user input through goal analysis, planning, approval, execution, +# and completion. # # Public API: # session.start(user_input) -> Project (create + plan + await approval) @@ -16,6 +20,7 @@ # session.pause(project_id) -> Project (stop running tasks) # session.resume(project_id) -> Project (resume dispatching) +import asyncio import logging from typing import Any @@ -62,6 +67,7 @@ class DeepWorkSession: self.human_router = human_router or HumanTaskRouter() self.scheduler = scheduler or DependencyScheduler(manager, executor, self.human_router) self._subscribed = False + self._planning_locks: dict[str, asyncio.Lock] = {} # per-project planning locks # Wire direct executor → scheduler callback for reliable cascade dispatch. 
# This bypasses MessageBus so task completion always triggers dependent @@ -102,8 +108,6 @@ class DeepWorkSession: Returns: Number of projects recovered. """ - import asyncio - recovered = 0 projects = await self.manager.list_projects() @@ -182,22 +186,52 @@ class DeepWorkSession: ) async def plan_existing_project( - self, project_id: str, user_input: str, research_depth: str = "standard" + self, + project_id: str, + user_input: str, + research_depth: str = "standard", + goal_analysis: dict | None = None, ) -> Project: """Run planner on an already-created project. Called by start() or by the async API endpoint. Broadcasts a dw_planning_complete event when done (success or failure). + If goal_analysis is provided (pre-parsed), it's stored in project + metadata and used to inform planning. If research_depth is "auto", + the GoalParser's suggested depth is used. + Args: project_id: ID of the project to plan. user_input: Natural language project description. - research_depth: How thorough to research — "none", "quick", - "standard", or "deep". + research_depth: How thorough to research — "auto" (use goal parser + suggestion), "none", "quick", "standard", or "deep". + goal_analysis: Optional pre-parsed GoalAnalysis dict. If None and + research_depth is "auto", GoalParser runs automatically. Returns: The updated Project. 
""" + # Per-project lock prevents concurrent planning for the same project + if project_id not in self._planning_locks: + self._planning_locks[project_id] = asyncio.Lock() + lock = self._planning_locks[project_id] + + async with lock: + return await self._plan_existing_project_locked( + project_id, user_input, research_depth, goal_analysis + ) + + async def _plan_existing_project_locked( + self, + project_id: str, + user_input: str, + research_depth: str = "standard", + goal_analysis: dict | None = None, + ) -> Project: + """Internal planning method, called under per-project lock.""" + from pocketpaw.deep_work.goal_parser import GoalParser + project = await self.manager.get_project(project_id) if not project: raise ValueError(f"Project not found: {project_id}") @@ -207,6 +241,27 @@ class DeepWorkSession: project.status = ProjectStatus.PLANNING await self.manager.update_project(project) + # Phase 0: Goal Analysis + # Run GoalParser if we don't have a pre-parsed analysis + if goal_analysis is None: + try: + self._broadcast_phase(project.id, "goal_analysis") + parser = GoalParser() + analysis = await parser.parse(user_input) + goal_analysis = analysis.to_dict() + except Exception as e: + logger.warning("Goal parsing failed (non-fatal): %s", e) + goal_analysis = {} + + # Store goal analysis in project metadata + if goal_analysis: + project.metadata["goal_analysis"] = goal_analysis + await self.manager.update_project(project) + + # Use goal parser's suggested depth if research_depth is "auto" + if research_depth == "auto" and goal_analysis: + research_depth = goal_analysis.get("suggested_research_depth", "standard") + result = await self.planner.plan( user_input, project_id=project.id, research_depth=research_depth ) @@ -315,8 +370,6 @@ class DeepWorkSession: Raises: ValueError: If project not found. 
""" - import asyncio - project = await self.manager.get_project(project_id) if not project: raise ValueError(f"Project not found: {project_id}") @@ -376,8 +429,6 @@ class DeepWorkSession: Raises: ValueError: If project not found. """ - import asyncio - project = await self.manager.get_project(project_id) if not project: raise ValueError(f"Project not found: {project_id}") @@ -411,6 +462,43 @@ class DeepWorkSession: # Broadcasting helpers # ========================================================================= + def _broadcast_phase(self, project_id: str, phase: str) -> None: + """Publish a SystemEvent for frontend progress tracking. + + Best-effort — silently ignores errors if bus is unavailable. + """ + phase_messages = { + "goal_analysis": "Analyzing your goal...", + "research": "Researching domain knowledge...", + "prd": "Writing product requirements...", + "tasks": "Breaking down into tasks...", + "team": "Assembling agent team...", + } + message = phase_messages.get(phase, f"Planning phase: {phase}") + + try: + import asyncio + + from pocketpaw.bus import get_message_bus + from pocketpaw.bus.events import SystemEvent + + bus = get_message_bus() + loop = asyncio.get_running_loop() + loop.create_task( + bus.publish_system( + SystemEvent( + event_type="dw_planning_phase", + data={ + "project_id": project_id, + "phase": phase, + "message": message, + }, + ) + ) + ) + except Exception: + pass # Best effort + def _broadcast_planning_complete(self, project: Project) -> None: """Broadcast a planning completion event for the frontend. diff --git a/src/pocketpaw/frontend/js/features/deep-work.js b/src/pocketpaw/frontend/js/features/deep-work.js index f32c71d1..c84425e0 100644 --- a/src/pocketpaw/frontend/js/features/deep-work.js +++ b/src/pocketpaw/frontend/js/features/deep-work.js @@ -2,8 +2,11 @@ * PocketPaw - Mission Control: Deep Work Module * * Created: 2026-02-17 — Split from mission-control.js (1,699-line monolith). 
+ * Updated: 2026-02-18 — Added Goal Parser integration: analyzeGoal(), two-step + * start flow (analyze → review → plan), goal analysis state and display helpers. * * Contains Deep Work project orchestration state and methods: + * - Goal analysis (analyzeGoal, domain/complexity display) * - Project CRUD (load, start, approve, pause, resume, delete) * - Project selection and detail loading * - Project status helpers (color, label, icon) @@ -29,11 +32,15 @@ window.PocketPaw.DeepWork = { showStartProject: false, // Start project modal showProjectDetail: false, // Full project detail sheet projectInput: '', // Natural language project input - researchDepth: 'standard', // 'none' | 'quick' | 'standard' | 'deep' + researchDepth: 'auto', // 'auto' | 'none' | 'quick' | 'standard' | 'deep' projectStarting: false, // Loading state while planner runs - planningPhase: '', // Current phase: research, prd, tasks, team + planningPhase: '', // Current phase: goal_analysis, research, prd, tasks, team planningMessage: '', // Phase progress message planningProjectId: null, // Project being planned + // Goal analysis state + goalAnalysis: null, // Parsed goal analysis from /parse-goal + goalAnalyzing: false, // Loading state while goal parser runs + goalAnalysisStep: 'input', // 'input' | 'review' — modal step // Output Files panel state projectOutputFiles: [], // files in project output directory projectOutputLoading: false, // loading state for output files @@ -63,7 +70,59 @@ window.PocketPaw.DeepWork = { }, /** - * Start a new Deep Work project from natural language input + * Analyze a goal before starting planning (Step 1 of 2-step flow) + */ + async analyzeGoal() { + const input = this.missionControl.projectInput.trim(); + if (!input || input.length < 10) { + this.showToast('Please describe your project (at least 10 characters)', 'error'); + return; + } + + this.missionControl.goalAnalyzing = true; + this.missionControl.goalAnalysis = null; + + try { + const res = await 
fetch('/api/deep-work/parse-goal', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ description: input }) + }); + + if (res.ok) { + const data = await res.json(); + this.missionControl.goalAnalysis = data.goal_analysis; + this.missionControl.researchDepth = data.goal_analysis.suggested_research_depth || 'standard'; + this.missionControl.goalAnalysisStep = 'review'; + } else { + const err = await res.json(); + this.showToast(err.detail || 'Goal analysis failed', 'error'); + } + } catch (e) { + console.error('Failed to analyze goal:', e); + this.showToast('Goal analysis failed — you can still start planning', 'error'); + } finally { + this.missionControl.goalAnalyzing = false; + this.$nextTick(() => { if (window.refreshIcons) window.refreshIcons(); }); + } + }, + + /** + * Reset goal analysis and go back to input step. + * If soft=true (modal close), cache the analysis so reopening restores it. + */ + resetGoalAnalysis(soft = false) { + if (soft && this.missionControl.goalAnalysis) { + // Cache: keep analysis so reopening the modal restores the review step + return; + } + this.missionControl.goalAnalysis = null; + this.missionControl.goalAnalysisStep = 'input'; + this.missionControl.researchDepth = 'auto'; + }, + + /** + * Start a new Deep Work project from natural language input (Step 2) */ async startDeepWork() { const input = this.missionControl.projectInput.trim(); @@ -77,13 +136,19 @@ window.PocketPaw.DeepWork = { this.missionControl.planningMessage = 'Initializing project...'; try { + const body = { + description: input, + research_depth: this.missionControl.researchDepth + }; + // Pass pre-parsed goal analysis to skip re-parsing + if (this.missionControl.goalAnalysis) { + body.goal_analysis = this.missionControl.goalAnalysis; + } + const res = await fetch('/api/deep-work/start', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - description: input, - research_depth: 
this.missionControl.researchDepth - }) + body: JSON.stringify(body) }); if (res.ok) { @@ -92,6 +157,8 @@ window.PocketPaw.DeepWork = { this.missionControl.projects.unshift(project); this.missionControl.projectInput = ''; this.missionControl.showStartProject = false; + this.missionControl.goalAnalysis = null; + this.missionControl.goalAnalysisStep = 'input'; // Set planningProjectId IMMEDIATELY so WebSocket phase // events can be tracked (planning runs in background) @@ -393,14 +460,44 @@ window.PocketPaw.DeepWork = { getPlanningPhaseInfo() { const phases = { 'starting': { label: 'Initializing', icon: 'loader', step: 0 }, - 'research': { label: 'Researching', icon: 'search', step: 1 }, - 'prd': { label: 'Writing PRD', icon: 'file-text', step: 2 }, - 'tasks': { label: 'Breaking Down Tasks', icon: 'list-checks', step: 3 }, - 'team': { label: 'Assembling Team', icon: 'users', step: 4 } + 'goal_analysis': { label: 'Analyzing Goal', icon: 'target', step: 1 }, + 'research': { label: 'Researching', icon: 'search', step: 2 }, + 'prd': { label: 'Writing PRD', icon: 'file-text', step: 3 }, + 'tasks': { label: 'Breaking Down Tasks', icon: 'list-checks', step: 4 }, + 'team': { label: 'Assembling Team', icon: 'users', step: 5 } }; return phases[this.missionControl.planningPhase] || { label: 'Working', icon: 'loader', step: 0 }; }, + /** + * Get domain display info (icon + color) + */ + getDomainInfo(domain) { + const domains = { + 'code': { label: 'Software & Code', icon: 'code-2', color: 'text-blue-400 bg-blue-500/10' }, + 'business': { label: 'Business & Strategy', icon: 'briefcase', color: 'text-amber-400 bg-amber-500/10' }, + 'creative': { label: 'Creative & Content', icon: 'palette', color: 'text-purple-400 bg-purple-500/10' }, + 'education': { label: 'Learning & Education', icon: 'graduation-cap', color: 'text-green-400 bg-green-500/10' }, + 'events': { label: 'Events & Logistics', icon: 'calendar', color: 'text-pink-400 bg-pink-500/10' }, + 'home': { label: 'Home & 
Physical', icon: 'home', color: 'text-orange-400 bg-orange-500/10' }, + 'hybrid': { label: 'Multi-Domain', icon: 'layers', color: 'text-cyan-400 bg-cyan-500/10' } + }; + return domains[domain] || { label: domain, icon: 'circle', color: 'text-white/40 bg-white/5' }; + }, + + /** + * Get complexity display info (color + label) + */ + getComplexityInfo(complexity) { + const levels = { + 'S': { label: 'Small', color: 'text-green-400 bg-green-500/10 border-green-500/20' }, + 'M': { label: 'Medium', color: 'text-blue-400 bg-blue-500/10 border-blue-500/20' }, + 'L': { label: 'Large', color: 'text-amber-400 bg-amber-500/10 border-amber-500/20' }, + 'XL': { label: 'Extra Large', color: 'text-red-400 bg-red-500/10 border-red-500/20' } + }; + return levels[complexity] || { label: complexity, color: 'text-white/40 bg-white/5 border-white/10' }; + }, + /** * Get active project count */ diff --git a/src/pocketpaw/frontend/templates/components/missions/modals/start-project.html b/src/pocketpaw/frontend/templates/components/missions/modals/start-project.html index cc3ddc7c..b47e1754 100644 --- a/src/pocketpaw/frontend/templates/components/missions/modals/start-project.html +++ b/src/pocketpaw/frontend/templates/components/missions/modals/start-project.html @@ -2,11 +2,13 @@ PocketPaw - Start Deep Work Project Modal Created: 2026-02-12 - Updated: 2026-02-12 — Added 'None' option to research depth (skip research entirely). - Research depth selector: None / Quick / Standard / Deep. + Updated: 2026-02-18 — Two-step flow: (1) Enter description + Analyze Goal, + (2) Review goal analysis (domain, complexity, roles) + Start Planning. + Goal analysis uses POST /api/deep-work/parse-goal. Natural language input modal for starting a new Deep Work project. - User describes what they want to build, the planner takes over. 
+ Step 1: User describes their goal, clicks "Analyze" + Step 2: Review domain, complexity, AI/human roles, then "Start Planning" --> @@ -14,7 +16,7 @@ x-show="missionControl.showStartProject" x-transition.opacity class="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm p-4" - @click.self="missionControl.showStartProject = false" + @click.self="missionControl.showStartProject = false; resetGoalAnalysis(true)" >

Start a Project

-

Describe what you want to build

+

- - - - - + + + + + diff --git a/tests/test_deep_work_goal_parser.py b/tests/test_deep_work_goal_parser.py new file mode 100644 index 00000000..bf773b08 --- /dev/null +++ b/tests/test_deep_work_goal_parser.py @@ -0,0 +1,710 @@ +# Tests for Deep Work Goal Parser module. +# Created: 2026-02-18 +# +# Tests cover: +# - GoalAnalysis dataclass: from_dict, to_dict, defaults, properties +# - GoalParser.parse_raw(): valid JSON, fenced JSON, invalid input +# - GoalParser._strip_code_fences(): edge cases +# - Validation helpers: domain, complexity, research depth, clamp +# - GoalParser.parse(): full flow with mocked _run_prompt + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from pocketpaw.agents.protocol import AgentEvent +from pocketpaw.deep_work.goal_parser import ( + VALID_COMPLEXITIES, + VALID_DOMAINS, + VALID_RESEARCH_DEPTHS, + GoalAnalysis, + GoalParser, + _clamp, + _sanitize_str_list, + _validate_complexity, + _validate_domain, + _validate_research_depth, +) + +# ============================================================================ +# Sample data +# ============================================================================ + +VALID_GOAL_JSON = json.dumps( + { + "goal": "Build a REST API for a todo application", + "domain": "code", + "sub_domains": ["web-development", "python", "fastapi"], + "complexity": "M", + "estimated_phases": 4, + "ai_capabilities": ["Generate boilerplate code", "Write tests", "Create API docs"], + "human_requirements": ["Decide on database schema", "Provide deployment credentials"], + "constraints_detected": ["No budget mentioned"], + "clarifications_needed": ["Which database do you prefer?"], + "suggested_research_depth": "quick", + "confidence": 0.85, + } +) + +CREATIVE_GOAL_JSON = json.dumps( + { + "goal": "Write a children's book about space exploration", + "domain": "creative", + "sub_domains": ["writing", "illustration-prompts"], + "complexity": "L", + "estimated_phases": 6, + "ai_capabilities": 
["Draft story outline", "Generate illustration prompts"], + "human_requirements": ["Final story approval", "Hire illustrator"], + "constraints_detected": [], + "clarifications_needed": ["Target age group?", "Preferred art style?"], + "suggested_research_depth": "standard", + "confidence": 0.72, + } +) + + +# ============================================================================ +# GoalAnalysis dataclass tests +# ============================================================================ + + +class TestGoalAnalysisDefaults: + """Test GoalAnalysis default values.""" + + def test_default_fields(self): + analysis = GoalAnalysis() + assert analysis.goal == "" + assert analysis.domain == "code" + assert analysis.sub_domains == [] + assert analysis.complexity == "M" + assert analysis.estimated_phases == 1 + assert analysis.ai_capabilities == [] + assert analysis.human_requirements == [] + assert analysis.constraints_detected == [] + assert analysis.clarifications_needed == [] + assert analysis.suggested_research_depth == "standard" + assert analysis.confidence == 0.7 + + def test_needs_clarification_false(self): + analysis = GoalAnalysis() + assert analysis.needs_clarification is False + + def test_needs_clarification_true(self): + analysis = GoalAnalysis(clarifications_needed=["What framework?"]) + assert analysis.needs_clarification is True + + def test_domain_label(self): + assert GoalAnalysis(domain="code").domain_label == "Software & Code" + assert GoalAnalysis(domain="business").domain_label == "Business & Strategy" + assert GoalAnalysis(domain="creative").domain_label == "Creative & Content" + assert GoalAnalysis(domain="education").domain_label == "Learning & Education" + assert GoalAnalysis(domain="events").domain_label == "Events & Logistics" + assert GoalAnalysis(domain="home").domain_label == "Home & Physical" + assert GoalAnalysis(domain="hybrid").domain_label == "Multi-Domain" + + def test_domain_label_unknown_fallback(self): + analysis = 
GoalAnalysis(domain="unknown") + assert analysis.domain_label == "Unknown" + + +class TestGoalAnalysisFromDict: + """Test GoalAnalysis.from_dict() with various inputs.""" + + def test_valid_code_goal(self): + data = json.loads(VALID_GOAL_JSON) + analysis = GoalAnalysis.from_dict(data) + assert analysis.goal == "Build a REST API for a todo application" + assert analysis.domain == "code" + assert analysis.sub_domains == ["web-development", "python", "fastapi"] + assert analysis.complexity == "M" + assert analysis.estimated_phases == 4 + assert len(analysis.ai_capabilities) == 3 + assert len(analysis.human_requirements) == 2 + assert analysis.suggested_research_depth == "quick" + assert analysis.confidence == 0.85 + + def test_valid_creative_goal(self): + data = json.loads(CREATIVE_GOAL_JSON) + analysis = GoalAnalysis.from_dict(data) + assert analysis.domain == "creative" + assert analysis.complexity == "L" + assert analysis.estimated_phases == 6 + assert len(analysis.clarifications_needed) == 2 + + def test_empty_dict(self): + analysis = GoalAnalysis.from_dict({}) + assert analysis.goal == "" + assert analysis.domain == "code" + assert analysis.complexity == "M" + assert analysis.estimated_phases == 1 + assert analysis.confidence == 0.7 + + def test_invalid_domain_falls_back_to_hybrid(self): + analysis = GoalAnalysis.from_dict({"domain": "cooking"}) + assert analysis.domain == "hybrid" + + def test_invalid_complexity_falls_back_to_m(self): + analysis = GoalAnalysis.from_dict({"complexity": "XXL"}) + assert analysis.complexity == "M" + + def test_invalid_research_depth_falls_back_to_standard(self): + analysis = GoalAnalysis.from_dict({"suggested_research_depth": "extreme"}) + assert analysis.suggested_research_depth == "standard" + + def test_estimated_phases_clamped_low(self): + analysis = GoalAnalysis.from_dict({"estimated_phases": -5}) + assert analysis.estimated_phases == 1 + + def test_estimated_phases_clamped_high(self): + analysis = 
GoalAnalysis.from_dict({"estimated_phases": 50}) + assert analysis.estimated_phases == 10 + + def test_confidence_clamped_low(self): + analysis = GoalAnalysis.from_dict({"confidence": -0.5}) + assert analysis.confidence == 0.0 + + def test_confidence_clamped_high(self): + analysis = GoalAnalysis.from_dict({"confidence": 1.5}) + assert analysis.confidence == 1.0 + + def test_clarifications_truncated_to_4(self): + analysis = GoalAnalysis.from_dict( + {"clarifications_needed": ["Q1", "Q2", "Q3", "Q4", "Q5", "Q6"]} + ) + assert len(analysis.clarifications_needed) == 4 + + def test_estimated_phases_is_int(self): + analysis = GoalAnalysis.from_dict({"estimated_phases": 3.7}) + assert isinstance(analysis.estimated_phases, int) + assert analysis.estimated_phases == 3 + + +class TestGoalAnalysisToDict: + """Test GoalAnalysis.to_dict() serialization.""" + + def test_round_trip(self): + data = json.loads(VALID_GOAL_JSON) + analysis = GoalAnalysis.from_dict(data) + result = analysis.to_dict() + assert result["goal"] == data["goal"] + assert result["domain"] == data["domain"] + assert result["complexity"] == data["complexity"] + assert result["estimated_phases"] == data["estimated_phases"] + assert result["confidence"] == data["confidence"] + + def test_default_to_dict(self): + analysis = GoalAnalysis() + d = analysis.to_dict() + assert d["goal"] == "" + assert d["domain"] == "code" + assert d["complexity"] == "M" + assert d["estimated_phases"] == 1 + assert d["suggested_research_depth"] == "standard" + assert d["confidence"] == 0.7 + assert d["sub_domains"] == [] + assert d["ai_capabilities"] == [] + assert d["human_requirements"] == [] + assert d["constraints_detected"] == [] + assert d["clarifications_needed"] == [] + + +# ============================================================================ +# Validation helper tests +# ============================================================================ + + +class TestValidateDomain: + """Test _validate_domain helper.""" + + 
def test_all_valid_domains(self): + for domain in VALID_DOMAINS: + assert _validate_domain(domain) == domain + + def test_case_insensitive(self): + assert _validate_domain("CODE") == "code" + assert _validate_domain("Business") == "business" + assert _validate_domain("CREATIVE") == "creative" + + def test_strips_whitespace(self): + assert _validate_domain(" code ") == "code" + + def test_invalid_returns_hybrid(self): + assert _validate_domain("cooking") == "hybrid" + assert _validate_domain("") == "hybrid" + assert _validate_domain("xyz") == "hybrid" + + +class TestValidateComplexity: + """Test _validate_complexity helper.""" + + def test_all_valid_complexities(self): + for c in VALID_COMPLEXITIES: + assert _validate_complexity(c) == c + + def test_case_insensitive(self): + assert _validate_complexity("s") == "S" + assert _validate_complexity("xl") == "XL" + + def test_strips_whitespace(self): + assert _validate_complexity(" M ") == "M" + + def test_invalid_returns_m(self): + assert _validate_complexity("XXL") == "M" + assert _validate_complexity("") == "M" + + +class TestValidateResearchDepth: + """Test _validate_research_depth helper.""" + + def test_all_valid_depths(self): + for d in VALID_RESEARCH_DEPTHS: + assert _validate_research_depth(d) == d + + def test_case_insensitive(self): + assert _validate_research_depth("DEEP") == "deep" + assert _validate_research_depth("Quick") == "quick" + + def test_invalid_returns_standard(self): + assert _validate_research_depth("extreme") == "standard" + assert _validate_research_depth("") == "standard" + + +class TestClamp: + """Test _clamp helper.""" + + def test_within_range(self): + assert _clamp(5, 0, 10) == 5.0 + + def test_below_minimum(self): + assert _clamp(-5, 0, 10) == 0.0 + + def test_above_maximum(self): + assert _clamp(15, 0, 10) == 10.0 + + def test_at_boundaries(self): + assert _clamp(0, 0, 10) == 0.0 + assert _clamp(10, 0, 10) == 10.0 + + def test_non_numeric_returns_minimum(self): + assert _clamp("not a 
number", 0, 10) == 0 + assert _clamp(None, 1, 10) == 1 + + def test_float_input(self): + assert _clamp(0.85, 0.0, 1.0) == 0.85 + + +# ============================================================================ +# GoalParser._strip_code_fences tests +# ============================================================================ + + +class TestStripCodeFences: + """Test GoalParser._strip_code_fences static method.""" + + def test_no_fences(self): + assert GoalParser._strip_code_fences('{"key": "value"}') == '{"key": "value"}' + + def test_json_fence(self): + text = '```json\n{"key": "value"}\n```' + assert GoalParser._strip_code_fences(text) == '{"key": "value"}' + + def test_plain_fence(self): + text = '```\n{"key": "value"}\n```' + assert GoalParser._strip_code_fences(text) == '{"key": "value"}' + + def test_surrounding_text(self): + text = 'Here is the analysis:\n```json\n{"domain": "code"}\n```\nDone.' + assert GoalParser._strip_code_fences(text) == '{"domain": "code"}' + + def test_empty_string(self): + assert GoalParser._strip_code_fences("") == "" + + def test_whitespace_only(self): + assert GoalParser._strip_code_fences(" ") == "" + + +# ============================================================================ +# GoalParser.parse_raw() tests +# ============================================================================ + + +class TestParseRaw: + """Test GoalParser.parse_raw() with various inputs.""" + + def setup_method(self): + self.parser = GoalParser() + + def test_valid_json(self): + analysis = self.parser.parse_raw(VALID_GOAL_JSON) + assert analysis.goal == "Build a REST API for a todo application" + assert analysis.domain == "code" + assert analysis.complexity == "M" + assert analysis.confidence == 0.85 + + def test_fenced_json(self): + fenced = f"```json\n{VALID_GOAL_JSON}\n```" + analysis = self.parser.parse_raw(fenced) + assert analysis.goal == "Build a REST API for a todo application" + assert analysis.domain == "code" + + def 
test_fenced_with_surrounding_text(self): + wrapped = f"Here is my analysis:\n```json\n{CREATIVE_GOAL_JSON}\n```\nLet me know." + analysis = self.parser.parse_raw(wrapped) + assert analysis.domain == "creative" + assert analysis.complexity == "L" + + def test_invalid_json_returns_default(self): + analysis = self.parser.parse_raw("this is not json at all") + assert analysis.goal == "" + assert analysis.domain == "code" + assert analysis.complexity == "M" + + def test_empty_string_returns_default(self): + analysis = self.parser.parse_raw("") + assert analysis.goal == "" + assert analysis.domain == "code" + + def test_json_array_returns_default(self): + analysis = self.parser.parse_raw('[{"key": "value"}]') + assert analysis.goal == "" + assert analysis.domain == "code" + + def test_json_number_returns_default(self): + analysis = self.parser.parse_raw("42") + assert analysis.goal == "" + + def test_partial_data(self): + partial = json.dumps({"goal": "Build something", "domain": "business"}) + analysis = self.parser.parse_raw(partial) + assert analysis.goal == "Build something" + assert analysis.domain == "business" + assert analysis.complexity == "M" # default + assert analysis.estimated_phases == 1 # default + + +# ============================================================================ +# GoalParser.parse() integration test (mocked LLM) +# ============================================================================ + + +class TestParseIntegration: + """Test GoalParser.parse() with mocked _run_prompt.""" + + @pytest.mark.asyncio + async def test_parse_returns_goal_analysis(self): + parser = GoalParser() + + async def mock_run_prompt(prompt: str) -> str: + return VALID_GOAL_JSON + + parser._run_prompt = mock_run_prompt + + analysis = await parser.parse("Build a todo REST API") + assert isinstance(analysis, GoalAnalysis) + assert analysis.goal == "Build a REST API for a todo application" + assert analysis.domain == "code" + assert analysis.complexity == "M" + assert 
analysis.confidence == 0.85 + + @pytest.mark.asyncio + async def test_parse_fills_empty_goal_with_input(self): + parser = GoalParser() + + async def mock_run_prompt(prompt: str) -> str: + return json.dumps({"domain": "business", "complexity": "L"}) + + parser._run_prompt = mock_run_prompt + + analysis = await parser.parse("Plan a product launch") + assert analysis.goal == "Plan a product launch" + assert analysis.domain == "business" + assert analysis.complexity == "L" + + @pytest.mark.asyncio + async def test_parse_handles_fenced_response(self): + parser = GoalParser() + + async def mock_run_prompt(prompt: str) -> str: + return f"Here is the analysis:\n```json\n{CREATIVE_GOAL_JSON}\n```" + + parser._run_prompt = mock_run_prompt + + analysis = await parser.parse("Write a children's book") + assert analysis.domain == "creative" + assert analysis.complexity == "L" + + @pytest.mark.asyncio + async def test_parse_handles_invalid_llm_output(self): + parser = GoalParser() + + async def mock_run_prompt(prompt: str) -> str: + return "I couldn't understand the request." 
+ + parser._run_prompt = mock_run_prompt + + analysis = await parser.parse("Do something vague") + # Should return default analysis with goal filled from input + assert analysis.goal == "Do something vague" + assert analysis.domain == "code" # default + assert analysis.complexity == "M" # default + + @pytest.mark.asyncio + async def test_parse_long_input_truncates_goal(self): + parser = GoalParser() + long_input = "x" * 500 + + async def mock_run_prompt(prompt: str) -> str: + return json.dumps({"domain": "code"}) + + parser._run_prompt = mock_run_prompt + + analysis = await parser.parse(long_input) + assert len(analysis.goal) == 200 # truncated to 200 chars + + @pytest.mark.asyncio + async def test_parse_prompt_contains_user_input(self): + parser = GoalParser() + captured_prompt = None + + async def mock_run_prompt(prompt: str) -> str: + nonlocal captured_prompt + captured_prompt = prompt + return VALID_GOAL_JSON + + parser._run_prompt = mock_run_prompt + + await parser.parse("Build a mobile app for cat tracking") + assert "Build a mobile app for cat tracking" in captured_prompt + + +# ============================================================================ +# GoalParser._run_prompt error handling tests +# ============================================================================ + + +class TestRunPromptErrors: + """Test _run_prompt error handling with mocked AgentRouter.""" + + @pytest.mark.asyncio + async def test_raises_on_error_only_response(self): + parser = GoalParser() + + mock_router = MagicMock() + + async def mock_run(prompt): + yield AgentEvent(type="error", content="API key not configured") + + mock_router.run = mock_run + + with patch("pocketpaw.agents.router.AgentRouter", return_value=mock_router): + with patch("pocketpaw.config.get_settings"): + with pytest.raises(RuntimeError, match="API key not configured"): + await parser._run_prompt("test prompt") + + @pytest.mark.asyncio + async def test_returns_content_with_messages(self): + parser = 
GoalParser() + + mock_router = MagicMock() + + async def mock_run(prompt): + yield AgentEvent(type="message", content='{"domain": "code"}') + yield AgentEvent(type="done", content="") + + mock_router.run = mock_run + + with patch("pocketpaw.agents.router.AgentRouter", return_value=mock_router): + with patch("pocketpaw.config.get_settings"): + result = await parser._run_prompt("test prompt") + assert result == '{"domain": "code"}' + + +# ============================================================================ +# GOAL_PARSE_PROMPT template test +# ============================================================================ + + +class TestGoalParsePrompt: + """Test GOAL_PARSE_PROMPT template.""" + + def test_has_user_input_placeholder(self): + from pocketpaw.deep_work.prompts import GOAL_PARSE_PROMPT + + assert "{user_input}" in GOAL_PARSE_PROMPT + + def test_can_be_formatted(self): + from pocketpaw.deep_work.prompts import GOAL_PARSE_PROMPT + + result = GOAL_PARSE_PROMPT.format(user_input="Build a todo app") + assert "Build a todo app" in result + assert "{user_input}" not in result + + def test_allows_markdown_fences(self): + from pocketpaw.deep_work.prompts import GOAL_PARSE_PROMPT + + # Prompt should mention that fences are allowed (not prohibited) + assert "```json" in GOAL_PARSE_PROMPT + + +# ============================================================================ +# _sanitize_str_list tests +# ============================================================================ + + +class TestSanitizeStrList: + """Test _sanitize_str_list helper.""" + + def test_valid_strings(self): + assert _sanitize_str_list(["a", "b", "c"]) == ["a", "b", "c"] + + def test_filters_none(self): + assert _sanitize_str_list(["valid", None, "also valid"]) == ["valid", "also valid"] + + def test_converts_numbers_to_str(self): + result = _sanitize_str_list(["text", 123, 45.6]) + assert result == ["text", "123", "45.6"] + + def test_filters_empty_strings(self): + assert 
_sanitize_str_list(["valid", "", " ", "ok"]) == ["valid", "ok"] + + def test_not_a_list_returns_empty(self): + assert _sanitize_str_list("not a list") == [] + assert _sanitize_str_list(42) == [] + assert _sanitize_str_list(None) == [] + + def test_empty_list(self): + assert _sanitize_str_list([]) == [] + + +# ============================================================================ +# GoalAnalysis.from_dict — sanitization and caps tests +# ============================================================================ + + +class TestGoalAnalysisFromDictSanitization: + """Test from_dict sanitization of list fields and complexity/phase consistency.""" + + def test_sub_domains_capped_at_6(self): + data = {"sub_domains": ["a", "b", "c", "d", "e", "f", "g", "h"]} + analysis = GoalAnalysis.from_dict(data) + assert len(analysis.sub_domains) == 6 + + def test_ai_capabilities_with_nulls(self): + data = {"ai_capabilities": ["Write code", None, 123, "", "Test code"]} + analysis = GoalAnalysis.from_dict(data) + assert analysis.ai_capabilities == ["Write code", "123", "Test code"] + + def test_human_requirements_with_nulls(self): + data = {"human_requirements": ["Decide schema", None, "Approve design"]} + analysis = GoalAnalysis.from_dict(data) + assert analysis.human_requirements == ["Decide schema", "Approve design"] + + def test_constraints_detected_not_a_list(self): + data = {"constraints_detected": "not a list"} + analysis = GoalAnalysis.from_dict(data) + assert analysis.constraints_detected == [] + + def test_xl_complexity_minimum_3_phases(self): + data = {"complexity": "XL", "estimated_phases": 1} + analysis = GoalAnalysis.from_dict(data) + assert analysis.estimated_phases == 3 + + def test_l_complexity_minimum_2_phases(self): + data = {"complexity": "L", "estimated_phases": 1} + analysis = GoalAnalysis.from_dict(data) + assert analysis.estimated_phases == 2 + + def test_s_complexity_allows_1_phase(self): + data = {"complexity": "S", "estimated_phases": 1} + analysis = 
GoalAnalysis.from_dict(data) + assert analysis.estimated_phases == 1 + + def test_m_complexity_allows_1_phase(self): + data = {"complexity": "M", "estimated_phases": 1} + analysis = GoalAnalysis.from_dict(data) + assert analysis.estimated_phases == 1 + + +# ============================================================================ +# _run_prompt — empty response test +# ============================================================================ + + +class TestRunPromptEmptyResponse: + """Test _run_prompt raises on empty LLM response.""" + + @pytest.mark.asyncio + async def test_raises_on_empty_response(self): + parser = GoalParser() + + mock_router = MagicMock() + + async def mock_run(prompt): + yield AgentEvent(type="done", content="") + + mock_router.run = mock_run + + with patch("pocketpaw.agents.router.AgentRouter", return_value=mock_router): + with patch("pocketpaw.config.get_settings"): + with pytest.raises(RuntimeError, match="empty response"): + await parser._run_prompt("test prompt") + + @pytest.mark.asyncio + async def test_raises_on_only_empty_messages(self): + parser = GoalParser() + + mock_router = MagicMock() + + async def mock_run(prompt): + yield AgentEvent(type="message", content="") + yield AgentEvent(type="message", content="") + + mock_router.run = mock_run + + with patch("pocketpaw.agents.router.AgentRouter", return_value=mock_router): + with patch("pocketpaw.config.get_settings"): + with pytest.raises(RuntimeError, match="empty response"): + await parser._run_prompt("test prompt") + + +# ============================================================================ +# Prompt injection safety tests +# ============================================================================ + + +class TestPromptInjection: + """Test that curly braces in user input don't break prompt formatting.""" + + @pytest.mark.asyncio + async def test_curly_braces_in_input(self): + parser = GoalParser() + captured_prompt = None + + async def mock_run_prompt(prompt: str) 
-> str: + nonlocal captured_prompt + captured_prompt = prompt + return VALID_GOAL_JSON + + parser._run_prompt = mock_run_prompt + + # Input with curly braces should not crash + await parser.parse("Build a {React} app with {TypeScript}") + assert captured_prompt is not None + assert "{React}" in captured_prompt # braces preserved in final prompt + + @pytest.mark.asyncio + async def test_format_string_attack(self): + parser = GoalParser() + captured_prompt = None + + async def mock_run_prompt(prompt: str) -> str: + nonlocal captured_prompt + captured_prompt = prompt + return VALID_GOAL_JSON + + parser._run_prompt = mock_run_prompt + + # Malicious format string should not cause KeyError + await parser.parse("Build {__class__.__mro__[1]}") + assert captured_prompt is not None diff --git a/tests/test_deep_work_planner.py b/tests/test_deep_work_planner.py index 9b7a7e78..50268d4c 100644 --- a/tests/test_deep_work_planner.py +++ b/tests/test_deep_work_planner.py @@ -17,6 +17,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from pocketpaw.agents.protocol import AgentEvent from pocketpaw.deep_work.models import AgentSpec, PlannerResult, TaskSpec from pocketpaw.deep_work.planner import PlannerAgent from pocketpaw.deep_work.prompts import ( @@ -539,7 +540,7 @@ class TestRunPromptErrorHandling: # Simulate a router that yields only an error (e.g. 
bad API key) async def mock_run(prompt): - yield {"type": "error", "content": "API key not configured"} + yield AgentEvent(type="error", content="API key not configured") mock_router = MagicMock() mock_router.run = mock_run @@ -554,9 +555,9 @@ class TestRunPromptErrorHandling: planner = PlannerAgent(manager) async def mock_run(prompt): - yield {"type": "tool_use", "content": "thinking..."} - yield {"type": "error", "content": "Connection refused"} - yield {"type": "done", "content": ""} + yield AgentEvent(type="tool_use", content="thinking...") + yield AgentEvent(type="error", content="Connection refused") + yield AgentEvent(type="done", content="") mock_router = MagicMock() mock_router.run = mock_run @@ -571,9 +572,9 @@ class TestRunPromptErrorHandling: planner = PlannerAgent(manager) async def mock_run(prompt): - yield {"type": "message", "content": "Hello "} - yield {"type": "message", "content": "world"} - yield {"type": "done", "content": ""} + yield AgentEvent(type="message", content="Hello ") + yield AgentEvent(type="message", content="world") + yield AgentEvent(type="done", content="") mock_router = MagicMock() mock_router.run = mock_run @@ -588,10 +589,10 @@ class TestRunPromptErrorHandling: planner = PlannerAgent(manager) async def mock_run(prompt): - yield {"type": "tool_use", "content": "using search"} - yield {"type": "message", "content": "Found results"} - yield {"type": "tool_result", "content": "done"} - yield {"type": "done", "content": ""} + yield AgentEvent(type="tool_use", content="using search") + yield AgentEvent(type="message", content="Found results") + yield AgentEvent(type="tool_result", content="done") + yield AgentEvent(type="done", content="") mock_router = MagicMock() mock_router.run = mock_run