From e9bd04b4f64dc7fe0e98c0bd8a06475633d9d568 Mon Sep 17 00:00:00 2001 From: larchanka Date: Mon, 9 Mar 2026 14:37:13 +0100 Subject: [PATCH] Move docs to lemonade --- _board/TASKS/FP-04_OLLAMA_VISION.md | 14 +- _board/TASKS/FP-07_FILE_PROCESSOR_SERVICE.md | 2 +- _board/TASKS/FP-15_E2E_VERIFICATION.md | 2 +- _board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md | 2 +- _board/TASKS/M1-01_OLLAMA_WARMUP.md | 6 +- _board/TASKS/M1-04_MODEL_MANAGER_TESTS.md | 4 +- _board/TASKS/M2-01_GENERATOR_INTEGRATION.md | 2 +- _board/TASKS/M2-03_INTEGRATION_TESTS.md | 2 +- _board/TASKS/P1-01_INITIALIZE_PROJECT.md | 2 +- .../P10-06_MODEL_SELECTION_VERIFICATION.md | 2 +- _board/TASKS/P10-06_VERIFICATION_NOTES.md | 4 +- _board/TASKS/P2-01_OLLAMA_ADAPTER.md | 12 +- _board/TASKS/P2-02_MODEL_ROUTER.md | 2 +- _board/TASKS/P3-04_CRITIC_AGENT.md | 2 +- _board/TASKS/P4-03_RAG_SERVICE.md | 2 +- .../P6-06_ARCHIVING_INTEGRATION_TESTS.md | 2 +- skills/CONFIG.md | 1 + skills/calendar/SKILL.md | 245 +++++++++ src/__tests__/archiving.test.ts | 184 +++---- src/agents/planner-agent.ts | 212 ++++---- .../__tests__/orchestrator-autonomous.test.ts | 2 +- .../orchestrator-concurrency.test.ts | 6 +- .../__tests__/orchestrator-supervisor.test.ts | 2 +- .../__tests__/generator-model-manager.test.ts | 2 +- src/services/generator-service.ts | 406 +++++++------- src/services/model-router.ts | 34 +- src/services/rag-service.ts | 446 ++++++++-------- src/shared/config.ts | 502 +++++++++--------- 28 files changed, 1175 insertions(+), 929 deletions(-) create mode 100644 skills/calendar/SKILL.md diff --git a/_board/TASKS/FP-04_OLLAMA_VISION.md b/_board/TASKS/FP-04_OLLAMA_VISION.md index 4316360..9b80f02 100644 --- a/_board/TASKS/FP-04_OLLAMA_VISION.md +++ b/_board/TASKS/FP-04_OLLAMA_VISION.md @@ -1,20 +1,20 @@ -# FP-04: Extend OllamaAdapter with Vision/Image Support +# FP-04: Extend LemonadeAdapter with Vision/Image Support -**File**: `src/services/ollama-adapter.ts` +**File**: `src/services/lemonade-adapter.ts` 
**Dependencies**: FP-02, FP-03 **Phase**: 2 — Core Services ## Description -Add a new `chatWithImage()` method to `OllamaAdapter` that accepts a local image file path, reads and base64-encodes it, and sends it to the Ollama `/api/chat` endpoint using the multimodal `images` field. This is required by the `file-processor` to call `glm-ocr:q8_0` for image OCR. +Add a new `chatWithImage()` method to `LemonadeAdapter` that accepts a local image file path, reads and base64-encodes it, and sends it to the Lemonade `/chat/completions` endpoint as an OpenAI-compatible vision request. This is required by the `file-processor` to call `qwen3-vl` for image OCR. ## Acceptance Criteria - New method signature: ```ts async chatWithImage( - messages: OllamaMessage[], + messages: ChatMessage[], model: string, imagePath: string - ): Promise + ): Promise ``` - Method reads the file at `imagePath` using `fs/promises.readFile` - Encodes to base64 string @@ -23,7 +23,7 @@ Add a new `chatWithImage()` method to `OllamaAdapter` that accepts a local image - Throws a clear error if `imagePath` does not exist or cannot be read ## Implementation Notes -- Ollama vision format: the `images` field is an array of base64 strings, placed in the user message object -- No MIME type needed — Ollama auto-detects from content +- OpenAI vision format: the `image_url` field is part of the message content array. +- MIME type is needed for the data URI. - Reuse existing `fetchWithTimeout` / retry logic from `chat()` — don't duplicate - Add a unit test stub (can be skipped/mocked) confirming the base64 encoding step diff --git a/_board/TASKS/FP-07_FILE_PROCESSOR_SERVICE.md b/_board/TASKS/FP-07_FILE_PROCESSOR_SERVICE.md index 2df7036..1c608ad 100644 --- a/_board/TASKS/FP-07_FILE_PROCESSOR_SERVICE.md +++ b/_board/TASKS/FP-07_FILE_PROCESSOR_SERVICE.md @@ -14,7 +14,7 @@ Create the `file-processor` as an independent `BaseProcess` subprocess. 
It liste - `category: 'text'` → read file with `fs/promises.readFile` (UTF-8) - If content.length <= `textMaxInlineChars`: respond with `type: 'text'`, full content - If content.length > `textMaxInlineChars`: respond with `type: 'text_long'`, full content (chunking/summarizing is Orchestrator's responsibility — see FP-10) - - `category: 'image'` and `ocrEnabled: true` → call `OllamaAdapter.chatWithImage()` with OCR prompt; respond with `type: 'image_ocr'` + - `category: 'image'` and `ocrEnabled: true` → call `LemonadeAdapter.chatWithImage()` with OCR prompt; respond with `type: 'image_ocr'` - `category: 'audio'` → call `convertToWav()` then `transcribeAudio()`; respond with `type: 'audio_transcript'` - `category: 'unknown'` → respond with `type: 'ignored'`, empty content - After any processing attempt (success or failure), **delete the original file** from disk diff --git a/_board/TASKS/FP-15_E2E_VERIFICATION.md b/_board/TASKS/FP-15_E2E_VERIFICATION.md index 7d8c4a3..055d165 100644 --- a/_board/TASKS/FP-15_E2E_VERIFICATION.md +++ b/_board/TASKS/FP-15_E2E_VERIFICATION.md @@ -29,7 +29,7 @@ Manual end-to-end verification of the complete file processing pipeline across a - [ ] Send file + caption → caption is used as the goal, file is used as context ### Failure Recovery -- [ ] Temporarily disable Ollama → image file sends user an error warning, pipeline continues if other files succeeded +- [ ] Temporarily disable Lemonade → image file sends user an error warning, pipeline continues if other files succeeded - [ ] Send corrupt audio → transcription error is reported silently, pipeline continues ## Implementation Notes diff --git a/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md b/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md index 5a3580f..dca676f 100644 --- a/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md +++ b/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md @@ -1,7 +1,7 @@ # LM-04: Update Core Services for Lemonade ## Description -Convert `GeneratorService`, `RAGService`, 
and `TimeParserService` from using `OllamaAdapter` to `LemonadeAdapter` for general text and embedding tasks. +Convert `GeneratorService`, `RAGService`, and `TimeParserService` from using a legacy adapter to `LemonadeAdapter` for general text and embedding tasks. ## Status - [x] Update `GeneratorService` to use Lemonade's chat endpoint. diff --git a/_board/TASKS/M1-01_OLLAMA_WARMUP.md b/_board/TASKS/M1-01_OLLAMA_WARMUP.md index a12ebec..7e27980 100644 --- a/_board/TASKS/M1-01_OLLAMA_WARMUP.md +++ b/_board/TASKS/M1-01_OLLAMA_WARMUP.md @@ -1,11 +1,11 @@ -# M1-01: Enhance OllamaAdapter with Warmup Support +# M1-01: Enhance LemonadeAdapter with Warmup Support -**File**: `src/services/ollama-adapter.ts` +**File**: `src/services/lemonade-adapter.ts` **Dependencies**: None **Phase**: M1 - Core Infrastructure ## Description -Add a `warmup` method to `OllamaAdapter` that uses the `/api/chat` endpoint with a minimal prompt and supports the `keep_alive` parameter. +Add a `warmup` method to `LemonadeAdapter` that sends a minimal prompt to ensure the model is loaded into VRAM. ## Acceptance Criteria - `warmup(model: string, keepAlive: string | number): Promise` implemented. diff --git a/_board/TASKS/M1-04_MODEL_MANAGER_TESTS.md b/_board/TASKS/M1-04_MODEL_MANAGER_TESTS.md index d9f8cd3..a401d25 100644 --- a/_board/TASKS/M1-04_MODEL_MANAGER_TESTS.md +++ b/_board/TASKS/M1-04_MODEL_MANAGER_TESTS.md @@ -14,6 +14,6 @@ Create comprehensive unit tests for the `ModelManagerService`. - All tests pass with `npm test`. ## Implementation Notes -- Mock `OllamaAdapter` using `vi.fn()`. +- Mock `LemonadeAdapter` using `vi.fn()`. - Use `vi.useFakeTimers()` if needed to test timeouts/delays. -- Verify the number of calls to `ollama.warmup`. +- Verify the number of calls to `lemonade.warmup`. 
diff --git a/_board/TASKS/M2-01_GENERATOR_INTEGRATION.md b/_board/TASKS/M2-01_GENERATOR_INTEGRATION.md index 1bcb84a..ca37cc3 100644 --- a/_board/TASKS/M2-01_GENERATOR_INTEGRATION.md +++ b/_board/TASKS/M2-01_GENERATOR_INTEGRATION.md @@ -9,7 +9,7 @@ Update `GeneratorService` to call the `ModelManagerService` before performing an ## Acceptance Criteria - `ModelManagerService` injected into `GeneratorService`. -- `ensureModelLoaded` called before `ollama.chat` and `ollama.generate`. +- `ensureModelLoaded` called before `lemonade.chat`. - Works correctly with all model tiers. ## Implementation Notes diff --git a/_board/TASKS/M2-03_INTEGRATION_TESTS.md b/_board/TASKS/M2-03_INTEGRATION_TESTS.md index 7c6930a..5e9f15c 100644 --- a/_board/TASKS/M2-03_INTEGRATION_TESTS.md +++ b/_board/TASKS/M2-03_INTEGRATION_TESTS.md @@ -8,7 +8,7 @@ Integration test to verify that inference requests correctly trigger model loading. ## Acceptance Criteria -- Mock Ollama adapter. +- Mock Lemonade adapter. - Verify `GeneratorService` + `ModelManagerService` interaction. - All tests pass with `npm test`. 
diff --git a/_board/TASKS/P1-01_INITIALIZE_PROJECT.md b/_board/TASKS/P1-01_INITIALIZE_PROJECT.md index 883263d..ffcd585 100644 --- a/_board/TASKS/P1-01_INITIALIZE_PROJECT.md +++ b/_board/TASKS/P1-01_INITIALIZE_PROJECT.md @@ -11,7 +11,7 @@ Set up the base directory structure, initialize the Node.js project, and configu - `src/` (source code) - `src/core/` (orchestrator) - `src/agents/` (planner, executor, critic) - - `src/services/` (memory, logging, ollama) + - `src/services/` (memory, logging, lemonade) - `src/shared/` (types, protocol) - `src/adapters/` (telegram) diff --git a/_board/TASKS/P10-06_MODEL_SELECTION_VERIFICATION.md b/_board/TASKS/P10-06_MODEL_SELECTION_VERIFICATION.md index 0231268..7a1bce0 100644 --- a/_board/TASKS/P10-06_MODEL_SELECTION_VERIFICATION.md +++ b/_board/TASKS/P10-06_MODEL_SELECTION_VERIFICATION.md @@ -11,7 +11,7 @@ Conduct a verification run to ensure that the complexity mapping works as expect - [ ] Set `plannerComplexity` to `large` in `config.json`. - [ ] Run a task that requires multiple steps (e.g., "Write a complex research paper about black holes"). - [ ] Check terminal logs for IPC messages to `model-router`. -- [ ] Verify that the `model` parameter in Ollama requests is `qwen3:8b` (mapped to `large`). +- [ ] Verify that the `model` parameter in Lemonade requests is `qwen2.5:7b` (mapped to `large`). ## Success Criteria - Planning uses the configured complexity. diff --git a/_board/TASKS/P10-06_VERIFICATION_NOTES.md b/_board/TASKS/P10-06_VERIFICATION_NOTES.md index 6cc8a7e..cb9c129 100644 --- a/_board/TASKS/P10-06_VERIFICATION_NOTES.md +++ b/_board/TASKS/P10-06_VERIFICATION_NOTES.md @@ -38,7 +38,7 @@ All code changes for P10-04 and P10-05 have been implemented: 5. 
**Verify in logs**: - Look for `GeneratorService` IPC messages - - Check that `model` parameter in Ollama requests is `qwen3:8b` for large complexity nodes + - Check that `model` parameter in Lemonade requests is `qwen2.5:7b` for large complexity nodes - Verify fallback logic works: nodes with explicit `modelClass` use that, others use plan complexity ## Success Criteria @@ -54,6 +54,6 @@ The verification requires manual testing as it involves: - Running the orchestrator - Sending tasks via Telegram - Observing IPC message logs -- Verifying model selection in Ollama requests +- Verifying model selection in Lemonade requests All code changes are complete and the system is ready for verification testing. diff --git a/_board/TASKS/P2-01_OLLAMA_ADAPTER.md b/_board/TASKS/P2-01_OLLAMA_ADAPTER.md index c17bc7c..d2e36bf 100644 --- a/_board/TASKS/P2-01_OLLAMA_ADAPTER.md +++ b/_board/TASKS/P2-01_OLLAMA_ADAPTER.md @@ -1,11 +1,11 @@ -# Task: P2-01 Implement Ollama Adapter +# Task: P2-01 Implement Lemonade Adapter ## Description -Build a bridge between the ManBot platform and the local Ollama instance. This adapter will handle model inference, streaming responses, and token usage reporting. +Build a bridge between the ManBot platform and the local Lemonade instance. This adapter will handle model inference, streaming responses, and token usage reporting in an OpenAI-compatible way. ## Requirements -- Create `src/services/ollama-adapter.ts`. -- Implement a client using `fetch` or the `@ollama/ollama` JS library. +- Create `src/services/lemonade-adapter.ts`. +- Implement a client using `fetch` or the `openai` JS library (or raw fetch for OpenAI compatibility). - Support essential methods: - `generate(prompt, model)`: returns full response. - `chat(messages, model)`: returns full response. @@ -14,6 +14,6 @@ Build a bridge between the ManBot platform and the local Ollama instance. This a - Implement timeout handling and retry logic for network errors. 
## Definition of Done -- Adapter correctly retrieves completions from a running Ollama server. +- Adapter correctly retrieves completions from a running Lemonade server. - Supported methods handle streaming and regular responses accurately. -- Token metrics are extracted from the Ollama response payload. +- Token metrics are extracted from the Lemonade response payload. diff --git a/_board/TASKS/P2-02_MODEL_ROUTER.md b/_board/TASKS/P2-02_MODEL_ROUTER.md index f6b1778..c1804ab 100644 --- a/_board/TASKS/P2-02_MODEL_ROUTER.md +++ b/_board/TASKS/P2-02_MODEL_ROUTER.md @@ -1,7 +1,7 @@ # Task: P2-02 Implement Model Router ## Description -Create a service that maps abstract task complexity levels (`small`, `medium`, `large`) to specific local model names in Ollama. +Create a service that maps abstract task complexity levels (`small`, `medium`, `large`) to specific local model names in Lemonade. ## Requirements - Create `src/services/model-router.ts`. diff --git a/_board/TASKS/P3-04_CRITIC_AGENT.md b/_board/TASKS/P3-04_CRITIC_AGENT.md index 0240eae..9cc83fc 100644 --- a/_board/TASKS/P3-04_CRITIC_AGENT.md +++ b/_board/TASKS/P3-04_CRITIC_AGENT.md @@ -6,7 +6,7 @@ Implement the Critic Agent as a standalone process that receives execution resul ## Requirements - Create `src/agents/critic-agent.ts`. - Extend `BaseProcess`. -- Integration with Ollama Adapter and Model Router. +- Integration with Lemonade Adapter and Model Router. - Accepts a `reflection.evaluate` request containing the task context and draft result. - Returns a structured evaluation response. diff --git a/_board/TASKS/P4-03_RAG_SERVICE.md b/_board/TASKS/P4-03_RAG_SERVICE.md index 60eea17..b0d2d1c 100644 --- a/_board/TASKS/P4-03_RAG_SERVICE.md +++ b/_board/TASKS/P4-03_RAG_SERVICE.md @@ -9,7 +9,7 @@ Implement a Retrieval-Augmented Generation (RAG) service using the FAISS vector - Implement methods: - `addDocument(content, metadata)`: Embeds and stores a document. 
- `search(query, limit)`: Returns relevant snippets based on semantic similarity. -- Use a dedicated embedding model (e.g., `nomic-embed-text`) via the Ollama Adapter. +- Use a dedicated embedding model (e.g., `text-embedding-v3`) via the Lemonade Adapter. ## Definition of Done - RAG Service can index text and retrieve relevant matches for a given query. diff --git a/_board/TASKS/P6-06_ARCHIVING_INTEGRATION_TESTS.md b/_board/TASKS/P6-06_ARCHIVING_INTEGRATION_TESTS.md index c1bb91e..4fd463f 100644 --- a/_board/TASKS/P6-06_ARCHIVING_INTEGRATION_TESTS.md +++ b/_board/TASKS/P6-06_ARCHIVING_INTEGRATION_TESTS.md @@ -4,7 +4,7 @@ Write an integration test that verifies the full conversation archiving flow. ## Requirements -- Mock Ollama for the summarization step. +- Mock Lemonade for the summarization step. - Verify that `chat.new` triggers task history retrieval. - Verify that the summary is correctly inserted into the `RAGService`. - Verify that the SQLite database contains the expected archived record. diff --git a/skills/CONFIG.md b/skills/CONFIG.md index f94e492..50d3b49 100644 --- a/skills/CONFIG.md +++ b/skills/CONFIG.md @@ -9,3 +9,4 @@ | research | PRIMARY SEARCH. Use for deep web research, fact-checking, news gathering, or topical deep dives via the lynx tool. This is the default skill for any query requiring external or up-to-date information. | | reminder | SCHEDULING. Use this skill exclusively to set one-time or recurring reminders (e.g., "remind me in 2 hours"). This is the only tool that interfaces with the cron-manager service. | | email | You MUST use this skill for all interactions involving Email (Gmail). | +| calendar | You MUST use this skill for all interactions involving Google Calendar. | diff --git a/skills/calendar/SKILL.md b/skills/calendar/SKILL.md new file mode 100644 index 0000000..0d2dc05 --- /dev/null +++ b/skills/calendar/SKILL.md @@ -0,0 +1,245 @@ +# Calendar Skill + +Manage Google Calendar events using the `gog` CLI. 
+Mandatory tool: Use the `shell` tool to execute `gog` commands. + +## When to Use + +✅ **USE this skill when:** +- Checking upcoming meetings or daily schedules. +- Searching for calendar events by title or keyword. +- Creating, updating, or deleting calendar events. +- Checking availability or scheduling conflicts. +- Responding to meeting invitations (accept/decline/tentative). +- Reviewing team calendars or free/busy blocks. + +## When NOT to Use + +❌ **DON'T use this skill when:** +- Managing Gmail messages (use the Email Skill). +- Editing Google Contacts. +- Sending meeting notes via email (unless specifically requested). + +## Commands + +### 📅 Viewing Events +```bash +# Today's events +gog calendar events primary --today + +# Tomorrow's events +gog calendar events primary --tomorrow + +# Events for the week +gog calendar events primary --week + +# Next 3 days +gog calendar events primary --days 3 + +# Specific date range +gog calendar events primary --from today --to friday + +# Fetch events across all calendars +gog calendar events --all + +# Fetch events from specific calendars +gog calendar events --cal Work --cal Personal +``` + +### 🔎 Searching Events + +```bash +# Search events by keyword +gog calendar search "meeting" + +# Search within time windows +gog calendar search "standup" --today +gog calendar search "planning" --week +gog calendar search "demo" --days 365 + +# Custom date range +gog calendar search "conference" \ + --from 2025-01-01T00:00:00Z \ + --to 2025-01-31T00:00:00Z \ + --max 50 +``` + +### 📌 Event Details + +```bash +# Get a specific event +gog calendar get + +# JSON output for structured parsing +gog calendar get --json +``` + +### ➕ Creating Events + +```bash +# Simple meeting +gog calendar create primary \ + --summary "Meeting" \ + --from 2025-01-15T10:00:00Z \ + --to 2025-01-15T11:00:00Z + +# Meeting with attendees +gog calendar create primary \ + --summary "Team Sync" \ + --from 2025-01-15T14:00:00Z \ + --to 2025-01-15T15:00:00Z 
\ + --attendees "alice@example.com,bob@example.com" \ + --location "Zoom" +``` + +### ✏️ Updating Events + +```bash +# Update event title and time +gog calendar update \ + --summary "Updated Meeting" \ + --from 2025-01-15T11:00:00Z \ + --to 2025-01-15T12:00:00Z + +# Add attendees without replacing existing ones +gog calendar update \ + --add-attendee "alice@example.com,bob@example.com" +``` + +### ❌ Deleting Events + +```bash +gog calendar delete +``` + +### 📩 Invitations + +```bash +# Accept meeting +gog calendar respond --status accepted + +# Decline meeting +gog calendar respond --status declined + +# Tentative +gog calendar respond --status tentative +``` + +### 🕒 Availability & Conflicts + +```bash +# Check free/busy time +gog calendar freebusy \ + --calendars "primary,work@example.com" \ + --from 2025-01-15T00:00:00Z \ + --to 2025-01-16T00:00:00Z + +# Check conflicts +gog calendar conflicts --today + +# Conflicts across all calendars +gog calendar conflicts --all --today +``` + +### 👥 Team Calendars + +```bash +# Team events today +gog calendar team team@example.com --today + +# Team schedule this week +gog calendar team team@example.com --week + +# Free/busy overview +gog calendar team team@example.com --freebusy + +# Filter by title +gog calendar team team@example.com --query "standup" +``` + +### 🧠 Special Event Types + +```bash +# Focus time +gog calendar focus-time \ + --from 2025-01-15T13:00:00Z \ + --to 2025-01-15T14:00:00Z + +# Out of office +gog calendar out-of-office \ + --from 2025-01-20 \ + --to 2025-01-21 \ + --all-day + +# Working location +gog calendar working-location \ + --type office \ + --office-label "HQ" \ + --from 2025-01-22 \ + --to 2025-01-23 +``` + +### Tool Call Examples (JSON) + +When using this skill, format your tool calls as follows: + +Get Today's Events: + +```json +{ + "name": "shell", + "arguments": { + "command": "gog calendar events primary --today" + } +} +``` + +Search for an Event + +```json +{ + "name": "shell", + 
"arguments": { + "command": "gog calendar search \"meeting\" --today" + } +} +``` + +Create a Meeting + +```json +{ + "name": "shell", + "arguments": { + "command": "gog calendar create primary --summary \"Team Sync\" --from \"2025-01-15T14:00:00Z\" --to \"2025-01-15T15:00:00Z\"" + } +} +``` + +Accept Invitation + +```json +{ + "name": "shell", + "arguments": { + "command": "gog calendar respond primary \"event-123\" --status accepted" + } +} +``` + +Delete Event + +```json +{ + "name": "shell", + "arguments": { + "command": "gog calendar delete primary \"event-123\"" + } +} +``` + +## Notes +- Confirmation: Always confirm the event summary, date/time, and attendees before creating, updating, or deleting an event. +- Calendar IDs: primary refers to the user's main calendar; other calendars can be referenced by name or email. +- Timezone Awareness: The CLI supports timezone-aware timestamps; prefer ISO timestamps (e.g., 2025-01-15T14:00:00Z). +- JSON Output: Use the --json flag when structured event details are required for parsing. diff --git a/src/__tests__/archiving.test.ts b/src/__tests__/archiving.test.ts index 3924c7e..1ad10ee 100644 --- a/src/__tests__/archiving.test.ts +++ b/src/__tests__/archiving.test.ts @@ -15,112 +15,112 @@ import { TaskMemoryStore } from "../services/task-memory.js"; const TEST_DIR = join(process.cwd(), "data", "test-archiving-" + randomUUID()); function freshDbPath(prefix: string): string { - return join(TEST_DIR, `${prefix}-${randomUUID()}.sqlite`); + return join(TEST_DIR, `${prefix}-${randomUUID()}.sqlite`); } /** Fixed 4-dim embedding for test (L2-normalized). 
*/ function norm(v: number[]): number[] { - const len = Math.sqrt(v.reduce((s, x) => s + x * x, 0)) || 1; - return v.map((x) => x / len); + const len = Math.sqrt(v.reduce((s, x) => s + x * x, 0)) || 1; + return v.map((x) => x / len); } describe("Archiving pipeline integration", () => { - let taskDbPath: string; - let ragDbPath: string; - let taskStore: TaskMemoryStore; - let ragStore: RAGStore; + let taskDbPath: string; + let ragDbPath: string; + let taskStore: TaskMemoryStore; + let ragStore: RAGStore; - beforeEach(() => { - mkdirSync(TEST_DIR, { recursive: true }); - taskDbPath = freshDbPath("tasks"); - ragDbPath = freshDbPath("rag"); - taskStore = new TaskMemoryStore(taskDbPath); - ragStore = new RAGStore(ragDbPath, 4); // 4-dim for test vectors - }); - - afterEach(() => { - taskStore.close(); - ragStore.close(); - try { - rmSync(TEST_DIR, { recursive: true, force: true }); - } catch { - // ignore - } - }); - - it("chat.new flow: task history retrieval, mock summarization, RAG insert, SQLite contains record", () => { - const conversationId = "conv-" + randomUUID(); - const taskId1 = "t1-" + randomUUID(); - const taskId2 = "t2-" + randomUUID(); - - // 1. 
Create tasks in task-memory with same conversation_id (simulate prior conversation) - taskStore.createTaskWithDag({ - taskId: taskId1, - conversationId, - goal: "Explain TypeScript benefits", - nodes: [ - { id: "n1", type: "generate_text", service: "model-router", input: {} }, - ], - edges: [], - }); - taskStore.updateNodeStatus(taskId1, "n1", "completed", { - output: "TypeScript adds static types and better tooling.", + beforeEach(() => { + mkdirSync(TEST_DIR, { recursive: true }); + taskDbPath = freshDbPath("tasks"); + ragDbPath = freshDbPath("rag"); + taskStore = new TaskMemoryStore(taskDbPath); + ragStore = new RAGStore(ragDbPath, 4); // 4-dim for test vectors }); - taskStore.createTaskWithDag({ - taskId: taskId2, - conversationId, - goal: "Summarize the previous answer", - nodes: [ - { id: "n2", type: "generate_text", service: "model-router", input: {} }, - ], - edges: [], - }); - taskStore.updateNodeStatus(taskId2, "n2", "completed", { - output: "User learned about TypeScript benefits.", + afterEach(() => { + taskStore.close(); + ragStore.close(); + try { + rmSync(TEST_DIR, { recursive: true, force: true }); + } catch { + // ignore + } }); - // 2. Retrieve task history by conversation_id (simulate orchestrator step) - const tasks = taskStore.getTasksByConversationId(conversationId); - expect(tasks).toHaveLength(2); - expect(tasks.map((t) => t.goal)).toContain("Explain TypeScript benefits"); - expect(tasks.map((t) => t.goal)).toContain("Summarize the previous answer"); + it("chat.new flow: task history retrieval, mock summarization, RAG insert, SQLite contains record", () => { + const conversationId = "conv-" + randomUUID(); + const taskId1 = "t1-" + randomUUID(); + const taskId2 = "t2-" + randomUUID(); - // 3. Format history and "summarize" (mock: no Ollama) - const historyParts: string[] = []; - for (const t of tasks) { - const task = taskStore.getTask(t.id) as { nodes?: Array<{ output?: string }> } | null; - const nodes = task?.nodes ?? 
[]; - const lastOutput = nodes.filter((n) => n.output != null && n.output !== "").pop()?.output ?? ""; - historyParts.push(`Goal: ${t.goal}\nResult: ${lastOutput || "(no output)"}`); - } - const chatHistory = historyParts.join("\n\n---\n\n"); - expect(chatHistory).toContain("TypeScript"); - const mockSummary = "User preferences: TypeScript. Context: learning about static typing and tooling."; + // 1. Create tasks in task-memory with same conversation_id (simulate prior conversation) + taskStore.createTaskWithDag({ + taskId: taskId1, + conversationId, + goal: "Explain TypeScript benefits", + nodes: [ + { id: "n1", type: "generate_text", service: "model-router", input: {} }, + ], + edges: [], + }); + taskStore.updateNodeStatus(taskId1, "n1", "completed", { + output: "TypeScript adds static types and better tooling.", + }); - // 4. Store summary in RAG (simulate memory.semantic.insert; use dummy embedding) - const docId = randomUUID(); - const embedding = norm([0.5, 0.5, 0.5, 0.5]); - const metadata = { conversationId, chatId: 12345, archivedAt: Date.now(), source: "archiving" }; - ragStore.insert(docId, mockSummary, metadata, embedding); + taskStore.createTaskWithDag({ + taskId: taskId2, + conversationId, + goal: "Summarize the previous answer", + nodes: [ + { id: "n2", type: "generate_text", service: "model-router", input: {} }, + ], + edges: [], + }); + taskStore.updateNodeStatus(taskId2, "n2", "completed", { + output: "User learned about TypeScript benefits.", + }); - // 5. Verify RAG retrieval and SQLite contain the archived record - const results = ragStore.search(embedding, 5); - expect(results).toHaveLength(1); - expect(results[0]?.content).toBe(mockSummary); - expect(results[0]?.metadata).toMatchObject({ conversationId, source: "archiving" }); + // 2. 
Retrieve task history by conversation_id (simulate orchestrator step) + const tasks = taskStore.getTasksByConversationId(conversationId); + expect(tasks).toHaveLength(2); + expect(tasks.map((t) => t.goal)).toContain("Explain TypeScript benefits"); + expect(tasks.map((t) => t.goal)).toContain("Summarize the previous answer"); - // 6. Verify SQLite rag_documents has the row - const db = new Database(ragDbPath); - const row = db.prepare("SELECT id, content, metadata FROM rag_documents WHERE id = ?").get(docId) as - | { id: string; content: string; metadata: string } - | undefined; - db.close(); - expect(row).toBeDefined(); - expect(row?.id).toBe(docId); - expect(row?.content).toBe(mockSummary); - const meta = JSON.parse(row?.metadata ?? "{}") as Record; - expect(meta.conversationId).toBe(conversationId); - expect(meta.source).toBe("archiving"); - }); + // 3. Format history and "summarize" (mock: no Ollama) + const historyParts: string[] = []; + for (const t of tasks) { + const task = taskStore.getTask(t.id) as { nodes?: Array<{ output?: string }> } | null; + const nodes = task?.nodes ?? []; + const lastOutput = nodes.filter((n) => n.output != null && n.output !== "").pop()?.output ?? ""; + historyParts.push(`Goal: ${t.goal}\nResult: ${lastOutput || "(no output)"}`); + } + const chatHistory = historyParts.join("\n\n---\n\n"); + expect(chatHistory).toContain("TypeScript"); + const mockSummary = "User preferences: TypeScript. Context: learning about static typing and tooling."; + + // 4. Store summary in RAG (simulate memory.semantic.insert; use dummy embedding) + const docId = randomUUID(); + const embedding = norm([0.5, 0.5, 0.5, 0.5]); + const metadata = { conversationId, chatId: 12345, archivedAt: Date.now(), source: "archiving" }; + ragStore.insert(docId, mockSummary, metadata, embedding); + + // 5. 
Verify RAG retrieval and SQLite contain the archived record + const results = ragStore.search(embedding, 5); + expect(results).toHaveLength(1); + expect(results[0]?.content).toBe(mockSummary); + expect(results[0]?.metadata).toMatchObject({ conversationId, source: "archiving" }); + + // 6. Verify SQLite rag_documents has the row + const db = new Database(ragDbPath); + const row = db.prepare("SELECT id, content, metadata FROM rag_documents WHERE id = ?").get(docId) as + | { id: string; content: string; metadata: string } + | undefined; + db.close(); + expect(row).toBeDefined(); + expect(row?.id).toBe(docId); + expect(row?.content).toBe(mockSummary); + const meta = JSON.parse(row?.metadata ?? "{}") as Record; + expect(meta.conversationId).toBe(conversationId); + expect(meta.source).toBe("archiving"); + }); }); diff --git a/src/agents/planner-agent.ts b/src/agents/planner-agent.ts index 7ebe22a..bc62cc2 100644 --- a/src/agents/planner-agent.ts +++ b/src/agents/planner-agent.ts @@ -18,131 +18,131 @@ import { ConsoleLogger } from "../utils/console-logger.js"; const PLAN_CREATE = "plan.create"; interface PlanCreatePayload { - goal?: string; - message?: string; - complexity?: "small" | "medium" | "large"; - /** When set, a previous attempt failed; use this to produce a corrected plan. */ - previousError?: string; - /** Optional previous plan that failed (object). Will be stringified for the prompt. */ - previousPlan?: Record; - /** Optional conversation history for context. */ - history?: string; + goal?: string; + message?: string; + complexity?: "small" | "medium" | "large"; + /** When set, a previous attempt failed; use this to produce a corrected plan. */ + previousError?: string; + /** Optional previous plan that failed (object). Will be stringified for the prompt. */ + previousPlan?: Record; + /** Optional conversation history for context. 
*/ + history?: string; } function extractJson(text: string): string { - let s = text.trim(); - const start = s.indexOf("{"); - const end = s.lastIndexOf("}"); - if (start !== -1 && end !== -1 && end > start) { - s = s.slice(start, end + 1); - } - return s; + let s = text.trim(); + const start = s.indexOf("{"); + const end = s.lastIndexOf("}"); + if (start !== -1 && end !== -1 && end > start) { + s = s.slice(start, end + 1); + } + return s; } export class PlannerAgent extends BaseProcess { - private readonly lemonade: LemonadeAdapter; - private readonly modelRouter: ModelRouter; - private readonly skillManager: SkillManager; + private readonly lemonade: LemonadeAdapter; + private readonly modelRouter: ModelRouter; + private readonly skillManager: SkillManager; - constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter; skillManager?: SkillManager }) { - super({ processName: "planner" }); - this.lemonade = options?.lemonade ?? new LemonadeAdapter(); - this.modelRouter = options?.modelRouter ?? new ModelRouter(); - this.skillManager = options?.skillManager ?? new SkillManager(); - } - - protected override handleEnvelope(envelope: Envelope): void { - if (envelope.type !== PLAN_CREATE) return; - - const payload = envelope.payload as Record; - const p = payload as unknown as PlanCreatePayload; - const goal = p.goal ?? p.message ?? ""; - const complexity = p.complexity ?? "medium"; - - if (!goal || typeof goal !== "string") { - this.sendError(envelope, "INVALID_PAYLOAD", "plan.create requires goal or message"); - return; + constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter; skillManager?: SkillManager }) { + super({ processName: "planner" }); + this.lemonade = options?.lemonade ?? new LemonadeAdapter(); + this.modelRouter = options?.modelRouter ?? new ModelRouter(); + this.skillManager = options?.skillManager ?? 
new SkillManager(); } - (async () => { - try { - const model = this.modelRouter.getModel(complexity); - const previousError = typeof p.previousError === "string" ? p.previousError : undefined; - const previousPlanJson = - p.previousPlan && typeof p.previousPlan === "object" - ? JSON.stringify(p.previousPlan, null, 2) - : undefined; + protected override handleEnvelope(envelope: Envelope): void { + if (envelope.type !== PLAN_CREATE) return; - // Load dynamic skills - const skills = this.skillManager.listSkills(); + const payload = envelope.payload as Record; + const p = payload as unknown as PlanCreatePayload; + const goal = p.goal ?? p.message ?? ""; + const complexity = p.complexity ?? "medium"; - const promptOptions = { - ...(previousError && { previousError }), - ...(previousPlanJson && { previousPlanJson }), - ...(p.history && { conversationHistory: p.history }), - ...(skills.length > 0 && { skills }), - }; - const prompt = buildPlannerPrompt(goal, promptOptions); - const messages = [ - { role: "system" as const, content: "You output only valid JSON. No markdown, no explanation." }, - { role: "user" as const, content: prompt }, - ]; - const result = await this.lemonade.chat(messages, model); - const raw = result.message?.content ?? 
""; - ConsoleLogger.debug("planner", `Raw model response (length: ${raw.length})`); - const jsonStr = extractJson(raw); - const dag = JSON.parse(jsonStr) as CapabilityGraph; - - if (!dag.nodes || !Array.isArray(dag.nodes)) { - this.sendError(envelope, "INVALID_DAG", "Model output missing nodes array"); - return; + if (!goal || typeof goal !== "string") { + this.sendError(envelope, "INVALID_PAYLOAD", "plan.create requires goal or message"); + return; } - const validation = validateGraph(dag); - if (!validation.valid) { - this.sendError(envelope, "DAG_VALIDATION_FAILED", validation.errors.join("; ")); - return; - } + (async () => { + try { + const model = this.modelRouter.getModel(complexity); + const previousError = typeof p.previousError === "string" ? p.previousError : undefined; + const previousPlanJson = + p.previousPlan && typeof p.previousPlan === "object" + ? JSON.stringify(p.previousPlan, null, 2) + : undefined; - this.sendResponse(envelope, dag); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - this.sendError(envelope, "PLANNER_ERROR", message); - } - })(); - } + // Load dynamic skills + const skills = this.skillManager.listSkills(); - private sendResponse(request: Envelope, result: unknown): void { - const payload = responsePayloadSchema.parse({ status: "success", result }); - this.send({ - id: randomUUID(), - correlationId: request.id, - from: this.processName, - to: request.from, - type: "response", - version: "1.0", - timestamp: Date.now(), - payload, - }); - } + const promptOptions = { + ...(previousError && { previousError }), + ...(previousPlanJson && { previousPlanJson }), + ...(p.history && { conversationHistory: p.history }), + ...(skills.length > 0 && { skills }), + }; + const prompt = buildPlannerPrompt(goal, promptOptions); + const messages = [ + { role: "system" as const, content: "You output only valid JSON. No markdown, no explanation." 
}, + { role: "user" as const, content: prompt }, + ]; + const result = await this.lemonade.chat(messages, model); + const raw = result.message?.content ?? ""; + ConsoleLogger.debug("planner", `Raw model response (length: ${raw.length})`); + const jsonStr = extractJson(raw); + const dag = JSON.parse(jsonStr) as CapabilityGraph; - private sendError(request: Envelope, code: string, message: string): void { - this.send({ - id: randomUUID(), - correlationId: request.id, - from: this.processName, - to: request.from, - type: "error", - version: "1.0", - timestamp: Date.now(), - payload: { code, message, details: {} }, - }); - } + if (!dag.nodes || !Array.isArray(dag.nodes)) { + this.sendError(envelope, "INVALID_DAG", "Model output missing nodes array"); + return; + } + + const validation = validateGraph(dag); + if (!validation.valid) { + this.sendError(envelope, "DAG_VALIDATION_FAILED", validation.errors.join("; ")); + return; + } + + this.sendResponse(envelope, dag); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + this.sendError(envelope, "PLANNER_ERROR", message); + } + })(); + } + + private sendResponse(request: Envelope, result: unknown): void { + const payload = responsePayloadSchema.parse({ status: "success", result }); + this.send({ + id: randomUUID(), + correlationId: request.id, + from: this.processName, + to: request.from, + type: "response", + version: "1.0", + timestamp: Date.now(), + payload, + }); + } + + private sendError(request: Envelope, code: string, message: string): void { + this.send({ + id: randomUUID(), + correlationId: request.id, + from: this.processName, + to: request.from, + type: "error", + version: "1.0", + timestamp: Date.now(), + payload: { code, message, details: {} }, + }); + } } function main(): void { - const agent = new PlannerAgent(); - agent.start(); + const agent = new PlannerAgent(); + agent.start(); } main(); diff --git a/src/core/__tests__/orchestrator-autonomous.test.ts b/src/core/__tests__/orchestrator-autonomous.test.ts index 9ee45b8..57ef946 100644 --- a/src/core/__tests__/orchestrator-autonomous.test.ts +++ b/src/core/__tests__/orchestrator-autonomous.test.ts @@ -6,7 +6,7 @@ import { PROTOCOL_VERSION } from "../../shared/protocol.js"; // Mock dependencies vi.mock("../../shared/config.js", () => ({ getConfig: () => ({ - ollama: { baseUrl: "http://localhost:11434" }, + lemonade: { baseUrl: "http://localhost:11434" }, modelRouter: { plannerComplexity: "small" }, taskMemory: { dbPath: ":memory:" }, cron: { dbPath: ":memory:" }, diff --git a/src/core/__tests__/orchestrator-concurrency.test.ts b/src/core/__tests__/orchestrator-concurrency.test.ts index 3080b90..c95fbcc 100644 --- a/src/core/__tests__/orchestrator-concurrency.test.ts +++ b/src/core/__tests__/orchestrator-concurrency.test.ts @@ -18,8 +18,8 @@ vi.mock('../../utils/console-logger', () => ({ })); // We need to mock the adapter and services that Orchestrator initializes in its constructor -vi.mock('../../adapters/ollama-adapter', () => ({ - 
OllamaAdapter: vi.fn().mockImplementation(function () { return {}; }), +vi.mock('../../adapters/lemonade-adapter', () => ({ + LemonadeAdapter: vi.fn().mockImplementation(function () { return {}; }), })); vi.mock('../model-router', () => ({ @@ -37,7 +37,7 @@ describe('Orchestrator Concurrency', () => { vi.clearAllMocks(); (getConfig as any).mockReturnValue({ maxConcurrentTasks: 1, - ollama: { baseUrl: 'http://localhost:11434', timeoutMs: 60000 }, + lemonade: { baseUrl: 'http://localhost:11434', timeoutMs: 60000 }, modelRouter: { modelTiers: {} }, }); orchestrator = new Orchestrator(); diff --git a/src/core/__tests__/orchestrator-supervisor.test.ts b/src/core/__tests__/orchestrator-supervisor.test.ts index 20daec4..654bafb 100644 --- a/src/core/__tests__/orchestrator-supervisor.test.ts +++ b/src/core/__tests__/orchestrator-supervisor.test.ts @@ -4,7 +4,7 @@ import { Orchestrator } from "../orchestrator.js"; // Mock dependencies vi.mock("../../shared/config.js", () => ({ getConfig: () => ({ - ollama: { baseUrl: "http://localhost:11434" }, + lemonade: { baseUrl: "http://localhost:11434" }, modelRouter: { plannerComplexity: "small" }, taskMemory: { dbPath: ":memory:" }, cron: { dbPath: ":memory:" }, diff --git a/src/services/__tests__/generator-model-manager.test.ts b/src/services/__tests__/generator-model-manager.test.ts index e1a7a4d..e94f0d1 100644 --- a/src/services/__tests__/generator-model-manager.test.ts +++ b/src/services/__tests__/generator-model-manager.test.ts @@ -1,7 +1,7 @@ /** * Integration tests: GeneratorService + ModelManagerService interaction. * Verifies that inference requests correctly trigger model loading via - * ModelManagerService.ensureModelLoaded before ollama.generate / ollama.chat. + * ModelManagerService.ensureModelLoaded before lemonade.chat. 
*/ import { describe, it, expect, vi, beforeEach } from "vitest"; diff --git a/src/services/generator-service.ts b/src/services/generator-service.ts index fad986c..9761743 100644 --- a/src/services/generator-service.ts +++ b/src/services/generator-service.ts @@ -19,227 +19,227 @@ const NODE_EXECUTE = "node.execute"; const PROCESS_NAME = "model-router"; interface NodeExecutePayload { - taskId: string; - nodeId: string; - type: string; - service: string; - input: Record; - context?: Record; + taskId: string; + nodeId: string; + type: string; + service: string; + input: Record; + context?: Record; } export class GeneratorService extends BaseProcess { - private readonly lemonade: LemonadeAdapter; - private readonly modelRouter: ModelRouter; - private readonly modelManager: ModelManagerService | null; + private readonly lemonade: LemonadeAdapter; + private readonly modelRouter: ModelRouter; + private readonly modelManager: ModelManagerService | null; - constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter; modelManager?: ModelManagerService }) { - super({ processName: PROCESS_NAME }); - this.lemonade = options?.lemonade ?? new LemonadeAdapter(); - this.modelRouter = options?.modelRouter ?? new ModelRouter(); - this.modelManager = options?.modelManager ?? 
null; - } - - protected override handleEnvelope(envelope: Envelope): void { - if (envelope.type !== NODE_EXECUTE || envelope.to !== PROCESS_NAME) return; - - const payload = envelope.payload as Record; - const p = payload as unknown as NodeExecutePayload; - // Accept generate_text, generate, summarize; also "model-router" (planner sometimes uses service name as type) - const isGenerate = - p.type === "generate_text" || p.type === "generate" || p.type === "summarize" || p.type === "model-router"; - if (!isGenerate) { - this.sendError(envelope, "UNSUPPORTED_TYPE", `Generator only handles generate_text, generate, summarize; got ${p.type}`); - return; + constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter; modelManager?: ModelManagerService }) { + super({ processName: PROCESS_NAME }); + this.lemonade = options?.lemonade ?? new LemonadeAdapter(); + this.modelRouter = options?.modelRouter ?? new ModelRouter(); + this.modelManager = options?.modelManager ?? null; } - (async () => { - let model = "unknown"; - let prompt = ""; - let messages: ChatMessage[] | undefined; - try { - // Check for modelClass in input, then fallback to _complexity from context, then default to "medium" - const modelClass = (p.input?.modelClass as string) ?? - (p.context?._complexity as string) ?? - "medium"; - const tier = modelClass as ModelTier; - model = this.modelRouter.getModel(tier); + protected override handleEnvelope(envelope: Envelope): void { + if (envelope.type !== NODE_EXECUTE || envelope.to !== PROCESS_NAME) return; - // Ensure the model is loaded before inference. 
- if (this.modelManager) { - await this.modelManager.ensureModelLoaded(tier); + const payload = envelope.payload as Record; + const p = payload as unknown as NodeExecutePayload; + // Accept generate_text, generate, summarize; also "model-router" (planner sometimes uses service name as type) + const isGenerate = + p.type === "generate_text" || p.type === "generate" || p.type === "summarize" || p.type === "model-router"; + if (!isGenerate) { + this.sendError(envelope, "UNSUPPORTED_TYPE", `Generator only handles generate_text, generate, summarize; got ${p.type}`); + return; } - const context = (p.context ?? {}) as Record; - const goal = context["_goal"] as string | undefined; - let systemPrompt: string | undefined; - if (p.type === "summarize") { - const chatHistory = - (typeof p.input?.chatHistory === "string" && p.input.chatHistory) || - (context && typeof context.chatHistory === "string" && context.chatHistory) || - ""; - prompt = buildSummarizerPrompt(chatHistory); - systemPrompt = SUMMARIZER_SYSTEM_PROMPT; - } else if (typeof p.input?.prompt === "string") { - // When there's an explicit prompt, still include dependency outputs if available - const depOutputs = Object.entries(context) - .filter(([k]) => !k.startsWith("_")) - .map(([, v]) => { - // Extract body from http_get responses - if (v && typeof v === "object" && "body" in v && typeof v.body === "string") { - return v.body; - } - // Extract stdout from shell tool responses - if (v && typeof v === "object" && "stdout" in v && typeof v.stdout === "string") { - const shellResult = v as { stdout: string; stderr?: string }; - // Include stderr in context if present (for debugging) - if (shellResult.stderr && shellResult.stderr.trim()) { - return `${shellResult.stdout}\n\n[stderr: ${shellResult.stderr}]`; + + (async () => { + let model = "unknown"; + let prompt = ""; + let messages: ChatMessage[] | undefined; + try { + // Check for modelClass in input, then fallback to _complexity from context, then default to 
"medium" + const modelClass = (p.input?.modelClass as string) ?? + (p.context?._complexity as string) ?? + "medium"; + const tier = modelClass as ModelTier; + model = this.modelRouter.getModel(tier); + + // Ensure the model is loaded before inference. + if (this.modelManager) { + await this.modelManager.ensureModelLoaded(tier); } - return shellResult.stdout; - } - // For strings, return as-is - if (typeof v === "string") { - return v; - } - // For other objects, stringify - return JSON.stringify(v); - }); - if (depOutputs.length > 0) { - prompt = `${p.input.prompt}\n\nContent:\n${depOutputs.join("\n\n")}`; - } else { - prompt = p.input.prompt; - } - if (typeof p.input?.system_prompt === "string") { - systemPrompt = p.input.system_prompt === "analyzer" ? ANALYZER_SYSTEM_PROMPT : p.input.system_prompt; - // If it's an analyzer prompt, use the specialized user prompt builder - if (p.input.system_prompt === "analyzer") { - prompt = buildAnalyzerUserPrompt(goal ?? p.input.prompt as string, depOutputs.join("\n\n")); - } - } - } else if (goal && (context["_criticFeedback"] != null || context["_previousDraft"] != null)) { - const feedback = context["_criticFeedback"] as string | undefined; - const previous = context["_previousDraft"] as string | undefined; - prompt = `User goal: ${goal}\n\nPrevious draft:\n${previous ?? ""}\n\nCritic feedback:\n${feedback ?? ""}\n\nProduce an improved draft that addresses the feedback. 
Output only the improved text.`; - } else if (goal) { - const depOutputs = Object.entries(context) - .filter(([k]) => !k.startsWith("_")) - .map(([, v]) => { - // Extract body from http_get responses - if (v && typeof v === "object" && "body" in v && typeof v.body === "string") { - return v.body; - } - // Extract stdout from shell tool responses - if (v && typeof v === "object" && "stdout" in v && typeof v.stdout === "string") { - const shellResult = v as { stdout: string; stderr?: string }; - // Include stderr in context if present (for debugging) - if (shellResult.stderr && shellResult.stderr.trim()) { - return `${shellResult.stdout}\n\n[stderr: ${shellResult.stderr}]`; + const context = (p.context ?? {}) as Record; + const goal = context["_goal"] as string | undefined; + let systemPrompt: string | undefined; + if (p.type === "summarize") { + const chatHistory = + (typeof p.input?.chatHistory === "string" && p.input.chatHistory) || + (context && typeof context.chatHistory === "string" && context.chatHistory) || + ""; + prompt = buildSummarizerPrompt(chatHistory); + systemPrompt = SUMMARIZER_SYSTEM_PROMPT; + } else if (typeof p.input?.prompt === "string") { + // When there's an explicit prompt, still include dependency outputs if available + const depOutputs = Object.entries(context) + .filter(([k]) => !k.startsWith("_")) + .map(([, v]) => { + // Extract body from http_get responses + if (v && typeof v === "object" && "body" in v && typeof v.body === "string") { + return v.body; + } + // Extract stdout from shell tool responses + if (v && typeof v === "object" && "stdout" in v && typeof v.stdout === "string") { + const shellResult = v as { stdout: string; stderr?: string }; + // Include stderr in context if present (for debugging) + if (shellResult.stderr && shellResult.stderr.trim()) { + return `${shellResult.stdout}\n\n[stderr: ${shellResult.stderr}]`; + } + return shellResult.stdout; + } + // For strings, return as-is + if (typeof v === "string") { + return v; 
+ } + // For other objects, stringify + return JSON.stringify(v); + }); + if (depOutputs.length > 0) { + prompt = `${p.input.prompt}\n\nContent:\n${depOutputs.join("\n\n")}`; + } else { + prompt = p.input.prompt; + } + if (typeof p.input?.system_prompt === "string") { + systemPrompt = p.input.system_prompt === "analyzer" ? ANALYZER_SYSTEM_PROMPT : p.input.system_prompt; + // If it's an analyzer prompt, use the specialized user prompt builder + if (p.input.system_prompt === "analyzer") { + prompt = buildAnalyzerUserPrompt(goal ?? p.input.prompt as string, depOutputs.join("\n\n")); + } + } + } else if (goal && (context["_criticFeedback"] != null || context["_previousDraft"] != null)) { + const feedback = context["_criticFeedback"] as string | undefined; + const previous = context["_previousDraft"] as string | undefined; + prompt = `User goal: ${goal}\n\nPrevious draft:\n${previous ?? ""}\n\nCritic feedback:\n${feedback ?? ""}\n\nProduce an improved draft that addresses the feedback. Output only the improved text.`; + } else if (goal) { + const depOutputs = Object.entries(context) + .filter(([k]) => !k.startsWith("_")) + .map(([, v]) => { + // Extract body from http_get responses + if (v && typeof v === "object" && "body" in v && typeof v.body === "string") { + return v.body; + } + // Extract stdout from shell tool responses + if (v && typeof v === "object" && "stdout" in v && typeof v.stdout === "string") { + const shellResult = v as { stdout: string; stderr?: string }; + // Include stderr in context if present (for debugging) + if (shellResult.stderr && shellResult.stderr.trim()) { + return `${shellResult.stdout}\n\n[stderr: ${shellResult.stderr}]`; + } + return shellResult.stdout; + } + // For strings, return as-is + if (typeof v === "string") { + return v; + } + // For other objects, stringify + return JSON.stringify(v); + }); + prompt = `User goal: ${goal}\n\nContext from previous steps:\n${depOutputs.join("\n\n")}\n\nProduce a direct response to the goal. 
Output only the response text.`; + } else { + const depOutputs = Object.values(context).map((v) => { + // Extract body from http_get responses + if (v && typeof v === "object" && "body" in v && typeof v.body === "string") { + return v.body; + } + // Extract stdout from shell tool responses + if (v && typeof v === "object" && "stdout" in v && typeof v.stdout === "string") { + const shellResult = v as { stdout: string; stderr?: string }; + // Include stderr in context if present (for debugging) + if (shellResult.stderr && shellResult.stderr.trim()) { + return `${shellResult.stdout}\n\n[stderr: ${shellResult.stderr}]`; + } + return shellResult.stdout; + } + // For strings, return as-is + if (typeof v === "string") { + return v; + } + // For other objects, stringify + return JSON.stringify(v); + }); + prompt = depOutputs.join("\n\n") || "Generate a brief response."; + } + if (p.input?.messages && Array.isArray(p.input.messages)) { + messages = p.input.messages as ChatMessage[]; } - return shellResult.stdout; - } - // For strings, return as-is - if (typeof v === "string") { - return v; - } - // For other objects, stringify - return JSON.stringify(v); - }); - prompt = `User goal: ${goal}\n\nContext from previous steps:\n${depOutputs.join("\n\n")}\n\nProduce a direct response to the goal. 
Output only the response text.`; - } else { - const depOutputs = Object.values(context).map((v) => { - // Extract body from http_get responses - if (v && typeof v === "object" && "body" in v && typeof v.body === "string") { - return v.body; - } - // Extract stdout from shell tool responses - if (v && typeof v === "object" && "stdout" in v && typeof v.stdout === "string") { - const shellResult = v as { stdout: string; stderr?: string }; - // Include stderr in context if present (for debugging) - if (shellResult.stderr && shellResult.stderr.trim()) { - return `${shellResult.stdout}\n\n[stderr: ${shellResult.stderr}]`; - } - return shellResult.stdout; - } - // For strings, return as-is - if (typeof v === "string") { - return v; - } - // For other objects, stringify - return JSON.stringify(v); - }); - prompt = depOutputs.join("\n\n") || "Generate a brief response."; - } - if (p.input?.messages && Array.isArray(p.input.messages)) { - messages = p.input.messages as ChatMessage[]; - } - if (!messages) { - messages = systemPrompt - ? [{ role: "system" as const, content: systemPrompt }, { role: "user" as const, content: prompt }] - : [{ role: "user" as const, content: prompt }]; - } + if (!messages) { + messages = systemPrompt + ? [{ role: "system" as const, content: systemPrompt }, { role: "user" as const, content: prompt }] + : [{ role: "user" as const, content: prompt }]; + } - const genResult = await this.lemonade.chat(messages, model, { - tools: p.input?.tools as any[], + const genResult = await this.lemonade.chat(messages, model, { + tools: p.input?.tools as any[], + }); + + const text = genResult.message.content; + const tool_calls = genResult.message.tool_calls; + this.sendResponse(envelope, { + text, + tool_calls, + usage: genResult.usage + }); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + const isTimeout = err instanceof Error && (err.name === "AbortError" || message.includes("aborted") || message.includes("timeout")); + const errorCode = isTimeout ? "GENERATOR_TIMEOUT" : "GENERATOR_ERROR"; + + const details: Record = { + originalError: message, + isTimeout, + model, + promptLength: prompt.length, + messageCount: messages?.length ?? 0 + }; + if (err instanceof Error && err.stack) { + details.stack = err.stack; + } + this.sendError(envelope, errorCode, message, details); + } + })(); + } + + private sendResponse(request: Envelope, result: unknown): void { + const payload = responsePayloadSchema.parse({ status: "success", result }); + this.send({ + id: randomUUID(), + correlationId: request.id, + from: this.processName, + to: request.from, + type: "response", + version: PROTOCOL_VERSION, + timestamp: Date.now(), + payload, }); + } - const text = genResult.message.content; - const tool_calls = genResult.message.tool_calls; - this.sendResponse(envelope, { - text, - tool_calls, - usage: genResult.usage + private sendError(request: Envelope, code: string, message: string, details: Record = {}): void { + this.send({ + id: randomUUID(), + correlationId: request.id, + from: this.processName, + to: request.from, + type: "error", + version: PROTOCOL_VERSION, + timestamp: Date.now(), + payload: { code, message, details }, }); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - const isTimeout = err instanceof Error && (err.name === "AbortError" || message.includes("aborted") || message.includes("timeout")); - const errorCode = isTimeout ? "GENERATOR_TIMEOUT" : "GENERATOR_ERROR"; - - const details: Record = { - originalError: message, - isTimeout, - model, - promptLength: prompt.length, - messageCount: messages?.length ?? 
0 - }; - if (err instanceof Error && err.stack) { - details.stack = err.stack; - } - this.sendError(envelope, errorCode, message, details); - } - })(); - } - - private sendResponse(request: Envelope, result: unknown): void { - const payload = responsePayloadSchema.parse({ status: "success", result }); - this.send({ - id: randomUUID(), - correlationId: request.id, - from: this.processName, - to: request.from, - type: "response", - version: PROTOCOL_VERSION, - timestamp: Date.now(), - payload, - }); - } - - private sendError(request: Envelope, code: string, message: string, details: Record = {}): void { - this.send({ - id: randomUUID(), - correlationId: request.id, - from: this.processName, - to: request.from, - type: "error", - version: PROTOCOL_VERSION, - timestamp: Date.now(), - payload: { code, message, details }, - }); - } + } } function main(): void { - const service = new GeneratorService(); - service.start(); + const service = new GeneratorService(); + service.start(); } main(); diff --git a/src/services/model-router.ts b/src/services/model-router.ts index 8a83e8c..ffd69a0 100644 --- a/src/services/model-router.ts +++ b/src/services/model-router.ts @@ -8,27 +8,27 @@ import { getConfig } from "../shared/config.js"; export type ComplexityLevel = "small" | "medium" | "large"; export interface ModelRouterConfig { - small: string; - medium: string; - large: string; + small: string; + medium: string; + large: string; } export class ModelRouter { - private readonly config: ModelRouterConfig; + private readonly config: ModelRouterConfig; - constructor(config?: Partial) { - const defaults = getConfig().modelRouter; - this.config = { ...defaults, ...config }; - } + constructor(config?: Partial) { + const defaults = getConfig().modelRouter; + this.config = { ...defaults, ...config }; + } - /** - * Return the Ollama model name for the given complexity level. 
- */ - getModel(complexity: ComplexityLevel): string { - return this.config[complexity]; - } + /** + * Return the Ollama model name for the given complexity level. + */ + getModel(complexity: ComplexityLevel): string { + return this.config[complexity]; + } - getConfig(): ModelRouterConfig { - return { ...this.config }; - } + getConfig(): ModelRouterConfig { + return { ...this.config }; + } } diff --git a/src/services/rag-service.ts b/src/services/rag-service.ts index 6513719..e8791cc 100644 --- a/src/services/rag-service.ts +++ b/src/services/rag-service.ts @@ -29,276 +29,276 @@ CREATE TABLE IF NOT EXISTS rag_documents ( `; function embeddingToBuffer(embedding: number[]): Buffer { - const f64 = new Float64Array(embedding); - return Buffer.from(f64.buffer, f64.byteOffset, f64.byteLength); + const f64 = new Float64Array(embedding); + return Buffer.from(f64.buffer, f64.byteOffset, f64.byteLength); } function bufferToEmbedding(buf: Buffer): number[] { - const f64 = new Float64Array(buf.buffer, buf.byteOffset, buf.byteLength / 8); - return Array.from(f64); + const f64 = new Float64Array(buf.buffer, buf.byteOffset, buf.byteLength / 8); + return Array.from(f64); } function dotProduct(a: number[], b: number[]): number { - let sum = 0; - const len = Math.min(a.length, b.length); - for (let i = 0; i < len; i++) sum += a[i]! * b[i]!; - return sum; + let sum = 0; + const len = Math.min(a.length, b.length); + for (let i = 0; i < len; i++) sum += a[i]! * b[i]!; + return sum; } /** L2 distance to score: higher = more similar. */ function distanceToScore(distance: number): number { - return 1 / (1 + Math.max(0, distance)); + return 1 / (1 + Math.max(0, distance)); } /** SQLite-backed store for RAG documents. Uses sqlite-vss for KNN when available, else dot-product full scan. 
*/ export class RAGStore { - private db: Database.Database; - private useVss: boolean; - private readonly embeddingDimensions: number; - private readonly vssTableName = "vss_rag_embedding"; + private db: Database.Database; + private useVss: boolean; + private readonly embeddingDimensions: number; + private readonly vssTableName = "vss_rag_embedding"; - constructor(dbPath: string, embeddingDimensions = 768) { - mkdirSync(dirname(dbPath), { recursive: true }); - this.db = new Database(dbPath); - this.embeddingDimensions = embeddingDimensions; - this.db.exec(RAG_SCHEMA); - let vssLoaded = false; - try { - sqliteVss.load(this.db); - this.db.exec( - `CREATE VIRTUAL TABLE IF NOT EXISTS ${this.vssTableName} USING vss0( embedding(${this.embeddingDimensions}) )`, - ); - vssLoaded = true; - } catch { - // Extension unavailable (e.g. unsupported platform); use fallback + constructor(dbPath: string, embeddingDimensions = 768) { + mkdirSync(dirname(dbPath), { recursive: true }); + this.db = new Database(dbPath); + this.embeddingDimensions = embeddingDimensions; + this.db.exec(RAG_SCHEMA); + let vssLoaded = false; + try { + sqliteVss.load(this.db); + this.db.exec( + `CREATE VIRTUAL TABLE IF NOT EXISTS ${this.vssTableName} USING vss0( embedding(${this.embeddingDimensions}) )`, + ); + vssLoaded = true; + } catch { + // Extension unavailable (e.g. unsupported platform); use fallback + } + this.useVss = vssLoaded; } - this.useVss = vssLoaded; - } - close(): void { - this.db.close(); - } + close(): void { + this.db.close(); + } - /** Whether sqlite-vss is active (KNN search). */ - isVssEnabled(): boolean { - return this.useVss; - } + /** Whether sqlite-vss is active (KNN search). 
*/ + isVssEnabled(): boolean { + return this.useVss; + } - insert(id: string, content: string, metadata: Record, embedding: number[]): void { - const metaJson = JSON.stringify(metadata); - const blob = embeddingToBuffer(embedding); - const run = (): void => { - this.db - .prepare( - `INSERT INTO rag_documents (id, content, metadata, embedding) VALUES (?, ?, ?, ?)`, - ) - .run(id, content, metaJson, blob); - if (this.useVss) { - const rowid = this.db.prepare("SELECT last_insert_rowid()").pluck().get() as number; - const embeddingJson = JSON.stringify(embedding); - this.db.prepare(`INSERT INTO ${this.vssTableName}(rowid, embedding) VALUES (?, ?)`).run(rowid, embeddingJson); - } - }; - this.db.transaction(run)(); - } + insert(id: string, content: string, metadata: Record, embedding: number[]): void { + const metaJson = JSON.stringify(metadata); + const blob = embeddingToBuffer(embedding); + const run = (): void => { + this.db + .prepare( + `INSERT INTO rag_documents (id, content, metadata, embedding) VALUES (?, ?, ?, ?)`, + ) + .run(id, content, metaJson, blob); + if (this.useVss) { + const rowid = this.db.prepare("SELECT last_insert_rowid()").pluck().get() as number; + const embeddingJson = JSON.stringify(embedding); + this.db.prepare(`INSERT INTO ${this.vssTableName}(rowid, embedding) VALUES (?, ?)`).run(rowid, embeddingJson); + } + }; + this.db.transaction(run)(); + } - search(queryEmbedding: number[], limit: number, filters?: { conversationId?: string | undefined }): Array<{ content: string; metadata: Record; score: number }> { - const k = Math.max(1, Math.floor(limit)); - if (this.useVss) { - const hasRows = (this.db.prepare(`SELECT 1 FROM ${this.vssTableName} LIMIT 1`).get() as unknown) != null; - if (!hasRows) return []; - const queryJson = JSON.stringify(queryEmbedding); - // Use a CTE or subquery to filter if conversationId is provided - let vssRows: Array<{ rowid: number; distance: number }>; - if (filters?.conversationId) { - vssRows = this.db - .prepare( - 
`SELECT t.rowid, t.distance + search(queryEmbedding: number[], limit: number, filters?: { conversationId?: string | undefined }): Array<{ content: string; metadata: Record; score: number }> { + const k = Math.max(1, Math.floor(limit)); + if (this.useVss) { + const hasRows = (this.db.prepare(`SELECT 1 FROM ${this.vssTableName} LIMIT 1`).get() as unknown) != null; + if (!hasRows) return []; + const queryJson = JSON.stringify(queryEmbedding); + // Use a CTE or subquery to filter if conversationId is provided + let vssRows: Array<{ rowid: number; distance: number }>; + if (filters?.conversationId) { + vssRows = this.db + .prepare( + `SELECT t.rowid, t.distance FROM ${this.vssTableName} t JOIN rag_documents d ON d.rowid = t.rowid WHERE vss_search(t.embedding, ?) AND json_extract(d.metadata, '$.conversationId') = ? LIMIT ?`, - ) - .all(queryJson, filters.conversationId, k) as Array<{ rowid: number; distance: number }>; - } else { - vssRows = this.db - .prepare( - `SELECT rowid, distance FROM ${this.vssTableName} WHERE vss_search(embedding, ?) LIMIT ?`, - ) - .all(queryJson, k) as Array<{ rowid: number; distance: number }>; - } - if (vssRows.length === 0) return []; - const rowids = vssRows.map((r) => r.rowid); - const order = new Map(rowids.map((r, i) => [r, i])); - const placeholders = rowids.map(() => "?").join(","); - const docs = this.db - .prepare( - `SELECT rowid, content, metadata FROM rag_documents WHERE rowid IN (${placeholders})`, - ) - .all(...rowids) as Array<{ rowid: number; content: string; metadata: string }>; - docs.sort((a, b) => (order.get(a.rowid) ?? 0) - (order.get(b.rowid) ?? 0)); - const distanceByRowid = new Map(vssRows.map((r) => [r.rowid, r.distance])); - return docs.map((d) => ({ - content: d.content, - metadata: (JSON.parse(d.metadata || "{}") ?? {}) as Record, - score: distanceToScore(distanceByRowid.get(d.rowid) ?? 
0), - })); + ) + .all(queryJson, filters.conversationId, k) as Array<{ rowid: number; distance: number }>; + } else { + vssRows = this.db + .prepare( + `SELECT rowid, distance FROM ${this.vssTableName} WHERE vss_search(embedding, ?) LIMIT ?`, + ) + .all(queryJson, k) as Array<{ rowid: number; distance: number }>; + } + if (vssRows.length === 0) return []; + const rowids = vssRows.map((r) => r.rowid); + const order = new Map(rowids.map((r, i) => [r, i])); + const placeholders = rowids.map(() => "?").join(","); + const docs = this.db + .prepare( + `SELECT rowid, content, metadata FROM rag_documents WHERE rowid IN (${placeholders})`, + ) + .all(...rowids) as Array<{ rowid: number; content: string; metadata: string }>; + docs.sort((a, b) => (order.get(a.rowid) ?? 0) - (order.get(b.rowid) ?? 0)); + const distanceByRowid = new Map(vssRows.map((r) => [r.rowid, r.distance])); + return docs.map((d) => ({ + content: d.content, + metadata: (JSON.parse(d.metadata || "{}") ?? {}) as Record, + score: distanceToScore(distanceByRowid.get(d.rowid) ?? 0), + })); + } + const rows = this.db.prepare(`SELECT id, content, metadata, embedding FROM rag_documents`).all() as Array<{ + id: string; + content: string; + metadata: string; + embedding: Buffer; + }>; + if (rows.length === 0) return []; + let scored = rows.map((row) => { + const embedding = bufferToEmbedding(row.embedding); + const score = dotProduct(queryEmbedding, embedding); + const metadata = (JSON.parse(row.metadata || "{}") ?? 
{}) as Record; + return { content: row.content, metadata, score }; + }); + if (filters?.conversationId) { + scored = scored.filter((s) => s.metadata.conversationId === filters.conversationId); + } + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, k); } - const rows = this.db.prepare(`SELECT id, content, metadata, embedding FROM rag_documents`).all() as Array<{ - id: string; - content: string; - metadata: string; - embedding: Buffer; - }>; - if (rows.length === 0) return []; - let scored = rows.map((row) => { - const embedding = bufferToEmbedding(row.embedding); - const score = dotProduct(queryEmbedding, embedding); - const metadata = (JSON.parse(row.metadata || "{}") ?? {}) as Record; - return { content: row.content, metadata, score }; - }); - if (filters?.conversationId) { - scored = scored.filter((s) => s.metadata.conversationId === filters.conversationId); - } - scored.sort((a, b) => b.score - a.score); - return scored.slice(0, k); - } } interface MemorySearchPayload { - query: string; - limit?: number; + query: string; + limit?: number; } interface MemoryInsertPayload { - content: string; - metadata?: Record; + content: string; + metadata?: Record; } export class RAGService extends BaseProcess { - private readonly lemonade: LemonadeAdapter; - private readonly embedModel: string; - private readonly store: RAGStore; + private readonly lemonade: LemonadeAdapter; + private readonly embedModel: string; + private readonly store: RAGStore; - constructor(options?: { lemonade?: LemonadeAdapter; embedModel?: string; dbPath?: string; embeddingDimensions?: number }) { - super({ processName: PROCESS_NAME }); - this.lemonade = options?.lemonade ?? new LemonadeAdapter(); - this.embedModel = options?.embedModel ?? getConfig().rag.embedModel; - const dbPath = options?.dbPath ?? getConfig().rag.dbPath; - const embeddingDimensions = options?.embeddingDimensions ?? 
getConfig().rag.embeddingDimensions; - this.store = new RAGStore(dbPath, embeddingDimensions); - } - - /** Embed and store a document */ - async addDocument(content: string, metadata: Record = {}): Promise { - ConsoleLogger.debug(PROCESS_NAME, `Embedding document: ${content.substring(0, 50)}...`); - const { embedding } = await this.lemonade.embed(content, this.embedModel, { timeoutMs: 60000 }); - const id = randomUUID(); - this.store.insert(id, content, metadata, embedding); - ConsoleLogger.info(PROCESS_NAME, `Stored document ${id.substring(0, 8)}`); - return id; - } - - /** Return relevant snippets by semantic similarity (cosine via dot product for L2-normalized vectors) */ - async search(query: string, limit = 5, filters?: { conversationId?: string | undefined }): Promise; score: number }>> { - ConsoleLogger.debug(PROCESS_NAME, `Searching RAG: "${query}"${filters?.conversationId ? ` (filter: ${filters.conversationId})` : ""}`); - const { embedding: queryEmbed } = await this.lemonade.embed(query, this.embedModel, { timeoutMs: 30000 }); - const results = this.store.search(queryEmbed, limit, filters); - ConsoleLogger.info(PROCESS_NAME, `Found ${results.length} results for query`); - return results; - } - - protected override handleEnvelope(envelope: Envelope): void { - if (envelope.to !== PROCESS_NAME) return; - const type = envelope.type; - const payload = envelope.payload as Record; - - if (type === "node.execute") { - const p = payload as { type?: string; input?: Record }; - if (p.type !== "semantic_search") return; - const query = (p.input?.query ?? "") as string; - const limit = (typeof p.input?.limit === "number" ? p.input.limit : 5) as number; - const conversationId = p.input?.conversationId as string | undefined; - - this.search(query, limit, { conversationId }).then((results) => { - this.sendResponse(envelope, { results, snippets: results.map((r) => r.content) }); - }).catch((err) => { - this.sendError(envelope, "RAG_SEARCH_ERROR", err instanceof Error ? 
err.message : String(err)); - }); - return; + constructor(options?: { lemonade?: LemonadeAdapter; embedModel?: string; dbPath?: string; embeddingDimensions?: number }) { + super({ processName: PROCESS_NAME }); + this.lemonade = options?.lemonade ?? new LemonadeAdapter(); + this.embedModel = options?.embedModel ?? getConfig().rag.embedModel; + const dbPath = options?.dbPath ?? getConfig().rag.dbPath; + const embeddingDimensions = options?.embeddingDimensions ?? getConfig().rag.embeddingDimensions; + this.store = new RAGStore(dbPath, embeddingDimensions); } - if (type === "memory.semantic.insert") { - const p = payload as unknown as MemoryInsertPayload; - const content = p.content ?? ""; - const metadata = (p.metadata ?? {}) as Record; - if (typeof content !== "string") { - this.sendError(envelope, "INVALID_PAYLOAD", "memory.semantic.insert requires content (string)"); - return; - } - this.addDocument(content, metadata).then((id) => { - this.sendResponse(envelope, { id }); - }).catch((err) => { - this.sendError(envelope, "RAG_INSERT_ERROR", err instanceof Error ? err.message : String(err)); - }); - return; + /** Embed and store a document */ + async addDocument(content: string, metadata: Record = {}): Promise { + ConsoleLogger.debug(PROCESS_NAME, `Embedding document: ${content.substring(0, 50)}...`); + const { embedding } = await this.lemonade.embed(content, this.embedModel, { timeoutMs: 60000 }); + const id = randomUUID(); + this.store.insert(id, content, metadata, embedding); + ConsoleLogger.info(PROCESS_NAME, `Stored document ${id.substring(0, 8)}`); + return id; } - if (type === "memory.semantic.search") { - const p = payload as unknown as MemorySearchPayload & { conversationId?: string }; - const query = p.query ?? ""; - const limit = typeof p.limit === "number" ? 
p.limit : 5; - const conversationId = p.conversationId; - - if (typeof query !== "string") { - this.sendError(envelope, "INVALID_PAYLOAD", "memory.semantic.search requires query (string)"); - return; - } - this.search(query, limit, { conversationId }).then((results) => { - this.sendResponse(envelope, { results }); - }).catch((err) => { - this.sendError(envelope, "RAG_SEARCH_ERROR", err instanceof Error ? err.message : String(err)); - }); - return; + /** Return relevant snippets by semantic similarity (cosine via dot product for L2-normalized vectors) */ + async search(query: string, limit = 5, filters?: { conversationId?: string | undefined }): Promise; score: number }>> { + ConsoleLogger.debug(PROCESS_NAME, `Searching RAG: "${query}"${filters?.conversationId ? ` (filter: ${filters.conversationId})` : ""}`); + const { embedding: queryEmbed } = await this.lemonade.embed(query, this.embedModel, { timeoutMs: 30000 }); + const results = this.store.search(queryEmbed, limit, filters); + ConsoleLogger.info(PROCESS_NAME, `Found ${results.length} results for query`); + return results; } - // Catch-all: respond with error for any unrecognized message type - // This prevents the executor from hanging forever waiting for a response - this.sendError(envelope, "UNSUPPORTED_TYPE", `rag-service does not handle type="${type}"`); - } + protected override handleEnvelope(envelope: Envelope): void { + if (envelope.to !== PROCESS_NAME) return; + const type = envelope.type; + const payload = envelope.payload as Record; - private sendResponse(request: Envelope, result: unknown): void { - const payload = responsePayloadSchema.parse({ status: "success", result }); - this.send({ - id: randomUUID(), - correlationId: request.id, - from: this.processName, - to: request.from, - type: "response", - version: PROTOCOL_VERSION, - timestamp: Date.now(), - payload, - }); - } + if (type === "node.execute") { + const p = payload as { type?: string; input?: Record }; + if (p.type !== "semantic_search") 
return; + const query = (p.input?.query ?? "") as string; + const limit = (typeof p.input?.limit === "number" ? p.input.limit : 5) as number; + const conversationId = p.input?.conversationId as string | undefined; - private sendError(request: Envelope, code: string, message: string): void { - this.send({ - id: randomUUID(), - correlationId: request.id, - from: this.processName, - to: request.from, - type: "error", - version: PROTOCOL_VERSION, - timestamp: Date.now(), - payload: { code, message, details: {} }, - }); - } + this.search(query, limit, { conversationId }).then((results) => { + this.sendResponse(envelope, { results, snippets: results.map((r) => r.content) }); + }).catch((err) => { + this.sendError(envelope, "RAG_SEARCH_ERROR", err instanceof Error ? err.message : String(err)); + }); + return; + } + + if (type === "memory.semantic.insert") { + const p = payload as unknown as MemoryInsertPayload; + const content = p.content ?? ""; + const metadata = (p.metadata ?? {}) as Record; + if (typeof content !== "string") { + this.sendError(envelope, "INVALID_PAYLOAD", "memory.semantic.insert requires content (string)"); + return; + } + this.addDocument(content, metadata).then((id) => { + this.sendResponse(envelope, { id }); + }).catch((err) => { + this.sendError(envelope, "RAG_INSERT_ERROR", err instanceof Error ? err.message : String(err)); + }); + return; + } + + if (type === "memory.semantic.search") { + const p = payload as unknown as MemorySearchPayload & { conversationId?: string }; + const query = p.query ?? ""; + const limit = typeof p.limit === "number" ? p.limit : 5; + const conversationId = p.conversationId; + + if (typeof query !== "string") { + this.sendError(envelope, "INVALID_PAYLOAD", "memory.semantic.search requires query (string)"); + return; + } + this.search(query, limit, { conversationId }).then((results) => { + this.sendResponse(envelope, { results }); + }).catch((err) => { + this.sendError(envelope, "RAG_SEARCH_ERROR", err instanceof Error ? 
err.message : String(err)); + }); + return; + } + + // Catch-all: respond with error for any unrecognized message type + // This prevents the executor from hanging forever waiting for a response + this.sendError(envelope, "UNSUPPORTED_TYPE", `rag-service does not handle type="${type}"`); + } + + private sendResponse(request: Envelope, result: unknown): void { + const payload = responsePayloadSchema.parse({ status: "success", result }); + this.send({ + id: randomUUID(), + correlationId: request.id, + from: this.processName, + to: request.from, + type: "response", + version: PROTOCOL_VERSION, + timestamp: Date.now(), + payload, + }); + } + + private sendError(request: Envelope, code: string, message: string): void { + this.send({ + id: randomUUID(), + correlationId: request.id, + from: this.processName, + to: request.from, + type: "error", + version: PROTOCOL_VERSION, + timestamp: Date.now(), + payload: { code, message, details: {} }, + }); + } } function main(): void { - const service = new RAGService(); - service.start(); + const service = new RAGService(); + service.start(); } main(); diff --git a/src/shared/config.ts b/src/shared/config.ts index 460d7a5..2b6e6c0 100644 --- a/src/shared/config.ts +++ b/src/shared/config.ts @@ -7,320 +7,320 @@ import { readFileSync, existsSync } from "node:fs"; import { join } from "node:path"; export interface LemonadeConfig { - baseUrl: string; - timeoutMs: number; - retries: number; - numCtx: number; + baseUrl: string; + timeoutMs: number; + retries: number; + numCtx: number; } export interface TelegramConfig { - botToken: string; - /** Comma-separated Telegram user IDs allowed to use the bot. Empty or omit = allow all. */ - allowedUserIds: string; + botToken: string; + /** Comma-separated Telegram user IDs allowed to use the bot. Empty or omit = allow all. 
*/ + allowedUserIds: string; } export interface TaskMemoryConfig { - dbPath: string; + dbPath: string; } export interface LoggerConfig { - logDir: string; - logFile: string; + logDir: string; + logFile: string; } export interface RagConfig { - embedModel: string; - /** SQLite database path for RAG document storage. */ - dbPath: string; - /** Embedding vector dimension (must match embed model, e.g. 768 for nomic-embed-text). Used for sqlite-vss. */ - embeddingDimensions: number; + embedModel: string; + /** SQLite database path for RAG document storage. */ + dbPath: string; + /** Embedding vector dimension (must match embed model, e.g. 768 for nomic-embed-text). Used for sqlite-vss. */ + embeddingDimensions: number; } export interface ToolHostConfig { - /** Directory allowed for shell tool file operations. Paths outside are rejected. */ - sandboxDir: string; - /** Optional colon-separated list of additional directories to add to PATH for shell commands. */ - additionalPath?: string | undefined; + /** Directory allowed for shell tool file operations. Paths outside are rejected. */ + sandboxDir: string; + /** Optional colon-separated list of additional directories to add to PATH for shell commands. */ + additionalPath?: string | undefined; } export interface CronConfig { - dbPath: string; + dbPath: string; } export interface ModelRouterConfig { - small: string; - medium: string; - large: string; - /** Complexity level to use for initial planning phase */ - plannerComplexity: string; + small: string; + medium: string; + large: string; + /** Complexity level to use for initial planning phase */ + plannerComplexity: string; } export interface ExecutorConfig { - /** Timeout for individual node execution in milliseconds. */ - nodeTimeoutMs: number; + /** Timeout for individual node execution in milliseconds. */ + nodeTimeoutMs: number; } export interface BrowserServiceConfig { - /** Run browser in headless mode (default: true for production). 
*/ - headless: boolean; - /** Timeout for browser operations in milliseconds (default: 30000). */ - timeout: number; - /** Enable stealth plugin to bypass bot detection (default: true). */ - enableStealth: boolean; - /** Reuse browser context across requests (default: true). */ - reuseContext: boolean; - /** Directory to store browser user data (persistent cookies, etc). */ - userDataDir?: string | undefined; + /** Run browser in headless mode (default: true for production). */ + headless: boolean; + /** Timeout for browser operations in milliseconds (default: 30000). */ + timeout: number; + /** Enable stealth plugin to bypass bot detection (default: true). */ + enableStealth: boolean; + /** Reuse browser context across requests (default: true). */ + reuseContext: boolean; + /** Directory to store browser user data (persistent cookies, etc). */ + userDataDir?: string | undefined; } export interface ModelManagerConfig { - /** - * How long to keep a small model in memory after the last request. - * Accepts an Ollama/Lemonade duration string (e.g. "5m") or a number of seconds. - */ - smallModelKeepAlive: string | number; - /** - * How long to keep a medium model in memory after the last request. - */ - mediumModelKeepAlive: string | number; - /** - * How long to keep a large model in memory after the last request. - */ - largeModelKeepAlive: string | number; - /** - * Minimal prompt sent during warmup to ensure the model is loaded. - */ - warmupPrompt: string; + /** + * How long to keep a small model in memory after the last request. + * Accepts an Ollama/Lemonade duration string (e.g. "5m") or a number of seconds. + */ + smallModelKeepAlive: string | number; + /** + * How long to keep a medium model in memory after the last request. + */ + mediumModelKeepAlive: string | number; + /** + * How long to keep a large model in memory after the last request. 
+ */ + largeModelKeepAlive: string | number; + /** + * Minimal prompt sent during warmup to ensure the model is loaded. + */ + warmupPrompt: string; } export interface SkillsConfig { - /** Directory containing skills/CONFIG.md and subfolders. */ - skillsDir: string; + /** Directory containing skills/CONFIG.md and subfolders. */ + skillsDir: string; } export interface WhisperConfig { - /** Whisper model to use for transcription (e.g. "Whisper-Tiny", "Whisper-Base"). */ - modelName: string; - /** Language code for transcription ("auto" for auto-detect). */ - language: string; - /** Directory where Whisper model files are stored (unused when using Lemonade API). */ - modelDir: string; + /** Whisper model to use for transcription (e.g. "Whisper-Tiny", "Whisper-Base"). */ + modelName: string; + /** Language code for transcription ("auto" for auto-detect). */ + language: string; + /** Directory where Whisper model files are stored (unused when using Lemonade API). */ + modelDir: string; } export interface FileProcessorConfig { - /** Directory where uploaded files are temporarily stored during processing. */ - uploadDir: string; - /** Maximum allowed file size in bytes (default: 52428800 = 50 MB). */ - maxFileSizeBytes: number; - /** Files with text content shorter than this are inlined into the planner goal directly. */ - textMaxInlineChars: number; - /** Lemonade model used for image OCR and description. */ - ocrModel: string; - /** Whether image OCR/description is enabled. */ - ocrEnabled: boolean; + /** Directory where uploaded files are temporarily stored during processing. */ + uploadDir: string; + /** Maximum allowed file size in bytes (default: 52428800 = 50 MB). */ + maxFileSizeBytes: number; + /** Files with text content shorter than this are inlined into the planner goal directly. */ + textMaxInlineChars: number; + /** Lemonade model used for image OCR and description. */ + ocrModel: string; + /** Whether image OCR/description is enabled. 
*/ + ocrEnabled: boolean; } export interface AppConfig { - lemonade: LemonadeConfig; - telegram: TelegramConfig; - taskMemory: TaskMemoryConfig; - logger: LoggerConfig; - rag: RagConfig; - toolHost: ToolHostConfig; - cron: CronConfig; - modelRouter: ModelRouterConfig; - executor: ExecutorConfig; - browserService: BrowserServiceConfig; - modelManager: ModelManagerConfig; - skills: SkillsConfig; - whisper: WhisperConfig; - fileProcessor: FileProcessorConfig; - maxConcurrentTasks: number; + lemonade: LemonadeConfig; + telegram: TelegramConfig; + taskMemory: TaskMemoryConfig; + logger: LoggerConfig; + rag: RagConfig; + toolHost: ToolHostConfig; + cron: CronConfig; + modelRouter: ModelRouterConfig; + executor: ExecutorConfig; + browserService: BrowserServiceConfig; + modelManager: ModelManagerConfig; + skills: SkillsConfig; + whisper: WhisperConfig; + fileProcessor: FileProcessorConfig; + maxConcurrentTasks: number; } const DEFAULT_CONFIG: AppConfig = { - lemonade: { - baseUrl: "http://127.0.0.1:8000/api/v1", - timeoutMs: 600_000, // 10 minutes default - retries: 3, - numCtx: 16384, - }, - telegram: { - botToken: "", - allowedUserIds: "", - }, - taskMemory: { - dbPath: "data/tasks.sqlite", - }, - logger: { - logDir: "logs", - logFile: "events.log", - }, - rag: { - embedModel: "text-embedding-v3", // Common OpenAI/Lemonade embed model name - dbPath: "data/rag.sqlite", - embeddingDimensions: 768, - }, - toolHost: { - sandboxDir: process.cwd(), - additionalPath: "", - }, - cron: { - dbPath: "data/cron.sqlite", - }, - modelRouter: { - small: "qwen2.5:0.5b", - medium: "qwen2.5:1.5b", - large: "qwen2.5:7b", - plannerComplexity: "small", - }, - executor: { - nodeTimeoutMs: 600_000, // 10 minutes default - }, - browserService: { - headless: true, - timeout: 30_000, // 30 seconds default - enableStealth: true, - reuseContext: true, - }, - modelManager: { - smallModelKeepAlive: "10m", - mediumModelKeepAlive: "30m", - largeModelKeepAlive: "60m", - warmupPrompt: "hello", - }, - 
skills: { - skillsDir: "skills", - }, - whisper: { - modelName: "Whisper-Tiny", - language: "auto", - modelDir: "data/whisper-models", - }, - fileProcessor: { - uploadDir: "data/uploads", - maxFileSizeBytes: 52_428_800, // 50 MB - textMaxInlineChars: 8_000, - ocrModel: "qwen3-vl", - ocrEnabled: true, - }, - maxConcurrentTasks: 1, + lemonade: { + baseUrl: "http://127.0.0.1:8000/api/v1", + timeoutMs: 600_000, // 10 minutes default + retries: 3, + numCtx: 16384, + }, + telegram: { + botToken: "", + allowedUserIds: "", + }, + taskMemory: { + dbPath: "data/tasks.sqlite", + }, + logger: { + logDir: "logs", + logFile: "events.log", + }, + rag: { + embedModel: "text-embedding-v3", // Common OpenAI/Lemonade embed model name + dbPath: "data/rag.sqlite", + embeddingDimensions: 768, + }, + toolHost: { + sandboxDir: process.cwd(), + additionalPath: "", + }, + cron: { + dbPath: "data/cron.sqlite", + }, + modelRouter: { + small: "qwen2.5:0.5b", + medium: "qwen2.5:1.5b", + large: "qwen2.5:7b", + plannerComplexity: "small", + }, + executor: { + nodeTimeoutMs: 600_000, // 10 minutes default + }, + browserService: { + headless: true, + timeout: 30_000, // 30 seconds default + enableStealth: true, + reuseContext: true, + }, + modelManager: { + smallModelKeepAlive: "10m", + mediumModelKeepAlive: "30m", + largeModelKeepAlive: "60m", + warmupPrompt: "hello", + }, + skills: { + skillsDir: "skills", + }, + whisper: { + modelName: "Whisper-Tiny", + language: "auto", + modelDir: "data/whisper-models", + }, + fileProcessor: { + uploadDir: "data/uploads", + maxFileSizeBytes: 52_428_800, // 50 MB + textMaxInlineChars: 8_000, + ocrModel: "qwen3-vl", + ocrEnabled: true, + }, + maxConcurrentTasks: 1, }; function loadConfigFile(): Partial { - const configPath = - process.env.CONFIG_PATH ?? 
- join(process.cwd(), "config.json"); - if (!existsSync(configPath)) return {}; - try { - const raw = readFileSync(configPath, "utf-8"); - return JSON.parse(raw) as Partial; - } catch { - return {}; - } + const configPath = + process.env.CONFIG_PATH ?? + join(process.cwd(), "config.json"); + if (!existsSync(configPath)) return {}; + try { + const raw = readFileSync(configPath, "utf-8"); + return JSON.parse(raw) as Partial; + } catch { + return {}; + } } function mergeEnv(config: AppConfig): AppConfig { - return { - lemonade: { - baseUrl: process.env.LEMONADE_BASE_URL ?? config.lemonade.baseUrl, - timeoutMs: Number(process.env.LEMONADE_TIMEOUT_MS) || config.lemonade.timeoutMs, - retries: Number(process.env.LEMONADE_RETRIES) || config.lemonade.retries, - numCtx: Number(process.env.LEMONADE_NUM_CTX) || config.lemonade.numCtx, - }, - telegram: { - botToken: process.env.TELEGRAM_BOT_TOKEN ?? config.telegram.botToken, - allowedUserIds: process.env.TELEGRAM_ALLOWED_USER_IDS ?? config.telegram.allowedUserIds, - }, - taskMemory: { - dbPath: process.env.TASK_MEMORY_DB ?? config.taskMemory.dbPath, - }, - logger: { - logDir: process.env.LOG_DIR ?? config.logger.logDir, - logFile: process.env.LOG_FILE ?? config.logger.logFile, - }, - rag: { - embedModel: process.env.RAG_EMBED_MODEL ?? config.rag.embedModel, - dbPath: process.env.RAG_DB ?? config.rag.dbPath, - embeddingDimensions: Number(process.env.RAG_EMBEDDING_DIMENSIONS) || config.rag.embeddingDimensions, - }, - toolHost: { - sandboxDir: process.env.TOOL_SANDBOX_DIR ?? config.toolHost.sandboxDir, - additionalPath: process.env.TOOL_ADDITIONAL_PATH ?? config.toolHost.additionalPath, - }, - cron: { - dbPath: process.env.CRON_DB ?? config.cron.dbPath, - }, - modelRouter: { - small: process.env.MODEL_ROUTER_SMALL ?? config.modelRouter.small, - medium: process.env.MODEL_ROUTER_MEDIUM ?? config.modelRouter.medium, - large: process.env.MODEL_ROUTER_LARGE ?? 
config.modelRouter.large, - plannerComplexity: process.env.MODEL_ROUTER_PLANNER_COMPLEXITY ?? config.modelRouter.plannerComplexity, - }, - executor: { - nodeTimeoutMs: Number(process.env.EXECUTOR_NODE_TIMEOUT_MS) || config.executor.nodeTimeoutMs, - }, - browserService: { - headless: process.env.BROWSER_SERVICE_HEADLESS === "false" ? false : config.browserService.headless, - timeout: Number(process.env.BROWSER_SERVICE_TIMEOUT) || config.browserService.timeout, - enableStealth: process.env.BROWSER_SERVICE_ENABLE_STEALTH === "false" ? false : config.browserService.enableStealth, - reuseContext: process.env.BROWSER_SERVICE_REUSE_CONTEXT === "false" ? false : config.browserService.reuseContext, - userDataDir: process.env.BROWSER_SERVICE_USER_DATA_DIR ?? config.browserService.userDataDir, - }, - modelManager: { - smallModelKeepAlive: process.env.MODEL_MANAGER_SMALL_KEEP_ALIVE ?? config.modelManager.smallModelKeepAlive, - mediumModelKeepAlive: process.env.MODEL_MANAGER_MEDIUM_KEEP_ALIVE ?? config.modelManager.mediumModelKeepAlive, - largeModelKeepAlive: process.env.MODEL_MANAGER_LARGE_KEEP_ALIVE ?? config.modelManager.largeModelKeepAlive, - warmupPrompt: process.env.MODEL_MANAGER_WARMUP_PROMPT ?? config.modelManager.warmupPrompt, - }, - skills: { - skillsDir: process.env.SKILLS_DIR ?? config.skills.skillsDir, - }, - whisper: { - modelName: process.env.WHISPER_MODEL_NAME ?? config.whisper.modelName, - language: process.env.WHISPER_LANGUAGE ?? config.whisper.language, - modelDir: process.env.WHISPER_MODEL_DIR ?? config.whisper.modelDir, - }, - fileProcessor: { - uploadDir: process.env.FILE_PROCESSOR_UPLOAD_DIR ?? config.fileProcessor.uploadDir, - maxFileSizeBytes: Number(process.env.FILE_PROCESSOR_MAX_FILE_SIZE_BYTES) || config.fileProcessor.maxFileSizeBytes, - textMaxInlineChars: Number(process.env.FILE_PROCESSOR_TEXT_MAX_INLINE_CHARS) || config.fileProcessor.textMaxInlineChars, - ocrModel: process.env.FILE_PROCESSOR_OCR_MODEL ?? 
config.fileProcessor.ocrModel, - ocrEnabled: process.env.FILE_PROCESSOR_OCR_ENABLED === "false" ? false : config.fileProcessor.ocrEnabled, - }, - maxConcurrentTasks: Number(process.env.MAX_CONCURRENT_TASKS) || config.maxConcurrentTasks, - }; + return { + lemonade: { + baseUrl: process.env.LEMONADE_BASE_URL ?? config.lemonade.baseUrl, + timeoutMs: Number(process.env.LEMONADE_TIMEOUT_MS) || config.lemonade.timeoutMs, + retries: Number(process.env.LEMONADE_RETRIES) || config.lemonade.retries, + numCtx: Number(process.env.LEMONADE_NUM_CTX) || config.lemonade.numCtx, + }, + telegram: { + botToken: process.env.TELEGRAM_BOT_TOKEN ?? config.telegram.botToken, + allowedUserIds: process.env.TELEGRAM_ALLOWED_USER_IDS ?? config.telegram.allowedUserIds, + }, + taskMemory: { + dbPath: process.env.TASK_MEMORY_DB ?? config.taskMemory.dbPath, + }, + logger: { + logDir: process.env.LOG_DIR ?? config.logger.logDir, + logFile: process.env.LOG_FILE ?? config.logger.logFile, + }, + rag: { + embedModel: process.env.RAG_EMBED_MODEL ?? config.rag.embedModel, + dbPath: process.env.RAG_DB ?? config.rag.dbPath, + embeddingDimensions: Number(process.env.RAG_EMBEDDING_DIMENSIONS) || config.rag.embeddingDimensions, + }, + toolHost: { + sandboxDir: process.env.TOOL_SANDBOX_DIR ?? config.toolHost.sandboxDir, + additionalPath: process.env.TOOL_ADDITIONAL_PATH ?? config.toolHost.additionalPath, + }, + cron: { + dbPath: process.env.CRON_DB ?? config.cron.dbPath, + }, + modelRouter: { + small: process.env.MODEL_ROUTER_SMALL ?? config.modelRouter.small, + medium: process.env.MODEL_ROUTER_MEDIUM ?? config.modelRouter.medium, + large: process.env.MODEL_ROUTER_LARGE ?? config.modelRouter.large, + plannerComplexity: process.env.MODEL_ROUTER_PLANNER_COMPLEXITY ?? config.modelRouter.plannerComplexity, + }, + executor: { + nodeTimeoutMs: Number(process.env.EXECUTOR_NODE_TIMEOUT_MS) || config.executor.nodeTimeoutMs, + }, + browserService: { + headless: process.env.BROWSER_SERVICE_HEADLESS === "false" ? 
false : config.browserService.headless, + timeout: Number(process.env.BROWSER_SERVICE_TIMEOUT) || config.browserService.timeout, + enableStealth: process.env.BROWSER_SERVICE_ENABLE_STEALTH === "false" ? false : config.browserService.enableStealth, + reuseContext: process.env.BROWSER_SERVICE_REUSE_CONTEXT === "false" ? false : config.browserService.reuseContext, + userDataDir: process.env.BROWSER_SERVICE_USER_DATA_DIR ?? config.browserService.userDataDir, + }, + modelManager: { + smallModelKeepAlive: process.env.MODEL_MANAGER_SMALL_KEEP_ALIVE ?? config.modelManager.smallModelKeepAlive, + mediumModelKeepAlive: process.env.MODEL_MANAGER_MEDIUM_KEEP_ALIVE ?? config.modelManager.mediumModelKeepAlive, + largeModelKeepAlive: process.env.MODEL_MANAGER_LARGE_KEEP_ALIVE ?? config.modelManager.largeModelKeepAlive, + warmupPrompt: process.env.MODEL_MANAGER_WARMUP_PROMPT ?? config.modelManager.warmupPrompt, + }, + skills: { + skillsDir: process.env.SKILLS_DIR ?? config.skills.skillsDir, + }, + whisper: { + modelName: process.env.WHISPER_MODEL_NAME ?? config.whisper.modelName, + language: process.env.WHISPER_LANGUAGE ?? config.whisper.language, + modelDir: process.env.WHISPER_MODEL_DIR ?? config.whisper.modelDir, + }, + fileProcessor: { + uploadDir: process.env.FILE_PROCESSOR_UPLOAD_DIR ?? config.fileProcessor.uploadDir, + maxFileSizeBytes: Number(process.env.FILE_PROCESSOR_MAX_FILE_SIZE_BYTES) || config.fileProcessor.maxFileSizeBytes, + textMaxInlineChars: Number(process.env.FILE_PROCESSOR_TEXT_MAX_INLINE_CHARS) || config.fileProcessor.textMaxInlineChars, + ocrModel: process.env.FILE_PROCESSOR_OCR_MODEL ?? config.fileProcessor.ocrModel, + ocrEnabled: process.env.FILE_PROCESSOR_OCR_ENABLED === "false" ? 
false : config.fileProcessor.ocrEnabled, + }, + maxConcurrentTasks: Number(process.env.MAX_CONCURRENT_TASKS) || config.maxConcurrentTasks, + }; } function deepMerge(base: T, override: Partial): T { - const out = { ...base }; - for (const key of Object.keys(override) as (keyof T)[]) { - const v = override[key]; - if (v === undefined) continue; - if (typeof v === "object" && v !== null && !Array.isArray(v) && typeof base[key] === "object" && base[key] !== null) { - (out as Record)[key as string] = deepMerge( - base[key] as object, - v as Partial, - ); - } else { - (out as Record)[key as string] = v; + const out = { ...base }; + for (const key of Object.keys(override) as (keyof T)[]) { + const v = override[key]; + if (v === undefined) continue; + if (typeof v === "object" && v !== null && !Array.isArray(v) && typeof base[key] === "object" && base[key] !== null) { + (out as Record)[key as string] = deepMerge( + base[key] as object, + v as Partial, + ); + } else { + (out as Record)[key as string] = v; + } } - } - return out; + return out; } let cached: AppConfig | null = null; /** Get app config. Config file is merged over defaults, then env overrides. */ export function getConfig(): AppConfig { - if (cached) return cached; - const fileConfig = loadConfigFile(); - const merged = deepMerge(DEFAULT_CONFIG, fileConfig); - cached = mergeEnv(merged); - return cached; + if (cached) return cached; + const fileConfig = loadConfigFile(); + const merged = deepMerge(DEFAULT_CONFIG, fileConfig); + cached = mergeEnv(merged); + return cached; } /** Reset cached config (e.g. for tests). */ export function resetConfig(): void { - cached = null; + cached = null; }