diff --git a/AI-Agent.md b/AI-Agent.md index 695a52e..d99bd96 100644 --- a/AI-Agent.md +++ b/AI-Agent.md @@ -32,7 +32,7 @@ AI-Agent/ │ ├── core/ │ │ └── orchestrator.ts # Core Orchestrator: spawns processes, routes messages, task pipeline │ ├── agents/ -│ │ ├── planner-agent.ts # Plan creation (goal → DAG) via Ollama +│ │ ├── planner-agent.ts # Plan creation (goal → DAG) via Lemonade │ │ ├── executor-agent.ts # DAG traversal, node dispatch, Task Memory, revision loop │ │ ├── critic-agent.ts # Reflection: PASS/REVISE on draft output │ │ └── prompts/ @@ -42,8 +42,8 @@ AI-Agent/ │ ├── adapters/ │ │ └── telegram-adapter.ts # Telegram bot → protocol; task.create, telegram.send/progress │ ├── services/ -│ │ ├── ollama-adapter.ts # Ollama API: generate, chat, embed -│ │ ├── model-router.ts # Complexity → Ollama model name +│ │ ├── lemonade-adapter.ts # Lemonade API: generate, chat, embed +│ │ ├── model-router.ts # Complexity → Lemonade model name │ │ ├── generator-service.ts # node.execute generate_text (model-router process) │ │ ├── task-memory.ts # SQLite: tasks, nodes, edges, reflections │ │ ├── logger-service.ts # event.* → pino file log @@ -84,25 +84,25 @@ AI-Agent/ ### Shared layer -- **config.ts**: Loads `config.json` (path from `CONFIG_PATH` or default), merges with env, exports `getConfig()`. Used by all services/adapters for Ollama URL, Telegram token/allow-list, DB paths (task memory, RAG, cron, logger), RAG embed model and `rag.dbPath`, tool sandbox, model router names. +- **config.ts**: Loads `config.json` (path from `CONFIG_PATH` or default), merges with env, exports `getConfig()`. Used by all services/adapters for Lemonade URL, Telegram token/allow-list, DB paths (task memory, RAG, cron, logger), RAG embed model and `rag.dbPath`, tool sandbox, model router names. - **protocol.ts**: Envelope and response/error/event schemas; `parseEnvelope`, `parseResponse`, etc. - **base-process.ts**: `BaseProcess` extends EventEmitter; readline on stdin, `handleEnvelope(line)` → emit `"message"`; `send(envelope)` writes JSONL to stdout. Subclasses override `handleEnvelope` and call `send` for responses. - **graph-utils.ts**: `CapabilityGraph`, `CapabilityNode`, `validateGraph`, `getDependencyMap`, `getReadyNodes` for DAG execution order. ### Agent layer -- **Planner**: Listens for `plan.create`; uses Ollama + Model Router to produce a DAG; validates with `validateGraph`; responds with plan. +- **Planner**: Listens for `plan.create`; uses Lemonade + Model Router to produce a DAG; validates with `validateGraph`; responds with plan. - **Executor**: Listens for `plan.execute`; computes ready nodes (parallel batch, concurrency limit); dispatches `node.execute` to `node.service` (model-router, rag-service, critic-agent, tool-host); waits for response by `correlationId`; updates Task Memory; after DAG, optional reflection loop (Critic → REVISE → re-run generation, max 3); aggregates result and completes task. -- **Critic**: Listens for `reflection.evaluate`; uses Ollama with Critic prompt; returns structured `{ decision: PASS|REVISE, feedback, score }`. +- **Critic**: Listens for `reflection.evaluate`; uses Lemonade with Critic prompt; returns structured `{ decision: PASS|REVISE, feedback, score }`. ### Service layer -- **Ollama Adapter**: HTTP to Ollama `baseUrl` (from config); `generate`, `chat`, `embed`; timeout and retries. -- **Model Router**: Maps small/medium/large to Ollama model names (from config). 
-- **Generator Service**: Handles `node.execute` with `type: "generate_text"` or `type: "summarize"`; for `summarize`, uses summarizer system prompt and `input.chatHistory`; builds prompt from context (goal, deps, optional critic feedback); calls Ollama; responds with `{ text, ... }`. +- **Lemonade Adapter**: HTTP to Lemonade `baseUrl` (from config); `generate`, `chat`, `embed`; timeout and retries. +- **Model Router**: Maps small/medium/large to Lemonade model names (from config). +- **Generator Service**: Handles `node.execute` with `type: "generate_text"` or `type: "summarize"`; for `summarize`, uses summarizer system prompt and `input.chatHistory`; builds prompt from context (goal, deps, optional critic feedback); calls Lemonade; responds with `{ text, ... }`. - **Task Memory**: SQLite store; `conversation_id` on tasks; handles `task.create`, `task.update`, `task.get`, `task.getByConversationId`, `task.appendReflection`, `task.complete`, `task.fail`. - **Logger**: Subscribes to `event.*`; writes structured log (pino) to `logDir/logFile` from config. -- **RAG Service**: Ollama embed; SQLite-backed document store; **sqlite-vss** for KNN vector search when extension loads (macOS/Linux x64), else in-DB dot-product; configurable `rag.embeddingDimensions` (768); `memory.semantic.insert`, `memory.semantic.search`; search is session-scoped by default to prevent cross-chat leakage; `node.execute` for `semantic_search` returns snippets for downstream nodes. +- **RAG Service**: Lemonade embed; SQLite-backed document store; **sqlite-vss** for KNN vector search when extension loads (macOS/Linux x64), else in-DB dot-product; configurable `rag.embeddingDimensions` (768); `memory.semantic.insert`, `memory.semantic.search`; search is session-scoped by default to prevent cross-chat leakage; `node.execute` for `semantic_search` returns snippets for downstream nodes. - **Tool Host**: Registry of tools (shell, http_get, http_search); sandbox dir from config; `tool.execute` and `node.execute` for type `tool`; shell tool executes commands with sandbox restrictions; browsing supports persistent context (cookies) via `userDataDir`. - **Cron Manager**: SQLite schedule table; node-cron; `cron.schedule.add/list/remove`; emits `event.cron.started/completed/failed` to Logger. - **Dashboard Service**: Standalone monitoring web dashboard; provides real-time overview of tasks, processes, and IPC logs. UI is served from static files in `src/services/dashboard/`. @@ -129,4 +129,4 @@ AI-Agent/ **Archiving (on `/new`)**: Telegram sends `chat.new` (chatId, old conversationId). Core fetches tasks by conversationId, formats history, calls model-router `summarize`, inserts summary into RAG, sends "Archived" to user. -All configurable behavior (Ollama URL, Telegram token/allow-list, DB paths, logger paths, RAG model, sandbox, cron DB, model names) is driven by **config.json** and environment overrides via **config.ts**. +All configurable behavior (Lemonade URL, Telegram token/allow-list, DB paths, logger paths, RAG model, sandbox, cron DB, model names) is driven by **config.json** and environment overrides via **config.ts**. diff --git a/README.md b/README.md index 802962d..9ffeab9 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A multi-process AI platform with type-safe IPC and capability-graph execution. U # 🧬 ManBot -> **Important:** This is **not** an AI chatbot. It is designed for **heavy tasks** that require time and substantial processing—planning, research, multi-step execution, tool use. 
It runs locally (Ollama) and performance depends on your machine's compute power; expect slower responses compared to cloud-based chat services.
+> **Important:** This is **not** an AI chatbot. It is designed for **heavy tasks** that require time and substantial processing—planning, research, multi-step execution, tool use. It runs locally (Lemonade) and performance depends on your machine's compute power; expect slower responses compared to cloud-based chat services.

## Features

@@ -19,27 +19,29 @@ A multi-process AI platform with type-safe IPC and capability-graph execution. U
- **Session-Scoped RAG**: Memory searches are session-scoped by default to prevent context leakage after `/new`, with an optional `global` scope.
- **Telegram adapter**: Commands `/start`, `/task`, `/new`, `/help`; session tracking and conversation archiving; robust message delivery with automatic plain-text fallback.
- **Reminder System**: Schedule one-time or recurring reminders via natural language; cron-based scheduling with Telegram delivery
-- **File Processing**: Upload photos, documents, voice notes, or audio files via Telegram. Images are OCR'd locally (Ollama vision model), audio is transcribed locally (Whisper), and text files are inlined or chunked into RAG — all without any cloud calls.
+- **File Processing**: Upload photos, documents, voice notes, or audio files via Telegram. Images are OCR'd locally (Lemonade vision model), audio is transcribed locally (Whisper), and text files are inlined or chunked into RAG — all without any cloud calls.
- **Monitoring Dashboard**: A Notion-style internal web dashboard for real-time tracking of tasks, system stats, and event logs.

## Requirements

- **Node.js** >= 20
-- **Ollama** running locally (for LLM and embeddings)
+- **Lemonade** running locally (for LLM and embeddings)
- **Telegram Bot Token** (from [@BotFather](https://t.me/BotFather)) if using the Telegram adapter

-### Ollama models (recommended)
+### Lemonade models (recommended)

-- Small: `llama3:8b`
-- Medium: `mistral`
-- Large: `mixtral`
-- Embeddings: `nomic-embed-text` (for RAG)
+- Small: `qwen2.5:0.5b`
+- Medium: `qwen2.5:1.5b`
+- Large: `qwen2.5:7b`
+- Embeddings: `text-embedding-v3` (via Lemonade)
+- Vision: `qwen3-vl`

-Install and run Ollama, then pull the models you need:
+Install and run Lemonade, then pull the models you need:

```bash
-ollama pull nomic-embed-text
-ollama pull mistral
+lemonade-server pull qwen2.5:0.5b
+lemonade-server pull qwen2.5:1.5b
+lemonade-server pull qwen3-vl
```

## Configuration

@@ -53,18 +55,18 @@ ollama pull mistral
2. Edit `config.json` with your settings.
Important keys: - **telegram.botToken** — Telegram bot API token (required for Telegram adapter) - **telegram.allowedUserIds** — Comma-separated Telegram user IDs; leave empty to allow all - - **ollama.baseUrl** — Ollama API URL (default `http://127.0.0.1:11434`) - - **rag.embedModel** — Embedding model for RAG (default `nomic-embed-text`) + - **lemonade.baseUrl** — Lemonade API URL (default `http://127.0.0.1:8000`) + - **rag.embedModel** — Embedding model for RAG (default `text-embedding-v3`) - **rag.dbPath** — SQLite path for RAG document storage (default `data/rag.sqlite`) - - **rag.embeddingDimensions** — Vector dimension for sqlite-vss (default 768 for nomic-embed-text) - - **modelRouter** — Ollama model names for small/medium/large + - **rag.embeddingDimensions** — Vector dimension for sqlite-vss (default 768 for text-embedding-v3) + - **modelRouter** — Lemonade model names for small/medium/large - **toolHost.sandboxDir** — Directory allowed for shell tool file operations (default: cwd) - **browserService.headless** — Run browser in headless mode (default: `true`) - **browserService.timeout** — Browser operation timeout in milliseconds (default: `30000`) - **browserService.enableStealth** — Enable stealth plugin for bot detection bypass (default: `true`) - **browserService.reuseContext** — Reuse browser context across requests (default: `true`) - **browserService.userDataDir** — Directory to store browser user data (persistent cookies, logins, etc.; default: `undefined`) - - **modelManager.smallModelKeepAlive** — Keep-alive for small model (default: `"10m"`, Ollama duration string or seconds) + - **modelManager.smallModelKeepAlive** — Keep-alive for small model (default: `"10m"`, Lemonade duration string or seconds) - **modelManager.mediumModelKeepAlive** — Keep-alive for medium model (default: `"30m"`) - **modelManager.largeModelKeepAlive** — Keep-alive for large model after on-demand use (default: `"60m"`) - **modelManager.warmupPrompt** — Minimal prompt sent during warmup (default: `"hello"`) @@ -73,14 +75,14 @@ ollama pull mistral - **fileProcessor.uploadDir** — Temp directory for uploaded files (default: `"data/uploads"`) - **fileProcessor.maxFileSizeBytes** — Max upload size allowed (default: `52428800` = 50 MB) - **fileProcessor.textMaxInlineChars** — Files shorter than this are inlined in the goal (default: `8000`) - - **fileProcessor.ocrModel** — Ollama vision model for image OCR (default: `"glm-ocr:q8_0"`) + - **fileProcessor.ocrModel** — Lemonade vision model for image OCR (default: `"qwen3-vl"`) - **fileProcessor.ocrEnabled** — Enable/disable image OCR (default: `true`) Environment variables override `config.json`. Supported env vars: - `CONFIG_PATH` — Path to config file (default: `./config.json`) - `TELEGRAM_BOT_TOKEN`, `TELEGRAM_ALLOWED_USER_IDS` -- `OLLAMA_BASE_URL`, `OLLAMA_TIMEOUT_MS`, `OLLAMA_RETRIES` +- `LEMONADE_BASE_URL`, `LEMONADE_TIMEOUT_MS`, `LEMONADE_RETRIES` - `TASK_MEMORY_DB`, `CRON_DB`, `LOG_DIR`, `LOG_FILE` - `RAG_EMBED_MODEL`, `RAG_DB`, `RAG_EMBEDDING_DIMENSIONS`, `TOOL_SANDBOX_DIR` - `MODEL_ROUTER_SMALL`, `MODEL_ROUTER_MEDIUM`, `MODEL_ROUTER_LARGE` @@ -124,7 +126,7 @@ For development (TypeScript without pre-build): npm run dev:orchestrator ``` -Ensure `config.json` has a valid **telegram.botToken** and Ollama is running. +Ensure `config.json` has a valid **telegram.botToken** and Lemonade is running. ### Standalone services (for testing or custom setups) @@ -214,11 +216,11 @@ See [Troubleshooting](#troubleshooting) for common issues and debugging tips. 
## Model Management -The system includes a `ModelManagerService` that manages Ollama model lifecycles: +The system includes a `ModelManagerService` that manages Lemonade model lifecycles: - **Startup prewarming**: On startup, the Orchestrator pre-warms the **small** and **medium** models sequentially, so the first request is served without cold-start delay. - **On-demand loading**: The **large** model is loaded on demand when needed for a task. -- **Keep-alive control**: Each tier has a configurable keep-alive duration (Ollama removes a model from VRAM after it has been idle for the configured time). +- **Keep-alive control**: Each tier has a configurable keep-alive duration (Lemonade removes a model from VRAM after it has been idle for the configured time). - **Concurrency safety**: Concurrent warmup requests for the same model are deduplicated — only one `/api/chat` call is made regardless of how many parallel requests arrive. ### Keep-alive defaults @@ -229,7 +231,7 @@ The system includes a `ModelManagerService` that manages Ollama model lifecycles | medium | `30m` | Stays loaded for 30 minutes after last use | | large | `60m` | Stays loaded for 60 minutes after last use | -Set keep-alive to `-1` (the number) to keep a model loaded indefinitely until Ollama is restarted. +Set keep-alive to `-1` (the number) to keep a model loaded indefinitely until Lemonade is restarted. ### Monitoring model state @@ -269,7 +271,7 @@ You can configure the port using the `DASHBOARD_PORT` environment variable or by - **src/core/** — Core Orchestrator (process spawning, message routing, task pipeline, file ingest) - **src/agents/** — Planner, Executor, Critic; **prompts/** for system prompts (planner, critic, summarizer) - **src/adapters/** — Telegram adapter (including file detection and download) -- **src/services/** — Task Memory, Logger, Ollama adapter (with vision), Model Router, Generator, RAG (SQLite), Tool Host, Cron Manager, Dashboard Service, **File Processor** +- **src/services/** — Task Memory, Logger, Lemonade adapter (with vision), Model Router, Generator, RAG (SQLite), Tool Host, Cron Manager, Dashboard Service, **File Processor** - **src/utils/** — Console logger, audio-converter (ffmpeg-static), whisper-transcriber (nodejs-whisper) - **src/shared/** — Protocol (Zod schemas), BaseProcess, graph-utils, config, **file-protocol** - **_docs/** — Architecture and protocol specs @@ -286,7 +288,7 @@ ManBot can process file attachments sent directly in Telegram — no cloud servi | Type | Telegram attachment | Processing | |---|---|---| | **Text** | Any document (`.txt`, `.md`, `.json`, `.pdf`, etc.) | Content read directly; short files inlined into goal, long files chunked + summarised + indexed in RAG | -| **Image** | Photo or image document | OCR/description via Ollama vision model (`glm-ocr:q8_0`) | +| **Image** | Photo or image document | OCR/description via Lemonade vision model (`qwen3-vl`) | | **Voice / Audio** | Voice message or audio file | Converted to WAV (ffmpeg-static) → transcribed (OpenAI Whisper, local) | | **Video** | Video or video note | ⚠️ Not supported yet | @@ -295,7 +297,7 @@ ManBot can process file attachments sent directly in Telegram — no cloud servi 1. Send any supported file to the bot, optionally with a caption as your instruction 2. The bot downloads the file locally to `data/uploads/` 3. 
Processing runs in the dedicated `file-processor` subprocess:
-   - Images → `OllamaAdapter.chatWithImage()` with the configured OCR model
+   - Images → `LemonadeAdapter.chatWithImage()` with the configured OCR model
-   - Audio → `convertToWav()` (ffmpeg-static) → `transcribeAudio()` (Whisper `base.en` by default)
+   - Audio → `convertToWav()` (ffmpeg-static) → `transcribeAudio()` (Lemonade transcription API, `Whisper-Tiny` by default)
   - Text → `readFile()`, check length against `textMaxInlineChars`
4. Extracted content is injected into the planner goal as structured context
@@ -306,9 +308,9 @@ ManBot can process file attachments sent directly in Telegram — no cloud servi
-The Whisper model (~75 MB for `base.en`) is automatically downloaded on first voice/audio transcription. Retry if the first request fails — the model downloads in the background.
+The Whisper model (`Whisper-Tiny` by default) is automatically downloaded on first voice/audio transcription. Retry if the first request fails — the model downloads in the background.

### Requirements for image OCR
-Pull the vision model from Ollama before use:
+Pull the vision model from Lemonade before use:
```bash
-ollama pull glm-ocr:q8_0
+lemonade-server pull qwen3-vl
```

## Troubleshooting
diff --git a/_board/TASKS/LM-01_LEMONADE_ADAPTER.md b/_board/TASKS/LM-01_LEMONADE_ADAPTER.md
new file mode 100644
index 0000000..22d3ebd
--- /dev/null
+++ b/_board/TASKS/LM-01_LEMONADE_ADAPTER.md
@@ -0,0 +1,13 @@
+# LM-01: Implement LemonadeAdapter
+
+## Description
+Replace `OllamaAdapter` with `LemonadeAdapter` to support Lemonade Server's OpenAI-compatible API.
+
+## Status
+- [x] Create `LemonadeAdapter.ts` with `chat`, `chatWithImage`, `embed`, and `transcribe`.
+- [x] Implement server warmup logic.
+- [x] Add configuration support in `config.ts`.
+- [x] Unit tests for `LemonadeAdapter`.
+
+## Context
+Migrating from Ollama to Lemonade for better local multimodal and audio support.
diff --git a/_board/TASKS/LM-02_FILE_PROCESSOR_MIGRATION.md b/_board/TASKS/LM-02_FILE_PROCESSOR_MIGRATION.md
new file mode 100644
index 0000000..9791e9f
--- /dev/null
+++ b/_board/TASKS/LM-02_FILE_PROCESSOR_MIGRATION.md
@@ -0,0 +1,13 @@
+# LM-02: Migrate FileProcessorService to Lemonade
+
+## Description
+Update `FileProcessorService` to use `LemonadeAdapter` for image processing and OCR tasks via the `qwen3-vl` model.
+
+## Status
+- [x] Update `FileProcessorService` constructor and imports.
+- [x] Refactor `processImage` to use `lemonade.chatWithImage`.
+- [x] Integrate `qwen3-vl` for OCR and description.
+- [x] Unit tests for `FileProcessorService` with Lemonade mock.
+
+## Context
+Enables vision capability using Lemonade's multimodal support.
diff --git a/_board/TASKS/LM-03_WHISPER_LEMONADE_MIGRATION.md b/_board/TASKS/LM-03_WHISPER_LEMONADE_MIGRATION.md
new file mode 100644
index 0000000..9ab42de
--- /dev/null
+++ b/_board/TASKS/LM-03_WHISPER_LEMONADE_MIGRATION.md
@@ -0,0 +1,13 @@
+# LM-03: Migrate WhisperTranscriber to Lemonade API
+
+## Description
+Replace the local `nodejs-whisper` dependency with Lemonade's transcription API for reliable audio processing.
+
+## Status
+- [x] Update `whisper-transcriber.ts` to use `LemonadeAdapter.transcribe`.
+- [x] Integrate the `Whisper-Tiny` model.
+- [x] Unit tests for `WhisperTranscriber` with Lemonade mock.
+- [x] Remove unused `nodejs-whisper` folder and related logic.
+
+## Context
+Improves audio transcription speed and reliability using a specialized local server endpoint.
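Taken together, LM-01 and LM-03 pin down the adapter surface the rest of the migration builds on. Below is a minimal sketch of that surface in TypeScript, assuming only that Lemonade Server exposes OpenAI-compatible `/chat/completions` and `/audio/transcriptions` routes under the configured `baseUrl`; the function names, response shapes, and error handling are illustrative, not the shipped `LemonadeAdapter` (which also applies `timeoutMs`, `retries`, and `numCtx` from config).

```typescript
// Hedged sketch of the LM-01/LM-03 adapter surface. Endpoint paths follow
// the OpenAI convention that Lemonade Server advertises; everything else
// (names, BASE_URL handling, error strategy) is illustrative only.
import { readFile } from "node:fs/promises";
import { basename } from "node:path";

export type ChatMessage = { role: "system" | "user" | "assistant"; content: string };

export interface ChatResult {
  message: ChatMessage;
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
}

const BASE_URL = "http://127.0.0.1:8000/api/v1"; // default from config.json.example

// chat(): POST the conversation and normalize the OpenAI-style response
// into the { message, usage } shape the migrated services consume.
export async function chat(messages: ChatMessage[], model: string): Promise<ChatResult> {
  const res = await fetch(`${BASE_URL}/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, messages, stream: false }),
  });
  if (!res.ok) throw new Error(`Lemonade chat failed: HTTP ${res.status}`);
  const data = (await res.json()) as { choices: { message: ChatMessage }[]; usage?: ChatResult["usage"] };
  return { message: data.choices[0].message, usage: data.usage };
}

// transcribe(): multipart upload of the 16 kHz mono WAV that convertToWav()
// produces; returns the recognized text.
export async function transcribe(wavPath: string, model = "Whisper-Tiny"): Promise<string> {
  const form = new FormData();
  form.append("model", model);
  form.append("file", new Blob([await readFile(wavPath)], { type: "audio/wav" }), basename(wavPath));
  const res = await fetch(`${BASE_URL}/audio/transcriptions`, { method: "POST", body: form });
  if (!res.ok) throw new Error(`Lemonade transcription failed: HTTP ${res.status}`);
  const data = (await res.json()) as { text: string };
  return data.text;
}
```

The `{ message, usage }` normalization is what lets the test mocks further down drop Ollama's `prompt_eval_count`/`eval_count` fields in favor of OpenAI-style `usage` counters.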
diff --git a/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md b/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md new file mode 100644 index 0000000..5a3580f --- /dev/null +++ b/_board/TASKS/LM-04_CORE_SERVICES_MIGRATION.md @@ -0,0 +1,13 @@ +# LM-04: Update Core Services for Lemonade + +## Description +Convert `GeneratorService`, `RAGService`, and `TimeParserService` from using `OllamaAdapter` to `LemonadeAdapter` for general text and embedding tasks. + +## Status +- [x] Update `GeneratorService` to use Lemonade's chat endpoint. +- [x] Update `RAGService` to use Lemonade's embeddings. +- [x] Update `TimeParserService` for natural language time parsing via Lemonade. +- [x] Verify chat completions, tool calls, and embedding flows. + +## Context +Ensures all LLM-dependent components are using the new centralized Lemonade adapter. diff --git a/_board/TASKS/LM-05_MODEL_LIFECYCLE_MIGRATION.md b/_board/TASKS/LM-05_MODEL_LIFECYCLE_MIGRATION.md new file mode 100644 index 0000000..af1aea6 --- /dev/null +++ b/_board/TASKS/LM-05_MODEL_LIFECYCLE_MIGRATION.md @@ -0,0 +1,13 @@ +# LM-05: Model Lifecycle and Prewarming Refactor + +## Description +Update `ModelManagerService` to manage model lifecycles via the Lemonade `warmup` endpoint. + +## Status +- [x] Refactor `ModelManagerService` to use Lemonade's warmup logic. +- [x] Update constructor and tier-to-model mapping for Lemonade. +- [x] Update model manager and integration tests. +- [x] Verify sequential prewarming logic at startup. + +## Context +Ensures efficient model loading and memory management in Lemonade. diff --git a/_board/TASKS/LM-06_VERIFICATION_DOCS.md b/_board/TASKS/LM-06_VERIFICATION_DOCS.md new file mode 100644 index 0000000..5d7eb9a --- /dev/null +++ b/_board/TASKS/LM-06_VERIFICATION_DOCS.md @@ -0,0 +1,13 @@ +# LM-06: Verification and Documentation + +## Description +Final build check, test suite run, and documentation sync to reflect transition to Lemonade.ai. + +## Status +- [x] Run full test suite for all transitioned components. +- [x] Fix remaining lint and build errors in tests. +- [x] Verify successful build via `npm run build`. +- [x] Update `README.md`, `ARCHITECTURE.md`, and `TECH.md`. + +## Context +Ensures project consistency and documentation accuracy after the transition. diff --git a/_board/_BOARD.md b/_board/_BOARD.md index 48ecfe8..9161cae 100644 --- a/_board/_BOARD.md +++ b/_board/_BOARD.md @@ -7,6 +7,30 @@ ## Done +### LM-06 Verification and Documentation +- Status: Completed +- Date: 2026-02-28 + +### LM-05 Model Lifecycle and Prewarming Refactor +- Status: Completed +- Date: 2026-02-28 + +### LM-04 Update Core Services for Lemonade +- Status: Completed +- Date: 2026-02-28 + +### LM-03 Migrate WhisperTranscriber to Lemonade API +- Status: Completed +- Date: 2026-02-28 + +### LM-02 Migrate FileProcessorService to Lemonade +- Status: Completed +- Date: 2026-02-28 + +### LM-01 Implement LemonadeAdapter +- Status: Completed +- Date: 2026-02-28 + ### AO-14 Refactor Dashboard Service (Split UI/Logic) - Status: Completed - Commit: (current) diff --git a/_docs/COMPONENTS.md b/_docs/COMPONENTS.md index 4b0ef84..816c238 100644 --- a/_docs/COMPONENTS.md +++ b/_docs/COMPONENTS.md @@ -67,8 +67,8 @@ Routes tasks to: --- -### Ollama Adapter -Interface to local Ollama instance. +### Lemonade Adapter +Interface to local Lemonade Server instance (OpenAI-compatible). 
Supports:
- Streaming
@@ -151,8 +151,8 @@ Stores:
- Receives `file.process` envelopes from Core Orchestrator
- Routes by file category:
  - **text** → reads file content; inlines if short, returns `text_long` if long (orchestrator handles RAG)
-  - **image** → OCR/description via `OllamaAdapter.chatWithImage()` with configured vision model (`glm-ocr:q8_0`)
+  - **image** → OCR/description via `LemonadeAdapter.chatWithImage()` with configured vision model (`qwen3-vl`)
-  - **audio** → `convertToWav()` (ffmpeg-static) → `transcribeAudio()` (Whisper local inference)
+  - **audio** → `convertToWav()` (ffmpeg-static) → `transcribeAudio()` (Lemonade Whisper API)
  - **unknown** → returns `ignored` with reason
- Deletes every uploaded file from disk after processing (succeed or fail)
- Emits `event.file.processed` audit event for logging
@@ -185,7 +185,7 @@ Stores:
3. Telegram Adapter → Core: `file.ingest` envelope (FileIngestPayload)
4. Core Orchestrator: notify user "Processing N file(s)..."
5. Core → File Processor: `file.process` per file (parallel, Promise.allSettled)
-6. File Processor: routes by category, calls Ollama/Whisper/readFile, deletes original, responds
+6. File Processor: routes by category, calls Lemonade (chat/vision/transcription) or readFile, deletes original, responds
7. Core: collects results, builds `enrichedGoal` (inline context + transcript + caption)
   - Long text files (> textMaxInlineChars) → indexLongText() → model-router chunk summaries → rag-service
8. Core → Planner → Executor: runs normal task pipeline with `enrichedGoal`
diff --git a/_docs/PROJECT.md b/_docs/PROJECT.md
index c3ea182..8909229 100644
--- a/_docs/PROJECT.md
+++ b/_docs/PROJECT.md
@@ -13,7 +13,7 @@ It supports:
- Capability-based execution planning
- Task-level isolated memory
- Layered memory system
-- Local LLM inference via Ollama
+- Local LLM inference via Lemonade
- RAG via SQLite-backed document store; sqlite-vss for scalable vector search when available (fallback: dot-product)
- Structured persistence via SQLite
- Tool execution via MCP-compatible tool host
diff --git a/_docs/TECH.md b/_docs/TECH.md
index 5065fc9..5a79058 100644
--- a/_docs/TECH.md
+++ b/_docs/TECH.md
@@ -23,17 +23,17 @@

## LLM

-- Ollama
-- Models:
-  - Small: llama3:8b
-  - Medium: mistral
-  - Large: deepseek-coder / mixtral
+- Lemonade Server (OpenAI-compatible API)
+- Models (Local):
+  - Small: qwen2.5:0.5b
+  - Medium: qwen2.5:1.5b
+  - Large: qwen2.5:7b

---

## Embeddings

-- Ollama embedding models
+- Lemonade embedding models
- RAG: SQLite-backed document store; **sqlite-vss** for scalable KNN vector search when available (macOS x64/arm64, Linux x64); fallback to in-DB dot-product on other platforms. Configurable `rag.embeddingDimensions` (default 768).
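The in-DB dot-product fallback mentioned above is small enough to sketch directly. A self-contained example of the scoring step, with illustrative types rather than the RAG service's actual schema:

```typescript
// Dot-product fallback: when the sqlite-vss extension cannot load, candidate
// embeddings are scored in process and the top-k matches kept.
interface StoredDoc {
  id: string;
  embedding: Float32Array; // rag.embeddingDimensions entries, e.g. 768
}

function dot(a: Float32Array, b: Float32Array): number {
  let sum = 0;
  for (let i = 0; i < a.length; i++) sum += a[i] * b[i];
  return sum;
}

function topK(query: Float32Array, docs: StoredDoc[], k: number): { id: string; score: number }[] {
  return docs
    .map((d) => ({ id: d.id, score: dot(query, d.embedding) }))
    .sort((x, y) => y.score - x.score)
    .slice(0, k);
}
```

This is a full scan, O(n·d) per query, which is acceptable for a session-scoped corpus; sqlite-vss takes over only on platforms where the prebuilt extension loads.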
--- @@ -74,9 +74,9 @@ ## File Processing -- **nodejs-whisper** (`^0.2.9`) — local Whisper speech-to-text inference; model auto-downloaded on first use +- **Lemonade /transcriptions** — local Whisper-Tiny speech-to-text inference - **ffmpeg-static** (`^5.3.0`) — bundled ffmpeg binary for audio format conversion (any → 16 kHz mono WAV) -- **OllamaAdapter.chatWithImage()** — multimodal image OCR/description via configured vision model +- **LemonadeAdapter.chatWithImage()** — multimodal image OCR/description via `qwen3-vl` model --- diff --git a/config.json.example b/config.json.example index 3ca94ce..0152ef9 100644 --- a/config.json.example +++ b/config.json.example @@ -1,8 +1,9 @@ { - "ollama": { - "baseUrl": "http://127.0.0.1:11434/api/chat", - "timeoutMs": 60000, - "retries": 2 + "lemonade": { + "baseUrl": "http://127.0.0.1:8000/api/v1", + "timeoutMs": 600000, + "retries": 3, + "numCtx": 16384 }, "telegram": { "botToken": "YOUR_TELEGRAM_BOT_TOKEN", @@ -16,7 +17,7 @@ "logFile": "events.log" }, "rag": { - "embedModel": "nomic-embed-text", + "embedModel": "text-embedding-v3", "dbPath": "data/rag.sqlite", "embeddingDimensions": 768 }, @@ -27,10 +28,10 @@ "dbPath": "data/cron.sqlite" }, "modelRouter": { - "small": "qwen3:0.6b", - "medium": "qwen3:1.7b", - "large": "qwen3:4b", - "plannerComplexity": "medium" + "small": "qwen2.5:0.5b", + "medium": "qwen2.5:1.5b", + "large": "qwen2.5:7b", + "plannerComplexity": "small" }, "executor": { "nodeTimeoutMs": 600000 @@ -49,7 +50,7 @@ "warmupPrompt": "hello" }, "whisper": { - "modelName": "base", + "modelName": "Whisper-Tiny", "language": "auto", "modelDir": "data/whisper-models" }, @@ -57,7 +58,8 @@ "uploadDir": "data/uploads", "maxFileSizeBytes": 52428800, "textMaxInlineChars": 8000, - "ocrModel": "glm-ocr:q8_0", + "ocrModel": "qwen3-vl", "ocrEnabled": true - } + }, + "maxConcurrentTasks": 1 } diff --git a/src/agents/critic-agent.ts b/src/agents/critic-agent.ts index 37406c9..9216603 100644 --- a/src/agents/critic-agent.ts +++ b/src/agents/critic-agent.ts @@ -10,7 +10,7 @@ import type { Envelope } from "../shared/protocol.js"; import { PROTOCOL_VERSION } from "../shared/protocol.js"; import { responsePayloadSchema } from "../shared/protocol.js"; import { CRITIC_SYSTEM_PROMPT, buildCriticPrompt } from "./prompts/critic.js"; -import { OllamaAdapter } from "../services/ollama-adapter.js"; +import { LemonadeAdapter } from "../services/lemonade-adapter.js"; import { ModelRouter } from "../services/model-router.js"; const REFLECTION_EVALUATE = "reflection.evaluate"; @@ -41,12 +41,12 @@ function extractJson(text: string): string { } export class CriticAgent extends BaseProcess { - private readonly ollama: OllamaAdapter; + private readonly lemonade: LemonadeAdapter; private readonly modelRouter: ModelRouter; - constructor(options?: { ollama?: OllamaAdapter; modelRouter?: ModelRouter }) { + constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter }) { super({ processName: PROCESS_NAME }); - this.ollama = options?.ollama ?? new OllamaAdapter(); + this.lemonade = options?.lemonade ?? new LemonadeAdapter(); this.modelRouter = options?.modelRouter ?? new ModelRouter(); } @@ -72,7 +72,7 @@ export class CriticAgent extends BaseProcess { { role: "system" as const, content: CRITIC_SYSTEM_PROMPT }, { role: "user" as const, content: userContent }, ]; - const result = await this.ollama.chat(messages, model); + const result = await this.lemonade.chat(messages, model); const raw = result.message?.content ?? 
""; const jsonStr = extractJson(raw); const parsed = JSON.parse(jsonStr) as Record; diff --git a/src/agents/planner-agent.ts b/src/agents/planner-agent.ts index f925ce2..7ebe22a 100644 --- a/src/agents/planner-agent.ts +++ b/src/agents/planner-agent.ts @@ -11,7 +11,7 @@ import type { Envelope } from "../shared/protocol.js"; import { responsePayloadSchema } from "../shared/protocol.js"; import { buildPlannerPrompt } from "./prompts/planner.js"; import { ModelRouter } from "../services/model-router.js"; -import { OllamaAdapter } from "../services/ollama-adapter.js"; +import { LemonadeAdapter } from "../services/lemonade-adapter.js"; import { SkillManager } from "../services/skill-manager.js"; import { ConsoleLogger } from "../utils/console-logger.js"; @@ -40,13 +40,13 @@ function extractJson(text: string): string { } export class PlannerAgent extends BaseProcess { - private readonly ollama: OllamaAdapter; + private readonly lemonade: LemonadeAdapter; private readonly modelRouter: ModelRouter; private readonly skillManager: SkillManager; - constructor(options?: { ollama?: OllamaAdapter; modelRouter?: ModelRouter; skillManager?: SkillManager }) { + constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter; skillManager?: SkillManager }) { super({ processName: "planner" }); - this.ollama = options?.ollama ?? new OllamaAdapter(); + this.lemonade = options?.lemonade ?? new LemonadeAdapter(); this.modelRouter = options?.modelRouter ?? new ModelRouter(); this.skillManager = options?.skillManager ?? new SkillManager(); } @@ -87,7 +87,7 @@ export class PlannerAgent extends BaseProcess { { role: "system" as const, content: "You output only valid JSON. No markdown, no explanation." }, { role: "user" as const, content: prompt }, ]; - const result = await this.ollama.chat(messages, model); + const result = await this.lemonade.chat(messages, model); const raw = result.message?.content ?? 
""; ConsoleLogger.debug("planner", `Raw model response (length: ${raw.length})`); const jsonStr = extractJson(raw); diff --git a/src/core/orchestrator.ts b/src/core/orchestrator.ts index 78d4d8c..c5ef491 100644 --- a/src/core/orchestrator.ts +++ b/src/core/orchestrator.ts @@ -15,7 +15,7 @@ import { envelopeSchema } from "../shared/protocol.js"; import type { Envelope } from "../shared/protocol.js"; import { ConsoleLogger } from "../utils/console-logger.js"; import { getConfig } from "../shared/config.js"; -import { OllamaAdapter } from "../services/ollama-adapter.js"; +import { LemonadeAdapter } from "../services/lemonade-adapter.js"; import { ModelRouter } from "../services/model-router.js"; import { ModelManagerService } from "../services/model-manager.js"; import type { FileIngestPayload, ProcessedFile } from "../shared/file-protocol.js"; @@ -69,9 +69,9 @@ export class Orchestrator { private activeTaskCount = 0; constructor() { - const ollama = new OllamaAdapter(); + const lemonade = new LemonadeAdapter(); const modelRouter = new ModelRouter(); - this.modelManager = new ModelManagerService({ ollama, modelRouter }); + this.modelManager = new ModelManagerService({ lemonade, modelRouter }); } private spawnProcess(name: string, scriptPath: string, restartCount = 0): ChildEntry { diff --git a/src/services/__tests__/generator-model-manager.test.ts b/src/services/__tests__/generator-model-manager.test.ts index 916c4ba..e1a7a4d 100644 --- a/src/services/__tests__/generator-model-manager.test.ts +++ b/src/services/__tests__/generator-model-manager.test.ts @@ -7,7 +7,7 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { GeneratorService } from "../generator-service.js"; import { ModelManagerService } from "../model-manager.js"; -import type { OllamaAdapter } from "../ollama-adapter.js"; +import type { LemonadeAdapter } from "../lemonade-adapter.js"; import type { ModelRouter } from "../model-router.js"; import type { Envelope } from "../../shared/protocol.js"; import { randomUUID } from "node:crypto"; @@ -50,21 +50,12 @@ function makeNodeExecute( // ── test setup ──────────────────────────────────────────────────────────────── function createIntegrationSetup() { - const generate = vi.fn().mockResolvedValue({ - text: "mock response", - prompt_eval_count: 5, - eval_count: 10, - done: true, - }); const chat = vi.fn().mockResolvedValue({ - message: { role: "assistant", content: "mock chat response" }, - prompt_eval_count: 5, - eval_count: 10, - done: true, + message: { role: "assistant", content: "mock response" }, + usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 }, }); - const warmup = vi.fn((_model: string, _keepAlive: string | number): Promise => Promise.resolve()); - - const mockOllama = { generate, chat, warmup } as unknown as OllamaAdapter; + const warmup = vi.fn((_model: string): Promise => Promise.resolve()); + const mockLemonade = { chat, warmup } as unknown as LemonadeAdapter; const getModel = vi.fn((tier: string) => { const map: Record = { @@ -77,19 +68,19 @@ function createIntegrationSetup() { const mockRouter = { getModel } as unknown as ModelRouter; const modelManager = new ModelManagerService({ - ollama: mockOllama, + lemonade: mockLemonade, modelRouter: mockRouter, }); const ensureModelLoaded = vi.spyOn(modelManager, "ensureModelLoaded"); const service = new GeneratorService({ - ollama: mockOllama, + lemonade: mockLemonade, modelRouter: mockRouter, modelManager, }); - return { service, generate, chat, warmup, ensureModelLoaded, mockOllama, 
mockRouter }; + return { service, chat, warmup, ensureModelLoaded, mockLemonade, mockRouter }; } // ── tests ───────────────────────────────────────────────────────────────────── @@ -101,18 +92,18 @@ describe("GeneratorService + ModelManagerService integration", () => { setup = createIntegrationSetup(); }); - it("calls ensureModelLoaded before generate for the default (medium) tier", async () => { - const { service, generate, ensureModelLoaded } = setup; + it("calls ensureModelLoaded before chat for the default (medium) tier", async () => { + const { service, chat, ensureModelLoaded } = setup; const callOrder: string[] = []; ensureModelLoaded.mockImplementation(async () => { callOrder.push("ensure"); }); - generate.mockImplementation(async () => { callOrder.push("generate"); return { text: "ok", done: true }; }); + chat.mockImplementation(async () => { callOrder.push("chat"); return { message: { role: "assistant", content: "ok" } }; }); const envelope = makeNodeExecute(); (service as unknown as { handleEnvelope: (e: Envelope) => void }).handleEnvelope(envelope); await new Promise((r) => setTimeout(r, 150)); - expect(callOrder).toEqual(["ensure", "generate"]); + expect(callOrder).toEqual(["ensure", "chat"]); }); it("passes the correct model tier (small) to ensureModelLoaded", async () => { @@ -131,15 +122,15 @@ describe("GeneratorService + ModelManagerService integration", () => { expect(ensureModelLoaded).toHaveBeenCalledWith("large"); }); - it("does NOT call generate if ensureModelLoaded rejects", async () => { - const { service, generate, ensureModelLoaded } = setup; + it("does NOT call chat if ensureModelLoaded rejects", async () => { + const { service, chat, ensureModelLoaded } = setup; ensureModelLoaded.mockRejectedValue(new Error("model load failed")); const envelope = makeNodeExecute(); (service as unknown as { handleEnvelope: (e: Envelope) => void }).handleEnvelope(envelope); await new Promise((r) => setTimeout(r, 150)); - expect(generate).not.toHaveBeenCalled(); + expect(chat).not.toHaveBeenCalled(); }); it("concurrent requests for the same tier deduplicate warmup calls", async () => { diff --git a/src/services/__tests__/generator-service.test.ts b/src/services/__tests__/generator-service.test.ts index b203e7f..e655966 100644 --- a/src/services/__tests__/generator-service.test.ts +++ b/src/services/__tests__/generator-service.test.ts @@ -5,33 +5,27 @@ import { describe, expect, it, vi, beforeEach } from "vitest"; import { GeneratorService } from "../generator-service.js"; -import type { OllamaAdapter } from "../ollama-adapter.js"; +import type { LemonadeAdapter } from "../lemonade-adapter.js"; import type { ModelRouter } from "../model-router.js"; import type { Envelope } from "../../shared/protocol.js"; import { randomUUID } from "node:crypto"; describe("GeneratorService shell tool response handling", () => { let generatorService: GeneratorService; - let mockOllama: OllamaAdapter; + let mockLemonade: LemonadeAdapter; let mockModelRouter: ModelRouter; beforeEach(() => { - // Mock OllamaAdapter - mockOllama = { - generate: vi.fn().mockResolvedValue({ - text: "Generated response", - prompt_eval_count: 10, - eval_count: 20, - }), - chat: vi.fn().mockResolvedValue({ + // Mock LemonadeAdapter + mockLemonade = { + chat: vi.fn((_messages: any[]) => Promise.resolve({ message: { content: "Generated response", role: "assistant", }, - prompt_eval_count: 10, - eval_count: 20, - }), - } as unknown as OllamaAdapter; + usage: { prompt_tokens: 10, completion_tokens: 20 }, + })), + } as unknown as 
LemonadeAdapter; // Mock ModelRouter mockModelRouter = { @@ -39,7 +33,7 @@ describe("GeneratorService shell tool response handling", () => { } as unknown as ModelRouter; generatorService = new GeneratorService({ - ollama: mockOllama, + lemonade: mockLemonade, modelRouter: mockModelRouter, }); @@ -81,10 +75,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length - 1].content; expect(prompt).toContain("File content from cat command"); expect(prompt).toContain("Summarize the file content"); @@ -125,10 +120,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length - 1].content; expect(prompt).toContain("Main output"); expect(prompt).toContain("[stderr: Warning: file not found]"); @@ -169,10 +165,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length - 1].content; // Empty stdout should not break the prompt expect(prompt).toContain("Process this"); @@ -222,10 +219,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length - 1].content; expect(prompt).toContain("First file content"); expect(prompt).toContain("Second file content"); @@ -265,10 +263,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length 
- 1].content; expect(prompt).toContain("User goal: Analyze the configuration"); expect(prompt).toContain("Configuration file content"); @@ -309,10 +308,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length - 1].content; expect(prompt).toContain("Output content"); expect(prompt).not.toContain("[stderr:"); @@ -364,10 +364,11 @@ describe("GeneratorService shell tool response handling", () => { // Wait for async processing await new Promise((resolve) => setTimeout(resolve, 100)); - expect(mockOllama.generate).toHaveBeenCalled(); - const callArgs = (mockOllama.generate as ReturnType).mock.calls[0]; + expect(mockLemonade.chat).toHaveBeenCalled(); + const callArgs = (mockLemonade.chat as any).mock.calls[0]; expect(callArgs).toBeDefined(); - const prompt = callArgs![0] as string; + const messages = callArgs![0] as any[]; + const prompt = messages[messages.length - 1].content; expect(prompt).toContain("Local file content"); expect(prompt).toContain("Web page content"); diff --git a/src/services/__tests__/model-manager.test.ts b/src/services/__tests__/model-manager.test.ts index 5a87a60..9a38b7a 100644 --- a/src/services/__tests__/model-manager.test.ts +++ b/src/services/__tests__/model-manager.test.ts @@ -6,7 +6,7 @@ import { describe, it, expect, vi } from "vitest"; import { ModelManagerService } from "../model-manager.js"; -import type { OllamaAdapter } from "../ollama-adapter.js"; +import type { LemonadeAdapter } from "../lemonade-adapter.js"; import type { ModelRouter } from "../model-router.js"; // Mock getConfig so tests don't depend on config.json on disk. @@ -21,8 +21,8 @@ vi.mock("../../shared/config.js", () => ({ }), })); -function makeWarmup(impl?: (model: string, keepAlive: string | number) => Promise) { - return vi.fn(impl ?? ((_model: string, _keepAlive: string | number): Promise => Promise.resolve())); +function makeWarmup(impl?: (model: string) => Promise) { + return vi.fn(impl ?? 
((_model: string): Promise => Promise.resolve())); } function makeRouter(map: Record = { small: "llama3:8b", medium: "mistral", large: "mixtral" }) { @@ -31,15 +31,15 @@ function makeRouter(map: Record = { small: "llama3:8b", medium: function createMocks() { const warmup = makeWarmup(); - const mockOllama = { warmup } as unknown as OllamaAdapter; + const mockLemonade = { warmup } as unknown as LemonadeAdapter; const mockRouter = makeRouter(); const service = new ModelManagerService({ - ollama: mockOllama, + lemonade: mockLemonade, modelRouter: mockRouter, }); - return { service, warmup, mockOllama, mockRouter }; + return { service, warmup, mockLemonade, mockRouter }; } // --------------------------------------------------------------------------- @@ -47,25 +47,25 @@ function createMocks() { // --------------------------------------------------------------------------- describe("ensureModelLoaded – tier mapping", () => { - it("warms up the small model with small keep-alive", async () => { + it("warms up the small model", async () => { const { service, warmup } = createMocks(); await service.ensureModelLoaded("small"); expect(warmup).toHaveBeenCalledOnce(); - expect(warmup).toHaveBeenCalledWith("llama3:8b", "10m"); + expect(warmup).toHaveBeenCalledWith("llama3:8b"); }); - it("warms up the medium model with medium keep-alive", async () => { + it("warms up the medium model", async () => { const { service, warmup } = createMocks(); await service.ensureModelLoaded("medium"); expect(warmup).toHaveBeenCalledOnce(); - expect(warmup).toHaveBeenCalledWith("mistral", "30m"); + expect(warmup).toHaveBeenCalledWith("mistral"); }); - it("warms up the large model with large keep-alive", async () => { + it("warms up the large model", async () => { const { service, warmup } = createMocks(); await service.ensureModelLoaded("large"); expect(warmup).toHaveBeenCalledOnce(); - expect(warmup).toHaveBeenCalledWith("mixtral", "5m"); + expect(warmup).toHaveBeenCalledWith("mixtral"); }); }); @@ -80,9 +80,9 @@ describe("ensureModelLoaded – concurrency deduplication", () => { resolveWarmup = res; }); - const warmup = vi.fn((_model: string, _keepAlive: string | number): Promise => pending); - const mockOllama = { warmup } as unknown as OllamaAdapter; - const service = new ModelManagerService({ ollama: mockOllama, modelRouter: makeRouter() }); + const warmup = vi.fn((_model: string): Promise => pending); + const mockLemonade = { warmup } as unknown as LemonadeAdapter; + const service = new ModelManagerService({ lemonade: mockLemonade, modelRouter: makeRouter() }); // Start three concurrent calls. 
const p1 = service.ensureModelLoaded("small"); @@ -129,23 +129,23 @@ describe("ensureModelLoaded – concurrency deduplication", () => { describe("ensureModelLoaded – error propagation", () => { it("rejects when warmup fails", async () => { - const warmup = vi.fn((_model: string, _keepAlive: string | number): Promise => + const warmup = vi.fn((_model: string): Promise => Promise.reject(new Error("network error")), ); - const mockOllama = { warmup } as unknown as OllamaAdapter; - const service = new ModelManagerService({ ollama: mockOllama, modelRouter: makeRouter() }); + const mockLemonade = { warmup } as unknown as LemonadeAdapter; + const service = new ModelManagerService({ lemonade: mockLemonade, modelRouter: makeRouter() }); await expect(service.ensureModelLoaded("small")).rejects.toThrow("network error"); }); it("clears the in-flight entry even when warmup fails", async () => { let callCount = 0; - const warmup = vi.fn((_model: string, _keepAlive: string | number): Promise => { + const warmup = vi.fn((_model: string): Promise => { callCount++; if (callCount === 1) return Promise.reject(new Error("transient error")); return Promise.resolve(); }); - const mockOllama = { warmup } as unknown as OllamaAdapter; - const service = new ModelManagerService({ ollama: mockOllama, modelRouter: makeRouter() }); + const mockLemonade = { warmup } as unknown as LemonadeAdapter; + const service = new ModelManagerService({ lemonade: mockLemonade, modelRouter: makeRouter() }); await expect(service.ensureModelLoaded("small")).rejects.toThrow("transient error"); // After initial failure, a retry should succeed. @@ -161,12 +161,12 @@ describe("ensureModelLoaded – error propagation", () => { describe("prewarmModels", () => { it("warms up small before medium, in order", async () => { const callOrder: string[] = []; - const warmup = vi.fn((model: string, _keepAlive: string | number): Promise => { + const warmup = vi.fn((model: string): Promise => { callOrder.push(model); return Promise.resolve(); }); - const mockOllama = { warmup } as unknown as OllamaAdapter; - const service = new ModelManagerService({ ollama: mockOllama, modelRouter: makeRouter() }); + const mockLemonade = { warmup } as unknown as LemonadeAdapter; + const service = new ModelManagerService({ lemonade: mockLemonade, modelRouter: makeRouter() }); await service.prewarmModels(); expect(callOrder).toEqual(["llama3:8b", "mistral"]); diff --git a/src/services/__tests__/time-parser.test.ts b/src/services/__tests__/time-parser.test.ts index 58e1bae..fbeb2f4 100644 --- a/src/services/__tests__/time-parser.test.ts +++ b/src/services/__tests__/time-parser.test.ts @@ -5,19 +5,19 @@ import { describe, expect, it, vi, beforeEach } from "vitest"; import { TimeParserService } from "../time-parser.js"; -import type { OllamaAdapter } from "../ollama-adapter.js"; +import type { LemonadeAdapter } from "../lemonade-adapter.js"; import type { ModelRouter } from "../model-router.js"; -import type { ChatResult } from "../ollama-adapter.js"; +import type { ChatResult } from "../lemonade-adapter.js"; describe("TimeParserService", () => { - let mockOllama: OllamaAdapter; + let mockLemonade: LemonadeAdapter; let mockModelRouter: ModelRouter; beforeEach(() => { - // Mock OllamaAdapter - mockOllama = { + // Mock LemonadeAdapter + mockLemonade = { chat: vi.fn(), - } as unknown as OllamaAdapter; + } as unknown as LemonadeAdapter; // Mock ModelRouter mockModelRouter = { @@ -38,15 +38,15 @@ describe("TimeParserService", () => { }, done: true, }; - 
vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("in 5 minutes"); expect(result.cronExpr).toBe("35 14 17 2 *"); expect(result.isRecurring).toBe(false); expect(result.description).toContain("5 minutes"); - expect(mockOllama.chat).toHaveBeenCalled(); + expect(mockLemonade.chat).toHaveBeenCalled(); }); it("parses relative time 'in 2 hours' correctly", async () => { @@ -61,9 +61,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("in 2 hours"); expect(result.cronExpr).toBe("30 16 17 2 *"); @@ -83,9 +83,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("in 3 days"); expect(result.cronExpr).toBe("0 9 20 2 *"); @@ -105,9 +105,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("tomorrow at 3pm"); expect(result.cronExpr).toBe("0 15 18 2 *"); @@ -128,9 +128,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("next Monday at 9am"); expect(result.cronExpr).toBe("0 9 24 2 *"); @@ -151,9 +151,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("every day at 9am"); expect(result.cronExpr).toBe("0 9 * * *"); @@ -174,9 +174,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: 
mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("every Monday at 10am"); expect(result.cronExpr).toBe("0 10 * * 1"); @@ -197,9 +197,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("every week"); expect(result.cronExpr).toBe("0 0 * * 0"); @@ -221,9 +221,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); const result = await parser.parseTimeExpression("every day at 9am"); expect(result.cronExpr).toBe("0 9 * * *"); @@ -242,15 +242,15 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); await expect(parser.parseTimeExpression("invalid")).rejects.toThrow("Generated cron expression is invalid"); }); it("throws error for empty string input", async () => { - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); await expect(parser.parseTimeExpression("")).rejects.toThrow("Invalid input"); await expect(parser.parseTimeExpression(" ")).rejects.toThrow("Invalid input"); @@ -264,9 +264,9 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); await expect(parser.parseTimeExpression("test")).rejects.toThrow("Failed to parse LLM response"); }); @@ -282,15 +282,15 @@ describe("TimeParserService", () => { }, done: true, }; - vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse); + vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse); - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); await expect(parser.parseTimeExpression("test")).rejects.toThrow("missing or invalid"); }); it("throws error for non-string input", async () => { - const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter }); + const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter }); // @ts-expect-error Testing invalid input type await expect(parser.parseTimeExpression(null)).rejects.toThrow("Invalid input"); @@ -299,9 +299,9 @@ describe("TimeParserService", () => { }); it("handles LLM network errors", async () => { - 
-    vi.mocked(mockOllama.chat).mockRejectedValue(new Error("Network error"));
+    vi.mocked(mockLemonade.chat).mockRejectedValue(new Error("Network error"));
 
-    const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter });
+    const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter });
 
     await expect(parser.parseTimeExpression("test")).rejects.toThrow("Network error");
   });
@@ -314,9 +314,9 @@ describe("TimeParserService", () => {
       },
       done: true,
     };
-    vi.mocked(mockOllama.chat).mockResolvedValue(mockResponse);
+    vi.mocked(mockLemonade.chat).mockResolvedValue(mockResponse);
 
-    const parser = new TimeParserService({ ollama: mockOllama, modelRouter: mockModelRouter });
+    const parser = new TimeParserService({ lemonade: mockLemonade, modelRouter: mockModelRouter });
 
     await expect(parser.parseTimeExpression("test")).rejects.toThrow("empty response");
   });
diff --git a/src/services/file-processor.ts b/src/services/file-processor.ts
index c9afa42..8bca1f0 100644
--- a/src/services/file-processor.ts
+++ b/src/services/file-processor.ts
@@ -19,7 +19,7 @@ import { BaseProcess } from "../shared/base-process.js";
 import type { Envelope } from "../shared/protocol.js";
 import { PROTOCOL_VERSION } from "../shared/protocol.js";
 import { getConfig } from "../shared/config.js";
-import { OllamaAdapter } from "../services/ollama-adapter.js";
+import { LemonadeAdapter } from "../services/lemonade-adapter.js";
 import { convertToWav } from "../utils/audio-converter.js";
 import { transcribeAudio } from "../utils/whisper-transcriber.js";
 import type {
@@ -38,11 +38,11 @@ const OCR_PROMPT =
   "If no text is found, describe the visual content in detail.";
 
 class FileProcessorService extends BaseProcess {
-  private readonly ollama: OllamaAdapter;
+  private readonly lemonade: LemonadeAdapter;
 
   constructor() {
     super({ processName: PROCESS_NAME });
-    this.ollama = new OllamaAdapter();
+    this.lemonade = new LemonadeAdapter();
   }
 
   protected override handleEnvelope(envelope: Envelope): void {
@@ -186,10 +186,6 @@ class FileProcessorService extends BaseProcess {
     };
   }
 
-  // -------------------------------------------------------------------------
-  // Image handler — OCR / description via Ollama vision model
-  // -------------------------------------------------------------------------
-
   private async processImage(req: FileProcessRequest): Promise {
     const { ocrModel, ocrEnabled } = getConfig().fileProcessor;
@@ -203,8 +199,12 @@ class FileProcessorService extends BaseProcess {
       };
     }
 
-    const result = await this.ollama.generateWithImage(OCR_PROMPT, ocrModel, req.localPath);
-    const content = result.text?.trim() ?? "";
+    const result = await this.lemonade.chatWithImage(
+      [{ role: "user", content: OCR_PROMPT }],
+      ocrModel,
+      req.localPath
+    );
+    const content = result.message.content?.trim() ?? "";
""; // Log basic info for debugging without leaking full sensitive content process.stderr.write(`[file-processor] [DEBUG] OCR Result for ${req.fileName} (len: ${content.length}, words: ${content.split(/\s+/).length}): ${content.substring(0, 150).replace(/\n/g, " ")}...\n`); @@ -216,8 +216,7 @@ class FileProcessorService extends BaseProcess { content, metadata: { model: ocrModel, - promptEvalCount: result.prompt_eval_count, - evalCount: result.eval_count, + usage: result.usage, }, }; } diff --git a/src/services/generator-service.ts b/src/services/generator-service.ts index 3721eba..b7d5dc2 100644 --- a/src/services/generator-service.ts +++ b/src/services/generator-service.ts @@ -9,10 +9,9 @@ import { BaseProcess } from "../shared/base-process.js"; import type { Envelope } from "../shared/protocol.js"; import { PROTOCOL_VERSION } from "../shared/protocol.js"; import { responsePayloadSchema } from "../shared/protocol.js"; -import { getConfig } from "../shared/config.js"; import { buildSummarizerPrompt, SUMMARIZER_SYSTEM_PROMPT } from "../agents/prompts/summarizer.js"; import { ANALYZER_SYSTEM_PROMPT, buildAnalyzerUserPrompt } from "../agents/prompts/analyzer.js"; -import { OllamaAdapter, type ChatMessage } from "./ollama-adapter.js"; +import { LemonadeAdapter, type ChatMessage } from "./lemonade-adapter.js"; import { ModelRouter } from "./model-router.js"; import { ModelManagerService, type ModelTier } from "./model-manager.js"; @@ -29,13 +28,13 @@ interface NodeExecutePayload { } export class GeneratorService extends BaseProcess { - private readonly ollama: OllamaAdapter; + private readonly lemonade: LemonadeAdapter; private readonly modelRouter: ModelRouter; private readonly modelManager: ModelManagerService | null; - constructor(options?: { ollama?: OllamaAdapter; modelRouter?: ModelRouter; modelManager?: ModelManagerService }) { + constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter; modelManager?: ModelManagerService }) { super({ processName: PROCESS_NAME }); - this.ollama = options?.ollama ?? new OllamaAdapter(); + this.lemonade = options?.lemonade ?? new LemonadeAdapter(); this.modelRouter = options?.modelRouter ?? new ModelRouter(); this.modelManager = options?.modelManager ?? null; } @@ -169,23 +168,22 @@ export class GeneratorService extends BaseProcess { }); prompt = depOutputs.join("\n\n") || "Generate a brief response."; } - messages = (p.input?.messages as ChatMessage[]) ?? - (systemPrompt + if (!messages) { + messages = systemPrompt ? [{ role: "system" as const, content: systemPrompt }, { role: "user" as const, content: prompt }] - : undefined); + : [{ role: "user" as const, content: prompt }]; + } - const ollamaOptions = { num_ctx: getConfig().ollama.numCtx }; - const genResult = messages - ? await this.ollama.chat(messages, model, { tools: p.input?.tools as any[], options: ollamaOptions }) - : await this.ollama.generate(prompt, model, { options: ollamaOptions }); + const genResult = await this.lemonade.chat(messages, model, { + tools: p.input?.tools as any[], + }); - const text = "message" in genResult ? genResult.message.content : genResult.text; - const tool_calls = "message" in genResult ? genResult.message.tool_calls : undefined; + const text = genResult.message.content; + const tool_calls = genResult.message.tool_calls; this.sendResponse(envelope, { text, tool_calls, - prompt_eval_count: genResult.prompt_eval_count, - eval_count: genResult.eval_count + usage: genResult.usage }); } catch (err) { const message = err instanceof Error ? 
err.message : String(err); diff --git a/src/services/lemonade-adapter.ts b/src/services/lemonade-adapter.ts new file mode 100644 index 0000000..be366d9 --- /dev/null +++ b/src/services/lemonade-adapter.ts @@ -0,0 +1,294 @@ +/** + * Lemonade adapter: bridge to Lemonade-Server (OpenAI-compatible local AI). + * Supports chat completions, vision, embeddings, and audio transcription. + */ + +import { readFile } from "node:fs/promises"; +import { getConfig } from "../shared/config.js"; + +export interface ChatMessagePart { + type: "text" | "image_url"; + text?: string; + image_url?: { + url: string; + }; +} + +export interface ChatMessage { + role: "system" | "user" | "assistant" | "tool"; + content: string | ChatMessagePart[]; + tool_calls?: ToolCall[]; + tool_call_id?: string; + name?: string; +} + +export interface ToolCall { + id: string; + type: string; + function: { + name: string; + arguments: string; // OpenAI uses stringified JSON for arguments + }; +} + +export interface ChatOptions { + timeoutMs?: number; + tools?: any[]; + tool_choice?: string | object; + temperature?: number; + max_tokens?: number; + response_format?: { type: "json_object" | "text" }; +} + +export interface ChatResult { + message: { + role: string; + content: string; + tool_calls?: ToolCall[]; + }; + usage?: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; + done: boolean; +} + +export interface EmbedResult { + embedding: number[]; +} + +export interface TranscriptionResult { + text: string; +} + +export interface LemonadeAdapterOptions { + baseUrl?: string; + timeoutMs?: number; + retries?: number; +} + +export class LemonadeAdapter { + private readonly baseUrl: string; + private readonly timeoutMs: number; + private readonly retries: number; + + constructor(options: LemonadeAdapterOptions = {}) { + const c = getConfig().lemonade; + this.baseUrl = options.baseUrl ?? c.baseUrl; + this.timeoutMs = options.timeoutMs ?? c.timeoutMs; + this.retries = options.retries ?? c.retries; + } + + /** + * Chat completion (OpenAI compatible). + */ + async chat( + messages: ChatMessage[], + model: string, + opts: ChatOptions = {}, + ): Promise { + const timeoutMs = opts.timeoutMs ?? this.timeoutMs; + const url = `${this.baseUrl}/chat/completions`; + + const body: Record = { + model, + messages, + stream: false, + temperature: opts.temperature ?? 0.7, + max_tokens: opts.max_tokens, + tools: opts.tools, + tool_choice: opts.tool_choice, + response_format: opts.response_format, + }; + + const res = await this.fetchWithRetry(url, body, timeoutMs); + const data = (await res.json()) as any; + + const choice = data.choices?.[0]; + if (!choice) { + throw new Error(`Lemonade chat error: No choice returned. Data: ${JSON.stringify(data)}`); + } + + // Convert OpenAI tool calls format if necessary (Lemonade usually follows OpenAI strictly) + const tool_calls = choice.message.tool_calls?.map((tc: any) => ({ + id: tc.id, + type: tc.type, + function: { + name: tc.function.name, + arguments: tc.function.arguments // keep as string for compatibility or parse if needed + } + })); + + return { + message: { + role: choice.message.role, + content: choice.message.content ?? "", + tool_calls: tool_calls, + }, + usage: data.usage, + done: true, + }; + } + + /** + * Chat with an image attachment (OpenAI Vision compatible). 
+   */
+  async chatWithImage(
+    messages: ChatMessage[],
+    model: string,
+    imagePath: string,
+    opts: ChatOptions = {},
+  ): Promise<ChatResult> {
+    const imageBytes = await readFile(imagePath);
+    const base64Image = imageBytes.toString("base64");
+    const mimeType = this.getMimeType(imagePath);
+    const dataUri = `data:${mimeType};base64,${base64Image}`;
+
+    // Clone messages and inject image into the last user message
+    const updatedMessages: ChatMessage[] = JSON.parse(JSON.stringify(messages));
+    const lastUserMsg = [...updatedMessages].reverse().find(m => m.role === "user");
+
+    const imagePart: ChatMessagePart = {
+      type: "image_url",
+      image_url: { url: dataUri },
+    };
+
+    if (lastUserMsg) {
+      if (typeof lastUserMsg.content === "string") {
+        lastUserMsg.content = [
+          { type: "text", text: lastUserMsg.content },
+          imagePart,
+        ];
+      } else {
+        lastUserMsg.content.push(imagePart);
+      }
+    } else {
+      updatedMessages.push({
+        role: "user",
+        content: [imagePart],
+      });
+    }
+
+    return this.chat(updatedMessages, model, opts);
+  }
+
+  /**
+   * Embed text (OpenAI compatible).
+   */
+  async embed(input: string, model: string, opts: { timeoutMs?: number } = {}): Promise<EmbedResult> {
+    const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
+    const url = `${this.baseUrl}/embeddings`;
+    const body = { model, input };
+    const res = await this.fetchWithRetry(url, body, timeoutMs);
+    const data = (await res.json()) as any;
+
+    const embedding = data.data?.[0]?.embedding;
+    if (!embedding) {
+      throw new Error("Lemonade embed error: No embedding returned");
+    }
+
+    return { embedding };
+  }
+
+  /**
+   * Transcribe audio using Whisper model on Lemonade-Server.
+   */
+  async transcribe(
+    audioPath: string,
+    model: string = "Whisper-Tiny",
+    language: string = "auto",
+  ): Promise<TranscriptionResult> {
+    const url = `${this.baseUrl}/audio/transcriptions`;
+    const formData = new FormData();
+
+    const audioBlob = new Blob([await readFile(audioPath)]);
+    formData.append("file", audioBlob, "audio.wav");
+    formData.append("model", model);
+    if (language !== "auto") {
+      formData.append("language", language);
+    }
+
+    const res = await fetch(url, {
+      method: "POST",
+      body: formData,
+      // FormData sets content-type automatically with boundary
+    });
+
+    if (!res.ok) {
+      const text = await res.text();
+      throw new Error(`Lemonade transcription error ${res.status}: ${text}`);
+    }
+
+    const data = await res.json() as TranscriptionResult;
+    return data;
+  }
+
+  /**
+   * Warmup model (loads it into memory).
+   * Lemonade might not have a specific warmup endpoint identical to Ollama,
+   * but sending a small message often works.
+   */
+  async warmup(model: string): Promise<void> {
+    try {
+      await this.chat([{ role: "user", content: "hi" }], model, { max_tokens: 1, timeoutMs: 30000 });
+    } catch (err) {
+      console.warn(`Lemonade warmup for ${model} failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }
+
+  private async fetchWithRetry(
+    url: string,
+    body: unknown,
+    timeoutMs: number,
+  ): Promise<Response> {
+    let lastError: unknown;
+    for (let attempt = 0; attempt <= this.retries; attempt++) {
+      try {
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), timeoutMs);
+        const res = await fetch(url, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify(body),
+          signal: controller.signal,
+        });
+        clearTimeout(timeout);
+        if (!res.ok) {
+          const text = await res.text();
+          throw new Error(`Lemonade ${res.status}: ${text}`);
+        }
+        return res;
+      } catch (err) {
+        lastError = err;
+        const isRetryable =
+          err instanceof Error &&
+          (err.name === "AbortError" ||
+            err.message.includes("fetch") ||
+            err.message.includes("ECONNREFUSED") ||
+            err.message.includes("network") ||
+            err.message.includes("reset") ||
+            err.message.includes("hangup"));
+
+        if (attempt === this.retries || !isRetryable) {
+          throw err;
+        }
+        console.warn(`[lemonade-adapter] Fetch failed: ${err instanceof Error ? err.message : String(err)}. Retrying in ${(attempt + 1)}s...`);
+        await new Promise((resolve) => setTimeout(resolve, (attempt + 1) * 1000));
+      }
+    }
+    throw lastError;
+  }
+
+  private getMimeType(filePath: string): string {
+    const ext = filePath.split(".").pop()?.toLowerCase();
+    switch (ext) {
+      case "png": return "image/png";
+      case "jpg":
+      case "jpeg": return "image/jpeg";
+      case "webp": return "image/webp";
+      case "gif": return "image/gif";
+      default: return "image/jpeg";
+    }
+  }
+}
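A quick usage sketch of the adapter above — not part of the patch; the model names and image path are placeholders for whatever your Lemonade server serves:

```ts
import { LemonadeAdapter } from "./src/services/lemonade-adapter.js";

const lemonade = new LemonadeAdapter();

// Plain chat: returns an OpenAI-shaped { message, usage, done } result.
const chat = await lemonade.chat(
  [{ role: "user", content: "Summarize this repo in one sentence." }],
  "qwen2.5:1.5b",
  { temperature: 0.2 },
);
console.log(chat.message.content, chat.usage);

// Vision: the adapter base64-encodes the file into a data URI and appends an
// image_url part to the last user message before delegating to chat().
const ocr = await lemonade.chatWithImage(
  [{ role: "user", content: "Extract all text from this image." }],
  "qwen3-vl",
  "./scan.png",
);
console.log(ocr.message.content);
```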
diff --git a/src/services/model-manager.ts b/src/services/model-manager.ts
index 0f5b2c3..bf06b5e 100644
--- a/src/services/model-manager.ts
+++ b/src/services/model-manager.ts
@@ -1,23 +1,23 @@
 /**
- * ModelManagerService: manages tiered Ollama model lifecycles.
- * Ensures models are loaded on demand with appropriate keep-alive settings,
+ * ModelManagerService: manages tiered Lemonade model lifecycles.
+ * Ensures models are loaded on demand,
  * with concurrency safety (deduplication of concurrent load calls).
 */
-import { getConfig } from "../shared/config.js";
-import { OllamaAdapter } from "./ollama-adapter.js";
+
+import { LemonadeAdapter } from "./lemonade-adapter.js";
 import { ModelRouter } from "./model-router.js";
 
 /** Model tier corresponds to the three tiers in ModelRouter. */
 export type ModelTier = "small" | "medium" | "large";
 
 export interface ModelManagerServiceOptions {
-  ollama: OllamaAdapter;
+  lemonade: LemonadeAdapter;
   modelRouter: ModelRouter;
 }
 
 export class ModelManagerService {
-  private readonly ollama: OllamaAdapter;
+  private readonly lemonade: LemonadeAdapter;
   private readonly modelRouter: ModelRouter;
 
   /**
@@ -27,7 +27,7 @@ export class ModelManagerService {
   private readonly inflight = new Map<string, Promise<void>>();
 
   constructor(opts: ModelManagerServiceOptions) {
-    this.ollama = opts.ollama;
+    this.lemonade = opts.lemonade;
     this.modelRouter = opts.modelRouter;
   }
 
@@ -37,7 +37,6 @@ export class ModelManagerService {
    */
   async ensureModelLoaded(tier: ModelTier): Promise<void> {
     const model = this.modelRouter.getModel(tier);
-    const keepAlive = this.resolveKeepAlive(tier);
 
     // Reuse an in-flight warmup if one is already running for this model.
     const existing = this.inflight.get(model);
@@ -45,8 +44,8 @@ export class ModelManagerService {
       return existing;
     }
 
-    const promise = this.ollama
-      .warmup(model, keepAlive)
+    const promise = this.lemonade
+      .warmup(model)
       .finally(() => {
         this.inflight.delete(model);
       });
@@ -63,21 +62,4 @@ export class ModelManagerService {
     await this.ensureModelLoaded("small");
     await this.ensureModelLoaded("medium");
   }
-
-  /**
-   * Map a tier to its configured keep-alive value.
-   * Small/Medium use infinite keep-alive (-1) per spec;
-   * large uses the configured largeModelKeepAlive duration.
-   */
-  private resolveKeepAlive(tier: ModelTier): string | number {
-    const cfg = getConfig().modelManager;
-    switch (tier) {
-      case "small":
-        return cfg.smallModelKeepAlive;
-      case "medium":
-        return cfg.mediumModelKeepAlive;
-      case "large":
-        return cfg.largeModelKeepAlive;
-    }
-  }
 }
diff --git a/src/services/ollama-adapter.ts b/src/services/ollama-adapter.ts
index c1e6e04..7702662 100644
--- a/src/services/ollama-adapter.ts
+++ b/src/services/ollama-adapter.ts
@@ -79,7 +79,7 @@ export class OllamaAdapter {
   private readonly numCtx: number;
 
   constructor(options: OllamaAdapterOptions = {}) {
-    const c = getConfig().ollama;
+    const c = getConfig().lemonade;
     this.baseUrl = options.baseUrl ?? c.baseUrl;
     this.timeoutMs = options.timeoutMs ?? c.timeoutMs;
     this.retries = options.retries ?? c.retries;
diff --git a/src/services/rag-service.ts b/src/services/rag-service.ts
index 29d6992..6513719 100644
--- a/src/services/rag-service.ts
+++ b/src/services/rag-service.ts
@@ -14,7 +14,7 @@ import type { Envelope } from "../shared/protocol.js";
 import { PROTOCOL_VERSION } from "../shared/protocol.js";
 import { responsePayloadSchema } from "../shared/protocol.js";
 import { getConfig } from "../shared/config.js";
-import { OllamaAdapter } from "./ollama-adapter.js";
+import { LemonadeAdapter } from "./lemonade-adapter.js";
 import { ConsoleLogger } from "../utils/console-logger.js";
 
 const PROCESS_NAME = "rag-service";
@@ -177,13 +177,13 @@ interface MemoryInsertPayload {
 }
 
 export class RAGService extends BaseProcess {
-  private readonly ollama: OllamaAdapter;
+  private readonly lemonade: LemonadeAdapter;
   private readonly embedModel: string;
   private readonly store: RAGStore;
 
-  constructor(options?: { ollama?: OllamaAdapter; embedModel?: string; dbPath?: string; embeddingDimensions?: number }) {
+  constructor(options?: { lemonade?: LemonadeAdapter; embedModel?: string; dbPath?: string; embeddingDimensions?: number }) {
     super({ processName: PROCESS_NAME });
-    this.ollama = options?.ollama ?? new OllamaAdapter();
+    this.lemonade = options?.lemonade ?? new LemonadeAdapter();
     this.embedModel = options?.embedModel ?? getConfig().rag.embedModel;
     const dbPath = options?.dbPath ?? getConfig().rag.dbPath;
     const embeddingDimensions = options?.embeddingDimensions ?? getConfig().rag.embeddingDimensions;
@@ -193,7 +193,7 @@ export class RAGService extends BaseProcess {
 
   /** Embed and store a document */
   async addDocument(content: string, metadata: Record<string, unknown> = {}): Promise<string> {
     ConsoleLogger.debug(PROCESS_NAME, `Embedding document: ${content.substring(0, 50)}...`);
-    const { embedding } = await this.ollama.embed(content, this.embedModel, { timeoutMs: 60000 });
+    const { embedding } = await this.lemonade.embed(content, this.embedModel, { timeoutMs: 60000 });
     const id = randomUUID();
     this.store.insert(id, content, metadata, embedding);
     ConsoleLogger.info(PROCESS_NAME, `Stored document ${id.substring(0, 8)}`);
@@ -203,7 +203,7 @@ export class RAGService extends BaseProcess {
   /** Return relevant snippets by semantic similarity (cosine via dot product for L2-normalized vectors) */
   async search(query: string, limit = 5, filters?: { conversationId?: string | undefined }): Promise<Array<{ id: string; content: string; metadata: Record<string, unknown>; score: number }>> {
     ConsoleLogger.debug(PROCESS_NAME, `Searching RAG: "${query}"${filters?.conversationId ? ` (filter: ${filters.conversationId})` : ""}`);
-    const { embedding: queryEmbed } = await this.ollama.embed(query, this.embedModel, { timeoutMs: 30000 });
+    const { embedding: queryEmbed } = await this.lemonade.embed(query, this.embedModel, { timeoutMs: 30000 });
     const results = this.store.search(queryEmbed, limit, filters);
     ConsoleLogger.info(PROCESS_NAME, `Found ${results.length} results for query`);
     return results;
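The `search` docstring above leans on a small identity worth spelling out: for L2-normalized vectors, dot product *is* cosine similarity, which is why the non-sqlite-vss fallback can score with a bare dot product. A sketch:

```ts
// For unit-length vectors, dot(a, b) = |a||b|cos(theta) = cos(theta),
// so the fallback path needs no extra normalization at query time.
function dot(a: number[], b: number[]): number {
  let sum = 0;
  for (let i = 0; i < a.length; i++) sum += a[i] * b[i];
  return sum;
}

function l2Normalize(v: number[]): number[] {
  const norm = Math.sqrt(dot(v, v)) || 1; // guard against the zero vector
  return v.map((x) => x / norm);
}

const a = l2Normalize([0.3, 0.9, 0.1]);
const b = l2Normalize([0.2, 0.8, 0.4]);
console.log(dot(a, b)); // cosine similarity in [-1, 1]
```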
diff --git a/src/services/time-parser.ts b/src/services/time-parser.ts
index d184673..3419f45 100644
--- a/src/services/time-parser.ts
+++ b/src/services/time-parser.ts
@@ -4,7 +4,7 @@
  */
 
 import cron from "node-cron";
-import { OllamaAdapter } from "./ollama-adapter.js";
+import { LemonadeAdapter } from "./lemonade-adapter.js";
 import { ModelRouter } from "./model-router.js";
 
 const SYSTEM_PROMPT = `You are a time expression parser. Your task is to convert natural language time expressions into cron expressions.
@@ -52,11 +52,11 @@ export interface ParseTimeExpressionResult {
 }
 
 export class TimeParserService {
-  private readonly ollama: OllamaAdapter;
+  private readonly lemonade: LemonadeAdapter;
   private readonly modelRouter: ModelRouter;
 
-  constructor(options?: { ollama?: OllamaAdapter; modelRouter?: ModelRouter }) {
-    this.ollama = options?.ollama ?? new OllamaAdapter();
+  constructor(options?: { lemonade?: LemonadeAdapter; modelRouter?: ModelRouter }) {
+    this.lemonade = options?.lemonade ?? new LemonadeAdapter();
     this.modelRouter = options?.modelRouter ?? new ModelRouter();
   }
 
@@ -76,7 +76,7 @@ export class TimeParserService {
     try {
       // Use "small" model for this task as it's relatively straightforward
       const model = this.modelRouter.getModel("small");
-      const result = await this.ollama.chat(
+      const result = await this.lemonade.chat(
         [
           { role: "system", content: SYSTEM_PROMPT },
           { role: "user", content: prompt },
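The tests at the top of this diff pin down the contract the service enforces on model output: parse JSON, require a `cronExpr` field, and reject anything `node-cron` won't validate. A reduced sketch of that acceptance step (field name and error strings mirror the tests; the service's actual parsing internals aren't shown in this hunk):

```ts
import cron from "node-cron";

// Sketch of validating LLM output before scheduling anything with it.
function acceptCron(llmText: string): string {
  const parsed = JSON.parse(llmText) as { cronExpr?: string };
  if (typeof parsed.cronExpr !== "string") {
    throw new Error("cronExpr missing or invalid");
  }
  if (!cron.validate(parsed.cronExpr)) {
    throw new Error("Generated cron expression is invalid");
  }
  return parsed.cronExpr;
}

console.log(acceptCron('{"cronExpr": "0 9 * * *"}')); // "0 9 * * *"
```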
"5m") or a number of seconds. */ smallModelKeepAlive: string | number; /** @@ -99,11 +99,11 @@ export interface SkillsConfig { } export interface WhisperConfig { - /** Whisper model to use for transcription (e.g. "base.en", "small", "medium"). */ + /** Whisper model to use for transcription (e.g. "Whisper-Tiny", "Whisper-Base"). */ modelName: string; /** Language code for transcription ("auto" for auto-detect). */ language: string; - /** Directory where Whisper model files are stored (downloaded on first use). */ + /** Directory where Whisper model files are stored (unused when using Lemonade API). */ modelDir: string; } @@ -114,14 +114,14 @@ export interface FileProcessorConfig { maxFileSizeBytes: number; /** Files with text content shorter than this are inlined into the planner goal directly. */ textMaxInlineChars: number; - /** Ollama model used for image OCR and description. */ + /** Lemonade model used for image OCR and description. */ ocrModel: string; /** Whether image OCR/description is enabled. */ ocrEnabled: boolean; } export interface AppConfig { - ollama: OllamaConfig; + lemonade: LemonadeConfig; telegram: TelegramConfig; taskMemory: TaskMemoryConfig; logger: LoggerConfig; @@ -139,8 +139,8 @@ export interface AppConfig { } const DEFAULT_CONFIG: AppConfig = { - ollama: { - baseUrl: "http://127.0.0.1:11434", + lemonade: { + baseUrl: "http://127.0.0.1:8000/api/v1", timeoutMs: 600_000, // 10 minutes default retries: 3, numCtx: 16384, @@ -157,7 +157,7 @@ const DEFAULT_CONFIG: AppConfig = { logFile: "events.log", }, rag: { - embedModel: "nomic-embed-text", + embedModel: "text-embedding-v3", // Common OpenAI/Lemonade embed model name dbPath: "data/rag.sqlite", embeddingDimensions: 768, }, @@ -169,9 +169,9 @@ const DEFAULT_CONFIG: AppConfig = { dbPath: "data/cron.sqlite", }, modelRouter: { - small: "qwen3:0.6b", - medium: "qwen3:1.7b", - large: "qwen3:4b", + small: "qwen2.5:0.5b", + medium: "qwen2.5:1.5b", + large: "qwen2.5:7b", plannerComplexity: "small", }, executor: { @@ -193,7 +193,7 @@ const DEFAULT_CONFIG: AppConfig = { skillsDir: "skills", }, whisper: { - modelName: "base", + modelName: "Whisper-Tiny", language: "auto", modelDir: "data/whisper-models", }, @@ -201,7 +201,7 @@ const DEFAULT_CONFIG: AppConfig = { uploadDir: "data/uploads", maxFileSizeBytes: 52_428_800, // 50 MB textMaxInlineChars: 8_000, - ocrModel: "glm-ocr:q8_0", + ocrModel: "qwen3-vl", ocrEnabled: true, }, maxConcurrentTasks: 1, @@ -222,11 +222,11 @@ function loadConfigFile(): Partial { function mergeEnv(config: AppConfig): AppConfig { return { - ollama: { - baseUrl: process.env.OLLAMA_BASE_URL ?? config.ollama.baseUrl, - timeoutMs: Number(process.env.OLLAMA_TIMEOUT_MS) || config.ollama.timeoutMs, - retries: Number(process.env.OLLAMA_RETRIES) || config.ollama.retries, - numCtx: Number(process.env.OLLAMA_NUM_CTX) || config.ollama.numCtx, + lemonade: { + baseUrl: process.env.LEMONADE_BASE_URL ?? config.lemonade.baseUrl, + timeoutMs: Number(process.env.LEMONADE_TIMEOUT_MS) || config.lemonade.timeoutMs, + retries: Number(process.env.LEMONADE_RETRIES) || config.lemonade.retries, + numCtx: Number(process.env.LEMONADE_NUM_CTX) || config.lemonade.numCtx, }, telegram: { botToken: process.env.TELEGRAM_BOT_TOKEN ?? 
diff --git a/src/tests/cron-ai.test.ts b/src/tests/cron-ai.test.ts
index bb68e87..44f8d36 100644
--- a/src/tests/cron-ai.test.ts
+++ b/src/tests/cron-ai.test.ts
@@ -6,7 +6,7 @@ import { PROTOCOL_VERSION } from "../shared/protocol.js";
 // Mock dependencies
 vi.mock("../shared/config.js", () => ({
   getConfig: () => ({
-    ollama: { baseUrl: "http://localhost:11434" },
+    lemonade: { baseUrl: "http://localhost:8000/api/v1" },
     modelRouter: { plannerComplexity: "small" },
     taskMemory: { dbPath: ":memory:" },
     cron: { dbPath: ":memory:" },
diff --git a/src/utils/whisper-transcriber.ts b/src/utils/whisper-transcriber.ts
index 6d8425c..2ca4b55 100644
--- a/src/utils/whisper-transcriber.ts
+++ b/src/utils/whisper-transcriber.ts
@@ -1,96 +1,28 @@
 /**
  * Whisper Transcription Utility
  *
- * Speech-to-text transcription using nodejs-whisper (OpenAI Whisper local inference).
- * Accepts a pre-converted .wav file (16 kHz mono PCM — see audio-converter.ts) and
- * returns the transcript as a plain string.
- *
- * On first use, the Whisper model (~200 MB for base.en) is automatically downloaded.
- * A descriptive error guides the user to retry after the download completes.
+ * Speech-to-text transcription using Lemonade-Server (OpenAI-compatible).
+ * Returns the transcript as a plain string.
 */
 
-import { nodewhisper } from "nodejs-whisper";
-import type { WhisperOptions } from "nodejs-whisper/dist/types.js";
 import { getConfig } from "../shared/config.js";
-
-/** Maximum time to wait for transcription (5 minutes for long recordings). */
-const TRANSCRIPTION_TIMEOUT_MS = 5 * 60 * 1000;
+import { LemonadeAdapter } from "../services/lemonade-adapter.js";
 
 /**
- * Transcribe a WAV audio file using the configured Whisper model.
+ * Transcribe a WAV audio file using the configured Whisper model via Lemonade.
  *
  * @param wavPath Absolute path to a 16 kHz mono PCM WAV file.
- *                The caller is responsible for deleting this file after use.
  * @returns Promise resolving to the trimmed transcript string.
- * @throws Error with descriptive message on failure, including a
- *         user-friendly message when the model is still downloading.
 */
 export async function transcribeAudio(wavPath: string): Promise<string> {
   const cfg = getConfig().whisper;
+  const lemonade = new LemonadeAdapter();
 
-  // Build WhisperOptions — only set language when it's not "auto"
-  // to satisfy exactOptionalPropertyTypes strictness
-  const whisperOptions: WhisperOptions = { outputInText: true };
-  if (cfg.language !== "auto") {
-    whisperOptions.language = cfg.language;
-  }
-
-  /**
-   * Race nodewhisper against a timeout.
-   * nodewhisper has no built-in timeout, so we enforce one here.
-   */
-  const transcriptionPromise = nodewhisper(wavPath, {
-    modelName: cfg.modelName,
-    // autoDownloadModelName triggers an automatic download if the model is absent
-    autoDownloadModelName: cfg.modelName,
-    whisperOptions,
-    // Suppress verbose progress logs; re-surface genuine errors via stderr
-    logger: {
-      debug: () => { },
-      log: () => { },
-      error: (msg: unknown) => {
-        if (typeof msg === "string" && msg.toLowerCase().includes("error")) {
-          process.stderr.write(`[whisper] ${msg}\n`);
-        }
-      },
-    },
-  });
-
-  const timeoutPromise = new Promise((_, reject) =>
-    setTimeout(
-      () =>
-        reject(
-          new Error(
-            `whisper-transcriber: transcription timed out after ` +
-              `${TRANSCRIPTION_TIMEOUT_MS / 1000}s. ` +
-              "The audio may be too long or the model may still be downloading.",
-          ),
-        ),
-      TRANSCRIPTION_TIMEOUT_MS,
-    ),
-  );
-
-  let raw: string;
   try {
-    raw = await Promise.race([transcriptionPromise, timeoutPromise]);
+    const result = await lemonade.transcribe(wavPath, cfg.modelName, cfg.language);
+    return result.text.trim();
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);
-
-    // Detect the model-not-downloaded scenario
-    if (
-      message.includes("No such file") ||
-      message.includes("ENOENT") ||
-      message.includes("model") ||
-      message.includes("download")
-    ) {
-      throw new Error(
-        "whisper-transcriber: Audio transcription model is downloading for the " +
-        "first time (~200 MB). Please retry in a few minutes.",
-      );
-    }
-
     throw new Error(`whisper-transcriber: transcription failed — ${message}`);
   }
-
-  return raw.trim();
 }
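End-to-end, the voice path is now: convert to 16 kHz mono WAV (audio-converter), then let Lemonade's `/audio/transcriptions` endpoint do the work — no local model download involved anymore. A sketch; the input path is a placeholder, and `convertToWav`'s exact signature is assumed from its usage, not shown in this diff:

```ts
import { convertToWav } from "./src/utils/audio-converter.js";
import { transcribeAudio } from "./src/utils/whisper-transcriber.js";

// Hypothetical voice-note flow; the file path is illustrative.
const wavPath = await convertToWav("./data/uploads/voice-note.ogg");

// transcribeAudio reads whisper.modelName / whisper.language from config
// and posts the WAV to the Lemonade server.
const transcript = await transcribeAudio(wavPath);
console.log(transcript);
```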