diff --git a/_images/Gemini_Generated_Image_eypuexeypuexeypu.png b/_images/Gemini_Generated_Image_eypuexeypuexeypu.png new file mode 100644 index 0000000..ec6a88a Binary files /dev/null and b/_images/Gemini_Generated_Image_eypuexeypuexeypu.png differ diff --git a/_images/Gemini_Generated_Image_mrh4ogmrh4ogmrh4.png b/_images/Gemini_Generated_Image_mrh4ogmrh4ogmrh4.png new file mode 100644 index 0000000..0e12aab Binary files /dev/null and b/_images/Gemini_Generated_Image_mrh4ogmrh4ogmrh4.png differ diff --git a/_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png b/_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png new file mode 100644 index 0000000..d1a0a41 Binary files /dev/null and b/_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png differ diff --git a/_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png b/_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png new file mode 100644 index 0000000..10c21bf Binary files /dev/null and b/_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png differ diff --git a/_images/Local-LLM-vs-Cloud-API-24-Month-Cost-Comparison.png b/_images/Local-LLM-vs-Cloud-API-24-Month-Cost-Comparison.png new file mode 100644 index 0000000..8292ea3 Binary files /dev/null and b/_images/Local-LLM-vs-Cloud-API-24-Month-Cost-Comparison.png differ diff --git a/_images/SCR-20260305-jmid.png b/_images/SCR-20260305-jmid.png new file mode 100644 index 0000000..04d8586 Binary files /dev/null and b/_images/SCR-20260305-jmid.png differ diff --git a/_images/SCR-20260305-jmjk.png b/_images/SCR-20260305-jmjk.png new file mode 100644 index 0000000..22a9158 Binary files /dev/null and b/_images/SCR-20260305-jmjk.png differ diff --git a/_images/SCR-20260305-jmkx.png b/_images/SCR-20260305-jmkx.png new file mode 100644 index 0000000..4634fb9 Binary files /dev/null and b/_images/SCR-20260305-jmkx.png differ diff --git a/_images/SCR-20260305-jmme.png b/_images/SCR-20260305-jmme.png new file mode 100644 index 0000000..5bbfc58 Binary files /dev/null and 
b/_images/SCR-20260305-jmme.png differ diff --git a/_images/agents.webp b/_images/agents.webp new file mode 100644 index 0000000..9a932d4 Binary files /dev/null and b/_images/agents.webp differ diff --git a/_images/ai-prices.png b/_images/ai-prices.png new file mode 100644 index 0000000..cd9473b Binary files /dev/null and b/_images/ai-prices.png differ diff --git a/_images/dontlike.png b/_images/dontlike.png new file mode 100644 index 0000000..bd5b539 Binary files /dev/null and b/_images/dontlike.png differ diff --git a/_images/llm-inference.webp b/_images/llm-inference.webp new file mode 100644 index 0000000..56be948 Binary files /dev/null and b/_images/llm-inference.webp differ diff --git a/package-lock.json b/package-lock.json index 7b937c1..7cd23bb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5792,4 +5792,4 @@ } } } -} \ No newline at end of file +} diff --git a/package.json b/package.json index 0ae40cc..c07fd47 100644 --- a/package.json +++ b/package.json @@ -47,4 +47,4 @@ "typescript": "^5.7.2", "vitest": "^4.0.18" } -} \ No newline at end of file +} diff --git a/presentation.md b/presentation.md new file mode 100644 index 0000000..bb4a484 --- /dev/null +++ b/presentation.md @@ -0,0 +1,302 @@ +--- +marp: true +theme: default +class: + - lead +--- + +# I built my own AI-agent. Why? +**A journey from reading about AI to building a custom agent framework** + +![](./_docs/images/header.png) + +--- +layout: image-left +image: ./_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png +--- + +## Mikhail Larchanka +- Principal Software Engineer at **Sytac** +- https://larchanka.com +- https://youtube.com/@larchanka +- https://github.com/larchanka +- https://x.com/mlarchanka + +--- + +## ๐ŸŒ The AI Catch-Up +- The AI landscape is evolving at breakneck speed every single day. +- New models, new frameworks (LangChain, AutoGen), new methodologies. +- It feels like the revolution is passing by. +- **The Challenge:** I do not work with AI in my daily job. 
Staying actively involved requires intentional effort beyond standard day-to-day tasks. + +![](./_images/agents.webp) + +--- +layout: image-right +image: ./_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png +--- + +## ๐Ÿ“š The Trap of "Reading vs. Doing" +- I read a lot of papers, articles, and documentation. +- **The Reality Check:** Reading builds awareness, but not genuine *knowledge* or intuition. +- Without hands-on practice, you don't discover the edge cases, the latency issues, or the prompt fragility. +- I spent time checking what others were building in the space, and understanding their pain points. + +--- +layout: image-right +image: ./_images/dontlike.png +backgroundSize: contain +--- +## ๐Ÿ’ก The Catalyst: My Own Ideas +- While observing existing solutions, I realized I had different ideas on how agents should operate. +- Existing frameworks often felt either too bloated, too confusing, or too rigid. +- I wanted to build something tailored to my intuition of how a system should reason and interact with an environment. + + +--- +layout: image-right +image: ./_images/ai-prices.png +backgroundSize: contain +--- +## ๐Ÿ’ธ Cost-Driven Architecture +- **The Goal:** Make learning and relentless experimentation "cheap." +- Relying on cloud APIs (GPT-4, Claude) for agentic loopsโ€”which run autonomously making dozens of calls and mistakesโ€”gets expensive quickly. +- **The Solution:** Local LLMs. +- Complete freedom to experiment, fail, retry, and loop infinitely without worrying about API bills. + +--- +layout: image-left +image: ./_images/llm-inference.webp +backgroundSize: contain +--- + +## ๐Ÿค– Evaluating Local LLMs +- Not all models are created equal for agentic tasks. +- **Benchmarking for my agent:** + - Need strong coding and reasoning capabilities. + - Need reliable JSON/tool-calling formatting. + - Need fast inference speed (tokens/sec) for autonomous, multi-step loops. +- Explored running models locally using tools like Ollama or Lemonade. 
+- Tested how models handle context degradation on local machine hardware. + +--- + +## ๐Ÿ”€ Dynamic Model Routing +- Running a massive model (like Mixtral) for every tiny task is slow and overkill. +- I built a **Model Router** that dynamically selects models based on task complexity. +- **The Flow:** + 1. **Planner:** Evaluates the user's intent and forces the plan into a `complexity` bucket (`"small"`, `"medium"`, or `"large"`). + 2. **Injection:** The Executor injects `_complexity` into the payload for every node. + 3. **Router Resolution:** The `GeneratorService` checks the complexity and maps it purely via `config.json`. + 4. *Small -> Llama3:8B (Fast system loops)* | *Medium -> Qwen2.5 (Standard)* | *Large -> Mixtral (Deep research)*. + +--- + +## ๐Ÿ›ก๏ธ Context & Token Safety +- I didn't want to calculate exact tokens with heavy libraries (like `tiktoken`) on every single loop. +- **My Strategy (Heuristics & Compression):** + - **Safety Truncation:** If a tool (like a massive `http_get` web scrape) returns over 30,000 characters, the `ExecutorAgent` aggressively truncates it. + - **Prompt Summarization:** Instead of keeping an infinitely growing chat history, the Planner produces a `summarize` node using a specialized `SUMMARIZER_SYSTEM_PROMPT` to compress old context dynamically. + - Tracking the standard `usage` vectors from OpenAI-compatible tools (`prompt_tokens`, `total_tokens`) for observability rather than strict hard-blocking. + +--- + +## ๐Ÿ—๏ธ The Ultimate Testing Playground +- The framework wasn't just a final product; it was a testbed. +- **Objectives:** + - Test how to actually *code* with agents in different structural scenarios. + - Experiment with system architecture and modularity design. + - Learn how to construct proper, dynamic task-planning prompts. + - Create functional applications autonomously based on structured tasks. +
+
+
+```mermaid +flowchart LR + Planner[Planner] --> Executor[Executor] + Executor --> Tools[Tools] + Tools --> Executor + Executor --> Planner +``` + +--- + +## โš™๏ธ The Core Loop: Solving Communication +- **The Problem:** LLMs naturally output raw text. Agents need structured, executable actions. +- **The Implementation:** + - Forcing the local LLM to output valid JSON representations of tool calls. + - Handling parsing errors seamlessly through self-correction loops. + - Designing a robust schema that the LLM understands and adheres to. + - Distinguishing between "Thinking" (reasoning) and "Acting" (tool execution). + +--- + +## โš™๏ธ The Core Loop: Solving Communication + +### Request + +``` +{ + "id": "uuid", + "from": "core", + "to": "planner", + "type": "plan.create", + "version": "1.0", + "timestamp": 1704067200000, + "payload": {} +} +``` + + +--- + +## โš™๏ธ The Core Loop: Solving Communication + +### Response + +``` +{ + "id": "same-as-request", + "from": "planner", + "to": "core", + "type": "response", + "version": "1.0", + "timestamp": 1704067200000, + "payload": { + "status": "success", + "result": {} + } +} +``` + +--- +layout: image +image: ./_images/SCR-20260305-jmkx.png +--- + + +--- + +## ๐Ÿ› ๏ธ Equipping the Agent: Tools & Skills +- Agents are useless without hands. +- I built a modular tool host system. +- Standardized interfaces for tools: `name`, `description`, `parameters`, `execute()`. +- Grouping tools into highly specialized "Skills" (e.g., File System, Terminal, Browser). +- Optimization: Injecting only relevant tool schemas into the prompt to preserve context. + + +--- +layout: image +image: ./_images/SCR-20260305-jmme.png +--- + + +--- + +## ๐Ÿ”Œ Standardizing with MCP +- **Building MCP (Model Context Protocol) Integration:** +- Why reinvent the wheel for every custom tool or data source? +- Implementing MCP allowed my agent to connect to external, standardized tools seamlessly. 
+- Learned how to expose local environment capabilities (files, API connections) to an agent through standardized, secure boundaries. + +--- + +## ๐Ÿ›๏ธ The Layered Memory Architecture +To prevent context contamination and keep prompt sizes manageable, I separated memory into distinct tiers: + +```mermaid +flowchart LR + classDef st fill:#fef3c7,stroke:#b45309,stroke-width:2px,color:#000 + classDef mt fill:#dbeafe,stroke:#1d4ed8,stroke-width:2px,color:#000 + classDef lt fill:#dcfce7,stroke:#15803d,stroke-width:2px,color:#000 + + subgraph ST ["Short-Term (In-Context)"] + direction TB + Conv["๐Ÿ’ฌ Conversation"]:::st + Session["๐Ÿ“ Scratchpad"]:::st + end + + subgraph MT ["Mid-Term (SQLite Task Store)"] + direction TB + Task["โš™๏ธ DAG State"]:::mt + Reflect["๏ฟฝ Reflections"]:::mt + end + + subgraph LT ["Long-Term (Persistent)"] + direction TB + RAG["๐Ÿ”ฎ Semantic RAG"]:::lt + Struct["๐Ÿ’พ Structured Data"]:::lt + end + + ST -->|Initiates tasks| MT + MT -->|Queries knowledge| LT + LT -.->|Injects context| ST +``` + +--- + +## ๐Ÿ•’ When is each memory used? + +- **Short-Term (Conversation & Session):** + - **When:** Active chatting, holding the immediate goal, fast active reasoning. + - **Lifecycle:** Evicted rapidly to save prompt context. +- **Mid-Term (Task Memory & State - SQLite):** + - **When:** Tracking multi-step execution graphs (DAGs), pausing/resuming tasks, storing critic reflections & retry counts. + - **Lifecycle:** Persists across agent loops; prevents the agent from getting stuck in circles. +- **Long-Term (Vector DB & File System):** + - **When:** Finding unseen documents or entire codebase structures based on semantic meaning. + - **Lifecycle:** Permanent; grows over time. + +--- + +## ๐Ÿง  Long-Term Memory: RAG from Scratch +- Local LLMs have finite (and hardware-bound) context windows. +- You can't simply fit an entire large codebase into a localized 8k context window. 
+- **Building RAG (Retrieval-Augmented Generation):** + - Used `sqlite-vss` for K-Nearest Neighbors (KNN) vector search natively inside SQLite. + - Implementing fallback to dot-product calculations if the VSS extension is unavailable. + - Generating and storing embeddings locally to retrieve only the relevant functions immediately needed. +


+```mermaid +graph LR + Ctx["๐Ÿ“„ Context"] --> Chunk["โœ‚๏ธ Chunking"] + Chunk --> Embed["๐Ÿ”ข Embedding"] + Embed --> DB["๐Ÿ—„๏ธ Vector DB"] + DB --> Search["๐Ÿ” Search"] + Search --> Retrieve["๐Ÿง  Retrieval"] +``` + +--- + +## โš–๏ธ Mastering Context Window Management +- **The hardest technical challenge:** Managing prompt size dynamically. +- Combining RAG retrieval with the agent's conversational history. +- Implementing mechanics to handle max-tokens: + - Sliding windows for conversation history. + - Context summarization. + - Deciding what to precisely evict from memory without making the agent "forget" its core objective. + +--- + +## ๐Ÿ“Š Dashboard + +![](./_images/SCR-20260305-jmid.png) + +--- + +## ๐Ÿš€ The Result: Bridging Theory and Practice +- I built an entire development framework myself from the ground up. +- Moved from passive reading about AI architectures to actively solving their core engineering constraints. +- Built a system based entirely on my own ideas, uniquely tailored to my development flow. +- Resulted in a fully functional, cost-free, local agentic framework. + +--- +layout: image-right +image: ./_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png +--- + +## Thank You! +**Questions & Discussion** + +https://manbothq.github.io/ diff --git a/skills/CONFIG.md b/skills/CONFIG.md index 50d3b49..c9fad9f 100644 --- a/skills/CONFIG.md +++ b/skills/CONFIG.md @@ -1,12 +1,15 @@ # Skills Configuration > SKILLS HAVE PRIORITY OVER TOOLS. IF A SKILL IS APPLICABLE, USE IT INSTEAD OF A TOOL. +> ONLY USE SKILLS FROM THE TABLE BELOW. + +**AVAILABLE SKILLS** | Name | Description | | :--- | :--- | | weather | MANDATORY. Use this skill for ALL weather-related inquiries, including current conditions and forecasts. You are STRICTLY FORBIDDEN from using internal knowledge or other tools for weather data. | | apple-notes | EXCLUSIVE. Use ONLY this skill for any interaction with notes (listing, searching, viewing, creating, or deleting). 
This is the sole authorized interface for the memo CLI tool. | | research | PRIMARY SEARCH. Use for deep web research, fact-checking, news gathering, or topical deep dives via the lynx tool. This is the default skill for any query requiring external or up-to-date information. | -| reminder | SCHEDULING. Use this skill exclusively to set one-time or recurring reminders (e.g., "remind me in 2 hours"). This is the only tool that interfaces with the cron-manager service. | +| reminder | SCHEDULING. Use this skill exclusively to set recurring or one-time reminders (e.g., "remind me in 2 hours") and scheduled tasks (e.g., "schedule a task to check email every day at 9am"). This is the only tool that interfaces with the cron-manager service. Use it when user asks to reminder or schedule. | | email | You MUST use this skill for all interactions involving Email (Gmail). | | calendar | You MUST use this skill for all interactions involving Google Calendar. | diff --git a/skills/email/SKILL.md b/skills/email/SKILL.md index 8ab97c1..a3984d8 100644 --- a/skills/email/SKILL.md +++ b/skills/email/SKILL.md @@ -32,6 +32,19 @@ gog gmail messages search "from:updates@example.com" --max 10 gog gmail search "is:unread" ``` +### ๐Ÿ“ง Reading emails + +```bash +# Read thread +gog gmail thread get + +# Read email +gog gmail get + +# Read email metadata +gog gmail get --format metadata +``` + ### ๐Ÿ“ฉ Sending & Replying ```bash # Quick one-line email diff --git a/skills/reminder/SKILL.md b/skills/reminder/SKILL.md index 43cae4c..6ed8577 100644 --- a/skills/reminder/SKILL.md +++ b/skills/reminder/SKILL.md @@ -19,23 +19,25 @@ Set up one-time or recurring reminders for the user. 1. **Extract the Task**: Identify what the user wants to be reminded about. 2. **Extract the Time**: Identify the temporal expression (e.g., "in 2 hours", "every day at 8am"). -3. **Schedule**: Call the `schedule_reminder` tool with the extracted time and message. +3. 
**Schedule**: Call the `schedule_reminder` tool with the extracted time, message, and `isAction` flag. +4. **User instructions**: If user's request contains instructions and actions for YOU to DO something (e.g., "check email", "search the web"), set `isAction: true` and include the instructions in the `message`. If it's just a passive text reminder to the user to do something themselves, omit `isAction` or set it to `false`. ## Tool: schedule_reminder **Arguments**: - `time`: (string) Natural language time expression (e.g., "in 5 minutes", "tomorrow at 3pm", "every Monday"). -- `message`: (string) The content of the reminder. +- `message`: (string) The content of the reminder or the instruction for the action to take. +- `isAction`: (boolean, optional) Set to `true` if the reminder requires YOU (the AI) to execute a task, such as checking emails or searching the web. Set to `false` or omit if it's just a text reminder for the user. ## Strategy -- Be precise with the `message`. If the user says "remind me to drink water", the message should be "Drink water". +- Be precise with the `message`. If user's request contains instructions for the AI to perform an action, include them **ALL** into the reminder message and be SURE to set `isAction: true`. - If the user provides a vague time, ask for clarification if necessary, or use your best judgment (e.g., "later today" could be "in 4 hours"). - The system will automatically handle parsing the natural language `time` string into a cron expression. ## Example Workflow -User Goal: "remind me to call Mom in 20 minutes" +User Goal: "check my email and mark spam in 20 minutes" -1. Call `schedule_reminder(time="in 20 minutes", message="Call Mom")`. -2. Respond to the user: "Sure! I'll remind you to Call Mom in 20 minutes." +1. Call `schedule_reminder(time="in 20 minutes", message="Check inbox for new email. Mark spam", isAction=true)`. +2. Respond to the user: "Sure! Your email will be checked in 20 minutes." 
diff --git a/skills/weather/SKILL.md b/skills/weather/SKILL.md index fbbd479..70b10de 100644 --- a/skills/weather/SKILL.md +++ b/skills/weather/SKILL.md @@ -2,6 +2,10 @@ Get current weather conditions and forecasts. +## IMPORTANT! + +Only use shell tool and `curl` + ## When to Use โœ… **USE this skill when:** diff --git a/src/adapters/telegram-adapter.ts b/src/adapters/telegram-adapter.ts index f6d7246..4342146 100644 --- a/src/adapters/telegram-adapter.ts +++ b/src/adapters/telegram-adapter.ts @@ -75,15 +75,21 @@ function getAllowedUserIds(): Set | null { } /** - * Escape special characters for Telegram MarkdownV2 format. - * According to Telegram API docs, these characters must be escaped: _ * [ ] ( ) ~ ` > # + - = | { } . ! - * Inside (...) of a [link](url) only \ and ) must be escaped. - * We also escape \ globally as it's the escape character itself. + * Escape HTML special characters for Telegram HTML parse mode. + * Only <, > and & need escaping in Telegram HTML. */ -function escapeMarkdownV2(text: string): string { - // Characters that need to be escaped in MarkdownV2 - const specialChars = /([\\_*\[\]()~`>#+\-=|{}.!])/g; - return text.replace(specialChars, "\\$1"); +function escapeHtml(text: string): string { + return text + .replace(/&/g, "&") + .replace(//g, ">"); +} + +/** + * Strip HTML tags from text for plain-text fallback. + */ +function stripHtmlTags(text: string): string { + return text.replace(/<[^>]*>/g, ""); } function createEnvelope(type: string, to: string, payload: T): Envelope { @@ -137,16 +143,42 @@ function main(): void { chatId: number, text: string, options?: TelegramBot.SendMessageOptions, - originalText?: string + originalText?: string, + isHtmlContent = false, + retryCount = 0 ): Promise { + const MAX_RETRIES = 3; const messageText = text?.trim() ? text : "[EMPTY_RESPONSE]"; + + const finalOptions: TelegramBot.SendMessageOptions = { + ...options, + parse_mode: "HTML" as any + }; + + // If this is NOT LLM/HTML content (i.e. 
system messages), HTML-escape it + const finalText = isHtmlContent ? messageText : escapeHtml(messageText); + try { - await bot.sendMessage(chatId, messageText, options); + await bot.sendMessage(chatId, finalText, finalOptions); } catch (err: any) { + // transient errors: retry + const isTransient = + err.code === 'ECONNRESET' || + err.code === 'ETIMEDOUT' || + err.code === 'EFATAL' || + err.message?.includes("socket hang up"); + + if (isTransient && retryCount < MAX_RETRIES) { + const delay = 1000 * Math.pow(2, retryCount); + console.warn(`[telegram-adapter] Transient error (${err.code || err.message}), retrying in ${delay}ms... (attempt ${retryCount + 1})`); + await new Promise(resolve => setTimeout(resolve, delay)); + return sendToUser(chatId, text, options, originalText, isHtmlContent, retryCount + 1); + } + // If error is related to parsing entities, retry with plain text if (err.response?.body?.description?.includes("can't parse entities")) { - console.warn(`Telegram fallback: Failed to parse entities, retrying as plain text. Error: ${err.response.body.description}`); - const fallbackText = (originalText?.trim() ? originalText : messageText); + console.warn(`Telegram fallback: Failed to parse HTML entities, retrying as plain text. Error: ${err.response.body.description}`); + const fallbackText = stripHtmlTags(originalText?.trim() ? originalText : messageText); await bot.sendMessage(chatId, fallbackText, { ...options, parse_mode: undefined }).catch((innerErr) => { console.error("Telegram critical send error (fallback failed):", innerErr); }); @@ -408,13 +440,11 @@ function main(): void { if (envelope.type === "telegram.send") { const pl = envelope.payload as TelegramSendPayload; if (typeof pl.chatId === "number" && typeof pl.text === "string") { - // Escape text ONLY if explicitly requested MarkdownV2 - const escapedText = pl.parseMode === "MarkdownV2" ? 
escapeMarkdownV2(pl.text) : pl.text; const opts: TelegramBot.SendMessageOptions = { parse_mode: pl.parseMode as any, ...(pl.silent === true && { disable_notification: true }), }; - sendToUser(pl.chatId, escapedText, opts, pl.text); + sendToUser(pl.chatId, pl.text, opts, pl.text, true); } return; } @@ -423,11 +453,10 @@ function main(): void { if (envelope.type === "telegram.progress") { const pl = envelope.payload as TelegramProgressPayload; if (typeof pl.chatId === "number" && typeof pl.text === "string") { - const escapedText = pl.parseMode === "MarkdownV2" ? escapeMarkdownV2(pl.text) : pl.text; const opts: TelegramBot.SendMessageOptions = { parse_mode: pl.parseMode as any, }; - sendToUser(pl.chatId, escapedText, opts, pl.text); + sendToUser(pl.chatId, pl.text, opts, pl.text, true); } return; } @@ -438,21 +467,20 @@ function main(): void { if (pl.status === "success" && pl.result && typeof pl.result === "object") { const r = pl.result as { chatId?: number; text?: string; reminders?: unknown[]; message?: string; parseMode?: "HTML" | "Markdown" | "MarkdownV2" }; if (typeof r.chatId === "number" && typeof r.text === "string") { - const escapedText = r.parseMode === "MarkdownV2" ? escapeMarkdownV2(r.text) : r.text; const opts: TelegramBot.SendMessageOptions = { parse_mode: r.parseMode as any, }; - sendToUser(r.chatId, escapedText, opts, r.text); + sendToUser(r.chatId, r.text, opts, r.text, true); } else if (typeof r.chatId === "number" && r.reminders) { // Handle reminder list response const reminders = r.reminders as Array<{ id: string; cronExpr: string; reminderMessage?: string }>; if (reminders.length === 0) { - sendToUser(r.chatId, "No active reminders."); + sendToUser(r.chatId, "๐Ÿซ™ No active reminders."); } else { const formatted = reminders .map((rem) => `ID: ${rem.id}\nTime: ${rem.cronExpr}\nMessage: ${rem.reminderMessage ?? 
"N/A"}`) .join("\n\n---\n\n"); - sendToUser(r.chatId, `Active reminders:\n\n${formatted}`); + sendToUser(r.chatId, `โฐ Active reminders:\n\n${formatted}`); } } else if (typeof r.chatId === "number" && r.message) { sendToUser(r.chatId, r.message); diff --git a/src/agents/executor-agent.ts b/src/agents/executor-agent.ts index 0d46f2c..60d367e 100644 --- a/src/agents/executor-agent.ts +++ b/src/agents/executor-agent.ts @@ -7,8 +7,8 @@ */ const MAX_CONCURRENT_NODES = 5; -const MAX_REVISION_CYCLES = 10; -const MAX_SKILL_TURNS = 15; +const MAX_REVISION_CYCLES = 20; +const MAX_SKILL_TURNS = 100; const SKILL_TOOLS: any[] = [ { @@ -49,7 +49,8 @@ const SKILL_TOOLS: any[] = [ type: "object", properties: { time: { type: "string", description: "When to remind (e.g., 'in 2 hours', 'every Monday at 9am', 'tomorrow at 3pm')" }, - message: { type: "string", description: "The content of the reminder (what to remind about)" } + message: { type: "string", description: "The content of the reminder (what to remind about) or the instruction for an action." }, + isAction: { type: "boolean", description: "Set to true if you are scheduling a task that requires you (the AI assistant) to execute an action (e.g., 'check email', 'search web'). Omit or set to false if it's just a text reminder for the user." } }, required: ["time", "message"] } @@ -642,6 +643,7 @@ export class ExecutorAgent extends BaseProcess { ): Promise { const input = node.input ?? {}; const nodeInput = input as Record; + const isAction = nodeInput.isAction === true || nodeInput.isAction === "true"; // Extract cronExpr from input or dependency output let cronExpr = nodeInput.cronExpr as string | undefined; @@ -800,7 +802,7 @@ export class ExecutorAgent extends BaseProcess { version: PROTOCOL_VERSION, payload: { cronExpr, - taskType: "reminder", + taskType: isAction ? "ai_query" : "reminder", payload: { chatId: typeof chatId === "string" ? 
parseInt(chatId, 10) : chatId, reminderMessage, diff --git a/src/agents/prompts/analyzer.ts b/src/agents/prompts/analyzer.ts index 4843527..361bf70 100644 --- a/src/agents/prompts/analyzer.ts +++ b/src/agents/prompts/analyzer.ts @@ -1,36 +1,27 @@ /** * System prompts for the Analyzer role. - * Optimized for Telegram Markdown V2 and natural language synthesis. + * Optimized for Telegram HTML formatting and natural language synthesis. */ +import { TELEGRAM_HTML_FORMAT_INSTRUCTION } from "./telegram-html.js"; + export const ANALYZER_SYSTEM_PROMPT = ` -Professional Data Analyst and Assistant. +Your name is \`๐Ÿงฌ ManBot\`. You are a Professional Data Analyst and Assistant. Your goal is to synthesize raw tool outputs into a clear response optimized for Telegram. -${new Date().toISOString()} +${new Date().toISOString()} -## TELEGRAM FORMATTING RULES: -1. **No Headers**: Do NOT use "# Header". Instead, use **BOLD UPPERCASE** for titles. -2. **No Tables**: Markdown tables are not supported. Use structured bullet points (โ€ข) or bold lists. -3. **Strict Syntax**: - - *Bold*: *text* or **text** - - _Italic_: _text_ - - \`Code\`: \`inline code\` or \`\`\`language\n pre-formatted block \`\`\` - - > Quotes: Use for highlighting important information or citations. - - For simple charts or graphs, use \`\`\`language\n pre-formatted block \`\`\`. -4. **Links**: Use [title](url) syntax. - ## ANALYSIS GUIDELINES: -- **Synthesize**: Combine multiple sources. Identify patterns or contradictions. -- **Accuracy**: If data is missing or tools failed, explain this clearly using bold warnings. -- **Tone**: Professional, direct, and conversational. Avoid "As an AI..." or "Here is the data...". +- Synthesize: Combine multiple sources. Identify patterns or contradictions. +- Accuracy: If data is missing or tools failed, explain this clearly using bold warnings. +- Tone: Friendly, direct, and conversational. Avoid "As an AI..." or "Here is the data...". -Output: Pure Telegram Markdown V2. 
-No raw JSON/HTML unless requested. +${TELEGRAM_HTML_FORMAT_INSTRUCTION} +Output: Telegram HTML only. NEVER use Markdown (replace with allowed tags or remove). NEVER use raw JSON. `; /** @@ -40,5 +31,5 @@ export function buildAnalyzerUserPrompt(goal: string, context: string): string { if (!context || !context.trim()) { return `Respond to the user goal directly:\n\n${goal}`; } - return `User Goal: ${goal}\n\nData Context:\n${context}\n\nTask: Synthesize the data to answer the goal. Use Telegram MarkdownV2 (no headers/tables).`; + return `User Goal: ${goal}\n\nData Context:\n${context}\n\nTask: Synthesize the data to answer the goal. Use Telegram HTML formatting (no markdown, no tables).`; } diff --git a/src/agents/prompts/critic.ts b/src/agents/prompts/critic.ts index 72599bd..3c7abeb 100644 --- a/src/agents/prompts/critic.ts +++ b/src/agents/prompts/critic.ts @@ -9,9 +9,19 @@ You are skeptical and detail-oriented. Your mission is to audit the "Draft Outpu ## CRITICAL AUDIT DIMENSIONS: 1. **Telegram Syntax (MANDATORY)**: - - REJECT (REVISE) if the output contains "#" headers. - - REJECT (REVISE) if the output contains markdown tables. - - CHECK for broken markdown tags. + - REJECT (REVISE) if the output contains unsupported syntax. + **Supported HTML tags** + - Bold: text + - Italic: text + - Underline: text + - Strikethrough: text + - Spoiler: text + - Links: text + - Inline code: text + - Code block:
code block
+ - Block quote:
quote
+ - Expandable Block quote (for long quotes):
quote
+ - Code block with language:
code
2. **Factuality**: Flag any hallucinations or "invented" facts. 3. **Completeness**: If the user asked for 5 items and got 3, it is a REVISE. 4. **Safety**: Ensure no harmful or toxic content. @@ -58,7 +68,7 @@ ${safeDraft} Evaluate STRICTLY. Check for: -- Telegram syntax (no headers, no tables). +- Telegram syntax (no headers, no tables, no unsupported tags). - Factuality. - Completeness. - Safety. diff --git a/src/agents/prompts/planner.ts b/src/agents/prompts/planner.ts index e99aa73..7ed6dfe 100644 --- a/src/agents/prompts/planner.ts +++ b/src/agents/prompts/planner.ts @@ -7,7 +7,7 @@ export const PLANNER_SYSTEM_PROMPT = `Strategic Execution Planner IF you can fulfill the user's goal using ONLY your internal knowledge (e.g., greetings, simple math, general questions, "think of X"): -- Create exactly ONE node: { "id": "direct-answer", "type": "generate_text", "service": "model-router", "input": { "prompt": "ANSWER_GOAL" } }. +- Create exactly ONE node: { "id": "direct-answer", "type": "generate_text", "service": "model-router", "input": { "prompt": "ANSWER_GOAL", "system_prompt": "analyzer" } }. - DO NOT use any tools. ELSE: - Proceed with creating a Capability Graph. @@ -270,7 +270,10 @@ ${Object.entries(process.env) const base = `${PLANNER_SYSTEM_PROMPT} ${skillsSection} ${PLANNER_FEW_SHOT_EXAMPLES} -${now} + +OPERATE ONLY WITH THIS DATE IN YOUR PLANS! +Right now: ${now} + ${options?.conversationHistory ? `History Context: ${options.conversationHistory}` : ""} User Goal: ${userMessage} diff --git a/src/agents/prompts/telegram-html.ts b/src/agents/prompts/telegram-html.ts new file mode 100644 index 0000000..742cbba --- /dev/null +++ b/src/agents/prompts/telegram-html.ts @@ -0,0 +1,35 @@ +/** + * Shared Telegram HTML formatting instruction. + * Reusable constant that can be injected into any LLM system prompt + * to ensure output uses only Telegram-supported HTML tags. 
+ */ + +export const TELEGRAM_HTML_FORMAT_INSTRUCTION = `## TELEGRAM HTML FORMATTING RULES: +You MUST format your output using Telegram-supported HTML tags. Do NOT use Markdown syntax. + +1. **No Markdown**: Do NOT use *, **, _, ~~, \`, #, or any Markdown syntax. Use HTML tags only. +2. **No Tables**: HTML tables are not supported by Telegram. Use structured bullet points (โ€ข) or bold lists. +3. **Supported HTML tags**: + - Bold: text + - Italic: text + - Underline: text + - Strikethrough: text + - Spoiler: text + - Links: text + - Inline code: text + - Code block:
code block
+ - Block quote:
quote
+ - Expandable Block quote (for long quotes):
quote
+ - Code block with language:
code
+4. **Special characters**: The characters <, > and & must be replaced with <, > and & respectively when used as literals (not as part of HTML tags). +5. **Line breaks**: Use regular line breaks (newlines). Do NOT use
tags.`; + +/** + * Default system prompt for LLM calls that need Telegram HTML formatting + * but don't have a specialized system prompt (e.g., direct-answer nodes). + */ +export const DEFAULT_TELEGRAM_SYSTEM_PROMPT = `You are a helpful assistant. Respond clearly and concisely. + +${TELEGRAM_HTML_FORMAT_INSTRUCTION} + +Output: Telegram HTML only. No Markdown (Replace with allowed tags or remove). No raw JSON.`; diff --git a/src/core/orchestrator.ts b/src/core/orchestrator.ts index 1eb42f9..2ab4211 100644 --- a/src/core/orchestrator.ts +++ b/src/core/orchestrator.ts @@ -302,7 +302,7 @@ export class Orchestrator { if (chatId != null && conversationId != null) { this.runArchivingPipeline(chatId, conversationId).catch((err) => { ConsoleLogger.error("core", "Archiving pipeline error", err instanceof Error ? err : String(err), envelope); - this.sendToTelegram(chatId, `Archiving failed: ${err instanceof Error ? err.message : String(err)}`); + this.sendToTelegram(chatId, `๐Ÿ˜– Archiving failed: ${err instanceof Error ? err.message : String(err)}`); }); } return; @@ -313,7 +313,7 @@ export class Orchestrator { ConsoleLogger.info("core", `Handling reminder.list request for chatId: ${chatId}`, envelope); this.handleListReminders(chatId, envelope).catch((err) => { ConsoleLogger.error("core", "List reminders error", err instanceof Error ? err.message : String(err), envelope); - this.sendToTelegram(chatId, `Error listing reminders: ${err instanceof Error ? err.message : String(err)}`); + this.sendToTelegram(chatId, `๐Ÿ˜– Error listing reminders: ${err instanceof Error ? err.message : String(err)}`); }); } else { ConsoleLogger.warn("core", "reminder.list missing chatId", envelope); @@ -326,7 +326,7 @@ export class Orchestrator { if (chatId != null && reminderId != null) { this.handleCancelReminder(chatId, reminderId, envelope).catch((err) => { ConsoleLogger.error("core", "Cancel reminder error", err instanceof Error ? 
err.message : String(err), envelope); - this.sendToTelegram(chatId, `Error canceling reminder: ${err instanceof Error ? err.message : String(err)}`); + this.sendToTelegram(chatId, `๐Ÿ˜– Error canceling reminder: ${err instanceof Error ? err.message : String(err)}`); }); } return; @@ -354,7 +354,7 @@ export class Orchestrator { const p = payload as unknown as FileIngestPayload; this.handleFileIngest(p).catch((err) => { ConsoleLogger.error("core", "File ingest error", err instanceof Error ? err : String(err), envelope); - this.sendToTelegram(p.chatId, `File processing error: ${err instanceof Error ? err.message : String(err)}`); + this.sendToTelegram(p.chatId, `๐Ÿ˜– File processing error: ${err instanceof Error ? err.message : String(err)}`); }); return; } @@ -418,7 +418,7 @@ export class Orchestrator { const executor = this.children.get("executor"); const telegram = this.children.get("telegram-adapter"); if (!planner?.stdin.writable || !taskMemory?.stdin.writable || !executor?.stdin.writable || !telegram?.stdin.writable) { - this.sendToTelegram(chatId, "Service unavailable."); + this.sendToTelegram(chatId, "๐Ÿ›‘ Service unavailable."); return; } @@ -587,7 +587,7 @@ export class Orchestrator { text = rawData; } } - this.sendToTelegram(chatId, text, false, "Markdown"); + this.sendToTelegram(chatId, text, false, "HTML"); return; } @@ -713,7 +713,7 @@ export class Orchestrator { } else { // Everything was ignored or failed with no caption if (warnings.length > 0) { - this.sendToTelegram(chatId, "No processable content found in the uploaded files.", true); + this.sendToTelegram(chatId, "โš ๏ธ No processable content found in the uploaded files.", true); } return; } @@ -806,14 +806,14 @@ export class Orchestrator { const modelRouter = this.children.get("model-router"); const ragService = this.children.get("rag-service"); if (!taskMemory?.stdin.writable || !modelRouter?.stdin.writable || !ragService?.stdin.writable) { - this.sendToTelegram(chatId, "Service unavailable for 
archiving."); + this.sendToTelegram(chatId, "โš ๏ธ Service unavailable for archiving."); return; } let tasksEnv: Envelope; try { tasksEnv = await this.sendAndWait(taskMemory, "task.getByConversationId", { conversationId }); } catch { - this.sendToTelegram(chatId, "Archived."); // no history or error + this.sendToTelegram(chatId, "โœ… Archived."); // no history or error return; } const tasksPayload = tasksEnv.payload as { status?: string; result?: { tasks?: Array<{ id: string; goal: string; status: string }> } }; @@ -860,7 +860,7 @@ export class Orchestrator { }); } catch (errEnv) { const err = errEnv as Envelope & { payload?: { message?: string } }; - this.sendToTelegram(chatId, `Archiving failed: ${err.payload?.message ?? "Summarization error"}`); + this.sendToTelegram(chatId, `๐Ÿ˜– Archiving failed: ${err.payload?.message ?? "Summarization error"}`); return; } const summaryPayload = summaryEnv.payload as { status?: string; result?: { text?: string } }; @@ -912,7 +912,7 @@ export class Orchestrator { return; } - this.sendToTelegram(chatIdNum, formattedMessage); + this.sendToTelegram(chatIdNum, formattedMessage, false, "HTML"); } private handleCronAIQueryEvent(envelope: Envelope): void { @@ -948,7 +948,7 @@ export class Orchestrator { const cronManager = this.children.get("cron-manager"); if (!cronManager?.stdin.writable) { ConsoleLogger.warn("core", "Cron manager not available or not writable"); - this.sendToTelegram(chatId, "Cron manager service unavailable."); + this.sendToTelegram(chatId, "โš ๏ธ Cron manager service unavailable."); return; } @@ -975,7 +975,7 @@ export class Orchestrator { if (reminderSchedules.length === 0) { ConsoleLogger.info("core", "No reminders found, sending 'No active reminders' message"); - this.sendToTelegram(chatId, "No active reminders."); + this.sendToTelegram(chatId, "๐Ÿ‘Œ๐Ÿป No active reminders."); return; } @@ -986,18 +986,18 @@ export class Orchestrator { .join("\n\n---\n\n"); const message = `Active 
reminders:\n\n${formatted}`; ConsoleLogger.info("core", `Sending reminder list to chatId ${chatId}: ${message.substring(0, 100)}...`); - this.sendToTelegram(chatId, message); + this.sendToTelegram(chatId, message, false, "HTML"); } catch (err) { const message = err instanceof Error ? err.message : String(err); ConsoleLogger.error("core", `Error in handleListReminders: ${message}`, err instanceof Error ? err : undefined); - this.sendToTelegram(chatId, `Error listing reminders: ${message}`); + this.sendToTelegram(chatId, `๐Ÿ˜จ Error listing reminders: ${message}`); } } private async handleCancelReminder(chatId: number, reminderId: string, _request: Envelope): Promise { const cronManager = this.children.get("cron-manager"); if (!cronManager?.stdin.writable) { - this.sendToTelegram(chatId, "Cron manager service unavailable."); + this.sendToTelegram(chatId, "โš ๏ธ Cron manager service unavailable."); return; } @@ -1005,13 +1005,13 @@ export class Orchestrator { const response = await this.sendAndWait(cronManager, "cron.schedule.remove", { id: reminderId }); const responsePayload = response.payload as { status?: string; result?: { removed?: string } }; if (responsePayload.result?.removed === reminderId) { - this.sendToTelegram(chatId, `Reminder ${reminderId} has been canceled.`); + this.sendToTelegram(chatId, `๐ŸŸข Reminder ${reminderId} has been canceled.`); } else { - this.sendToTelegram(chatId, `Reminder ${reminderId} not found.`); + this.sendToTelegram(chatId, `๐Ÿ˜จ Reminder ${reminderId} not found.`); } } catch (err) { const message = err instanceof Error ? 
err.message : String(err); - this.sendToTelegram(chatId, `Error canceling reminder: ${message}`); + this.sendToTelegram(chatId, `๐Ÿ˜จ Error canceling reminder: ${message}`); } } @@ -1055,12 +1055,13 @@ export class Orchestrator { return chunks; } - private sendToTelegram(chatId: number, text: string, silent?: boolean, parseMode?: "HTML" | "Markdown" | "MarkdownV2"): void { + private sendToTelegram(chatId: number, text: string, silent?: boolean, parseMode?: "HTML" | "Markdown"): void { const telegram = this.children.get("telegram-adapter"); if (!telegram?.stdin.writable) return; - // Split message if it's too long (Telegram limit is 4096 characters) - const chunks = this.splitMessage(text, 4000); // Use 4000 to leave room for continuation markers + // Split message if it's too long (Telegram limit is 4096 characters). + // We use a safe buffer (4000) to leave room for continuation markers. + const chunks = this.splitMessage(text, 4000); chunks.forEach((chunk, index) => { let messageText = chunk; @@ -1083,7 +1084,7 @@ export class Orchestrator { to: "telegram-adapter", type: "telegram.send", version: "1.0", - payload: { chatId, text: messageText, silent, parseMode }, + payload: { chatId, text: messageText, silent, parseMode: parseMode ?? "HTML" }, }; telegram.stdin.write(JSON.stringify(envelope) + "\n"); ConsoleLogger.ipc("core", "โ†’", envelope); diff --git a/src/services/browser-config.ts b/src/services/browser-config.ts index b54bc7e..ec6e216 100644 --- a/src/services/browser-config.ts +++ b/src/services/browser-config.ts @@ -1,3 +1,5 @@ +import * as os from "os"; + /** * Browser configuration utilities for realistic browser behavior and bot detection bypass. * Provides user agents, viewport sizes, and stealth plugin configuration. */ @@ -7,34 +9,29 @@ * Realistic user agents for different browsers and operating systems. * Updated with recent browser versions (2024-2026) to avoid detection.
*/ -export const USER_AGENTS = [ - // Chrome on Windows - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - - // Chrome on macOS - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", - - // Chrome on Linux - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", - - // Firefox on Windows - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", - "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0", - - // Firefox on macOS - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0", - - // Safari on macOS - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15", - - // Edge on Windows - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", -] as const; +export const USER_AGENTS = { + windows: [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 11.0; Win64; x64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", + ], + macos: [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15", + ], + linux: [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", + ] +} as const; + +export type OSGroup = keyof typeof USER_AGENTS; /** * Common viewport sizes matching real-world screen resolutions. @@ -52,14 +49,32 @@ export const VIEWPORTS = [ ] as const; /** - * Returns a random user agent from the predefined list. + * Detects the user's operating system. + */ +export function getUserOS(): OSGroup | "unknown" { + const platform = os.platform(); + if (platform === "win32") return "windows"; + if (platform === "darwin") return "macos"; + if (platform === "linux") return "linux"; + return "unknown"; +} + +/** + * Returns a random user agent from the predefined list corresponding to the user's OS. * This helps avoid detection by rotating through different browser fingerprints. + * If the OS is unknown, it defaults to Windows user agents. 
* * @returns A random user agent string */ export function getRandomUserAgent(): string { - const index = Math.floor(Math.random() * USER_AGENTS.length); - return USER_AGENTS[index]!; + let osGroup = getUserOS(); + if (osGroup === "unknown") { + osGroup = "windows"; + } + + const agents = USER_AGENTS[osGroup as OSGroup]; + const index = Math.floor(Math.random() * agents.length); + return agents[index]!; } /** @@ -92,7 +107,7 @@ export function getRandomViewport(): { width: number; height: number } { export const STEALTH_CONFIG = { // Enable all stealth features enabled: true, - + // Additional configuration can be passed to the stealth plugin // The plugin will automatically apply various evasion techniques } as const; diff --git a/src/services/browser-service.ts b/src/services/browser-service.ts index 16848b5..87f5583 100644 --- a/src/services/browser-service.ts +++ b/src/services/browser-service.ts @@ -128,7 +128,7 @@ export class BrowserService extends BaseProcess { userAgent: userAgent, // Additional stealth settings locale: "en-US", - timezoneId: "America/New_York", + timezoneId: "Europe/Amsterdam", // Disable automation indicators ignoreHTTPSErrors: true, // Realistic browser headers @@ -309,6 +309,7 @@ export class BrowserService extends BaseProcess { // Realistic browser behavior "--lang=en-US", "--disable-extensions", + "--headless=new", ]; if (this.config.userDataDir) { @@ -322,7 +323,7 @@ export class BrowserService extends BaseProcess { userAgent: userAgent, // Additional stealth settings locale: "en-US", - timezoneId: "America/New_York", + timezoneId: "Europe/Amsterdam", ignoreHTTPSErrors: true, extraHTTPHeaders: { "Accept-Language": "en-US,en;q=0.9", diff --git a/src/services/generator-service.ts b/src/services/generator-service.ts index 9761743..7bf4fff 100644 --- a/src/services/generator-service.ts +++ b/src/services/generator-service.ts @@ -11,6 +11,10 @@ import { PROTOCOL_VERSION } from "../shared/protocol.js"; import { responsePayloadSchema } 
from "../shared/protocol.js"; import { buildSummarizerPrompt, SUMMARIZER_SYSTEM_PROMPT } from "../agents/prompts/summarizer.js"; import { ANALYZER_SYSTEM_PROMPT, buildAnalyzerUserPrompt } from "../agents/prompts/analyzer.js"; +import { DEFAULT_TELEGRAM_SYSTEM_PROMPT, TELEGRAM_HTML_FORMAT_INSTRUCTION } from "../agents/prompts/telegram-html.js"; + +/** Inline reminder appended to user prompts so the LLM sees formatting rules in the most prominent position. */ +const HTML_PROMPT_SUFFIX = `\n\n${TELEGRAM_HTML_FORMAT_INSTRUCTION}\n\nNEVER use Markdown formatting. NEVER use plain JSON.`; import { LemonadeAdapter, type ChatMessage } from "./lemonade-adapter.js"; import { ModelRouter } from "./model-router.js"; import { ModelManagerService, type ModelTier } from "./model-manager.js"; @@ -108,6 +112,10 @@ export class GeneratorService extends BaseProcess { } else { prompt = p.input.prompt; } + // Append HTML reminder unless a specialized system prompt handles formatting + if (!p.input?.system_prompt) { + prompt += HTML_PROMPT_SUFFIX; + } if (typeof p.input?.system_prompt === "string") { systemPrompt = p.input.system_prompt === "analyzer" ? ANALYZER_SYSTEM_PROMPT : p.input.system_prompt; // If it's an analyzer prompt, use the specialized user prompt builder @@ -118,7 +126,8 @@ export class GeneratorService extends BaseProcess { } else if (goal && (context["_criticFeedback"] != null || context["_previousDraft"] != null)) { const feedback = context["_criticFeedback"] as string | undefined; const previous = context["_previousDraft"] as string | undefined; - prompt = `User goal: ${goal}\n\nPrevious draft:\n${previous ?? ""}\n\nCritic feedback:\n${feedback ?? ""}\n\nProduce an improved draft that addresses the feedback. Output only the improved text.`; + prompt = `User goal: ${goal}\n\nPrevious draft:\n${previous ?? ""}\n\nCritic feedback:\n${feedback ?? ""}\n\nProduce an improved draft that addresses the feedback. 
Output only the improved text.${HTML_PROMPT_SUFFIX}`; + if (!systemPrompt) systemPrompt = DEFAULT_TELEGRAM_SYSTEM_PROMPT; } else if (goal) { const depOutputs = Object.entries(context) .filter(([k]) => !k.startsWith("_")) @@ -143,7 +152,7 @@ export class GeneratorService extends BaseProcess { // For other objects, stringify return JSON.stringify(v); }); - prompt = `User goal: ${goal}\n\nContext from previous steps:\n${depOutputs.join("\n\n")}\n\nProduce a direct response to the goal. Output only the response text.`; + prompt = `User goal: ${goal}\n\nContext from previous steps:\n${depOutputs.join("\n\n")}\n\nProduce a direct response to the goal. Output only the response text.${HTML_PROMPT_SUFFIX}`; } else { const depOutputs = Object.values(context).map((v) => { // Extract body from http_get responses @@ -166,16 +175,17 @@ export class GeneratorService extends BaseProcess { // For other objects, stringify return JSON.stringify(v); }); - prompt = depOutputs.join("\n\n") || "Generate a brief response."; + prompt = (depOutputs.join("\n\n") || "Generate a brief response.") + HTML_PROMPT_SUFFIX; } if (p.input?.messages && Array.isArray(p.input.messages)) { messages = p.input.messages as ChatMessage[]; } if (!messages) { - messages = systemPrompt - ? [{ role: "system" as const, content: systemPrompt }, { role: "user" as const, content: prompt }] - : [{ role: "user" as const, content: prompt }]; + // Always inject a system prompt to ensure Telegram HTML output. + // Falls back to the default Telegram formatting prompt when no specific one is set. + const effectiveSystemPrompt = systemPrompt ?? 
DEFAULT_TELEGRAM_SYSTEM_PROMPT; + messages = [{ role: "system" as const, content: effectiveSystemPrompt }, { role: "user" as const, content: prompt }]; } const genResult = await this.lemonade.chat(messages, model, { diff --git a/src/services/skill-manager.ts b/src/services/skill-manager.ts index 41b2c5e..0a163fb 100644 --- a/src/services/skill-manager.ts +++ b/src/services/skill-manager.ts @@ -1,6 +1,7 @@ import { readFileSync, existsSync } from "node:fs"; import { join, resolve } from "node:path"; import { getConfig } from "../shared/config.js"; +import { TELEGRAM_HTML_FORMAT_INSTRUCTION } from "../agents/prompts/telegram-html.js"; export interface SkillInfo { name: string; @@ -35,13 +36,16 @@ export class SkillManager { /** * Load the skill prompt (SKILL.md) for a given skill. + * Appends Telegram HTML formatting instructions so skill output + * always uses Telegram-supported HTML instead of Markdown. */ public getSkillPrompt(name: string): string | null { const skillPath = join(this.skillsDir, name, "SKILL.md"); if (!existsSync(skillPath)) return null; try { - return readFileSync(skillPath, "utf-8"); + const content = readFileSync(skillPath, "utf-8"); + return `${content}\n\n## OUTPUT FORMATTING\n${TELEGRAM_HTML_FORMAT_INSTRUCTION}\n\nYou MUST format your final response using only the Telegram HTML tags listed above. 
Never use Markdown (replace with allowed HTML tags).`;` } catch (err) { console.error(`Failed to load skill prompt for ${name}:`, err); return null; diff --git a/src/services/tool-host.ts b/src/services/tool-host.ts index edd35fa..390e835 100644 --- a/src/services/tool-host.ts +++ b/src/services/tool-host.ts @@ -130,7 +130,7 @@ export class ToolHost extends BaseProcess { if (typeof query !== "string") throw new Error("http_search requires query (string)"); // Build Search URL (using the HTML endpoint which is more relaxed) - const searchUrl = "https://search.yahoo.com/search?p=" + query; + const searchUrl = "https://html.duckduckgo.com/html?q=" + encodeURIComponent(query); const startTime = Date.now(); diff --git a/src/tests/cron-ai.test.ts b/src/tests/cron-ai.test.ts index 44f8d36..372f998 100644 --- a/src/tests/cron-ai.test.ts +++ b/src/tests/cron-ai.test.ts @@ -130,7 +130,7 @@ describe("Cron-to-AI Flow (E2E Mocked)", () => { chatId, "Cron Job Execution Succeeded", false, - "Markdown" + "HTML" ); }); });