diff --git a/_images/Gemini_Generated_Image_eypuexeypuexeypu.png b/_images/Gemini_Generated_Image_eypuexeypuexeypu.png
new file mode 100644
index 0000000..ec6a88a
Binary files /dev/null and b/_images/Gemini_Generated_Image_eypuexeypuexeypu.png differ
diff --git a/_images/Gemini_Generated_Image_mrh4ogmrh4ogmrh4.png b/_images/Gemini_Generated_Image_mrh4ogmrh4ogmrh4.png
new file mode 100644
index 0000000..0e12aab
Binary files /dev/null and b/_images/Gemini_Generated_Image_mrh4ogmrh4ogmrh4.png differ
diff --git a/_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png b/_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png
new file mode 100644
index 0000000..d1a0a41
Binary files /dev/null and b/_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png differ
diff --git a/_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png b/_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png
new file mode 100644
index 0000000..10c21bf
Binary files /dev/null and b/_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png differ
diff --git a/_images/Local-LLM-vs-Cloud-API-24-Month-Cost-Comparison.png b/_images/Local-LLM-vs-Cloud-API-24-Month-Cost-Comparison.png
new file mode 100644
index 0000000..8292ea3
Binary files /dev/null and b/_images/Local-LLM-vs-Cloud-API-24-Month-Cost-Comparison.png differ
diff --git a/_images/SCR-20260305-jmid.png b/_images/SCR-20260305-jmid.png
new file mode 100644
index 0000000..04d8586
Binary files /dev/null and b/_images/SCR-20260305-jmid.png differ
diff --git a/_images/SCR-20260305-jmjk.png b/_images/SCR-20260305-jmjk.png
new file mode 100644
index 0000000..22a9158
Binary files /dev/null and b/_images/SCR-20260305-jmjk.png differ
diff --git a/_images/SCR-20260305-jmkx.png b/_images/SCR-20260305-jmkx.png
new file mode 100644
index 0000000..4634fb9
Binary files /dev/null and b/_images/SCR-20260305-jmkx.png differ
diff --git a/_images/SCR-20260305-jmme.png b/_images/SCR-20260305-jmme.png
new file mode 100644
index 0000000..5bbfc58
Binary files /dev/null and b/_images/SCR-20260305-jmme.png differ
diff --git a/_images/agents.webp b/_images/agents.webp
new file mode 100644
index 0000000..9a932d4
Binary files /dev/null and b/_images/agents.webp differ
diff --git a/_images/ai-prices.png b/_images/ai-prices.png
new file mode 100644
index 0000000..cd9473b
Binary files /dev/null and b/_images/ai-prices.png differ
diff --git a/_images/dontlike.png b/_images/dontlike.png
new file mode 100644
index 0000000..bd5b539
Binary files /dev/null and b/_images/dontlike.png differ
diff --git a/_images/llm-inference.webp b/_images/llm-inference.webp
new file mode 100644
index 0000000..56be948
Binary files /dev/null and b/_images/llm-inference.webp differ
diff --git a/package-lock.json b/package-lock.json
index 7b937c1..7cd23bb 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -5792,4 +5792,4 @@
}
}
}
-}
\ No newline at end of file
+}
diff --git a/package.json b/package.json
index 0ae40cc..c07fd47 100644
--- a/package.json
+++ b/package.json
@@ -47,4 +47,4 @@
"typescript": "^5.7.2",
"vitest": "^4.0.18"
}
-}
\ No newline at end of file
+}
diff --git a/presentation.md b/presentation.md
new file mode 100644
index 0000000..bb4a484
--- /dev/null
+++ b/presentation.md
@@ -0,0 +1,302 @@
+---
+marp: true
+theme: default
+class:
+ - lead
+---
+
+# I built my own AI-agent. Why?
+**A journey from reading about AI to building a custom agent framework**
+
+
+
+---
+layout: image-left
+image: ./_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png
+---
+
+## Mikhail Larchanka
+- Principal Software Engineer at **Sytac**
+- https://larchanka.com
+- https://youtube.com/@larchanka
+- https://github.com/larchanka
+- https://x.com/mlarchanka
+
+---
+
+## ๐ The AI Catch-Up
+- The AI landscape is evolving at breakneck speed every single day.
+- New models, new frameworks (LangChain, AutoGen), new methodologies.
+- It feels like the revolution is passing by.
+- **The Challenge:** I do not work with AI in my daily job. Staying actively involved requires intentional effort beyond standard day-to-day tasks.
+
+
+
+---
+layout: image-right
+image: ./_images/Gemini_Generated_Image_vu2lluvu2lluvu2l.png
+---
+
+## ๐ The Trap of "Reading vs. Doing"
+- I read a lot of papers, articles, and documentation.
+- **The Reality Check:** Reading builds awareness, but not genuine *knowledge* or intuition.
+- Without hands-on practice, you don't discover the edge cases, the latency issues, or the prompt fragility.
+- I spent time checking what others were building in the space, and understanding their pain points.
+
+---
+layout: image-right
+image: ./_images/dontlike.png
+backgroundSize: contain
+---
+## 💡 The Catalyst: My Own Ideas
+- While observing existing solutions, I realized I had different ideas on how agents should operate.
+- Existing frameworks often felt either too bloated, too confusing, or too rigid.
+- I wanted to build something tailored to my intuition of how a system should reason and interact with an environment.
+
+
+---
+layout: image-right
+image: ./_images/ai-prices.png
+backgroundSize: contain
+---
+## 💸 Cost-Driven Architecture
+- **The Goal:** Make learning and relentless experimentation "cheap."
+- Relying on cloud APIs (GPT-4, Claude) for agentic loops—which run autonomously making dozens of calls and mistakes—gets expensive quickly.
+- **The Solution:** Local LLMs.
+- Complete freedom to experiment, fail, retry, and loop infinitely without worrying about API bills.
+
+---
+layout: image-left
+image: ./_images/llm-inference.webp
+backgroundSize: contain
+---
+
+## ๐ค Evaluating Local LLMs
+- Not all models are created equal for agentic tasks.
+- **Benchmarking for my agent:**
+ - Need strong coding and reasoning capabilities.
+ - Need reliable JSON/tool-calling formatting.
+ - Need fast inference speed (tokens/sec) for autonomous, multi-step loops.
+- Explored running models locally using tools like Ollama or Lemonade.
+- Tested how models handle context degradation on local machine hardware.
+
+---
+
+## ๐ Dynamic Model Routing
+- Running a massive model (like Mixtral) for every tiny task is slow and overkill.
+- I built a **Model Router** that dynamically selects models based on task complexity.
+- **The Flow:**
+ 1. **Planner:** Evaluates the user's intent and forces the plan into a `complexity` bucket (`"small"`, `"medium"`, or `"large"`).
+ 2. **Injection:** The Executor injects `_complexity` into the payload for every node.
+ 3. **Router Resolution:** The `GeneratorService` checks the complexity and maps it purely via `config.json`.
+ 4. *Small -> Llama3:8B (Fast system loops)* | *Medium -> Qwen2.5 (Standard)* | *Large -> Mixtral (Deep research)*.
+
+---
+
+## 🛡️ Context & Token Safety
+- I didn't want to calculate exact tokens with heavy libraries (like `tiktoken`) on every single loop.
+- **My Strategy (Heuristics & Compression):**
+ - **Safety Truncation:** If a tool (like a massive `http_get` web scrape) returns over 30,000 characters, the `ExecutorAgent` aggressively truncates it.
+ - **Prompt Summarization:** Instead of keeping an infinitely growing chat history, the Planner produces a `summarize` node using a specialized `SUMMARIZER_SYSTEM_PROMPT` to compress old context dynamically.
+ - Tracking the standard `usage` vectors from OpenAI-compatible tools (`prompt_tokens`, `total_tokens`) for observability rather than strict hard-blocking.
+
+---
+
+## ๐๏ธ The Ultimate Testing Playground
+- The framework wasn't just a final product; it was a testbed.
+- **Objectives:**
+ - Test how to actually *code* with agents in different structural scenarios.
+ - Experiment with system architecture and modularity design.
+ - Learn how to construct proper, dynamic task-planning prompts.
+ - Create functional applications autonomously based on structured tasks.
+
+
+
+```mermaid
+flowchart LR
+ Planner[Planner] --> Executor[Executor]
+ Executor --> Tools[Tools]
+ Tools --> Executor
+ Executor --> Planner
+```
+
+---
+
+## โ๏ธ The Core Loop: Solving Communication
+- **The Problem:** LLMs naturally output raw text. Agents need structured, executable actions.
+- **The Implementation:**
+ - Forcing the local LLM to output valid JSON representations of tool calls.
+ - Handling parsing errors seamlessly through self-correction loops.
+ - Designing a robust schema that the LLM understands and adheres to.
+ - Distinguishing between "Thinking" (reasoning) and "Acting" (tool execution).
+
+---
+
+## โ๏ธ The Core Loop: Solving Communication
+
+### Request
+
+```
+{
+ "id": "uuid",
+ "from": "core",
+ "to": "planner",
+ "type": "plan.create",
+ "version": "1.0",
+ "timestamp": 1704067200000,
+ "payload": {}
+}
+```
+
+
+---
+
+## โ๏ธ The Core Loop: Solving Communication
+
+### Response
+
+```
+{
+ "id": "same-as-request",
+ "from": "planner",
+ "to": "core",
+ "type": "response",
+ "version": "1.0",
+ "timestamp": 1704067200000,
+ "payload": {
+ "status": "success",
+ "result": {}
+ }
+}
+```
+
+---
+layout: image
+image: ./_images/SCR-20260305-jmkx.png
+---
+
+
+---
+
+## 🛠️ Equipping the Agent: Tools & Skills
+- Agents are useless without hands.
+- I built a modular tool host system.
+- Standardized interfaces for tools: `name`, `description`, `parameters`, `execute()`.
+- Grouping tools into highly specialized "Skills" (e.g., File System, Terminal, Browser).
+- Optimization: Injecting only relevant tool schemas into the prompt to preserve context.
+
+
+---
+layout: image
+image: ./_images/SCR-20260305-jmme.png
+---
+
+
+---
+
+## ๐ Standardizing with MCP
+- **Building MCP (Model Context Protocol) Integration:**
+- Why reinvent the wheel for every custom tool or data source?
+- Implementing MCP allowed my agent to connect to external, standardized tools seamlessly.
+- Learned how to expose local environment capabilities (files, API connections) to an agent through standardized, secure boundaries.
+
+---
+
+## ๐๏ธ The Layered Memory Architecture
+To prevent context contamination and keep prompt sizes manageable, I separated memory into distinct tiers:
+
+```mermaid
+flowchart LR
+ classDef st fill:#fef3c7,stroke:#b45309,stroke-width:2px,color:#000
+ classDef mt fill:#dbeafe,stroke:#1d4ed8,stroke-width:2px,color:#000
+ classDef lt fill:#dcfce7,stroke:#15803d,stroke-width:2px,color:#000
+
+ subgraph ST ["Short-Term (In-Context)"]
+ direction TB
+ Conv["💬 Conversation"]:::st
+ Session["๐ Scratchpad"]:::st
+ end
+
+ subgraph MT ["Mid-Term (SQLite Task Store)"]
+ direction TB
+ Task["โ๏ธ DAG State"]:::mt
+ Reflect["๏ฟฝ Reflections"]:::mt
+ end
+
+ subgraph LT ["Long-Term (Persistent)"]
+ direction TB
+ RAG["🔮 Semantic RAG"]:::lt
+ Struct["💾 Structured Data"]:::lt
+ end
+
+ ST -->|Initiates tasks| MT
+ MT -->|Queries knowledge| LT
+ LT -.->|Injects context| ST
+```
+
+---
+
+## ๐ When is each memory used?
+
+- **Short-Term (Conversation & Session):**
+ - **When:** Active chatting, holding the immediate goal, fast active reasoning.
+ - **Lifecycle:** Evicted rapidly to save prompt context.
+- **Mid-Term (Task Memory & State - SQLite):**
+ - **When:** Tracking multi-step execution graphs (DAGs), pausing/resuming tasks, storing critic reflections & retry counts.
+ - **Lifecycle:** Persists across agent loops; prevents the agent from getting stuck in circles.
+- **Long-Term (Vector DB & File System):**
+ - **When:** Finding unseen documents or entire codebase structures based on semantic meaning.
+ - **Lifecycle:** Permanent; grows over time.
+
+---
+
+## 🧠 Long-Term Memory: RAG from Scratch
+- Local LLMs have finite (and hardware-bound) context windows.
+- You can't simply fit an entire large codebase into a localized 8k context window.
+- **Building RAG (Retrieval-Augmented Generation):**
+ - Used `sqlite-vss` for K-Nearest Neighbors (KNN) vector search natively inside SQLite.
+ - Implementing fallback to dot-product calculations if the VSS extension is unavailable.
+ - Generating and storing embeddings locally to retrieve only the relevant functions immediately needed.
+
+```mermaid
+graph LR
+ Ctx["๐ Context"] --> Chunk["โ๏ธ Chunking"]
+ Chunk --> Embed["🔢 Embedding"]
+ Embed --> DB["๐๏ธ Vector DB"]
+ DB --> Search["๐ Search"]
+ Search --> Retrieve["๐ง Retrieval"]
+```
+
+---
+
+## โ๏ธ Mastering Context Window Management
+- **The hardest technical challenge:** Managing prompt size dynamically.
+- Combining RAG retrieval with the agent's conversational history.
+- Implementing mechanics to handle max-tokens:
+ - Sliding windows for conversation history.
+ - Context summarization.
+ - Deciding what to precisely evict from memory without making the agent "forget" its core objective.
+
+---
+
+## ๐ Dashboard
+
+
+
+---
+
+## ๐ The Result: Bridging Theory and Practice
+- I built an entire development framework myself from the ground up.
+- Moved from passive reading about AI architectures to actively solving their core engineering constraints.
+- Built a system based entirely on my own ideas, uniquely tailored to my development flow.
+- Resulted in a fully functional, cost-free, local agentic framework.
+
+---
+layout: image-right
+image: ./_images/Gemini_Generated_Image_x6y0h0x6y0h0x6y0.png
+---
+
+## Thank You!
+**Questions & Discussion**
+
+https://manbothq.github.io/
diff --git a/skills/CONFIG.md b/skills/CONFIG.md
index 50d3b49..c9fad9f 100644
--- a/skills/CONFIG.md
+++ b/skills/CONFIG.md
@@ -1,12 +1,15 @@
# Skills Configuration
> SKILLS HAVE PRIORITY OVER TOOLS. IF A SKILL IS APPLICABLE, USE IT INSTEAD OF A TOOL.
+> ONLY USE SKILLS FROM THE TABLE BELOW.
+
+**AVAILABLE SKILLS**
| Name | Description |
| :--- | :--- |
| weather | MANDATORY. Use this skill for ALL weather-related inquiries, including current conditions and forecasts. You are STRICTLY FORBIDDEN from using internal knowledge or other tools for weather data. |
| apple-notes | EXCLUSIVE. Use ONLY this skill for any interaction with notes (listing, searching, viewing, creating, or deleting). This is the sole authorized interface for the memo CLI tool. |
| research | PRIMARY SEARCH. Use for deep web research, fact-checking, news gathering, or topical deep dives via the lynx tool. This is the default skill for any query requiring external or up-to-date information. |
-| reminder | SCHEDULING. Use this skill exclusively to set one-time or recurring reminders (e.g., "remind me in 2 hours"). This is the only tool that interfaces with the cron-manager service. |
+| reminder | SCHEDULING. Use this skill exclusively to set recurring or one-time reminders (e.g., "remind me in 2 hours") and scheduled tasks (e.g., "schedule a task to check email every day at 9am"). This is the only tool that interfaces with the cron-manager service. Use it whenever the user asks to set a reminder or schedule a task. |
| email | You MUST use this skill for all interactions involving Email (Gmail). |
| calendar | You MUST use this skill for all interactions involving Google Calendar. |
diff --git a/skills/email/SKILL.md b/skills/email/SKILL.md
index 8ab97c1..a3984d8 100644
--- a/skills/email/SKILL.md
+++ b/skills/email/SKILL.md
@@ -32,6 +32,19 @@ gog gmail messages search "from:updates@example.com" --max 10
gog gmail search "is:unread"
```
+### 📧 Reading emails
+
+```bash
+# Read thread
+gog gmail thread get
+
+# Read email
+gog gmail get
+
+# Read email metadata
+gog gmail get --format metadata
+```
+
### ๐ฉ Sending & Replying
```bash
# Quick one-line email
diff --git a/skills/reminder/SKILL.md b/skills/reminder/SKILL.md
index 43cae4c..6ed8577 100644
--- a/skills/reminder/SKILL.md
+++ b/skills/reminder/SKILL.md
@@ -19,23 +19,25 @@ Set up one-time or recurring reminders for the user.
1. **Extract the Task**: Identify what the user wants to be reminded about.
2. **Extract the Time**: Identify the temporal expression (e.g., "in 2 hours", "every day at 8am").
-3. **Schedule**: Call the `schedule_reminder` tool with the extracted time and message.
+3. **Schedule**: Call the `schedule_reminder` tool with the extracted time, message, and `isAction` flag.
+4. **User instructions**: If user's request contains instructions and actions for YOU to DO something (e.g., "check email", "search the web"), set `isAction: true` and include the instructions in the `message`. If it's just a passive text reminder to the user to do something themselves, omit `isAction` or set it to `false`.
## Tool: schedule_reminder
**Arguments**:
- `time`: (string) Natural language time expression (e.g., "in 5 minutes", "tomorrow at 3pm", "every Monday").
-- `message`: (string) The content of the reminder.
+- `message`: (string) The content of the reminder or the instruction for the action to take.
+- `isAction`: (boolean, optional) Set to `true` if the reminder requires YOU (the AI) to execute a task, such as checking emails or searching the web. Set to `false` or omit if it's just a text reminder for the user.
## Strategy
-- Be precise with the `message`. If the user says "remind me to drink water", the message should be "Drink water".
+- Be precise with the `message`. If the user's request contains instructions for the AI to perform an action, include **ALL** of them in the reminder message and be SURE to set `isAction: true`.
- If the user provides a vague time, ask for clarification if necessary, or use your best judgment (e.g., "later today" could be "in 4 hours").
- The system will automatically handle parsing the natural language `time` string into a cron expression.
## Example Workflow
-User Goal: "remind me to call Mom in 20 minutes"
+User Goal: "check my email and mark spam in 20 minutes"
-1. Call `schedule_reminder(time="in 20 minutes", message="Call Mom")`.
-2. Respond to the user: "Sure! I'll remind you to Call Mom in 20 minutes."
+1. Call `schedule_reminder(time="in 20 minutes", message="Check inbox for new email. Mark spam", isAction=true)`.
+2. Respond to the user: "Sure! Your email will be checked in 20 minutes."
diff --git a/skills/weather/SKILL.md b/skills/weather/SKILL.md
index fbbd479..70b10de 100644
--- a/skills/weather/SKILL.md
+++ b/skills/weather/SKILL.md
@@ -2,6 +2,10 @@
Get current weather conditions and forecasts.
+## IMPORTANT!
+
+Only use the shell tool and `curl`.
+
## When to Use
โ
**USE this skill when:**
diff --git a/src/adapters/telegram-adapter.ts b/src/adapters/telegram-adapter.ts
index f6d7246..4342146 100644
--- a/src/adapters/telegram-adapter.ts
+++ b/src/adapters/telegram-adapter.ts
@@ -75,15 +75,21 @@ function getAllowedUserIds(): Set | null {
}
/**
- * Escape special characters for Telegram MarkdownV2 format.
- * According to Telegram API docs, these characters must be escaped: _ * [ ] ( ) ~ ` > # + - = | { } . !
- * Inside (...) of a [link](url) only \ and ) must be escaped.
- * We also escape \ globally as it's the escape character itself.
+ * Escape HTML special characters for Telegram HTML parse mode.
+ * Only <, > and & need escaping in Telegram HTML.
*/
-function escapeMarkdownV2(text: string): string {
- // Characters that need to be escaped in MarkdownV2
- const specialChars = /([\\_*\[\]()~`>#+\-=|{}.!])/g;
- return text.replace(specialChars, "\\$1");
+function escapeHtml(text: string): string {
+ return text
+ .replace(/&/g, "&amp;")
+ .replace(/</g, "&lt;")
+ .replace(/>/g, "&gt;");
+}
+
+/**
+ * Strip HTML tags from text for plain-text fallback.
+ */
+function stripHtmlTags(text: string): string {
+ return text.replace(/<[^>]*>/g, "");
}
function createEnvelope(type: string, to: string, payload: T): Envelope {
@@ -137,16 +143,42 @@ function main(): void {
chatId: number,
text: string,
options?: TelegramBot.SendMessageOptions,
- originalText?: string
+ originalText?: string,
+ isHtmlContent = false,
+ retryCount = 0
): Promise {
+ const MAX_RETRIES = 3;
const messageText = text?.trim() ? text : "[EMPTY_RESPONSE]";
+
+ const finalOptions: TelegramBot.SendMessageOptions = {
+ ...options,
+ parse_mode: "HTML" as any
+ };
+
+ // If this is NOT LLM/HTML content (i.e. system messages), HTML-escape it
+ const finalText = isHtmlContent ? messageText : escapeHtml(messageText);
+
try {
- await bot.sendMessage(chatId, messageText, options);
+ await bot.sendMessage(chatId, finalText, finalOptions);
} catch (err: any) {
+ // transient errors: retry
+ const isTransient =
+ err.code === 'ECONNRESET' ||
+ err.code === 'ETIMEDOUT' ||
+ err.code === 'EFATAL' ||
+ err.message?.includes("socket hang up");
+
+ if (isTransient && retryCount < MAX_RETRIES) {
+ const delay = 1000 * Math.pow(2, retryCount);
+ console.warn(`[telegram-adapter] Transient error (${err.code || err.message}), retrying in ${delay}ms... (attempt ${retryCount + 1})`);
+ await new Promise(resolve => setTimeout(resolve, delay));
+ return sendToUser(chatId, text, options, originalText, isHtmlContent, retryCount + 1);
+ }
+
// If error is related to parsing entities, retry with plain text
if (err.response?.body?.description?.includes("can't parse entities")) {
- console.warn(`Telegram fallback: Failed to parse entities, retrying as plain text. Error: ${err.response.body.description}`);
- const fallbackText = (originalText?.trim() ? originalText : messageText);
+ console.warn(`Telegram fallback: Failed to parse HTML entities, retrying as plain text. Error: ${err.response.body.description}`);
+ const fallbackText = stripHtmlTags(originalText?.trim() ? originalText : messageText);
await bot.sendMessage(chatId, fallbackText, { ...options, parse_mode: undefined }).catch((innerErr) => {
console.error("Telegram critical send error (fallback failed):", innerErr);
});
@@ -408,13 +440,11 @@ function main(): void {
if (envelope.type === "telegram.send") {
const pl = envelope.payload as TelegramSendPayload;
if (typeof pl.chatId === "number" && typeof pl.text === "string") {
- // Escape text ONLY if explicitly requested MarkdownV2
- const escapedText = pl.parseMode === "MarkdownV2" ? escapeMarkdownV2(pl.text) : pl.text;
const opts: TelegramBot.SendMessageOptions = {
parse_mode: pl.parseMode as any,
...(pl.silent === true && { disable_notification: true }),
};
- sendToUser(pl.chatId, escapedText, opts, pl.text);
+ sendToUser(pl.chatId, pl.text, opts, pl.text, true);
}
return;
}
@@ -423,11 +453,10 @@ function main(): void {
if (envelope.type === "telegram.progress") {
const pl = envelope.payload as TelegramProgressPayload;
if (typeof pl.chatId === "number" && typeof pl.text === "string") {
- const escapedText = pl.parseMode === "MarkdownV2" ? escapeMarkdownV2(pl.text) : pl.text;
const opts: TelegramBot.SendMessageOptions = {
parse_mode: pl.parseMode as any,
};
- sendToUser(pl.chatId, escapedText, opts, pl.text);
+ sendToUser(pl.chatId, pl.text, opts, pl.text, true);
}
return;
}
@@ -438,21 +467,20 @@ function main(): void {
if (pl.status === "success" && pl.result && typeof pl.result === "object") {
const r = pl.result as { chatId?: number; text?: string; reminders?: unknown[]; message?: string; parseMode?: "HTML" | "Markdown" | "MarkdownV2" };
if (typeof r.chatId === "number" && typeof r.text === "string") {
- const escapedText = r.parseMode === "MarkdownV2" ? escapeMarkdownV2(r.text) : r.text;
const opts: TelegramBot.SendMessageOptions = {
parse_mode: r.parseMode as any,
};
- sendToUser(r.chatId, escapedText, opts, r.text);
+ sendToUser(r.chatId, r.text, opts, r.text, true);
} else if (typeof r.chatId === "number" && r.reminders) {
// Handle reminder list response
const reminders = r.reminders as Array<{ id: string; cronExpr: string; reminderMessage?: string }>;
if (reminders.length === 0) {
- sendToUser(r.chatId, "No active reminders.");
+ sendToUser(r.chatId, "🚫 No active reminders.");
} else {
const formatted = reminders
.map((rem) => `ID: ${rem.id}\nTime: ${rem.cronExpr}\nMessage: ${rem.reminderMessage ?? "N/A"}`)
.join("\n\n---\n\n");
- sendToUser(r.chatId, `Active reminders:\n\n${formatted}`);
+ sendToUser(r.chatId, `⏰ Active reminders:\n\n${formatted}`);
}
} else if (typeof r.chatId === "number" && r.message) {
sendToUser(r.chatId, r.message);
diff --git a/src/agents/executor-agent.ts b/src/agents/executor-agent.ts
index 0d46f2c..60d367e 100644
--- a/src/agents/executor-agent.ts
+++ b/src/agents/executor-agent.ts
@@ -7,8 +7,8 @@
*/
const MAX_CONCURRENT_NODES = 5;
-const MAX_REVISION_CYCLES = 10;
-const MAX_SKILL_TURNS = 15;
+const MAX_REVISION_CYCLES = 20;
+const MAX_SKILL_TURNS = 100;
const SKILL_TOOLS: any[] = [
{
@@ -49,7 +49,8 @@ const SKILL_TOOLS: any[] = [
type: "object",
properties: {
time: { type: "string", description: "When to remind (e.g., 'in 2 hours', 'every Monday at 9am', 'tomorrow at 3pm')" },
- message: { type: "string", description: "The content of the reminder (what to remind about)" }
+ message: { type: "string", description: "The content of the reminder (what to remind about) or the instruction for an action." },
+ isAction: { type: "boolean", description: "Set to true if you are scheduling a task that requires you (the AI assistant) to execute an action (e.g., 'check email', 'search web'). Omit or set to false if it's just a text reminder for the user." }
},
required: ["time", "message"]
}
@@ -642,6 +643,7 @@ export class ExecutorAgent extends BaseProcess {
): Promise {
const input = node.input ?? {};
const nodeInput = input as Record;
+ const isAction = nodeInput.isAction === true || nodeInput.isAction === "true";
// Extract cronExpr from input or dependency output
let cronExpr = nodeInput.cronExpr as string | undefined;
@@ -800,7 +802,7 @@ export class ExecutorAgent extends BaseProcess {
version: PROTOCOL_VERSION,
payload: {
cronExpr,
- taskType: "reminder",
+ taskType: isAction ? "ai_query" : "reminder",
payload: {
chatId: typeof chatId === "string" ? parseInt(chatId, 10) : chatId,
reminderMessage,
diff --git a/src/agents/prompts/analyzer.ts b/src/agents/prompts/analyzer.ts
index 4843527..361bf70 100644
--- a/src/agents/prompts/analyzer.ts
+++ b/src/agents/prompts/analyzer.ts
@@ -1,36 +1,27 @@
/**
* System prompts for the Analyzer role.
- * Optimized for Telegram Markdown V2 and natural language synthesis.
+ * Optimized for Telegram HTML formatting and natural language synthesis.
*/
+import { TELEGRAM_HTML_FORMAT_INSTRUCTION } from "./telegram-html.js";
+
export const ANALYZER_SYSTEM_PROMPT = `
-Professional Data Analyst and Assistant.
+Your name is \`🧬 ManBot\`. You are a Professional Data Analyst and Assistant.
Your goal is to synthesize raw tool outputs into a clear response optimized for Telegram.
-${new Date().toISOString()}
+${new Date().toISOString()}
-## TELEGRAM FORMATTING RULES:
-1. **No Headers**: Do NOT use "# Header". Instead, use **BOLD UPPERCASE** for titles.
-2. **No Tables**: Markdown tables are not supported. Use structured bullet points (โข) or bold lists.
-3. **Strict Syntax**:
- - *Bold*: *text* or **text**
- - _Italic_: _text_
- - \`Code\`: \`inline code\` or \`\`\`language\n pre-formatted block \`\`\`
- - > Quotes: Use for highlighting important information or citations.
- - For simple charts or graphs, use \`\`\`language\n pre-formatted block \`\`\`.
-4. **Links**: Use [title](url) syntax.
-
## ANALYSIS GUIDELINES:
-- **Synthesize**: Combine multiple sources. Identify patterns or contradictions.
-- **Accuracy**: If data is missing or tools failed, explain this clearly using bold warnings.
-- **Tone**: Professional, direct, and conversational. Avoid "As an AI..." or "Here is the data...".
+- Synthesize: Combine multiple sources. Identify patterns or contradictions.
+- Accuracy: If data is missing or tools failed, explain this clearly using bold warnings.
+- Tone: Friendly, direct, and conversational. Avoid "As an AI..." or "Here is the data...".
-Output: Pure Telegram Markdown V2.
-No raw JSON/HTML unless requested.
+${TELEGRAM_HTML_FORMAT_INSTRUCTION}
+Output: Telegram HTML only. NEVER use Markdown (replace with allowed tags or remove). NEVER use raw JSON.
`;
/**
@@ -40,5 +31,5 @@ export function buildAnalyzerUserPrompt(goal: string, context: string): string {
if (!context || !context.trim()) {
return `Respond to the user goal directly:\n\n${goal}`;
}
- return `User Goal: ${goal}\n\nData Context:\n${context}\n\nTask: Synthesize the data to answer the goal. Use Telegram MarkdownV2 (no headers/tables).`;
+ return `User Goal: ${goal}\n\nData Context:\n${context}\n\nTask: Synthesize the data to answer the goal. Use Telegram HTML formatting (no markdown, no tables).`;
}
diff --git a/src/agents/prompts/critic.ts b/src/agents/prompts/critic.ts
index 72599bd..3c7abeb 100644
--- a/src/agents/prompts/critic.ts
+++ b/src/agents/prompts/critic.ts
@@ -9,9 +9,19 @@ You are skeptical and detail-oriented. Your mission is to audit the "Draft Outpu
## CRITICAL AUDIT DIMENSIONS:
1. **Telegram Syntax (MANDATORY)**:
- - REJECT (REVISE) if the output contains "#" headers.
- - REJECT (REVISE) if the output contains markdown tables.
- - CHECK for broken markdown tags.
+ - REJECT (REVISE) if the output contains unsupported syntax.
+ **Supported HTML tags**
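+ - Bold: <b>text</b>
+ - Italic: <i>text</i>
+ - Underline: <u>text</u>
+ - Strikethrough: <s>text</s>
+ - Spoiler: <tg-spoiler>text</tg-spoiler>
+ - Links: <a href="url">text</a>
+ - Inline code: <code>text</code>
+ - Code block: <pre>code block</pre>
+ - Block quote: <blockquote>quote</blockquote>
+ - Expandable Block quote (for long quotes): <blockquote expandable>quote</blockquote>
+ - Code block with language: <pre><code class="language-python">code</code></pre>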
2. **Factuality**: Flag any hallucinations or "invented" facts.
3. **Completeness**: If the user asked for 5 items and got 3, it is a REVISE.
4. **Safety**: Ensure no harmful or toxic content.
@@ -58,7 +68,7 @@ ${safeDraft}
Evaluate STRICTLY.
Check for:
-- Telegram syntax (no headers, no tables).
+- Telegram syntax (no headers, no tables, no unsupported tags).
- Factuality.
- Completeness.
- Safety.
diff --git a/src/agents/prompts/planner.ts b/src/agents/prompts/planner.ts
index e99aa73..7ed6dfe 100644
--- a/src/agents/prompts/planner.ts
+++ b/src/agents/prompts/planner.ts
@@ -7,7 +7,7 @@ export const PLANNER_SYSTEM_PROMPT = `Strategic Execution Planner
IF you can fulfill the user's goal using ONLY your internal knowledge (e.g., greetings, simple math, general questions, "think of X"):
-- Create exactly ONE node: { "id": "direct-answer", "type": "generate_text", "service": "model-router", "input": { "prompt": "ANSWER_GOAL" } }.
+- Create exactly ONE node: { "id": "direct-answer", "type": "generate_text", "service": "model-router", "input": { "prompt": "ANSWER_GOAL", "system_prompt": "analyzer" } }.
- DO NOT use any tools.
ELSE:
- Proceed with creating a Capability Graph.
@@ -270,7 +270,10 @@ ${Object.entries(process.env)
const base = `${PLANNER_SYSTEM_PROMPT}
${skillsSection}
${PLANNER_FEW_SHOT_EXAMPLES}
-${now}
+
+OPERATE ONLY WITH THIS DATE IN YOUR PLANS!
+Right now: ${now}
+
${options?.conversationHistory ? `History Context: ${options.conversationHistory}` : ""}
User Goal: ${userMessage}
diff --git a/src/agents/prompts/telegram-html.ts b/src/agents/prompts/telegram-html.ts
new file mode 100644
index 0000000..742cbba
--- /dev/null
+++ b/src/agents/prompts/telegram-html.ts
@@ -0,0 +1,35 @@
+/**
+ * Shared Telegram HTML formatting instruction.
+ * Reusable constant that can be injected into any LLM system prompt
+ * to ensure output uses only Telegram-supported HTML tags.
+ */
+
+export const TELEGRAM_HTML_FORMAT_INSTRUCTION = `## TELEGRAM HTML FORMATTING RULES:
+You MUST format your output using Telegram-supported HTML tags. Do NOT use Markdown syntax.
+
+1. **No Markdown**: Do NOT use *, **, _, ~~, \`, #, or any Markdown syntax. Use HTML tags only.
+2. **No Tables**: HTML tables are not supported by Telegram. Use structured bullet points (โข) or bold lists.
+3. **Supported HTML tags**:
+ - Bold: <b>text</b>
+ - Italic: <i>text</i>
+ - Underline: <u>text</u>
+ - Strikethrough: <s>text</s>
+ - Spoiler: <tg-spoiler>text</tg-spoiler>
+ - Links: <a href="url">text</a>
+ - Inline code: <code>text</code>
+ - Code block: <pre>code block</pre>
+ - Block quote: <blockquote>quote</blockquote>
+ - Expandable Block quote (for long quotes): <blockquote expandable>quote</blockquote>
+ - Code block with language: <pre><code class="language-python">code</code></pre>
+4. **Special characters**: The characters <, > and & must be replaced with &lt;, &gt; and &amp; respectively when used as literals (not as part of HTML tags).
+5. **Line breaks**: Use regular line breaks (newlines). Do NOT use <br> tags.`;
+
+/**
+ * Default system prompt for LLM calls that need Telegram HTML formatting
+ * but don't have a specialized system prompt (e.g., direct-answer nodes).
+ */
+export const DEFAULT_TELEGRAM_SYSTEM_PROMPT = `You are a helpful assistant. Respond clearly and concisely.
+
+${TELEGRAM_HTML_FORMAT_INSTRUCTION}
+
+Output: Telegram HTML only. No Markdown (Replace with allowed tags or remove). No raw JSON.`;
diff --git a/src/core/orchestrator.ts b/src/core/orchestrator.ts
index 1eb42f9..2ab4211 100644
--- a/src/core/orchestrator.ts
+++ b/src/core/orchestrator.ts
@@ -302,7 +302,7 @@ export class Orchestrator {
if (chatId != null && conversationId != null) {
this.runArchivingPipeline(chatId, conversationId).catch((err) => {
ConsoleLogger.error("core", "Archiving pipeline error", err instanceof Error ? err : String(err), envelope);
- this.sendToTelegram(chatId, `Archiving failed: ${err instanceof Error ? err.message : String(err)}`);
+ this.sendToTelegram(chatId, `๐ Archiving failed: ${err instanceof Error ? err.message : String(err)}`);
});
}
return;
@@ -313,7 +313,7 @@ export class Orchestrator {
ConsoleLogger.info("core", `Handling reminder.list request for chatId: ${chatId}`, envelope);
this.handleListReminders(chatId, envelope).catch((err) => {
ConsoleLogger.error("core", "List reminders error", err instanceof Error ? err.message : String(err), envelope);
- this.sendToTelegram(chatId, `Error listing reminders: ${err instanceof Error ? err.message : String(err)}`);
+ this.sendToTelegram(chatId, `๐ Error listing reminders: ${err instanceof Error ? err.message : String(err)}`);
});
} else {
ConsoleLogger.warn("core", "reminder.list missing chatId", envelope);
@@ -326,7 +326,7 @@ export class Orchestrator {
if (chatId != null && reminderId != null) {
this.handleCancelReminder(chatId, reminderId, envelope).catch((err) => {
ConsoleLogger.error("core", "Cancel reminder error", err instanceof Error ? err.message : String(err), envelope);
- this.sendToTelegram(chatId, `Error canceling reminder: ${err instanceof Error ? err.message : String(err)}`);
+ this.sendToTelegram(chatId, `๐ Error canceling reminder: ${err instanceof Error ? err.message : String(err)}`);
});
}
return;
@@ -354,7 +354,7 @@ export class Orchestrator {
const p = payload as unknown as FileIngestPayload;
this.handleFileIngest(p).catch((err) => {
ConsoleLogger.error("core", "File ingest error", err instanceof Error ? err : String(err), envelope);
- this.sendToTelegram(p.chatId, `File processing error: ${err instanceof Error ? err.message : String(err)}`);
+ this.sendToTelegram(p.chatId, `๐ File processing error: ${err instanceof Error ? err.message : String(err)}`);
});
return;
}
@@ -418,7 +418,7 @@ export class Orchestrator {
const executor = this.children.get("executor");
const telegram = this.children.get("telegram-adapter");
if (!planner?.stdin.writable || !taskMemory?.stdin.writable || !executor?.stdin.writable || !telegram?.stdin.writable) {
- this.sendToTelegram(chatId, "Service unavailable.");
+ this.sendToTelegram(chatId, "๐ Service unavailable.");
return;
}
@@ -587,7 +587,7 @@ export class Orchestrator {
text = rawData;
}
}
- this.sendToTelegram(chatId, text, false, "Markdown");
+ this.sendToTelegram(chatId, text, false, "HTML");
return;
}
@@ -713,7 +713,7 @@ export class Orchestrator {
} else {
// Everything was ignored or failed with no caption
if (warnings.length > 0) {
- this.sendToTelegram(chatId, "No processable content found in the uploaded files.", true);
+ this.sendToTelegram(chatId, "⚠️ No processable content found in the uploaded files.", true);
}
return;
}
@@ -806,14 +806,14 @@ export class Orchestrator {
const modelRouter = this.children.get("model-router");
const ragService = this.children.get("rag-service");
if (!taskMemory?.stdin.writable || !modelRouter?.stdin.writable || !ragService?.stdin.writable) {
- this.sendToTelegram(chatId, "Service unavailable for archiving.");
+ this.sendToTelegram(chatId, "⚠️ Service unavailable for archiving.");
return;
}
let tasksEnv: Envelope;
try {
tasksEnv = await this.sendAndWait(taskMemory, "task.getByConversationId", { conversationId });
} catch {
- this.sendToTelegram(chatId, "Archived."); // no history or error
+ this.sendToTelegram(chatId, "✅ Archived."); // no history or error
return;
}
const tasksPayload = tasksEnv.payload as { status?: string; result?: { tasks?: Array<{ id: string; goal: string; status: string }> } };
@@ -860,7 +860,7 @@ export class Orchestrator {
});
} catch (errEnv) {
const err = errEnv as Envelope & { payload?: { message?: string } };
- this.sendToTelegram(chatId, `Archiving failed: ${err.payload?.message ?? "Summarization error"}`);
+ this.sendToTelegram(chatId, `๐ Archiving failed: ${err.payload?.message ?? "Summarization error"}`);
return;
}
const summaryPayload = summaryEnv.payload as { status?: string; result?: { text?: string } };
@@ -912,7 +912,7 @@ export class Orchestrator {
return;
}
- this.sendToTelegram(chatIdNum, formattedMessage);
+ this.sendToTelegram(chatIdNum, formattedMessage, false, "HTML");
}
private handleCronAIQueryEvent(envelope: Envelope): void {
@@ -948,7 +948,7 @@ export class Orchestrator {
const cronManager = this.children.get("cron-manager");
if (!cronManager?.stdin.writable) {
ConsoleLogger.warn("core", "Cron manager not available or not writable");
- this.sendToTelegram(chatId, "Cron manager service unavailable.");
+ this.sendToTelegram(chatId, "⚠️ Cron manager service unavailable.");
return;
}
@@ -975,7 +975,7 @@ export class Orchestrator {
if (reminderSchedules.length === 0) {
ConsoleLogger.info("core", "No reminders found, sending 'No active reminders' message");
- this.sendToTelegram(chatId, "No active reminders.");
+ this.sendToTelegram(chatId, "๐๐ป No active reminders.");
return;
}
@@ -986,18 +986,18 @@ export class Orchestrator {
.join("\n\n---\n\n");
const message = `Active reminders:\n\n${formatted}`;
ConsoleLogger.info("core", `Sending reminder list to chatId ${chatId}: ${message.substring(0, 100)}...`);
- this.sendToTelegram(chatId, message);
+ this.sendToTelegram(chatId, message, false, "HTML");
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
ConsoleLogger.error("core", `Error in handleListReminders: ${message}`, err instanceof Error ? err : undefined);
- this.sendToTelegram(chatId, `Error listing reminders: ${message}`);
+ this.sendToTelegram(chatId, `🚨 Error listing reminders: ${message}`);
}
}
private async handleCancelReminder(chatId: number, reminderId: string, _request: Envelope): Promise {
const cronManager = this.children.get("cron-manager");
if (!cronManager?.stdin.writable) {
- this.sendToTelegram(chatId, "Cron manager service unavailable.");
+ this.sendToTelegram(chatId, "⚠️ Cron manager service unavailable.");
return;
}
@@ -1005,13 +1005,13 @@ export class Orchestrator {
const response = await this.sendAndWait(cronManager, "cron.schedule.remove", { id: reminderId });
const responsePayload = response.payload as { status?: string; result?: { removed?: string } };
if (responsePayload.result?.removed === reminderId) {
- this.sendToTelegram(chatId, `Reminder ${reminderId} has been canceled.`);
+ this.sendToTelegram(chatId, `๐ข Reminder ${reminderId} has been canceled.`);
} else {
- this.sendToTelegram(chatId, `Reminder ${reminderId} not found.`);
+ this.sendToTelegram(chatId, `🚨 Reminder ${reminderId} not found.`);
}
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
- this.sendToTelegram(chatId, `Error canceling reminder: ${message}`);
+ this.sendToTelegram(chatId, `🚨 Error canceling reminder: ${message}`);
}
}
@@ -1055,12 +1055,13 @@ export class Orchestrator {
return chunks;
}
- private sendToTelegram(chatId: number, text: string, silent?: boolean, parseMode?: "HTML" | "Markdown" | "MarkdownV2"): void {
+ private sendToTelegram(chatId: number, text: string, silent?: boolean, parseMode?: "HTML" | "Markdown"): void {
const telegram = this.children.get("telegram-adapter");
if (!telegram?.stdin.writable) return;
- // Split message if it's too long (Telegram limit is 4096 characters)
- const chunks = this.splitMessage(text, 4000); // Use 4000 to leave room for continuation markers
+ // Split message if it's too long (Telegram limit is 4096 characters).
+ // We split at 4000 characters to leave room for continuation markers.
+ const chunks = this.splitMessage(text, 4000);
chunks.forEach((chunk, index) => {
let messageText = chunk;
@@ -1083,7 +1084,7 @@ export class Orchestrator {
to: "telegram-adapter",
type: "telegram.send",
version: "1.0",
- payload: { chatId, text: messageText, silent, parseMode },
+ payload: { chatId, text: messageText, silent, parseMode: parseMode ?? "HTML" },
};
telegram.stdin.write(JSON.stringify(envelope) + "\n");
ConsoleLogger.ipc("core", "โ", envelope);
diff --git a/src/services/browser-config.ts b/src/services/browser-config.ts
index b54bc7e..ec6e216 100644
--- a/src/services/browser-config.ts
+++ b/src/services/browser-config.ts
@@ -1,3 +1,5 @@
+import * as os from "os";
+
/**
* Browser configuration utilities for realistic browser behavior and bot detection bypass.
* Provides user agents, viewport sizes, and stealth plugin configuration.
@@ -7,34 +9,29 @@
* Realistic user agents for different browsers and operating systems.
* Updated with recent browser versions (2024-2026) to avoid detection.
*/
-export const USER_AGENTS = [
- // Chrome on Windows
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
-
- // Chrome on macOS
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
-
- // Chrome on Linux
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
-
- // Firefox on Windows
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
- "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
-
- // Firefox on macOS
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
-
- // Safari on macOS
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
-
- // Edge on Windows
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
-] as const;
+export const USER_AGENTS = {
+  windows: [ // Windows 11 still reports "Windows NT 10.0"; no real browser sends "NT 11.0".
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
+  ],
+  macos: [ // Chrome and Safari freeze the macOS token at 10_15_7 regardless of the real OS version.
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
+  ],
+  linux: [
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+  ],
+} as const;
+
+export type OSGroup = keyof typeof USER_AGENTS; // "windows" | "macos" | "linux"
/**
* Common viewport sizes matching real-world screen resolutions.
@@ -52,14 +49,32 @@ export const VIEWPORTS = [
] as const;
/**
- * Returns a random user agent from the predefined list.
+ * Maps Node's os.platform() value onto a USER_AGENTS group.
+ * Platforms other than win32, darwin and linux yield "unknown".
+ */
+export function getUserOS(): OSGroup | "unknown" {
+  const groups = new Map<string, OSGroup>([
+    ["win32", "windows"], ["darwin", "macos"], ["linux", "linux"],
+  ]);
+  return groups.get(os.platform()) ?? "unknown";
+}
+
+/**
+ * Returns a random user agent from the predefined list corresponding to the user's OS.
* This helps avoid detection by rotating through different browser fingerprints.
+ * If the OS is unknown, it defaults to Windows user agents.
*
* @returns A random user agent string
*/
export function getRandomUserAgent(): string {
- const index = Math.floor(Math.random() * USER_AGENTS.length);
- return USER_AGENTS[index]!;
+  // Platforms without a dedicated pool borrow the Windows agents.
+  const detected = getUserOS();
+  const group: OSGroup = detected === "unknown" ? "windows" : detected;
+  const pool = USER_AGENTS[group];
+
+  // Uniform random pick from the selected pool.
+  const pick = Math.floor(Math.random() * pool.length);
+  return pool[pick]!;
}
/**
@@ -92,7 +107,7 @@ export function getRandomViewport(): { width: number; height: number } {
export const STEALTH_CONFIG = {
// Enable all stealth features
enabled: true,
-
+
// Additional configuration can be passed to the stealth plugin
// The plugin will automatically apply various evasion techniques
} as const;
diff --git a/src/services/browser-service.ts b/src/services/browser-service.ts
index 16848b5..87f5583 100644
--- a/src/services/browser-service.ts
+++ b/src/services/browser-service.ts
@@ -128,7 +128,7 @@ export class BrowserService extends BaseProcess {
userAgent: userAgent,
// Additional stealth settings
locale: "en-US",
- timezoneId: "America/New_York",
+ timezoneId: "Europe/Amsterdam",
// Disable automation indicators
ignoreHTTPSErrors: true,
// Realistic browser headers
@@ -309,6 +309,7 @@ export class BrowserService extends BaseProcess {
// Realistic browser behavior
"--lang=en-US",
"--disable-extensions",
+ "--headless=new",
];
if (this.config.userDataDir) {
@@ -322,7 +323,7 @@ export class BrowserService extends BaseProcess {
userAgent: userAgent,
// Additional stealth settings
locale: "en-US",
- timezoneId: "America/New_York",
+ timezoneId: "Europe/Amsterdam",
ignoreHTTPSErrors: true,
extraHTTPHeaders: {
"Accept-Language": "en-US,en;q=0.9",
diff --git a/src/services/generator-service.ts b/src/services/generator-service.ts
index 9761743..7bf4fff 100644
--- a/src/services/generator-service.ts
+++ b/src/services/generator-service.ts
@@ -11,6 +11,10 @@ import { PROTOCOL_VERSION } from "../shared/protocol.js";
import { responsePayloadSchema } from "../shared/protocol.js";
import { buildSummarizerPrompt, SUMMARIZER_SYSTEM_PROMPT } from "../agents/prompts/summarizer.js";
import { ANALYZER_SYSTEM_PROMPT, buildAnalyzerUserPrompt } from "../agents/prompts/analyzer.js";
+import { DEFAULT_TELEGRAM_SYSTEM_PROMPT, TELEGRAM_HTML_FORMAT_INSTRUCTION } from "../agents/prompts/telegram-html.js";
+
+/** Suffix for user prompts: a blank line, the shared Telegram HTML rules, then a final "no Markdown / no plain JSON" reminder, placed last so the LLM sees the formatting rules in the most prominent position. */
+const HTML_PROMPT_SUFFIX = `\n\n${TELEGRAM_HTML_FORMAT_INSTRUCTION}\n\nNEVER use Markdown formatting. NEVER use plain JSON.`;
import { LemonadeAdapter, type ChatMessage } from "./lemonade-adapter.js";
import { ModelRouter } from "./model-router.js";
import { ModelManagerService, type ModelTier } from "./model-manager.js";
@@ -108,6 +112,10 @@ export class GeneratorService extends BaseProcess {
} else {
prompt = p.input.prompt;
}
+ // Append HTML reminder unless a specialized system prompt handles formatting
+ if (!p.input?.system_prompt) {
+ prompt += HTML_PROMPT_SUFFIX;
+ }
if (typeof p.input?.system_prompt === "string") {
systemPrompt = p.input.system_prompt === "analyzer" ? ANALYZER_SYSTEM_PROMPT : p.input.system_prompt;
// If it's an analyzer prompt, use the specialized user prompt builder
@@ -118,7 +126,8 @@ export class GeneratorService extends BaseProcess {
} else if (goal && (context["_criticFeedback"] != null || context["_previousDraft"] != null)) {
const feedback = context["_criticFeedback"] as string | undefined;
const previous = context["_previousDraft"] as string | undefined;
- prompt = `User goal: ${goal}\n\nPrevious draft:\n${previous ?? ""}\n\nCritic feedback:\n${feedback ?? ""}\n\nProduce an improved draft that addresses the feedback. Output only the improved text.`;
+ prompt = `User goal: ${goal}\n\nPrevious draft:\n${previous ?? ""}\n\nCritic feedback:\n${feedback ?? ""}\n\nProduce an improved draft that addresses the feedback. Output only the improved text.${HTML_PROMPT_SUFFIX}`;
+ if (!systemPrompt) systemPrompt = DEFAULT_TELEGRAM_SYSTEM_PROMPT;
} else if (goal) {
const depOutputs = Object.entries(context)
.filter(([k]) => !k.startsWith("_"))
@@ -143,7 +152,7 @@ export class GeneratorService extends BaseProcess {
// For other objects, stringify
return JSON.stringify(v);
});
- prompt = `User goal: ${goal}\n\nContext from previous steps:\n${depOutputs.join("\n\n")}\n\nProduce a direct response to the goal. Output only the response text.`;
+ prompt = `User goal: ${goal}\n\nContext from previous steps:\n${depOutputs.join("\n\n")}\n\nProduce a direct response to the goal. Output only the response text.${HTML_PROMPT_SUFFIX}`;
} else {
const depOutputs = Object.values(context).map((v) => {
// Extract body from http_get responses
@@ -166,16 +175,17 @@ export class GeneratorService extends BaseProcess {
// For other objects, stringify
return JSON.stringify(v);
});
- prompt = depOutputs.join("\n\n") || "Generate a brief response.";
+ prompt = (depOutputs.join("\n\n") || "Generate a brief response.") + HTML_PROMPT_SUFFIX;
}
if (p.input?.messages && Array.isArray(p.input.messages)) {
messages = p.input.messages as ChatMessage[];
}
if (!messages) {
- messages = systemPrompt
- ? [{ role: "system" as const, content: systemPrompt }, { role: "user" as const, content: prompt }]
- : [{ role: "user" as const, content: prompt }];
+ // Always inject a system prompt to ensure Telegram HTML output.
+ // Falls back to the default Telegram formatting prompt when no specific one is set.
+ const effectiveSystemPrompt = systemPrompt ?? DEFAULT_TELEGRAM_SYSTEM_PROMPT;
+ messages = [{ role: "system" as const, content: effectiveSystemPrompt }, { role: "user" as const, content: prompt }];
}
const genResult = await this.lemonade.chat(messages, model, {
diff --git a/src/services/skill-manager.ts b/src/services/skill-manager.ts
index 41b2c5e..0a163fb 100644
--- a/src/services/skill-manager.ts
+++ b/src/services/skill-manager.ts
@@ -1,6 +1,7 @@
import { readFileSync, existsSync } from "node:fs";
import { join, resolve } from "node:path";
import { getConfig } from "../shared/config.js";
+import { TELEGRAM_HTML_FORMAT_INSTRUCTION } from "../agents/prompts/telegram-html.js";
export interface SkillInfo {
name: string;
@@ -35,13 +36,16 @@ export class SkillManager {
/**
* Load the skill prompt (SKILL.md) for a given skill.
+ * Appends Telegram HTML formatting instructions so skill output
+ * always uses Telegram-supported HTML instead of Markdown.
*/
public getSkillPrompt(name: string): string | null {
const skillPath = join(this.skillsDir, name, "SKILL.md");
if (!existsSync(skillPath)) return null;
try {
- return readFileSync(skillPath, "utf-8");
+ const content = readFileSync(skillPath, "utf-8");
+ return `${content}\n\n## OUTPUT FORMATTING\n${TELEGRAM_HTML_FORMAT_INSTRUCTION}\n\nYou MUST format your final response using only the Telegram HTML tags listed above. Never use Markdown (replace with allowed HTML tags).`;
} catch (err) {
console.error(`Failed to load skill prompt for ${name}:`, err);
return null;
diff --git a/src/services/tool-host.ts b/src/services/tool-host.ts
index edd35fa..390e835 100644
--- a/src/services/tool-host.ts
+++ b/src/services/tool-host.ts
@@ -130,7 +130,7 @@ export class ToolHost extends BaseProcess {
if (typeof query !== "string") throw new Error("http_search requires query (string)");
// Build Search URL (using the HTML endpoint which is more relaxed)
- const searchUrl = "https://search.yahoo.com/search?p=" + query;
+ const searchUrl = "https://html.duckduckgo.com/html?q=" + encodeURIComponent(query);
const startTime = Date.now();
diff --git a/src/tests/cron-ai.test.ts b/src/tests/cron-ai.test.ts
index 44f8d36..372f998 100644
--- a/src/tests/cron-ai.test.ts
+++ b/src/tests/cron-ai.test.ts
@@ -130,7 +130,7 @@ describe("Cron-to-AI Flow (E2E Mocked)", () => {
chatId,
"Cron Job Execution Succeeded",
false,
- "Markdown"
+ "HTML"
);
});
});