mirror of
https://github.com/larchanka/manbot.git
synced 2026-05-13 21:42:08 +00:00
M1-01: Enhance OllamaAdapter with warmup support and keep_alive parameter
- Add `keep_alive?: string | number` to the `GenerateOptions` interface
- Add a new `ChatOptions` interface with `timeoutMs` and `keep_alive` fields
- Update `generate()` to conditionally include `keep_alive` in the request body
- Update `chat()` to use `ChatOptions` (replaces the anonymous option type)
- Update `streamChat()` to use `ChatOptions` and conditionally include `keep_alive`
- Add a `warmup(model, keepAlive)` method using `/api/chat` with a minimal prompt and `stream: false`; wraps network errors with a descriptive message
- Move task M1-01 to Done on `_board/_BOARD.md`
This commit is contained in:
committed by
Mikhail Larchanka
parent
95004e534f
commit
a334824cf0
@@ -2,15 +2,6 @@
|
||||
|
||||
## To Do
|
||||
|
||||
### M1-01 Enhance OllamaAdapter with Warmup Support
|
||||
- tags: [todo]
|
||||
- defaultExpanded: false
|
||||
```md
|
||||
Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
|
||||
|
||||
Source: M1-01_OLLAMA_WARMUP.md
|
||||
```
|
||||
|
||||
### M1-02 Add Model Manager Configuration
|
||||
- tags: [todo]
|
||||
- defaultExpanded: false
|
||||
@@ -76,6 +67,15 @@
|
||||
|
||||
## In Progress
|
||||
|
||||
### M1-01 Enhance OllamaAdapter with Warmup Support
|
||||
- tags: [in-progress]
|
||||
- defaultExpanded: true
|
||||
```md
|
||||
Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
|
||||
|
||||
Source: M1-01_OLLAMA_WARMUP.md
|
||||
```
|
||||
|
||||
## Done
|
||||
|
||||
### S5-05 End-to-End Integration Test
|
||||
|
||||
@@ -7,6 +7,12 @@ import { getConfig } from "../shared/config.js";
|
||||
|
||||
/**
 * Per-call options for OllamaAdapter.generate().
 */
export interface GenerateOptions {
  /** Request timeout in milliseconds; falls back to the adapter's default when omitted. */
  timeoutMs?: number;
  /**
   * Forwarded verbatim as Ollama's `keep_alive` request field when set
   * (e.g. "5m" or a number of seconds); omitted from the body otherwise.
   */
  keep_alive?: string | number;
}
|
||||
|
||||
/**
 * Per-call options for OllamaAdapter.chat() and streamChat().
 */
export interface ChatOptions {
  /** Request timeout in milliseconds; falls back to the adapter's default when omitted. */
  timeoutMs?: number;
  /**
   * Forwarded verbatim as Ollama's `keep_alive` request field when set
   * (e.g. "5m" or a number of seconds); omitted from the body otherwise.
   */
  keep_alive?: string | number;
}
|
||||
|
||||
export interface GenerateResult {
|
||||
@@ -68,7 +74,8 @@ export class OllamaAdapter {
|
||||
): Promise<GenerateResult> {
|
||||
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
|
||||
const url = `${this.baseUrl}/api/generate`;
|
||||
const body = { model, prompt, stream: false };
|
||||
const body: Record<string, unknown> = { model, prompt, stream: false };
|
||||
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
|
||||
const res = await this.fetchWithRetry(url, body, timeoutMs);
|
||||
const data = (await res.json()) as {
|
||||
response?: string;
|
||||
@@ -91,11 +98,12 @@ export class OllamaAdapter {
|
||||
async chat(
|
||||
messages: ChatMessage[],
|
||||
model: string,
|
||||
opts: { timeoutMs?: number } = {},
|
||||
opts: ChatOptions = {},
|
||||
): Promise<ChatResult> {
|
||||
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
|
||||
const url = `${this.baseUrl}/api/chat`;
|
||||
const body = { model, messages, stream: false };
|
||||
const body: Record<string, unknown> = { model, messages, stream: false };
|
||||
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
|
||||
const res = await this.fetchWithRetry(url, body, timeoutMs);
|
||||
const data = (await res.json()) as {
|
||||
message?: { role: string; content: string };
|
||||
@@ -130,17 +138,40 @@ export class OllamaAdapter {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Warm up a model by sending a minimal prompt, ensuring it is loaded into memory.
|
||||
* The keep_alive parameter controls how long the model stays in memory after the call.
|
||||
*/
|
||||
async warmup(model: string, keepAlive: string | number): Promise<void> {
|
||||
const url = `${this.baseUrl}/api/chat`;
|
||||
const body = {
|
||||
model,
|
||||
messages: [{ role: "user", content: "hello" }],
|
||||
stream: false,
|
||||
keep_alive: keepAlive,
|
||||
};
|
||||
try {
|
||||
await this.fetchWithRetry(url, body, this.timeoutMs);
|
||||
} catch (err) {
|
||||
throw new Error(
|
||||
`OllamaAdapter.warmup failed for model "${model}": ${err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream chat response. Returns async iterator of chunks (NDJSON).
|
||||
*/
|
||||
async *streamChat(
|
||||
messages: ChatMessage[],
|
||||
model: string,
|
||||
opts: { timeoutMs?: number } = {},
|
||||
opts: ChatOptions = {},
|
||||
): AsyncGenerator<StreamChunk> {
|
||||
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
|
||||
const url = `${this.baseUrl}/api/chat`;
|
||||
const body = { model, messages, stream: true };
|
||||
const body: Record<string, unknown> = { model, messages, stream: true };
|
||||
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
|
||||
const res = await this.fetchWithRetry(url, body, timeoutMs);
|
||||
if (!res.body) return;
|
||||
const reader = res.body.getReader();
|
||||
|
||||
Reference in New Issue
Block a user