diff --git a/_board/_BOARD.md b/_board/_BOARD.md
index a173247..8bf4a37 100644
--- a/_board/_BOARD.md
+++ b/_board/_BOARD.md
@@ -2,15 +2,6 @@
 
 ## To Do
 
-### M1-01 Enhance OllamaAdapter with Warmup Support
-  - tags: [todo]
-  - defaultExpanded: false
-  ```md
-  Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
-
-  Source: M1-01_OLLAMA_WARMUP.md
-  ```
-
 ### M1-02 Add Model Manager Configuration
   - tags: [todo]
   - defaultExpanded: false
@@ -76,6 +67,15 @@
 
 ## In Progress
 
+### M1-01 Enhance OllamaAdapter with Warmup Support
+  - tags: [in-progress]
+  - defaultExpanded: true
+  ```md
+  Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
+
+  Source: M1-01_OLLAMA_WARMUP.md
+  ```
+
 ## Done
 
 ### S5-05 End-to-End Integration Test
diff --git a/src/services/ollama-adapter.ts b/src/services/ollama-adapter.ts
index 37730d3..de76ebc 100644
--- a/src/services/ollama-adapter.ts
+++ b/src/services/ollama-adapter.ts
@@ -7,6 +7,12 @@ import { getConfig } from "../shared/config.js";
 
 export interface GenerateOptions {
   timeoutMs?: number;
+  keep_alive?: string | number;
+}
+
+export interface ChatOptions {
+  timeoutMs?: number;
+  keep_alive?: string | number;
 }
 
 export interface GenerateResult {
@@ -68,7 +74,8 @@
   ): Promise<GenerateResult> {
     const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
     const url = `${this.baseUrl}/api/generate`;
-    const body = { model, prompt, stream: false };
+    const body: Record<string, unknown> = { model, prompt, stream: false };
+    if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
     const res = await this.fetchWithRetry(url, body, timeoutMs);
     const data = (await res.json()) as {
       response?: string;
@@ -91,11 +98,12 @@
   async chat(
     messages: ChatMessage[],
     model: string,
-    opts: { timeoutMs?: number } = {},
+    opts: ChatOptions = {},
   ): Promise<ChatResult> {
     const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
     const url = `${this.baseUrl}/api/chat`;
-    const body = { model, messages, stream: false };
+    const body: Record<string, unknown> = { model, messages, stream: false };
+    if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
     const res = await this.fetchWithRetry(url, body, timeoutMs);
     const data = (await res.json()) as {
       message?: { role: string; content: string };
@@ -130,17 +138,40 @@
     return result;
   }
 
+  /**
+   * Warm up a model by sending a minimal prompt, ensuring it is loaded into memory.
+   * The keep_alive parameter controls how long the model stays in memory after the call.
+   */
+  async warmup(model: string, keepAlive: string | number): Promise<void> {
+    const url = `${this.baseUrl}/api/chat`;
+    const body = {
+      model,
+      messages: [{ role: "user", content: "hello" }],
+      stream: false,
+      keep_alive: keepAlive,
+    };
+    try {
+      await this.fetchWithRetry(url, body, this.timeoutMs);
+    } catch (err) {
+      throw new Error(
+        `OllamaAdapter.warmup failed for model "${model}": ${err instanceof Error ? err.message : String(err)
+        }`,
+      );
+    }
+  }
+
   /**
    * Stream chat response. Returns async iterator of chunks (NDJSON).
    */
   async *streamChat(
     messages: ChatMessage[],
     model: string,
-    opts: { timeoutMs?: number } = {},
+    opts: ChatOptions = {},
   ): AsyncGenerator<string> {
     const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
     const url = `${this.baseUrl}/api/chat`;
-    const body = { model, messages, stream: true };
+    const body: Record<string, unknown> = { model, messages, stream: true };
+    if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
     const res = await this.fetchWithRetry(url, body, timeoutMs);
     if (!res.body) return;
     const reader = res.body.getReader();
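
A minimal call-site sketch of the new warmup flow (not part of the diff; the zero-argument constructor and the `llama3.2` model name are assumptions for illustration — the real adapter presumably pulls its base URL and timeout from `getConfig()`):

```ts
import { OllamaAdapter } from "./src/services/ollama-adapter.js";

// Assumed constructor; how the adapter is actually built is not shown in this diff.
const adapter = new OllamaAdapter();

// Pre-load the model at startup so the first real request skips the cold start.
// keep_alive accepts a duration string ("10m") or a number of seconds; Ollama
// keeps the model resident for that long after the call returns.
await adapter.warmup("llama3.2", "10m");

// Subsequent chat calls can pass the same option to extend residency.
const reply = await adapter.chat(
  [{ role: "user", content: "Summarize the release notes." }],
  "llama3.2",
  { keep_alive: "10m" },
);
console.log(reply);
```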