M1-01: Enhance OllamaAdapter with warmup support and keep_alive parameter

- Add keep_alive?: string | number to GenerateOptions interface
- Add new ChatOptions interface with timeoutMs and keep_alive fields
- Update generate() to conditionally include keep_alive in request body
- Update chat() to use ChatOptions (replaces anonymous type)
- Update streamChat() to use ChatOptions and conditionally include keep_alive
- Add warmup(model, keepAlive) method using /api/chat with a minimal prompt
  and stream:false; wraps network errors with a descriptive message
- Move task M1-01 to Done on _board/_BOARD.md
This commit is contained in:
larchanka
2026-02-19 17:12:52 +01:00
committed by Mikhail Larchanka
parent 95004e534f
commit a334824cf0
2 changed files with 45 additions and 14 deletions

View File

@@ -2,15 +2,6 @@
## To Do
### M1-01 Enhance OllamaAdapter with Warmup Support
- tags: [todo]
- defaultExpanded: false
```md
Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
Source: M1-01_OLLAMA_WARMUP.md
```
### M1-02 Add Model Manager Configuration
- tags: [todo]
- defaultExpanded: false
@@ -76,6 +67,15 @@
## In Progress
### M1-01 Enhance OllamaAdapter with Warmup Support
- tags: [in-progress]
- defaultExpanded: true
```md
Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
Source: M1-01_OLLAMA_WARMUP.md
```
## Done
### S5-05 End-to-End Integration Test

View File

@@ -7,6 +7,12 @@ import { getConfig } from "../shared/config.js";
/**
 * Per-call options for OllamaAdapter.generate().
 */
export interface GenerateOptions {
  /** Request timeout in milliseconds; when omitted the adapter's default timeout is used. */
  timeoutMs?: number;
  /**
   * Forwarded to Ollama's keep_alive field when set: controls how long the
   * model stays loaded after the call (e.g. "5m", or a number — presumably
   * seconds per the Ollama API; confirm against the Ollama docs).
   */
  keep_alive?: string | number;
}
/**
 * Per-call options for OllamaAdapter.chat() and streamChat().
 */
export interface ChatOptions {
  /** Request timeout in milliseconds; when omitted the adapter's default timeout is used. */
  timeoutMs?: number;
  /**
   * Forwarded to Ollama's keep_alive field when set: controls how long the
   * model stays loaded after the call (e.g. "5m", or a number — presumably
   * seconds per the Ollama API; confirm against the Ollama docs).
   */
  keep_alive?: string | number;
}
export interface GenerateResult {
@@ -68,7 +74,8 @@ export class OllamaAdapter {
): Promise<GenerateResult> {
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
const url = `${this.baseUrl}/api/generate`;
const body = { model, prompt, stream: false };
const body: Record<string, unknown> = { model, prompt, stream: false };
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
const res = await this.fetchWithRetry(url, body, timeoutMs);
const data = (await res.json()) as {
response?: string;
@@ -91,11 +98,12 @@ export class OllamaAdapter {
async chat(
messages: ChatMessage[],
model: string,
opts: { timeoutMs?: number } = {},
opts: ChatOptions = {},
): Promise<ChatResult> {
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
const url = `${this.baseUrl}/api/chat`;
const body = { model, messages, stream: false };
const body: Record<string, unknown> = { model, messages, stream: false };
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
const res = await this.fetchWithRetry(url, body, timeoutMs);
const data = (await res.json()) as {
message?: { role: string; content: string };
@@ -130,17 +138,40 @@ export class OllamaAdapter {
return result;
}
/**
 * Preload a model into memory by issuing a trivial /api/chat request.
 * The keep_alive value controls how long the model remains resident
 * after this call completes.
 *
 * @param model - Name of the model to warm up.
 * @param keepAlive - Ollama keep_alive value (e.g. "5m" or a number).
 * @throws Error wrapping the underlying failure with the model name for context.
 */
async warmup(model: string, keepAlive: string | number): Promise<void> {
  const endpoint = `${this.baseUrl}/api/chat`;
  // Minimal non-streaming chat payload: enough to force a model load,
  // cheap enough to return quickly.
  const payload = {
    model,
    messages: [{ role: "user", content: "hello" }],
    stream: false,
    keep_alive: keepAlive,
  };
  try {
    await this.fetchWithRetry(endpoint, payload, this.timeoutMs);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `OllamaAdapter.warmup failed for model "${model}": ${reason}`,
    );
  }
}
/**
* Stream chat response. Returns async iterator of chunks (NDJSON).
*/
async *streamChat(
messages: ChatMessage[],
model: string,
opts: { timeoutMs?: number } = {},
opts: ChatOptions = {},
): AsyncGenerator<StreamChunk> {
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
const url = `${this.baseUrl}/api/chat`;
const body = { model, messages, stream: true };
const body: Record<string, unknown> = { model, messages, stream: true };
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
const res = await this.fetchWithRetry(url, body, timeoutMs);
if (!res.body) return;
const reader = res.body.getReader();