mirror of
https://github.com/larchanka/manbot.git
synced 2026-05-13 21:42:08 +00:00
M1-01: Enhance OllamaAdapter with warmup support and keep_alive parameter
- Add `keep_alive?: string | number` to the `GenerateOptions` interface
- Add a new `ChatOptions` interface with `timeoutMs` and `keep_alive` fields
- Update `generate()` to conditionally include `keep_alive` in the request body
- Update `chat()` to use `ChatOptions` (replaces the anonymous option type)
- Update `streamChat()` to use `ChatOptions` and conditionally include `keep_alive`
- Add a `warmup(model, keepAlive)` method using `/api/chat` with a minimal prompt and `stream: false`; wraps network errors with a descriptive message
- Move task M1-01 to Done on `_board/_BOARD.md`
This commit is contained in:
committed by
Mikhail Larchanka
parent
95004e534f
commit
a334824cf0
@@ -2,15 +2,6 @@
|
||||
|
||||
## To Do
|
||||
|
||||
### M1-01 Enhance OllamaAdapter with Warmup Support
|
||||
- tags: [todo]
|
||||
- defaultExpanded: false
|
||||
```md
|
||||
Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
|
||||
|
||||
Source: M1-01_OLLAMA_WARMUP.md
|
||||
```
|
||||
|
||||
### M1-02 Add Model Manager Configuration
|
||||
- tags: [todo]
|
||||
- defaultExpanded: false
|
||||
@@ -76,6 +67,15 @@
|
||||
|
||||
## In Progress
|
||||
|
||||
### M1-01 Enhance OllamaAdapter with Warmup Support
|
||||
- tags: [in-progress]
|
||||
- defaultExpanded: true
|
||||
```md
|
||||
Add a warmup method to OllamaAdapter that uses the /api/chat endpoint with a minimal prompt and supports the keep_alive parameter.
|
||||
|
||||
Source: M1-01_OLLAMA_WARMUP.md
|
||||
```
|
||||
|
||||
## Done
|
||||
|
||||
### S5-05 End-to-End Integration Test
|
||||
|
||||
@@ -7,6 +7,12 @@ import { getConfig } from "../shared/config.js";
|
||||
|
||||
/**
 * Per-call options for OllamaAdapter.generate().
 */
export interface GenerateOptions {
  /** Request timeout in milliseconds; falls back to the adapter's default when omitted. */
  timeoutMs?: number;
  /**
   * Forwarded verbatim as Ollama's `keep_alive` request field when set
   * (e.g. "5m" or a number of seconds); omitted from the body otherwise.
   */
  keep_alive?: string | number;
}
|
||||
|
||||
/**
 * Per-call options for OllamaAdapter.chat() and streamChat().
 */
export interface ChatOptions {
  /** Request timeout in milliseconds; falls back to the adapter's default when omitted. */
  timeoutMs?: number;
  /**
   * Forwarded verbatim as Ollama's `keep_alive` request field when set
   * (e.g. "5m" or a number of seconds); omitted from the body otherwise.
   */
  keep_alive?: string | number;
}
|
||||
|
||||
export interface GenerateResult {
|
||||
@@ -68,7 +74,8 @@ export class OllamaAdapter {
|
||||
): Promise<GenerateResult> {
|
||||
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
|
||||
const url = `${this.baseUrl}/api/generate`;
|
||||
const body = { model, prompt, stream: false };
|
||||
const body: Record<string, unknown> = { model, prompt, stream: false };
|
||||
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
|
||||
const res = await this.fetchWithRetry(url, body, timeoutMs);
|
||||
const data = (await res.json()) as {
|
||||
response?: string;
|
||||
@@ -91,11 +98,12 @@ export class OllamaAdapter {
|
||||
async chat(
|
||||
messages: ChatMessage[],
|
||||
model: string,
|
||||
opts: { timeoutMs?: number } = {},
|
||||
opts: ChatOptions = {},
|
||||
): Promise<ChatResult> {
|
||||
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
|
||||
const url = `${this.baseUrl}/api/chat`;
|
||||
const body = { model, messages, stream: false };
|
||||
const body: Record<string, unknown> = { model, messages, stream: false };
|
||||
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
|
||||
const res = await this.fetchWithRetry(url, body, timeoutMs);
|
||||
const data = (await res.json()) as {
|
||||
message?: { role: string; content: string };
|
||||
@@ -130,17 +138,40 @@ export class OllamaAdapter {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Warm up a model by sending a minimal prompt, ensuring it is loaded into memory.
|
||||
* The keep_alive parameter controls how long the model stays in memory after the call.
|
||||
*/
|
||||
async warmup(model: string, keepAlive: string | number): Promise<void> {
|
||||
const url = `${this.baseUrl}/api/chat`;
|
||||
const body = {
|
||||
model,
|
||||
messages: [{ role: "user", content: "hello" }],
|
||||
stream: false,
|
||||
keep_alive: keepAlive,
|
||||
};
|
||||
try {
|
||||
await this.fetchWithRetry(url, body, this.timeoutMs);
|
||||
} catch (err) {
|
||||
throw new Error(
|
||||
`OllamaAdapter.warmup failed for model "${model}": ${err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream chat response. Returns async iterator of chunks (NDJSON).
|
||||
*/
|
||||
async *streamChat(
|
||||
messages: ChatMessage[],
|
||||
model: string,
|
||||
opts: { timeoutMs?: number } = {},
|
||||
opts: ChatOptions = {},
|
||||
): AsyncGenerator<StreamChunk> {
|
||||
const timeoutMs = opts.timeoutMs ?? this.timeoutMs;
|
||||
const url = `${this.baseUrl}/api/chat`;
|
||||
const body = { model, messages, stream: true };
|
||||
const body: Record<string, unknown> = { model, messages, stream: true };
|
||||
if (opts.keep_alive !== undefined) body.keep_alive = opts.keep_alive;
|
||||
const res = await this.fetchWithRetry(url, body, timeoutMs);
|
||||
if (!res.body) return;
|
||||
const reader = res.body.getReader();
|
||||
|
||||
Reference in New Issue
Block a user