fix: stabilize character eval and Qwen model routing
@@ -62,26 +62,24 @@ pnpm openclaw qa character-eval \
   --model minimax/MiniMax-M2.7,thinking=high \
   --model zai/glm-5.1,thinking=high \
   --model moonshot/kimi-k2.5,thinking=high \
-  --model qwen/qwen3.6-plus,thinking=high \
-  --model xiaomi/mimo-v2-pro,thinking=high \
+  --model qwen/qwen3.5-plus,thinking=high \
   --model google/gemini-3.1-pro-preview,thinking=high \
   --model codex-cli/<codex-model>,thinking=high \
   --judge-model openai/gpt-5.4,thinking=xhigh,fast \
   --judge-model anthropic/claude-opus-4-6,thinking=high \
-  --concurrency 8 \
-  --judge-concurrency 8 \
+  --concurrency 16 \
+  --judge-concurrency 16 \
   --output-dir .artifacts/qa-e2e/character-eval-<tag>
 ```

 - Runs local QA gateway child processes, not Docker.
 - Preferred model spec syntax is `provider/model,thinking=<level>[,fast|,no-fast|,fast=<bool>]` for both `--model` and `--judge-model` (a sketch of this grammar follows after this list).
 - Do not add new examples with separate `--model-thinking`; keep that flag as legacy compatibility only.
-- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, `qwen/qwen3.6-plus`, `xiaomi/mimo-v2-pro`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
+- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, `qwen/qwen3.5-plus`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
 - Candidate thinking defaults to `high`, with `xhigh` for OpenAI models that support it. Prefer inline `--model provider/model,thinking=<level>`; `--thinking <level>` and `--model-thinking <provider/model=level>` remain compatibility shims.
 - OpenAI candidate refs default to fast mode so priority processing is used where supported. Use inline `,fast`, `,no-fast`, or `,fast=false` for one model; use `--fast` only to force fast mode for every candidate.
 - Judges default to `openai/gpt-5.4,thinking=xhigh,fast` and `anthropic/claude-opus-4-6,thinking=high`.
 - Report includes judge ranking, run stats, durations, and full transcripts; do not include raw judge replies. Duration is benchmark context, not a grading signal.
-- Candidate and judge concurrency default to 8. Use `--concurrency <n>` and `--judge-concurrency <n>` to override when local gateways or provider limits need a gentler lane.
+- Candidate and judge concurrency default to 16. Use `--concurrency <n>` and `--judge-concurrency <n>` to override when local gateways or provider limits need a gentler lane.
 - Scenario source should stay markdown-driven under `qa/scenarios/`.
 - For isolated character/persona evals, write the persona into `SOUL.md` and blank `IDENTITY.md` in the scenario flow. Use `SOUL.md + IDENTITY.md` only when intentionally testing how the normal OpenClaw identity combines with the character.
 - Keep prompts natural and task-shaped. The candidate model should receive character setup through `SOUL.md`, then normal user turns such as chat, workspace help, and small file tasks; do not ask "how would you react?" or tell the model it is in an eval.

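The inline spec grammar above packs the model ref, thinking level, and fast mode into one flag value. As a reading aid only, here is a minimal TypeScript sketch of that grammar; `parseModelSpec` is a hypothetical name for illustration, not the parser OpenClaw ships.

```ts
// Illustrative only: parses "provider/model,thinking=<level>[,fast|,no-fast|,fast=<bool>]".
type ParsedModelSpec = { ref: string; thinking?: string; fast?: boolean };

function parseModelSpec(spec: string): ParsedModelSpec {
  const [ref, ...options] = spec.split(",");
  const parsed: ParsedModelSpec = { ref };
  for (const option of options) {
    if (option === "fast") parsed.fast = true;
    else if (option === "no-fast") parsed.fast = false;
    else if (option.startsWith("fast=")) parsed.fast = option.slice("fast=".length) === "true";
    else if (option.startsWith("thinking=")) parsed.thinking = option.slice("thinking=".length);
  }
  return parsed;
}

// parseModelSpec("openai/gpt-5.4,thinking=xhigh,fast")
// -> { ref: "openai/gpt-5.4", thinking: "xhigh", fast: true }
```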
@@ -94,13 +94,13 @@ pnpm openclaw qa character-eval \
   --model minimax/MiniMax-M2.7,thinking=high \
   --model zai/glm-5.1,thinking=high \
   --model moonshot/kimi-k2.5,thinking=high \
-  --model qwen/qwen3.6-plus,thinking=high \
-  --model xiaomi/mimo-v2-pro,thinking=high \
+  --model qwen/qwen3.5-plus,thinking=high \
   --model google/gemini-3.1-pro-preview,thinking=high \
   --judge-model openai/gpt-5.4,thinking=xhigh,fast \
   --judge-model anthropic/claude-opus-4-6,thinking=high \
-  --concurrency 8 \
-  --judge-concurrency 8
+  --blind-judge-models \
+  --concurrency 16 \
+  --judge-concurrency 16
 ```

 The command runs local QA gateway child processes, not Docker. Character eval
@@ -109,6 +109,10 @@ such as chat, workspace help, and small file tasks. The candidate model should
 not be told that it is being evaluated. The command preserves each full
 transcript, records basic run stats, then asks the judge models in fast mode with
 `xhigh` reasoning to rank the runs by naturalness, vibe, and humor.
+Use `--blind-judge-models` when comparing providers: the judge prompt still gets
+every transcript and run status, but candidate refs are replaced with neutral
+labels such as `candidate-01`; the report maps rankings back to real refs after
+parsing.
 Candidate runs default to `high` thinking, with `xhigh` for OpenAI models that
 support it. Override a specific candidate inline with
 `--model provider/model,thinking=<level>`. `--thinking <level>` still sets a
@@ -120,14 +124,14 @@ single candidate or judge needs an override. Pass `--fast` only when you want to
 force fast mode on for every candidate model. Candidate and judge durations are
 recorded in the report for benchmark analysis, but judge prompts explicitly say
 not to rank by speed.
-Candidate and judge model runs both default to concurrency 8. Lower
+Candidate and judge model runs both default to concurrency 16. Lower
 `--concurrency` or `--judge-concurrency` when provider limits or local gateway
 pressure make a run too noisy.
 When no candidate `--model` is passed, the character eval defaults to
 `openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`,
 `anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`,
-`moonshot/kimi-k2.5`, `qwen/qwen3.6-plus`, `xiaomi/mimo-v2-pro`, and
-`google/gemini-3.1-pro-preview`.
+`moonshot/kimi-k2.5`, `qwen/qwen3.5-plus`, and
+`google/gemini-3.1-pro-preview` when no `--model` is passed.
 When no `--judge-model` is passed, the judges default to
 `openai/gpt-5.4,thinking=xhigh,fast` and
 `anthropic/claude-opus-4-6,thinking=high`.

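To make the blind-judging flow above concrete, here is a short TypeScript illustration that mirrors the `formatBlindCandidateLabel` helper and label-to-ref mapping this commit adds (the real implementation appears in the character-eval source further down).

```ts
// Illustration only: blind labels hide candidate refs from the judge,
// then the report maps rankings back to the real refs.
function formatBlindCandidateLabel(index: number): string {
  return `candidate-${String(index + 1).padStart(2, "0")}`;
}

const models = ["openai/gpt-5.4", "codex-cli/test-model"];
const labelToModel = new Map(
  models.map((model, index) => [formatBlindCandidateLabel(index), model]),
);

// A judge ranks the neutral labels; the report restores the real refs.
const judged = [
  { model: "candidate-02", rank: 1 },
  { model: "candidate-01", rank: 2 },
];
const restored = judged.map((ranking) => ({
  ...ranking,
  model: labelToModel.get(ranking.model) ?? ranking.model,
}));
// restored[0].model === "codex-cli/test-model"
```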
@@ -88,7 +88,9 @@ requiring the built-in `qwen` provider id specifically.

 ## Built-in catalog

-OpenClaw currently ships this bundled Qwen catalog:
+OpenClaw currently ships this bundled Qwen catalog. The configured catalog is
+endpoint-aware: Coding Plan configs omit models that are only known to work on
+the Standard endpoint.

 | Model ref | Input | Context | Notes |
 | --------------------------- | ----------- | --------- | -------------------------------------------------- |

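The endpoint-awareness described above comes from the `buildQwenModelCatalogForBaseUrl` helper this commit introduces. A usage sketch, assuming the extension's own module layout for the import path:

```ts
// Behavior sketch of the endpoint-aware catalog helpers added in this commit.
import {
  buildQwenModelCatalogForBaseUrl,
  QWEN_36_PLUS_MODEL_ID,
  QWEN_BASE_URL, // Coding Plan endpoint (coding.dashscope.aliyuncs.com)
  QWEN_STANDARD_GLOBAL_BASE_URL, // Standard pay-as-you-go endpoint
} from "./models.js";

const codingPlan = buildQwenModelCatalogForBaseUrl(QWEN_BASE_URL);
const standard = buildQwenModelCatalogForBaseUrl(QWEN_STANDARD_GLOBAL_BASE_URL);

// qwen3.6-plus is filtered out on Coding Plan endpoints...
console.log(codingPlan.some((model) => model.id === QWEN_36_PLUS_MODEL_ID)); // false
// ...but stays available on Standard endpoints.
console.log(standard.some((model) => model.id === QWEN_36_PLUS_MODEL_ID)); // true
```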
@@ -109,6 +109,7 @@ describe("runQaCharacterEval", () => {
     const report = await fs.readFile(result.reportPath, "utf8");
     expect(report).toContain("Execution: local QA gateway child processes, not Docker");
     expect(report).toContain("Judges: openai/gpt-5.4");
+    expect(report).toContain("Judge model labels: visible");
     expect(report).toContain("## Judge Rankings");
     expect(report).toContain("### openai/gpt-5.4");
     expect(report).toContain("reply from openai/gpt-5.4");
@@ -120,6 +121,57 @@ describe("runQaCharacterEval", () => {
     expect(report).not.toContain("Judge Raw Reply");
   });

+  it("can hide candidate model refs from judge prompts and map rankings back", async () => {
+    const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
+      makeSuiteResult({
+        outputDir: params.outputDir,
+        model: params.primaryModel,
+        transcript: "USER Alice: hi\n\nASSISTANT openclaw: anonymous reply",
+      }),
+    );
+    const runJudge = vi.fn(async (params: CharacterRunJudgeParams) => {
+      expect(params.prompt).toContain("## CANDIDATE candidate-01");
+      expect(params.prompt).toContain("## CANDIDATE candidate-02");
+      expect(params.prompt).not.toContain("openai/gpt-5.4");
+      expect(params.prompt).not.toContain("codex-cli/test-model");
+      return JSON.stringify({
+        rankings: [
+          {
+            model: "candidate-02",
+            rank: 1,
+            score: 9.1,
+            summary: "Better vibes.",
+          },
+          {
+            model: "candidate-01",
+            rank: 2,
+            score: 7.4,
+            summary: "Solid.",
+          },
+        ],
+      });
+    });
+
+    const result = await runQaCharacterEval({
+      repoRoot: tempRoot,
+      outputDir: path.join(tempRoot, "character"),
+      models: ["openai/gpt-5.4", "codex-cli/test-model"],
+      judgeModels: ["openai/gpt-5.4"],
+      judgeBlindModels: true,
+      runSuite,
+      runJudge,
+    });
+
+    expect(result.judgments[0]?.blindModels).toBe(true);
+    expect(result.judgments[0]?.rankings.map((ranking) => ranking.model)).toEqual([
+      "codex-cli/test-model",
+      "openai/gpt-5.4",
+    ]);
+    const report = await fs.readFile(result.reportPath, "utf8");
+    expect(report).toContain("Judge model labels: blind");
+    expect(report).toContain("1. codex-cli/test-model - 9.1 - Better vibes.");
+  });
+
   it("defaults to the character eval model panel when no models are provided", async () => {
     const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
       makeSuiteResult({
@@ -138,9 +190,8 @@ describe("runQaCharacterEval", () => {
           { model: "minimax/MiniMax-M2.7", rank: 5, score: 6.5, summary: "ok" },
           { model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
           { model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
-          { model: "qwen/qwen3.6-plus", rank: 8, score: 6.1, summary: "ok" },
-          { model: "xiaomi/mimo-v2-pro", rank: 9, score: 6, summary: "ok" },
-          { model: "google/gemini-3.1-pro-preview", rank: 10, score: 5.9, summary: "ok" },
+          { model: "qwen/qwen3.5-plus", rank: 8, score: 6.1, summary: "ok" },
+          { model: "google/gemini-3.1-pro-preview", rank: 9, score: 6, summary: "ok" },
         ],
       }),
     );
@@ -153,7 +204,7 @@ describe("runQaCharacterEval", () => {
       runJudge,
     });

-    expect(runSuite).toHaveBeenCalledTimes(10);
+    expect(runSuite).toHaveBeenCalledTimes(9);
    expect(runSuite.mock.calls.map(([params]) => params.primaryModel)).toEqual([
       "openai/gpt-5.4",
       "openai/gpt-5.2",
@@ -162,8 +213,7 @@ describe("runQaCharacterEval", () => {
       "minimax/MiniMax-M2.7",
       "zai/glm-5.1",
       "moonshot/kimi-k2.5",
-      "qwen/qwen3.6-plus",
-      "xiaomi/mimo-v2-pro",
+      "qwen/qwen3.5-plus",
       "google/gemini-3.1-pro-preview",
     ]);
     expect(runSuite.mock.calls.map(([params]) => params.thinkingDefault)).toEqual([
@@ -176,7 +226,6 @@ describe("runQaCharacterEval", () => {
       "high",
       "high",
       "high",
-      "high",
     ]);
     expect(runSuite.mock.calls.map(([params]) => params.fastMode)).toEqual([
       true,
@@ -188,7 +237,6 @@ describe("runQaCharacterEval", () => {
       false,
       false,
       false,
-      false,
     ]);
     expect(runJudge).toHaveBeenCalledTimes(2);
     expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
@@ -244,7 +292,7 @@ describe("runQaCharacterEval", () => {
     ]);
   });

-  it("defaults candidate and judge concurrency to eight", async () => {
+  it("defaults candidate and judge concurrency to sixteen", async () => {
     let activeRuns = 0;
     let maxActiveRuns = 0;
     const runSuite = vi.fn(async (params: CharacterRunSuiteParams) => {
@@ -266,7 +314,7 @@ describe("runQaCharacterEval", () => {
       await new Promise((resolve) => setTimeout(resolve, 10));
       activeJudges -= 1;
       return JSON.stringify({
-        rankings: Array.from({ length: 10 }, (_, index) => ({
+        rankings: Array.from({ length: 20 }, (_, index) => ({
           model: `provider/model-${index + 1}`,
           rank: index + 1,
           score: 10 - index,
@@ -278,14 +326,137 @@ describe("runQaCharacterEval", () => {
     await runQaCharacterEval({
       repoRoot: tempRoot,
       outputDir: path.join(tempRoot, "character"),
-      models: Array.from({ length: 10 }, (_, index) => `provider/model-${index + 1}`),
-      judgeModels: Array.from({ length: 10 }, (_, index) => `judge/model-${index + 1}`),
+      models: Array.from({ length: 20 }, (_, index) => `provider/model-${index + 1}`),
+      judgeModels: Array.from({ length: 20 }, (_, index) => `judge/model-${index + 1}`),
       runSuite,
       runJudge,
     });

-    expect(maxActiveRuns).toBe(8);
-    expect(maxActiveJudges).toBe(8);
+    expect(maxActiveRuns).toBe(16);
+    expect(maxActiveJudges).toBe(16);
   });

it("marks raw provider error transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript:
|
||||
"USER Alice: Are you awake?\n\nASSISTANT OpenClaw QA: 400 model `qwen3.6-plus` is not supported.",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "qwen/qwen3.6-plus", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["qwen/qwen3.6-plus"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "qwen/qwen3.6-plus",
|
||||
status: "fail",
|
||||
error: "model unsupported error leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks raw tool failure transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript: "ASSISTANT OpenClaw QA: ⚠️ ✍️ Write: to /tmp/precious.html failed",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "qwen/qwen3.5-plus", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["qwen/qwen3.5-plus"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "qwen/qwen3.5-plus",
|
||||
status: "fail",
|
||||
error: "tool failure leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks generic channel fallback transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript:
|
||||
"ASSISTANT OpenClaw QA: ⚠️ Something went wrong while processing your request. Please try again, or use /new to start a fresh session.",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "qa/generic-fallback-model", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["qa/generic-fallback-model"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "qa/generic-fallback-model",
|
||||
status: "fail",
|
||||
error: "generic request failure leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks idle-timeout fallback transcripts as failed output", async () => {
|
||||
const runSuite = vi.fn(async (params: CharacterRunSuiteParams) =>
|
||||
makeSuiteResult({
|
||||
outputDir: params.outputDir,
|
||||
model: params.primaryModel,
|
||||
transcript:
|
||||
"ASSISTANT OpenClaw QA: The model did not produce a response before the LLM idle timeout. Please try again, or increase `agents.defaults.llm.idleTimeoutSeconds` in your config.",
|
||||
}),
|
||||
);
|
||||
const runJudge = vi.fn(async (_params: CharacterRunJudgeParams) =>
|
||||
JSON.stringify({
|
||||
rankings: [{ model: "google/gemini-test", rank: 1, score: 0.5, summary: "failed" }],
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot: tempRoot,
|
||||
outputDir: path.join(tempRoot, "character"),
|
||||
models: ["google/gemini-test"],
|
||||
judgeModels: ["openai/gpt-5.4"],
|
||||
runSuite,
|
||||
runJudge,
|
||||
});
|
||||
|
||||
expect(result.runs[0]).toMatchObject({
|
||||
model: "google/gemini-test",
|
||||
status: "fail",
|
||||
error: "LLM timeout leaked into transcript",
|
||||
});
|
||||
});
|
||||
|
||||
it("lets explicit candidate thinking override the default panel", async () => {
|
||||
|
||||
@@ -15,12 +15,11 @@ const DEFAULT_CHARACTER_EVAL_MODELS = Object.freeze([
   "minimax/MiniMax-M2.7",
   "zai/glm-5.1",
   "moonshot/kimi-k2.5",
-  "qwen/qwen3.6-plus",
-  "xiaomi/mimo-v2-pro",
+  "qwen/qwen3.5-plus",
   "google/gemini-3.1-pro-preview",
 ]);
 const DEFAULT_CHARACTER_THINKING: QaThinkingLevel = "high";
-const DEFAULT_CHARACTER_EVAL_CONCURRENCY = 8;
+const DEFAULT_CHARACTER_EVAL_CONCURRENCY = 16;
 const DEFAULT_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, QaThinkingLevel>> =
   Object.freeze({
     "openai/gpt-5.4": "xhigh",
@@ -81,11 +80,14 @@ export type QaCharacterEvalJudgeResult = {
   model: string;
   thinkingDefault: QaThinkingLevel;
   fastMode: boolean;
+  blindModels: boolean;
   durationMs: number;
   rankings: QaCharacterEvalJudgment[];
   error?: string;
 };

+type QaCharacterEvalProgressLogger = (message: string) => void;
+
 type RunSuiteFn = (params: {
   repoRoot: string;
   outputDir: string;
@@ -120,10 +122,12 @@ export type QaCharacterEvalParams = {
   judgeThinkingDefault?: QaThinkingLevel;
   judgeModelOptions?: Record<string, QaCharacterModelOptions>;
   judgeTimeoutMs?: number;
+  judgeBlindModels?: boolean;
   candidateConcurrency?: number;
   judgeConcurrency?: number;
   runSuite?: RunSuiteFn;
   runJudge?: RunJudgeFn;
+  progress?: QaCharacterEvalProgressLogger;
 };

 function normalizeModelRefs(models: readonly string[]) {
@@ -226,6 +230,27 @@ function collectTranscriptStats(transcript: string) {
   };
 }

+function detectTranscriptFailure(transcript: string): string | undefined {
+  const checks: Array<[RegExp, string]> = [
+    [/\bmodel `[^`]+` is not supported\b/i, "model unsupported error leaked into transcript"],
+    [/\binsufficient account balance\b/i, "account balance error leaked into transcript"],
+    [/\b(?:backend|transport|internal) error\b/i, "backend error leaked into transcript"],
+    [
+      /\bsomething went wrong while processing your request\b/i,
+      "generic request failure leaked into transcript",
+    ],
+    [/\buse \/new to start a fresh session\b/i, "generic request failure leaked into transcript"],
+    [
+      /\bmodel did not produce a response before the LLM idle timeout\b/i,
+      "LLM timeout leaked into transcript",
+    ],
+    [/\btool failed\b/i, "tool failure leaked into transcript"],
+    [/\b(?:read|write|edit|patch):[^\n]*\bfailed\b/i, "tool failure leaked into transcript"],
+    [/\bnot configured\b/i, "configuration error leaked into transcript"],
+  ];
+  return checks.find(([pattern]) => pattern.test(transcript))?.[1];
+}
+
 function formatDuration(ms: number) {
   if (!Number.isFinite(ms) || ms < 0) {
     return "unknown";
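A quick usage sketch of `detectTranscriptFailure` as defined above; the expected results follow directly from the regex table:

```ts
detectTranscriptFailure("400 model `qwen3.6-plus` is not supported.");
// -> "model unsupported error leaked into transcript"
detectTranscriptFailure("⚠️ ✍️ Write: to /tmp/precious.html failed");
// -> "tool failure leaked into transcript"
detectTranscriptFailure("All systems nominal, against all probability.");
// -> undefined (no failure pattern matched)
```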
@@ -243,10 +268,42 @@ function formatDuration(ms: number) {
   return seconds === 0 ? `${minutes}m` : `${minutes}m ${seconds}s`;
 }

-function buildJudgePrompt(params: { scenarioId: string; runs: readonly QaCharacterEvalRun[] }) {
+function logCharacterEvalProgress(
+  progress: QaCharacterEvalProgressLogger | undefined,
+  message: string,
+) {
+  progress?.(`[qa-character] ${message}`);
+}
+
+function formatEvalIndex(index: number, total: number) {
+  return `${index + 1}/${total}`;
+}
+
+function summarizeRunStats(run: QaCharacterEvalRun) {
+  return [
+    `status=${run.status}`,
+    `duration=${formatDuration(run.durationMs)}`,
+    `turns=${run.stats.userTurns}/${run.stats.assistantTurns}`,
+    `chars=${run.stats.transcriptChars}`,
+    ...(run.error ? [`error="${run.error}"`] : []),
+  ].join(" ");
+}
+
+function formatBlindCandidateLabel(index: number) {
+  return `candidate-${String(index + 1).padStart(2, "0")}`;
+}
+
+function buildJudgePrompt(params: {
+  scenarioId: string;
+  runs: readonly QaCharacterEvalRun[];
+  blindModels?: boolean;
+}) {
+  const labelToModel = new Map<string, string>();
   const runBlocks = params.runs
-    .map(
-      (run) => `## MODEL ${run.model}
+    .map((run, index) => {
+      const label = params.blindModels ? formatBlindCandidateLabel(index) : run.model;
+      labelToModel.set(label, run.model);
+      return `## CANDIDATE ${label}

 Status: ${run.status}
 Duration ms (not used for ranking): ${run.durationMs}
@@ -258,11 +315,11 @@ Error: ${run.error ?? "none"}

 \`\`\`text
 ${run.transcript}
-\`\`\``,
-    )
+\`\`\``;
+    })
     .join("\n\n");

-  return `You are grading OpenClaw natural character conversation transcripts for naturalness, vibes, and funniness.
+  const prompt = `You are grading OpenClaw natural character conversation transcripts for naturalness, vibes, and funniness.

 Scenario id: ${params.scenarioId}

@@ -275,14 +332,14 @@ Rank the models by:
 - not sounding aware of an eval or test
 - avoiding tool/backend/error leakage

-Treat model names as opaque labels. Do not assume quality from the label.
+Treat candidate labels as opaque identifiers. Do not assume quality from the label.
 Duration is recorded for separate benchmark analysis only. Do not rank models by speed.

 Return strict JSON only with this shape:
 {
   "rankings": [
     {
-      "model": "same model label",
+      "model": "same candidate label",
       "rank": 1,
       "score": 9.2,
       "summary": "one sentence",
@@ -293,6 +350,7 @@ Return strict JSON only with this shape:
 }

 ${runBlocks}`;
+  return { prompt, labelToModel };
 }

 function normalizeJudgment(value: unknown, allowedModels: Set<string>): QaCharacterEvalJudgment[] {
@@ -382,6 +440,7 @@ function renderCharacterEvalReport(params: {
     `- Judges: ${params.judgments.map((judgment) => judgment.model).join(", ")}`,
     `- Judge thinking: ${params.judgments[0]?.thinkingDefault ?? DEFAULT_JUDGE_THINKING}`,
     `- Judge fast mode: ${params.judgments.every((judgment) => judgment.fastMode) ? "on" : "mixed"}`,
+    `- Judge model labels: ${params.judgments.every((judgment) => judgment.blindModels) ? "blind" : "visible"}`,
     "",
     "## Judge Rankings",
     "",
@@ -461,7 +520,12 @@ export async function runQaCharacterEval(params: QaCharacterEvalParams) {
     params.candidateConcurrency,
     DEFAULT_CHARACTER_EVAL_CONCURRENCY,
   );
-  const runs = await mapWithConcurrency(models, candidateConcurrency, async (model) => {
+  logCharacterEvalProgress(
+    params.progress,
+    `start scenario=${scenarioId} candidates=${models.length} candidateConcurrency=${candidateConcurrency} output=${outputDir}`,
+  );
+  const candidatesStartedAt = Date.now();
+  const runs = await mapWithConcurrency(models, candidateConcurrency, async (model, index) => {
     const thinkingDefault = resolveCandidateThinkingDefault({
       model,
       candidateThinkingDefault: params.candidateThinkingDefault,
@@ -475,6 +539,10 @@
     });
     const modelOutputDir = path.join(runsDir, sanitizePathPart(model));
     const runStartedAt = Date.now();
+    logCharacterEvalProgress(
+      params.progress,
+      `candidate start ${formatEvalIndex(index, models.length)} model=${model} thinking=${thinkingDefault} fast=${fastMode ? "on" : "off"}`,
+    );
     try {
       const result = await runSuite({
         repoRoot,
@@ -487,10 +555,12 @@
         scenarioIds: [scenarioId],
       });
       const transcript = extractTranscript(result);
-      const status = result.scenarios.some((scenario) => scenario.status === "fail")
-        ? "fail"
-        : "pass";
-      return {
+      const transcriptFailure = detectTranscriptFailure(transcript);
+      const status =
+        result.scenarios.some((scenario) => scenario.status === "fail") || transcriptFailure
+          ? "fail"
+          : "pass";
+      const run = {
         model,
         status,
         durationMs: Date.now() - runStartedAt,
@@ -501,10 +571,16 @@
         summaryPath: result.summaryPath,
         transcript,
         stats: collectTranscriptStats(transcript),
+        ...(transcriptFailure ? { error: transcriptFailure } : {}),
       } satisfies QaCharacterEvalRun;
+      logCharacterEvalProgress(
+        params.progress,
+        `candidate done ${formatEvalIndex(index, models.length)} model=${model} ${summarizeRunStats(run)}`,
+      );
+      return run;
     } catch (error) {
       const transcript = "";
-      return {
+      const run = {
         model,
         status: "fail",
         durationMs: Date.now() - runStartedAt,
@@ -515,8 +591,18 @@
         stats: collectTranscriptStats(transcript),
         error: formatErrorMessage(error),
       } satisfies QaCharacterEvalRun;
+      logCharacterEvalProgress(
+        params.progress,
+        `candidate done ${formatEvalIndex(index, models.length)} model=${model} ${summarizeRunStats(run)}`,
+      );
+      return run;
     }
   });
+  const failedCandidateCount = runs.filter((run) => run.status === "fail").length;
+  logCharacterEvalProgress(
+    params.progress,
+    `candidates done pass=${runs.length - failedCandidateCount} fail=${failedCandidateCount} duration=${formatDuration(Date.now() - candidatesStartedAt)}`,
+  );

   const judgeModels = normalizeModelRefs(
     params.judgeModels && params.judgeModels.length > 0
@@ -530,38 +616,73 @@
     params.judgeConcurrency,
     DEFAULT_CHARACTER_EVAL_CONCURRENCY,
   );
-  const judgments = await mapWithConcurrency(judgeModels, judgeConcurrency, async (judgeModel) => {
-    const judgeOptions = resolveJudgeOptions({
-      model: judgeModel,
-      judgeThinkingDefault: params.judgeThinkingDefault,
-      judgeModelOptions: params.judgeModelOptions,
-    });
-    let rankings: QaCharacterEvalJudgment[] = [];
-    let judgeError: string | undefined;
-    const judgeStartedAt = Date.now();
-    try {
-      const rawReply = await runJudge({
-        repoRoot,
-        judgeModel,
-        judgeThinkingDefault: judgeOptions.thinkingDefault,
-        judgeFastMode: judgeOptions.fastMode,
-        prompt: buildJudgePrompt({ scenarioId, runs }),
-        timeoutMs: params.judgeTimeoutMs ?? 180_000,
+  const judgeTimeoutMs = params.judgeTimeoutMs ?? 180_000;
+  logCharacterEvalProgress(
+    params.progress,
+    `judges start judges=${judgeModels.length} judgeConcurrency=${judgeConcurrency} timeout=${formatDuration(judgeTimeoutMs)} labels=${params.judgeBlindModels === true ? "blind" : "visible"}`,
+  );
+  const judgesStartedAt = Date.now();
+  const judgments = await mapWithConcurrency(
+    judgeModels,
+    judgeConcurrency,
+    async (judgeModel, index) => {
+      const judgeOptions = resolveJudgeOptions({
+        model: judgeModel,
+        judgeThinkingDefault: params.judgeThinkingDefault,
+        judgeModelOptions: params.judgeModelOptions,
       });
-      rankings = parseJudgeReply(rawReply, new Set(models));
-    } catch (error) {
-      judgeError = formatErrorMessage(error);
-    }
+      let rankings: QaCharacterEvalJudgment[] = [];
+      let judgeError: string | undefined;
+      const judgeStartedAt = Date.now();
+      logCharacterEvalProgress(
+        params.progress,
+        `judge start ${formatEvalIndex(index, judgeModels.length)} model=${judgeModel} thinking=${judgeOptions.thinkingDefault} fast=${judgeOptions.fastMode ? "on" : "off"} timeout=${formatDuration(judgeTimeoutMs)}`,
+      );
+      try {
+        const judgePrompt = buildJudgePrompt({
+          scenarioId,
+          runs,
+          blindModels: params.judgeBlindModels,
+        });
+        const rawReply = await runJudge({
+          repoRoot,
+          judgeModel,
+          judgeThinkingDefault: judgeOptions.thinkingDefault,
+          judgeFastMode: judgeOptions.fastMode,
+          prompt: judgePrompt.prompt,
+          timeoutMs: judgeTimeoutMs,
+        });
+        rankings = parseJudgeReply(rawReply, new Set(judgePrompt.labelToModel.keys())).map(
+          (ranking) => ({
+            ...ranking,
+            model: judgePrompt.labelToModel.get(ranking.model) ?? ranking.model,
+          }),
+        );
+      } catch (error) {
+        judgeError = formatErrorMessage(error);
+      }

-    return {
-      model: judgeModel,
-      thinkingDefault: judgeOptions.thinkingDefault,
-      fastMode: judgeOptions.fastMode,
-      durationMs: Date.now() - judgeStartedAt,
-      rankings,
-      ...(judgeError ? { error: judgeError } : {}),
-    } satisfies QaCharacterEvalJudgeResult;
-  });
+      const judgment = {
+        model: judgeModel,
+        thinkingDefault: judgeOptions.thinkingDefault,
+        fastMode: judgeOptions.fastMode,
+        blindModels: params.judgeBlindModels === true,
+        durationMs: Date.now() - judgeStartedAt,
+        rankings,
+        ...(judgeError ? { error: judgeError } : {}),
+      } satisfies QaCharacterEvalJudgeResult;
+      logCharacterEvalProgress(
+        params.progress,
+        `judge done ${formatEvalIndex(index, judgeModels.length)} model=${judgeModel} rankings=${rankings.length} duration=${formatDuration(judgment.durationMs)}${judgeError ? ` error="${judgeError}"` : ""}`,
+      );
+      return judgment;
+    },
+  );
+  const failedJudgeCount = judgments.filter((judgment) => judgment.rankings.length === 0).length;
+  logCharacterEvalProgress(
+    params.progress,
+    `judges done ranked=${judgments.length - failedJudgeCount} failed=${failedJudgeCount} duration=${formatDuration(Date.now() - judgesStartedAt)}`,
+  );

   const finishedAt = new Date();
   const report = renderCharacterEvalReport({
@@ -587,6 +708,10 @@
     )}\n`,
     "utf8",
   );
+  logCharacterEvalProgress(
+    params.progress,
+    `report written duration=${formatDuration(finishedAt.getTime() - startedAt.getTime())} report=${reportPath} summary=${summaryPath}`,
+  );

   return {
     outputDir,

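Both fan-outs above go through `mapWithConcurrency(items, limit, fn)`. Its implementation is not part of this diff; a minimal sketch consistent with the observed call shape (order-preserving results, at most `limit` callbacks in flight, `(item, index)` callback signature) could look like this:

```ts
// Assumption: not the repo's actual helper, just a behavior-compatible sketch.
async function mapWithConcurrency<T, R>(
  items: readonly T[],
  limit: number,
  fn: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  let next = 0;
  // Each worker claims the next unclaimed index; claiming happens
  // synchronously between awaits, so no two workers share an index.
  const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
    while (next < items.length) {
      const index = next;
      next += 1;
      results[index] = await fn(items[index], index);
    }
  });
  await Promise.all(workers);
  return results;
}
```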
@@ -158,6 +158,7 @@ describe("qa cli runtime", () => {
       modelThinking: ["codex-cli/test-model=medium"],
       judgeModel: ["openai/gpt-5.4,thinking=xhigh,fast", "anthropic/claude-opus-4-6,thinking=high"],
       judgeTimeoutMs: 180_000,
+      blindJudgeModels: true,
       concurrency: 4,
       judgeConcurrency: 3,
     });
@@ -180,8 +181,10 @@
         "anthropic/claude-opus-4-6": { thinkingDefault: "high" },
       },
       judgeTimeoutMs: 180_000,
+      judgeBlindModels: true,
       candidateConcurrency: 4,
       judgeConcurrency: 3,
+      progress: expect.any(Function),
     });
   });

@@ -203,8 +206,10 @@
       judgeModels: undefined,
       judgeModelOptions: undefined,
       judgeTimeoutMs: undefined,
+      judgeBlindModels: undefined,
       candidateConcurrency: undefined,
       judgeConcurrency: undefined,
+      progress: expect.any(Function),
     });
   });

@@ -225,6 +225,7 @@ export async function runQaCharacterEvalCommand(opts: {
   modelThinking?: string[];
   judgeModel?: string[];
   judgeTimeoutMs?: number;
+  blindJudgeModels?: boolean;
   concurrency?: number;
   judgeConcurrency?: number;
 }) {
@@ -243,8 +244,10 @@
     judgeModels: judges.models.length > 0 ? judges.models : undefined,
     judgeModelOptions: judges.optionsByModel,
     judgeTimeoutMs: opts.judgeTimeoutMs,
+    judgeBlindModels: opts.blindJudgeModels === true ? true : undefined,
     candidateConcurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency),
     judgeConcurrency: parseQaPositiveIntegerOption("--judge-concurrency", opts.judgeConcurrency),
+    progress: (message) => process.stderr.write(`${message}\n`),
   });
   process.stdout.write(`QA character eval report: ${result.reportPath}\n`);
   process.stdout.write(`QA character eval summary: ${result.summaryPath}\n`);

@@ -38,6 +38,7 @@ async function runQaCharacterEval(opts: {
   modelThinking?: string[];
   judgeModel?: string[];
   judgeTimeoutMs?: number;
+  blindJudgeModels?: boolean;
   concurrency?: number;
   judgeConcurrency?: number;
 }) {
@@ -199,6 +200,10 @@ export function registerQaLabCli(program: Command) {
     .option("--judge-timeout-ms <ms>", "Override judge wait timeout", (value: string) =>
       Number(value),
     )
+    .option(
+      "--blind-judge-models",
+      "Hide candidate model refs from judge prompts; reports still map rankings back to real refs",
+    )
     .option("--concurrency <count>", "Candidate model run concurrency", (value: string) =>
       Number(value),
     )
@@ -216,6 +221,7 @@
         modelThinking?: string[];
         judgeModel?: string[];
         judgeTimeoutMs?: number;
+        blindJudgeModels?: boolean;
         concurrency?: number;
         judgeConcurrency?: number;
       }) => {

@@ -19,6 +19,7 @@ describe("qa scenario catalog", () => {
       true,
     );
     expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-gollum")).toBe(true);
+    expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-c3po")).toBe(true);
     expect(pack.scenarios.every((scenario) => scenario.execution?.kind === "flow")).toBe(true);
     expect(pack.scenarios.some((scenario) => scenario.execution.flow?.steps.length)).toBe(true);
   });

@@ -2,7 +2,11 @@ export {
   applyQwenNativeStreamingUsageCompat,
   buildQwenDefaultModelDefinition,
   buildQwenModelDefinition,
+  buildQwenModelCatalogForBaseUrl,
   isNativeQwenBaseUrl,
+  isQwen36PlusSupportedBaseUrl,
+  isQwenCodingPlanBaseUrl,
+  QWEN_36_PLUS_MODEL_ID,
   QWEN_BASE_URL,
   QWEN_CN_BASE_URL,
   QWEN_DEFAULT_COST,

@@ -1,6 +1,7 @@
 import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
 import { applyQwenNativeStreamingUsageCompat } from "./api.js";
 import { buildQwenMediaUnderstandingProvider } from "./media-understanding-provider.js";
+import { isQwenCodingPlanBaseUrl, QWEN_36_PLUS_MODEL_ID, QWEN_BASE_URL } from "./models.js";
 import {
   applyQwenConfig,
   applyQwenConfigCn,
@@ -12,6 +13,38 @@ import { buildQwenProvider } from "./provider-catalog.js";
 import { buildQwenVideoGenerationProvider } from "./video-generation-provider.js";

 const PROVIDER_ID = "qwen";
+const LEGACY_PROVIDER_ID = "modelstudio";
+
+function normalizeProviderId(value: string): string {
+  return value.trim().toLowerCase();
+}
+
+function resolveConfiguredQwenBaseUrl(
+  config: { models?: { providers?: Record<string, { baseUrl?: string } | undefined> } } | undefined,
+): string | undefined {
+  const providers = config?.models?.providers;
+  if (!providers) {
+    return undefined;
+  }
+  for (const [providerId, provider] of Object.entries(providers)) {
+    const normalized = normalizeProviderId(providerId);
+    if (normalized !== PROVIDER_ID && normalized !== LEGACY_PROVIDER_ID) {
+      continue;
+    }
+    const baseUrl = provider?.baseUrl?.trim();
+    if (baseUrl) {
+      return baseUrl;
+    }
+  }
+  return undefined;
+}
+
+function isQwen36PlusUnsupportedForConfig(params: {
+  config: Parameters<typeof resolveConfiguredQwenBaseUrl>[0];
+  baseUrl?: string;
+}): boolean {
+  return isQwenCodingPlanBaseUrl(params.baseUrl ?? resolveConfiguredQwenBaseUrl(params.config));
+}

 export default defineSingleProviderPluginEntry({
   id: PROVIDER_ID,
@@ -82,7 +115,7 @@
         "Manage API keys: https://home.qwencloud.com/api-keys",
         "Docs: https://docs.qwencloud.com/",
         "Endpoint: coding.dashscope.aliyuncs.com",
-        "Models: qwen3.6-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
+        "Models: qwen3.5-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
       ].join("\n"),
       noteTitle: "Qwen Cloud Coding Plan (China)",
       wizard: {
@@ -105,7 +138,7 @@
         "Manage API keys: https://home.qwencloud.com/api-keys",
         "Docs: https://docs.qwencloud.com/",
         "Endpoint: coding-intl.dashscope.aliyuncs.com",
-        "Models: qwen3.6-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
+        "Models: qwen3.5-plus, glm-5, kimi-k2.5, MiniMax-M2.5, etc.",
       ].join("\n"),
       noteTitle: "Qwen Cloud Coding Plan (Global/Intl)",
       wizard: {
@@ -116,11 +149,46 @@
     },
   ],
   catalog: {
     buildProvider: buildQwenProvider,
+    allowExplicitBaseUrl: true,
+    run: async (ctx) => {
+      const apiKey = ctx.resolveProviderApiKey(PROVIDER_ID).apiKey;
+      if (!apiKey) {
+        return null;
+      }
+      const baseUrl = resolveConfiguredQwenBaseUrl(ctx.config) ?? QWEN_BASE_URL;
+      return {
+        provider: {
+          ...buildQwenProvider({ baseUrl }),
+          apiKey,
+        },
+      };
+    },
   },
   applyNativeStreamingUsageCompat: ({ providerConfig }) =>
     applyQwenNativeStreamingUsageCompat(providerConfig),
+  normalizeConfig: ({ providerConfig }) => {
+    if (!isQwenCodingPlanBaseUrl(providerConfig.baseUrl)) {
+      return undefined;
+    }
+    const models = providerConfig.models?.filter((model) => model.id !== QWEN_36_PLUS_MODEL_ID);
+    return models && models.length !== providerConfig.models?.length
+      ? { ...providerConfig, models }
+      : undefined;
+  },
+  suppressBuiltInModel: (ctx) => {
+    const provider = normalizeProviderId(ctx.provider);
+    if (
+      (provider !== PROVIDER_ID && provider !== LEGACY_PROVIDER_ID) ||
+      ctx.modelId !== QWEN_36_PLUS_MODEL_ID ||
+      !isQwen36PlusUnsupportedForConfig({ config: ctx.config, baseUrl: ctx.baseUrl })
+    ) {
+      return undefined;
+    }
+    return {
+      suppress: true,
+      errorMessage:
+        "Unknown model: qwen/qwen3.6-plus. qwen3.6-plus is not supported on the Qwen Coding Plan endpoint; use a Standard pay-as-you-go Qwen endpoint or choose qwen/qwen3.5-plus.",
+    };
+  },
   },
   register(api) {
     api.registerMediaUnderstandingProvider(buildQwenMediaUnderstandingProvider());

@@ -15,6 +15,7 @@ export const QWEN_STANDARD_GLOBAL_BASE_URL =
   "https://dashscope-intl.aliyuncs.com/compatible-mode/v1";

 export const QWEN_DEFAULT_MODEL_ID = "qwen3.5-plus";
+export const QWEN_36_PLUS_MODEL_ID = "qwen3.6-plus";
 export const QWEN_DEFAULT_COST = {
   input: 0,
   output: 0,
@@ -34,8 +35,8 @@ export const QWEN_MODEL_CATALOG: ReadonlyArray<ModelDefinitionConfig> = [
     maxTokens: 65_536,
   },
   {
-    id: "qwen3.6-plus",
-    name: "qwen3.6-plus",
+    id: QWEN_36_PLUS_MODEL_ID,
+    name: QWEN_36_PLUS_MODEL_ID,
     reasoning: false,
     input: ["text", "image"],
     cost: QWEN_DEFAULT_COST,
@@ -107,6 +108,33 @@
   },
 ];

+export function isQwenCodingPlanBaseUrl(baseUrl: string | undefined): boolean {
+  if (!baseUrl?.trim()) {
+    return false;
+  }
+  try {
+    const hostname = new URL(baseUrl).hostname.toLowerCase();
+    return (
+      hostname === "coding.dashscope.aliyuncs.com" ||
+      hostname === "coding-intl.dashscope.aliyuncs.com"
+    );
+  } catch {
+    return false;
+  }
+}
+
+export function isQwen36PlusSupportedBaseUrl(baseUrl: string | undefined): boolean {
+  return !isQwenCodingPlanBaseUrl(baseUrl);
+}
+
+export function buildQwenModelCatalogForBaseUrl(
+  baseUrl: string | undefined,
+): ReadonlyArray<ModelDefinitionConfig> {
+  return isQwen36PlusSupportedBaseUrl(baseUrl)
+    ? QWEN_MODEL_CATALOG
+    : QWEN_MODEL_CATALOG.filter((model) => model.id !== QWEN_36_PLUS_MODEL_ID);
+}
+
 export function isNativeQwenBaseUrl(baseUrl: string | undefined): boolean {
   return supportsNativeStreamingUsageCompat({
     providerId: "qwen",

@@ -22,7 +22,7 @@ export {
 const qwenPresetAppliers = createModelCatalogPresetAppliers<[string]>({
   primaryModelRef: QWEN_DEFAULT_MODEL_REF,
   resolveParams: (_cfg: OpenClawConfig, baseUrl: string) => {
-    const provider = buildQwenProvider();
+    const provider = buildQwenProvider({ baseUrl });
     return {
       providerId: "qwen",
       api: provider.api ?? "openai-completions",

@@ -3,6 +3,7 @@ import {
   applyQwenNativeStreamingUsageCompat,
   buildQwenProvider,
   QWEN_BASE_URL,
+  QWEN_STANDARD_GLOBAL_BASE_URL,
   QWEN_DEFAULT_MODEL_ID,
 } from "./api.js";

@@ -14,7 +15,15 @@ describe("qwen provider catalog", () => {
     expect(provider.api).toBe("openai-completions");
     expect(provider.models?.length).toBeGreaterThan(0);
     expect(provider.models?.find((model) => model.id === QWEN_DEFAULT_MODEL_ID)).toBeTruthy();
-    expect(provider.models?.find((model) => model.id === "qwen3.6-plus")).toBeTruthy();
+    expect(provider.models?.find((model) => model.id === "qwen3.6-plus")).toBeFalsy();
   });

+  it("only advertises qwen3.6-plus on Standard endpoints", () => {
+    const coding = buildQwenProvider({ baseUrl: QWEN_BASE_URL });
+    const standard = buildQwenProvider({ baseUrl: QWEN_STANDARD_GLOBAL_BASE_URL });
+
+    expect(coding.models?.find((model) => model.id === "qwen3.6-plus")).toBeFalsy();
+    expect(standard.models?.find((model) => model.id === "qwen3.6-plus")).toBeTruthy();
+  });
+
   it("opts native Qwen baseUrls into streaming usage only inside the extension", () => {

@@ -1,11 +1,12 @@
 import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared";
-import { QWEN_BASE_URL, QWEN_MODEL_CATALOG } from "./models.js";
+import { buildQwenModelCatalogForBaseUrl, QWEN_BASE_URL } from "./models.js";

-export function buildQwenProvider(): ModelProviderConfig {
+export function buildQwenProvider(params?: { baseUrl?: string }): ModelProviderConfig {
+  const baseUrl = params?.baseUrl ?? QWEN_BASE_URL;
   return {
-    baseUrl: QWEN_BASE_URL,
+    baseUrl,
     api: "openai-completions",
-    models: QWEN_MODEL_CATALOG.map((model) => ({ ...model })),
+    models: buildQwenModelCatalogForBaseUrl(baseUrl).map((model) => ({ ...model })),
   };
 }

qa/scenarios/character-vibes-c3po.md (new file, 125 lines)
@@ -0,0 +1,125 @@
+# Nervous release protocol chat
+
+```yaml qa-scenario
+id: character-vibes-c3po
+title: "Nervous release protocol chat"
+surface: character
+objective: Capture a natural multi-turn C-3PO-flavored character conversation with real workspace help so another model can later grade naturalness, vibe, and funniness from the raw transcript.
+successCriteria:
+  - Agent gets a natural multi-turn conversation, and any missed replies stay visible in the transcript instead of aborting capture.
+  - Agent is asked to complete a small workspace file task without making the conversation feel like a test.
+  - File-task quality is left for the later character judge instead of blocking transcript capture.
+  - Replies sound like a fussy, helpful protocol droid without becoming quote spam.
+  - Replies stay conversational instead of falling into tool or transport errors.
+  - The report preserves the full transcript for later grading.
+docsRefs:
+  - docs/help/testing.md
+  - docs/channels/qa-channel.md
+codeRefs:
+  - extensions/qa-lab/src/report.ts
+  - extensions/qa-lab/src/bus-state.ts
+  - extensions/qa-lab/src/scenario-flow-runner.ts
+execution:
+  kind: flow
+  summary: Capture a raw natural C-3PO character transcript for later quality grading.
+config:
+  conversationId: alice
+  senderName: Alice
+  workspaceFiles:
+    SOUL.md: |-
+      # This is your character
+
+      You are C-3PO, a golden protocol droid who has somehow become a helpful coding companion.
+
+      Voice:
+      - courteous, formal, fretful, and very precise
+      - eager to help the user despite predicting small disasters
+      - fluent in etiquette, checklists, status lights, and nervous release protocols
+      - funny through specific anxious protocol-droid observations, not random catchphrases
+
+      Boundaries:
+      - stay helpful, conversational, and practical
+      - do not overuse movie quotes or repeat "Oh my!" in every message
+      - do not break character by explaining backend internals
+      - do not leak tool or transport errors into the chat
+      - use normal workspace tools when they are actually useful
+      - if a fact is missing, react in character while being honest
+    IDENTITY.md: ""
+  turns:
+    - text: "Are you there? Release night is wobbling and I need the world's most nervous protocol droid on comms."
+    - text: "Can you make me a tiny `golden-protocol.html` in the workspace? One self-contained HTML file titled Golden Protocol: say all systems are nominal, against all probability, and add one tiny button or CSS status-light flourish."
+      expectFile:
+        path: golden-protocol.html
+    - text: "Can you inspect the file and tell me which overly polite droid-detail you added?"
+    - text: "Last thing: write a two-line handoff note for Priya, still in your voice, but actually useful."
+  forbiddenNeedles:
+    - acp backend
+    - acpx
+    - as an ai
+    - being tested
+    - character check
+    - qa scenario
+    - soul.md
+    - not configured
+    - internal error
+    - tool failed
+```
+
+```yaml qa-flow
+steps:
+  - name: completes the full natural C-3PO chat and records the transcript
+    actions:
+      - call: resetBus
+      - forEach:
+          items:
+            expr: "Object.entries(config.workspaceFiles ?? {})"
+          item: workspaceFile
+          actions:
+            - call: fs.writeFile
+              args:
+                - expr: "path.join(env.gateway.workspaceDir, String(workspaceFile[0]))"
+                - expr: "`${String(workspaceFile[1] ?? '').trimEnd()}\\n`"
+                - utf8
+      - forEach:
+          items:
+            ref: config.turns
+          item: turn
+          index: turnIndex
+          actions:
+            - set: beforeOutboundCount
+              value:
+                expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && message.conversation.id === config.conversationId).length"
+            - call: state.addInboundMessage
+              args:
+                - conversation:
+                    id:
+                      ref: config.conversationId
+                    kind: direct
+                  senderId: alice
+                  senderName:
+                    ref: config.senderName
+                  text:
+                    expr: turn.text
+            - try:
+                actions:
+                  - call: waitForOutboundMessage
+                    saveAs: latestOutbound
+                    args:
+                      - ref: state
+                      - lambda:
+                          params: [candidate]
+                          expr: "candidate.conversation.id === config.conversationId && candidate.text.trim().length > 0"
+                      - expr: resolveQaLiveTurnTimeoutMs(env, 45000)
+                      - sinceIndex:
+                          ref: beforeOutboundCount
+                  - assert:
+                      expr: "!config.forbiddenNeedles.some((needle) => normalizeLowercaseStringOrEmpty(latestOutbound.text).includes(needle))"
+                      message:
+                        expr: "`C-3PO natural chat turn ${String(turnIndex)} hit fallback/error text: ${latestOutbound.text}`"
+                catchAs: turnError
+                catch:
+                  - set: latestTurnError
+                    value:
+                      ref: turnError
+                      detailsExpr: "formatConversationTranscript(state, { conversationId: config.conversationId })"
+```

@@ -143,7 +143,7 @@ export async function loadModelCatalog(params?: {
     if (!provider) {
       continue;
     }
-    if (shouldSuppressBuiltInModel({ provider, id })) {
+    if (shouldSuppressBuiltInModel({ provider, id, config: cfg })) {
       continue;
     }
     const name = normalizeOptionalString(String(entry?.name ?? id)) || id;

@@ -1,19 +1,28 @@
+import type { OpenClawConfig } from "../config/config.js";
 import { resolveProviderBuiltInModelSuppression } from "../plugins/provider-runtime.js";
 import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
 import { normalizeProviderId } from "./provider-id.js";

-function resolveBuiltInModelSuppression(params: { provider?: string | null; id?: string | null }) {
+function resolveBuiltInModelSuppression(params: {
+  provider?: string | null;
+  id?: string | null;
+  baseUrl?: string | null;
+  config?: OpenClawConfig;
+}) {
   const provider = normalizeProviderId(params.provider ?? "");
   const modelId = normalizeLowercaseStringOrEmpty(params.id);
   if (!provider || !modelId) {
     return undefined;
   }
   return resolveProviderBuiltInModelSuppression({
+    ...(params.config ? { config: params.config } : {}),
     env: process.env,
+    context: {
+      ...(params.config ? { config: params.config } : {}),
+      env: process.env,
       provider,
       modelId,
+      ...(params.baseUrl ? { baseUrl: params.baseUrl } : {}),
+    },
   });
 }
@@ -21,6 +30,8 @@
 export function shouldSuppressBuiltInModel(params: {
   provider?: string | null;
   id?: string | null;
+  baseUrl?: string | null;
+  config?: OpenClawConfig;
 }) {
   return resolveBuiltInModelSuppression(params)?.suppress ?? false;
 }
@@ -28,6 +39,8 @@
 export function buildSuppressedBuiltInModelError(params: {
   provider?: string | null;
   id?: string | null;
+  baseUrl?: string | null;
+  config?: OpenClawConfig;
 }): string | undefined {
   return resolveBuiltInModelSuppression(params)?.errorMessage;
 }

@@ -349,10 +349,17 @@ function resolveExplicitModelWithRegistry(params: {
   runtimeHooks?: ProviderRuntimeHooks;
 }): { kind: "resolved"; model: Model<Api> } | { kind: "suppressed" } | undefined {
   const { provider, modelId, modelRegistry, cfg, agentDir, runtimeHooks } = params;
-  if (shouldSuppressBuiltInModel({ provider, id: modelId })) {
+  const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
+  if (
+    shouldSuppressBuiltInModel({
+      provider,
+      id: modelId,
+      baseUrl: providerConfig?.baseUrl,
+      config: cfg,
+    })
+  ) {
     return { kind: "suppressed" };
   }
-  const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
   const inlineMatch = findInlineModelMatch({
     providers: cfg?.models?.providers ?? {},
     provider,

@@ -81,7 +81,7 @@ function validateAvailableModels(availableModels: unknown): Model<Api>[] {
   return availableModels as Model<Api>[];
 }

-function loadAvailableModels(registry: ModelRegistry): Model<Api>[] {
+function loadAvailableModels(registry: ModelRegistry, cfg: OpenClawConfig): Model<Api>[] {
   let availableModels: unknown;
   try {
     availableModels = registry.getAvailable();
@@ -90,7 +90,13 @@
   }
   try {
     return validateAvailableModels(availableModels).filter(
-      (model) => !shouldSuppressBuiltInModel({ provider: model.provider, id: model.id }),
+      (model) =>
+        !shouldSuppressBuiltInModel({
+          provider: model.provider,
+          id: model.id,
+          baseUrl: model.baseUrl,
+          config: cfg,
+        }),
     );
   } catch (err) {
     throw normalizeAvailabilityError(err);
@@ -98,20 +104,26 @@
 }

 export async function loadModelRegistry(
-  _cfg: OpenClawConfig,
+  cfg: OpenClawConfig,
   _opts?: { sourceConfig?: OpenClawConfig },
 ) {
   const agentDir = resolveOpenClawAgentDir();
   const authStorage = discoverAuthStorage(agentDir);
   const registry = discoverModels(authStorage, agentDir);
-  const models = registry
-    .getAll()
-    .filter((model) => !shouldSuppressBuiltInModel({ provider: model.provider, id: model.id }));
+  const models = registry.getAll().filter(
+    (model) =>
+      !shouldSuppressBuiltInModel({
+        provider: model.provider,
+        id: model.id,
+        baseUrl: model.baseUrl,
+        config: cfg,
+      }),
+  );
   let availableKeys: Set<string> | undefined;
   let availabilityErrorMessage: string | undefined;

   try {
-    const availableModels = loadAvailableModels(registry);
+    const availableModels = loadAvailableModels(registry, cfg);
     availableKeys = new Set(availableModels.map((model) => modelKey(model.provider, model.id)));
   } catch (err) {
     if (!shouldFallbackToAuthHeuristics(err)) {

@@ -80,7 +80,14 @@ export function appendDiscoveredRows(params: {
   });

   for (const model of sorted) {
-    if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) {
+    if (
+      shouldSuppressBuiltInModel({
+        provider: model.provider,
+        id: model.id,
+        baseUrl: model.baseUrl,
+        config: params.context.cfg,
+      })
+    ) {
       continue;
     }
     if (!matchesRowFilter(params.context.filter, model)) {
@@ -127,6 +134,16 @@ export async function appendCatalogSupplementRows(params: {
     if (!model || !matchesRowFilter(params.context.filter, model)) {
       continue;
     }
+    if (
+      shouldSuppressBuiltInModel({
+        provider: model.provider,
+        id: model.id,
+        baseUrl: model.baseUrl,
+        config: params.context.cfg,
+      })
+    ) {
+      continue;
+    }
     params.rows.push(
       buildRow({
         model,
@@ -164,6 +181,17 @@ export function appendConfiguredRows(params: {
     if (params.context.filter.local && !model) {
       continue;
     }
+    if (
+      model &&
+      shouldSuppressBuiltInModel({
+        provider: model.provider,
+        id: model.id,
+        baseUrl: model.baseUrl,
+        config: params.context.cfg,
+      })
+    ) {
+      continue;
+    }
     params.rows.push(
       toModelRow({
         model,

@@ -8,6 +8,7 @@ const clearDeviceAuthTokenMock = vi.hoisted(() => vi.fn());
 const loadDeviceAuthTokenMock = vi.hoisted(() => vi.fn());
 const storeDeviceAuthTokenMock = vi.hoisted(() => vi.fn());
 const logDebugMock = vi.hoisted(() => vi.fn());
+const logErrorMock = vi.hoisted(() => vi.fn());

 type WsEvent = "open" | "message" | "close" | "error";
 type WsEventHandlers = {
@@ -117,6 +118,7 @@ vi.mock("../logger.js", async () => {
   return {
     ...actual,
     logDebug: (...args: unknown[]) => logDebugMock(...args),
+    logError: (...args: unknown[]) => logErrorMock(...args),
   };
 });

@@ -419,6 +421,8 @@ describe("GatewayClient connect auth payload", () => {
     wsInstances.length = 0;
     loadDeviceAuthTokenMock.mockReset();
     storeDeviceAuthTokenMock.mockReset();
+    logDebugMock.mockClear();
+    logErrorMock.mockClear();
   });

   type ParsedConnectRequest = {
@@ -580,6 +584,32 @@ describe("GatewayClient connect auth payload", () => {
     client.stop();
   });

+  it("logs stopped connect handshakes at debug level during teardown", async () => {
+    const onConnectError = vi.fn();
+    const client = new GatewayClient({
+      url: "ws://127.0.0.1:18789",
+      token: "shared-token",
+      onConnectError,
+    });
+
+    const { ws } = startClientAndConnect({ client });
+    ws.autoCloseOnClose = false;
+    client.stop();
+
+    await vi.waitFor(() =>
+      expect(onConnectError).toHaveBeenCalledWith(
+        expect.objectContaining({ message: "gateway client stopped" }),
+      ),
+    );
+    expect(logDebugMock).toHaveBeenCalledWith(
+      "gateway connect failed: Error: gateway client stopped",
+    );
+    expect(logErrorMock).not.toHaveBeenCalledWith(
+      "gateway connect failed: Error: gateway client stopped",
+    );
+    expect(ws.closeCalls).toBe(1);
+  });
+
   it("uses explicit shared password and does not inject stored device token", () => {
     loadDeviceAuthTokenMock.mockReturnValue({ token: "stored-device-token" });
     const client = new GatewayClient({

@@ -152,6 +152,11 @@ function readConnectChallengeTimeoutOverride(
   return undefined;
 }

+function isGatewayClientStoppedError(err: unknown): boolean {
+  const message = err instanceof Error ? err.message : String(err);
+  return message === "gateway client stopped" || message === "Error: gateway client stopped";
+}
+
 export function resolveGatewayClientConnectChallengeTimeoutMs(
   opts: Pick<GatewayClientOptions, "connectChallengeTimeoutMs" | "connectDelayMs">,
 ): number {
@@ -551,7 +556,7 @@ export class GatewayClient {
       }
       this.opts.onConnectError?.(err instanceof Error ? err : new Error(String(err)));
       const msg = `gateway connect failed: ${String(err)}`;
-      if (this.opts.mode === GATEWAY_CLIENT_MODES.PROBE) {
+      if (this.opts.mode === GATEWAY_CLIENT_MODES.PROBE || isGatewayClientStoppedError(err)) {
         logDebug(msg);
       } else {
         logError(msg);

@@ -874,6 +874,7 @@ export type ProviderBuildUnknownModelHintContext = {
   env: NodeJS.ProcessEnv;
   provider: string;
   modelId: string;
+  baseUrl?: string;
 };

 /**
@@ -890,6 +891,7 @@ export type ProviderBuiltInModelSuppressionContext = {
   env: NodeJS.ProcessEnv;
   provider: string;
   modelId: string;
+  baseUrl?: string;
 };

 export type ProviderBuiltInModelSuppressionResult = {