From 145e0e0b5dbd75fa1c5a72ec8e19c0ed88755d1e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 29 Mar 2026 12:46:00 +0800 Subject: [PATCH 01/42] fix(claude): add default max_tokens for models --- internal/runtime/executor/claude_executor.go | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 0ec35199..bc5d2065 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -22,6 +22,7 @@ import ( claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -44,10 +45,33 @@ type ClaudeExecutor struct { // Previously "proxy_" was used but this is a detectable fingerprint difference. const claudeToolPrefix = "" +// Anthropic-compatible upstreams may reject or even crash when dynamically +// registered Claude models omit max_tokens. Use a conservative default. 
+const defaultModelMaxTokens = 1024 + func NewClaudeExecutor(cfg *config.Config) *ClaudeExecutor { return &ClaudeExecutor{cfg: cfg} } func (e *ClaudeExecutor) Identifier() string { return "claude" } +func ensureModelMaxTokens(body []byte, modelID string) []byte { + if len(body) == 0 || !gjson.ValidBytes(body) { + return body + } + + if maxTokens := gjson.GetBytes(body, "max_tokens"); maxTokens.Exists() { + return body + } + + for _, provider := range registry.GetGlobalRegistry().GetModelProviders(strings.TrimSpace(modelID)) { + if strings.EqualFold(provider, "claude") { + body, _ = sjson.SetBytes(body, "max_tokens", defaultModelMaxTokens) + return body + } + } + + return body +} + // PrepareRequest injects Claude credentials into the outgoing HTTP request. func (e *ClaudeExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error { if req == nil { @@ -127,6 +151,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r requestedModel := payloadRequestedModel(opts, req.Model) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + body = ensureModelMaxTokens(body, baseModel) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) @@ -293,6 +318,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A requestedModel := payloadRequestedModel(opts, req.Model) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + body = ensureModelMaxTokens(body, baseModel) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) From f033d3a6df8ebd94a8c4d73ff7e0641b92f45164 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 29 Mar 2026 13:00:43 +0800 Subject: [PATCH 02/42] fix(claude): enhance ensureModelMaxTokens to use 
registered max_completion_tokens and fallback to default --- internal/runtime/executor/claude_executor.go | 46 ++++++----- .../runtime/executor/claude_executor_test.go | 78 +++++++++++++++++++ 2 files changed, 103 insertions(+), 21 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index bc5d2065..cc88dd77 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -45,33 +45,14 @@ type ClaudeExecutor struct { // Previously "proxy_" was used but this is a detectable fingerprint difference. const claudeToolPrefix = "" -// Anthropic-compatible upstreams may reject or even crash when dynamically -// registered Claude models omit max_tokens. Use a conservative default. +// Anthropic-compatible upstreams may reject or even crash when Claude models +// omit max_tokens. Prefer registered model metadata before using a fallback. const defaultModelMaxTokens = 1024 func NewClaudeExecutor(cfg *config.Config) *ClaudeExecutor { return &ClaudeExecutor{cfg: cfg} } func (e *ClaudeExecutor) Identifier() string { return "claude" } -func ensureModelMaxTokens(body []byte, modelID string) []byte { - if len(body) == 0 || !gjson.ValidBytes(body) { - return body - } - - if maxTokens := gjson.GetBytes(body, "max_tokens"); maxTokens.Exists() { - return body - } - - for _, provider := range registry.GetGlobalRegistry().GetModelProviders(strings.TrimSpace(modelID)) { - if strings.EqualFold(provider, "claude") { - body, _ = sjson.SetBytes(body, "max_tokens", defaultModelMaxTokens) - return body - } - } - - return body -} - // PrepareRequest injects Claude credentials into the outgoing HTTP request. 
func (e *ClaudeExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error { if req == nil { @@ -1906,3 +1887,26 @@ func injectSystemCacheControl(payload []byte) []byte { return payload } + +func ensureModelMaxTokens(body []byte, modelID string) []byte { + if len(body) == 0 || !gjson.ValidBytes(body) { + return body + } + + if maxTokens := gjson.GetBytes(body, "max_tokens"); maxTokens.Exists() { + return body + } + + for _, provider := range registry.GetGlobalRegistry().GetModelProviders(strings.TrimSpace(modelID)) { + if strings.EqualFold(provider, "claude") { + maxTokens := defaultModelMaxTokens + if info := registry.GetGlobalRegistry().GetModelInfo(strings.TrimSpace(modelID), "claude"); info != nil && info.MaxCompletionTokens > 0 { + maxTokens = info.MaxCompletionTokens + } + body, _ = sjson.SetBytes(body, "max_tokens", maxTokens) + return body + } + } + + return body +} diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index c163d7ea..ee8e9025 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -15,6 +15,7 @@ import ( "github.com/gin-gonic/gin" "github.com/klauspost/compress/zstd" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -1183,6 +1184,83 @@ func testClaudeExecutorInvalidCompressedErrorBody( } } +func TestEnsureModelMaxTokens_UsesRegisteredMaxCompletionTokens(t *testing.T) { + reg := registry.GetGlobalRegistry() + clientID := "test-claude-max-completion-tokens-client" + modelID := "test-claude-max-completion-tokens-model" + reg.RegisterClient(clientID, "claude", []*registry.ModelInfo{{ + ID: modelID, + Type: 
"claude", + OwnedBy: "anthropic", + Object: "model", + Created: time.Now().Unix(), + MaxCompletionTokens: 4096, + UserDefined: true, + }}) + defer reg.UnregisterClient(clientID) + + input := []byte(`{"model":"test-claude-max-completion-tokens-model","messages":[{"role":"user","content":"hi"}]}`) + out := ensureModelMaxTokens(input, modelID) + + if got := gjson.GetBytes(out, "max_tokens").Int(); got != 4096 { + t.Fatalf("max_tokens = %d, want %d", got, 4096) + } +} + +func TestEnsureModelMaxTokens_DefaultsMissingValue(t *testing.T) { + reg := registry.GetGlobalRegistry() + clientID := "test-claude-default-max-tokens-client" + modelID := "test-claude-default-max-tokens-model" + reg.RegisterClient(clientID, "claude", []*registry.ModelInfo{{ + ID: modelID, + Type: "claude", + OwnedBy: "anthropic", + Object: "model", + Created: time.Now().Unix(), + UserDefined: true, + }}) + defer reg.UnregisterClient(clientID) + + input := []byte(`{"model":"test-claude-default-max-tokens-model","messages":[{"role":"user","content":"hi"}]}`) + out := ensureModelMaxTokens(input, modelID) + + if got := gjson.GetBytes(out, "max_tokens").Int(); got != defaultModelMaxTokens { + t.Fatalf("max_tokens = %d, want %d", got, defaultModelMaxTokens) + } +} + +func TestEnsureModelMaxTokens_PreservesExplicitValue(t *testing.T) { + reg := registry.GetGlobalRegistry() + clientID := "test-claude-preserve-max-tokens-client" + modelID := "test-claude-preserve-max-tokens-model" + reg.RegisterClient(clientID, "claude", []*registry.ModelInfo{{ + ID: modelID, + Type: "claude", + OwnedBy: "anthropic", + Object: "model", + Created: time.Now().Unix(), + MaxCompletionTokens: 4096, + UserDefined: true, + }}) + defer reg.UnregisterClient(clientID) + + input := []byte(`{"model":"test-claude-preserve-max-tokens-model","max_tokens":2048,"messages":[{"role":"user","content":"hi"}]}`) + out := ensureModelMaxTokens(input, modelID) + + if got := gjson.GetBytes(out, "max_tokens").Int(); got != 2048 { + t.Fatalf("max_tokens 
= %d, want %d", got, 2048) + } +} + +func TestEnsureModelMaxTokens_SkipsUnregisteredModel(t *testing.T) { + input := []byte(`{"model":"test-claude-unregistered-model","messages":[{"role":"user","content":"hi"}]}`) + out := ensureModelMaxTokens(input, "test-claude-unregistered-model") + + if gjson.GetBytes(out, "max_tokens").Exists() { + t.Fatalf("max_tokens should remain unset, got %s", gjson.GetBytes(out, "max_tokens").Raw) + } +} + // TestClaudeExecutor_ExecuteStream_SetsIdentityAcceptEncoding verifies that streaming // requests use Accept-Encoding: identity so the upstream cannot respond with a // compressed SSE body that would silently break the line scanner. From 6d8de0ade4d9051bb379a9e03bc616ea0d80c706 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sun, 29 Mar 2026 13:49:01 +0800 Subject: [PATCH 03/42] feat(auth): implement weighted provider rotation for improved scheduling fairness --- sdk/cliproxy/auth/scheduler.go | 58 ++++++++++++++++++++++++++--- sdk/cliproxy/auth/scheduler_test.go | 16 ++++---- 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/sdk/cliproxy/auth/scheduler.go b/sdk/cliproxy/auth/scheduler.go index bfff53bf..fd8c9490 100644 --- a/sdk/cliproxy/auth/scheduler.go +++ b/sdk/cliproxy/auth/scheduler.go @@ -293,12 +293,46 @@ func (s *authScheduler) pickMixed(ctx context.Context, providers []string, model } cursorKey := strings.Join(normalized, ",") + ":" + modelKey - start := 0 - if len(normalized) > 0 { - start = s.mixedCursors[cursorKey] % len(normalized) + weights := make([]int, len(normalized)) + segmentStarts := make([]int, len(normalized)) + segmentEnds := make([]int, len(normalized)) + totalWeight := 0 + for providerIndex, shard := range candidateShards { + segmentStarts[providerIndex] = totalWeight + if shard != nil { + weights[providerIndex] = shard.readyCountAtPriorityLocked(false, bestPriority) + } + totalWeight += weights[providerIndex] + segmentEnds[providerIndex] = totalWeight } + if totalWeight == 0 { + return nil, 
"", s.mixedUnavailableErrorLocked(normalized, model, tried) + } + + startSlot := s.mixedCursors[cursorKey] % totalWeight + startProviderIndex := -1 + for providerIndex := range normalized { + if weights[providerIndex] == 0 { + continue + } + if startSlot < segmentEnds[providerIndex] { + startProviderIndex = providerIndex + break + } + } + if startProviderIndex < 0 { + return nil, "", s.mixedUnavailableErrorLocked(normalized, model, tried) + } + + slot := startSlot for offset := 0; offset < len(normalized); offset++ { - providerIndex := (start + offset) % len(normalized) + providerIndex := (startProviderIndex + offset) % len(normalized) + if weights[providerIndex] == 0 { + continue + } + if providerIndex != startProviderIndex { + slot = segmentStarts[providerIndex] + } providerKey := normalized[providerIndex] shard := candidateShards[providerIndex] if shard == nil { @@ -308,7 +342,7 @@ func (s *authScheduler) pickMixed(ctx context.Context, providers []string, model if picked == nil { continue } - s.mixedCursors[cursorKey] = providerIndex + 1 + s.mixedCursors[cursorKey] = slot + 1 return picked, providerKey, nil } return nil, "", s.mixedUnavailableErrorLocked(normalized, model, tried) @@ -704,6 +738,20 @@ func (m *modelScheduler) pickReadyAtPriorityLocked(preferWebsocket bool, priorit return picked.auth } +func (m *modelScheduler) readyCountAtPriorityLocked(preferWebsocket bool, priority int) int { + if m == nil { + return 0 + } + bucket := m.readyByPriority[priority] + if bucket == nil { + return 0 + } + if preferWebsocket && len(bucket.ws.flat) > 0 { + return len(bucket.ws.flat) + } + return len(bucket.all.flat) +} + // unavailableErrorLocked returns the correct unavailable or cooldown error for the shard. 
func (m *modelScheduler) unavailableErrorLocked(provider, model string, predicate func(*scheduledAuth) bool) error { now := time.Now() diff --git a/sdk/cliproxy/auth/scheduler_test.go b/sdk/cliproxy/auth/scheduler_test.go index e7d435a9..3988c90a 100644 --- a/sdk/cliproxy/auth/scheduler_test.go +++ b/sdk/cliproxy/auth/scheduler_test.go @@ -208,7 +208,7 @@ func TestSchedulerPick_CodexWebsocketPrefersWebsocketEnabledSubset(t *testing.T) } } -func TestSchedulerPick_MixedProvidersUsesProviderRotationOverReadyCandidates(t *testing.T) { +func TestSchedulerPick_MixedProvidersUsesWeightedProviderRotationOverReadyCandidates(t *testing.T) { t.Parallel() scheduler := newSchedulerForTest( @@ -218,8 +218,8 @@ func TestSchedulerPick_MixedProvidersUsesProviderRotationOverReadyCandidates(t * &Auth{ID: "claude-a", Provider: "claude"}, ) - wantProviders := []string{"gemini", "claude", "gemini", "claude"} - wantIDs := []string{"gemini-a", "claude-a", "gemini-b", "claude-a"} + wantProviders := []string{"gemini", "gemini", "claude", "gemini"} + wantIDs := []string{"gemini-a", "gemini-b", "claude-a", "gemini-a"} for index := range wantProviders { got, provider, errPick := scheduler.pickMixed(context.Background(), []string{"gemini", "claude"}, "", cliproxyexecutor.Options{}, nil) if errPick != nil { @@ -272,7 +272,7 @@ func TestSchedulerPick_MixedProvidersPrefersHighestPriorityTier(t *testing.T) { } } -func TestManager_PickNextMixed_UsesProviderRotationBeforeCredentialRotation(t *testing.T) { +func TestManager_PickNextMixed_UsesWeightedProviderRotationBeforeCredentialRotation(t *testing.T) { t.Parallel() manager := NewManager(nil, &RoundRobinSelector{}, nil) @@ -288,8 +288,8 @@ func TestManager_PickNextMixed_UsesProviderRotationBeforeCredentialRotation(t *t t.Fatalf("Register(claude-a) error = %v", errRegister) } - wantProviders := []string{"gemini", "claude", "gemini", "claude"} - wantIDs := []string{"gemini-a", "claude-a", "gemini-b", "claude-a"} + wantProviders := []string{"gemini", 
"gemini", "claude", "gemini"} + wantIDs := []string{"gemini-a", "gemini-b", "claude-a", "gemini-a"} for index := range wantProviders { got, _, provider, errPick := manager.pickNextMixed(context.Background(), []string{"gemini", "claude"}, "", cliproxyexecutor.Options{}, map[string]struct{}{}) if errPick != nil { @@ -399,8 +399,8 @@ func TestManager_PickNextMixed_UsesSchedulerRotation(t *testing.T) { t.Fatalf("Register(claude-a) error = %v", errRegister) } - wantProviders := []string{"gemini", "claude", "gemini", "claude"} - wantIDs := []string{"gemini-a", "claude-a", "gemini-b", "claude-a"} + wantProviders := []string{"gemini", "gemini", "claude", "gemini"} + wantIDs := []string{"gemini-a", "gemini-b", "claude-a", "gemini-a"} for index := range wantProviders { got, _, provider, errPick := manager.pickNextMixed(context.Background(), []string{"gemini", "claude"}, "", cliproxyexecutor.Options{}, nil) if errPick != nil { From 134a9eac9da0496b3eae57783053df36e1f33118 Mon Sep 17 00:00:00 2001 From: trph <894304504@qq.com> Date: Sun, 29 Mar 2026 17:23:16 +0800 Subject: [PATCH 04/42] fix: preserve SSE event boundaries for Responses streams --- .../openai/openai_responses_handlers.go | 28 +++++++++------ ...ai_responses_handlers_stream_error_test.go | 35 +++++++++++++++++++ 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go index 3bca75f9..8e3fee33 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers.go +++ b/sdk/api/handlers/openai/openai_responses_handlers.go @@ -10,6 +10,7 @@ import ( "bytes" "context" "fmt" + "io" "net/http" "github.com/gin-gonic/gin" @@ -21,6 +22,21 @@ import ( "github.com/tidwall/sjson" ) +func writeResponsesSSEChunk(w io.Writer, chunk []byte) { + if w == nil || len(chunk) == 0 { + return + } + _, _ = w.Write(chunk) + switch { + case bytes.HasSuffix(chunk, []byte("\n\n")): + return + case bytes.HasSuffix(chunk, 
[]byte("\n")): + _, _ = w.Write([]byte("\n")) + default: + _, _ = w.Write([]byte("\n\n")) + } +} + // OpenAIResponsesAPIHandler contains the handlers for OpenAIResponses API endpoints. // It holds a pool of clients to interact with the backend service. type OpenAIResponsesAPIHandler struct { @@ -230,11 +246,7 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders) // Write first chunk logic (matching forwardResponsesStream) - if bytes.HasPrefix(chunk, []byte("event:")) { - _, _ = c.Writer.Write([]byte("\n")) - } - _, _ = c.Writer.Write(chunk) - _, _ = c.Writer.Write([]byte("\n")) + writeResponsesSSEChunk(c.Writer, chunk) flusher.Flush() // Continue @@ -247,11 +259,7 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) { h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{ WriteChunk: func(chunk []byte) { - if bytes.HasPrefix(chunk, []byte("event:")) { - _, _ = c.Writer.Write([]byte("\n")) - } - _, _ = c.Writer.Write(chunk) - _, _ = c.Writer.Write([]byte("\n")) + writeResponsesSSEChunk(c.Writer, chunk) }, WriteTerminalError: func(errMsg *interfaces.ErrorMessage) { if errMsg == nil { diff --git a/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go b/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go index dce73807..e1e6e7aa 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go +++ b/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go @@ -41,3 +41,38 @@ func TestForwardResponsesStreamTerminalErrorUsesResponsesErrorChunk(t *testing.T t.Fatalf("expected streaming error chunk (top-level type), got HTTP error body: %q", body) } } + +func 
TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) { + gin.SetMode(gin.TestMode) + base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, nil) + h := NewOpenAIResponsesAPIHandler(base) + + recorder := httptest.NewRecorder() + c, _ := gin.CreateTestContext(recorder) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", nil) + + flusher, ok := c.Writer.(http.Flusher) + if !ok { + t.Fatalf("expected gin writer to implement http.Flusher") + } + + data := make(chan []byte, 2) + errs := make(chan *interfaces.ErrorMessage) + data <- []byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"function_call\",\"arguments\":\"{}\"}}") + data <- []byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[]}}") + close(data) + close(errs) + + h.forwardResponsesStream(c, flusher, func(error) {}, data, errs) + body := recorder.Body.String() + + if !strings.Contains(body, "data: {\"type\":\"response.output_item.done\"") { + t.Fatalf("expected first SSE data chunk, got: %q", body) + } + if !strings.Contains(body, "\n\ndata: {\"type\":\"response.completed\"") { + t.Fatalf("expected blank-line separation before second SSE event, got: %q", body) + } + if strings.Contains(body, "arguments\":\"{}\"}}data: {\"type\":\"response.completed\"") { + t.Fatalf("second SSE event was concatenated onto first event body: %q", body) + } +} From c03883ccf0f7b2539a669a09549789bf642f6b0d Mon Sep 17 00:00:00 2001 From: trph <894304504@qq.com> Date: Sun, 29 Mar 2026 22:00:46 +0800 Subject: [PATCH 05/42] fix: address responses SSE review feedback --- .../openai/openai_responses_handlers.go | 12 +++-- ...ai_responses_handlers_stream_error_test.go | 35 -------------- .../openai_responses_handlers_stream_test.go | 48 +++++++++++++++++++ 3 files changed, 55 insertions(+), 40 deletions(-) create mode 100644 sdk/api/handlers/openai/openai_responses_handlers_stream_test.go diff --git 
a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go index 8e3fee33..4fb00af6 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers.go +++ b/sdk/api/handlers/openai/openai_responses_handlers.go @@ -26,13 +26,15 @@ func writeResponsesSSEChunk(w io.Writer, chunk []byte) { if w == nil || len(chunk) == 0 { return } - _, _ = w.Write(chunk) - switch { - case bytes.HasSuffix(chunk, []byte("\n\n")): + if _, err := w.Write(chunk); err != nil { return - case bytes.HasSuffix(chunk, []byte("\n")): + } + if bytes.HasSuffix(chunk, []byte("\n\n")) { + return + } + if bytes.HasSuffix(chunk, []byte("\n")) { _, _ = w.Write([]byte("\n")) - default: + } else { _, _ = w.Write([]byte("\n\n")) } } diff --git a/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go b/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go index e1e6e7aa..dce73807 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go +++ b/sdk/api/handlers/openai/openai_responses_handlers_stream_error_test.go @@ -41,38 +41,3 @@ func TestForwardResponsesStreamTerminalErrorUsesResponsesErrorChunk(t *testing.T t.Fatalf("expected streaming error chunk (top-level type), got HTTP error body: %q", body) } } - -func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) { - gin.SetMode(gin.TestMode) - base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, nil) - h := NewOpenAIResponsesAPIHandler(base) - - recorder := httptest.NewRecorder() - c, _ := gin.CreateTestContext(recorder) - c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", nil) - - flusher, ok := c.Writer.(http.Flusher) - if !ok { - t.Fatalf("expected gin writer to implement http.Flusher") - } - - data := make(chan []byte, 2) - errs := make(chan *interfaces.ErrorMessage) - data <- []byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"function_call\",\"arguments\":\"{}\"}}") - data <- 
[]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[]}}") - close(data) - close(errs) - - h.forwardResponsesStream(c, flusher, func(error) {}, data, errs) - body := recorder.Body.String() - - if !strings.Contains(body, "data: {\"type\":\"response.output_item.done\"") { - t.Fatalf("expected first SSE data chunk, got: %q", body) - } - if !strings.Contains(body, "\n\ndata: {\"type\":\"response.completed\"") { - t.Fatalf("expected blank-line separation before second SSE event, got: %q", body) - } - if strings.Contains(body, "arguments\":\"{}\"}}data: {\"type\":\"response.completed\"") { - t.Fatalf("second SSE event was concatenated onto first event body: %q", body) - } -} diff --git a/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go b/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go new file mode 100644 index 00000000..8fa908bb --- /dev/null +++ b/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go @@ -0,0 +1,48 @@ +package openai + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/gin-gonic/gin" + "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" + "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers" + sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" +) + +func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) { + gin.SetMode(gin.TestMode) + base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, nil) + h := NewOpenAIResponsesAPIHandler(base) + + recorder := httptest.NewRecorder() + c, _ := gin.CreateTestContext(recorder) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", nil) + + flusher, ok := c.Writer.(http.Flusher) + if !ok { + t.Fatalf("expected gin writer to implement http.Flusher") + } + + data := make(chan []byte, 2) + errs := make(chan *interfaces.ErrorMessage) + data <- []byte("data: 
{\"type\":\"response.output_item.done\",\"item\":{\"type\":\"function_call\",\"arguments\":\"{}\"}}") + data <- []byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[]}}") + close(data) + close(errs) + + h.forwardResponsesStream(c, flusher, func(error) {}, data, errs) + body := recorder.Body.String() + + if !strings.Contains(body, "data: {\"type\":\"response.output_item.done\"") { + t.Fatalf("expected first SSE data chunk, got: %q", body) + } + if !strings.Contains(body, "\n\ndata: {\"type\":\"response.completed\"") { + t.Fatalf("expected blank-line separation before second SSE event, got: %q", body) + } + if strings.Contains(body, "arguments\":\"{}\"}}data: {\"type\":\"response.completed\"") { + t.Fatalf("second SSE event was concatenated onto first event body: %q", body) + } +} From 0fcc02fbea046c06ea91c5418950f11187cc19dd Mon Sep 17 00:00:00 2001 From: trph <894304504@qq.com> Date: Sun, 29 Mar 2026 22:10:28 +0800 Subject: [PATCH 06/42] fix: tighten responses SSE review follow-up --- .../openai/openai_responses_handlers.go | 8 ++++++-- .../openai_responses_handlers_stream_test.go | 18 +++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go index 4fb00af6..9d722162 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers.go +++ b/sdk/api/handlers/openai/openai_responses_handlers.go @@ -33,9 +33,13 @@ func writeResponsesSSEChunk(w io.Writer, chunk []byte) { return } if bytes.HasSuffix(chunk, []byte("\n")) { - _, _ = w.Write([]byte("\n")) + if _, err := w.Write([]byte("\n")); err != nil { + return + } } else { - _, _ = w.Write([]byte("\n\n")) + if _, err := w.Write([]byte("\n\n")); err != nil { + return + } } } diff --git a/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go b/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go index 8fa908bb..185a455a 100644 --- 
a/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go +++ b/sdk/api/handlers/openai/openai_responses_handlers_stream_test.go @@ -35,14 +35,18 @@ func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) { h.forwardResponsesStream(c, flusher, func(error) {}, data, errs) body := recorder.Body.String() + parts := strings.Split(strings.TrimSpace(body), "\n\n") + if len(parts) != 2 { + t.Fatalf("expected 2 SSE events, got %d. Body: %q", len(parts), body) + } - if !strings.Contains(body, "data: {\"type\":\"response.output_item.done\"") { - t.Fatalf("expected first SSE data chunk, got: %q", body) + expectedPart1 := "data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"function_call\",\"arguments\":\"{}\"}}" + if parts[0] != expectedPart1 { + t.Errorf("unexpected first event.\nGot: %q\nWant: %q", parts[0], expectedPart1) } - if !strings.Contains(body, "\n\ndata: {\"type\":\"response.completed\"") { - t.Fatalf("expected blank-line separation before second SSE event, got: %q", body) - } - if strings.Contains(body, "arguments\":\"{}\"}}data: {\"type\":\"response.completed\"") { - t.Fatalf("second SSE event was concatenated onto first event body: %q", body) + + expectedPart2 := "data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[]}}" + if parts[1] != expectedPart2 { + t.Errorf("unexpected second event.\nGot: %q\nWant: %q", parts[1], expectedPart2) } } From 13aa5b3375ccba8e1335215e2f04b4e4671ab10a Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sun, 29 Mar 2026 22:18:14 +0800 Subject: [PATCH 07/42] Revert "fix(codex): restore prompt cache continuity for Codex requests" --- internal/runtime/executor/codex_continuity.go | 125 ----------------- internal/runtime/executor/codex_executor.go | 36 ++--- .../executor/codex_executor_cache_test.go | 128 +----------------- .../executor/codex_websockets_executor.go | 28 ++-- .../codex_websockets_executor_test.go | 45 ------ 5 files changed, 37 insertions(+), 325 
deletions(-) delete mode 100644 internal/runtime/executor/codex_continuity.go diff --git a/internal/runtime/executor/codex_continuity.go b/internal/runtime/executor/codex_continuity.go deleted file mode 100644 index 9a0cd1b4..00000000 --- a/internal/runtime/executor/codex_continuity.go +++ /dev/null @@ -1,125 +0,0 @@ -package executor - -import ( - "context" - "fmt" - "net/http" - "strings" - - "github.com/google/uuid" - cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" - cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" - log "github.com/sirupsen/logrus" - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -type codexContinuity struct { - Key string - Source string -} - -func metadataString(meta map[string]any, key string) string { - if len(meta) == 0 { - return "" - } - raw, ok := meta[key] - if !ok || raw == nil { - return "" - } - switch v := raw.(type) { - case string: - return strings.TrimSpace(v) - case []byte: - return strings.TrimSpace(string(v)) - default: - return "" - } -} - -func principalString(raw any) string { - switch v := raw.(type) { - case string: - return strings.TrimSpace(v) - case fmt.Stringer: - return strings.TrimSpace(v.String()) - default: - return strings.TrimSpace(fmt.Sprintf("%v", raw)) - } -} - -func resolveCodexContinuity(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) codexContinuity { - if promptCacheKey := strings.TrimSpace(gjson.GetBytes(req.Payload, "prompt_cache_key").String()); promptCacheKey != "" { - return codexContinuity{Key: promptCacheKey, Source: "prompt_cache_key"} - } - if executionSession := metadataString(opts.Metadata, cliproxyexecutor.ExecutionSessionMetadataKey); executionSession != "" { - return codexContinuity{Key: executionSession, Source: "execution_session"} - } - if ginCtx := ginContextFrom(ctx); ginCtx != nil { - if ginCtx.Request != nil { - if v := 
strings.TrimSpace(ginCtx.GetHeader("Idempotency-Key")); v != "" { - return codexContinuity{Key: v, Source: "idempotency_key"} - } - } - if v, exists := ginCtx.Get("apiKey"); exists && v != nil { - if trimmed := principalString(v); trimmed != "" { - return codexContinuity{Key: uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:"+trimmed)).String(), Source: "client_principal"} - } - } - } - if auth != nil { - if authID := strings.TrimSpace(auth.ID); authID != "" { - return codexContinuity{Key: uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:auth:"+authID)).String(), Source: "auth_id"} - } - } - return codexContinuity{} -} - -func applyCodexContinuityBody(rawJSON []byte, continuity codexContinuity) []byte { - if continuity.Key == "" { - return rawJSON - } - rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", continuity.Key) - return rawJSON -} - -func applyCodexContinuityHeaders(headers http.Header, continuity codexContinuity) { - if headers == nil || continuity.Key == "" { - return - } - headers.Set("session_id", continuity.Key) -} - -func logCodexRequestDiagnostics(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, headers http.Header, body []byte, continuity codexContinuity) { - if !log.IsLevelEnabled(log.DebugLevel) { - return - } - entry := logWithRequestID(ctx) - authID := "" - authFile := "" - if auth != nil { - authID = strings.TrimSpace(auth.ID) - authFile = strings.TrimSpace(auth.FileName) - } - selectedAuthID := metadataString(opts.Metadata, cliproxyexecutor.SelectedAuthMetadataKey) - executionSessionID := metadataString(opts.Metadata, cliproxyexecutor.ExecutionSessionMetadataKey) - entry.Debugf( - "codex request diagnostics auth_id=%s selected_auth_id=%s auth_file=%s exec_session=%s continuity_source=%s session_id=%s prompt_cache_key=%s prompt_cache_retention=%s store=%t has_instructions=%t reasoning_effort=%s reasoning_summary=%s 
chatgpt_account_id=%t originator=%s model=%s source_format=%s", - authID, - selectedAuthID, - authFile, - executionSessionID, - continuity.Source, - strings.TrimSpace(headers.Get("session_id")), - gjson.GetBytes(body, "prompt_cache_key").String(), - gjson.GetBytes(body, "prompt_cache_retention").String(), - gjson.GetBytes(body, "store").Bool(), - gjson.GetBytes(body, "instructions").Exists(), - gjson.GetBytes(body, "reasoning.effort").String(), - gjson.GetBytes(body, "reasoning.summary").String(), - strings.TrimSpace(headers.Get("Chatgpt-Account-Id")) != "", - strings.TrimSpace(headers.Get("Originator")), - req.Model, - opts.SourceFormat.String(), - ) -} diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index b39ec939..fddf343d 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -111,6 +111,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") + body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") if !gjson.GetBytes(body, "instructions").Exists() { @@ -118,12 +119,11 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re } url := strings.TrimSuffix(baseURL, "/") + "/responses" - httpReq, continuity, err := e.cacheHelper(ctx, auth, from, url, req, opts, body) + httpReq, err := e.cacheHelper(ctx, from, url, req, body) if err != nil { return resp, err } applyCodexHeaders(httpReq, auth, apiKey, true, e.cfg) - logCodexRequestDiagnostics(ctx, auth, req, opts, httpReq.Header, body, continuity) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID @@ -223,12 +223,11 @@ func (e *CodexExecutor) 
executeCompact(ctx context.Context, auth *cliproxyauth.A body, _ = sjson.DeleteBytes(body, "stream") url := strings.TrimSuffix(baseURL, "/") + "/responses/compact" - httpReq, continuity, err := e.cacheHelper(ctx, auth, from, url, req, opts, body) + httpReq, err := e.cacheHelper(ctx, from, url, req, body) if err != nil { return resp, err } applyCodexHeaders(httpReq, auth, apiKey, false, e.cfg) - logCodexRequestDiagnostics(ctx, auth, req, opts, httpReq.Header, body, continuity) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID @@ -311,6 +310,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au requestedModel := payloadRequestedModel(opts, req.Model) body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") + body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") body, _ = sjson.SetBytes(body, "model", baseModel) @@ -319,12 +319,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } url := strings.TrimSuffix(baseURL, "/") + "/responses" - httpReq, continuity, err := e.cacheHelper(ctx, auth, from, url, req, opts, body) + httpReq, err := e.cacheHelper(ctx, from, url, req, body) if err != nil { return nil, err } applyCodexHeaders(httpReq, auth, apiKey, true, e.cfg) - logCodexRequestDiagnostics(ctx, auth, req, opts, httpReq.Header, body, continuity) var authID, authLabel, authType, authValue string if auth != nil { authID = auth.ID @@ -600,9 +599,8 @@ func (e *CodexExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (* return auth, nil } -func (e *CodexExecutor) cacheHelper(ctx context.Context, auth *cliproxyauth.Auth, from sdktranslator.Format, url string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, rawJSON []byte) 
(*http.Request, codexContinuity, error) { +func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Format, url string, req cliproxyexecutor.Request, rawJSON []byte) (*http.Request, error) { var cache codexCache - continuity := codexContinuity{} if from == "claude" { userIDResult := gjson.GetBytes(req.Payload, "metadata.user_id") if userIDResult.Exists() { @@ -615,26 +613,30 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, auth *cliproxyauth.Auth } setCodexCache(key, cache) } - continuity = codexContinuity{Key: cache.ID, Source: "claude_user_cache"} } } else if from == "openai-response" { promptCacheKey := gjson.GetBytes(req.Payload, "prompt_cache_key") if promptCacheKey.Exists() { cache.ID = promptCacheKey.String() - continuity = codexContinuity{Key: cache.ID, Source: "prompt_cache_key"} } } else if from == "openai" { - continuity = resolveCodexContinuity(ctx, auth, req, opts) - cache.ID = continuity.Key + if apiKey := strings.TrimSpace(apiKeyFromContext(ctx)); apiKey != "" { + cache.ID = uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:"+apiKey)).String() + } } - rawJSON = applyCodexContinuityBody(rawJSON, continuity) + if cache.ID != "" { + rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID) + } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(rawJSON)) if err != nil { - return nil, continuity, err + return nil, err } - applyCodexContinuityHeaders(httpReq.Header, continuity) - return httpReq, continuity, nil + if cache.ID != "" { + httpReq.Header.Set("Conversation_id", cache.ID) + httpReq.Header.Set("Session_id", cache.ID) + } + return httpReq, nil } func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, stream bool, cfg *config.Config) { @@ -647,7 +649,7 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s } misc.EnsureHeader(r.Header, ginHeaders, "Version", "") - misc.EnsureHeader(r.Header, ginHeaders, 
"session_id", uuid.NewString()) + misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString()) misc.EnsureHeader(r.Header, ginHeaders, "X-Codex-Turn-Metadata", "") misc.EnsureHeader(r.Header, ginHeaders, "X-Client-Request-Id", "") cfgUserAgent, _ := codexHeaderDefaults(cfg, auth) diff --git a/internal/runtime/executor/codex_executor_cache_test.go b/internal/runtime/executor/codex_executor_cache_test.go index f6def7ae..d6dca031 100644 --- a/internal/runtime/executor/codex_executor_cache_test.go +++ b/internal/runtime/executor/codex_executor_cache_test.go @@ -8,7 +8,6 @@ import ( "github.com/gin-gonic/gin" "github.com/google/uuid" - cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" @@ -28,7 +27,7 @@ func TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFrom } url := "https://example.com/responses" - httpReq, _, err := executor.cacheHelper(ctx, nil, sdktranslator.FromString("openai"), url, req, cliproxyexecutor.Options{}, rawJSON) + httpReq, err := executor.cacheHelper(ctx, sdktranslator.FromString("openai"), url, req, rawJSON) if err != nil { t.Fatalf("cacheHelper error: %v", err) } @@ -43,14 +42,14 @@ func TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFrom if gotKey != expectedKey { t.Fatalf("prompt_cache_key = %q, want %q", gotKey, expectedKey) } - if gotSession := httpReq.Header.Get("session_id"); gotSession != expectedKey { - t.Fatalf("session_id = %q, want %q", gotSession, expectedKey) + if gotConversation := httpReq.Header.Get("Conversation_id"); gotConversation != expectedKey { + t.Fatalf("Conversation_id = %q, want %q", gotConversation, expectedKey) } - if got := httpReq.Header.Get("Conversation_id"); got != "" { - t.Fatalf("Conversation_id = %q, want empty", got) + if gotSession := 
httpReq.Header.Get("Session_id"); gotSession != expectedKey { + t.Fatalf("Session_id = %q, want %q", gotSession, expectedKey) } - httpReq2, _, err := executor.cacheHelper(ctx, nil, sdktranslator.FromString("openai"), url, req, cliproxyexecutor.Options{}, rawJSON) + httpReq2, err := executor.cacheHelper(ctx, sdktranslator.FromString("openai"), url, req, rawJSON) if err != nil { t.Fatalf("cacheHelper error (second call): %v", err) } @@ -63,118 +62,3 @@ func TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFrom t.Fatalf("prompt_cache_key (second call) = %q, want %q", gotKey2, expectedKey) } } - -func TestCodexExecutorCacheHelper_OpenAIResponses_PreservesPromptCacheRetention(t *testing.T) { - executor := &CodexExecutor{} - url := "https://example.com/responses" - req := cliproxyexecutor.Request{ - Model: "gpt-5.3-codex", - Payload: []byte(`{"model":"gpt-5.3-codex","prompt_cache_key":"cache-key-1","prompt_cache_retention":"persistent"}`), - } - rawJSON := []byte(`{"model":"gpt-5.3-codex","stream":true,"prompt_cache_retention":"persistent"}`) - - httpReq, _, err := executor.cacheHelper(context.Background(), nil, sdktranslator.FromString("openai-response"), url, req, cliproxyexecutor.Options{}, rawJSON) - if err != nil { - t.Fatalf("cacheHelper error: %v", err) - } - - body, err := io.ReadAll(httpReq.Body) - if err != nil { - t.Fatalf("read request body: %v", err) - } - - if got := gjson.GetBytes(body, "prompt_cache_key").String(); got != "cache-key-1" { - t.Fatalf("prompt_cache_key = %q, want %q", got, "cache-key-1") - } - if got := gjson.GetBytes(body, "prompt_cache_retention").String(); got != "persistent" { - t.Fatalf("prompt_cache_retention = %q, want %q", got, "persistent") - } - if got := httpReq.Header.Get("session_id"); got != "cache-key-1" { - t.Fatalf("session_id = %q, want %q", got, "cache-key-1") - } - if got := httpReq.Header.Get("Conversation_id"); got != "" { - t.Fatalf("Conversation_id = %q, want empty", got) - } -} - -func 
TestCodexExecutorCacheHelper_OpenAIChatCompletions_UsesExecutionSessionForContinuity(t *testing.T) { - executor := &CodexExecutor{} - rawJSON := []byte(`{"model":"gpt-5.4","stream":true}`) - req := cliproxyexecutor.Request{ - Model: "gpt-5.4", - Payload: []byte(`{"model":"gpt-5.4"}`), - } - opts := cliproxyexecutor.Options{Metadata: map[string]any{cliproxyexecutor.ExecutionSessionMetadataKey: "exec-session-1"}} - - httpReq, _, err := executor.cacheHelper(context.Background(), nil, sdktranslator.FromString("openai"), "https://example.com/responses", req, opts, rawJSON) - if err != nil { - t.Fatalf("cacheHelper error: %v", err) - } - - body, err := io.ReadAll(httpReq.Body) - if err != nil { - t.Fatalf("read request body: %v", err) - } - - if got := gjson.GetBytes(body, "prompt_cache_key").String(); got != "exec-session-1" { - t.Fatalf("prompt_cache_key = %q, want %q", got, "exec-session-1") - } - if got := httpReq.Header.Get("session_id"); got != "exec-session-1" { - t.Fatalf("session_id = %q, want %q", got, "exec-session-1") - } -} - -func TestCodexExecutorCacheHelper_OpenAIChatCompletions_FallsBackToStableAuthID(t *testing.T) { - executor := &CodexExecutor{} - rawJSON := []byte(`{"model":"gpt-5.4","stream":true}`) - req := cliproxyexecutor.Request{ - Model: "gpt-5.4", - Payload: []byte(`{"model":"gpt-5.4"}`), - } - auth := &cliproxyauth.Auth{ID: "codex-auth-1", Provider: "codex"} - - httpReq, _, err := executor.cacheHelper(context.Background(), auth, sdktranslator.FromString("openai"), "https://example.com/responses", req, cliproxyexecutor.Options{}, rawJSON) - if err != nil { - t.Fatalf("cacheHelper error: %v", err) - } - - body, err := io.ReadAll(httpReq.Body) - if err != nil { - t.Fatalf("read request body: %v", err) - } - - expected := uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:auth:codex-auth-1")).String() - if got := gjson.GetBytes(body, "prompt_cache_key").String(); got != expected { - t.Fatalf("prompt_cache_key = %q, want %q", 
got, expected) - } - if got := httpReq.Header.Get("session_id"); got != expected { - t.Fatalf("session_id = %q, want %q", got, expected) - } -} - -func TestCodexExecutorCacheHelper_ClaudePreservesCacheContinuity(t *testing.T) { - executor := &CodexExecutor{} - req := cliproxyexecutor.Request{ - Model: "claude-3-7-sonnet", - Payload: []byte(`{"metadata":{"user_id":"user-1"}}`), - } - rawJSON := []byte(`{"model":"gpt-5.4","stream":true}`) - - httpReq, continuity, err := executor.cacheHelper(context.Background(), nil, sdktranslator.FromString("claude"), "https://example.com/responses", req, cliproxyexecutor.Options{}, rawJSON) - if err != nil { - t.Fatalf("cacheHelper error: %v", err) - } - if continuity.Key == "" { - t.Fatal("continuity.Key = empty, want non-empty") - } - body, err := io.ReadAll(httpReq.Body) - if err != nil { - t.Fatalf("read request body: %v", err) - } - if got := gjson.GetBytes(body, "prompt_cache_key").String(); got != continuity.Key { - t.Fatalf("prompt_cache_key = %q, want %q", got, continuity.Key) - } - if got := httpReq.Header.Get("session_id"); got != continuity.Key { - t.Fatalf("session_id = %q, want %q", got, continuity.Key) - } -} diff --git a/internal/runtime/executor/codex_websockets_executor.go b/internal/runtime/executor/codex_websockets_executor.go index 50cc736d..fca82fe7 100644 --- a/internal/runtime/executor/codex_websockets_executor.go +++ b/internal/runtime/executor/codex_websockets_executor.go @@ -178,6 +178,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") + body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.DeleteBytes(body, "safety_identifier") if !gjson.GetBytes(body, "instructions").Exists() { body, _ = sjson.SetBytes(body, "instructions", "") @@ -189,7 +190,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx 
context.Context, auth *cliproxyaut return resp, err } - body, wsHeaders, continuity := applyCodexPromptCacheHeaders(ctx, auth, from, req, opts, body) + body, wsHeaders := applyCodexPromptCacheHeaders(from, req, body) wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey, e.cfg) var authID, authLabel, authType, authValue string @@ -208,7 +209,6 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut } wsReqBody := buildCodexWebsocketRequestBody(body) - logCodexRequestDiagnostics(ctx, auth, req, opts, wsHeaders, body, continuity) recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ URL: wsURL, Method: "WEBSOCKET", @@ -385,7 +385,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr return nil, err } - body, wsHeaders, continuity := applyCodexPromptCacheHeaders(ctx, auth, from, req, opts, body) + body, wsHeaders := applyCodexPromptCacheHeaders(from, req, body) wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey, e.cfg) var authID, authLabel, authType, authValue string @@ -403,7 +403,6 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr } wsReqBody := buildCodexWebsocketRequestBody(body) - logCodexRequestDiagnostics(ctx, auth, req, opts, wsHeaders, body, continuity) recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ URL: wsURL, Method: "WEBSOCKET", @@ -762,14 +761,13 @@ func buildCodexResponsesWebsocketURL(httpURL string) (string, error) { return parsed.String(), nil } -func applyCodexPromptCacheHeaders(ctx context.Context, auth *cliproxyauth.Auth, from sdktranslator.Format, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, rawJSON []byte) ([]byte, http.Header, codexContinuity) { +func applyCodexPromptCacheHeaders(from sdktranslator.Format, req cliproxyexecutor.Request, rawJSON []byte) ([]byte, http.Header) { headers := http.Header{} if len(rawJSON) == 0 { - return rawJSON, headers, codexContinuity{} + return rawJSON, headers } var cache 
codexCache - continuity := codexContinuity{} if from == "claude" { userIDResult := gjson.GetBytes(req.Payload, "metadata.user_id") if userIDResult.Exists() { @@ -783,22 +781,20 @@ func applyCodexPromptCacheHeaders(ctx context.Context, auth *cliproxyauth.Auth, } setCodexCache(key, cache) } - continuity = codexContinuity{Key: cache.ID, Source: "claude_user_cache"} } } else if from == "openai-response" { if promptCacheKey := gjson.GetBytes(req.Payload, "prompt_cache_key"); promptCacheKey.Exists() { cache.ID = promptCacheKey.String() - continuity = codexContinuity{Key: cache.ID, Source: "prompt_cache_key"} } - } else if from == "openai" { - continuity = resolveCodexContinuity(ctx, auth, req, opts) - cache.ID = continuity.Key } - rawJSON = applyCodexContinuityBody(rawJSON, continuity) - applyCodexContinuityHeaders(headers, continuity) + if cache.ID != "" { + rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID) + headers.Set("Conversation_id", cache.ID) + headers.Set("Session_id", cache.ID) + } - return rawJSON, headers, continuity + return rawJSON, headers } func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth *cliproxyauth.Auth, token string, cfg *config.Config) http.Header { @@ -830,7 +826,7 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth * betaHeader = codexResponsesWebsocketBetaHeaderValue } headers.Set("OpenAI-Beta", betaHeader) - misc.EnsureHeader(headers, ginHeaders, "session_id", uuid.NewString()) + misc.EnsureHeader(headers, ginHeaders, "Session_id", uuid.NewString()) ensureHeaderWithConfigPrecedence(headers, ginHeaders, "User-Agent", cfgUserAgent, codexUserAgent) isAPIKey := false diff --git a/internal/runtime/executor/codex_websockets_executor_test.go b/internal/runtime/executor/codex_websockets_executor_test.go index 0a06982f..d34e7c39 100644 --- a/internal/runtime/executor/codex_websockets_executor_test.go +++ b/internal/runtime/executor/codex_websockets_executor_test.go @@ -9,9 +9,7 
@@ import ( "github.com/gin-gonic/gin" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" - cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" - sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" ) @@ -34,49 +32,6 @@ func TestBuildCodexWebsocketRequestBodyPreservesPreviousResponseID(t *testing.T) } } -func TestApplyCodexPromptCacheHeaders_PreservesPromptCacheRetention(t *testing.T) { - req := cliproxyexecutor.Request{ - Model: "gpt-5-codex", - Payload: []byte(`{"prompt_cache_key":"cache-key-1","prompt_cache_retention":"persistent"}`), - } - body := []byte(`{"model":"gpt-5-codex","stream":true,"prompt_cache_retention":"persistent"}`) - - updatedBody, headers, _ := applyCodexPromptCacheHeaders(context.Background(), nil, sdktranslator.FromString("openai-response"), req, cliproxyexecutor.Options{}, body) - - if got := gjson.GetBytes(updatedBody, "prompt_cache_key").String(); got != "cache-key-1" { - t.Fatalf("prompt_cache_key = %q, want %q", got, "cache-key-1") - } - if got := gjson.GetBytes(updatedBody, "prompt_cache_retention").String(); got != "persistent" { - t.Fatalf("prompt_cache_retention = %q, want %q", got, "persistent") - } - if got := headers.Get("session_id"); got != "cache-key-1" { - t.Fatalf("session_id = %q, want %q", got, "cache-key-1") - } - if got := headers.Get("Conversation_id"); got != "" { - t.Fatalf("Conversation_id = %q, want empty", got) - } -} - -func TestApplyCodexPromptCacheHeaders_ClaudePreservesContinuity(t *testing.T) { - req := cliproxyexecutor.Request{ - Model: "claude-3-7-sonnet", - Payload: []byte(`{"metadata":{"user_id":"user-1"}}`), - } - body := []byte(`{"model":"gpt-5.4","stream":true}`) - - updatedBody, headers, continuity := applyCodexPromptCacheHeaders(context.Background(), nil, 
sdktranslator.FromString("claude"), req, cliproxyexecutor.Options{}, body) - - if continuity.Key == "" { - t.Fatal("continuity.Key = empty, want non-empty") - } - if got := gjson.GetBytes(updatedBody, "prompt_cache_key").String(); got != continuity.Key { - t.Fatalf("prompt_cache_key = %q, want %q", got, continuity.Key) - } - if got := headers.Get("session_id"); got != continuity.Key { - t.Fatalf("session_id = %q, want %q", got, continuity.Key) - } -} - func TestApplyCodexWebsocketHeadersDefaultsToCurrentResponsesBeta(t *testing.T) { headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, nil, "", nil) From f73d55ddaadde6b5f450c240403214a0233640bb Mon Sep 17 00:00:00 2001 From: trph <894304504@qq.com> Date: Sun, 29 Mar 2026 22:19:25 +0800 Subject: [PATCH 08/42] fix: simplify responses SSE suffix handling --- sdk/api/handlers/openai/openai_responses_handlers.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go index 9d722162..d1ba68c7 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers.go +++ b/sdk/api/handlers/openai/openai_responses_handlers.go @@ -32,14 +32,12 @@ func writeResponsesSSEChunk(w io.Writer, chunk []byte) { if bytes.HasSuffix(chunk, []byte("\n\n")) { return } + suffix := []byte("\n\n") if bytes.HasSuffix(chunk, []byte("\n")) { - if _, err := w.Write([]byte("\n")); err != nil { - return - } - } else { - if _, err := w.Write([]byte("\n\n")); err != nil { - return - } + suffix = []byte("\n") + } + if _, err := w.Write(suffix); err != nil { + return } } From 04ba8c8bc358650c3436bba9e6ab9c832a142fa7 Mon Sep 17 00:00:00 2001 From: CharTyr Date: Sun, 29 Mar 2026 22:23:18 -0400 Subject: [PATCH 09/42] feat(amp): sanitize signatures and handle stream suppression for Amp compatibility --- internal/api/modules/amp/fallback_handlers.go | 10 + internal/api/modules/amp/response_rewriter.go | 315 
++++++++++++++++-- .../api/modules/amp/response_rewriter_test.go | 38 +++ 3 files changed, 328 insertions(+), 35 deletions(-) diff --git a/internal/api/modules/amp/fallback_handlers.go b/internal/api/modules/amp/fallback_handlers.go index 7d7f7f5f..97dd0c9d 100644 --- a/internal/api/modules/amp/fallback_handlers.go +++ b/internal/api/modules/amp/fallback_handlers.go @@ -123,6 +123,10 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc return } + // Sanitize request body: remove thinking blocks with invalid signatures + // to prevent upstream API 400 errors + bodyBytes = SanitizeAmpRequestBody(bodyBytes) + // Restore the body for the handler to read c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes)) @@ -259,10 +263,16 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc } else if len(providers) > 0 { // Log: Using local provider (free) logAmpRouting(RouteTypeLocalProvider, modelName, resolvedModel, providerName, requestPath) + // Wrap with ResponseRewriter for local providers too, because upstream + // proxies (e.g. NewAPI) may return a different model name and lack + // Amp-required fields like thinking.signature. 
+ rewriter := NewResponseRewriter(c.Writer, modelName) + c.Writer = rewriter // Filter Anthropic-Beta header only for local handling paths filterAntropicBetaHeader(c) c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes)) handler(c) + rewriter.Flush() } else { // No provider, no mapping, no proxy: fall back to the wrapped handler so it can return an error response c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes)) diff --git a/internal/api/modules/amp/response_rewriter.go b/internal/api/modules/amp/response_rewriter.go index 715034f1..fa83f7b9 100644 --- a/internal/api/modules/amp/response_rewriter.go +++ b/internal/api/modules/amp/response_rewriter.go @@ -2,6 +2,7 @@ package amp import ( "bytes" + "fmt" "net/http" "strings" @@ -12,32 +13,83 @@ import ( ) // ResponseRewriter wraps a gin.ResponseWriter to intercept and modify the response body -// It's used to rewrite model names in responses when model mapping is used +// It is used to rewrite model names in responses when model mapping is used +// and to keep Amp-compatible response shapes. type ResponseRewriter struct { gin.ResponseWriter - body *bytes.Buffer - originalModel string - isStreaming bool + body *bytes.Buffer + originalModel string + isStreaming bool + suppressedContentBlock map[int]struct{} } -// NewResponseRewriter creates a new response rewriter for model name substitution +// NewResponseRewriter creates a new response rewriter for model name substitution. 
func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter { return &ResponseRewriter{ - ResponseWriter: w, - body: &bytes.Buffer{}, - originalModel: originalModel, + ResponseWriter: w, + body: &bytes.Buffer{}, + originalModel: originalModel, + suppressedContentBlock: make(map[int]struct{}), } } -// Write intercepts response writes and buffers them for model name replacement +const maxBufferedResponseBytes = 2 * 1024 * 1024 // 2MB safety cap + +func looksLikeSSEChunk(data []byte) bool { + return bytes.Contains(data, []byte("data:")) || + bytes.Contains(data, []byte("event:")) || + bytes.Contains(data, []byte("message_start")) || + bytes.Contains(data, []byte("message_delta")) || + bytes.Contains(data, []byte("content_block_start")) || + bytes.Contains(data, []byte("content_block_delta")) || + bytes.Contains(data, []byte("content_block_stop")) || + bytes.Contains(data, []byte("\n\n")) +} + +func (rw *ResponseRewriter) enableStreaming(reason string) error { + if rw.isStreaming { + return nil + } + rw.isStreaming = true + + if rw.body != nil && rw.body.Len() > 0 { + buf := rw.body.Bytes() + toFlush := make([]byte, len(buf)) + copy(toFlush, buf) + rw.body.Reset() + + if _, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(toFlush)); err != nil { + return err + } + if flusher, ok := rw.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } + } + + log.Debugf("amp response rewriter: switched to streaming (%s)", reason) + return nil +} + func (rw *ResponseRewriter) Write(data []byte) (int, error) { - // Detect streaming on first write - if rw.body.Len() == 0 && !rw.isStreaming { + if !rw.isStreaming && rw.body.Len() == 0 { contentType := rw.Header().Get("Content-Type") rw.isStreaming = strings.Contains(contentType, "text/event-stream") || strings.Contains(contentType, "stream") } + if !rw.isStreaming { + if looksLikeSSEChunk(data) { + if err := rw.enableStreaming("sse heuristic"); err != nil { + return 0, err + } + } else if 
rw.body.Len()+len(data) > maxBufferedResponseBytes { + log.Warnf("amp response rewriter: buffer exceeded %d bytes, switching to streaming", maxBufferedResponseBytes) + if err := rw.enableStreaming("buffer limit"); err != nil { + return 0, err + } + } + } + if rw.isStreaming { n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data)) if err == nil { @@ -50,7 +102,6 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) { return rw.body.Write(data) } -// Flush writes the buffered response with model names rewritten func (rw *ResponseRewriter) Flush() { if rw.isStreaming { if flusher, ok := rw.ResponseWriter.(http.Flusher); ok { @@ -59,26 +110,68 @@ func (rw *ResponseRewriter) Flush() { return } if rw.body.Len() > 0 { - if _, err := rw.ResponseWriter.Write(rw.rewriteModelInResponse(rw.body.Bytes())); err != nil { + rewritten := rw.rewriteModelInResponse(rw.body.Bytes()) + // Update Content-Length to match the rewritten body size, since + // signature injection and model name changes alter the payload length. + rw.ResponseWriter.Header().Set("Content-Length", fmt.Sprintf("%d", len(rewritten))) + if _, err := rw.ResponseWriter.Write(rewritten); err != nil { log.Warnf("amp response rewriter: failed to write rewritten response: %v", err) } } } -// modelFieldPaths lists all JSON paths where model name may appear var modelFieldPaths = []string{"message.model", "model", "modelVersion", "response.model", "response.modelVersion"} -// rewriteModelInResponse replaces all occurrences of the mapped model with the original model in JSON -// It also suppresses "thinking" blocks if "tool_use" is present to ensure Amp client compatibility -func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte { - // 1. 
Amp Compatibility: Suppress thinking blocks if tool use is detected - // The Amp client struggles when both thinking and tool_use blocks are present +// ensureAmpSignature injects empty signature fields into tool_use/thinking blocks +// in API responses so that the Amp TUI does not crash on P.signature.length. +func ensureAmpSignature(data []byte) []byte { + for index, block := range gjson.GetBytes(data, "content").Array() { + blockType := block.Get("type").String() + if blockType != "tool_use" && blockType != "thinking" { + continue + } + signaturePath := fmt.Sprintf("content.%d.signature", index) + if gjson.GetBytes(data, signaturePath).Exists() { + continue + } + var err error + data, err = sjson.SetBytes(data, signaturePath, "") + if err != nil { + log.Warnf("Amp ResponseRewriter: failed to add empty signature to %s block: %v", blockType, err) + break + } + } + + contentBlockType := gjson.GetBytes(data, "content_block.type").String() + if (contentBlockType == "tool_use" || contentBlockType == "thinking") && !gjson.GetBytes(data, "content_block.signature").Exists() { + var err error + data, err = sjson.SetBytes(data, "content_block.signature", "") + if err != nil { + log.Warnf("Amp ResponseRewriter: failed to add empty signature to streaming %s block: %v", contentBlockType, err) + } + } + + return data +} + +func (rw *ResponseRewriter) markSuppressedContentBlock(index int) { + if rw.suppressedContentBlock == nil { + rw.suppressedContentBlock = make(map[int]struct{}) + } + rw.suppressedContentBlock[index] = struct{}{} +} + +func (rw *ResponseRewriter) isSuppressedContentBlock(index int) bool { + _, ok := rw.suppressedContentBlock[index] + return ok +} + +func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte { if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() { filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`) if filtered.Exists() { originalCount := gjson.GetBytes(data, "content.#").Int() filteredCount := 
filtered.Get("#").Int() - if originalCount > filteredCount { var err error data, err = sjson.SetBytes(data, "content", filtered.Value()) @@ -86,13 +179,41 @@ func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte { log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err) } else { log.Debugf("Amp ResponseRewriter: Suppressed %d thinking blocks due to tool usage", originalCount-filteredCount) - // Log the result for verification - log.Debugf("Amp ResponseRewriter: Resulting content: %s", gjson.GetBytes(data, "content").String()) } } } } + eventType := gjson.GetBytes(data, "type").String() + indexResult := gjson.GetBytes(data, "index") + if eventType == "content_block_start" && gjson.GetBytes(data, "content_block.type").String() == "thinking" && indexResult.Exists() { + rw.markSuppressedContentBlock(int(indexResult.Int())) + return nil + } + if gjson.GetBytes(data, "delta.type").String() == "thinking_delta" { + if indexResult.Exists() { + rw.markSuppressedContentBlock(int(indexResult.Int())) + } + return nil + } + if eventType == "content_block_stop" && indexResult.Exists() { + index := int(indexResult.Int()) + if rw.isSuppressedContentBlock(index) { + delete(rw.suppressedContentBlock, index) + return nil + } + } + + return data +} + +func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte { + data = ensureAmpSignature(data) + data = rw.suppressAmpThinking(data) + if len(data) == 0 { + return data + } + if rw.originalModel == "" { return data } @@ -104,24 +225,148 @@ func (rw *ResponseRewriter) rewriteModelInResponse(data []byte) []byte { return data } -// rewriteStreamChunk rewrites model names in SSE stream chunks func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { - if rw.originalModel == "" { - return chunk + lines := bytes.Split(chunk, []byte("\n")) + var out [][]byte + + i := 0 + for i < len(lines) { + line := lines[i] + trimmed := bytes.TrimSpace(line) + + // Case 1: "event:" line - look 
ahead for its "data:" line + if bytes.HasPrefix(trimmed, []byte("event: ")) { + // Scan forward past blank lines to find the data: line + dataIdx := -1 + for j := i + 1; j < len(lines); j++ { + t := bytes.TrimSpace(lines[j]) + if len(t) == 0 { + continue + } + if bytes.HasPrefix(t, []byte("data: ")) { + dataIdx = j + } + break + } + + if dataIdx >= 0 { + // Found event+data pair - process through model rewriter only + // (no thinking suppression for streaming) + jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: ")) + if len(jsonData) > 0 && jsonData[0] == '{' { + rewritten := rw.rewriteStreamEvent(jsonData) + // Emit event line + out = append(out, line) + // Emit blank lines between event and data + for k := i + 1; k < dataIdx; k++ { + out = append(out, lines[k]) + } + // Emit rewritten data + out = append(out, append([]byte("data: "), rewritten...)) + i = dataIdx + 1 + continue + } + } + + // No data line found (orphan event from cross-chunk split) + // Pass it through as-is - the data will arrive in the next chunk + out = append(out, line) + i++ + continue + } + + // Case 2: standalone "data:" line (no preceding event: in this chunk) + if bytes.HasPrefix(trimmed, []byte("data: ")) { + jsonData := bytes.TrimPrefix(trimmed, []byte("data: ")) + if len(jsonData) > 0 && jsonData[0] == '{' { + rewritten := rw.rewriteStreamEvent(jsonData) + out = append(out, append([]byte("data: "), rewritten...)) + i++ + continue + } + } + + // Case 3: everything else + out = append(out, line) + i++ } - // SSE format: "data: {json}\n\n" - lines := bytes.Split(chunk, []byte("\n")) - for i, line := range lines { - if bytes.HasPrefix(line, []byte("data: ")) { - jsonData := bytes.TrimPrefix(line, []byte("data: ")) - if len(jsonData) > 0 && jsonData[0] == '{' { - // Rewrite JSON in the data line - rewritten := rw.rewriteModelInResponse(jsonData) - lines[i] = append([]byte("data: "), rewritten...) 
+ return bytes.Join(out, []byte("\n")) +} + +// rewriteStreamEvent processes a single JSON event in the SSE stream. +// It rewrites model names and ensures signature fields exist. +// Unlike rewriteModelInResponse, it does NOT suppress thinking blocks +// in streaming mode - they are passed through with signature injection. +func (rw *ResponseRewriter) rewriteStreamEvent(data []byte) []byte { + // Inject empty signature where needed + data = ensureAmpSignature(data) + + // Rewrite model name + if rw.originalModel != "" { + for _, path := range modelFieldPaths { + if gjson.GetBytes(data, path).Exists() { + data, _ = sjson.SetBytes(data, path, rw.originalModel) } } } - return bytes.Join(lines, []byte("\n")) + return data +} + +// SanitizeAmpRequestBody removes thinking blocks with empty/missing/invalid signatures +// from the messages array in a request body before forwarding to the upstream API. +// This prevents 400 errors from the API which requires valid signatures on thinking blocks. 
+func SanitizeAmpRequestBody(body []byte) []byte { + messages := gjson.GetBytes(body, "messages") + if !messages.Exists() || !messages.IsArray() { + return body + } + + modified := false + for msgIdx, msg := range messages.Array() { + if msg.Get("role").String() != "assistant" { + continue + } + content := msg.Get("content") + if !content.Exists() || !content.IsArray() { + continue + } + + var keepBlocks []interface{} + removedCount := 0 + + for _, block := range content.Array() { + blockType := block.Get("type").String() + if blockType == "thinking" { + sig := block.Get("signature") + if !sig.Exists() || sig.Type != gjson.String || strings.TrimSpace(sig.String()) == "" { + removedCount++ + continue + } + } + keepBlocks = append(keepBlocks, block.Value()) + } + + if removedCount > 0 { + contentPath := fmt.Sprintf("messages.%d.content", msgIdx) + var err error + if len(keepBlocks) == 0 { + body, err = sjson.SetBytes(body, contentPath, []interface{}{}) + } else { + body, err = sjson.SetBytes(body, contentPath, keepBlocks) + } + if err != nil { + log.Warnf("Amp RequestSanitizer: failed to remove thinking blocks from message %d: %v", msgIdx, err) + continue + } + modified = true + log.Debugf("Amp RequestSanitizer: removed %d thinking blocks with invalid signatures from message %d", removedCount, msgIdx) + } + } + + if modified { + log.Debugf("Amp RequestSanitizer: sanitized request body") + } + return body } diff --git a/internal/api/modules/amp/response_rewriter_test.go b/internal/api/modules/amp/response_rewriter_test.go index 114a9516..ca477d4e 100644 --- a/internal/api/modules/amp/response_rewriter_test.go +++ b/internal/api/modules/amp/response_rewriter_test.go @@ -100,6 +100,44 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) { } } +func TestRewriteStreamChunk_SuppressesThinkingContentBlockFrames(t *testing.T) { + rw := &ResponseRewriter{} + + chunk := []byte("event: content_block_start\ndata: 
{\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n") + result := rw.rewriteStreamChunk(chunk) + + if contains(result, []byte("\"thinking\"")) || contains(result, []byte("\"thinking_delta\"")) { + t.Fatalf("expected thinking content_block frames to be suppressed, got %s", string(result)) + } + if contains(result, []byte("content_block_stop")) { + t.Fatalf("expected suppressed thinking content_block_stop to be removed, got %s", string(result)) + } + if !contains(result, []byte("\"tool_use\"")) { + t.Fatalf("expected tool_use content_block frame to remain, got %s", string(result)) + } + if !contains(result, []byte("\"signature\":\"\"")) { + t.Fatalf("expected tool_use content_block signature injection, got %s", string(result)) + } +} + +func TestSanitizeAmpRequestBody_RemovesWhitespaceAndNonStringSignatures(t *testing.T) { + input := []byte(`{"messages":[{"role":"assistant","content":[{"type":"thinking","thinking":"drop-whitespace","signature":" "},{"type":"thinking","thinking":"drop-number","signature":123},{"type":"thinking","thinking":"keep-valid","signature":"valid-signature"},{"type":"text","text":"keep-text"}]}]}`) + result := SanitizeAmpRequestBody(input) + + if contains(result, []byte("drop-whitespace")) { + t.Fatalf("expected whitespace-only signature block to be removed, got %s", string(result)) + } + if contains(result, []byte("drop-number")) { + t.Fatalf("expected non-string signature block to be removed, got %s", string(result)) + } + if !contains(result, []byte("keep-valid")) { + t.Fatalf("expected valid thinking block 
to remain, got %s", string(result)) + } + if !contains(result, []byte("keep-text")) { + t.Fatalf("expected non-thinking content to remain, got %s", string(result)) + } +} + func contains(data, substr []byte) bool { for i := 0; i <= len(data)-len(substr); i++ { if string(data[i:i+len(substr)]) == string(substr) { From b15453c369897df02b016d1dbb2d879fe9c1c68c Mon Sep 17 00:00:00 2001 From: CharTyr Date: Mon, 30 Mar 2026 00:42:04 -0400 Subject: [PATCH 10/42] fix(amp): address PR review - stream thinking suppression, SSE detection, test init - Call suppressAmpThinking in rewriteStreamEvent for streaming path - Handle nil return from suppressAmpThinking to skip suppressed events - Narrow looksLikeSSEChunk to line-prefix detection (HasPrefix vs Contains) - Initialize suppressedContentBlock map in test --- internal/api/modules/amp/response_rewriter.go | 36 ++++++++++++------- .../api/modules/amp/response_rewriter_test.go | 2 +- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/internal/api/modules/amp/response_rewriter.go b/internal/api/modules/amp/response_rewriter.go index fa83f7b9..64757963 100644 --- a/internal/api/modules/amp/response_rewriter.go +++ b/internal/api/modules/amp/response_rewriter.go @@ -36,14 +36,14 @@ func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRe const maxBufferedResponseBytes = 2 * 1024 * 1024 // 2MB safety cap func looksLikeSSEChunk(data []byte) bool { - return bytes.Contains(data, []byte("data:")) || - bytes.Contains(data, []byte("event:")) || - bytes.Contains(data, []byte("message_start")) || - bytes.Contains(data, []byte("message_delta")) || - bytes.Contains(data, []byte("content_block_start")) || - bytes.Contains(data, []byte("content_block_delta")) || - bytes.Contains(data, []byte("content_block_stop")) || - bytes.Contains(data, []byte("\n\n")) + for _, line := range bytes.Split(data, []byte("\n")) { + trimmed := bytes.TrimSpace(line) + if bytes.HasPrefix(trimmed, []byte("data:")) || + 
bytes.HasPrefix(trimmed, []byte("event:")) { + return true + } + } + return false } func (rw *ResponseRewriter) enableStreaming(reason string) error { @@ -250,11 +250,15 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { } if dataIdx >= 0 { - // Found event+data pair - process through model rewriter only - // (no thinking suppression for streaming) + // Found event+data pair - process through rewriter jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: ")) if len(jsonData) > 0 && jsonData[0] == '{' { rewritten := rw.rewriteStreamEvent(jsonData) + if rewritten == nil { + // Event suppressed (e.g. thinking block), skip event+data pair + i = dataIdx + 1 + continue + } // Emit event line out = append(out, line) // Emit blank lines between event and data @@ -280,7 +284,9 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { jsonData := bytes.TrimPrefix(trimmed, []byte("data: ")) if len(jsonData) > 0 && jsonData[0] == '{' { rewritten := rw.rewriteStreamEvent(jsonData) - out = append(out, append([]byte("data: "), rewritten...)) + if rewritten != nil { + out = append(out, append([]byte("data: "), rewritten...)) + } i++ continue } @@ -296,9 +302,13 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { // rewriteStreamEvent processes a single JSON event in the SSE stream. // It rewrites model names and ensures signature fields exist. -// Unlike rewriteModelInResponse, it does NOT suppress thinking blocks -// in streaming mode - they are passed through with signature injection. func (rw *ResponseRewriter) rewriteStreamEvent(data []byte) []byte { + // Suppress thinking blocks before any other processing. 
+ data = rw.suppressAmpThinking(data) + if len(data) == 0 { + return nil + } + // Inject empty signature where needed data = ensureAmpSignature(data) diff --git a/internal/api/modules/amp/response_rewriter_test.go b/internal/api/modules/amp/response_rewriter_test.go index ca477d4e..2f23d74d 100644 --- a/internal/api/modules/amp/response_rewriter_test.go +++ b/internal/api/modules/amp/response_rewriter_test.go @@ -101,7 +101,7 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) { } func TestRewriteStreamChunk_SuppressesThinkingContentBlockFrames(t *testing.T) { - rw := &ResponseRewriter{} + rw := &ResponseRewriter{suppressedContentBlock: make(map[int]struct{})} chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n") result := rw.rewriteStreamChunk(chunk) From 25feceb78341a195e40237fad290e896683fb5fd Mon Sep 17 00:00:00 2001 From: sususu98 Date: Mon, 30 Mar 2026 15:09:33 +0800 Subject: [PATCH 11/42] =?UTF-8?q?fix(antigravity):=20reorder=20model=20par?= =?UTF-8?q?ts=20to=20prevent=20tool=5Fuse=E2=86=94tool=5Fresult=20pairing?= =?UTF-8?q?=20breakage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Claude assistant message contains [text, tool_use, text], the Antigravity API internally splits the model message at functionCall boundaries, creating an extra assistant turn between tool_use and the following tool_result. 
Claude then rejects with: tool_use ids were found without tool_result blocks immediately after Fix: extend the existing 2-way part reordering (thinking-first) to a 3-way partition: thinking → regular → functionCall. This ensures functionCall parts are always last, so Antigravity's split cannot insert an extra assistant turn before the user's tool_result. Fixes #989 --- .../claude/antigravity_claude_request.go | 53 +++--- .../claude/antigravity_claude_request_test.go | 161 ++++++++++++++++++ 2 files changed, 194 insertions(+), 20 deletions(-) diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index 9e504d3f..243550c0 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -330,32 +330,45 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ } } - // Reorder parts for 'model' role to ensure thinking block is first + // Reorder parts for 'model' role: + // 1. Thinking parts first (Antigravity API requirement) + // 2. Regular parts (text, inlineData, etc.) + // 3. FunctionCall parts last + // + // Moving functionCall parts to the end prevents tool_use↔tool_result + // pairing breakage: the Antigravity API internally splits model messages + // at functionCall boundaries. If a text part follows a functionCall, the + // split creates an extra assistant turn between tool_use and tool_result, + // which Claude rejects with "tool_use ids were found without tool_result + // blocks immediately after". 
if role == "model" { partsResult := gjson.GetBytes(clientContentJSON, "parts") if partsResult.IsArray() { parts := partsResult.Array() - var thinkingParts []gjson.Result - var otherParts []gjson.Result - for _, part := range parts { - if part.Get("thought").Bool() { - thinkingParts = append(thinkingParts, part) - } else { - otherParts = append(otherParts, part) - } - } - if len(thinkingParts) > 0 { - firstPartIsThinking := parts[0].Get("thought").Bool() - if !firstPartIsThinking || len(thinkingParts) > 1 { - var newParts []interface{} - for _, p := range thinkingParts { - newParts = append(newParts, p.Value()) + if len(parts) > 1 { + var thinkingParts []gjson.Result + var regularParts []gjson.Result + var functionCallParts []gjson.Result + for _, part := range parts { + if part.Get("thought").Bool() { + thinkingParts = append(thinkingParts, part) + } else if part.Get("functionCall").Exists() { + functionCallParts = append(functionCallParts, part) + } else { + regularParts = append(regularParts, part) } - for _, p := range otherParts { - newParts = append(newParts, p.Value()) - } - clientContentJSON, _ = sjson.SetBytes(clientContentJSON, "parts", newParts) } + var newParts []interface{} + for _, p := range thinkingParts { + newParts = append(newParts, p.Value()) + } + for _, p := range regularParts { + newParts = append(newParts, p.Value()) + } + for _, p := range functionCallParts { + newParts = append(newParts, p.Value()) + } + clientContentJSON, _ = sjson.SetBytes(clientContentJSON, "parts", newParts) } } } diff --git a/internal/translator/antigravity/claude/antigravity_claude_request_test.go b/internal/translator/antigravity/claude/antigravity_claude_request_test.go index df84ac54..cad61ca3 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go @@ -361,6 +361,167 @@ func TestConvertClaudeRequestToAntigravity_ReorderThinking(t *testing.T) { } } +func 
TestConvertClaudeRequestToAntigravity_ReorderTextAfterFunctionCall(t *testing.T) { + // Bug: text part after tool_use in an assistant message causes Antigravity + // to split at functionCall boundary, creating an extra assistant turn that + // breaks tool_use↔tool_result adjacency (upstream issue #989). + // Fix: reorder parts so functionCall comes last. + inputJSON := []byte(`{ + "model": "claude-sonnet-4-5", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Let me check..."}, + { + "type": "tool_use", + "id": "call_abc", + "name": "Read", + "input": {"file": "test.go"} + }, + {"type": "text", "text": "Reading the file now"} + ] + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_abc", + "content": "file content" + } + ] + } + ] + }`) + + output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5", inputJSON, false) + outputStr := string(output) + + parts := gjson.Get(outputStr, "request.contents.0.parts").Array() + if len(parts) != 3 { + t.Fatalf("Expected 3 parts, got %d", len(parts)) + } + + // Text parts should come before functionCall + if parts[0].Get("text").String() != "Let me check..." 
{ + t.Errorf("Expected first text part first, got %s", parts[0].Raw) + } + if parts[1].Get("text").String() != "Reading the file now" { + t.Errorf("Expected second text part second, got %s", parts[1].Raw) + } + if !parts[2].Get("functionCall").Exists() { + t.Errorf("Expected functionCall last, got %s", parts[2].Raw) + } + if parts[2].Get("functionCall.name").String() != "Read" { + t.Errorf("Expected functionCall name 'Read', got '%s'", parts[2].Get("functionCall.name").String()) + } +} + +func TestConvertClaudeRequestToAntigravity_ReorderParallelFunctionCalls(t *testing.T) { + inputJSON := []byte(`{ + "model": "claude-sonnet-4-5", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Reading both files."}, + { + "type": "tool_use", + "id": "call_1", + "name": "Read", + "input": {"file": "a.go"} + }, + {"type": "text", "text": "And this one too."}, + { + "type": "tool_use", + "id": "call_2", + "name": "Read", + "input": {"file": "b.go"} + } + ] + } + ] + }`) + + output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5", inputJSON, false) + outputStr := string(output) + + parts := gjson.Get(outputStr, "request.contents.0.parts").Array() + if len(parts) != 4 { + t.Fatalf("Expected 4 parts, got %d", len(parts)) + } + + if parts[0].Get("text").String() != "Reading both files." { + t.Errorf("Expected first text, got %s", parts[0].Raw) + } + if parts[1].Get("text").String() != "And this one too." 
{ + t.Errorf("Expected second text, got %s", parts[1].Raw) + } + if parts[2].Get("functionCall.name").String() != "Read" || parts[2].Get("functionCall.id").String() != "call_1" { + t.Errorf("Expected fc1 third, got %s", parts[2].Raw) + } + if parts[3].Get("functionCall.name").String() != "Read" || parts[3].Get("functionCall.id").String() != "call_2" { + t.Errorf("Expected fc2 fourth, got %s", parts[3].Raw) + } +} + +func TestConvertClaudeRequestToAntigravity_ReorderThinkingAndTextBeforeFunctionCall(t *testing.T) { + cache.ClearSignatureCache("") + + validSignature := "abc123validSignature1234567890123456789012345678901234567890" + thinkingText := "Let me think about this..." + + inputJSON := []byte(`{ + "model": "claude-sonnet-4-5-thinking", + "messages": [ + { + "role": "user", + "content": [{"type": "text", "text": "Hello"}] + }, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "Before thinking"}, + {"type": "thinking", "thinking": "` + thinkingText + `", "signature": "` + validSignature + `"}, + { + "type": "tool_use", + "id": "call_xyz", + "name": "Bash", + "input": {"command": "ls"} + }, + {"type": "text", "text": "After tool call"} + ] + } + ] + }`) + + cache.CacheSignature("claude-sonnet-4-5-thinking", thinkingText, validSignature) + + output := ConvertClaudeRequestToAntigravity("claude-sonnet-4-5-thinking", inputJSON, false) + outputStr := string(output) + + // contents.1 = assistant message (contents.0 = user) + parts := gjson.Get(outputStr, "request.contents.1.parts").Array() + if len(parts) != 4 { + t.Fatalf("Expected 4 parts, got %d", len(parts)) + } + + // Order: thinking → text → text → functionCall + if !parts[0].Get("thought").Bool() { + t.Error("First part should be thinking") + } + if parts[1].Get("functionCall").Exists() || parts[1].Get("thought").Bool() { + t.Errorf("Second part should be text, got %s", parts[1].Raw) + } + if parts[2].Get("functionCall").Exists() || parts[2].Get("thought").Bool() { + t.Errorf("Third part 
should be text, got %s", parts[2].Raw) + } + if !parts[3].Get("functionCall").Exists() { + t.Errorf("Last part should be functionCall, got %s", parts[3].Raw) + } +} + func TestConvertClaudeRequestToAntigravity_ToolResult(t *testing.T) { inputJSON := []byte(`{ "model": "claude-3-5-sonnet-20240620", From 279cbbbb8a82aec934e851616c2243ba859df45a Mon Sep 17 00:00:00 2001 From: CharTyr Date: Mon, 30 Mar 2026 19:57:43 +0800 Subject: [PATCH 12/42] fix(amp): don't suppress thinking blocks in streaming mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts the streaming thinking suppression introduced in b15453c. rewriteStreamEvent should only inject signatures and rewrite model names — suppressing thinking blocks in streaming mode breaks SSE index alignment and causes the Amp TUI to render empty responses on the second message onward (especially with model-mapped non-Claude providers like GPT-5.4). Non-streaming responses still suppress thinking when tool_use is present via rewriteModelInResponse. --- internal/api/modules/amp/response_rewriter.go | 18 ++++----------- .../api/modules/amp/response_rewriter_test.go | 23 ++++++++++++------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/internal/api/modules/amp/response_rewriter.go b/internal/api/modules/amp/response_rewriter.go index 64757963..8e08abe3 100644 --- a/internal/api/modules/amp/response_rewriter.go +++ b/internal/api/modules/amp/response_rewriter.go @@ -254,11 +254,6 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: ")) if len(jsonData) > 0 && jsonData[0] == '{' { rewritten := rw.rewriteStreamEvent(jsonData) - if rewritten == nil { - // Event suppressed (e.g. 
thinking block), skip event+data pair - i = dataIdx + 1 - continue - } // Emit event line out = append(out, line) // Emit blank lines between event and data @@ -284,9 +279,7 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { jsonData := bytes.TrimPrefix(trimmed, []byte("data: ")) if len(jsonData) > 0 && jsonData[0] == '{' { rewritten := rw.rewriteStreamEvent(jsonData) - if rewritten != nil { - out = append(out, append([]byte("data: "), rewritten...)) - } + out = append(out, append([]byte("data: "), rewritten...)) i++ continue } @@ -302,13 +295,10 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte { // rewriteStreamEvent processes a single JSON event in the SSE stream. // It rewrites model names and ensures signature fields exist. +// NOTE: streaming mode does NOT suppress thinking blocks - they are +// passed through with signature injection to avoid breaking SSE index +// alignment and TUI rendering. func (rw *ResponseRewriter) rewriteStreamEvent(data []byte) []byte { - // Suppress thinking blocks before any other processing. 
- data = rw.suppressAmpThinking(data) - if len(data) == 0 { - return nil - } - // Inject empty signature where needed data = ensureAmpSignature(data) diff --git a/internal/api/modules/amp/response_rewriter_test.go b/internal/api/modules/amp/response_rewriter_test.go index 2f23d74d..50712cf9 100644 --- a/internal/api/modules/amp/response_rewriter_test.go +++ b/internal/api/modules/amp/response_rewriter_test.go @@ -1,6 +1,7 @@ package amp import ( + "strings" "testing" ) @@ -100,23 +101,29 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) { } } -func TestRewriteStreamChunk_SuppressesThinkingContentBlockFrames(t *testing.T) { +func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) { rw := &ResponseRewriter{suppressedContentBlock: make(map[int]struct{})} chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n") result := rw.rewriteStreamChunk(chunk) - if contains(result, []byte("\"thinking\"")) || contains(result, []byte("\"thinking_delta\"")) { - t.Fatalf("expected thinking content_block frames to be suppressed, got %s", string(result)) + // Streaming mode preserves thinking blocks (does NOT suppress them) + // to avoid breaking SSE index alignment and TUI rendering + if !contains(result, []byte(`"content_block":{"type":"thinking"`)) { + t.Fatalf("expected thinking content_block_start to be preserved, got %s", string(result)) } - if contains(result, []byte("content_block_stop")) { - t.Fatalf("expected suppressed thinking content_block_stop to be 
removed, got %s", string(result)) + if !contains(result, []byte(`"delta":{"type":"thinking_delta"`)) { + t.Fatalf("expected thinking_delta to be preserved, got %s", string(result)) } - if !contains(result, []byte("\"tool_use\"")) { + if !contains(result, []byte(`"type":"content_block_stop","index":0`)) { + t.Fatalf("expected content_block_stop for thinking block to be preserved, got %s", string(result)) + } + if !contains(result, []byte(`"content_block":{"type":"tool_use"`)) { t.Fatalf("expected tool_use content_block frame to remain, got %s", string(result)) } - if !contains(result, []byte("\"signature\":\"\"")) { - t.Fatalf("expected tool_use content_block signature injection, got %s", string(result)) + // Signature should be injected into both thinking and tool_use blocks + if count := strings.Count(string(result), `"signature":""`); count != 2 { + t.Fatalf("expected 2 signature injections, but got %d in %s", count, string(result)) } } From 17363edf253499751ce4aba1f4b9ce2a45b7438d Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Mon, 30 Mar 2026 22:22:42 +0800 Subject: [PATCH 13/42] fix(auth): skip downtime for request-scoped 404 errors in model state management --- sdk/cliproxy/auth/conductor.go | 157 ++++++++++-------- sdk/cliproxy/auth/conductor_overrides_test.go | 113 +++++++++++++ 2 files changed, 204 insertions(+), 66 deletions(-) diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 64c110dc..61f32278 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1734,77 +1734,79 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { } } else { if result.Model != "" { - state := ensureModelState(auth, result.Model) - state.Unavailable = true - state.Status = StatusError - state.UpdatedAt = now - if result.Error != nil { - state.LastError = cloneError(result.Error) - state.StatusMessage = result.Error.Message - auth.LastError = cloneError(result.Error) - auth.StatusMessage = 
result.Error.Message - } + if !isRequestScopedNotFoundResultError(result.Error) { + state := ensureModelState(auth, result.Model) + state.Unavailable = true + state.Status = StatusError + state.UpdatedAt = now + if result.Error != nil { + state.LastError = cloneError(result.Error) + state.StatusMessage = result.Error.Message + auth.LastError = cloneError(result.Error) + auth.StatusMessage = result.Error.Message + } - statusCode := statusCodeFromResult(result.Error) - if isModelSupportResultError(result.Error) { - next := now.Add(12 * time.Hour) - state.NextRetryAfter = next - suspendReason = "model_not_supported" - shouldSuspendModel = true - } else { - switch statusCode { - case 401: - next := now.Add(30 * time.Minute) - state.NextRetryAfter = next - suspendReason = "unauthorized" - shouldSuspendModel = true - case 402, 403: - next := now.Add(30 * time.Minute) - state.NextRetryAfter = next - suspendReason = "payment_required" - shouldSuspendModel = true - case 404: + statusCode := statusCodeFromResult(result.Error) + if isModelSupportResultError(result.Error) { next := now.Add(12 * time.Hour) state.NextRetryAfter = next - suspendReason = "not_found" + suspendReason = "model_not_supported" shouldSuspendModel = true - case 429: - var next time.Time - backoffLevel := state.Quota.BackoffLevel - if result.RetryAfter != nil { - next = now.Add(*result.RetryAfter) - } else { - cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth)) - if cooldown > 0 { - next = now.Add(cooldown) - } - backoffLevel = nextLevel - } - state.NextRetryAfter = next - state.Quota = QuotaState{ - Exceeded: true, - Reason: "quota", - NextRecoverAt: next, - BackoffLevel: backoffLevel, - } - suspendReason = "quota" - shouldSuspendModel = true - setModelQuota = true - case 408, 500, 502, 503, 504: - if quotaCooldownDisabledForAuth(auth) { - state.NextRetryAfter = time.Time{} - } else { - next := now.Add(1 * time.Minute) + } else { + switch statusCode { + case 401: + 
next := now.Add(30 * time.Minute) state.NextRetryAfter = next + suspendReason = "unauthorized" + shouldSuspendModel = true + case 402, 403: + next := now.Add(30 * time.Minute) + state.NextRetryAfter = next + suspendReason = "payment_required" + shouldSuspendModel = true + case 404: + next := now.Add(12 * time.Hour) + state.NextRetryAfter = next + suspendReason = "not_found" + shouldSuspendModel = true + case 429: + var next time.Time + backoffLevel := state.Quota.BackoffLevel + if result.RetryAfter != nil { + next = now.Add(*result.RetryAfter) + } else { + cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth)) + if cooldown > 0 { + next = now.Add(cooldown) + } + backoffLevel = nextLevel + } + state.NextRetryAfter = next + state.Quota = QuotaState{ + Exceeded: true, + Reason: "quota", + NextRecoverAt: next, + BackoffLevel: backoffLevel, + } + suspendReason = "quota" + shouldSuspendModel = true + setModelQuota = true + case 408, 500, 502, 503, 504: + if quotaCooldownDisabledForAuth(auth) { + state.NextRetryAfter = time.Time{} + } else { + next := now.Add(1 * time.Minute) + state.NextRetryAfter = next + } + default: + state.NextRetryAfter = time.Time{} } - default: - state.NextRetryAfter = time.Time{} } - } - auth.Status = StatusError - auth.UpdatedAt = now - updateAggregatedAvailability(auth, now) + auth.Status = StatusError + auth.UpdatedAt = now + updateAggregatedAvailability(auth, now) + } } else { applyAuthFailureState(auth, result.Error, result.RetryAfter, now) } @@ -2056,11 +2058,29 @@ func isModelSupportResultError(err *Error) bool { return isModelSupportErrorMessage(err.Message) } +func isRequestScopedNotFoundMessage(message string) bool { + if message == "" { + return false + } + lower := strings.ToLower(message) + return strings.Contains(lower, "item with id") && + strings.Contains(lower, "not found") && + strings.Contains(lower, "items are not persisted when `store` is set to false") +} + +func 
isRequestScopedNotFoundResultError(err *Error) bool { + if err == nil || statusCodeFromResult(err) != http.StatusNotFound { + return false + } + return isRequestScopedNotFoundMessage(err.Message) +} + // isRequestInvalidError returns true if the error represents a client request // error that should not be retried. Specifically, it treats 400 responses with -// "invalid_request_error" and all 422 responses as request-shape failures, -// where switching auths or pooled upstream models will not help. Model-support -// errors are excluded so routing can fall through to another auth or upstream. +// "invalid_request_error", request-scoped 404 item misses caused by `store=false`, +// and all 422 responses as request-shape failures, where switching auths or +// pooled upstream models will not help. Model-support errors are excluded so +// routing can fall through to another auth or upstream. func isRequestInvalidError(err error) bool { if err == nil { return false @@ -2072,6 +2092,8 @@ func isRequestInvalidError(err error) bool { switch status { case http.StatusBadRequest: return strings.Contains(err.Error(), "invalid_request_error") + case http.StatusNotFound: + return isRequestScopedNotFoundMessage(err.Error()) case http.StatusUnprocessableEntity: return true default: @@ -2083,6 +2105,9 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati if auth == nil { return } + if isRequestScopedNotFoundResultError(resultErr) { + return + } auth.Unavailable = true auth.Status = StatusError auth.UpdatedAt = now diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index 3ad0ce67..50915ce0 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -12,6 +12,8 @@ import ( cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) +const requestScopedNotFoundMessage = "Item with id 
'rs_0b5f3eb6f51f175c0169ca74e4a85881998539920821603a74' not found. Items are not persisted when `store` is set to false. Try again with `store` set to true, or remove this item from your input." + func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) { m := NewManager(nil, nil, nil) m.SetRetryConfig(3, 30*time.Second, 0) @@ -447,3 +449,114 @@ func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) { t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter) } } + +func TestManager_MarkResult_RequestScopedNotFoundDoesNotCooldownAuth(t *testing.T) { + m := NewManager(nil, nil, nil) + + auth := &Auth{ + ID: "auth-1", + Provider: "openai", + } + if _, errRegister := m.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + model := "gpt-4.1" + m.MarkResult(context.Background(), Result{ + AuthID: auth.ID, + Provider: auth.Provider, + Model: model, + Success: false, + Error: &Error{ + HTTPStatus: http.StatusNotFound, + Message: requestScopedNotFoundMessage, + }, + }) + + updated, ok := m.GetByID(auth.ID) + if !ok || updated == nil { + t.Fatalf("expected auth to be present") + } + if updated.Unavailable { + t.Fatalf("expected request-scoped 404 to keep auth available") + } + if !updated.NextRetryAfter.IsZero() { + t.Fatalf("expected request-scoped 404 to keep auth cooldown unset, got %v", updated.NextRetryAfter) + } + if state := updated.ModelStates[model]; state != nil { + t.Fatalf("expected request-scoped 404 to avoid model cooldown state, got %#v", state) + } +} + +func TestManager_RequestScopedNotFoundStopsRetryWithoutSuspendingAuth(t *testing.T) { + m := NewManager(nil, nil, nil) + executor := &authFallbackExecutor{ + id: "openai", + executeErrors: map[string]error{ + "aa-bad-auth": &Error{ + HTTPStatus: http.StatusNotFound, + Message: requestScopedNotFoundMessage, + }, + }, + } + m.RegisterExecutor(executor) + + 
model := "gpt-4.1" + badAuth := &Auth{ID: "aa-bad-auth", Provider: "openai"} + goodAuth := &Auth{ID: "bb-good-auth", Provider: "openai"} + + reg := registry.GetGlobalRegistry() + reg.RegisterClient(badAuth.ID, "openai", []*registry.ModelInfo{{ID: model}}) + reg.RegisterClient(goodAuth.ID, "openai", []*registry.ModelInfo{{ID: model}}) + t.Cleanup(func() { + reg.UnregisterClient(badAuth.ID) + reg.UnregisterClient(goodAuth.ID) + }) + + if _, errRegister := m.Register(context.Background(), badAuth); errRegister != nil { + t.Fatalf("register bad auth: %v", errRegister) + } + if _, errRegister := m.Register(context.Background(), goodAuth); errRegister != nil { + t.Fatalf("register good auth: %v", errRegister) + } + + _, errExecute := m.Execute(context.Background(), []string{"openai"}, cliproxyexecutor.Request{Model: model}, cliproxyexecutor.Options{}) + if errExecute == nil { + t.Fatal("expected request-scoped not-found error") + } + errResult, ok := errExecute.(*Error) + if !ok { + t.Fatalf("expected *Error, got %T", errExecute) + } + if errResult.HTTPStatus != http.StatusNotFound { + t.Fatalf("status = %d, want %d", errResult.HTTPStatus, http.StatusNotFound) + } + if errResult.Message != requestScopedNotFoundMessage { + t.Fatalf("message = %q, want %q", errResult.Message, requestScopedNotFoundMessage) + } + + got := executor.ExecuteCalls() + want := []string{badAuth.ID} + if len(got) != len(want) { + t.Fatalf("execute calls = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("execute call %d auth = %q, want %q", i, got[i], want[i]) + } + } + + updatedBad, ok := m.GetByID(badAuth.ID) + if !ok || updatedBad == nil { + t.Fatalf("expected bad auth to remain registered") + } + if updatedBad.Unavailable { + t.Fatalf("expected request-scoped 404 to keep bad auth available") + } + if !updatedBad.NextRetryAfter.IsZero() { + t.Fatalf("expected request-scoped 404 to keep bad auth cooldown unset, got %v", updatedBad.NextRetryAfter) + } + if 
state := updatedBad.ModelStates[model]; state != nil { + t.Fatalf("expected request-scoped 404 to avoid bad auth model cooldown state, got %#v", state) + } +} From d11936f292c3040f631786e33376233026e3f449 Mon Sep 17 00:00:00 2001 From: MonsterQiu <72pgstan@gmail.com> Date: Mon, 30 Mar 2026 22:44:46 +0800 Subject: [PATCH 14/42] fix(codex): add default instructions for /responses/compact --- internal/runtime/executor/codex_executor.go | 3 + .../executor/codex_executor_compact_test.go | 58 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 internal/runtime/executor/codex_executor_compact_test.go diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 7e4163b8..ed38570d 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -220,6 +220,9 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "stream") + if !gjson.GetBytes(body, "instructions").Exists() { + body, _ = sjson.SetBytes(body, "instructions", "") + } url := strings.TrimSuffix(baseURL, "/") + "/responses/compact" httpReq, err := e.cacheHelper(ctx, from, url, req, body) diff --git a/internal/runtime/executor/codex_executor_compact_test.go b/internal/runtime/executor/codex_executor_compact_test.go new file mode 100644 index 00000000..4fcd7a8e --- /dev/null +++ b/internal/runtime/executor/codex_executor_compact_test.go @@ -0,0 +1,58 @@ +package executor + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdktranslator 
"github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" + "github.com/tidwall/gjson" +) + +func TestCodexExecutorCompactAddsDefaultInstructions(t *testing.T) { + var gotPath string + var gotBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + body, _ := io.ReadAll(r.Body) + gotBody = body + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`)) + })) + defer server.Close() + + executor := NewCodexExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "base_url": server.URL, + "api_key": "test", + }} + + resp, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(`{"model":"gpt-5.4","input":"hello"}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + Alt: "responses/compact", + Stream: false, + }) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if gotPath != "/responses/compact" { + t.Fatalf("path = %q, want %q", gotPath, "/responses/compact") + } + if !gjson.GetBytes(gotBody, "instructions").Exists() { + t.Fatalf("expected instructions in compact request body, got %s", string(gotBody)) + } + if gjson.GetBytes(gotBody, "instructions").String() != "" { + t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String()) + } + if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` { + t.Fatalf("payload = %s", string(resp.Payload)) + } +} From c1d7599829045ceacdf91c4357cb42800b78b72a Mon Sep 17 00:00:00 2001 From: apparition <38576169+possible055@users.noreply.github.com> Date: Mon, 30 Mar 2026 22:44:58 +0800 Subject: [PATCH 15/42] fix(openai): handle transcript replacement after websocket 
compaction - Add shouldReplaceWebsocketTranscript() to detect historical model output in input - Add normalizeResponseTranscriptReplacement() for full transcript reset handling - Prevent duplicate stale turn-state when clients replace local history post-compaction - Avoid orphaned function_call items from incremental append on compact transcripts - Add unit tests for transcript replacement detection and state reset behavior --- .../openai/openai_responses_websocket.go | 57 ++++++ .../openai/openai_responses_websocket_test.go | 183 ++++++++++++++++++ 2 files changed, 240 insertions(+) diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index 5c68f40e..211b8b81 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -277,6 +277,15 @@ func normalizeResponseSubsequentRequest(rawJSON []byte, lastRequest []byte, last } } + // Compaction can cause clients to replace local websocket history with a new + // compact transcript on the next `response.create`. When the input already + // contains historical model output items, treating it as an incremental append + // duplicates stale turn-state and can leave late orphaned function_call items. + if shouldReplaceWebsocketTranscript(rawJSON, nextInput) { + normalized := normalizeResponseTranscriptReplacement(rawJSON, lastRequest) + return normalized, bytes.Clone(normalized), nil + } + // Websocket v2 mode uses response.create with previous_response_id + incremental input. // Do not expand it into a full input transcript; upstream expects the incremental payload. 
if allowIncrementalInputWithPreviousResponseID { @@ -348,6 +357,54 @@ func normalizeResponseSubsequentRequest(rawJSON []byte, lastRequest []byte, last return normalized, bytes.Clone(normalized), nil } +func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bool { + if strings.TrimSpace(gjson.GetBytes(rawJSON, "type").String()) != wsRequestTypeCreate { + return false + } + if strings.TrimSpace(gjson.GetBytes(rawJSON, "previous_response_id").String()) != "" { + return false + } + if !nextInput.Exists() || !nextInput.IsArray() { + return false + } + + for _, item := range nextInput.Array() { + switch strings.TrimSpace(item.Get("type").String()) { + case "function_call": + return true + case "message": + role := strings.TrimSpace(item.Get("role").String()) + if role == "assistant" || role == "developer" { + return true + } + } + } + + return false +} + +func normalizeResponseTranscriptReplacement(rawJSON []byte, lastRequest []byte) []byte { + normalized, errDelete := sjson.DeleteBytes(rawJSON, "type") + if errDelete != nil { + normalized = bytes.Clone(rawJSON) + } + normalized, _ = sjson.DeleteBytes(normalized, "previous_response_id") + if !gjson.GetBytes(normalized, "model").Exists() { + modelName := strings.TrimSpace(gjson.GetBytes(lastRequest, "model").String()) + if modelName != "" { + normalized, _ = sjson.SetBytes(normalized, "model", modelName) + } + } + if !gjson.GetBytes(normalized, "instructions").Exists() { + instructions := gjson.GetBytes(lastRequest, "instructions") + if instructions.Exists() { + normalized, _ = sjson.SetRawBytes(normalized, "instructions", []byte(instructions.Raw)) + } + } + normalized, _ = sjson.SetBytes(normalized, "stream", true) + return bytes.Clone(normalized) +} + func websocketUpstreamSupportsIncrementalInput(attributes map[string]string, metadata map[string]any) bool { if len(attributes) > 0 { if raw := strings.TrimSpace(attributes["websockets"]); raw != "" { diff --git 
a/sdk/api/handlers/openai/openai_responses_websocket_test.go b/sdk/api/handlers/openai/openai_responses_websocket_test.go index b3a32c5c..b1440a95 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket_test.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go @@ -27,6 +27,12 @@ type websocketCaptureExecutor struct { payloads [][]byte } +type websocketCompactionCaptureExecutor struct { + mu sync.Mutex + streamPayloads [][]byte + compactPayload []byte +} + type orderedWebsocketSelector struct { mu sync.Mutex order []string @@ -126,6 +132,52 @@ func (e *websocketCaptureExecutor) HttpRequest(context.Context, *coreauth.Auth, return nil, errors.New("not implemented") } +func (e *websocketCompactionCaptureExecutor) Identifier() string { return "test-provider" } + +func (e *websocketCompactionCaptureExecutor) Execute(_ context.Context, _ *coreauth.Auth, req coreexecutor.Request, opts coreexecutor.Options) (coreexecutor.Response, error) { + e.mu.Lock() + e.compactPayload = bytes.Clone(req.Payload) + e.mu.Unlock() + if opts.Alt != "responses/compact" { + return coreexecutor.Response{}, fmt.Errorf("unexpected non-compact execute alt: %q", opts.Alt) + } + return coreexecutor.Response{Payload: []byte(`{"id":"cmp-1","object":"response.compaction"}`)}, nil +} + +func (e *websocketCompactionCaptureExecutor) ExecuteStream(_ context.Context, _ *coreauth.Auth, req coreexecutor.Request, _ coreexecutor.Options) (*coreexecutor.StreamResult, error) { + e.mu.Lock() + callIndex := len(e.streamPayloads) + e.streamPayloads = append(e.streamPayloads, bytes.Clone(req.Payload)) + e.mu.Unlock() + + var payload []byte + switch callIndex { + case 0: + payload = []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"function_call","id":"fc-1","call_id":"call-1","name":"tool"}]}}`) + case 1: + payload = []byte(`{"type":"response.completed","response":{"id":"resp-2","output":[{"type":"message","id":"assistant-1"}]}}`) + default: + payload = 
[]byte(`{"type":"response.completed","response":{"id":"resp-3","output":[{"type":"message","id":"assistant-2"}]}}`) + } + + chunks := make(chan coreexecutor.StreamChunk, 1) + chunks <- coreexecutor.StreamChunk{Payload: payload} + close(chunks) + return &coreexecutor.StreamResult{Chunks: chunks}, nil +} + +func (e *websocketCompactionCaptureExecutor) Refresh(_ context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) { + return auth, nil +} + +func (e *websocketCompactionCaptureExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) { + return coreexecutor.Response{}, errors.New("not implemented") +} + +func (e *websocketCompactionCaptureExecutor) HttpRequest(context.Context, *coreauth.Auth, *http.Request) (*http.Response, error) { + return nil, errors.New("not implemented") +} + func TestNormalizeResponsesWebsocketRequestCreate(t *testing.T) { raw := []byte(`{"type":"response.create","model":"test-model","stream":false,"input":[{"type":"message","id":"msg-1"}]}`) @@ -662,3 +714,134 @@ func TestResponsesWebsocketPinsOnlyWebsocketCapableAuth(t *testing.T) { t.Fatalf("selected auth IDs = %v, want [auth-sse auth-ws]", got) } } + +func TestNormalizeResponsesWebsocketRequestTreatsTranscriptReplacementAsReset(t *testing.T) { + lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"function_call","id":"fc-1","call_id":"call-1"},{"type":"function_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`) + lastResponseOutput := []byte(`[ + {"type":"message","id":"assistant-1","role":"assistant"} + ]`) + raw := []byte(`{"type":"response.create","input":[{"type":"function_call","id":"fc-compact","call_id":"call-1","name":"tool"},{"type":"message","id":"msg-2"}]}`) + + normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput) + if errMsg != nil { + 
t.Fatalf("unexpected error: %v", errMsg.Error) + } + if gjson.GetBytes(normalized, "previous_response_id").Exists() { + t.Fatalf("previous_response_id must not exist in transcript replacement mode") + } + items := gjson.GetBytes(normalized, "input").Array() + if len(items) != 2 { + t.Fatalf("replacement input len = %d, want 2: %s", len(items), normalized) + } + if items[0].Get("id").String() != "fc-compact" || items[1].Get("id").String() != "msg-2" { + t.Fatalf("replacement transcript was not preserved as-is: %s", normalized) + } + if !bytes.Equal(next, normalized) { + t.Fatalf("next request snapshot should match replacement request") + } +} + +func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) { + gin.SetMode(gin.TestMode) + + executor := &websocketCompactionCaptureExecutor{} + manager := coreauth.NewManager(nil, nil, nil) + manager.RegisterExecutor(executor) + auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive} + if _, err := manager.Register(context.Background(), auth); err != nil { + t.Fatalf("Register auth: %v", err) + } + registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(auth.ID) + }) + + base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager) + h := NewOpenAIResponsesAPIHandler(base) + router := gin.New() + router.GET("/v1/responses/ws", h.ResponsesWebsocket) + router.POST("/v1/responses/compact", h.Compact) + + server := httptest.NewServer(router) + defer server.Close() + + wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws" + conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil) + if err != nil { + t.Fatalf("dial websocket: %v", err) + } + defer func() { + if errClose := conn.Close(); errClose != nil { + t.Fatalf("close websocket: %v", errClose) + } + }() + + requests := []string{ + 
`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`, + `{"type":"response.create","input":[{"type":"function_call_output","call_id":"call-1","id":"tool-out-1"}]}`, + } + for i := range requests { + if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil { + t.Fatalf("write websocket message %d: %v", i+1, errWrite) + } + _, payload, errReadMessage := conn.ReadMessage() + if errReadMessage != nil { + t.Fatalf("read websocket message %d: %v", i+1, errReadMessage) + } + if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted { + t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted) + } + } + + compactResp, errPost := server.Client().Post( + server.URL+"/v1/responses/compact", + "application/json", + strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`), + ) + if errPost != nil { + t.Fatalf("compact request failed: %v", errPost) + } + if errClose := compactResp.Body.Close(); errClose != nil { + t.Fatalf("close compact response body: %v", errClose) + } + if compactResp.StatusCode != http.StatusOK { + t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK) + } + + // Simulate a post-compaction client turn that replaces local history with a compacted transcript. + // The websocket handler must treat this as a state reset, not append it to stale pre-compaction state. 
+ postCompact := `{"type":"response.create","input":[{"type":"function_call","id":"fc-compact","call_id":"call-1","name":"tool"},{"type":"message","id":"msg-2"}]}` + if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil { + t.Fatalf("write post-compact websocket message: %v", errWrite) + } + _, payload, errReadMessage := conn.ReadMessage() + if errReadMessage != nil { + t.Fatalf("read post-compact websocket message: %v", errReadMessage) + } + if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted { + t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted) + } + + executor.mu.Lock() + defer executor.mu.Unlock() + + if executor.compactPayload == nil { + t.Fatalf("compact payload was not captured") + } + if len(executor.streamPayloads) != 3 { + t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads)) + } + + merged := executor.streamPayloads[2] + items := gjson.GetBytes(merged, "input").Array() + if len(items) != 2 { + t.Fatalf("merged input len = %d, want 2: %s", len(items), merged) + } + if items[0].Get("id").String() != "fc-compact" || + items[1].Get("id").String() != "msg-2" { + t.Fatalf("unexpected post-compact input order: %s", merged) + } + if items[0].Get("call_id").String() != "call-1" { + t.Fatalf("post-compact function call id = %s, want call-1", items[0].Get("call_id").String()) + } +} From d3b94c924100f120b67daee6b663aa19bc82071b Mon Sep 17 00:00:00 2001 From: MonsterQiu <72pgstan@gmail.com> Date: Mon, 30 Mar 2026 22:58:05 +0800 Subject: [PATCH 16/42] fix(codex): normalize null instructions for compact requests --- internal/runtime/executor/codex_executor.go | 3 +- .../executor/codex_executor_compact_test.go | 95 +++++++++++-------- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index ed38570d..7bbf0e68 100644 --- 
a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -220,7 +220,8 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "stream") - if !gjson.GetBytes(body, "instructions").Exists() { + instructions := gjson.GetBytes(body, "instructions") + if !instructions.Exists() || instructions.Type == gjson.Null { body, _ = sjson.SetBytes(body, "instructions", "") } diff --git a/internal/runtime/executor/codex_executor_compact_test.go b/internal/runtime/executor/codex_executor_compact_test.go index 4fcd7a8e..02c6db29 100644 --- a/internal/runtime/executor/codex_executor_compact_test.go +++ b/internal/runtime/executor/codex_executor_compact_test.go @@ -15,44 +15,65 @@ import ( ) func TestCodexExecutorCompactAddsDefaultInstructions(t *testing.T) { - var gotPath string - var gotBody []byte - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - gotPath = r.URL.Path - body, _ := io.ReadAll(r.Body) - gotBody = body - w.Header().Set("Content-Type", "application/json") - _, _ = w.Write([]byte(`{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`)) - })) - defer server.Close() + cases := []struct { + name string + payload string + }{ + { + name: "missing instructions", + payload: `{"model":"gpt-5.4","input":"hello"}`, + }, + { + name: "null instructions", + payload: `{"model":"gpt-5.4","instructions":null,"input":"hello"}`, + }, + } - executor := NewCodexExecutor(&config.Config{}) - auth := &cliproxyauth.Auth{Attributes: map[string]string{ - "base_url": server.URL, - "api_key": "test", - }} + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var gotPath string + var gotBody []byte + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + body, _ := io.ReadAll(r.Body) + gotBody = body + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`)) + })) + defer server.Close() - resp, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ - Model: "gpt-5.4", - Payload: []byte(`{"model":"gpt-5.4","input":"hello"}`), - }, cliproxyexecutor.Options{ - SourceFormat: sdktranslator.FromString("openai-response"), - Alt: "responses/compact", - Stream: false, - }) - if err != nil { - t.Fatalf("Execute error: %v", err) - } - if gotPath != "/responses/compact" { - t.Fatalf("path = %q, want %q", gotPath, "/responses/compact") - } - if !gjson.GetBytes(gotBody, "instructions").Exists() { - t.Fatalf("expected instructions in compact request body, got %s", string(gotBody)) - } - if gjson.GetBytes(gotBody, "instructions").String() != "" { - t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String()) - } - if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` { - t.Fatalf("payload = %s", string(resp.Payload)) + executor := NewCodexExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "base_url": server.URL, + "api_key": "test", + }} + + resp, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(tc.payload), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + Alt: "responses/compact", + Stream: false, + }) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if gotPath != "/responses/compact" { + t.Fatalf("path = %q, want %q", gotPath, "/responses/compact") + } + if !gjson.GetBytes(gotBody, 
"instructions").Exists() { + t.Fatalf("expected instructions in compact request body, got %s", string(gotBody)) + } + if gjson.GetBytes(gotBody, "instructions").Type != gjson.String { + t.Fatalf("instructions type = %v, want string", gjson.GetBytes(gotBody, "instructions").Type) + } + if gjson.GetBytes(gotBody, "instructions").String() != "" { + t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String()) + } + if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` { + t.Fatalf("payload = %s", string(resp.Payload)) + } + }) } } From a3e21df81488587475a5392be8300dd563e6b9c8 Mon Sep 17 00:00:00 2001 From: apparition <38576169+possible055@users.noreply.github.com> Date: Mon, 30 Mar 2026 23:33:16 +0800 Subject: [PATCH 17/42] fix(openai): avoid developer transcript resets - Narrow websocket transcript replacement detection to assistant outputs and function calls - Preserve existing merge behavior for follow-up developer messages without previous_response_id - Add a regression test covering mid-session developer message updates --- .../openai/openai_responses_websocket.go | 2 +- .../openai/openai_responses_websocket_test.go | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index 211b8b81..15a6bda7 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -374,7 +374,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo return true case "message": role := strings.TrimSpace(item.Get("role").String()) - if role == "assistant" || role == "developer" { + if role == "assistant" { return true } } diff --git a/sdk/api/handlers/openai/openai_responses_websocket_test.go 
b/sdk/api/handlers/openai/openai_responses_websocket_test.go index b1440a95..5619e6b1 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket_test.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go @@ -741,6 +741,32 @@ func TestNormalizeResponsesWebsocketRequestTreatsTranscriptReplacementAsReset(t } } +func TestNormalizeResponsesWebsocketRequestDoesNotTreatDeveloperMessageAsReplacement(t *testing.T) { + lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"}]}`) + lastResponseOutput := []byte(`[ + {"type":"message","id":"assistant-1","role":"assistant"} + ]`) + raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"dev-1","role":"developer"},{"type":"message","id":"msg-2"}]}`) + + normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput) + if errMsg != nil { + t.Fatalf("unexpected error: %v", errMsg.Error) + } + items := gjson.GetBytes(normalized, "input").Array() + if len(items) != 4 { + t.Fatalf("merged input len = %d, want 4: %s", len(items), normalized) + } + if items[0].Get("id").String() != "msg-1" || + items[1].Get("id").String() != "assistant-1" || + items[2].Get("id").String() != "dev-1" || + items[3].Get("id").String() != "msg-2" { + t.Fatalf("developer follow-up should preserve merge behavior: %s", normalized) + } + if !bytes.Equal(next, normalized) { + t.Fatalf("next request snapshot should match merged request") + } +} + func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) { gin.SetMode(gin.TestMode) From 88dd9c715d970ec65216683cff23efb9378acc45 Mon Sep 17 00:00:00 2001 From: xixiwenxuanhe Date: Mon, 30 Mar 2026 23:58:12 +0800 Subject: [PATCH 18/42] feat(antigravity): add AI credits quota fallback --- config.example.yaml | 1 + internal/config/config.go | 4 + .../runtime/executor/antigravity_executor.go | 412 ++++++++++++++++-- .../antigravity_executor_credits_test.go | 291 +++++++++++++ 
internal/watcher/diff/config_diff.go | 3 + internal/watcher/diff/config_diff_test.go | 10 +- 6 files changed, 670 insertions(+), 51 deletions(-) create mode 100644 internal/runtime/executor/antigravity_executor_credits_test.go diff --git a/config.example.yaml b/config.example.yaml index 1b365d87..a394f979 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -91,6 +91,7 @@ max-retry-interval: 30 quota-exceeded: switch-project: true # Whether to automatically switch to another project when a quota is exceeded switch-preview-model: true # Whether to automatically switch to a preview model when a quota is exceeded + antigravity-credits: true # Whether to retry Antigravity quota_exhausted 429s once with enabledCreditTypes=["GOOGLE_ONE_AI"] # Routing strategy for selecting credentials when multiple match. routing: diff --git a/internal/config/config.go b/internal/config/config.go index c4156e97..ceb2e7bd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -194,6 +194,10 @@ type QuotaExceeded struct { // SwitchPreviewModel indicates whether to automatically switch to a preview model when a quota is exceeded. SwitchPreviewModel bool `yaml:"switch-preview-model" json:"switch-preview-model"` + + // AntigravityCredits indicates whether to retry Antigravity quota_exhausted 429s once + // on the same credential with enabledCreditTypes=["GOOGLE_ONE_AI"]. + AntigravityCredits bool `yaml:"antigravity-credits" json:"antigravity-credits"` } // RoutingConfig configures how credentials are selected for requests. 
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 18079a43..76ce9586 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -47,12 +47,41 @@ const ( defaultAntigravityAgent = "antigravity/1.19.6 darwin/arm64" antigravityAuthType = "antigravity" refreshSkew = 3000 * time.Second + antigravityCreditsRetryTTL = 5 * time.Hour // systemInstruction = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.**Absolute paths only****Proactiveness**" ) +type antigravity429Category string + +const ( + antigravity429Unknown antigravity429Category = "unknown" + antigravity429RateLimited antigravity429Category = "rate_limited" + antigravity429QuotaExhausted antigravity429Category = "quota_exhausted" +) + var ( - randSource = rand.New(rand.NewSource(time.Now().UnixNano())) - randSourceMutex sync.Mutex + randSource = rand.New(rand.NewSource(time.Now().UnixNano())) + randSourceMutex sync.Mutex + antigravityCreditsExhaustedByAuth sync.Map + antigravityPreferCreditsByModel sync.Map + antigravityQuotaExhaustedKeywords = []string{ + "quota_exhausted", + "quota exhausted", + } + antigravityCreditsExhaustedKeywords = []string{ + "google_one_ai", + "insufficient credit", + "insufficient credits", + "not enough credit", + "not enough credits", + "credit exhausted", + "credits exhausted", + "credit balance", + "minimumcreditamountforusage", + "minimum credit amount for usage", + "minimum credit", + "resource has been exhausted", + } ) // AntigravityExecutor proxies requests to the antigravity upstream. 
@@ -183,6 +212,231 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut return httpClient.Do(httpReq) } +func injectEnabledCreditTypes(payload []byte) []byte { + if len(payload) == 0 { + return nil + } + if !gjson.ValidBytes(payload) { + return nil + } + updated, err := sjson.SetRawBytes(payload, "enabledCreditTypes", []byte(`["GOOGLE_ONE_AI"]`)) + if err != nil { + return nil + } + return updated +} + +func classifyAntigravity429(body []byte) antigravity429Category { + if len(body) == 0 { + return antigravity429Unknown + } + lowerBody := strings.ToLower(string(body)) + for _, keyword := range antigravityQuotaExhaustedKeywords { + if strings.Contains(lowerBody, keyword) { + return antigravity429QuotaExhausted + } + } + status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String()) + if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") { + return antigravity429Unknown + } + details := gjson.GetBytes(body, "error.details") + if !details.Exists() || !details.IsArray() { + return antigravity429Unknown + } + for _, detail := range details.Array() { + if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" { + continue + } + reason := strings.TrimSpace(detail.Get("reason").String()) + if strings.EqualFold(reason, "QUOTA_EXHAUSTED") { + return antigravity429QuotaExhausted + } + if strings.EqualFold(reason, "RATE_LIMIT_EXCEEDED") { + return antigravity429RateLimited + } + } + return antigravity429Unknown +} + +func antigravityCreditsRetryEnabled(cfg *config.Config) bool { + return cfg != nil && cfg.QuotaExceeded.AntigravityCredits +} + +func antigravityCreditsExhausted(auth *cliproxyauth.Auth, now time.Time) bool { + if auth == nil || strings.TrimSpace(auth.ID) == "" { + return false + } + value, ok := antigravityCreditsExhaustedByAuth.Load(auth.ID) + if !ok { + return false + } + until, ok := value.(time.Time) + if !ok || until.IsZero() { + antigravityCreditsExhaustedByAuth.Delete(auth.ID) + return false + } 
+ if !until.After(now) { + antigravityCreditsExhaustedByAuth.Delete(auth.ID) + return false + } + return true +} + +func markAntigravityCreditsExhausted(auth *cliproxyauth.Auth, now time.Time) { + if auth == nil || strings.TrimSpace(auth.ID) == "" { + return + } + antigravityCreditsExhaustedByAuth.Store(auth.ID, now.Add(antigravityCreditsRetryTTL)) +} + +func clearAntigravityCreditsExhausted(auth *cliproxyauth.Auth) { + if auth == nil || strings.TrimSpace(auth.ID) == "" { + return + } + antigravityCreditsExhaustedByAuth.Delete(auth.ID) +} + +func antigravityPreferCreditsKey(auth *cliproxyauth.Auth, modelName string) string { + if auth == nil { + return "" + } + authID := strings.TrimSpace(auth.ID) + modelName = strings.TrimSpace(modelName) + if authID == "" || modelName == "" { + return "" + } + return authID + "|" + modelName +} + +func antigravityShouldPreferCredits(auth *cliproxyauth.Auth, modelName string, now time.Time) bool { + key := antigravityPreferCreditsKey(auth, modelName) + if key == "" { + return false + } + value, ok := antigravityPreferCreditsByModel.Load(key) + if !ok { + return false + } + until, ok := value.(time.Time) + if !ok || until.IsZero() { + antigravityPreferCreditsByModel.Delete(key) + return false + } + if !until.After(now) { + antigravityPreferCreditsByModel.Delete(key) + return false + } + return true +} + +func markAntigravityPreferCredits(auth *cliproxyauth.Auth, modelName string, now time.Time, retryAfter *time.Duration) { + key := antigravityPreferCreditsKey(auth, modelName) + if key == "" { + return + } + until := now.Add(antigravityCreditsRetryTTL) + if retryAfter != nil && *retryAfter > 0 { + until = now.Add(*retryAfter) + } + antigravityPreferCreditsByModel.Store(key, until) +} + +func clearAntigravityPreferCredits(auth *cliproxyauth.Auth, modelName string) { + key := antigravityPreferCreditsKey(auth, modelName) + if key == "" { + return + } + antigravityPreferCreditsByModel.Delete(key) +} + +func 
shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr error) bool { + if reqErr != nil || statusCode == 0 { + return false + } + if statusCode >= http.StatusInternalServerError || statusCode == http.StatusRequestTimeout { + return false + } + lowerBody := strings.ToLower(string(body)) + for _, keyword := range antigravityCreditsExhaustedKeywords { + if strings.Contains(lowerBody, keyword) { + return true + } + } + return false +} + +func newAntigravityStatusErr(statusCode int, body []byte) statusErr { + err := statusErr{code: statusCode, msg: string(body)} + if statusCode == http.StatusTooManyRequests { + if retryAfter, parseErr := parseRetryDelay(body); parseErr == nil && retryAfter != nil { + err.retryAfter = retryAfter + } + } + return err +} + +func (e *AntigravityExecutor) attemptCreditsFallback( + ctx context.Context, + auth *cliproxyauth.Auth, + httpClient *http.Client, + token string, + modelName string, + payload []byte, + stream bool, + alt string, + baseURL string, + originalBody []byte, +) (*http.Response, bool) { + if !antigravityCreditsRetryEnabled(e.cfg) { + return nil, false + } + if classifyAntigravity429(originalBody) != antigravity429QuotaExhausted { + return nil, false + } + now := time.Now() + if antigravityCreditsExhausted(auth, now) { + return nil, false + } + creditsPayload := injectEnabledCreditTypes(payload) + if len(creditsPayload) == 0 { + return nil, false + } + + httpReq, errReq := e.buildRequest(ctx, auth, token, modelName, creditsPayload, stream, alt, baseURL) + if errReq != nil { + recordAPIResponseError(ctx, e.cfg, errReq) + return nil, true + } + httpResp, errDo := httpClient.Do(httpReq) + if errDo != nil { + recordAPIResponseError(ctx, e.cfg, errDo) + return nil, true + } + if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { + retryAfter, _ := parseRetryDelay(originalBody) + markAntigravityPreferCredits(auth, modelName, now, retryAfter) + 
clearAntigravityCreditsExhausted(auth) + return httpResp, true + } + + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + bodyBytes, errRead := io.ReadAll(httpResp.Body) + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("antigravity executor: close credits fallback response body error: %v", errClose) + } + if errRead != nil { + recordAPIResponseError(ctx, e.cfg, errRead) + return nil, true + } + appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { + clearAntigravityPreferCredits(auth, modelName) + markAntigravityCreditsExhausted(auth, now) + } + return nil, true +} + // Execute performs a non-streaming request to the Antigravity API. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { if opts.Alt == "responses/compact" { @@ -237,7 +491,15 @@ attemptLoop: var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL) + requestPayload := translated + usedCreditsDirect := false + if antigravityShouldPreferCredits(auth, baseModel, time.Now()) { + if creditsPayload := injectEnabledCreditTypes(translated); len(creditsPayload) > 0 { + requestPayload = creditsPayload + usedCreditsDirect = true + } + } + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, requestPayload, false, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -272,6 +534,40 @@ attemptLoop: } appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + if httpResp.StatusCode == http.StatusTooManyRequests { + if usedCreditsDirect { + if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { + clearAntigravityPreferCredits(auth, baseModel) + markAntigravityCreditsExhausted(auth, time.Now()) + } + } else { + 
creditsResp, attemptedCredits := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) + if creditsResp != nil { + recordAPIResponseMetadata(ctx, e.cfg, creditsResp.StatusCode, creditsResp.Header.Clone()) + creditsBody, errCreditsRead := io.ReadAll(creditsResp.Body) + if errClose := creditsResp.Body.Close(); errClose != nil { + log.Errorf("antigravity executor: close credits success response body error: %v", errClose) + } + if errCreditsRead != nil { + recordAPIResponseError(ctx, e.cfg, errCreditsRead) + err = errCreditsRead + return resp, err + } + appendAPIResponseChunk(ctx, e.cfg, creditsBody) + reporter.publish(ctx, parseAntigravityUsage(creditsBody)) + var param any + converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, creditsBody, ¶m) + resp = cliproxyexecutor.Response{Payload: converted, Headers: creditsResp.Header.Clone()} + reporter.ensurePublished(ctx) + return resp, nil + } + if attemptedCredits { + err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) + return resp, err + } + } + } + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes)) lastStatus = httpResp.StatusCode @@ -295,13 +591,7 @@ attemptLoop: continue attemptLoop } } - sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)} - if httpResp.StatusCode == http.StatusTooManyRequests { - if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil { - sErr.retryAfter = retryAfter - } - } - err = sErr + err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) return resp, err } @@ -315,13 +605,7 @@ attemptLoop: switch { case lastStatus != 0: - sErr := statusErr{code: lastStatus, msg: string(lastBody)} - if lastStatus == 
http.StatusTooManyRequests { - if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil { - sErr.retryAfter = retryAfter - } - } - err = sErr + err = newAntigravityStatusErr(lastStatus, lastBody) case lastErr != nil: err = lastErr default: @@ -379,7 +663,15 @@ attemptLoop: var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL) + requestPayload := translated + usedCreditsDirect := false + if antigravityShouldPreferCredits(auth, baseModel, time.Now()) { + if creditsPayload := injectEnabledCreditTypes(translated); len(creditsPayload) > 0 { + requestPayload = creditsPayload + usedCreditsDirect = true + } + } + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, requestPayload, true, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -428,6 +720,26 @@ attemptLoop: return resp, err } appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + if httpResp.StatusCode == http.StatusTooManyRequests { + if usedCreditsDirect { + if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { + clearAntigravityPreferCredits(auth, baseModel) + markAntigravityCreditsExhausted(auth, time.Now()) + } + } else { + creditsResp, attemptedCredits := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) + if creditsResp != nil { + httpResp = creditsResp + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + } else if attemptedCredits { + err = newAntigravityStatusErr(http.StatusTooManyRequests, bodyBytes) + return resp, err + } + } + } + if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { + goto streamSuccessClaudeNonStream + } lastStatus = httpResp.StatusCode lastBody = append([]byte(nil), bodyBytes...) 
lastErr = nil @@ -449,16 +761,11 @@ attemptLoop: continue attemptLoop } } - sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)} - if httpResp.StatusCode == http.StatusTooManyRequests { - if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil { - sErr.retryAfter = retryAfter - } - } - err = sErr + err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) return resp, err } + streamSuccessClaudeNonStream: out := make(chan cliproxyexecutor.StreamChunk) go func(resp *http.Response) { defer close(out) @@ -520,13 +827,7 @@ attemptLoop: switch { case lastStatus != 0: - sErr := statusErr{code: lastStatus, msg: string(lastBody)} - if lastStatus == http.StatusTooManyRequests { - if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil { - sErr.retryAfter = retryAfter - } - } - err = sErr + err = newAntigravityStatusErr(lastStatus, lastBody) case lastErr != nil: err = lastErr default: @@ -782,7 +1083,15 @@ attemptLoop: var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL) + requestPayload := translated + usedCreditsDirect := false + if antigravityShouldPreferCredits(auth, baseModel, time.Now()) { + if creditsPayload := injectEnabledCreditTypes(translated); len(creditsPayload) > 0 { + requestPayload = creditsPayload + usedCreditsDirect = true + } + } + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, requestPayload, true, opts.Alt, baseURL) if errReq != nil { err = errReq return nil, err @@ -830,6 +1139,26 @@ attemptLoop: return nil, err } appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + if httpResp.StatusCode == http.StatusTooManyRequests { + if usedCreditsDirect { + if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { + clearAntigravityPreferCredits(auth, baseModel) + markAntigravityCreditsExhausted(auth, time.Now()) + } + } else { + 
creditsResp, attemptedCredits := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) + if creditsResp != nil { + httpResp = creditsResp + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + } else if attemptedCredits { + err = newAntigravityStatusErr(http.StatusTooManyRequests, bodyBytes) + return nil, err + } + } + } + if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { + goto streamSuccessExecuteStream + } lastStatus = httpResp.StatusCode lastBody = append([]byte(nil), bodyBytes...) lastErr = nil @@ -851,16 +1180,11 @@ attemptLoop: continue attemptLoop } } - sErr := statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)} - if httpResp.StatusCode == http.StatusTooManyRequests { - if retryAfter, parseErr := parseRetryDelay(bodyBytes); parseErr == nil && retryAfter != nil { - sErr.retryAfter = retryAfter - } - } - err = sErr + err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) return nil, err } + streamSuccessExecuteStream: out := make(chan cliproxyexecutor.StreamChunk) go func(resp *http.Response) { defer close(out) @@ -911,13 +1235,7 @@ attemptLoop: switch { case lastStatus != 0: - sErr := statusErr{code: lastStatus, msg: string(lastBody)} - if lastStatus == http.StatusTooManyRequests { - if retryAfter, parseErr := parseRetryDelay(lastBody); parseErr == nil && retryAfter != nil { - sErr.retryAfter = retryAfter - } - } - err = sErr + err = newAntigravityStatusErr(lastStatus, lastBody) case lastErr != nil: err = lastErr default: diff --git a/internal/runtime/executor/antigravity_executor_credits_test.go b/internal/runtime/executor/antigravity_executor_credits_test.go new file mode 100644 index 00000000..ecac0c83 --- /dev/null +++ b/internal/runtime/executor/antigravity_executor_credits_test.go @@ -0,0 +1,291 @@ +package executor + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync" 
+ "testing" + "time" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" +) + +func resetAntigravityCreditsRetryState() { + antigravityCreditsExhaustedByAuth = sync.Map{} + antigravityPreferCreditsByModel = sync.Map{} +} + +func TestClassifyAntigravity429(t *testing.T) { + t.Run("quota exhausted", func(t *testing.T) { + body := []byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`) + if got := classifyAntigravity429(body); got != antigravity429QuotaExhausted { + t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429QuotaExhausted) + } + }) + + t.Run("structured rate limit", func(t *testing.T) { + body := []byte(`{ + "error": { + "status": "RESOURCE_EXHAUSTED", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "reason": "RATE_LIMIT_EXCEEDED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} + ] + } + }`) + if got := classifyAntigravity429(body); got != antigravity429RateLimited { + t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429RateLimited) + } + }) + + t.Run("structured quota exhausted", func(t *testing.T) { + body := []byte(`{ + "error": { + "status": "RESOURCE_EXHAUSTED", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "reason": "QUOTA_EXHAUSTED"} + ] + } + }`) + if got := classifyAntigravity429(body); got != antigravity429QuotaExhausted { + t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429QuotaExhausted) + } + }) + + t.Run("unknown", func(t *testing.T) { + body := []byte(`{"error":{"message":"too many requests"}}`) + if got := classifyAntigravity429(body); got != antigravity429Unknown { + t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429Unknown) + } 
+ }) +} + +func TestInjectEnabledCreditTypes(t *testing.T) { + body := []byte(`{"model":"gemini-2.5-flash","request":{}}`) + got := injectEnabledCreditTypes(body) + if got == nil { + t.Fatal("injectEnabledCreditTypes() returned nil") + } + if !strings.Contains(string(got), `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("injectEnabledCreditTypes() = %s, want enabledCreditTypes", string(got)) + } + + if got := injectEnabledCreditTypes([]byte(`not json`)); got != nil { + t.Fatalf("injectEnabledCreditTypes() for invalid json = %s, want nil", string(got)) + } +} + +func TestShouldMarkAntigravityCreditsExhausted(t *testing.T) { + for _, body := range [][]byte{ + []byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`), + []byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`), + []byte(`{"error":{"message":"Resource has been exhausted"}}`), + } { + if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) { + t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body)) + } + } + if shouldMarkAntigravityCreditsExhausted(http.StatusServiceUnavailable, []byte(`{"error":{"message":"credits exhausted"}}`), nil) { + t.Fatal("shouldMarkAntigravityCreditsExhausted() = true for 5xx, want false") + } +} + +func TestAntigravityExecute_RetriesQuotaExhaustedWithCredits(t *testing.T) { + resetAntigravityCreditsRetryState() + t.Cleanup(resetAntigravityCreditsRetryState) + + var ( + mu sync.Mutex + requestBodies []string + ) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + _ = r.Body.Close() + + mu.Lock() + requestBodies = append(requestBodies, string(body)) + reqNum := len(requestBodies) + mu.Unlock() + + if reqNum == 1 { + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`)) + return + } + + if !strings.Contains(string(body), 
`"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("second request body missing enabledCreditTypes: %s", string(body)) + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`)) + })) + defer server.Close() + + exec := NewAntigravityExecutor(&config.Config{ + QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true}, + }) + auth := &cliproxyauth.Auth{ + ID: "auth-credits-ok", + Attributes: map[string]string{ + "base_url": server.URL, + }, + Metadata: map[string]any{ + "access_token": "token", + "project_id": "project-1", + "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), + }, + } + + resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gemini-2.5-flash", + Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatAntigravity, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + if len(resp.Payload) == 0 { + t.Fatal("Execute() returned empty payload") + } + + mu.Lock() + defer mu.Unlock() + if len(requestBodies) != 2 { + t.Fatalf("request count = %d, want 2", len(requestBodies)) + } +} + +func TestAntigravityExecute_SkipsCreditsRetryWhenAlreadyExhausted(t *testing.T) { + resetAntigravityCreditsRetryState() + t.Cleanup(resetAntigravityCreditsRetryState) + + var requestCount int + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestCount++ + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`)) + })) + defer server.Close() + + exec := NewAntigravityExecutor(&config.Config{ + QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true}, + }) + auth := &cliproxyauth.Auth{ + ID: 
"auth-credits-exhausted", + Attributes: map[string]string{ + "base_url": server.URL, + }, + Metadata: map[string]any{ + "access_token": "token", + "project_id": "project-1", + "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), + }, + } + markAntigravityCreditsExhausted(auth, time.Now()) + + _, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gemini-2.5-flash", + Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatAntigravity, + }) + if err == nil { + t.Fatal("Execute() error = nil, want 429") + } + sErr, ok := err.(statusErr) + if !ok { + t.Fatalf("Execute() error type = %T, want statusErr", err) + } + if got := sErr.StatusCode(); got != http.StatusTooManyRequests { + t.Fatalf("Execute() status code = %d, want %d", got, http.StatusTooManyRequests) + } + if requestCount != 1 { + t.Fatalf("request count = %d, want 1", requestCount) + } +} + +func TestAntigravityExecute_PrefersCreditsAfterSuccessfulFallback(t *testing.T) { + resetAntigravityCreditsRetryState() + t.Cleanup(resetAntigravityCreditsRetryState) + + var ( + mu sync.Mutex + requestBodies []string + ) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + _ = r.Body.Close() + + mu.Lock() + requestBodies = append(requestBodies, string(body)) + reqNum := len(requestBodies) + mu.Unlock() + + switch reqNum { + case 1: + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"QUOTA_EXHAUSTED"},{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"10s"}]}}`)) + case 2, 3: + if !strings.Contains(string(body), `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("request %d body missing enabledCreditTypes: %s", reqNum, string(body)) + } + 
w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"OK"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`)) + default: + t.Fatalf("unexpected request count %d", reqNum) + } + })) + defer server.Close() + + exec := NewAntigravityExecutor(&config.Config{ + QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true}, + }) + auth := &cliproxyauth.Auth{ + ID: "auth-prefer-credits", + Attributes: map[string]string{ + "base_url": server.URL, + }, + Metadata: map[string]any{ + "access_token": "token", + "project_id": "project-1", + "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), + }, + } + + request := cliproxyexecutor.Request{ + Model: "gemini-2.5-flash", + Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`), + } + opts := cliproxyexecutor.Options{SourceFormat: sdktranslator.FormatAntigravity} + + if _, err := exec.Execute(context.Background(), auth, request, opts); err != nil { + t.Fatalf("first Execute() error = %v", err) + } + if _, err := exec.Execute(context.Background(), auth, request, opts); err != nil { + t.Fatalf("second Execute() error = %v", err) + } + + mu.Lock() + defer mu.Unlock() + if len(requestBodies) != 3 { + t.Fatalf("request count = %d, want 3", len(requestBodies)) + } + if strings.Contains(requestBodies[0], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("first request unexpectedly used credits: %s", requestBodies[0]) + } + if !strings.Contains(requestBodies[1], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("fallback request missing credits: %s", requestBodies[1]) + } + if !strings.Contains(requestBodies[2], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("preferred request missing credits: %s", requestBodies[2]) + } +} diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go index fccdaf8d..11f9093e 100644 --- 
a/internal/watcher/diff/config_diff.go +++ b/internal/watcher/diff/config_diff.go @@ -80,6 +80,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if oldCfg.QuotaExceeded.SwitchPreviewModel != newCfg.QuotaExceeded.SwitchPreviewModel { changes = append(changes, fmt.Sprintf("quota-exceeded.switch-preview-model: %t -> %t", oldCfg.QuotaExceeded.SwitchPreviewModel, newCfg.QuotaExceeded.SwitchPreviewModel)) } + if oldCfg.QuotaExceeded.AntigravityCredits != newCfg.QuotaExceeded.AntigravityCredits { + changes = append(changes, fmt.Sprintf("quota-exceeded.antigravity-credits: %t -> %t", oldCfg.QuotaExceeded.AntigravityCredits, newCfg.QuotaExceeded.AntigravityCredits)) + } if oldCfg.Routing.Strategy != newCfg.Routing.Strategy { changes = append(changes, fmt.Sprintf("routing.strategy: %s -> %s", oldCfg.Routing.Strategy, newCfg.Routing.Strategy)) diff --git a/internal/watcher/diff/config_diff_test.go b/internal/watcher/diff/config_diff_test.go index c7b73f11..2d45aa57 100644 --- a/internal/watcher/diff/config_diff_test.go +++ b/internal/watcher/diff/config_diff_test.go @@ -229,7 +229,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) { MaxRetryCredentials: 1, MaxRetryInterval: 1, WebsocketAuth: false, - QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false}, + QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false, AntigravityCredits: false}, ClaudeKey: []config.ClaudeKey{{APIKey: "c1"}}, CodexKey: []config.CodexKey{{APIKey: "x1"}}, AmpCode: config.AmpCode{UpstreamAPIKey: "keep", RestrictManagementToLocalhost: false}, @@ -253,7 +253,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) { MaxRetryCredentials: 3, MaxRetryInterval: 3, WebsocketAuth: true, - QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true}, + QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true, AntigravityCredits: true}, ClaudeKey: 
[]config.ClaudeKey{ {APIKey: "c1", BaseURL: "http://new", ProxyURL: "http://p", Headers: map[string]string{"H": "1"}, ExcludedModels: []string{"a"}}, {APIKey: "c2"}, @@ -297,6 +297,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) { expectContains(t, details, "nonstream-keepalive-interval: 0 -> 5") expectContains(t, details, "quota-exceeded.switch-project: false -> true") expectContains(t, details, "quota-exceeded.switch-preview-model: false -> true") + expectContains(t, details, "quota-exceeded.antigravity-credits: false -> true") expectContains(t, details, "api-keys count: 1 -> 2") expectContains(t, details, "claude-api-key count: 1 -> 2") expectContains(t, details, "codex-api-key count: 1 -> 2") @@ -320,7 +321,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) { MaxRetryCredentials: 1, MaxRetryInterval: 1, WebsocketAuth: false, - QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false}, + QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false, AntigravityCredits: false}, GeminiKey: []config.GeminiKey{ {APIKey: "g-old", BaseURL: "http://g-old", ProxyURL: "http://gp-old", Headers: map[string]string{"A": "1"}}, }, @@ -374,7 +375,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) { MaxRetryCredentials: 3, MaxRetryInterval: 3, WebsocketAuth: true, - QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true}, + QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true, AntigravityCredits: true}, GeminiKey: []config.GeminiKey{ {APIKey: "g-new", BaseURL: "http://g-new", ProxyURL: "http://gp-new", Headers: map[string]string{"A": "2"}, ExcludedModels: []string{"x", "y"}}, }, @@ -437,6 +438,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) { expectContains(t, changes, "ws-auth: false -> true") expectContains(t, changes, "quota-exceeded.switch-project: false -> true") expectContains(t, changes, 
"quota-exceeded.switch-preview-model: false -> true") + expectContains(t, changes, "quota-exceeded.antigravity-credits: false -> true") expectContains(t, changes, "api-keys: values updated (count unchanged, redacted)") expectContains(t, changes, "gemini[0].base-url: http://g-old -> http://g-new") expectContains(t, changes, "gemini[0].proxy-url: http://gp-old -> http://gp-new") From a0bf33eca674cf3bff005a107d504359b23adea9 Mon Sep 17 00:00:00 2001 From: xixiwenxuanhe Date: Tue, 31 Mar 2026 00:14:05 +0800 Subject: [PATCH 19/42] fix(antigravity): preserve fallback and honor config gate --- .../runtime/executor/antigravity_executor.go | 24 +--- .../antigravity_executor_credits_test.go | 132 ++++++++++++++++++ 2 files changed, 139 insertions(+), 17 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 76ce9586..6ee972a7 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -493,7 +493,7 @@ attemptLoop: for idx, baseURL := range baseURLs { requestPayload := translated usedCreditsDirect := false - if antigravityShouldPreferCredits(auth, baseModel, time.Now()) { + if antigravityCreditsRetryEnabled(e.cfg) && antigravityShouldPreferCredits(auth, baseModel, time.Now()) { if creditsPayload := injectEnabledCreditTypes(translated); len(creditsPayload) > 0 { requestPayload = creditsPayload usedCreditsDirect = true @@ -541,7 +541,7 @@ attemptLoop: markAntigravityCreditsExhausted(auth, time.Now()) } } else { - creditsResp, attemptedCredits := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) + creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) if creditsResp != nil { recordAPIResponseMetadata(ctx, e.cfg, creditsResp.StatusCode, creditsResp.Header.Clone()) creditsBody, errCreditsRead := 
io.ReadAll(creditsResp.Body) @@ -561,10 +561,6 @@ attemptLoop: reporter.ensurePublished(ctx) return resp, nil } - if attemptedCredits { - err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) - return resp, err - } } } @@ -665,7 +661,7 @@ attemptLoop: for idx, baseURL := range baseURLs { requestPayload := translated usedCreditsDirect := false - if antigravityShouldPreferCredits(auth, baseModel, time.Now()) { + if antigravityCreditsRetryEnabled(e.cfg) && antigravityShouldPreferCredits(auth, baseModel, time.Now()) { if creditsPayload := injectEnabledCreditTypes(translated); len(creditsPayload) > 0 { requestPayload = creditsPayload usedCreditsDirect = true @@ -727,13 +723,10 @@ attemptLoop: markAntigravityCreditsExhausted(auth, time.Now()) } } else { - creditsResp, attemptedCredits := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) + creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) if creditsResp != nil { httpResp = creditsResp recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - } else if attemptedCredits { - err = newAntigravityStatusErr(http.StatusTooManyRequests, bodyBytes) - return resp, err } } } @@ -1085,7 +1078,7 @@ attemptLoop: for idx, baseURL := range baseURLs { requestPayload := translated usedCreditsDirect := false - if antigravityShouldPreferCredits(auth, baseModel, time.Now()) { + if antigravityCreditsRetryEnabled(e.cfg) && antigravityShouldPreferCredits(auth, baseModel, time.Now()) { if creditsPayload := injectEnabledCreditTypes(translated); len(creditsPayload) > 0 { requestPayload = creditsPayload usedCreditsDirect = true @@ -1146,13 +1139,10 @@ attemptLoop: markAntigravityCreditsExhausted(auth, time.Now()) } } else { - creditsResp, attemptedCredits := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) 
+ creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) if creditsResp != nil { httpResp = creditsResp recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - } else if attemptedCredits { - err = newAntigravityStatusErr(http.StatusTooManyRequests, bodyBytes) - return nil, err } } } @@ -1797,7 +1787,7 @@ func antigravityWait(ctx context.Context, wait time.Duration) error { } } -func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string { +var antigravityBaseURLFallbackOrder = func(auth *cliproxyauth.Auth) []string { if base := resolveCustomAntigravityBaseURL(auth); base != "" { return []string{base} } diff --git a/internal/runtime/executor/antigravity_executor_credits_test.go b/internal/runtime/executor/antigravity_executor_credits_test.go index ecac0c83..13ab662b 100644 --- a/internal/runtime/executor/antigravity_executor_credits_test.go +++ b/internal/runtime/executor/antigravity_executor_credits_test.go @@ -289,3 +289,135 @@ func TestAntigravityExecute_PrefersCreditsAfterSuccessfulFallback(t *testing.T) t.Fatalf("preferred request missing credits: %s", requestBodies[2]) } } + +func TestAntigravityExecute_PreservesBaseURLFallbackAfterCreditsRetryFailure(t *testing.T) { + resetAntigravityCreditsRetryState() + t.Cleanup(resetAntigravityCreditsRetryState) + + var ( + mu sync.Mutex + firstCount int + secondCount int + ) + + firstServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + _ = r.Body.Close() + + mu.Lock() + firstCount++ + reqNum := firstCount + mu.Unlock() + + switch reqNum { + case 1: + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"QUOTA_EXHAUSTED"}]}}`)) + case 2: + if !strings.Contains(string(body), 
`"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("credits retry missing enabledCreditTypes: %s", string(body)) + } + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"error":{"message":"permission denied"}}`)) + default: + t.Fatalf("unexpected first server request count %d", reqNum) + } + })) + defer firstServer.Close() + + secondServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + secondCount++ + mu.Unlock() + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`)) + })) + defer secondServer.Close() + + exec := NewAntigravityExecutor(&config.Config{ + QuotaExceeded: config.QuotaExceeded{AntigravityCredits: true}, + }) + auth := &cliproxyauth.Auth{ + ID: "auth-baseurl-fallback", + Attributes: map[string]string{ + "base_url": firstServer.URL, + }, + Metadata: map[string]any{ + "access_token": "token", + "project_id": "project-1", + "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), + }, + } + + originalOrder := antigravityBaseURLFallbackOrder + defer func() { antigravityBaseURLFallbackOrder = originalOrder }() + antigravityBaseURLFallbackOrder = func(auth *cliproxyauth.Auth) []string { + return []string{firstServer.URL, secondServer.URL} + } + + resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gemini-2.5-flash", + Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatAntigravity, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + if len(resp.Payload) == 0 { + t.Fatal("Execute() returned empty payload") + } + if firstCount != 2 { + t.Fatalf("first server request count = %d, want 2", firstCount) + } + if secondCount != 1 { + t.Fatalf("second 
server request count = %d, want 1", secondCount) + } +} + +func TestAntigravityExecute_DoesNotDirectInjectCreditsWhenFlagDisabled(t *testing.T) { + resetAntigravityCreditsRetryState() + t.Cleanup(resetAntigravityCreditsRetryState) + + var requestBodies []string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + _ = r.Body.Close() + requestBodies = append(requestBodies, string(body)) + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`)) + })) + defer server.Close() + + exec := NewAntigravityExecutor(&config.Config{ + QuotaExceeded: config.QuotaExceeded{AntigravityCredits: false}, + }) + auth := &cliproxyauth.Auth{ + ID: "auth-flag-disabled", + Attributes: map[string]string{ + "base_url": server.URL, + }, + Metadata: map[string]any{ + "access_token": "token", + "project_id": "project-1", + "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), + }, + } + markAntigravityPreferCredits(auth, "gemini-2.5-flash", time.Now(), nil) + + _, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gemini-2.5-flash", + Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatAntigravity, + }) + if err == nil { + t.Fatal("Execute() error = nil, want 429") + } + if len(requestBodies) != 1 { + t.Fatalf("request count = %d, want 1", len(requestBodies)) + } + if strings.Contains(requestBodies[0], `"enabledCreditTypes":["GOOGLE_ONE_AI"]`) { + t.Fatalf("request unexpectedly used enabledCreditTypes with flag disabled: %s", requestBodies[0]) + } +} From c10f8ae2e222c7461fdf5500bf8be8e97fee7a9e Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Tue, 31 Mar 2026 07:23:02 +0800 Subject: [PATCH 20/42] Fixed: #2420 docs(readme): remove ProxyPal section from all README translations --- README.md | 4 
---- README_CN.md | 4 ---- README_JA.md | 4 ---- 3 files changed, 12 deletions(-) diff --git a/README.md b/README.md index 40d8c595..ca01bbdc 100644 --- a/README.md +++ b/README.md @@ -126,10 +126,6 @@ Browser-based tool to translate SRT subtitles using your Gemini subscription via CLI wrapper for instant switching between multiple Claude accounts and alternative models (Gemini, Codex, Antigravity) via CLIProxyAPI OAuth - no API keys needed -### [ProxyPal](https://github.com/heyhuynhgiabuu/proxypal) - -Native macOS GUI for managing CLIProxyAPI: configure providers, model mappings, and endpoints via OAuth - no API keys needed. - ### [Quotio](https://github.com/nguyenphutrong/quotio) Native macOS menu bar app that unifies Claude, Gemini, OpenAI, Qwen, and Antigravity subscriptions with real-time quota tracking and smart auto-failover for AI coding tools like Claude Code, OpenCode, and Droid - no API keys needed. diff --git a/README_CN.md b/README_CN.md index 618b86dd..3c96dbd6 100644 --- a/README_CN.md +++ b/README_CN.md @@ -125,10 +125,6 @@ CLIProxyAPI 已内置对 [Amp CLI](https://ampcode.com) 和 Amp IDE 扩展的支 CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户和替代模型(Gemini, Codex, Antigravity),无需 API 密钥。 -### [ProxyPal](https://github.com/heyhuynhgiabuu/proxypal) - -基于 macOS 平台的原生 CLIProxyAPI GUI:配置供应商、模型映射以及OAuth端点,无需 API 密钥。 - ### [Quotio](https://github.com/nguyenphutrong/quotio) 原生 macOS 菜单栏应用,统一管理 Claude、Gemini、OpenAI、Qwen 和 Antigravity 订阅,提供实时配额追踪和智能自动故障转移,支持 Claude Code、OpenCode 和 Droid 等 AI 编程工具,无需 API 密钥。 diff --git a/README_JA.md b/README_JA.md index d1b64ba7..2222c32a 100644 --- a/README_JA.md +++ b/README_JA.md @@ -126,10 +126,6 @@ CLIProxyAPI経由でGeminiサブスクリプションを使用してSRT字幕を CLIProxyAPI OAuthを使用して複数のClaudeアカウントや代替モデル(Gemini、Codex、Antigravity)を即座に切り替えるCLIラッパー - APIキー不要 -### [ProxyPal](https://github.com/heyhuynhgiabuu/proxypal) - -CLIProxyAPI管理用のmacOSネイティブGUI:OAuth経由でプロバイダー、モデルマッピング、エンドポイントを設定 - APIキー不要 - ### [Quotio](https://github.com/nguyenphutrong/quotio) 
Claude、Gemini、OpenAI、Qwen、Antigravityのサブスクリプションを統合し、リアルタイムのクォータ追跡とスマート自動フェイルオーバーを備えたmacOSネイティブのメニューバーアプリ。Claude Code、OpenCode、Droidなどのコーディングツール向け - APIキー不要 From bd855abec9eb7dfee7c47b96e1e7eed578655826 Mon Sep 17 00:00:00 2001 From: MonsterQiu <72pgstan@gmail.com> Date: Tue, 31 Mar 2026 10:06:04 +0800 Subject: [PATCH 21/42] fix(codex): normalize null instructions for responses requests --- internal/runtime/executor/codex_executor.go | 3 +- .../codex_executor_instructions_test.go | 54 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 internal/runtime/executor/codex_executor_instructions_test.go diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index fddf343d..bd5ef00b 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -114,7 +114,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") - if !gjson.GetBytes(body, "instructions").Exists() { + instructions := gjson.GetBytes(body, "instructions") + if !instructions.Exists() || instructions.Type == gjson.Null { body, _ = sjson.SetBytes(body, "instructions", "") } diff --git a/internal/runtime/executor/codex_executor_instructions_test.go b/internal/runtime/executor/codex_executor_instructions_test.go new file mode 100644 index 00000000..0ed791c0 --- /dev/null +++ b/internal/runtime/executor/codex_executor_instructions_test.go @@ -0,0 +1,54 @@ +package executor + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdktranslator 
"github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" + "github.com/tidwall/gjson" +) + +func TestCodexExecutorExecuteNormalizesNullInstructions(t *testing.T) { + var gotPath string + var gotBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + body, _ := io.ReadAll(r.Body) + gotBody = body + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_1\",\"object\":\"response\",\"created_at\":0,\"status\":\"completed\",\"background\":false,\"error\":null}}\n\n")) + })) + defer server.Close() + + executor := NewCodexExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "base_url": server.URL, + "api_key": "test", + }} + + _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(`{"model":"gpt-5.4","instructions":null,"input":"hello"}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + Stream: false, + }) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + if gotPath != "/responses" { + t.Fatalf("path = %q, want %q", gotPath, "/responses") + } + if gjson.GetBytes(gotBody, "instructions").Type != gjson.String { + t.Fatalf("instructions type = %v, want string", gjson.GetBytes(gotBody, "instructions").Type) + } + if gjson.GetBytes(gotBody, "instructions").String() != "" { + t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String()) + } +} From 39b9a38fbc5526b1a97b19d107398778fdd69791 Mon Sep 17 00:00:00 2001 From: MonsterQiu <72pgstan@gmail.com> Date: Tue, 31 Mar 2026 10:32:39 +0800 Subject: [PATCH 22/42] fix(codex): normalize null instructions across responses paths --- internal/runtime/executor/codex_executor.go | 21 +++--- .../codex_executor_instructions_test.go | 69 +++++++++++++++++++ 2 files changed, 80 
insertions(+), 10 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index bd5ef00b..c41af032 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -114,10 +114,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") - instructions := gjson.GetBytes(body, "instructions") - if !instructions.Exists() || instructions.Type == gjson.Null { - body, _ = sjson.SetBytes(body, "instructions", "") - } + body = normalizeCodexInstructions(body) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -315,9 +312,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") body, _ = sjson.SetBytes(body, "model", baseModel) - if !gjson.GetBytes(body, "instructions").Exists() { - body, _ = sjson.SetBytes(body, "instructions", "") - } + body = normalizeCodexInstructions(body) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -420,9 +415,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") body, _ = sjson.SetBytes(body, "stream", false) - if !gjson.GetBytes(body, "instructions").Exists() { - body, _ = sjson.SetBytes(body, "instructions", "") - } + body = normalizeCodexInstructions(body) enc, err := tokenizerForCodexModel(baseModel) if err != nil { @@ -700,6 +693,14 @@ func newCodexStatusErr(statusCode int, body []byte) statusErr { return err } +func normalizeCodexInstructions(body 
[]byte) []byte { + instructions := gjson.GetBytes(body, "instructions") + if !instructions.Exists() || instructions.Type == gjson.Null { + body, _ = sjson.SetBytes(body, "instructions", "") + } + return body +} + func isCodexModelCapacityError(errorBody []byte) bool { if len(errorBody) == 0 { return false diff --git a/internal/runtime/executor/codex_executor_instructions_test.go b/internal/runtime/executor/codex_executor_instructions_test.go index 0ed791c0..c5dc5aa8 100644 --- a/internal/runtime/executor/codex_executor_instructions_test.go +++ b/internal/runtime/executor/codex_executor_instructions_test.go @@ -52,3 +52,72 @@ func TestCodexExecutorExecuteNormalizesNullInstructions(t *testing.T) { t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String()) } } + +func TestCodexExecutorExecuteStreamNormalizesNullInstructions(t *testing.T) { + var gotPath string + var gotBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + body, _ := io.ReadAll(r.Body) + gotBody = body + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_1\",\"object\":\"response\",\"created_at\":0,\"status\":\"completed\",\"background\":false,\"error\":null}}\n\n")) + })) + defer server.Close() + + executor := NewCodexExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "base_url": server.URL, + "api_key": "test", + }} + + result, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(`{"model":"gpt-5.4","instructions":null,"input":"hello"}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + Stream: true, + }) + if err != nil { + t.Fatalf("ExecuteStream error: %v", err) + } + for range result.Chunks { + } + if gotPath != "/responses" { + 
t.Fatalf("path = %q, want %q", gotPath, "/responses") + } + if gjson.GetBytes(gotBody, "instructions").Type != gjson.String { + t.Fatalf("instructions type = %v, want string", gjson.GetBytes(gotBody, "instructions").Type) + } + if gjson.GetBytes(gotBody, "instructions").String() != "" { + t.Fatalf("instructions = %q, want empty string", gjson.GetBytes(gotBody, "instructions").String()) + } +} + +func TestCodexExecutorCountTokensTreatsNullInstructionsAsEmpty(t *testing.T) { + executor := NewCodexExecutor(&config.Config{}) + + nullResp, err := executor.CountTokens(context.Background(), nil, cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(`{"model":"gpt-5.4","instructions":null,"input":"hello"}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + }) + if err != nil { + t.Fatalf("CountTokens(null) error: %v", err) + } + + emptyResp, err := executor.CountTokens(context.Background(), nil, cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(`{"model":"gpt-5.4","instructions":"","input":"hello"}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai-response"), + }) + if err != nil { + t.Fatalf("CountTokens(empty) error: %v", err) + } + + if string(nullResp.Payload) != string(emptyResp.Payload) { + t.Fatalf("token count payload mismatch:\nnull=%s\nempty=%s", string(nullResp.Payload), string(emptyResp.Payload)) + } +} From 51fd58d74fe2b8706f3f424a8ab9daafc9ade16a Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Tue, 31 Mar 2026 12:16:57 +0800 Subject: [PATCH 23/42] fix(codex): use normalizeCodexInstructions to set default instructions --- internal/runtime/executor/codex_executor.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 989a6b93..9eafb6be 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ 
-219,10 +219,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "stream") - instructions := gjson.GetBytes(body, "instructions") - if !instructions.Exists() || instructions.Type == gjson.Null { - body, _ = sjson.SetBytes(body, "instructions", "") - } + body = normalizeCodexInstructions(body) url := strings.TrimSuffix(baseURL, "/") + "/responses/compact" httpReq, err := e.cacheHelper(ctx, from, url, req, body) From 07b7c1a1e01f14d06c6d33abe382013cadaf20f5 Mon Sep 17 00:00:00 2001 From: MonsterQiu <72pgstan@gmail.com> Date: Tue, 31 Mar 2026 14:27:14 +0800 Subject: [PATCH 24/42] fix(auth): resolve oauth aliases before suspension checks --- sdk/cliproxy/auth/conductor.go | 176 ++++++++++++++++-- .../conductor_oauth_alias_suspension_test.go | 111 +++++++++++ 2 files changed, 275 insertions(+), 12 deletions(-) create mode 100644 sdk/cliproxy/auth/conductor_oauth_alias_suspension_test.go diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 61f32278..82037c51 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "path/filepath" + "sort" "strconv" "strings" "sync" @@ -437,6 +438,27 @@ func (m *Manager) executionModelCandidates(auth *Auth, routeModel string) []stri return []string{resolved} } +func (m *Manager) selectionModelForAuth(auth *Auth, routeModel string) string { + requestedModel := rewriteModelForAuth(routeModel, auth) + if strings.TrimSpace(requestedModel) == "" { + requestedModel = strings.TrimSpace(routeModel) + } + resolvedModel := m.applyOAuthModelAlias(auth, requestedModel) + if strings.TrimSpace(resolvedModel) == "" { + resolvedModel = requestedModel + } + return resolvedModel +} + +func (m *Manager) stateModelForExecution(auth *Auth, 
routeModel, upstreamModel string, pooled bool) string { + stateModel := executionResultModel(routeModel, upstreamModel, pooled) + selectionModel := m.selectionModelForAuth(auth, routeModel) + if canonicalModelKey(selectionModel) == canonicalModelKey(upstreamModel) && strings.TrimSpace(selectionModel) != "" { + return strings.TrimSpace(upstreamModel) + } + return stateModel +} + func executionResultModel(routeModel, upstreamModel string, pooled bool) string { if pooled { if resolved := strings.TrimSpace(upstreamModel); resolved != "" { @@ -449,14 +471,14 @@ func executionResultModel(routeModel, upstreamModel string, pooled bool) string return strings.TrimSpace(upstreamModel) } -func filterExecutionModels(auth *Auth, routeModel string, candidates []string, pooled bool) []string { +func (m *Manager) filterExecutionModels(auth *Auth, routeModel string, candidates []string, pooled bool) []string { if len(candidates) == 0 { return nil } now := time.Now() out := make([]string, 0, len(candidates)) for _, upstreamModel := range candidates { - stateModel := executionResultModel(routeModel, upstreamModel, pooled) + stateModel := m.stateModelForExecution(auth, routeModel, upstreamModel, pooled) blocked, _, _ := isAuthBlockedForModel(auth, stateModel, now) if blocked { continue @@ -469,7 +491,7 @@ func filterExecutionModels(auth *Auth, routeModel string, candidates []string, p func (m *Manager) preparedExecutionModels(auth *Auth, routeModel string) ([]string, bool) { candidates := m.executionModelCandidates(auth, routeModel) pooled := len(candidates) > 1 - return filterExecutionModels(auth, routeModel, candidates, pooled), pooled + return m.filterExecutionModels(auth, routeModel, candidates, pooled), pooled } func (m *Manager) prepareExecutionModels(auth *Auth, routeModel string) []string { @@ -477,6 +499,62 @@ func (m *Manager) prepareExecutionModels(auth *Auth, routeModel string) []string return models } +func (m *Manager) availableAuthsForRouteModel(auths []*Auth, provider, 
routeModel string, now time.Time) ([]*Auth, error) { + if len(auths) == 0 { + return nil, &Error{Code: "auth_not_found", Message: "no auth candidates"} + } + + availableByPriority := make(map[int][]*Auth) + cooldownCount := 0 + var earliest time.Time + for i := 0; i < len(auths); i++ { + candidate := auths[i] + checkModel := m.selectionModelForAuth(candidate, routeModel) + blocked, reason, next := isAuthBlockedForModel(candidate, checkModel, now) + if !blocked { + priority := authPriority(candidate) + availableByPriority[priority] = append(availableByPriority[priority], candidate) + continue + } + if reason == blockReasonCooldown { + cooldownCount++ + if !next.IsZero() && (earliest.IsZero() || next.Before(earliest)) { + earliest = next + } + } + } + + if len(availableByPriority) == 0 { + if cooldownCount == len(auths) && !earliest.IsZero() { + providerForError := provider + if providerForError == "mixed" { + providerForError = "" + } + resetIn := earliest.Sub(now) + if resetIn < 0 { + resetIn = 0 + } + return nil, newModelCooldownError(routeModel, providerForError, resetIn) + } + return nil, &Error{Code: "auth_unavailable", Message: "no auth available"} + } + + bestPriority := 0 + found := false + for priority := range availableByPriority { + if !found || priority > bestPriority { + bestPriority = priority + found = true + } + } + + available := availableByPriority[bestPriority] + if len(available) > 1 { + sort.Slice(available, func(i, j int) bool { return available[i].ID < available[j].ID }) + } + return available, nil +} + func discardStreamChunks(ch <-chan cliproxyexecutor.StreamChunk) { if ch == nil { return @@ -627,7 +705,7 @@ func (m *Manager) executeStreamWithModelPool(ctx context.Context, executor Provi } var lastErr error for idx, execModel := range execModels { - resultModel := executionResultModel(routeModel, execModel, pooled) + resultModel := m.stateModelForExecution(auth, routeModel, execModel, pooled) execReq := req execReq.Model = execModel 
streamResult, errStream := executor.ExecuteStream(ctx, auth, execReq, opts) @@ -1107,7 +1185,7 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req attempted[auth.ID] = struct{}{} var authErr error for _, upstreamModel := range models { - resultModel := executionResultModel(routeModel, upstreamModel, pooled) + resultModel := m.stateModelForExecution(auth, routeModel, upstreamModel, pooled) execReq := req execReq.Model = upstreamModel resp, errExec := executor.Execute(execCtx, auth, execReq, opts) @@ -1185,7 +1263,7 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, attempted[auth.ID] = struct{}{} var authErr error for _, upstreamModel := range models { - resultModel := executionResultModel(routeModel, upstreamModel, pooled) + resultModel := m.stateModelForExecution(auth, routeModel, upstreamModel, pooled) execReq := req execReq.Model = upstreamModel resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) @@ -2271,6 +2349,13 @@ func shouldRetrySchedulerPick(err error) bool { return authErr.Code == "auth_not_found" || authErr.Code == "auth_unavailable" } +func (m *Manager) routeAwareSelectionRequired(auth *Auth, routeModel string) bool { + if auth == nil || strings.TrimSpace(routeModel) == "" { + return false + } + return canonicalModelKey(m.selectionModelForAuth(auth, routeModel)) != canonicalModelKey(routeModel) +} + func (m *Manager) pickNextLegacy(ctx context.Context, provider, model string, opts cliproxyexecutor.Options, tried map[string]struct{}) (*Auth, ProviderExecutor, error) { pinnedAuthID := pinnedAuthIDFromMetadata(opts.Metadata) @@ -2300,8 +2385,17 @@ func (m *Manager) pickNextLegacy(ctx context.Context, provider, model string, op if _, used := tried[candidate.ID]; used { continue } - if modelKey != "" && registryRef != nil && !registryRef.ClientSupportsModel(candidate.ID, modelKey) { - continue + if modelKey != "" && registryRef != nil { + supportsModel := 
registryRef.ClientSupportsModel(candidate.ID, modelKey) + if !supportsModel { + selectionKey := canonicalModelKey(m.selectionModelForAuth(candidate, model)) + if selectionKey != "" && selectionKey != modelKey { + supportsModel = registryRef.ClientSupportsModel(candidate.ID, selectionKey) + } + } + if !supportsModel { + continue + } } candidates = append(candidates, candidate) } @@ -2309,7 +2403,12 @@ func (m *Manager) pickNextLegacy(ctx context.Context, provider, model string, op m.mu.RUnlock() return nil, nil, &Error{Code: "auth_not_found", Message: "no auth available"} } - selected, errPick := m.selector.Pick(ctx, provider, model, opts, candidates) + available, errAvailable := m.availableAuthsForRouteModel(candidates, provider, model, time.Now()) + if errAvailable != nil { + m.mu.RUnlock() + return nil, nil, errAvailable + } + selected, errPick := m.selector.Pick(ctx, provider, "", opts, available) if errPick != nil { m.mu.RUnlock() return nil, nil, errPick @@ -2335,6 +2434,22 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli if !m.useSchedulerFastPath() { return m.pickNextLegacy(ctx, provider, model, opts, tried) } + if strings.TrimSpace(model) != "" { + m.mu.RLock() + for _, candidate := range m.auths { + if candidate == nil || candidate.Provider != provider || candidate.Disabled { + continue + } + if _, used := tried[candidate.ID]; used { + continue + } + if m.routeAwareSelectionRequired(candidate, model) { + m.mu.RUnlock() + return m.pickNextLegacy(ctx, provider, model, opts, tried) + } + } + m.mu.RUnlock() + } executor, okExecutor := m.Executor(provider) if !okExecutor { return nil, nil, &Error{Code: "executor_not_found", Message: "executor not registered"} @@ -2408,8 +2523,17 @@ func (m *Manager) pickNextMixedLegacy(ctx context.Context, providers []string, m if _, ok := m.executors[providerKey]; !ok { continue } - if modelKey != "" && registryRef != nil && !registryRef.ClientSupportsModel(candidate.ID, modelKey) { - 
continue + if modelKey != "" && registryRef != nil { + supportsModel := registryRef.ClientSupportsModel(candidate.ID, modelKey) + if !supportsModel { + selectionKey := canonicalModelKey(m.selectionModelForAuth(candidate, model)) + if selectionKey != "" && selectionKey != modelKey { + supportsModel = registryRef.ClientSupportsModel(candidate.ID, selectionKey) + } + } + if !supportsModel { + continue + } } candidates = append(candidates, candidate) } @@ -2417,7 +2541,12 @@ func (m *Manager) pickNextMixedLegacy(ctx context.Context, providers []string, m m.mu.RUnlock() return nil, nil, "", &Error{Code: "auth_not_found", Message: "no auth available"} } - selected, errPick := m.selector.Pick(ctx, "mixed", model, opts, candidates) + available, errAvailable := m.availableAuthsForRouteModel(candidates, "mixed", model, time.Now()) + if errAvailable != nil { + m.mu.RUnlock() + return nil, nil, "", errAvailable + } + selected, errPick := m.selector.Pick(ctx, "mixed", "", opts, available) if errPick != nil { m.mu.RUnlock() return nil, nil, "", errPick @@ -2469,6 +2598,29 @@ func (m *Manager) pickNextMixed(ctx context.Context, providers []string, model s if len(eligibleProviders) == 0 { return nil, nil, "", &Error{Code: "auth_not_found", Message: "no auth available"} } + if strings.TrimSpace(model) != "" { + providerSet := make(map[string]struct{}, len(eligibleProviders)) + for _, providerKey := range eligibleProviders { + providerSet[providerKey] = struct{}{} + } + m.mu.RLock() + for _, candidate := range m.auths { + if candidate == nil || candidate.Disabled { + continue + } + if _, ok := providerSet[strings.TrimSpace(strings.ToLower(candidate.Provider))]; !ok { + continue + } + if _, used := tried[candidate.ID]; used { + continue + } + if m.routeAwareSelectionRequired(candidate, model) { + m.mu.RUnlock() + return m.pickNextMixedLegacy(ctx, providers, model, opts, tried) + } + } + m.mu.RUnlock() + } selected, providerKey, errPick := m.scheduler.pickMixed(ctx, eligibleProviders, 
model, opts, tried) if errPick != nil && model != "" && shouldRetrySchedulerPick(errPick) { diff --git a/sdk/cliproxy/auth/conductor_oauth_alias_suspension_test.go b/sdk/cliproxy/auth/conductor_oauth_alias_suspension_test.go new file mode 100644 index 00000000..8bc779e5 --- /dev/null +++ b/sdk/cliproxy/auth/conductor_oauth_alias_suspension_test.go @@ -0,0 +1,111 @@ +package auth + +import ( + "context" + "net/http" + "sync" + "testing" + "time" + + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" +) + +type aliasRoutingExecutor struct { + id string + + mu sync.Mutex + executeModels []string +} + +func (e *aliasRoutingExecutor) Identifier() string { return e.id } + +func (e *aliasRoutingExecutor) Execute(_ context.Context, _ *Auth, req cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + e.mu.Lock() + e.executeModels = append(e.executeModels, req.Model) + e.mu.Unlock() + return cliproxyexecutor.Response{Payload: []byte(req.Model)}, nil +} + +func (e *aliasRoutingExecutor) ExecuteStream(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) { + return nil, &Error{HTTPStatus: http.StatusNotImplemented, Message: "ExecuteStream not implemented"} +} + +func (e *aliasRoutingExecutor) Refresh(_ context.Context, auth *Auth) (*Auth, error) { + return auth, nil +} + +func (e *aliasRoutingExecutor) CountTokens(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + return cliproxyexecutor.Response{}, &Error{HTTPStatus: http.StatusNotImplemented, Message: "CountTokens not implemented"} +} + +func (e *aliasRoutingExecutor) HttpRequest(context.Context, *Auth, *http.Request) (*http.Response, error) { + return nil, &Error{HTTPStatus: 
http.StatusNotImplemented, Message: "HttpRequest not implemented"} +} + +func (e *aliasRoutingExecutor) ExecuteModels() []string { + e.mu.Lock() + defer e.mu.Unlock() + out := make([]string, len(e.executeModels)) + copy(out, e.executeModels) + return out +} + +func TestManagerExecute_OAuthAliasBypassesBlockedRouteModel(t *testing.T) { + const ( + provider = "antigravity" + routeModel = "claude-opus-4-6" + targetModel = "claude-opus-4-6-thinking" + ) + + manager := NewManager(nil, nil, nil) + executor := &aliasRoutingExecutor{id: provider} + manager.RegisterExecutor(executor) + manager.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{ + provider: {{ + Name: targetModel, + Alias: routeModel, + Fork: true, + }}, + }) + + auth := &Auth{ + ID: "oauth-alias-auth", + Provider: provider, + Status: StatusActive, + ModelStates: map[string]*ModelState{ + routeModel: { + Unavailable: true, + Status: StatusError, + NextRetryAfter: time.Now().Add(1 * time.Hour), + }, + }, + } + if _, errRegister := manager.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + reg := registry.GetGlobalRegistry() + reg.RegisterClient(auth.ID, provider, []*registry.ModelInfo{{ID: routeModel}, {ID: targetModel}}) + t.Cleanup(func() { + reg.UnregisterClient(auth.ID) + }) + manager.RefreshSchedulerEntry(auth.ID) + + resp, errExecute := manager.Execute(context.Background(), []string{provider}, cliproxyexecutor.Request{Model: routeModel}, cliproxyexecutor.Options{}) + if errExecute != nil { + t.Fatalf("execute error = %v, want success", errExecute) + } + if string(resp.Payload) != targetModel { + t.Fatalf("execute payload = %q, want %q", string(resp.Payload), targetModel) + } + + gotModels := executor.ExecuteModels() + if len(gotModels) != 1 { + t.Fatalf("execute models len = %d, want 1", len(gotModels)) + } + if gotModels[0] != targetModel { + t.Fatalf("execute model = %q, want %q", gotModels[0], targetModel) + } +} From 
f611dd6e967f4f7844638b6546848bad7bb21ebe Mon Sep 17 00:00:00 2001 From: MonsterQiu <72pgstan@gmail.com> Date: Tue, 31 Mar 2026 15:42:25 +0800 Subject: [PATCH 25/42] refactor(auth): dedupe route-aware model support checks --- sdk/cliproxy/auth/conductor.go | 61 +++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 82037c51..478c7921 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -450,6 +450,10 @@ func (m *Manager) selectionModelForAuth(auth *Auth, routeModel string) string { return resolvedModel } +func (m *Manager) selectionModelKeyForAuth(auth *Auth, routeModel string) string { + return canonicalModelKey(m.selectionModelForAuth(auth, routeModel)) +} + func (m *Manager) stateModelForExecution(auth *Auth, routeModel, upstreamModel string, pooled bool) string { stateModel := executionResultModel(routeModel, upstreamModel, pooled) selectionModel := m.selectionModelForAuth(auth, routeModel) @@ -507,8 +511,7 @@ func (m *Manager) availableAuthsForRouteModel(auths []*Auth, provider, routeMode availableByPriority := make(map[int][]*Auth) cooldownCount := 0 var earliest time.Time - for i := 0; i < len(auths); i++ { - candidate := auths[i] + for _, candidate := range auths { checkModel := m.selectionModelForAuth(candidate, routeModel) blocked, reason, next := isAuthBlockedForModel(candidate, checkModel, now) if !blocked { @@ -555,6 +558,28 @@ func (m *Manager) availableAuthsForRouteModel(auths []*Auth, provider, routeMode return available, nil } +func selectionArgForSelector(selector Selector, routeModel string) string { + if isBuiltInSelector(selector) { + return "" + } + return routeModel +} + +func (m *Manager) authSupportsRouteModel(registryRef *registry.ModelRegistry, auth *Auth, routeModel string) bool { + if registryRef == nil || auth == nil { + return true + } + routeKey := canonicalModelKey(routeModel) + if routeKey 
== "" { + return true + } + if registryRef.ClientSupportsModel(auth.ID, routeKey) { + return true + } + selectionKey := m.selectionModelKeyForAuth(auth, routeModel) + return selectionKey != "" && selectionKey != routeKey && registryRef.ClientSupportsModel(auth.ID, selectionKey) +} + func discardStreamChunks(ch <-chan cliproxyexecutor.StreamChunk) { if ch == nil { return @@ -2353,7 +2378,7 @@ func (m *Manager) routeAwareSelectionRequired(auth *Auth, routeModel string) boo if auth == nil || strings.TrimSpace(routeModel) == "" { return false } - return canonicalModelKey(m.selectionModelForAuth(auth, routeModel)) != canonicalModelKey(routeModel) + return m.selectionModelKeyForAuth(auth, routeModel) != canonicalModelKey(routeModel) } func (m *Manager) pickNextLegacy(ctx context.Context, provider, model string, opts cliproxyexecutor.Options, tried map[string]struct{}) (*Auth, ProviderExecutor, error) { @@ -2385,17 +2410,8 @@ func (m *Manager) pickNextLegacy(ctx context.Context, provider, model string, op if _, used := tried[candidate.ID]; used { continue } - if modelKey != "" && registryRef != nil { - supportsModel := registryRef.ClientSupportsModel(candidate.ID, modelKey) - if !supportsModel { - selectionKey := canonicalModelKey(m.selectionModelForAuth(candidate, model)) - if selectionKey != "" && selectionKey != modelKey { - supportsModel = registryRef.ClientSupportsModel(candidate.ID, selectionKey) - } - } - if !supportsModel { - continue - } + if modelKey != "" && !m.authSupportsRouteModel(registryRef, candidate, model) { + continue } candidates = append(candidates, candidate) } @@ -2408,7 +2424,7 @@ func (m *Manager) pickNextLegacy(ctx context.Context, provider, model string, op m.mu.RUnlock() return nil, nil, errAvailable } - selected, errPick := m.selector.Pick(ctx, provider, "", opts, available) + selected, errPick := m.selector.Pick(ctx, provider, selectionArgForSelector(m.selector, model), opts, available) if errPick != nil { m.mu.RUnlock() return nil, nil, 
errPick @@ -2523,17 +2539,8 @@ func (m *Manager) pickNextMixedLegacy(ctx context.Context, providers []string, m if _, ok := m.executors[providerKey]; !ok { continue } - if modelKey != "" && registryRef != nil { - supportsModel := registryRef.ClientSupportsModel(candidate.ID, modelKey) - if !supportsModel { - selectionKey := canonicalModelKey(m.selectionModelForAuth(candidate, model)) - if selectionKey != "" && selectionKey != modelKey { - supportsModel = registryRef.ClientSupportsModel(candidate.ID, selectionKey) - } - } - if !supportsModel { - continue - } + if modelKey != "" && !m.authSupportsRouteModel(registryRef, candidate, model) { + continue } candidates = append(candidates, candidate) } @@ -2546,7 +2553,7 @@ func (m *Manager) pickNextMixedLegacy(ctx context.Context, providers []string, m m.mu.RUnlock() return nil, nil, "", errAvailable } - selected, errPick := m.selector.Pick(ctx, "mixed", "", opts, available) + selected, errPick := m.selector.Pick(ctx, "mixed", selectionArgForSelector(m.selector, model), opts, available) if errPick != nil { m.mu.RUnlock() return nil, nil, "", errPick From ec77f4a4f5f9d9155163e58e50a17d07ad41a1bc Mon Sep 17 00:00:00 2001 From: 0oAstro <79555780+0oAstro@users.noreply.github.com> Date: Tue, 31 Mar 2026 13:55:01 +0530 Subject: [PATCH 26/42] fix(codex): set finish_reason to tool_calls in non-streaming response when tool calls are present --- .../openai/chat-completions/codex_openai_response.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_response.go b/internal/translator/codex/openai/chat-completions/codex_openai_response.go index ab728a24..afae35d4 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_response.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_response.go @@ -284,12 +284,12 @@ func ConvertCodexResponseToOpenAINonStream(_ context.Context, _ string, original } // Process the 
output array for content and function calls + var toolCalls [][]byte outputResult := responseResult.Get("output") if outputResult.IsArray() { outputArray := outputResult.Array() var contentText string var reasoningText string - var toolCalls [][]byte for _, outputItem := range outputArray { outputType := outputItem.Get("type").String() @@ -367,8 +367,12 @@ func ConvertCodexResponseToOpenAINonStream(_ context.Context, _ string, original if statusResult := responseResult.Get("status"); statusResult.Exists() { status := statusResult.String() if status == "completed" { - template, _ = sjson.SetBytes(template, "choices.0.finish_reason", "stop") - template, _ = sjson.SetBytes(template, "choices.0.native_finish_reason", "stop") + finishReason := "stop" + if len(toolCalls) > 0 { + finishReason = "tool_calls" + } + template, _ = sjson.SetBytes(template, "choices.0.finish_reason", finishReason) + template, _ = sjson.SetBytes(template, "choices.0.native_finish_reason", finishReason) } } From 1b44364e782c3cd5e022f047f9c84e602c33bb05 Mon Sep 17 00:00:00 2001 From: Lucaszmv Date: Tue, 31 Mar 2026 14:48:04 -0300 Subject: [PATCH 27/42] fix(qwen): update CLI simulation to v0.13.2 --- internal/runtime/executor/qwen_executor.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ff19dcb5..941b6696 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -23,7 +23,7 @@ import ( ) const ( - qwenUserAgent = "QwenCode/0.10.3 (darwin; arm64)" + qwenUserAgent = "QwenCode/0.13.2 (darwin; arm64)" qwenRateLimitPerMin = 60 // 60 requests per minute per credential qwenRateLimitWindow = time.Minute // sliding window duration ) @@ -508,16 +508,15 @@ func applyQwenHeaders(r *http.Request, token string, stream bool) { r.Header.Set("Content-Type", "application/json") r.Header.Set("Authorization", "Bearer "+token) 
r.Header.Set("User-Agent", qwenUserAgent) - r.Header.Set("X-Dashscope-Useragent", qwenUserAgent) + r.Header["X-DashScope-UserAgent"] = []string{qwenUserAgent} r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0") - r.Header.Set("Sec-Fetch-Mode", "cors") r.Header.Set("X-Stainless-Lang", "js") r.Header.Set("X-Stainless-Arch", "arm64") r.Header.Set("X-Stainless-Package-Version", "5.11.0") - r.Header.Set("X-Dashscope-Cachecontrol", "enable") + r.Header["X-DashScope-CacheControl"] = []string{"enable"} r.Header.Set("X-Stainless-Retry-Count", "0") r.Header.Set("X-Stainless-Os", "MacOS") - r.Header.Set("X-Dashscope-Authtype", "qwen-oauth") + r.Header["X-DashScope-AuthType"] = []string{"qwen-oauth"} r.Header.Set("X-Stainless-Runtime", "node") if stream { From d2c7e4e96a7115cc065b6349dbc40a04f5a9e926 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 03:08:20 +0800 Subject: [PATCH 28/42] refactor(runtime): move executor utilities to `helps` package and update references --- .../runtime/executor/aistudio_executor.go | 71 +++++---- .../runtime/executor/antigravity_executor.go | 129 +++++++-------- internal/runtime/executor/claude_executor.go | 141 +++++++++-------- .../runtime/executor/claude_executor_test.go | 17 +- internal/runtime/executor/codex_executor.go | 101 ++++++------ .../executor/codex_websockets_executor.go | 92 +++++------ .../runtime/executor/gemini_cli_executor.go | 83 +++++----- internal/runtime/executor/gemini_executor.go | 83 +++++----- .../executor/gemini_vertex_executor.go | 149 +++++++++--------- .../executor/{ => helps}/cache_helpers.go | 16 +- .../{ => helps}/claude_device_profile.go | 68 ++++---- .../executor/{ => helps}/cloak_obfuscate.go | 10 +- .../executor/{ => helps}/cloak_utils.go | 14 +- .../executor/{ => helps}/logging_helpers.go | 28 ++-- .../executor/{ => helps}/payload_helpers.go | 8 +- .../executor/{ => helps}/proxy_helpers.go | 6 +- .../{ => helps}/proxy_helpers_test.go | 4 +- .../{ => helps}/thinking_providers.go | 2 +- 
.../executor/{ => helps}/token_helpers.go | 14 +- .../executor/{ => helps}/usage_helpers.go | 54 ++++--- .../{ => helps}/usage_helpers_test.go | 8 +- .../executor/{ => helps}/user_id_cache.go | 4 +- .../{ => helps}/user_id_cache_test.go | 18 +-- internal/runtime/executor/iflow_executor.go | 69 ++++---- internal/runtime/executor/kimi_executor.go | 59 +++---- .../executor/openai_compat_executor.go | 69 ++++---- internal/runtime/executor/qwen_executor.go | 71 +++++---- 27 files changed, 712 insertions(+), 676 deletions(-) rename internal/runtime/executor/{ => helps}/cache_helpers.go (81%) rename internal/runtime/executor/{ => helps}/claude_device_profile.go (84%) rename internal/runtime/executor/{ => helps}/cloak_obfuscate.go (93%) rename internal/runtime/executor/{ => helps}/cloak_utils.go (83%) rename internal/runtime/executor/{ => helps}/logging_helpers.go (92%) rename internal/runtime/executor/{ => helps}/payload_helpers.go (97%) rename internal/runtime/executor/{ => helps}/proxy_helpers.go (94%) rename internal/runtime/executor/{ => helps}/proxy_helpers_test.go (93%) rename internal/runtime/executor/{ => helps}/thinking_providers.go (97%) rename internal/runtime/executor/{ => helps}/token_helpers.go (94%) rename internal/runtime/executor/{ => helps}/usage_helpers.go (91%) rename internal/runtime/executor/{ => helps}/usage_helpers_test.go (94%) rename internal/runtime/executor/{ => helps}/user_id_cache.go (96%) rename internal/runtime/executor/{ => helps}/user_id_cache_test.go (83%) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index db56a183..01c4e06e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -115,8 +116,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) translatedReq, body, err := e.translateRequest(req, opts, false) if err != nil { @@ -137,7 +138,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: endpoint, Method: http.MethodPost, Headers: wsReq.Headers.Clone(), @@ -151,17 +152,17 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, wsResp, err := e.relay.NonStream(ctx, authID, wsReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - recordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone()) if len(wsResp.Body) > 0 { - appendAPIResponseChunk(ctx, e.cfg, wsResp.Body) + helps.AppendAPIResponseChunk(ctx, e.cfg, wsResp.Body) } if wsResp.Status < 200 || wsResp.Status >= 300 { return resp, statusErr{code: wsResp.Status, msg: string(wsResp.Body)} } - reporter.publish(ctx, parseGeminiUsage(wsResp.Body)) + reporter.Publish(ctx, helps.ParseGeminiUsage(wsResp.Body)) var param any out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, 
translatedReq, wsResp.Body, ¶m) resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON(out), Headers: wsResp.Headers.Clone()} @@ -174,8 +175,8 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} } baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) translatedReq, body, err := e.translateRequest(req, opts, true) if err != nil { @@ -195,7 +196,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: endpoint, Method: http.MethodPost, Headers: wsReq.Headers.Clone(), @@ -208,24 +209,24 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth }) wsStream, err := e.relay.Stream(ctx, authID, wsReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } firstEvent, ok := <-wsStream if !ok { err = fmt.Errorf("wsrelay: stream closed before start") - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } if firstEvent.Status > 0 && firstEvent.Status != http.StatusOK { metadataLogged := false if firstEvent.Status > 0 { - recordAPIResponseMetadata(ctx, e.cfg, firstEvent.Status, firstEvent.Headers.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, firstEvent.Status, firstEvent.Headers.Clone()) metadataLogged = true } var body bytes.Buffer if len(firstEvent.Payload) > 0 { - appendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload) + helps.AppendAPIResponseChunk(ctx, 
e.cfg, firstEvent.Payload) body.Write(firstEvent.Payload) } if firstEvent.Type == wsrelay.MessageTypeStreamEnd { @@ -233,18 +234,18 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth } for event := range wsStream { if event.Err != nil { - recordAPIResponseError(ctx, e.cfg, event.Err) + helps.RecordAPIResponseError(ctx, e.cfg, event.Err) if body.Len() == 0 { body.WriteString(event.Err.Error()) } break } if !metadataLogged && event.Status > 0 { - recordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone()) metadataLogged = true } if len(event.Payload) > 0 { - appendAPIResponseChunk(ctx, e.cfg, event.Payload) + helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload) body.Write(event.Payload) } if event.Type == wsrelay.MessageTypeStreamEnd { @@ -260,23 +261,23 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth metadataLogged := false processEvent := func(event wsrelay.StreamEvent) bool { if event.Err != nil { - recordAPIResponseError(ctx, e.cfg, event.Err) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, event.Err) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("wsrelay: %v", event.Err)} return false } switch event.Type { case wsrelay.MessageTypeStreamStart: if !metadataLogged && event.Status > 0 { - recordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone()) metadataLogged = true } case wsrelay.MessageTypeStreamChunk: if len(event.Payload) > 0 { - appendAPIResponseChunk(ctx, e.cfg, event.Payload) - filtered := FilterSSEUsageMetadata(event.Payload) - if detail, ok := parseGeminiStreamUsage(filtered); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload) + filtered := helps.FilterSSEUsageMetadata(event.Payload) + 
if detail, ok := helps.ParseGeminiStreamUsage(filtered); ok { + reporter.Publish(ctx, detail) } lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, filtered, ¶m) for i := range lines { @@ -288,21 +289,21 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth return false case wsrelay.MessageTypeHTTPResp: if !metadataLogged && event.Status > 0 { - recordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, event.Status, event.Headers.Clone()) metadataLogged = true } if len(event.Payload) > 0 { - appendAPIResponseChunk(ctx, e.cfg, event.Payload) + helps.AppendAPIResponseChunk(ctx, e.cfg, event.Payload) } lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, event.Payload, ¶m) for i := range lines { out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON(lines[i])} } - reporter.publish(ctx, parseGeminiUsage(event.Payload)) + reporter.Publish(ctx, helps.ParseGeminiUsage(event.Payload)) return false case wsrelay.MessageTypeError: - recordAPIResponseError(ctx, e.cfg, event.Err) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, event.Err) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("wsrelay: %v", event.Err)} return false } @@ -345,7 +346,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: endpoint, Method: http.MethodPost, Headers: wsReq.Headers.Clone(), @@ -358,12 +359,12 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A }) resp, err := e.relay.NonStream(ctx, authID, wsReq) if err != nil { - recordAPIResponseError(ctx, 
e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } - recordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone()) if len(resp.Body) > 0 { - appendAPIResponseChunk(ctx, e.cfg, resp.Body) + helps.AppendAPIResponseChunk(ctx, e.cfg, resp.Body) } if resp.Status < 200 || resp.Status >= 300 { return cliproxyexecutor.Response{}, statusErr{code: resp.Status, msg: string(resp.Body)} @@ -404,8 +405,8 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c return nil, translatedPayload{}, err } payload = fixGeminiImageAspectRatio(baseModel, payload) - requestedModel := payloadRequestedModel(opts, req.Model) - payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + payload = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated, requestedModel) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 6ee972a7..d72dc035 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -24,6 +24,7 @@ import ( "github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth" @@ -142,7 +143,7 @@ func initAntigravityTransport() { func 
newAntigravityHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client { antigravityTransportOnce.Do(initAntigravityTransport) - client := newProxyAwareHTTPClient(ctx, cfg, auth, timeout) + client := helps.NewProxyAwareHTTPClient(ctx, cfg, auth, timeout) // If no transport is set, use the shared HTTP/1.1 transport. if client.Transport == nil { client.Transport = antigravityTransport @@ -405,12 +406,12 @@ func (e *AntigravityExecutor) attemptCreditsFallback( httpReq, errReq := e.buildRequest(ctx, auth, token, modelName, creditsPayload, stream, alt, baseURL) if errReq != nil { - recordAPIResponseError(ctx, e.cfg, errReq) + helps.RecordAPIResponseError(ctx, e.cfg, errReq) return nil, true } httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return nil, true } if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { @@ -420,16 +421,16 @@ func (e *AntigravityExecutor) attemptCreditsFallback( return httpResp, true } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) bodyBytes, errRead := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("antigravity executor: close credits fallback response body error: %v", errClose) } if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return nil, true } - appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { clearAntigravityPreferCredits(auth, modelName) markAntigravityCreditsExhausted(auth, now) @@ -457,8 +458,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth 
*cliproxyauth.Au auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") @@ -476,8 +477,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + translated = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0) @@ -507,7 +508,7 @@ attemptLoop: httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) { return resp, errDo } @@ -522,17 +523,17 @@ attemptLoop: return resp, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) bodyBytes, errRead := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("antigravity executor: close response body error: %v", errClose) } if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) err = errRead return resp, err } - appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode == http.StatusTooManyRequests { if 
usedCreditsDirect { @@ -543,29 +544,29 @@ attemptLoop: } else { creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) if creditsResp != nil { - recordAPIResponseMetadata(ctx, e.cfg, creditsResp.StatusCode, creditsResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, creditsResp.StatusCode, creditsResp.Header.Clone()) creditsBody, errCreditsRead := io.ReadAll(creditsResp.Body) if errClose := creditsResp.Body.Close(); errClose != nil { log.Errorf("antigravity executor: close credits success response body error: %v", errClose) } if errCreditsRead != nil { - recordAPIResponseError(ctx, e.cfg, errCreditsRead) + helps.RecordAPIResponseError(ctx, e.cfg, errCreditsRead) err = errCreditsRead return resp, err } - appendAPIResponseChunk(ctx, e.cfg, creditsBody) - reporter.publish(ctx, parseAntigravityUsage(creditsBody)) + helps.AppendAPIResponseChunk(ctx, e.cfg, creditsBody) + reporter.Publish(ctx, helps.ParseAntigravityUsage(creditsBody)) var param any converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, creditsBody, ¶m) resp = cliproxyexecutor.Response{Payload: converted, Headers: creditsResp.Header.Clone()} - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) return resp, nil } } } if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { - log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes)) + log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes)) lastStatus = httpResp.StatusCode lastBody = append([]byte(nil), bodyBytes...) 
lastErr = nil @@ -591,11 +592,11 @@ attemptLoop: return resp, err } - reporter.publish(ctx, parseAntigravityUsage(bodyBytes)) + reporter.Publish(ctx, helps.ParseAntigravityUsage(bodyBytes)) var param any converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bodyBytes, ¶m) resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()} - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) return resp, nil } @@ -625,8 +626,8 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") @@ -644,8 +645,8 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + translated = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0) @@ -675,7 +676,7 @@ attemptLoop: httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) { return resp, errDo } @@ -689,14 +690,14 @@ attemptLoop: err = errDo return resp, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + 
helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { bodyBytes, errRead := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("antigravity executor: close response body error: %v", errClose) } if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) { err = errRead return resp, err @@ -715,7 +716,7 @@ attemptLoop: err = errRead return resp, err } - appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode == http.StatusTooManyRequests { if usedCreditsDirect { if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { @@ -726,7 +727,7 @@ attemptLoop: creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) if creditsResp != nil { httpResp = creditsResp - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) } } } @@ -771,29 +772,29 @@ attemptLoop: scanner.Buffer(nil, streamScannerBuffer) for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) // Filter usage metadata for all models // Only retain usage statistics in the terminal chunk - line = FilterSSEUsageMetadata(line) + line = helps.FilterSSEUsageMetadata(line) - payload := jsonPayload(line) + payload := helps.JSONPayload(line) if payload == nil { continue } - if detail, ok := parseAntigravityStreamUsage(payload); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseAntigravityStreamUsage(payload); ok { + 
reporter.Publish(ctx, detail) } out <- cliproxyexecutor.StreamChunk{Payload: payload} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } else { - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) } }(httpResp) @@ -809,11 +810,11 @@ attemptLoop: } resp = cliproxyexecutor.Response{Payload: e.convertStreamToNonStream(buffer.Bytes())} - reporter.publish(ctx, parseAntigravityUsage(resp.Payload)) + reporter.Publish(ctx, helps.ParseAntigravityUsage(resp.Payload)) var param any converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, resp.Payload, ¶m) resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()} - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) return resp, nil } @@ -1042,8 +1043,8 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") @@ -1061,8 +1062,8 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya return nil, err } - requestedModel := payloadRequestedModel(opts, req.Model) - translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + translated = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated, requestedModel) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := 
newAntigravityHTTPClient(ctx, e.cfg, auth, 0) @@ -1091,7 +1092,7 @@ attemptLoop: } httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) { return nil, errDo } @@ -1105,14 +1106,14 @@ attemptLoop: err = errDo return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { bodyBytes, errRead := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("antigravity executor: close response body error: %v", errClose) } if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) if errors.Is(errRead, context.Canceled) || errors.Is(errRead, context.DeadlineExceeded) { err = errRead return nil, err @@ -1131,7 +1132,7 @@ attemptLoop: err = errRead return nil, err } - appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode == http.StatusTooManyRequests { if usedCreditsDirect { if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { @@ -1142,7 +1143,7 @@ attemptLoop: creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) if creditsResp != nil { httpResp = creditsResp - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) } } } @@ -1188,19 +1189,19 @@ attemptLoop: var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) + 
helps.AppendAPIResponseChunk(ctx, e.cfg, line) // Filter usage metadata for all models // Only retain usage statistics in the terminal chunk - line = FilterSSEUsageMetadata(line) + line = helps.FilterSSEUsageMetadata(line) - payload := jsonPayload(line) + payload := helps.JSONPayload(line) if payload == nil { continue } - if detail, ok := parseAntigravityStreamUsage(payload); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseAntigravityStreamUsage(payload); ok { + reporter.Publish(ctx, detail) } chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(payload), ¶m) @@ -1213,11 +1214,11 @@ attemptLoop: out <- cliproxyexecutor.StreamChunk{Payload: tail[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } else { - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) } }(httpResp) return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil @@ -1320,7 +1321,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut httpReq.Host = host } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: requestURL.String(), Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -1334,7 +1335,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) { return cliproxyexecutor.Response{}, errDo } @@ -1348,16 +1349,16 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut return 
cliproxyexecutor.Response{}, errDo } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) bodyBytes, errRead := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("antigravity executor: close response body error: %v", errClose) } if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return cliproxyexecutor.Response{}, errRead } - appendAPIResponseChunk(ctx, e.cfg, bodyBytes) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { count := gjson.GetBytes(bodyBytes, "totalTokens").Int() @@ -1624,7 +1625,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau if e.cfg != nil && e.cfg.RequestLog { payloadLog = []byte(payloadStr) } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: requestURL.String(), Method: http.MethodPost, Headers: httpReq.Header.Clone(), diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index cc88dd77..c417bc33 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -23,6 +23,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -91,7 +92,7 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut if err := 
e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -106,8 +107,8 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. @@ -130,8 +131,8 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // based on client type and configuration. body = applyCloaking(ctx, e.cfg, auth, body, baseModel, apiKey) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body = ensureModelMaxTokens(body, baseModel) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) @@ -172,7 +173,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -184,33 +185,33 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := 
helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { // Decompress error responses — pass the Content-Encoding value (may be empty) // and let decodeResponseBody handle both header-declared and magic-byte-detected // compression. This keeps error-path behaviour consistent with the success path. errBody, decErr := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if decErr != nil { - recordAPIResponseError(ctx, e.cfg, decErr) + helps.RecordAPIResponseError(ctx, e.cfg, decErr) msg := fmt.Sprintf("failed to decode error response body: %v", decErr) - logWithRequestID(ctx).Warn(msg) + helps.LogWithRequestID(ctx).Warn(msg) return resp, statusErr{code: httpResp.StatusCode, msg: msg} } b, readErr := io.ReadAll(errBody) if readErr != nil { - recordAPIResponseError(ctx, e.cfg, readErr) + helps.RecordAPIResponseError(ctx, e.cfg, readErr) msg := fmt.Sprintf("failed to read error response body: %v", readErr) - logWithRequestID(ctx).Warn(msg) + helps.LogWithRequestID(ctx).Warn(msg) b = []byte(msg) } - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close 
error: %v", errClose) @@ -219,7 +220,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } @@ -232,19 +233,19 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r }() data, err := io.ReadAll(decodedBody) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) if stream { lines := bytes.Split(data, []byte("\n")) for _, line := range lines { - if detail, ok := parseClaudeStreamUsage(line); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseClaudeStreamUsage(line); ok { + reporter.Publish(ctx, detail) } } } else { - reporter.publish(ctx, parseClaudeUsage(data)) + reporter.Publish(ctx, helps.ParseClaudeUsage(data)) } if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { data = stripClaudeToolPrefixFromResponse(data, claudeToolPrefix) @@ -275,8 +276,8 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") originalPayloadSource := req.Payload @@ -297,8 +298,8 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // based on client type and configuration. 
body = applyCloaking(ctx, e.cfg, auth, body, baseModel, apiKey) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body = ensureModelMaxTokens(body, baseModel) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) @@ -336,7 +337,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -348,33 +349,33 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { // Decompress error responses — pass the Content-Encoding value (may be empty) // and let decodeResponseBody handle both header-declared and magic-byte-detected // compression. This keeps error-path behaviour consistent with the success path. 
errBody, decErr := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if decErr != nil { - recordAPIResponseError(ctx, e.cfg, decErr) + helps.RecordAPIResponseError(ctx, e.cfg, decErr) msg := fmt.Sprintf("failed to decode error response body: %v", decErr) - logWithRequestID(ctx).Warn(msg) + helps.LogWithRequestID(ctx).Warn(msg) return nil, statusErr{code: httpResp.StatusCode, msg: msg} } b, readErr := io.ReadAll(errBody) if readErr != nil { - recordAPIResponseError(ctx, e.cfg, readErr) + helps.RecordAPIResponseError(ctx, e.cfg, readErr) msg := fmt.Sprintf("failed to read error response body: %v", readErr) - logWithRequestID(ctx).Warn(msg) + helps.LogWithRequestID(ctx).Warn(msg) b = []byte(msg) } - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } @@ -383,7 +384,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } decodedBody, err := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding")) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } @@ -404,9 +405,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A scanner.Buffer(nil, 52_428_800) // 50MB for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseClaudeStreamUsage(line); ok { - reporter.publish(ctx, detail) + 
helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseClaudeStreamUsage(line); ok { + reporter.Publish(ctx, detail) } if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { line = stripClaudeToolPrefixFromStreamLine(line, claudeToolPrefix) @@ -418,8 +419,8 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A out <- cliproxyexecutor.StreamChunk{Payload: cloned} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } return @@ -431,9 +432,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseClaudeStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseClaudeStreamUsage(line); ok { + reporter.Publish(ctx, detail) } if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { line = stripClaudeToolPrefixFromStreamLine(line, claudeToolPrefix) @@ -453,8 +454,8 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() @@ -503,7 +504,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -515,32 +516,32 @@ func (e 
*ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) resp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } - recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) if resp.StatusCode < 200 || resp.StatusCode >= 300 { // Decompress error responses — pass the Content-Encoding value (may be empty) // and let decodeResponseBody handle both header-declared and magic-byte-detected // compression. This keeps error-path behaviour consistent with the success path. errBody, decErr := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding")) if decErr != nil { - recordAPIResponseError(ctx, e.cfg, decErr) + helps.RecordAPIResponseError(ctx, e.cfg, decErr) msg := fmt.Sprintf("failed to decode error response body: %v", decErr) - logWithRequestID(ctx).Warn(msg) + helps.LogWithRequestID(ctx).Warn(msg) return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg} } b, readErr := io.ReadAll(errBody) if readErr != nil { - recordAPIResponseError(ctx, e.cfg, readErr) + helps.RecordAPIResponseError(ctx, e.cfg, readErr) msg := fmt.Sprintf("failed to read error response body: %v", readErr) - logWithRequestID(ctx).Warn(msg) + helps.LogWithRequestID(ctx).Warn(msg) b = []byte(msg) } - appendAPIResponseChunk(ctx, e.cfg, b) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) if errClose := errBody.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } @@ -548,7 +549,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut } decodedBody, err := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding")) if err 
!= nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) if errClose := resp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } @@ -561,10 +562,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut }() data, err := io.ReadAll(decodedBody) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) count := gjson.GetBytes(data, "input_tokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: out, Headers: resp.Header.Clone()}, nil @@ -800,10 +801,10 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, if ginCtx, ok := r.Context().Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil { ginHeaders = ginCtx.Request.Header } - stabilizeDeviceProfile := claudeDeviceProfileStabilizationEnabled(cfg) - var deviceProfile claudeDeviceProfile + stabilizeDeviceProfile := helps.ClaudeDeviceProfileStabilizationEnabled(cfg) + var deviceProfile helps.ClaudeDeviceProfile if stabilizeDeviceProfile { - deviceProfile = resolveClaudeDeviceProfile(auth, apiKey, ginHeaders, cfg) + deviceProfile = helps.ResolveClaudeDeviceProfile(auth, apiKey, ginHeaders, cfg) } baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05" @@ -871,9 +872,9 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, } util.ApplyCustomHeadersFromAttrs(r, attrs) if stabilizeDeviceProfile { - applyClaudeDeviceProfileHeaders(r, deviceProfile) + helps.ApplyClaudeDeviceProfileHeaders(r, deviceProfile) } else { - applyClaudeLegacyDeviceHeaders(r, ginHeaders, cfg) + helps.ApplyClaudeLegacyDeviceHeaders(r, 
ginHeaders, cfg) } // Re-enforce Accept-Encoding: identity after ApplyCustomHeadersFromAttrs, which // may override it with a user-configured value. Compressed SSE breaks the line @@ -1044,7 +1045,7 @@ func stripClaudeToolPrefixFromStreamLine(line []byte, prefix string) []byte { if prefix == "" { return line } - payload := jsonPayload(line) + payload := helps.JSONPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return line } @@ -1156,9 +1157,9 @@ func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *c func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte { generateID := func() string { if useCache { - return cachedUserID(apiKey) + return helps.CachedUserID(apiKey) } - return generateFakeUserID() + return helps.GenerateFakeUserID() } metadata := gjson.GetBytes(payload, "metadata") @@ -1168,7 +1169,7 @@ func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte { } existingUserID := gjson.GetBytes(payload, "metadata.user_id").String() - if existingUserID == "" || !isValidUserID(existingUserID) { + if existingUserID == "" || !helps.IsValidUserID(existingUserID) { payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateID()) } return payload @@ -1292,7 +1293,7 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A } // Determine if cloaking should be applied - if !shouldCloak(cloakMode, clientUserAgent) { + if !helps.ShouldCloak(cloakMode, clientUserAgent) { return payload } @@ -1306,8 +1307,8 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A // Apply sensitive word obfuscation if len(sensitiveWords) > 0 { - matcher := buildSensitiveWordMatcher(sensitiveWords) - payload = obfuscateSensitiveWords(payload, matcher) + matcher := helps.BuildSensitiveWordMatcher(sensitiveWords) + payload = helps.ObfuscateSensitiveWords(payload, matcher) } return payload diff --git a/internal/runtime/executor/claude_executor_test.go 
b/internal/runtime/executor/claude_executor_test.go index ee8e9025..b6acdda4 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -16,6 +16,7 @@ import ( "github.com/klauspost/compress/zstd" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -24,9 +25,7 @@ import ( ) func resetClaudeDeviceProfileCache() { - claudeDeviceProfileCacheMu.Lock() - claudeDeviceProfileCache = make(map[string]claudeDeviceProfileCacheEntry) - claudeDeviceProfileCacheMu.Unlock() + helps.ResetClaudeDeviceProfileCache() } func newClaudeHeaderTestRequest(t *testing.T, incoming http.Header) *http.Request { @@ -339,7 +338,7 @@ func TestResolveClaudeDeviceProfile_RechecksCacheBeforeStoringCandidate(t *testi var pauseOnce sync.Once var releaseOnce sync.Once - claudeDeviceProfileBeforeCandidateStore = func(candidate claudeDeviceProfile) { + helps.ClaudeDeviceProfileBeforeCandidateStore = func(candidate helps.ClaudeDeviceProfile) { if candidate.UserAgent != "claude-cli/2.1.62 (external, cli)" { return } @@ -347,13 +346,13 @@ func TestResolveClaudeDeviceProfile_RechecksCacheBeforeStoringCandidate(t *testi <-releaseLow } t.Cleanup(func() { - claudeDeviceProfileBeforeCandidateStore = nil + helps.ClaudeDeviceProfileBeforeCandidateStore = nil releaseOnce.Do(func() { close(releaseLow) }) }) - lowResultCh := make(chan claudeDeviceProfile, 1) + lowResultCh := make(chan helps.ClaudeDeviceProfile, 1) go func() { - lowResultCh <- resolveClaudeDeviceProfile(auth, "key-racy-upgrade", http.Header{ + lowResultCh <- helps.ResolveClaudeDeviceProfile(auth, "key-racy-upgrade", 
http.Header{ "User-Agent": []string{"claude-cli/2.1.62 (external, cli)"}, "X-Stainless-Package-Version": []string{"0.74.0"}, "X-Stainless-Runtime-Version": []string{"v24.3.0"}, @@ -368,7 +367,7 @@ func TestResolveClaudeDeviceProfile_RechecksCacheBeforeStoringCandidate(t *testi t.Fatal("timed out waiting for lower candidate to pause before storing") } - highResult := resolveClaudeDeviceProfile(auth, "key-racy-upgrade", http.Header{ + highResult := helps.ResolveClaudeDeviceProfile(auth, "key-racy-upgrade", http.Header{ "User-Agent": []string{"claude-cli/2.1.63 (external, cli)"}, "X-Stainless-Package-Version": []string{"0.75.0"}, "X-Stainless-Runtime-Version": []string{"v24.4.0"}, @@ -399,7 +398,7 @@ func TestResolveClaudeDeviceProfile_RechecksCacheBeforeStoringCandidate(t *testi t.Fatalf("highResult platform = %s/%s, want %s/%s", highResult.OS, highResult.Arch, "MacOS", "arm64") } - cached := resolveClaudeDeviceProfile(auth, "key-racy-upgrade", http.Header{ + cached := helps.ResolveClaudeDeviceProfile(auth, "key-racy-upgrade", http.Header{ "User-Agent": []string{"curl/8.7.1"}, }, cfg) if cached.UserAgent != "claude-cli/2.1.63 (external, cli)" { diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 9eafb6be..d404302a 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -13,6 +13,7 @@ import ( codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -73,7 +74,7 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth if err := 
e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -88,8 +89,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -106,8 +107,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -128,7 +129,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -139,10 +140,10 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re AuthType: authType, AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - 
recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -150,20 +151,20 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re log.Errorf("codex executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = newCodexStatusErr(httpResp.StatusCode, b) return resp, err } data, err := io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) lines := bytes.Split(data, []byte("\n")) for _, line := range lines { @@ -176,8 +177,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re continue } - if detail, ok := parseCodexUsage(line); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseCodexUsage(line); ok { + reporter.Publish(ctx, detail) } var param any @@ -197,8 +198,8 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, 
e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("openai-response") @@ -215,8 +216,8 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "stream") body = normalizeCodexInstructions(body) @@ -233,7 +234,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -244,10 +245,10 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A AuthType: authType, AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -255,22 +256,22 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A log.Errorf("codex executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := 
io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = newCodexStatusErr(httpResp.StatusCode, b) return resp, err } data, err := io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseOpenAIUsage(data)) - reporter.ensurePublished(ctx) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseOpenAIUsage(data)) + reporter.EnsurePublished(ctx) var param any out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, data, ¶m) resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()} @@ -288,8 +289,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -306,8 +307,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au return nil, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, 
to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.DeleteBytes(body, "safety_identifier") @@ -327,7 +328,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -339,24 +340,24 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { data, readErr := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("codex executor: close response body error: %v", errClose) } if readErr != nil { - recordAPIResponseError(ctx, e.cfg, readErr) + helps.RecordAPIResponseError(ctx, e.cfg, readErr) return nil, readErr } - appendAPIResponseChunk(ctx, e.cfg, data) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) err = 
newCodexStatusErr(httpResp.StatusCode, data) return nil, err } @@ -373,13 +374,13 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) if bytes.HasPrefix(line, dataTag) { data := bytes.TrimSpace(line[5:]) if gjson.GetBytes(data, "type").String() == "response.completed" { - if detail, ok := parseCodexUsage(data); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseCodexUsage(data); ok { + reporter.Publish(ctx, detail) } } } @@ -390,8 +391,8 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() @@ -595,18 +596,18 @@ func (e *CodexExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (* } func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Format, url string, req cliproxyexecutor.Request, rawJSON []byte) (*http.Request, error) { - var cache codexCache + var cache helps.CodexCache if from == "claude" { userIDResult := gjson.GetBytes(req.Payload, "metadata.user_id") if userIDResult.Exists() { key := fmt.Sprintf("%s-%s", req.Model, userIDResult.String()) var ok bool - if cache, ok = getCodexCache(key); !ok { - cache = codexCache{ + if cache, ok = helps.GetCodexCache(key); !ok { + cache = helps.CodexCache{ ID: uuid.New().String(), Expire: time.Now().Add(1 * time.Hour), } - setCodexCache(key, cache) + helps.SetCodexCache(key, cache) } } } else if from == "openai-response" { @@ -615,7 +616,7 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form cache.ID = promptCacheKey.String() } } else if from == "openai" { - if apiKey := 
strings.TrimSpace(apiKeyFromContext(ctx)); apiKey != "" { + if apiKey := strings.TrimSpace(helps.APIKeyFromContext(ctx)); apiKey != "" { cache.ID = uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:"+apiKey)).String() } } diff --git a/internal/runtime/executor/codex_websockets_executor.go b/internal/runtime/executor/codex_websockets_executor.go index fca82fe7..fdfccd9a 100644 --- a/internal/runtime/executor/codex_websockets_executor.go +++ b/internal/runtime/executor/codex_websockets_executor.go @@ -15,10 +15,12 @@ import ( "sync" "time" + "github.com/gin-gonic/gin" "github.com/google/uuid" "github.com/gorilla/websocket" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -155,8 +157,8 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -173,8 +175,8 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", 
baseModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -209,7 +211,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut } wsReqBody := buildCodexWebsocketRequestBody(body) - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: wsURL, Method: "WEBSOCKET", Headers: wsHeaders.Clone(), @@ -223,12 +225,12 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut conn, respHS, errDial := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders) if respHS != nil { - recordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone()) } if errDial != nil { bodyErr := websocketHandshakeBody(respHS) if len(bodyErr) > 0 { - appendAPIResponseChunk(ctx, e.cfg, bodyErr) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyErr) } if respHS != nil && respHS.StatusCode == http.StatusUpgradeRequired { return e.CodexExecutor.Execute(ctx, auth, req, opts) @@ -236,7 +238,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut if respHS != nil && respHS.StatusCode > 0 { return resp, statusErr{code: respHS.StatusCode, msg: string(bodyErr)} } - recordAPIResponseError(ctx, e.cfg, errDial) + helps.RecordAPIResponseError(ctx, e.cfg, errDial) return resp, errDial } closeHTTPResponseBody(respHS, "codex websockets executor: close handshake response body error") @@ -271,7 +273,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders) if errDialRetry == nil && connRetry != nil { wsReqBodyRetry := buildCodexWebsocketRequestBody(body) - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: wsURL, Method: 
"WEBSOCKET", Headers: wsHeaders.Clone(), @@ -287,15 +289,15 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut wsReqBody = wsReqBodyRetry } else { e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry) - recordAPIResponseError(ctx, e.cfg, errSendRetry) + helps.RecordAPIResponseError(ctx, e.cfg, errSendRetry) return resp, errSendRetry } } else { - recordAPIResponseError(ctx, e.cfg, errDialRetry) + helps.RecordAPIResponseError(ctx, e.cfg, errDialRetry) return resp, errDialRetry } } else { - recordAPIResponseError(ctx, e.cfg, errSend) + helps.RecordAPIResponseError(ctx, e.cfg, errSend) return resp, errSend } } @@ -306,7 +308,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut } msgType, payload, errRead := readCodexWebsocketMessage(ctx, sess, conn, readCh) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return resp, errRead } if msgType != websocket.TextMessage { @@ -315,7 +317,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut if sess != nil { e.invalidateUpstreamConn(sess, conn, "unexpected_binary", err) } - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } continue @@ -325,21 +327,21 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut if len(payload) == 0 { continue } - appendAPIResponseChunk(ctx, e.cfg, payload) + helps.AppendAPIResponseChunk(ctx, e.cfg, payload) if wsErr, ok := parseCodexWebsocketError(payload); ok { if sess != nil { e.invalidateUpstreamConn(sess, conn, "upstream_error", wsErr) } - recordAPIResponseError(ctx, e.cfg, wsErr) + helps.RecordAPIResponseError(ctx, e.cfg, wsErr) return resp, wsErr } payload = normalizeCodexWebsocketCompletion(payload) eventType := gjson.GetBytes(payload, "type").String() if eventType == "response.completed" { - if detail, ok := 
parseCodexUsage(payload); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseCodexUsage(payload); ok { + reporter.Publish(ctx, detail) } var param any out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, payload, ¶m) @@ -364,8 +366,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -376,8 +378,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr return nil, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, body, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, body, requestedModel) httpURL := strings.TrimSuffix(baseURL, "/") + "/responses" wsURL, err := buildCodexResponsesWebsocketURL(httpURL) @@ -403,7 +405,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr } wsReqBody := buildCodexWebsocketRequestBody(body) - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: wsURL, Method: "WEBSOCKET", Headers: wsHeaders.Clone(), @@ -419,12 +421,12 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr var upstreamHeaders http.Header if respHS != nil { upstreamHeaders = respHS.Header.Clone() - recordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone()) } if errDial != nil { bodyErr := 
websocketHandshakeBody(respHS) if len(bodyErr) > 0 { - appendAPIResponseChunk(ctx, e.cfg, bodyErr) + helps.AppendAPIResponseChunk(ctx, e.cfg, bodyErr) } if respHS != nil && respHS.StatusCode == http.StatusUpgradeRequired { return e.CodexExecutor.ExecuteStream(ctx, auth, req, opts) @@ -432,7 +434,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr if respHS != nil && respHS.StatusCode > 0 { return nil, statusErr{code: respHS.StatusCode, msg: string(bodyErr)} } - recordAPIResponseError(ctx, e.cfg, errDial) + helps.RecordAPIResponseError(ctx, e.cfg, errDial) if sess != nil { sess.reqMu.Unlock() } @@ -451,20 +453,20 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr } if errSend := writeCodexWebsocketMessage(sess, conn, wsReqBody); errSend != nil { - recordAPIResponseError(ctx, e.cfg, errSend) + helps.RecordAPIResponseError(ctx, e.cfg, errSend) if sess != nil { e.invalidateUpstreamConn(sess, conn, "send_error", errSend) // Retry once with a new websocket connection for the same execution session. 
connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders) if errDialRetry != nil || connRetry == nil { - recordAPIResponseError(ctx, e.cfg, errDialRetry) + helps.RecordAPIResponseError(ctx, e.cfg, errDialRetry) sess.clearActive(readCh) sess.reqMu.Unlock() return nil, errDialRetry } wsReqBodyRetry := buildCodexWebsocketRequestBody(body) - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: wsURL, Method: "WEBSOCKET", Headers: wsHeaders.Clone(), @@ -476,7 +478,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr AuthValue: authValue, }) if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry != nil { - recordAPIResponseError(ctx, e.cfg, errSendRetry) + helps.RecordAPIResponseError(ctx, e.cfg, errSendRetry) e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry) sess.clearActive(readCh) sess.reqMu.Unlock() @@ -542,8 +544,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr } terminateReason = "read_error" terminateErr = errRead - recordAPIResponseError(ctx, e.cfg, errRead) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) + reporter.PublishFailure(ctx) _ = send(cliproxyexecutor.StreamChunk{Err: errRead}) return } @@ -552,8 +554,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr err = fmt.Errorf("codex websockets executor: unexpected binary message") terminateReason = "unexpected_binary" terminateErr = err - recordAPIResponseError(ctx, e.cfg, err) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, err) + reporter.PublishFailure(ctx) if sess != nil { e.invalidateUpstreamConn(sess, conn, "unexpected_binary", err) } @@ -567,13 +569,13 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr if len(payload) == 0 { continue } - 
appendAPIResponseChunk(ctx, e.cfg, payload) + helps.AppendAPIResponseChunk(ctx, e.cfg, payload) if wsErr, ok := parseCodexWebsocketError(payload); ok { terminateReason = "upstream_error" terminateErr = wsErr - recordAPIResponseError(ctx, e.cfg, wsErr) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, wsErr) + reporter.PublishFailure(ctx) if sess != nil { e.invalidateUpstreamConn(sess, conn, "upstream_error", wsErr) } @@ -584,8 +586,8 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr payload = normalizeCodexWebsocketCompletion(payload) eventType := gjson.GetBytes(payload, "type").String() if eventType == "response.completed" || eventType == "response.done" { - if detail, ok := parseCodexUsage(payload); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseCodexUsage(payload); ok { + reporter.Publish(ctx, detail) } } @@ -767,19 +769,19 @@ func applyCodexPromptCacheHeaders(from sdktranslator.Format, req cliproxyexecuto return rawJSON, headers } - var cache codexCache + var cache helps.CodexCache if from == "claude" { userIDResult := gjson.GetBytes(req.Payload, "metadata.user_id") if userIDResult.Exists() { key := fmt.Sprintf("%s-%s", req.Model, userIDResult.String()) - if cached, ok := getCodexCache(key); ok { + if cached, ok := helps.GetCodexCache(key); ok { cache = cached } else { - cache = codexCache{ + cache = helps.CodexCache{ ID: uuid.New().String(), Expire: time.Now().Add(1 * time.Hour), } - setCodexCache(key, cache) + helps.SetCodexCache(key, cache) } } } else if from == "openai-response" { @@ -806,8 +808,8 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth * } var ginHeaders http.Header - if ginCtx := ginContextFrom(ctx); ginCtx != nil && ginCtx.Request != nil { - ginHeaders = ginCtx.Request.Header + if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil { + ginHeaders = ginCtx.Request.Header.Clone() } cfgUserAgent, 
cfgBetaFeatures := codexHeaderDefaults(cfg, auth) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 7d2d2a9b..b2b656ee 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -18,6 +18,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" @@ -112,8 +113,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth return resp, err } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") @@ -132,8 +133,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) - requestedModel := payloadRequestedModel(opts, req.Model) - basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + basePayload = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel) action := "generateContent" if req.Metadata != nil { @@ -190,7 +191,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken) applyGeminiCLIHeaders(reqHTTP, attemptModel) reqHTTP.Header.Set("Accept", "application/json") - 
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: reqHTTP.Header.Clone(), @@ -204,7 +205,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth httpResp, errDo := httpClient.Do(reqHTTP) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) err = errDo return resp, err } @@ -213,15 +214,15 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("gemini cli executor: close response body error: %v", errClose) } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) err = errRead return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 { - reporter.publish(ctx, parseGeminiCLIUsage(data)) + reporter.Publish(ctx, helps.ParseGeminiCLIUsage(data)) var param any out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, opts.OriginalRequest, payload, data, ¶m) resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()} @@ -230,7 +231,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth lastStatus = httpResp.StatusCode lastBody = append([]byte(nil), data...) 
- logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) if httpResp.StatusCode == 429 { if idx+1 < len(models) { log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1]) @@ -245,7 +246,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } if len(lastBody) > 0 { - appendAPIResponseChunk(ctx, e.cfg, lastBody) + helps.AppendAPIResponseChunk(ctx, e.cfg, lastBody) } if lastStatus == 0 { lastStatus = 429 @@ -266,8 +267,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut return nil, err } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") @@ -286,8 +287,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut } basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) - requestedModel := payloadRequestedModel(opts, req.Model) - basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + basePayload = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated, requestedModel) projectID := resolveGeminiProjectID(auth) @@ -335,7 +336,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken) applyGeminiCLIHeaders(reqHTTP, attemptModel) 
reqHTTP.Header.Set("Accept", "text/event-stream") - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: reqHTTP.Header.Clone(), @@ -349,25 +350,25 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut httpResp, errDo := httpClient.Do(reqHTTP) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) err = errDo return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { data, errRead := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("gemini cli executor: close response body error: %v", errClose) } if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) err = errRead return nil, err } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) lastStatus = httpResp.StatusCode lastBody = append([]byte(nil), data...) 
- logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) if httpResp.StatusCode == 429 { if idx+1 < len(models) { log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1]) @@ -394,9 +395,9 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseGeminiCLIStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseGeminiCLIStreamUsage(line); ok { + reporter.Publish(ctx, detail) } if bytes.HasPrefix(line, dataTag) { segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, bytes.Clone(line), ¶m) @@ -411,8 +412,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut out <- cliproxyexecutor.StreamChunk{Payload: segments[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } return @@ -420,13 +421,13 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut data, errRead := io.ReadAll(resp.Body) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errRead} return } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, 
parseGeminiCLIUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseGeminiCLIUsage(data)) var param any segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, data, ¶m) for i := range segments { @@ -443,7 +444,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut } if len(lastBody) > 0 { - appendAPIResponseChunk(ctx, e.cfg, lastBody) + helps.AppendAPIResponseChunk(ctx, e.cfg, lastBody) } if lastStatus == 0 { lastStatus = 429 @@ -516,7 +517,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken) applyGeminiCLIHeaders(reqHTTP, baseModel) reqHTTP.Header.Set("Accept", "application/json") - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: reqHTTP.Header.Clone(), @@ -530,17 +531,19 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. 
resp, errDo := httpClient.Do(reqHTTP) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return cliproxyexecutor.Response{}, errDo } data, errRead := io.ReadAll(resp.Body) - _ = resp.Body.Close() - recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) + if errClose := resp.Body.Close(); errClose != nil { + helps.LogWithRequestID(ctx).Errorf("response body close error: %v", errClose) + } + helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return cliproxyexecutor.Response{}, errRead } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) if resp.StatusCode >= 200 && resp.StatusCode < 300 { count := gjson.GetBytes(data, "totalTokens").Int() translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data) @@ -611,7 +614,7 @@ func prepareGeminiCLITokenSource(ctx context.Context, cfg *config.Config, auth * } ctxToken := ctx - if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil { + if httpClient := helps.NewProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil { ctxToken = context.WithValue(ctxToken, oauth2.HTTPClient, httpClient) } @@ -707,7 +710,7 @@ func geminiOAuthMetadata(auth *cliproxyauth.Auth) map[string]any { } func newHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client { - return newProxyAwareHTTPClient(ctx, cfg, auth, timeout) + return helps.NewProxyAwareHTTPClient(ctx, cfg, auth, timeout) } func cloneMap(in map[string]any) map[string]any { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 35b95da4..fb4fbfda 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -13,6 +13,7 
@@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -85,7 +86,7 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -110,8 +111,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) // Official Gemini API via API key or OAuth bearer from := opts.SourceFormat @@ -130,8 +131,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } body = fixGeminiImageAspectRatio(baseModel, body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" @@ -165,7 +166,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, 
Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -177,10 +178,10 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -188,21 +189,21 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r log.Errorf("gemini executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} return resp, err } data, err := io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseGeminiUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseGeminiUsage(data)) var param any out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, ¶m) resp = cliproxyexecutor.Response{Payload: out, Headers: 
httpResp.Header.Clone()} @@ -218,8 +219,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -237,8 +238,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } body = fixGeminiImageAspectRatio(baseModel, body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) @@ -268,7 +269,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -280,17 +281,17 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) 
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("gemini executor: close response body error: %v", errClose) } @@ -310,14 +311,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - filtered := FilterSSEUsageMetadata(line) - payload := jsonPayload(filtered) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + filtered := helps.FilterSSEUsageMetadata(line) + payload := helps.JSONPayload(filtered) if len(payload) == 0 { continue } - if detail, ok := parseGeminiStreamUsage(payload); ok { - reporter.publish(ctx, detail) + if detail, ok := helps.ParseGeminiStreamUsage(payload); ok { + reporter.Publish(ctx, detail) } lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(payload), ¶m) for i := range lines { @@ -329,8 +330,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A out <- cliproxyexecutor.StreamChunk{Payload: lines[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() @@ -381,7 +382,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut authLabel = auth.Label authType, 
authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -393,23 +394,27 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) resp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } - defer func() { _ = resp.Body.Close() }() - recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) + defer func() { + if errClose := resp.Body.Close(); errClose != nil { + helps.LogWithRequestID(ctx).Errorf("response body close error: %v", errClose) + } + }() + helps.RecordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) data, err := io.ReadAll(resp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) if resp.StatusCode < 200 || resp.StatusCode >= 300 { - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, summarizeErrorBody(resp.Header.Get("Content-Type"), data)) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", resp.StatusCode, helps.SummarizeErrorBody(resp.Header.Get("Content-Type"), data)) return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(data)} } diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index 13a2b65c..83152e13 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ 
b/internal/runtime/executor/gemini_vertex_executor.go @@ -16,6 +16,7 @@ import ( vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -227,7 +228,7 @@ func (e *GeminiVertexExecutor) HttpRequest(ctx context.Context, auth *cliproxyau if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -301,8 +302,8 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth *cliproxyauth.Aut func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) var body []byte @@ -332,8 +333,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } body = fixGeminiImageAspectRatio(baseModel, body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) 
body, _ = sjson.SetBytes(body, "model", baseModel) } @@ -369,7 +370,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -381,10 +382,10 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return resp, errDo } defer func() { @@ -392,21 +393,21 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au log.Errorf("vertex executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} return resp, err } data, errRead := io.ReadAll(httpResp.Body) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return resp, errRead } - 
appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseGeminiUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseGeminiUsage(data)) // For Imagen models, convert response to Gemini format before translation // This ensures Imagen responses use the same format as gemini-3-pro-image-preview @@ -427,8 +428,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -447,8 +448,8 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip } body = fixGeminiImageAspectRatio(baseModel, body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) action := getVertexAction(baseModel, false) @@ -484,7 +485,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -496,10 +497,10 @@ 
func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return resp, errDo } defer func() { @@ -507,21 +508,21 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip log.Errorf("vertex executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} return resp, err } data, errRead := io.ReadAll(httpResp.Body) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return resp, errRead } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseGeminiUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseGeminiUsage(data)) var param any out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, ¶m) resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()} @@ -532,8 +533,8 @@ func (e 
*GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (_ *cliproxyexecutor.StreamResult, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -552,8 +553,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte } body = fixGeminiImageAspectRatio(baseModel, body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) action := getVertexAction(baseModel, true) @@ -588,7 +589,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -600,17 +601,17 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + 
helps.RecordAPIResponseError(ctx, e.cfg, errDo) return nil, errDo } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("vertex executor: close response body error: %v", errClose) } @@ -630,9 +631,9 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseGeminiStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseGeminiStreamUsage(line); ok { + reporter.Publish(ctx, detail) } lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), ¶m) for i := range lines { @@ -644,8 +645,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte out <- cliproxyexecutor.StreamChunk{Payload: lines[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() @@ -656,8 +657,8 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte func (e 
*GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (_ *cliproxyexecutor.StreamResult, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -676,8 +677,8 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth } body = fixGeminiImageAspectRatio(baseModel, body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, _ = sjson.SetBytes(body, "model", baseModel) action := getVertexAction(baseModel, true) @@ -712,7 +713,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -724,17 +725,17 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return nil, errDo } - recordAPIResponseMetadata(ctx, e.cfg, 
httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("vertex executor: close response body error: %v", errClose) } @@ -754,9 +755,9 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseGeminiStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseGeminiStreamUsage(line); ok { + reporter.Publish(ctx, detail) } lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), ¶m) for i := range lines { @@ -768,8 +769,8 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth out <- cliproxyexecutor.StreamChunk{Payload: lines[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() @@ -819,7 +820,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, 
upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -831,10 +832,10 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return cliproxyexecutor.Response{}, errDo } defer func() { @@ -842,19 +843,19 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context log.Errorf("vertex executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)} } data, errRead := io.ReadAll(httpResp.Body) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) + helps.RecordAPIResponseError(ctx, e.cfg, errRead) return cliproxyexecutor.Response{}, errRead } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, 
count, data) return cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}, nil @@ -903,7 +904,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -915,10 +916,10 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { - recordAPIResponseError(ctx, e.cfg, errDo) + helps.RecordAPIResponseError(ctx, e.cfg, errDo) return cliproxyexecutor.Response{}, errDo } defer func() { @@ -926,19 +927,19 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * log.Errorf("vertex executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(b)} } data, errRead := io.ReadAll(httpResp.Body) if errRead != nil { - recordAPIResponseError(ctx, e.cfg, errRead) 
+ helps.RecordAPIResponseError(ctx, e.cfg, errRead) return cliproxyexecutor.Response{}, errRead } - appendAPIResponseChunk(ctx, e.cfg, data) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}, nil @@ -1012,7 +1013,7 @@ func vertexBaseURL(location string) string { } func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, saJSON []byte) (string, error) { - if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil { + if httpClient := helps.NewProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil { ctx = context.WithValue(ctx, oauth2.HTTPClient, httpClient) } // Use cloud-platform scope for Vertex AI. diff --git a/internal/runtime/executor/cache_helpers.go b/internal/runtime/executor/helps/cache_helpers.go similarity index 81% rename from internal/runtime/executor/cache_helpers.go rename to internal/runtime/executor/helps/cache_helpers.go index b6de886d..ec063384 100644 --- a/internal/runtime/executor/cache_helpers.go +++ b/internal/runtime/executor/helps/cache_helpers.go @@ -1,11 +1,11 @@ -package executor +package helps import ( "sync" "time" ) -type codexCache struct { +type CodexCache struct { ID string Expire time.Time } @@ -13,7 +13,7 @@ type codexCache struct { // codexCacheMap stores prompt cache IDs keyed by model+user_id. // Protected by codexCacheMu. Entries expire after 1 hour. var ( - codexCacheMap = make(map[string]codexCache) + codexCacheMap = make(map[string]CodexCache) codexCacheMu sync.RWMutex ) @@ -47,20 +47,20 @@ func purgeExpiredCodexCache() { } } -// getCodexCache retrieves a cached entry, returning ok=false if not found or expired. -func getCodexCache(key string) (codexCache, bool) { +// GetCodexCache retrieves a cached entry, returning ok=false if not found or expired. 
+func GetCodexCache(key string) (CodexCache, bool) { codexCacheCleanupOnce.Do(startCodexCacheCleanup) codexCacheMu.RLock() cache, ok := codexCacheMap[key] codexCacheMu.RUnlock() if !ok || cache.Expire.Before(time.Now()) { - return codexCache{}, false + return CodexCache{}, false } return cache, true } -// setCodexCache stores a cache entry. -func setCodexCache(key string, cache codexCache) { +// SetCodexCache stores a cache entry. +func SetCodexCache(key string, cache CodexCache) { codexCacheCleanupOnce.Do(startCodexCacheCleanup) codexCacheMu.Lock() codexCacheMap[key] = cache diff --git a/internal/runtime/executor/claude_device_profile.go b/internal/runtime/executor/helps/claude_device_profile.go similarity index 84% rename from internal/runtime/executor/claude_device_profile.go rename to internal/runtime/executor/helps/claude_device_profile.go index 374720b8..2cf4d917 100644 --- a/internal/runtime/executor/claude_device_profile.go +++ b/internal/runtime/executor/helps/claude_device_profile.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "crypto/sha256" @@ -32,7 +32,7 @@ var ( claudeDeviceProfileCacheMu sync.RWMutex claudeDeviceProfileCacheCleanupOnce sync.Once - claudeDeviceProfileBeforeCandidateStore func(claudeDeviceProfile) + ClaudeDeviceProfileBeforeCandidateStore func(ClaudeDeviceProfile) ) type claudeCLIVersion struct { @@ -63,29 +63,35 @@ func (v claudeCLIVersion) Compare(other claudeCLIVersion) int { } } -type claudeDeviceProfile struct { +type ClaudeDeviceProfile struct { UserAgent string PackageVersion string RuntimeVersion string OS string Arch string - Version claudeCLIVersion - HasVersion bool + version claudeCLIVersion + hasVersion bool } type claudeDeviceProfileCacheEntry struct { - profile claudeDeviceProfile + profile ClaudeDeviceProfile expire time.Time } -func claudeDeviceProfileStabilizationEnabled(cfg *config.Config) bool { +func ClaudeDeviceProfileStabilizationEnabled(cfg *config.Config) bool { if cfg == nil || 
cfg.ClaudeHeaderDefaults.StabilizeDeviceProfile == nil { return false } return *cfg.ClaudeHeaderDefaults.StabilizeDeviceProfile } -func defaultClaudeDeviceProfile(cfg *config.Config) claudeDeviceProfile { +func ResetClaudeDeviceProfileCache() { + claudeDeviceProfileCacheMu.Lock() + claudeDeviceProfileCache = make(map[string]claudeDeviceProfileCacheEntry) + claudeDeviceProfileCacheMu.Unlock() +} + +func defaultClaudeDeviceProfile(cfg *config.Config) ClaudeDeviceProfile { hdrDefault := func(cfgVal, fallback string) string { if strings.TrimSpace(cfgVal) != "" { return strings.TrimSpace(cfgVal) @@ -98,7 +104,7 @@ func defaultClaudeDeviceProfile(cfg *config.Config) claudeDeviceProfile { hd = cfg.ClaudeHeaderDefaults } - profile := claudeDeviceProfile{ + profile := ClaudeDeviceProfile{ UserAgent: hdrDefault(hd.UserAgent, defaultClaudeFingerprintUserAgent), PackageVersion: hdrDefault(hd.PackageVersion, defaultClaudeFingerprintPackageVersion), RuntimeVersion: hdrDefault(hd.RuntimeVersion, defaultClaudeFingerprintRuntimeVersion), @@ -106,8 +112,8 @@ func defaultClaudeDeviceProfile(cfg *config.Config) claudeDeviceProfile { Arch: hdrDefault(hd.Arch, defaultClaudeFingerprintArch), } if version, ok := parseClaudeCLIVersion(profile.UserAgent); ok { - profile.Version = version - profile.HasVersion = true + profile.version = version + profile.hasVersion = true } return profile } @@ -162,17 +168,17 @@ func parseClaudeCLIVersion(userAgent string) (claudeCLIVersion, bool) { return claudeCLIVersion{major: major, minor: minor, patch: patch}, true } -func shouldUpgradeClaudeDeviceProfile(candidate, current claudeDeviceProfile) bool { - if candidate.UserAgent == "" || !candidate.HasVersion { +func shouldUpgradeClaudeDeviceProfile(candidate, current ClaudeDeviceProfile) bool { + if candidate.UserAgent == "" || !candidate.hasVersion { return false } - if current.UserAgent == "" || !current.HasVersion { + if current.UserAgent == "" || !current.hasVersion { return true } - return 
candidate.Version.Compare(current.Version) > 0 + return candidate.version.Compare(current.version) > 0 } -func pinClaudeDeviceProfilePlatform(profile, baseline claudeDeviceProfile) claudeDeviceProfile { +func pinClaudeDeviceProfilePlatform(profile, baseline ClaudeDeviceProfile) ClaudeDeviceProfile { profile.OS = baseline.OS profile.Arch = baseline.Arch return profile @@ -180,38 +186,38 @@ func pinClaudeDeviceProfilePlatform(profile, baseline claudeDeviceProfile) claud // normalizeClaudeDeviceProfile keeps stabilized profiles pinned to the current // baseline platform and enforces the baseline software fingerprint as a floor. -func normalizeClaudeDeviceProfile(profile, baseline claudeDeviceProfile) claudeDeviceProfile { +func normalizeClaudeDeviceProfile(profile, baseline ClaudeDeviceProfile) ClaudeDeviceProfile { profile = pinClaudeDeviceProfilePlatform(profile, baseline) - if profile.UserAgent == "" || !profile.HasVersion || shouldUpgradeClaudeDeviceProfile(baseline, profile) { + if profile.UserAgent == "" || !profile.hasVersion || shouldUpgradeClaudeDeviceProfile(baseline, profile) { profile.UserAgent = baseline.UserAgent profile.PackageVersion = baseline.PackageVersion profile.RuntimeVersion = baseline.RuntimeVersion - profile.Version = baseline.Version - profile.HasVersion = baseline.HasVersion + profile.version = baseline.version + profile.hasVersion = baseline.hasVersion } return profile } -func extractClaudeDeviceProfile(headers http.Header, cfg *config.Config) (claudeDeviceProfile, bool) { +func extractClaudeDeviceProfile(headers http.Header, cfg *config.Config) (ClaudeDeviceProfile, bool) { if headers == nil { - return claudeDeviceProfile{}, false + return ClaudeDeviceProfile{}, false } userAgent := strings.TrimSpace(headers.Get("User-Agent")) version, ok := parseClaudeCLIVersion(userAgent) if !ok { - return claudeDeviceProfile{}, false + return ClaudeDeviceProfile{}, false } baseline := defaultClaudeDeviceProfile(cfg) - profile := claudeDeviceProfile{ + 
profile := ClaudeDeviceProfile{ UserAgent: userAgent, PackageVersion: firstNonEmptyHeader(headers, "X-Stainless-Package-Version", baseline.PackageVersion), RuntimeVersion: firstNonEmptyHeader(headers, "X-Stainless-Runtime-Version", baseline.RuntimeVersion), OS: firstNonEmptyHeader(headers, "X-Stainless-Os", baseline.OS), Arch: firstNonEmptyHeader(headers, "X-Stainless-Arch", baseline.Arch), - Version: version, - HasVersion: true, + version: version, + hasVersion: true, } return profile, true } @@ -263,7 +269,7 @@ func purgeExpiredClaudeDeviceProfiles() { claudeDeviceProfileCacheMu.Unlock() } -func resolveClaudeDeviceProfile(auth *cliproxyauth.Auth, apiKey string, headers http.Header, cfg *config.Config) claudeDeviceProfile { +func ResolveClaudeDeviceProfile(auth *cliproxyauth.Auth, apiKey string, headers http.Header, cfg *config.Config) ClaudeDeviceProfile { claudeDeviceProfileCacheCleanupOnce.Do(startClaudeDeviceProfileCacheCleanup) cacheKey := claudeDeviceProfileCacheKey(auth, apiKey) @@ -283,8 +289,8 @@ func resolveClaudeDeviceProfile(auth *cliproxyauth.Auth, apiKey string, headers claudeDeviceProfileCacheMu.RUnlock() if hasCandidate { - if claudeDeviceProfileBeforeCandidateStore != nil { - claudeDeviceProfileBeforeCandidateStore(candidate) + if ClaudeDeviceProfileBeforeCandidateStore != nil { + ClaudeDeviceProfileBeforeCandidateStore(candidate) } claudeDeviceProfileCacheMu.Lock() @@ -324,7 +330,7 @@ func resolveClaudeDeviceProfile(auth *cliproxyauth.Auth, apiKey string, headers return baseline } -func applyClaudeDeviceProfileHeaders(r *http.Request, profile claudeDeviceProfile) { +func ApplyClaudeDeviceProfileHeaders(r *http.Request, profile ClaudeDeviceProfile) { if r == nil { return } @@ -344,7 +350,7 @@ func applyClaudeDeviceProfileHeaders(r *http.Request, profile claudeDeviceProfil r.Header.Set("X-Stainless-Arch", profile.Arch) } -func applyClaudeLegacyDeviceHeaders(r *http.Request, ginHeaders http.Header, cfg *config.Config) { +func 
ApplyClaudeLegacyDeviceHeaders(r *http.Request, ginHeaders http.Header, cfg *config.Config) { if r == nil { return } diff --git a/internal/runtime/executor/cloak_obfuscate.go b/internal/runtime/executor/helps/cloak_obfuscate.go similarity index 93% rename from internal/runtime/executor/cloak_obfuscate.go rename to internal/runtime/executor/helps/cloak_obfuscate.go index 81781802..dce724af 100644 --- a/internal/runtime/executor/cloak_obfuscate.go +++ b/internal/runtime/executor/helps/cloak_obfuscate.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "regexp" @@ -18,9 +18,9 @@ type SensitiveWordMatcher struct { regex *regexp.Regexp } -// buildSensitiveWordMatcher compiles a regex from the word list. +// BuildSensitiveWordMatcher compiles a regex from the word list. // Words are sorted by length (longest first) for proper matching. -func buildSensitiveWordMatcher(words []string) *SensitiveWordMatcher { +func BuildSensitiveWordMatcher(words []string) *SensitiveWordMatcher { if len(words) == 0 { return nil } @@ -81,9 +81,9 @@ func (m *SensitiveWordMatcher) obfuscateText(text string) string { return m.regex.ReplaceAllStringFunc(text, obfuscateWord) } -// obfuscateSensitiveWords processes the payload and obfuscates sensitive words +// ObfuscateSensitiveWords processes the payload and obfuscates sensitive words // in system blocks and message content. 
-func obfuscateSensitiveWords(payload []byte, matcher *SensitiveWordMatcher) []byte { +func ObfuscateSensitiveWords(payload []byte, matcher *SensitiveWordMatcher) []byte { if matcher == nil || matcher.regex == nil { return payload } diff --git a/internal/runtime/executor/cloak_utils.go b/internal/runtime/executor/helps/cloak_utils.go similarity index 83% rename from internal/runtime/executor/cloak_utils.go rename to internal/runtime/executor/helps/cloak_utils.go index 2a3433ac..11ace545 100644 --- a/internal/runtime/executor/cloak_utils.go +++ b/internal/runtime/executor/helps/cloak_utils.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "crypto/rand" @@ -28,9 +28,17 @@ func isValidUserID(userID string) bool { return userIDPattern.MatchString(userID) } -// shouldCloak determines if request should be cloaked based on config and client User-Agent. +func GenerateFakeUserID() string { + return generateFakeUserID() +} + +func IsValidUserID(userID string) bool { + return isValidUserID(userID) +} + +// ShouldCloak determines if request should be cloaked based on config and client User-Agent. // Returns true if cloaking should be applied. -func shouldCloak(cloakMode string, userAgent string) bool { +func ShouldCloak(cloakMode string, userAgent string) bool { switch strings.ToLower(cloakMode) { case "always": return true diff --git a/internal/runtime/executor/logging_helpers.go b/internal/runtime/executor/helps/logging_helpers.go similarity index 92% rename from internal/runtime/executor/logging_helpers.go rename to internal/runtime/executor/helps/logging_helpers.go index ae2aee3f..f9389edd 100644 --- a/internal/runtime/executor/logging_helpers.go +++ b/internal/runtime/executor/helps/logging_helpers.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "bytes" @@ -24,8 +24,8 @@ const ( apiResponseKey = "API_RESPONSE" ) -// upstreamRequestLog captures the outbound upstream request details for logging. 
-type upstreamRequestLog struct { +// UpstreamRequestLog captures the outbound upstream request details for logging. +type UpstreamRequestLog struct { URL string Method string Headers http.Header @@ -49,8 +49,8 @@ type upstreamAttempt struct { errorWritten bool } -// recordAPIRequest stores the upstream request metadata in Gin context for request logging. -func recordAPIRequest(ctx context.Context, cfg *config.Config, info upstreamRequestLog) { +// RecordAPIRequest stores the upstream request metadata in Gin context for request logging. +func RecordAPIRequest(ctx context.Context, cfg *config.Config, info UpstreamRequestLog) { if cfg == nil || !cfg.RequestLog { return } @@ -96,8 +96,8 @@ func recordAPIRequest(ctx context.Context, cfg *config.Config, info upstreamRequ updateAggregatedRequest(ginCtx, attempts) } -// recordAPIResponseMetadata captures upstream response status/header information for the latest attempt. -func recordAPIResponseMetadata(ctx context.Context, cfg *config.Config, status int, headers http.Header) { +// RecordAPIResponseMetadata captures upstream response status/header information for the latest attempt. +func RecordAPIResponseMetadata(ctx context.Context, cfg *config.Config, status int, headers http.Header) { if cfg == nil || !cfg.RequestLog { return } @@ -122,8 +122,8 @@ func recordAPIResponseMetadata(ctx context.Context, cfg *config.Config, status i updateAggregatedResponse(ginCtx, attempts) } -// recordAPIResponseError adds an error entry for the latest attempt when no HTTP response is available. -func recordAPIResponseError(ctx context.Context, cfg *config.Config, err error) { +// RecordAPIResponseError adds an error entry for the latest attempt when no HTTP response is available. 
+func RecordAPIResponseError(ctx context.Context, cfg *config.Config, err error) { if cfg == nil || !cfg.RequestLog || err == nil { return } @@ -147,8 +147,8 @@ func recordAPIResponseError(ctx context.Context, cfg *config.Config, err error) updateAggregatedResponse(ginCtx, attempts) } -// appendAPIResponseChunk appends an upstream response chunk to Gin context for request logging. -func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byte) { +// AppendAPIResponseChunk appends an upstream response chunk to Gin context for request logging. +func AppendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byte) { if cfg == nil || !cfg.RequestLog { return } @@ -285,7 +285,7 @@ func writeHeaders(builder *strings.Builder, headers http.Header) { } } -func formatAuthInfo(info upstreamRequestLog) string { +func formatAuthInfo(info UpstreamRequestLog) string { var parts []string if trimmed := strings.TrimSpace(info.Provider); trimmed != "" { parts = append(parts, fmt.Sprintf("provider=%s", trimmed)) @@ -321,7 +321,7 @@ func formatAuthInfo(info upstreamRequestLog) string { return strings.Join(parts, ", ") } -func summarizeErrorBody(contentType string, body []byte) string { +func SummarizeErrorBody(contentType string, body []byte) string { isHTML := strings.Contains(strings.ToLower(contentType), "text/html") if !isHTML { trimmed := bytes.TrimSpace(bytes.ToLower(body)) @@ -379,7 +379,7 @@ func extractJSONErrorMessage(body []byte) string { // logWithRequestID returns a logrus Entry with request_id field populated from context. // If no request ID is found in context, it returns the standard logger. 
-func logWithRequestID(ctx context.Context) *log.Entry { +func LogWithRequestID(ctx context.Context) *log.Entry { if ctx == nil { return log.NewEntry(log.StandardLogger()) } diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/helps/payload_helpers.go similarity index 97% rename from internal/runtime/executor/payload_helpers.go rename to internal/runtime/executor/helps/payload_helpers.go index 271e2c5b..73514c2d 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/helps/payload_helpers.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "encoding/json" @@ -11,12 +11,12 @@ import ( "github.com/tidwall/sjson" ) -// applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter +// ApplyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter // paths as relative to the provided root path (for example, "request" for Gemini CLI) // and restricts matches to the given protocol when supplied. Defaults are checked // against the original payload when provided. requestedModel carries the client-visible // model name before alias resolution so payload rules can target aliases precisely. 
-func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte, requestedModel string) []byte { +func ApplyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string, payload, original []byte, requestedModel string) []byte { if cfg == nil || len(payload) == 0 { return payload } @@ -244,7 +244,7 @@ func payloadRawValue(value any) ([]byte, bool) { } } -func payloadRequestedModel(opts cliproxyexecutor.Options, fallback string) string { +func PayloadRequestedModel(opts cliproxyexecutor.Options, fallback string) string { fallback = strings.TrimSpace(fallback) if len(opts.Metadata) == 0 { return fallback diff --git a/internal/runtime/executor/proxy_helpers.go b/internal/runtime/executor/helps/proxy_helpers.go similarity index 94% rename from internal/runtime/executor/proxy_helpers.go rename to internal/runtime/executor/helps/proxy_helpers.go index 5511497b..022bc65c 100644 --- a/internal/runtime/executor/proxy_helpers.go +++ b/internal/runtime/executor/helps/proxy_helpers.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "context" @@ -12,7 +12,7 @@ import ( log "github.com/sirupsen/logrus" ) -// newProxyAwareHTTPClient creates an HTTP client with proper proxy configuration priority: +// NewProxyAwareHTTPClient creates an HTTP client with proper proxy configuration priority: // 1. Use auth.ProxyURL if configured (highest priority) // 2. Use cfg.ProxyURL if auth proxy is not configured // 3. 
Use RoundTripper from context if neither are configured @@ -25,7 +25,7 @@ import ( // // Returns: // - *http.Client: An HTTP client with configured proxy or transport -func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client { +func NewProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client { httpClient := &http.Client{} if timeout > 0 { httpClient.Timeout = timeout diff --git a/internal/runtime/executor/proxy_helpers_test.go b/internal/runtime/executor/helps/proxy_helpers_test.go similarity index 93% rename from internal/runtime/executor/proxy_helpers_test.go rename to internal/runtime/executor/helps/proxy_helpers_test.go index 4ae5c937..33117167 100644 --- a/internal/runtime/executor/proxy_helpers_test.go +++ b/internal/runtime/executor/helps/proxy_helpers_test.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "context" @@ -13,7 +13,7 @@ import ( func TestNewProxyAwareHTTPClientDirectBypassesGlobalProxy(t *testing.T) { t.Parallel() - client := newProxyAwareHTTPClient( + client := NewProxyAwareHTTPClient( context.Background(), &config.Config{SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"}}, &cliproxyauth.Auth{ProxyURL: "direct"}, diff --git a/internal/runtime/executor/thinking_providers.go b/internal/runtime/executor/helps/thinking_providers.go similarity index 97% rename from internal/runtime/executor/thinking_providers.go rename to internal/runtime/executor/helps/thinking_providers.go index b961db90..36b63c90 100644 --- a/internal/runtime/executor/thinking_providers.go +++ b/internal/runtime/executor/helps/thinking_providers.go @@ -1,4 +1,4 @@ -package executor +package helps import ( _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/antigravity" diff --git a/internal/runtime/executor/token_helpers.go b/internal/runtime/executor/helps/token_helpers.go 
similarity index 94% rename from internal/runtime/executor/token_helpers.go rename to internal/runtime/executor/helps/token_helpers.go index f4236f9b..92b8ba8d 100644 --- a/internal/runtime/executor/token_helpers.go +++ b/internal/runtime/executor/helps/token_helpers.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "fmt" @@ -8,8 +8,8 @@ import ( "github.com/tiktoken-go/tokenizer" ) -// tokenizerForModel returns a tokenizer codec suitable for an OpenAI-style model id. -func tokenizerForModel(model string) (tokenizer.Codec, error) { +// TokenizerForModel returns a tokenizer codec suitable for an OpenAI-style model id. +func TokenizerForModel(model string) (tokenizer.Codec, error) { sanitized := strings.ToLower(strings.TrimSpace(model)) switch { case sanitized == "": @@ -37,8 +37,8 @@ func tokenizerForModel(model string) (tokenizer.Codec, error) { } } -// countOpenAIChatTokens approximates prompt tokens for OpenAI chat completions payloads. -func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) { +// CountOpenAIChatTokens approximates prompt tokens for OpenAI chat completions payloads. +func CountOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) { if enc == nil { return 0, fmt.Errorf("encoder is nil") } @@ -69,8 +69,8 @@ func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) { return int64(count), nil } -// buildOpenAIUsageJSON returns a minimal usage structure understood by downstream translators. -func buildOpenAIUsageJSON(count int64) []byte { +// BuildOpenAIUsageJSON returns a minimal usage structure understood by downstream translators. 
+func BuildOpenAIUsageJSON(count int64) []byte { return []byte(fmt.Sprintf(`{"usage":{"prompt_tokens":%d,"completion_tokens":0,"total_tokens":%d}}`, count, count)) } diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/helps/usage_helpers.go similarity index 91% rename from internal/runtime/executor/usage_helpers.go rename to internal/runtime/executor/helps/usage_helpers.go index de2f2e52..23040984 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/helps/usage_helpers.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "bytes" @@ -15,7 +15,7 @@ import ( "github.com/tidwall/sjson" ) -type usageReporter struct { +type UsageReporter struct { provider string model string authID string @@ -26,9 +26,9 @@ type usageReporter struct { once sync.Once } -func newUsageReporter(ctx context.Context, provider, model string, auth *cliproxyauth.Auth) *usageReporter { - apiKey := apiKeyFromContext(ctx) - reporter := &usageReporter{ +func NewUsageReporter(ctx context.Context, provider, model string, auth *cliproxyauth.Auth) *UsageReporter { + apiKey := APIKeyFromContext(ctx) + reporter := &UsageReporter{ provider: provider, model: model, requestedAt: time.Now(), @@ -42,24 +42,24 @@ func newUsageReporter(ctx context.Context, provider, model string, auth *cliprox return reporter } -func (r *usageReporter) publish(ctx context.Context, detail usage.Detail) { +func (r *UsageReporter) Publish(ctx context.Context, detail usage.Detail) { r.publishWithOutcome(ctx, detail, false) } -func (r *usageReporter) publishFailure(ctx context.Context) { +func (r *UsageReporter) PublishFailure(ctx context.Context) { r.publishWithOutcome(ctx, usage.Detail{}, true) } -func (r *usageReporter) trackFailure(ctx context.Context, errPtr *error) { +func (r *UsageReporter) TrackFailure(ctx context.Context, errPtr *error) { if r == nil || errPtr == nil { return } if *errPtr != nil { - r.publishFailure(ctx) + r.PublishFailure(ctx) } } 
-func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Detail, failed bool) { +func (r *UsageReporter) publishWithOutcome(ctx context.Context, detail usage.Detail, failed bool) { if r == nil { return } @@ -81,7 +81,7 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det // It is safe to call multiple times; only the first call wins due to once.Do. // This is used to ensure request counting even when upstream responses do not // include any usage fields (tokens), especially for streaming paths. -func (r *usageReporter) ensurePublished(ctx context.Context) { +func (r *UsageReporter) EnsurePublished(ctx context.Context) { if r == nil { return } @@ -90,7 +90,7 @@ func (r *usageReporter) ensurePublished(ctx context.Context) { }) } -func (r *usageReporter) buildRecord(detail usage.Detail, failed bool) usage.Record { +func (r *UsageReporter) buildRecord(detail usage.Detail, failed bool) usage.Record { if r == nil { return usage.Record{Detail: detail, Failed: failed} } @@ -108,7 +108,7 @@ func (r *usageReporter) buildRecord(detail usage.Detail, failed bool) usage.Reco } } -func (r *usageReporter) latency() time.Duration { +func (r *UsageReporter) latency() time.Duration { if r == nil || r.requestedAt.IsZero() { return 0 } @@ -119,7 +119,7 @@ func (r *usageReporter) latency() time.Duration { return latency } -func apiKeyFromContext(ctx context.Context) string { +func APIKeyFromContext(ctx context.Context) string { if ctx == nil { return "" } @@ -184,7 +184,7 @@ func resolveUsageSource(auth *cliproxyauth.Auth, ctxAPIKey string) string { return "" } -func parseCodexUsage(data []byte) (usage.Detail, bool) { +func ParseCodexUsage(data []byte) (usage.Detail, bool) { usageNode := gjson.ParseBytes(data).Get("response.usage") if !usageNode.Exists() { return usage.Detail{}, false @@ -203,7 +203,7 @@ func parseCodexUsage(data []byte) (usage.Detail, bool) { return detail, true } -func parseOpenAIUsage(data []byte) usage.Detail { 
+func ParseOpenAIUsage(data []byte) usage.Detail { usageNode := gjson.ParseBytes(data).Get("usage") if !usageNode.Exists() { return usage.Detail{} @@ -238,7 +238,7 @@ func parseOpenAIUsage(data []byte) usage.Detail { return detail } -func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) { +func ParseOpenAIStreamUsage(line []byte) (usage.Detail, bool) { payload := jsonPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return usage.Detail{}, false @@ -261,7 +261,7 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) { return detail, true } -func parseClaudeUsage(data []byte) usage.Detail { +func ParseClaudeUsage(data []byte) usage.Detail { usageNode := gjson.ParseBytes(data).Get("usage") if !usageNode.Exists() { return usage.Detail{} @@ -279,7 +279,7 @@ func parseClaudeUsage(data []byte) usage.Detail { return detail } -func parseClaudeStreamUsage(line []byte) (usage.Detail, bool) { +func ParseClaudeStreamUsage(line []byte) (usage.Detail, bool) { payload := jsonPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return usage.Detail{}, false @@ -314,7 +314,7 @@ func parseGeminiFamilyUsageDetail(node gjson.Result) usage.Detail { return detail } -func parseGeminiCLIUsage(data []byte) usage.Detail { +func ParseGeminiCLIUsage(data []byte) usage.Detail { usageNode := gjson.ParseBytes(data) node := usageNode.Get("response.usageMetadata") if !node.Exists() { @@ -326,7 +326,7 @@ func parseGeminiCLIUsage(data []byte) usage.Detail { return parseGeminiFamilyUsageDetail(node) } -func parseGeminiUsage(data []byte) usage.Detail { +func ParseGeminiUsage(data []byte) usage.Detail { usageNode := gjson.ParseBytes(data) node := usageNode.Get("usageMetadata") if !node.Exists() { @@ -338,7 +338,7 @@ func parseGeminiUsage(data []byte) usage.Detail { return parseGeminiFamilyUsageDetail(node) } -func parseGeminiStreamUsage(line []byte) (usage.Detail, bool) { +func ParseGeminiStreamUsage(line []byte) (usage.Detail, bool) { payload := 
jsonPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return usage.Detail{}, false @@ -353,7 +353,7 @@ func parseGeminiStreamUsage(line []byte) (usage.Detail, bool) { return parseGeminiFamilyUsageDetail(node), true } -func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) { +func ParseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) { payload := jsonPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return usage.Detail{}, false @@ -368,7 +368,7 @@ func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) { return parseGeminiFamilyUsageDetail(node), true } -func parseAntigravityUsage(data []byte) usage.Detail { +func ParseAntigravityUsage(data []byte) usage.Detail { usageNode := gjson.ParseBytes(data) node := usageNode.Get("response.usageMetadata") if !node.Exists() { @@ -383,7 +383,7 @@ func parseAntigravityUsage(data []byte) usage.Detail { return parseGeminiFamilyUsageDetail(node) } -func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) { +func ParseAntigravityStreamUsage(line []byte) (usage.Detail, bool) { payload := jsonPayload(line) if len(payload) == 0 || !gjson.ValidBytes(payload) { return usage.Detail{}, false @@ -552,6 +552,10 @@ func isStopChunkWithoutUsage(jsonBytes []byte) bool { return !hasUsageMetadata(jsonBytes) } +func JSONPayload(line []byte) []byte { + return jsonPayload(line) +} + func jsonPayload(line []byte) []byte { trimmed := bytes.TrimSpace(line) if len(trimmed) == 0 { diff --git a/internal/runtime/executor/usage_helpers_test.go b/internal/runtime/executor/helps/usage_helpers_test.go similarity index 94% rename from internal/runtime/executor/usage_helpers_test.go rename to internal/runtime/executor/helps/usage_helpers_test.go index 785f72b4..1a5648e8 100644 --- a/internal/runtime/executor/usage_helpers_test.go +++ b/internal/runtime/executor/helps/usage_helpers_test.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "testing" @@ -9,7 +9,7 @@ import ( func 
TestParseOpenAIUsageChatCompletions(t *testing.T) { data := []byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3,"prompt_tokens_details":{"cached_tokens":4},"completion_tokens_details":{"reasoning_tokens":5}}}`) - detail := parseOpenAIUsage(data) + detail := ParseOpenAIUsage(data) if detail.InputTokens != 1 { t.Fatalf("input tokens = %d, want %d", detail.InputTokens, 1) } @@ -29,7 +29,7 @@ func TestParseOpenAIUsageChatCompletions(t *testing.T) { func TestParseOpenAIUsageResponses(t *testing.T) { data := []byte(`{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30,"input_tokens_details":{"cached_tokens":7},"output_tokens_details":{"reasoning_tokens":9}}}`) - detail := parseOpenAIUsage(data) + detail := ParseOpenAIUsage(data) if detail.InputTokens != 10 { t.Fatalf("input tokens = %d, want %d", detail.InputTokens, 10) } @@ -48,7 +48,7 @@ func TestParseOpenAIUsageResponses(t *testing.T) { } func TestUsageReporterBuildRecordIncludesLatency(t *testing.T) { - reporter := &usageReporter{ + reporter := &UsageReporter{ provider: "openai", model: "gpt-5.4", requestedAt: time.Now().Add(-1500 * time.Millisecond), diff --git a/internal/runtime/executor/user_id_cache.go b/internal/runtime/executor/helps/user_id_cache.go similarity index 96% rename from internal/runtime/executor/user_id_cache.go rename to internal/runtime/executor/helps/user_id_cache.go index ff8efd9d..ad41fd9a 100644 --- a/internal/runtime/executor/user_id_cache.go +++ b/internal/runtime/executor/helps/user_id_cache.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "crypto/sha256" @@ -49,7 +49,7 @@ func userIDCacheKey(apiKey string) string { return hex.EncodeToString(sum[:]) } -func cachedUserID(apiKey string) string { +func CachedUserID(apiKey string) string { if apiKey == "" { return generateFakeUserID() } diff --git a/internal/runtime/executor/user_id_cache_test.go b/internal/runtime/executor/helps/user_id_cache_test.go similarity index 83% rename from 
internal/runtime/executor/user_id_cache_test.go rename to internal/runtime/executor/helps/user_id_cache_test.go index 420a3cad..b166576c 100644 --- a/internal/runtime/executor/user_id_cache_test.go +++ b/internal/runtime/executor/helps/user_id_cache_test.go @@ -1,4 +1,4 @@ -package executor +package helps import ( "testing" @@ -14,8 +14,8 @@ func resetUserIDCache() { func TestCachedUserID_ReusesWithinTTL(t *testing.T) { resetUserIDCache() - first := cachedUserID("api-key-1") - second := cachedUserID("api-key-1") + first := CachedUserID("api-key-1") + second := CachedUserID("api-key-1") if first == "" { t.Fatal("expected generated user_id to be non-empty") @@ -28,7 +28,7 @@ func TestCachedUserID_ReusesWithinTTL(t *testing.T) { func TestCachedUserID_ExpiresAfterTTL(t *testing.T) { resetUserIDCache() - expiredID := cachedUserID("api-key-expired") + expiredID := CachedUserID("api-key-expired") cacheKey := userIDCacheKey("api-key-expired") userIDCacheMu.Lock() userIDCache[cacheKey] = userIDCacheEntry{ @@ -37,7 +37,7 @@ func TestCachedUserID_ExpiresAfterTTL(t *testing.T) { } userIDCacheMu.Unlock() - newID := cachedUserID("api-key-expired") + newID := CachedUserID("api-key-expired") if newID == expiredID { t.Fatalf("expected expired user_id to be replaced, got %q", newID) } @@ -49,8 +49,8 @@ func TestCachedUserID_ExpiresAfterTTL(t *testing.T) { func TestCachedUserID_IsScopedByAPIKey(t *testing.T) { resetUserIDCache() - first := cachedUserID("api-key-1") - second := cachedUserID("api-key-2") + first := CachedUserID("api-key-1") + second := CachedUserID("api-key-2") if first == second { t.Fatalf("expected different API keys to have different user_ids, got %q", first) @@ -61,7 +61,7 @@ func TestCachedUserID_RenewsTTLOnHit(t *testing.T) { resetUserIDCache() key := "api-key-renew" - id := cachedUserID(key) + id := CachedUserID(key) cacheKey := userIDCacheKey(key) soon := time.Now() @@ -72,7 +72,7 @@ func TestCachedUserID_RenewsTTLOnHit(t *testing.T) { } userIDCacheMu.Unlock() 
- if refreshed := cachedUserID(key); refreshed != id { + if refreshed := CachedUserID(key); refreshed != id { t.Fatalf("expected cached user_id to be reused before expiry, got %q", refreshed) } diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index cc5cc33d..3e9e17fb 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -16,6 +16,7 @@ import ( "github.com/google/uuid" iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -66,7 +67,7 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -86,8 +87,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("openai") @@ -106,8 +107,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re } body = preserveReasoningContentInMessages(body) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := 
helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -122,7 +123,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: endpoint, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -134,10 +135,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -145,25 +146,25 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re log.Errorf("iflow executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} return resp, err } data, err := 
io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseOpenAIUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseOpenAIUsage(data)) // Ensure usage is recorded even if upstream omits usage metadata. - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) var param any // Note: TranslateNonStream uses req.Model (original with suffix) to preserve @@ -189,8 +190,8 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("openai") @@ -214,8 +215,8 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -230,7 +231,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: endpoint, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ 
-242,21 +243,21 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { data, _ := io.ReadAll(httpResp.Body) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("iflow executor: close response body error: %v", errClose) } - appendAPIResponseChunk(ctx, e.cfg, data) - logWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) err = statusErr{code: httpResp.StatusCode, msg: string(data)} return nil, err } @@ -275,9 +276,9 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseOpenAIStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseOpenAIStreamUsage(line); ok { + reporter.Publish(ctx, detail) } chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), ¶m) for i := range chunks { @@ -285,12 +286,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth 
*cliproxyauth.Au } } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } // Guarantee a usage record exists even if the stream never emitted usage data. - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) }() return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil @@ -303,17 +304,17 @@ func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false) - enc, err := tokenizerForModel(baseModel) + enc, err := helps.TokenizerForModel(baseModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } - count, err := countOpenAIChatTokens(enc, body) + count, err := helps.CountOpenAIChatTokens(enc, body) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: token counting failed: %w", err) } - usageJSON := buildOpenAIUsageJSON(count) + usageJSON := helps.BuildOpenAIUsageJSON(count) translated := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON) return cliproxyexecutor.Response{Payload: translated}, nil } diff --git a/internal/runtime/executor/kimi_executor.go b/internal/runtime/executor/kimi_executor.go index e7052ee2..ce7d2ddc 100644 --- a/internal/runtime/executor/kimi_executor.go +++ b/internal/runtime/executor/kimi_executor.go @@ -15,6 +15,7 @@ import ( kimiauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kimi" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" 
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -60,7 +61,7 @@ func (e *KimiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -76,8 +77,8 @@ func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req token := kimiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) to := sdktranslator.FromString("openai") originalPayloadSource := req.Payload @@ -100,8 +101,8 @@ func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, err = normalizeKimiToolMessageLinks(body) if err != nil { return resp, err @@ -119,7 +120,7 @@ func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -131,10 +132,10 @@ func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, 
auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -142,21 +143,21 @@ func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req log.Errorf("kimi executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} return resp, err } data, err := io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseOpenAIUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseOpenAIUsage(data)) var param any // Note: TranslateNonStream uses req.Model (original with suffix) to preserve // the original model name in the response for client compatibility. 
@@ -176,8 +177,8 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut baseModel := thinking.ParseSuffix(req.Model).ModelName token := kimiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) to := sdktranslator.FromString("openai") originalPayloadSource := req.Payload @@ -204,8 +205,8 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if err != nil { return nil, fmt.Errorf("kimi executor: failed to set stream_options in payload: %w", err) } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body, err = normalizeKimiToolMessageLinks(body) if err != nil { return nil, err @@ -223,7 +224,7 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -235,17 +236,17 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + 
helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("kimi executor: close response body error: %v", errClose) } @@ -265,9 +266,9 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseOpenAIStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseOpenAIStreamUsage(line); ok { + reporter.Publish(ctx, detail) } chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), ¶m) for i := range chunks { @@ -279,8 +280,8 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut out <- cliproxyexecutor.StreamChunk{Payload: doneChunks[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 3bb6e012..a03e4987 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ 
b/internal/runtime/executor/openai_compat_executor.go @@ -11,6 +11,7 @@ import ( "time" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -65,15 +66,15 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx context.Context, auth *cliproxyau if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) if baseURL == "" { @@ -95,8 +96,8 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A originalPayload := originalPayloadSource originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream) translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, opts.Stream) - requestedModel := payloadRequestedModel(opts, req.Model) - translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + translated = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel) 
if opts.Alt == "responses/compact" { if updated, errDelete := sjson.DeleteBytes(translated, "stream"); errDelete == nil { translated = updated @@ -129,7 +130,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -141,10 +142,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -152,23 +153,23 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A log.Errorf("openai compat executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: httpResp.StatusCode, msg: string(b)} return resp, err } body, err := io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + 
helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, body) - reporter.publish(ctx, parseOpenAIUsage(body)) + helps.AppendAPIResponseChunk(ctx, e.cfg, body) + reporter.Publish(ctx, helps.ParseOpenAIUsage(body)) // Ensure we at least record the request even if upstream doesn't return usage - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) // Translate response back to source format when needed var param any out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, body, ¶m) @@ -179,8 +180,8 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) { baseModel := thinking.ParseSuffix(req.Model).ModelName - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) if baseURL == "" { @@ -197,8 +198,8 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy originalPayload := originalPayloadSource originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true) - requestedModel := payloadRequestedModel(opts, req.Model) - translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + translated = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel) translated, err = thinking.ApplyThinking(translated, req.Model, 
from.String(), to.String(), e.Identifier()) if err != nil { @@ -232,7 +233,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -244,17 +245,17 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("openai compat executor: close response body error: %v", errClose) } @@ -274,9 +275,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseOpenAIStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, 
line) + if detail, ok := helps.ParseOpenAIStreamUsage(line); ok { + reporter.Publish(ctx, detail) } if len(line) == 0 { continue @@ -294,12 +295,12 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } // Ensure we record the request if no usage chunk was ever seen - reporter.ensurePublished(ctx) + reporter.EnsurePublished(ctx) }() return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil } @@ -318,17 +319,17 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau return cliproxyexecutor.Response{}, err } - enc, err := tokenizerForModel(modelForCounting) + enc, err := helps.TokenizerForModel(modelForCounting) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("openai compat executor: tokenizer init failed: %w", err) } - count, err := countOpenAIChatTokens(enc, translated) + count, err := helps.CountOpenAIChatTokens(enc, translated) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("openai compat executor: token counting failed: %w", err) } - usageJSON := buildOpenAIUsageJSON(count) + usageJSON := helps.BuildOpenAIUsageJSON(count) translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON) return cliproxyexecutor.Response{Payload: translatedUsage}, nil } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ff19dcb5..24f6c558 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -13,6 +13,7 @@ import ( qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + 
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -154,7 +155,7 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic cooldown := timeUntilNextDay() retryAfter = &cooldown - logWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown) + helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown) } return errCode, retryAfter } @@ -202,7 +203,7 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, if err := e.PrepareRequest(httpReq, auth); err != nil { return nil, err } - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) return httpClient.Do(httpReq) } @@ -217,7 +218,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req authID = auth.ID } if err := checkQwenRateLimit(authID); err != nil { - logWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) + helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) return resp, err } @@ -228,8 +229,8 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("openai") @@ -247,8 +248,8 @@ 
func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req return resp, err } - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -261,7 +262,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -273,10 +274,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } defer func() { @@ -284,23 +285,23 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req log.Errorf("qwen executor: close response body error: %v", errClose) } }() - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) + helps.AppendAPIResponseChunk(ctx, e.cfg, b) errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) 
- logWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter} return resp, err } data, err := io.ReadAll(httpResp.Body) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return resp, err } - appendAPIResponseChunk(ctx, e.cfg, data) - reporter.publish(ctx, parseOpenAIUsage(data)) + helps.AppendAPIResponseChunk(ctx, e.cfg, data) + reporter.Publish(ctx, helps.ParseOpenAIUsage(data)) var param any // Note: TranslateNonStream uses req.Model (original with suffix) to preserve // the original model name in the response for client compatibility. @@ -320,7 +321,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut authID = auth.ID } if err := checkQwenRateLimit(authID); err != nil { - logWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) + helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID)) return nil, err } @@ -331,8 +332,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) - defer reporter.trackFailure(ctx, &err) + reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.TrackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("openai") @@ -357,8 +358,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body, _ = sjson.SetRawBytes(body, "tools", 
[]byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) - requestedModel := payloadRequestedModel(opts, req.Model) - body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + requestedModel := helps.PayloadRequestedModel(opts, req.Model) + body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -371,7 +372,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut authLabel = auth.Label authType, authValue = auth.AccountInfo() } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{ URL: url, Method: http.MethodPost, Headers: httpReq.Header.Clone(), @@ -383,19 +384,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0) httpResp, err := httpClient.Do(httpReq) if err != nil { - recordAPIResponseError(ctx, e.cfg, err) + helps.RecordAPIResponseError(ctx, e.cfg, err) return nil, err } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) 
+ helps.AppendAPIResponseChunk(ctx, e.cfg, b) errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b) - logWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("qwen executor: close response body error: %v", errClose) } @@ -415,9 +416,9 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut var param any for scanner.Scan() { line := scanner.Bytes() - appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseOpenAIStreamUsage(line); ok { - reporter.publish(ctx, detail) + helps.AppendAPIResponseChunk(ctx, e.cfg, line) + if detail, ok := helps.ParseOpenAIStreamUsage(line); ok { + reporter.Publish(ctx, detail) } chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param) for i := range chunks { @@ -429,8 +430,8 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut out <- cliproxyexecutor.StreamChunk{Payload: doneChunks[i]} } if errScan := scanner.Err(); errScan != nil { - recordAPIResponseError(ctx, e.cfg, errScan) - reporter.publishFailure(ctx) + helps.RecordAPIResponseError(ctx, e.cfg, errScan) + reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} } }() @@ -449,17 +450,17 @@ func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, modelName = baseModel } - enc, err := tokenizerForModel(modelName) + enc, err := helps.TokenizerForModel(modelName) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("qwen executor: tokenizer init failed: %w", err) } - count, err := 
countOpenAIChatTokens(enc, body) + count, err := helps.CountOpenAIChatTokens(enc, body) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("qwen executor: token counting failed: %w", err) } - usageJSON := buildOpenAIUsageJSON(count) + usageJSON := helps.BuildOpenAIUsageJSON(count) translated := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON) return cliproxyexecutor.Response{Payload: translated}, nil } From 330e12d3c230af5877de7712aac5b5a932c5f775 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 10:53:14 +0800 Subject: [PATCH 29/42] fix(codex): conditionally set `Session_id` header for Mac OS user agents and clean up redundant logic --- internal/runtime/executor/codex_executor.go | 13 +++++++++---- .../runtime/executor/codex_websockets_executor.go | 6 ++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index d404302a..e48a4ac3 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -29,8 +29,8 @@ import ( ) const ( - codexUserAgent = "codex_cli_rs/0.116.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464" - codexOriginator = "codex_cli_rs" + codexUserAgent = "codex-tui/0.118.0 (Mac OS 26.3.1; arm64) iTerm.app/3.6.9 (codex-tui; 0.118.0)" + codexOriginator = "codex-tui" ) var dataTag = []byte("data:") @@ -629,7 +629,6 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form return nil, err } if cache.ID != "" { - httpReq.Header.Set("Conversation_id", cache.ID) httpReq.Header.Set("Session_id", cache.ID) } return httpReq, nil @@ -644,13 +643,19 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s ginHeaders = ginCtx.Request.Header } + if ginHeaders.Get("X-Codex-Beta-Features") != "" { + r.Header.Set("X-Codex-Beta-Features", ginHeaders.Get("X-Codex-Beta-Features")) + } misc.EnsureHeader(r.Header, ginHeaders, 
"Version", "") - misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString()) misc.EnsureHeader(r.Header, ginHeaders, "X-Codex-Turn-Metadata", "") misc.EnsureHeader(r.Header, ginHeaders, "X-Client-Request-Id", "") cfgUserAgent, _ := codexHeaderDefaults(cfg, auth) ensureHeaderWithConfigPrecedence(r.Header, ginHeaders, "User-Agent", cfgUserAgent, codexUserAgent) + if strings.Contains(r.Header.Get("User-Agent"), "Mac OS") { + misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString()) + } + if stream { r.Header.Set("Accept", "text/event-stream") } else { diff --git a/internal/runtime/executor/codex_websockets_executor.go b/internal/runtime/executor/codex_websockets_executor.go index fdfccd9a..7edbc35c 100644 --- a/internal/runtime/executor/codex_websockets_executor.go +++ b/internal/runtime/executor/codex_websockets_executor.go @@ -793,7 +793,6 @@ func applyCodexPromptCacheHeaders(from sdktranslator.Format, req cliproxyexecuto if cache.ID != "" { rawJSON, _ = sjson.SetBytes(rawJSON, "prompt_cache_key", cache.ID) headers.Set("Conversation_id", cache.ID) - headers.Set("Session_id", cache.ID) } return rawJSON, headers @@ -828,9 +827,12 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth * betaHeader = codexResponsesWebsocketBetaHeaderValue } headers.Set("OpenAI-Beta", betaHeader) - misc.EnsureHeader(headers, ginHeaders, "Session_id", uuid.NewString()) ensureHeaderWithConfigPrecedence(headers, ginHeaders, "User-Agent", cfgUserAgent, codexUserAgent) + if strings.Contains(headers.Get("User-Agent"), "Mac OS") { + misc.EnsureHeader(headers, ginHeaders, "Session_id", uuid.NewString()) + } + isAPIKey := false if auth != nil && auth.Attributes != nil { if v := strings.TrimSpace(auth.Attributes["api_key"]); v != "" { From ca11b236a7da1e0bc1c2c8ffd2d35454614b42e0 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 11:57:31 +0800 Subject: [PATCH 30/42] refactor(runtime, openai): simplify header management and remove 
redundant websocket logging logic --- .../executor/codex_websockets_executor.go | 5 +- .../openai/openai_responses_websocket.go | 77 ++----------------- 2 files changed, 9 insertions(+), 73 deletions(-) diff --git a/internal/runtime/executor/codex_websockets_executor.go b/internal/runtime/executor/codex_websockets_executor.go index 7edbc35c..afc255e3 100644 --- a/internal/runtime/executor/codex_websockets_executor.go +++ b/internal/runtime/executor/codex_websockets_executor.go @@ -811,7 +811,7 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth * ginHeaders = ginCtx.Request.Header.Clone() } - cfgUserAgent, cfgBetaFeatures := codexHeaderDefaults(cfg, auth) + _, cfgBetaFeatures := codexHeaderDefaults(cfg, auth) ensureHeaderWithPriority(headers, ginHeaders, "x-codex-beta-features", cfgBetaFeatures, "") misc.EnsureHeader(headers, ginHeaders, "x-codex-turn-state", "") misc.EnsureHeader(headers, ginHeaders, "x-codex-turn-metadata", "") @@ -827,11 +827,10 @@ func applyCodexWebsocketHeaders(ctx context.Context, headers http.Header, auth * betaHeader = codexResponsesWebsocketBetaHeaderValue } headers.Set("OpenAI-Beta", betaHeader) - ensureHeaderWithConfigPrecedence(headers, ginHeaders, "User-Agent", cfgUserAgent, codexUserAgent) - if strings.Contains(headers.Get("User-Agent"), "Mac OS") { misc.EnsureHeader(headers, ginHeaders, "Session_id", uuid.NewString()) } + headers.Del("User-Agent") isAPIKey := false if auth != nil && auth.Attributes != nil { diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index 15a6bda7..591552ae 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -33,9 +33,6 @@ const ( wsDoneMarker = "[DONE]" wsTurnStateHeader = "x-codex-turn-state" wsRequestBodyKey = "REQUEST_BODY_OVERRIDE" - wsPayloadLogMaxSize = 2048 - wsBodyLogMaxSize = 64 * 1024 - wsBodyLogTruncated = "\n[websocket 
log truncated]\n" ) var responsesWebsocketUpgrader = websocket.Upgrader{ @@ -894,71 +891,18 @@ func appendWebsocketEvent(builder *strings.Builder, eventType string, payload [] if builder == nil { return } - if builder.Len() >= wsBodyLogMaxSize { - return - } trimmedPayload := bytes.TrimSpace(payload) if len(trimmedPayload) == 0 { return } if builder.Len() > 0 { - if !appendWebsocketLogString(builder, "\n") { - return - } + builder.WriteString("\n") } - if !appendWebsocketLogString(builder, "websocket.") { - return - } - if !appendWebsocketLogString(builder, eventType) { - return - } - if !appendWebsocketLogString(builder, "\n") { - return - } - if !appendWebsocketLogBytes(builder, trimmedPayload, len(wsBodyLogTruncated)) { - appendWebsocketLogString(builder, wsBodyLogTruncated) - return - } - appendWebsocketLogString(builder, "\n") -} - -func appendWebsocketLogString(builder *strings.Builder, value string) bool { - if builder == nil { - return false - } - remaining := wsBodyLogMaxSize - builder.Len() - if remaining <= 0 { - return false - } - if len(value) <= remaining { - builder.WriteString(value) - return true - } - builder.WriteString(value[:remaining]) - return false -} - -func appendWebsocketLogBytes(builder *strings.Builder, value []byte, reserveForSuffix int) bool { - if builder == nil { - return false - } - remaining := wsBodyLogMaxSize - builder.Len() - if remaining <= 0 { - return false - } - if len(value) <= remaining { - builder.Write(value) - return true - } - limit := remaining - reserveForSuffix - if limit < 0 { - limit = 0 - } - if limit > len(value) { - limit = len(value) - } - builder.Write(value[:limit]) - return false + builder.WriteString("websocket.") + builder.WriteString(eventType) + builder.WriteString("\n") + builder.Write(trimmedPayload) + builder.WriteString("\n") } func websocketPayloadEventType(payload []byte) string { @@ -974,15 +918,8 @@ func websocketPayloadPreview(payload []byte) string { if len(trimmedPayload) == 0 { return "" } 
- preview := trimmedPayload - if len(preview) > wsPayloadLogMaxSize { - preview = preview[:wsPayloadLogMaxSize] - } - previewText := strings.ReplaceAll(string(preview), "\n", "\\n") + previewText := strings.ReplaceAll(string(trimmedPayload), "\n", "\\n") previewText = strings.ReplaceAll(previewText, "\r", "\\r") - if len(trimmedPayload) > wsPayloadLogMaxSize { - return fmt.Sprintf("%s...(truncated,total=%d)", previewText, len(trimmedPayload)) - } return previewText } From 1734aa166466293454807f1843859a535d66824e Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 12:51:12 +0800 Subject: [PATCH 31/42] fix(codex): prioritize websocket-enabled credentials across priority tiers in scheduler logic --- sdk/cliproxy/auth/scheduler.go | 34 ++++++++++++++++++++++++----- sdk/cliproxy/auth/scheduler_test.go | 26 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/sdk/cliproxy/auth/scheduler.go b/sdk/cliproxy/auth/scheduler.go index fd8c9490..1482bae6 100644 --- a/sdk/cliproxy/auth/scheduler.go +++ b/sdk/cliproxy/auth/scheduler.go @@ -219,6 +219,19 @@ func (s *authScheduler) pickMixed(ctx context.Context, providers []string, model if len(normalized) == 0 { return nil, "", &Error{Code: "provider_not_found", Message: "no provider supplied"} } + if len(normalized) == 1 { + // When a single provider is eligible, reuse pickSingle so provider-specific preferences + // (for example Codex websocket transport) are applied consistently. 
+ providerKey := normalized[0] + picked, errPick := s.pickSingle(ctx, providerKey, model, opts, tried) + if errPick != nil { + return nil, "", errPick + } + if picked == nil { + return nil, "", &Error{Code: "auth_not_found", Message: "no auth available"} + } + return picked, providerKey, nil + } pinnedAuthID := pinnedAuthIDFromMetadata(opts.Metadata) modelKey := canonicalModelKey(model) @@ -696,16 +709,25 @@ func (m *modelScheduler) highestReadyPriorityLocked(preferWebsocket bool, predic if m == nil { return 0, false } + if preferWebsocket { + // When downstream is websocket and Codex supports websocket transport, prefer websocket-enabled + // credentials even if they are in a lower priority tier than HTTP-only credentials. + for _, priority := range m.priorityOrder { + bucket := m.readyByPriority[priority] + if bucket == nil { + continue + } + if bucket.ws.pickFirst(predicate) != nil { + return priority, true + } + } + } for _, priority := range m.priorityOrder { bucket := m.readyByPriority[priority] if bucket == nil { continue } - view := &bucket.all - if preferWebsocket && len(bucket.ws.flat) > 0 { - view = &bucket.ws - } - if view.pickFirst(predicate) != nil { + if bucket.all.pickFirst(predicate) != nil { return priority, true } } @@ -723,7 +745,7 @@ func (m *modelScheduler) pickReadyAtPriorityLocked(preferWebsocket bool, priorit return nil } view := &bucket.all - if preferWebsocket && len(bucket.ws.flat) > 0 { + if preferWebsocket && bucket.ws.pickFirst(predicate) != nil { view = &bucket.ws } var picked *scheduledAuth diff --git a/sdk/cliproxy/auth/scheduler_test.go b/sdk/cliproxy/auth/scheduler_test.go index 3988c90a..d744ec32 100644 --- a/sdk/cliproxy/auth/scheduler_test.go +++ b/sdk/cliproxy/auth/scheduler_test.go @@ -208,6 +208,32 @@ func TestSchedulerPick_CodexWebsocketPrefersWebsocketEnabledSubset(t *testing.T) } } +func TestSchedulerPick_CodexWebsocketPrefersWebsocketEnabledAcrossPriorities(t *testing.T) { + t.Parallel() + + scheduler := 
newSchedulerForTest( + &RoundRobinSelector{}, + &Auth{ID: "codex-http", Provider: "codex", Attributes: map[string]string{"priority": "10"}}, + &Auth{ID: "codex-ws-a", Provider: "codex", Attributes: map[string]string{"priority": "0", "websockets": "true"}}, + &Auth{ID: "codex-ws-b", Provider: "codex", Attributes: map[string]string{"priority": "0", "websockets": "true"}}, + ) + + ctx := cliproxyexecutor.WithDownstreamWebsocket(context.Background()) + want := []string{"codex-ws-a", "codex-ws-b", "codex-ws-a"} + for index, wantID := range want { + got, errPick := scheduler.pickSingle(ctx, "codex", "", cliproxyexecutor.Options{}, nil) + if errPick != nil { + t.Fatalf("pickSingle() #%d error = %v", index, errPick) + } + if got == nil { + t.Fatalf("pickSingle() #%d auth = nil", index) + } + if got.ID != wantID { + t.Fatalf("pickSingle() #%d auth.ID = %q, want %q", index, got.ID, wantID) + } + } +} + func TestSchedulerPick_MixedProvidersUsesWeightedProviderRotationOverReadyCandidates(t *testing.T) { t.Parallel() From 105a21548f15b6c07f9c76aa916486e07af6262d Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 13:17:10 +0800 Subject: [PATCH 32/42] fix(codex): centralize session management with global store and add tests for executor session lifecycle --- .../executor/codex_websockets_executor.go | 128 +++++++++++++++--- .../codex_websockets_executor_store_test.go | 48 +++++++ sdk/cliproxy/service.go | 1 + 3 files changed, 159 insertions(+), 18 deletions(-) create mode 100644 internal/runtime/executor/codex_websockets_executor_store_test.go diff --git a/internal/runtime/executor/codex_websockets_executor.go b/internal/runtime/executor/codex_websockets_executor.go index afc255e3..dc9a8a79 100644 --- a/internal/runtime/executor/codex_websockets_executor.go +++ b/internal/runtime/executor/codex_websockets_executor.go @@ -46,10 +46,18 @@ const ( type CodexWebsocketsExecutor struct { *CodexExecutor - sessMu sync.Mutex + store *codexWebsocketSessionStore +} + +type 
codexWebsocketSessionStore struct { + mu sync.Mutex sessions map[string]*codexWebsocketSession } +var globalCodexWebsocketSessionStore = &codexWebsocketSessionStore{ + sessions: make(map[string]*codexWebsocketSession), +} + type codexWebsocketSession struct { sessionID string @@ -73,7 +81,7 @@ type codexWebsocketSession struct { func NewCodexWebsocketsExecutor(cfg *config.Config) *CodexWebsocketsExecutor { return &CodexWebsocketsExecutor{ CodexExecutor: NewCodexExecutor(cfg), - sessions: make(map[string]*codexWebsocketSession), + store: globalCodexWebsocketSessionStore, } } @@ -1058,16 +1066,23 @@ func (e *CodexWebsocketsExecutor) getOrCreateSession(sessionID string) *codexWeb if sessionID == "" { return nil } - e.sessMu.Lock() - defer e.sessMu.Unlock() - if e.sessions == nil { - e.sessions = make(map[string]*codexWebsocketSession) + if e == nil { + return nil } - if sess, ok := e.sessions[sessionID]; ok && sess != nil { + store := e.store + if store == nil { + store = globalCodexWebsocketSessionStore + } + store.mu.Lock() + defer store.mu.Unlock() + if store.sessions == nil { + store.sessions = make(map[string]*codexWebsocketSession) + } + if sess, ok := store.sessions[sessionID]; ok && sess != nil { return sess } sess := &codexWebsocketSession{sessionID: sessionID} - e.sessions[sessionID] = sess + store.sessions[sessionID] = sess return sess } @@ -1213,14 +1228,20 @@ func (e *CodexWebsocketsExecutor) CloseExecutionSession(sessionID string) { return } if sessionID == cliproxyauth.CloseAllExecutionSessionsID { - e.closeAllExecutionSessions("executor_replaced") + // Executor replacement can happen during hot reload (config/credential changes). + // Do not force-close upstream websocket sessions here, otherwise in-flight + // downstream websocket requests get interrupted. 
return } - e.sessMu.Lock() - sess := e.sessions[sessionID] - delete(e.sessions, sessionID) - e.sessMu.Unlock() + store := e.store + if store == nil { + store = globalCodexWebsocketSessionStore + } + store.mu.Lock() + sess := store.sessions[sessionID] + delete(store.sessions, sessionID) + store.mu.Unlock() e.closeExecutionSession(sess, "session_closed") } @@ -1230,15 +1251,19 @@ func (e *CodexWebsocketsExecutor) closeAllExecutionSessions(reason string) { return } - e.sessMu.Lock() - sessions := make([]*codexWebsocketSession, 0, len(e.sessions)) - for sessionID, sess := range e.sessions { - delete(e.sessions, sessionID) + store := e.store + if store == nil { + store = globalCodexWebsocketSessionStore + } + store.mu.Lock() + sessions := make([]*codexWebsocketSession, 0, len(store.sessions)) + for sessionID, sess := range store.sessions { + delete(store.sessions, sessionID) if sess != nil { sessions = append(sessions, sess) } } - e.sessMu.Unlock() + store.mu.Unlock() for i := range sessions { e.closeExecutionSession(sessions[i], reason) @@ -1246,6 +1271,10 @@ func (e *CodexWebsocketsExecutor) closeAllExecutionSessions(reason string) { } func (e *CodexWebsocketsExecutor) closeExecutionSession(sess *codexWebsocketSession, reason string) { + closeCodexWebsocketSession(sess, reason) +} + +func closeCodexWebsocketSession(sess *codexWebsocketSession, reason string) { if sess == nil { return } @@ -1286,6 +1315,69 @@ func logCodexWebsocketDisconnected(sessionID string, authID string, wsURL string log.Infof("codex websockets: upstream disconnected session=%s auth=%s url=%s reason=%s", strings.TrimSpace(sessionID), strings.TrimSpace(authID), strings.TrimSpace(wsURL), strings.TrimSpace(reason)) } +// CloseCodexWebsocketSessionsForAuthID closes all active Codex upstream websocket sessions +// associated with the supplied auth ID. 
+func CloseCodexWebsocketSessionsForAuthID(authID string, reason string) { + authID = strings.TrimSpace(authID) + if authID == "" { + return + } + reason = strings.TrimSpace(reason) + if reason == "" { + reason = "auth_removed" + } + + store := globalCodexWebsocketSessionStore + if store == nil { + return + } + + type sessionItem struct { + sessionID string + sess *codexWebsocketSession + } + + store.mu.Lock() + items := make([]sessionItem, 0, len(store.sessions)) + for sessionID, sess := range store.sessions { + items = append(items, sessionItem{sessionID: sessionID, sess: sess}) + } + store.mu.Unlock() + + matches := make([]sessionItem, 0) + for i := range items { + sess := items[i].sess + if sess == nil { + continue + } + sess.connMu.Lock() + sessAuthID := strings.TrimSpace(sess.authID) + sess.connMu.Unlock() + if sessAuthID == authID { + matches = append(matches, items[i]) + } + } + if len(matches) == 0 { + return + } + + toClose := make([]*codexWebsocketSession, 0, len(matches)) + store.mu.Lock() + for i := range matches { + current, ok := store.sessions[matches[i].sessionID] + if !ok || current == nil || current != matches[i].sess { + continue + } + delete(store.sessions, matches[i].sessionID) + toClose = append(toClose, current) + } + store.mu.Unlock() + + for i := range toClose { + closeCodexWebsocketSession(toClose[i], reason) + } +} + // CodexAutoExecutor routes Codex requests to the websocket transport only when: // 1. The downstream transport is websocket, and // 2. The selected auth enables websockets. 
diff --git a/internal/runtime/executor/codex_websockets_executor_store_test.go b/internal/runtime/executor/codex_websockets_executor_store_test.go new file mode 100644 index 00000000..1a23fa31 --- /dev/null +++ b/internal/runtime/executor/codex_websockets_executor_store_test.go @@ -0,0 +1,48 @@ +package executor + +import ( + "testing" + + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" +) + +func TestCodexWebsocketsExecutor_SessionStoreSurvivesExecutorReplacement(t *testing.T) { + sessionID := "test-session-store-survives-replace" + + globalCodexWebsocketSessionStore.mu.Lock() + delete(globalCodexWebsocketSessionStore.sessions, sessionID) + globalCodexWebsocketSessionStore.mu.Unlock() + + exec1 := NewCodexWebsocketsExecutor(nil) + sess1 := exec1.getOrCreateSession(sessionID) + if sess1 == nil { + t.Fatalf("expected session to be created") + } + + exec2 := NewCodexWebsocketsExecutor(nil) + sess2 := exec2.getOrCreateSession(sessionID) + if sess2 == nil { + t.Fatalf("expected session to be available across executors") + } + if sess1 != sess2 { + t.Fatalf("expected the same session instance across executors") + } + + exec1.CloseExecutionSession(cliproxyauth.CloseAllExecutionSessionsID) + + globalCodexWebsocketSessionStore.mu.Lock() + _, stillPresent := globalCodexWebsocketSessionStore.sessions[sessionID] + globalCodexWebsocketSessionStore.mu.Unlock() + if !stillPresent { + t.Fatalf("expected session to remain after executor replacement close marker") + } + + exec2.CloseExecutionSession(sessionID) + + globalCodexWebsocketSessionStore.mu.Lock() + _, presentAfterClose := globalCodexWebsocketSessionStore.sessions[sessionID] + globalCodexWebsocketSessionStore.mu.Unlock() + if presentAfterClose { + t.Fatalf("expected session to be removed after explicit close") + } +} diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index ffbd7289..3103554a 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -335,6 +335,7 @@ func (s 
*Service) applyCoreAuthRemoval(ctx context.Context, id string) { log.Errorf("failed to disable auth %s: %v", id, err) } if strings.EqualFold(strings.TrimSpace(existing.Provider), "codex") { + executor.CloseCodexWebsocketSessionsForAuthID(existing.ID, "auth_removed") s.ensureExecutorsForAuth(existing) } } From d1c07a091eb5641d97ace2521def4dc475ff9345 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 17:16:49 +0800 Subject: [PATCH 33/42] fix(openai): add websocket tool call repair with caching and tests to improve transcript consistency --- .../openai/openai_responses_websocket.go | 101 +++++- .../openai/openai_responses_websocket_test.go | 126 +++++++ ...nai_responses_websocket_toolcall_repair.go | 327 ++++++++++++++++++ 3 files changed, 547 insertions(+), 7 deletions(-) create mode 100644 sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index 591552ae..b8076601 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -33,6 +33,8 @@ const ( wsDoneMarker = "[DONE]" wsTurnStateHeader = "x-codex-turn-state" wsRequestBodyKey = "REQUEST_BODY_OVERRIDE" + wsBodyLogMaxSize = 32 * 1024 + wsBodyLogTruncated = "\n...[truncated]\n" ) var responsesWebsocketUpgrader = websocket.Upgrader{ @@ -52,6 +54,7 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) { return } passthroughSessionID := uuid.NewString() + downstreamSessionKey := websocketDownstreamSessionKey(c.Request) clientRemoteAddr := "" if c != nil && c.Request != nil { clientRemoteAddr = strings.TrimSpace(c.Request.RemoteAddr) @@ -164,6 +167,9 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) { } continue } + + requestJSON = repairResponsesWebsocketToolCalls(downstreamSessionKey, requestJSON) + updatedLastRequest = bytes.Clone(requestJSON) lastRequest 
= updatedLastRequest modelName := gjson.GetBytes(requestJSON, "model").String() @@ -324,6 +330,10 @@ func normalizeResponseSubsequentRequest(rawJSON []byte, lastRequest []byte, last Error: fmt.Errorf("invalid request input: %w", errMerge), } } + dedupedInput, errDedupeFunctionCalls := dedupeFunctionCallsByCallID(mergedInput) + if errDedupeFunctionCalls == nil { + mergedInput = dedupedInput + } normalized, errDelete := sjson.DeleteBytes(rawJSON, "type") if errDelete != nil { @@ -355,7 +365,8 @@ func normalizeResponseSubsequentRequest(rawJSON []byte, lastRequest []byte, last } func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bool { - if strings.TrimSpace(gjson.GetBytes(rawJSON, "type").String()) != wsRequestTypeCreate { + requestType := strings.TrimSpace(gjson.GetBytes(rawJSON, "type").String()) + if requestType != wsRequestTypeCreate && requestType != wsRequestTypeAppend { return false } if strings.TrimSpace(gjson.GetBytes(rawJSON, "previous_response_id").String()) != "" { @@ -402,6 +413,42 @@ func normalizeResponseTranscriptReplacement(rawJSON []byte, lastRequest []byte) return bytes.Clone(normalized) } +func dedupeFunctionCallsByCallID(rawArray string) (string, error) { + rawArray = strings.TrimSpace(rawArray) + if rawArray == "" { + return "[]", nil + } + var items []json.RawMessage + if errUnmarshal := json.Unmarshal([]byte(rawArray), &items); errUnmarshal != nil { + return "", errUnmarshal + } + + seenCallIDs := make(map[string]struct{}, len(items)) + filtered := make([]json.RawMessage, 0, len(items)) + for _, item := range items { + if len(item) == 0 { + continue + } + itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String()) + if itemType == "function_call" { + callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) + if callID != "" { + if _, ok := seenCallIDs[callID]; ok { + continue + } + seenCallIDs[callID] = struct{}{} + } + } + filtered = append(filtered, item) + } + + out, errMarshal := 
json.Marshal(filtered) + if errMarshal != nil { + return "", errMarshal + } + return string(out), nil +} + func websocketUpstreamSupportsIncrementalInput(attributes map[string]string, metadata map[string]any) bool { if len(attributes) > 0 { if raw := strings.TrimSpace(attributes["websockets"]); raw != "" { @@ -667,6 +714,10 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket( ) ([]byte, error) { completed := false completedOutput := []byte("[]") + downstreamSessionKey := "" + if c != nil && c.Request != nil { + downstreamSessionKey = websocketDownstreamSessionKey(c.Request) + } for { select { @@ -744,6 +795,7 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket( payloads := websocketJSONPayloadsFromChunk(chunk) for i := range payloads { + recordResponsesWebsocketToolCallsFromPayload(downstreamSessionKey, payloads[i]) eventType := gjson.GetBytes(payloads[i], "type").String() if eventType == wsEventTypeCompleted { completed = true @@ -891,18 +943,53 @@ func appendWebsocketEvent(builder *strings.Builder, eventType string, payload [] if builder == nil { return } + if builder.Len() >= wsBodyLogMaxSize { + return + } trimmedPayload := bytes.TrimSpace(payload) if len(trimmedPayload) == 0 { return } + + separator := []byte{} if builder.Len() > 0 { - builder.WriteString("\n") + separator = []byte("\n") } - builder.WriteString("websocket.") - builder.WriteString(eventType) - builder.WriteString("\n") - builder.Write(trimmedPayload) - builder.WriteString("\n") + header := []byte("websocket." 
+ eventType + "\n") + footer := []byte("\n") + entryLen := len(separator) + len(header) + len(trimmedPayload) + len(footer) + remaining := wsBodyLogMaxSize - builder.Len() + + if entryLen <= remaining { + builder.Write(separator) + builder.Write(header) + builder.Write(trimmedPayload) + builder.Write(footer) + return + } + + marker := []byte(wsBodyLogTruncated) + if len(marker) > remaining { + builder.Write(marker[:remaining]) + return + } + + allowed := remaining - len(marker) + parts := [][]byte{separator, header, trimmedPayload, footer} + for _, part := range parts { + if allowed <= 0 { + break + } + if len(part) <= allowed { + builder.Write(part) + allowed -= len(part) + continue + } + builder.Write(part[:allowed]) + allowed = 0 + break + } + builder.Write(marker) } func websocketPayloadEventType(payload []byte) string { diff --git a/sdk/api/handlers/openai/openai_responses_websocket_test.go b/sdk/api/handlers/openai/openai_responses_websocket_test.go index 5619e6b1..9e2a1ed6 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket_test.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go @@ -10,6 +10,7 @@ import ( "strings" "sync" "testing" + "time" "github.com/gin-gonic/gin" "github.com/gorilla/websocket" @@ -442,6 +443,108 @@ func TestSetWebsocketRequestBody(t *testing.T) { } } +func TestRepairResponsesWebsocketToolCallsInsertsCachedOutput(t *testing.T) { + cache := newWebsocketToolOutputCache(time.Minute, 10) + sessionKey := "session-1" + + cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"function_call_output","call_id":"call-1","output":"ok"}]}`) + warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm) + if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" { + t.Fatalf("expected warmup output to remain") + } + + raw := []byte(`{"input":[{"type":"function_call","call_id":"call-1","name":"tool"},{"type":"message","id":"msg-1"}]}`) + repaired := 
repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw) + + input := gjson.GetBytes(repaired, "input").Array() + if len(input) != 3 { + t.Fatalf("repaired input len = %d, want 3", len(input)) + } + if input[0].Get("type").String() != "function_call" || input[0].Get("call_id").String() != "call-1" { + t.Fatalf("unexpected first item: %s", input[0].Raw) + } + if input[1].Get("type").String() != "function_call_output" || input[1].Get("call_id").String() != "call-1" { + t.Fatalf("missing inserted output: %s", input[1].Raw) + } + if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" { + t.Fatalf("unexpected trailing item: %s", input[2].Raw) + } +} + +func TestRepairResponsesWebsocketToolCallsDropsOrphanFunctionCall(t *testing.T) { + cache := newWebsocketToolOutputCache(time.Minute, 10) + sessionKey := "session-1" + + raw := []byte(`{"input":[{"type":"function_call","call_id":"call-1","name":"tool"},{"type":"message","id":"msg-1"}]}`) + repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw) + + input := gjson.GetBytes(repaired, "input").Array() + if len(input) != 1 { + t.Fatalf("repaired input len = %d, want 1", len(input)) + } + if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" { + t.Fatalf("unexpected remaining item: %s", input[0].Raw) + } +} + +func TestRepairResponsesWebsocketToolCallsInsertsCachedCallForOrphanOutput(t *testing.T) { + outputCache := newWebsocketToolOutputCache(time.Minute, 10) + callCache := newWebsocketToolOutputCache(time.Minute, 10) + sessionKey := "session-1" + + callCache.record(sessionKey, "call-1", []byte(`{"type":"function_call","call_id":"call-1","name":"tool"}`)) + + raw := []byte(`{"input":[{"type":"function_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`) + repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw) + + input := gjson.GetBytes(repaired, 
"input").Array() + if len(input) != 3 { + t.Fatalf("repaired input len = %d, want 3", len(input)) + } + if input[0].Get("type").String() != "function_call" || input[0].Get("call_id").String() != "call-1" { + t.Fatalf("missing inserted call: %s", input[0].Raw) + } + if input[1].Get("type").String() != "function_call_output" || input[1].Get("call_id").String() != "call-1" { + t.Fatalf("unexpected output item: %s", input[1].Raw) + } + if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" { + t.Fatalf("unexpected trailing item: %s", input[2].Raw) + } +} + +func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *testing.T) { + outputCache := newWebsocketToolOutputCache(time.Minute, 10) + callCache := newWebsocketToolOutputCache(time.Minute, 10) + sessionKey := "session-1" + + raw := []byte(`{"input":[{"type":"function_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`) + repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw) + + input := gjson.GetBytes(repaired, "input").Array() + if len(input) != 1 { + t.Fatalf("repaired input len = %d, want 1", len(input)) + } + if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" { + t.Fatalf("unexpected remaining item: %s", input[0].Raw) + } +} + +func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) { + cache := newWebsocketToolOutputCache(time.Minute, 10) + sessionKey := "session-1" + + payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"function_call","id":"fc-1","call_id":"call-1","name":"tool","arguments":"{}"}]}}`) + recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload) + + cached, ok := cache.get(sessionKey, "call-1") + if !ok { + t.Fatalf("expected cached tool call") + } + if gjson.GetBytes(cached, "type").String() != "function_call" || gjson.GetBytes(cached, 
"call_id").String() != "call-1" { + t.Fatalf("unexpected cached tool call: %s", cached) + } +} + func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) { gin.SetMode(gin.TestMode) @@ -767,6 +870,29 @@ func TestNormalizeResponsesWebsocketRequestDoesNotTreatDeveloperMessageAsReplace } } +func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t *testing.T) { + lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"function_call","id":"fc-1","call_id":"call-1"},{"type":"function_call_output","id":"tool-out-1","call_id":"call-1"}]}`) + lastResponseOutput := []byte(`[ + {"type":"function_call","id":"fc-1","call_id":"call-1","name":"tool"} + ]`) + raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`) + + normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput) + if errMsg != nil { + t.Fatalf("unexpected error: %v", errMsg.Error) + } + + items := gjson.GetBytes(normalized, "input").Array() + if len(items) != 3 { + t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized) + } + if items[0].Get("id").String() != "fc-1" || + items[1].Get("id").String() != "tool-out-1" || + items[2].Get("id").String() != "msg-2" { + t.Fatalf("unexpected merged input order: %s", normalized) + } +} + func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) { gin.SetMode(gin.TestMode) diff --git a/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go b/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go new file mode 100644 index 00000000..8333bce6 --- /dev/null +++ b/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go @@ -0,0 +1,327 @@ +package openai + +import ( + "encoding/json" + "net/http" + "strings" + "sync" + "time" + + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +const ( + websocketToolOutputCacheMaxPerSession = 256 + 
websocketToolOutputCacheTTL = 30 * time.Minute +) + +var defaultWebsocketToolOutputCache = newWebsocketToolOutputCache(websocketToolOutputCacheTTL, websocketToolOutputCacheMaxPerSession) +var defaultWebsocketToolCallCache = newWebsocketToolOutputCache(websocketToolOutputCacheTTL, websocketToolOutputCacheMaxPerSession) + +type websocketToolOutputCache struct { + mu sync.Mutex + ttl time.Duration + maxPerSession int + sessions map[string]*websocketToolOutputSession +} + +type websocketToolOutputSession struct { + lastSeen time.Time + outputs map[string]json.RawMessage + order []string +} + +func newWebsocketToolOutputCache(ttl time.Duration, maxPerSession int) *websocketToolOutputCache { + if ttl <= 0 { + ttl = websocketToolOutputCacheTTL + } + if maxPerSession <= 0 { + maxPerSession = websocketToolOutputCacheMaxPerSession + } + return &websocketToolOutputCache{ + ttl: ttl, + maxPerSession: maxPerSession, + sessions: make(map[string]*websocketToolOutputSession), + } +} + +func (c *websocketToolOutputCache) record(sessionKey string, callID string, item json.RawMessage) { + sessionKey = strings.TrimSpace(sessionKey) + callID = strings.TrimSpace(callID) + if sessionKey == "" || callID == "" || c == nil { + return + } + + now := time.Now() + c.mu.Lock() + defer c.mu.Unlock() + + c.cleanupLocked(now) + + session, ok := c.sessions[sessionKey] + if !ok || session == nil { + session = &websocketToolOutputSession{ + lastSeen: now, + outputs: make(map[string]json.RawMessage), + } + c.sessions[sessionKey] = session + } + session.lastSeen = now + + if _, exists := session.outputs[callID]; !exists { + session.order = append(session.order, callID) + } + session.outputs[callID] = append(json.RawMessage(nil), item...) 
+ + for len(session.order) > c.maxPerSession { + evict := session.order[0] + session.order = session.order[1:] + delete(session.outputs, evict) + } +} + +func (c *websocketToolOutputCache) get(sessionKey string, callID string) (json.RawMessage, bool) { + sessionKey = strings.TrimSpace(sessionKey) + callID = strings.TrimSpace(callID) + if sessionKey == "" || callID == "" || c == nil { + return nil, false + } + + now := time.Now() + c.mu.Lock() + defer c.mu.Unlock() + + c.cleanupLocked(now) + + session, ok := c.sessions[sessionKey] + if !ok || session == nil { + return nil, false + } + session.lastSeen = now + item, ok := session.outputs[callID] + if !ok || len(item) == 0 { + return nil, false + } + return append(json.RawMessage(nil), item...), true +} + +func (c *websocketToolOutputCache) cleanupLocked(now time.Time) { + if c == nil || c.ttl <= 0 { + return + } + + for key, session := range c.sessions { + if session == nil { + delete(c.sessions, key) + continue + } + if now.Sub(session.lastSeen) > c.ttl { + delete(c.sessions, key) + } + } +} + +func websocketDownstreamSessionKey(req *http.Request) string { + if req == nil { + return "" + } + if sessionID := strings.TrimSpace(req.Header.Get("Session_id")); sessionID != "" { + return sessionID + } + if requestID := strings.TrimSpace(req.Header.Get("X-Client-Request-Id")); requestID != "" { + return requestID + } + if raw := strings.TrimSpace(req.Header.Get("X-Codex-Turn-Metadata")); raw != "" { + if sessionID := strings.TrimSpace(gjson.Get(raw, "session_id").String()); sessionID != "" { + return sessionID + } + } + return "" +} + +func repairResponsesWebsocketToolCalls(sessionKey string, payload []byte) []byte { + return repairResponsesWebsocketToolCallsWithCaches(defaultWebsocketToolOutputCache, defaultWebsocketToolCallCache, sessionKey, payload) +} + +func repairResponsesWebsocketToolCallsWithCache(cache *websocketToolOutputCache, sessionKey string, payload []byte) []byte { + return 
repairResponsesWebsocketToolCallsWithCaches(cache, nil, sessionKey, payload) +} + +func repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache *websocketToolOutputCache, sessionKey string, payload []byte) []byte { + sessionKey = strings.TrimSpace(sessionKey) + if sessionKey == "" || outputCache == nil || len(payload) == 0 { + return payload + } + + input := gjson.GetBytes(payload, "input") + if !input.Exists() || !input.IsArray() { + return payload + } + + allowOrphanOutputs := strings.TrimSpace(gjson.GetBytes(payload, "previous_response_id").String()) != "" + updatedRaw, errRepair := repairResponsesToolCallsArray(outputCache, callCache, sessionKey, input.Raw, allowOrphanOutputs) + if errRepair != nil || updatedRaw == "" || updatedRaw == input.Raw { + return payload + } + + updated, errSet := sjson.SetRawBytes(payload, "input", []byte(updatedRaw)) + if errSet != nil { + return payload + } + return updated +} + +func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCache, sessionKey string, rawArray string, allowOrphanOutputs bool) (string, error) { + rawArray = strings.TrimSpace(rawArray) + if rawArray == "" { + return "[]", nil + } + + var items []json.RawMessage + if errUnmarshal := json.Unmarshal([]byte(rawArray), &items); errUnmarshal != nil { + return "", errUnmarshal + } + + // First pass: record tool outputs and remember which call_ids have outputs in this payload. 
+ outputPresent := make(map[string]struct{}, len(items)) + callPresent := make(map[string]struct{}, len(items)) + for _, item := range items { + if len(item) == 0 { + continue + } + itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String()) + switch itemType { + case "function_call_output": + callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) + if callID == "" { + continue + } + outputPresent[callID] = struct{}{} + outputCache.record(sessionKey, callID, item) + case "function_call": + callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) + if callID == "" { + continue + } + callPresent[callID] = struct{}{} + if callCache != nil { + callCache.record(sessionKey, callID, item) + } + } + } + + filtered := make([]json.RawMessage, 0, len(items)) + insertedCalls := make(map[string]struct{}, len(items)) + for _, item := range items { + if len(item) == 0 { + continue + } + itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String()) + if itemType == "function_call_output" { + callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) + if callID == "" { + // Upstream rejects tool outputs without a call_id; drop it. + continue + } + + if allowOrphanOutputs { + filtered = append(filtered, item) + continue + } + + if _, ok := callPresent[callID]; ok { + filtered = append(filtered, item) + continue + } + + if callCache != nil { + if cached, ok := callCache.get(sessionKey, callID); ok { + if _, already := insertedCalls[callID]; !already { + filtered = append(filtered, cached) + insertedCalls[callID] = struct{}{} + callPresent[callID] = struct{}{} + } + filtered = append(filtered, item) + continue + } + } + + // Drop orphaned function_call_output items; upstream rejects transcripts with missing calls. 
+ continue + } + if itemType != "function_call" { + filtered = append(filtered, item) + continue + } + + callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) + if callID == "" { + // Upstream rejects tool calls without a call_id; drop it. + continue + } + + if _, ok := outputPresent[callID]; ok { + filtered = append(filtered, item) + continue + } + + if cached, ok := outputCache.get(sessionKey, callID); ok { + filtered = append(filtered, item) + filtered = append(filtered, cached) + outputPresent[callID] = struct{}{} + continue + } + + // Drop orphaned function_call items; upstream rejects transcripts with missing outputs. + } + + out, errMarshal := json.Marshal(filtered) + if errMarshal != nil { + return "", errMarshal + } + return string(out), nil +} + +func recordResponsesWebsocketToolCallsFromPayload(sessionKey string, payload []byte) { + recordResponsesWebsocketToolCallsFromPayloadWithCache(defaultWebsocketToolCallCache, sessionKey, payload) +} + +func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolOutputCache, sessionKey string, payload []byte) { + sessionKey = strings.TrimSpace(sessionKey) + if sessionKey == "" || cache == nil || len(payload) == 0 { + return + } + + eventType := strings.TrimSpace(gjson.GetBytes(payload, "type").String()) + switch eventType { + case "response.completed": + output := gjson.GetBytes(payload, "response.output") + if !output.Exists() || !output.IsArray() { + return + } + for _, item := range output.Array() { + if strings.TrimSpace(item.Get("type").String()) != "function_call" { + continue + } + callID := strings.TrimSpace(item.Get("call_id").String()) + if callID == "" { + continue + } + cache.record(sessionKey, callID, json.RawMessage(item.Raw)) + } + case "response.output_item.added", "response.output_item.done": + item := gjson.GetBytes(payload, "item") + if !item.Exists() || !item.IsObject() { + return + } + if strings.TrimSpace(item.Get("type").String()) != "function_call" { + 
return + } + callID := strings.TrimSpace(item.Get("call_id").String()) + if callID == "" { + return + } + cache.record(sessionKey, callID, json.RawMessage(item.Raw)) + } +} From acf98ed10e9bcf39c332bf79098f8a4d87d4d1d8 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 1 Apr 2026 17:28:50 +0800 Subject: [PATCH 34/42] fix(openai): add session reference counter and cache lifecycle management for websocket tools --- .../openai/openai_responses_websocket.go | 2 + ...nai_responses_websocket_toolcall_repair.go | 87 +++++++++++++++++-- 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index b8076601..6c43e931 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -55,6 +55,7 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) { } passthroughSessionID := uuid.NewString() downstreamSessionKey := websocketDownstreamSessionKey(c.Request) + retainResponsesWebsocketToolCaches(downstreamSessionKey) clientRemoteAddr := "" if c != nil && c.Request != nil { clientRemoteAddr = strings.TrimSpace(c.Request.RemoteAddr) @@ -63,6 +64,7 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) { var wsTerminateErr error var wsBodyLog strings.Builder defer func() { + releaseResponsesWebsocketToolCaches(downstreamSessionKey) if wsTerminateErr != nil { // log.Infof("responses websocket: session closing id=%s reason=%v", passthroughSessionID, wsTerminateErr) } else { diff --git a/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go b/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go index 8333bce6..530aca96 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go @@ -16,8 +16,9 @@ const ( websocketToolOutputCacheTTL = 30 * 
time.Minute ) -var defaultWebsocketToolOutputCache = newWebsocketToolOutputCache(websocketToolOutputCacheTTL, websocketToolOutputCacheMaxPerSession) -var defaultWebsocketToolCallCache = newWebsocketToolOutputCache(websocketToolOutputCacheTTL, websocketToolOutputCacheMaxPerSession) +var defaultWebsocketToolOutputCache = newWebsocketToolOutputCache(0, websocketToolOutputCacheMaxPerSession) +var defaultWebsocketToolCallCache = newWebsocketToolOutputCache(0, websocketToolOutputCacheMaxPerSession) +var defaultWebsocketToolSessionRefs = newWebsocketToolSessionRefCounter() type websocketToolOutputCache struct { mu sync.Mutex @@ -33,7 +34,7 @@ type websocketToolOutputSession struct { } func newWebsocketToolOutputCache(ttl time.Duration, maxPerSession int) *websocketToolOutputCache { - if ttl <= 0 { + if ttl < 0 { ttl = websocketToolOutputCacheTTL } if maxPerSession <= 0 { @@ -122,13 +123,22 @@ func (c *websocketToolOutputCache) cleanupLocked(now time.Time) { } } +func (c *websocketToolOutputCache) deleteSession(sessionKey string) { + sessionKey = strings.TrimSpace(sessionKey) + if sessionKey == "" || c == nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + delete(c.sessions, sessionKey) +} + func websocketDownstreamSessionKey(req *http.Request) string { if req == nil { return "" } - if sessionID := strings.TrimSpace(req.Header.Get("Session_id")); sessionID != "" { - return sessionID - } if requestID := strings.TrimSpace(req.Header.Get("X-Client-Request-Id")); requestID != "" { return requestID } @@ -137,9 +147,74 @@ func websocketDownstreamSessionKey(req *http.Request) string { return sessionID } } + if sessionID := strings.TrimSpace(req.Header.Get("Session_id")); sessionID != "" { + return sessionID + } return "" } +type websocketToolSessionRefCounter struct { + mu sync.Mutex + counts map[string]int +} + +func newWebsocketToolSessionRefCounter() *websocketToolSessionRefCounter { + return &websocketToolSessionRefCounter{counts: make(map[string]int)} +} + +func (c 
*websocketToolSessionRefCounter) acquire(sessionKey string) { + sessionKey = strings.TrimSpace(sessionKey) + if sessionKey == "" || c == nil { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + c.counts[sessionKey]++ +} + +func (c *websocketToolSessionRefCounter) release(sessionKey string) bool { + sessionKey = strings.TrimSpace(sessionKey) + if sessionKey == "" || c == nil { + return false + } + + c.mu.Lock() + defer c.mu.Unlock() + + count := c.counts[sessionKey] + if count <= 1 { + delete(c.counts, sessionKey) + return true + } + c.counts[sessionKey] = count - 1 + return false +} + +func retainResponsesWebsocketToolCaches(sessionKey string) { + if defaultWebsocketToolSessionRefs == nil { + return + } + defaultWebsocketToolSessionRefs.acquire(sessionKey) +} + +func releaseResponsesWebsocketToolCaches(sessionKey string) { + if defaultWebsocketToolSessionRefs == nil { + return + } + if !defaultWebsocketToolSessionRefs.release(sessionKey) { + return + } + + if defaultWebsocketToolOutputCache != nil { + defaultWebsocketToolOutputCache.deleteSession(sessionKey) + } + if defaultWebsocketToolCallCache != nil { + defaultWebsocketToolCallCache.deleteSession(sessionKey) + } +} + func repairResponsesWebsocketToolCalls(sessionKey string, payload []byte) []byte { return repairResponsesWebsocketToolCallsWithCaches(defaultWebsocketToolOutputCache, defaultWebsocketToolCallCache, sessionKey, payload) } From 51a4379bf4b14a10445c642bec33be566c8b18e7 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:11:43 +0800 Subject: [PATCH 35/42] refactor(openai): remove websocket body log truncation limit --- .../openai/openai_responses_websocket.go | 49 +++---------------- .../openai/openai_responses_websocket_test.go | 27 ---------- 2 files changed, 6 insertions(+), 70 deletions(-) diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index 6c43e931..9f065efd 
100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -33,8 +33,6 @@ const ( wsDoneMarker = "[DONE]" wsTurnStateHeader = "x-codex-turn-state" wsRequestBodyKey = "REQUEST_BODY_OVERRIDE" - wsBodyLogMaxSize = 32 * 1024 - wsBodyLogTruncated = "\n...[truncated]\n" ) var responsesWebsocketUpgrader = websocket.Upgrader{ @@ -945,53 +943,18 @@ func appendWebsocketEvent(builder *strings.Builder, eventType string, payload [] if builder == nil { return } - if builder.Len() >= wsBodyLogMaxSize { - return - } trimmedPayload := bytes.TrimSpace(payload) if len(trimmedPayload) == 0 { return } - - separator := []byte{} if builder.Len() > 0 { - separator = []byte("\n") + builder.WriteString("\n") } - header := []byte("websocket." + eventType + "\n") - footer := []byte("\n") - entryLen := len(separator) + len(header) + len(trimmedPayload) + len(footer) - remaining := wsBodyLogMaxSize - builder.Len() - - if entryLen <= remaining { - builder.Write(separator) - builder.Write(header) - builder.Write(trimmedPayload) - builder.Write(footer) - return - } - - marker := []byte(wsBodyLogTruncated) - if len(marker) > remaining { - builder.Write(marker[:remaining]) - return - } - - allowed := remaining - len(marker) - parts := [][]byte{separator, header, trimmedPayload, footer} - for _, part := range parts { - if allowed <= 0 { - break - } - if len(part) <= allowed { - builder.Write(part) - allowed -= len(part) - continue - } - builder.Write(part[:allowed]) - allowed = 0 - break - } - builder.Write(marker) + builder.WriteString("websocket.") + builder.WriteString(eventType) + builder.WriteString("\n") + builder.Write(trimmedPayload) + builder.WriteString("\n") } func websocketPayloadEventType(payload []byte) string { diff --git a/sdk/api/handlers/openai/openai_responses_websocket_test.go b/sdk/api/handlers/openai/openai_responses_websocket_test.go index 9e2a1ed6..157d6e2f 100644 --- 
a/sdk/api/handlers/openai/openai_responses_websocket_test.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go @@ -392,33 +392,6 @@ func TestAppendWebsocketEvent(t *testing.T) { } } -func TestAppendWebsocketEventTruncatesAtLimit(t *testing.T) { - var builder strings.Builder - payload := bytes.Repeat([]byte("x"), wsBodyLogMaxSize) - - appendWebsocketEvent(&builder, "request", payload) - - got := builder.String() - if len(got) > wsBodyLogMaxSize { - t.Fatalf("body log len = %d, want <= %d", len(got), wsBodyLogMaxSize) - } - if !strings.Contains(got, wsBodyLogTruncated) { - t.Fatalf("expected truncation marker in body log") - } -} - -func TestAppendWebsocketEventNoGrowthAfterLimit(t *testing.T) { - var builder strings.Builder - appendWebsocketEvent(&builder, "request", bytes.Repeat([]byte("x"), wsBodyLogMaxSize)) - initial := builder.String() - - appendWebsocketEvent(&builder, "response", []byte(`{"type":"response.completed"}`)) - - if builder.String() != initial { - t.Fatalf("builder grew after reaching limit") - } -} - func TestSetWebsocketRequestBody(t *testing.T) { gin.SetMode(gin.TestMode) recorder := httptest.NewRecorder() From caa529c282303b171c180b92a772a1a766d8fdbb Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 1 Apr 2026 20:16:01 +0800 Subject: [PATCH 36/42] fix(openai): improve client IP retrieval in websocket handler --- .../openai/openai_responses_websocket.go | 14 +++++++---- .../openai/openai_responses_websocket_test.go | 25 +++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/sdk/api/handlers/openai/openai_responses_websocket.go b/sdk/api/handlers/openai/openai_responses_websocket.go index 9f065efd..df46d971 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket.go +++ b/sdk/api/handlers/openai/openai_responses_websocket.go @@ -54,11 +54,8 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) { passthroughSessionID := 
uuid.NewString() downstreamSessionKey := websocketDownstreamSessionKey(c.Request) retainResponsesWebsocketToolCaches(downstreamSessionKey) - clientRemoteAddr := "" - if c != nil && c.Request != nil { - clientRemoteAddr = strings.TrimSpace(c.Request.RemoteAddr) - } - log.Infof("responses websocket: client connected id=%s remote=%s", passthroughSessionID, clientRemoteAddr) + clientIP := websocketClientAddress(c) + log.Infof("responses websocket: client connected id=%s remote=%s", passthroughSessionID, clientIP) var wsTerminateErr error var wsBodyLog strings.Builder defer func() { @@ -206,6 +203,13 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) { } } +func websocketClientAddress(c *gin.Context) string { + if c == nil || c.Request == nil { + return "" + } + return strings.TrimSpace(c.ClientIP()) +} + func websocketUpgradeHeaders(req *http.Request) http.Header { headers := http.Header{} if req == nil { diff --git a/sdk/api/handlers/openai/openai_responses_websocket_test.go b/sdk/api/handlers/openai/openai_responses_websocket_test.go index 157d6e2f..773df18e 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket_test.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go @@ -721,6 +721,31 @@ func TestResponsesWebsocketPrewarmHandledLocallyForSSEUpstream(t *testing.T) { } } +func TestWebsocketClientAddressUsesGinClientIP(t *testing.T) { + gin.SetMode(gin.TestMode) + + recorder := httptest.NewRecorder() + c, engine := gin.CreateTestContext(recorder) + if err := engine.SetTrustedProxies([]string{"0.0.0.0/0", "::/0"}); err != nil { + t.Fatalf("SetTrustedProxies: %v", err) + } + + req := httptest.NewRequest(http.MethodGet, "/v1/responses/ws", nil) + req.RemoteAddr = "172.18.0.1:34282" + req.Header.Set("X-Forwarded-For", "203.0.113.7") + c.Request = req + + if got := websocketClientAddress(c); got != strings.TrimSpace(c.ClientIP()) { + t.Fatalf("websocketClientAddress = %q, ClientIP = %q", got, c.ClientIP()) + } +} + +func 
TestWebsocketClientAddressReturnsEmptyForNilContext(t *testing.T) { + if got := websocketClientAddress(nil); got != "" { + t.Fatalf("websocketClientAddress(nil) = %q, want empty", got) + } +} + func TestResponsesWebsocketPinsOnlyWebsocketCapableAuth(t *testing.T) { gin.SetMode(gin.TestMode) From 37249339ac026b35fa708e9cd68fec948bf60e8d Mon Sep 17 00:00:00 2001 From: edlsh Date: Wed, 1 Apr 2026 13:03:17 -0400 Subject: [PATCH 37/42] feat: add opt-in experimental Claude cch signing --- config.example.yaml | 2 + go.mod | 1 + go.sum | 2 + internal/config/config.go | 9 +- internal/runtime/executor/claude_executor.go | 102 ++++---- .../runtime/executor/claude_executor_test.go | 230 ++++++++++++------ internal/runtime/executor/claude_signing.go | 64 +++++ 7 files changed, 277 insertions(+), 133 deletions(-) create mode 100644 internal/runtime/executor/claude_signing.go diff --git a/config.example.yaml b/config.example.yaml index 1b365d87..9bae2e05 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -172,6 +172,8 @@ nonstream-keepalive-interval: 0 # - "API" # - "proxy" # cache-user-id: true # optional: default is false; set true to reuse cached user_id per API key instead of generating a random one each request +# experimental-cch-signing: false # optional: default is false; when true, sign the final /v1/messages body using the current Claude Code cch algorithm +# # keep this disabled unless you explicitly need the behavior, so upstream seed changes fall back to legacy proxy behavior # Default headers for Claude API requests. Update when Claude Code releases new versions. 
# In legacy mode, user-agent/package-version/runtime-version/timeout are used as fallbacks diff --git a/go.mod b/go.mod index 34237de9..9213f736 100644 --- a/go.mod +++ b/go.mod @@ -81,6 +81,7 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect + github.com/pierrec/xxHash v0.1.5 // indirect github.com/pjbgf/sha1cd v0.5.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rs/xid v1.5.0 // indirect diff --git a/go.sum b/go.sum index 3c424c5e..e811b012 100644 --- a/go.sum +++ b/go.sum @@ -152,6 +152,8 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/pierrec/xxHash v0.1.5 h1:n/jBpwTHiER4xYvK3/CdPVnLDPchj8eTJFFLUb4QHBo= +github.com/pierrec/xxHash v0.1.5/go.mod h1:w2waW5Zoa/Wc4Yqe0wgrIYAGKqRMf7czn2HNKXmuL+I= github.com/pjbgf/sha1cd v0.5.0 h1:a+UkboSi1znleCDUNT3M5YxjOnN1fz2FhN48FlwCxs0= github.com/pjbgf/sha1cd v0.5.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/internal/config/config.go b/internal/config/config.go index c4156e97..85627776 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -240,8 +240,8 @@ type AmpCode struct { UpstreamAPIKey string `yaml:"upstream-api-key" json:"upstream-api-key"` // UpstreamAPIKeys maps client API keys (from top-level api-keys) to upstream API keys. - // When a client authenticates with a key that matches an entry, that upstream key is used. - // If no match is found, falls back to UpstreamAPIKey (default behavior). 
+ // When a request is authenticated with one of the APIKeys, the corresponding UpstreamAPIKey + // is used for the upstream Amp request. UpstreamAPIKeys []AmpUpstreamAPIKeyEntry `yaml:"upstream-api-keys,omitempty" json:"upstream-api-keys,omitempty"` // RestrictManagementToLocalhost restricts Amp management routes (/api/user, /api/threads, etc.) @@ -363,6 +363,11 @@ type ClaudeKey struct { // Cloak configures request cloaking for non-Claude-Code clients. Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"` + + // ExperimentalCCHSigning enables opt-in final-body cch signing for cloaked + // Claude /v1/messages requests. It is disabled by default so upstream seed + // changes do not alter the proxy's legacy behavior. + ExperimentalCCHSigning bool `yaml:"experimental-cch-signing,omitempty" json:"experimental-cch-signing,omitempty"` } func (k ClaudeKey) GetAPIKey() string { return k.APIKey } diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index cc88dd77..fed21044 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -159,6 +159,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { bodyForUpstream = applyClaudeToolPrefix(body, claudeToolPrefix) } + if experimentalCCHSigningEnabled(e.cfg, auth) { + bodyForUpstream = signAnthropicMessagesBody(bodyForUpstream) + } url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyForUpstream)) @@ -323,6 +326,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() { bodyForUpstream = applyClaudeToolPrefix(body, claudeToolPrefix) } + if experimentalCCHSigningEnabled(e.cfg, auth) { + bodyForUpstream = 
signAnthropicMessagesBody(bodyForUpstream) + } url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(bodyForUpstream)) @@ -900,7 +906,7 @@ func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { } func checkSystemInstructions(payload []byte) []byte { - return checkSystemInstructionsWithMode(payload, false) + return checkSystemInstructionsWithSigningMode(payload, false, false) } func isClaudeOAuthToken(apiKey string) bool { @@ -1122,35 +1128,6 @@ func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string, bo return cloakMode, strictMode, sensitiveWords, cacheUserID } -// resolveClaudeKeyCloakConfig finds the matching ClaudeKey config and returns its CloakConfig. -func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig { - if cfg == nil || auth == nil { - return nil - } - - apiKey, baseURL := claudeCreds(auth) - if apiKey == "" { - return nil - } - - for i := range cfg.ClaudeKey { - entry := &cfg.ClaudeKey[i] - cfgKey := strings.TrimSpace(entry.APIKey) - cfgBase := strings.TrimSpace(entry.BaseURL) - - // Match by API key - if strings.EqualFold(cfgKey, apiKey) { - // If baseURL is specified, also check it - if baseURL != "" && cfgBase != "" && !strings.EqualFold(cfgBase, baseURL) { - continue - } - return entry.Cloak - } - } - - return nil -} - // injectFakeUserID generates and injects a fake user ID into the request metadata. // When useCache is false, a new user ID is generated for every call. func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte { @@ -1177,29 +1154,36 @@ func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte { // generateBillingHeader creates the x-anthropic-billing-header text block that // real Claude Code prepends to every system prompt array. 
// Format: x-anthropic-billing-header: cc_version=.; cc_entrypoint=cli; cch=; -func generateBillingHeader(payload []byte) string { - // Generate a deterministic cch hash from the payload content (system + messages + tools). - // Real Claude Code uses a 5-char hex hash that varies per request. - h := sha256.Sum256(payload) - cch := hex.EncodeToString(h[:])[:5] - +func generateBillingHeader(payload []byte, experimentalCCHSigning bool) string { // Build hash: 3-char hex, matches the pattern seen in real requests (e.g. "a43") buildBytes := make([]byte, 2) _, _ = rand.Read(buildBytes) buildHash := hex.EncodeToString(buildBytes)[:3] + if experimentalCCHSigning { + return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=00000;", buildHash) + } + + // Generate a deterministic cch hash from the payload content (system + messages + tools). + // Real Claude Code uses a 5-char hex hash that varies per request. + h := sha256.Sum256(payload) + cch := hex.EncodeToString(h[:])[:5] return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=%s;", buildHash, cch) } -// checkSystemInstructionsWithMode injects Claude Code-style system blocks: +func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { + return checkSystemInstructionsWithSigningMode(payload, strictMode, false) +} + +// checkSystemInstructionsWithSigningMode injects Claude Code-style system blocks: // // system[0]: billing header (no cache_control) // system[1]: agent identifier (no cache_control) // system[2..]: user system messages (cache_control added when missing) -func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { +func checkSystemInstructionsWithSigningMode(payload []byte, strictMode bool, experimentalCCHSigning bool) []byte { system := gjson.GetBytes(payload, "system") - billingText := generateBillingHeader(payload) + billingText := generateBillingHeader(payload, experimentalCCHSigning) 
billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText) // No cache_control on the agent block. It is a cloaking artifact with zero cache // value (the last system block is what actually triggers caching of all system content). @@ -1254,9 +1238,12 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte { // Cloaking includes: system prompt injection, fake user ID, and sensitive word obfuscation. func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string, apiKey string) []byte { clientUserAgent := getClientUserAgent(ctx) + useExperimentalCCHSigning := experimentalCCHSigningEnabled(cfg, auth) // Get cloak config from ClaudeKey configuration + cloakCfg := resolveClaudeKeyCloakConfig(cfg, auth) + attrMode, attrStrict, attrWords, attrCache := getCloakConfigFromAuth(auth) // Determine cloak settings var cloakMode string @@ -1265,29 +1252,24 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A var cacheUserID bool if cloakCfg != nil { - cloakMode = cloakCfg.Mode - strictMode = cloakCfg.StrictMode - sensitiveWords = cloakCfg.SensitiveWords + cloakMode = strings.TrimSpace(cloakCfg.Mode) + if cloakMode == "" { + cloakMode = attrMode + strictMode = attrStrict + sensitiveWords = attrWords + } else { + strictMode = cloakCfg.StrictMode + sensitiveWords = cloakCfg.SensitiveWords + } if cloakCfg.CacheUserID != nil { cacheUserID = *cloakCfg.CacheUserID - } - } - - // Fallback to auth attributes if no config found - if cloakMode == "" { - attrMode, attrStrict, attrWords, attrCache := getCloakConfigFromAuth(auth) - cloakMode = attrMode - if !strictMode { - strictMode = attrStrict - } - if len(sensitiveWords) == 0 { - sensitiveWords = attrWords - } - if cloakCfg == nil || cloakCfg.CacheUserID == nil { + } else { cacheUserID = attrCache } - } else if cloakCfg == nil || cloakCfg.CacheUserID == nil { - _, _, _, attrCache := getCloakConfigFromAuth(auth) + } else { 
+ cloakMode = attrMode + strictMode = attrStrict + sensitiveWords = attrWords cacheUserID = attrCache } @@ -1298,7 +1280,7 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A // Skip system instructions for claude-3-5-haiku models if !strings.HasPrefix(model, "claude-3-5-haiku") { - payload = checkSystemInstructionsWithMode(payload, strictMode) + payload = checkSystemInstructionsWithSigningMode(payload, strictMode, useExperimentalCCHSigning) } // Inject fake user ID @@ -1317,7 +1299,7 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A // According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages. // This function adds cache_control to: // 1. The LAST tool in the tools array (caches all tool definitions) -// 2. The LAST element in the system array (caches system prompt) +// 2. The LAST system prompt element // 3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn) // // Up to 4 cache breakpoints are allowed per request. Tools, System, and Messages are INDEPENDENT breakpoints. 
diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index ee8e9025..c15d41cf 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -4,9 +4,11 @@ import ( "bytes" "compress/gzip" "context" + "fmt" "io" "net/http" "net/http/httptest" + "regexp" "strings" "sync" "testing" @@ -14,6 +16,7 @@ import ( "github.com/gin-gonic/gin" "github.com/klauspost/compress/zstd" + xxHash64 "github.com/pierrec/xxHash/xxHash64" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -1418,6 +1421,35 @@ func TestDecodeResponseBody_MagicByteGzipNoHeader(t *testing.T) { } } +// TestDecodeResponseBody_MagicByteZstdNoHeader verifies that decodeResponseBody +// detects zstd-compressed content via magic bytes even when Content-Encoding is absent. +func TestDecodeResponseBody_MagicByteZstdNoHeader(t *testing.T) { + const plaintext = "data: {\"type\":\"message_stop\"}\n" + + var buf bytes.Buffer + enc, err := zstd.NewWriter(&buf) + if err != nil { + t.Fatalf("zstd.NewWriter: %v", err) + } + _, _ = enc.Write([]byte(plaintext)) + _ = enc.Close() + + rc := io.NopCloser(&buf) + decoded, err := decodeResponseBody(rc, "") + if err != nil { + t.Fatalf("decodeResponseBody error: %v", err) + } + defer decoded.Close() + + got, err := io.ReadAll(decoded) + if err != nil { + t.Fatalf("ReadAll error: %v", err) + } + if string(got) != plaintext { + t.Errorf("decoded = %q, want %q", got, plaintext) + } +} + // TestDecodeResponseBody_PlainTextNoHeader verifies that decodeResponseBody returns // plain text untouched when Content-Encoding is absent and no magic bytes match. 
func TestDecodeResponseBody_PlainTextNoHeader(t *testing.T) { @@ -1489,77 +1521,6 @@ func TestClaudeExecutor_ExecuteStream_GzipNoContentEncodingHeader(t *testing.T) } } -// TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity verifies -// that injecting Accept-Encoding via auth.Attributes cannot override the stream -// path's enforced identity encoding. -func TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity(t *testing.T) { - var gotEncoding string - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - gotEncoding = r.Header.Get("Accept-Encoding") - w.Header().Set("Content-Type", "text/event-stream") - _, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n\n")) - })) - defer server.Close() - - executor := NewClaudeExecutor(&config.Config{}) - // Inject Accept-Encoding via the custom header attribute mechanism. - auth := &cliproxyauth.Auth{Attributes: map[string]string{ - "api_key": "key-123", - "base_url": server.URL, - "header:Accept-Encoding": "gzip, deflate, br, zstd", - }} - payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`) - - result, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{ - Model: "claude-3-5-sonnet-20241022", - Payload: payload, - }, cliproxyexecutor.Options{ - SourceFormat: sdktranslator.FromString("claude"), - }) - if err != nil { - t.Fatalf("ExecuteStream error: %v", err) - } - for chunk := range result.Chunks { - if chunk.Err != nil { - t.Fatalf("unexpected chunk error: %v", chunk.Err) - } - } - - if gotEncoding != "identity" { - t.Errorf("Accept-Encoding = %q; stream path must enforce identity regardless of auth.Attributes override", gotEncoding) - } -} - -// TestDecodeResponseBody_MagicByteZstdNoHeader verifies that decodeResponseBody -// detects zstd-compressed content via magic bytes (28 b5 2f fd) even when -// Content-Encoding is absent. 
-func TestDecodeResponseBody_MagicByteZstdNoHeader(t *testing.T) { - const plaintext = "data: {\"type\":\"message_stop\"}\n" - - var buf bytes.Buffer - enc, err := zstd.NewWriter(&buf) - if err != nil { - t.Fatalf("zstd.NewWriter: %v", err) - } - _, _ = enc.Write([]byte(plaintext)) - _ = enc.Close() - - rc := io.NopCloser(&buf) - decoded, err := decodeResponseBody(rc, "") - if err != nil { - t.Fatalf("decodeResponseBody error: %v", err) - } - defer decoded.Close() - - got, err := io.ReadAll(decoded) - if err != nil { - t.Fatalf("ReadAll error: %v", err) - } - if string(got) != plaintext { - t.Errorf("decoded = %q, want %q", got, plaintext) - } -} - // TestClaudeExecutor_Execute_GzipErrorBodyNoContentEncodingHeader verifies that the // error path (4xx) correctly decompresses a gzip body even when the upstream omits // the Content-Encoding header. This closes the gap left by PR #1771, which only @@ -1643,6 +1604,45 @@ func TestClaudeExecutor_ExecuteStream_GzipErrorBodyNoContentEncodingHeader(t *te } } +// TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity verifies that the +// streaming executor enforces Accept-Encoding: identity regardless of auth.Attributes override. 
+func TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity(t *testing.T) { + var gotEncoding string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotEncoding = r.Header.Get("Accept-Encoding") + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n\n")) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + "header:Accept-Encoding": "gzip, deflate, br, zstd", + }} + payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`) + + result, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("claude"), + }) + if err != nil { + t.Fatalf("ExecuteStream error: %v", err) + } + for chunk := range result.Chunks { + if chunk.Err != nil { + t.Fatalf("unexpected chunk error: %v", chunk.Err) + } + } + + if gotEncoding != "identity" { + t.Errorf("Accept-Encoding = %q; stream path must enforce identity regardless of auth.Attributes override", gotEncoding) + } +} + // Test case 1: String system prompt is preserved and converted to a content block func TestCheckSystemInstructionsWithMode_StringSystemPreserved(t *testing.T) { payload := []byte(`{"system":"You are a helpful assistant.","messages":[{"role":"user","content":"hi"}]}`) @@ -1726,3 +1726,91 @@ func TestCheckSystemInstructionsWithMode_StringWithSpecialChars(t *testing.T) { t.Fatalf("blocks[2] text mangled, got %q", blocks[2].Get("text").String()) } } + +func TestClaudeExecutor_ExperimentalCCHSigningDisabledByDefaultKeepsLegacyHeader(t *testing.T) { + var seenBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + 
body, _ := io.ReadAll(r.Body) + seenBody = bytes.Clone(body) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"id":"msg_1","type":"message","model":"claude-3-5-sonnet","role":"assistant","content":[{"type":"text","text":"ok"}],"usage":{"input_tokens":1,"output_tokens":1}}`)) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + }} + payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`) + + _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + if len(seenBody) == 0 { + t.Fatal("expected request body to be captured") + } + + billingHeader := gjson.GetBytes(seenBody, "system.0.text").String() + if !strings.HasPrefix(billingHeader, "x-anthropic-billing-header:") { + t.Fatalf("system.0.text = %q, want billing header", billingHeader) + } + if strings.Contains(billingHeader, "cch=00000;") { + t.Fatalf("legacy mode should not forward cch placeholder, got %q", billingHeader) + } +} + +func TestClaudeExecutor_ExperimentalCCHSigningOptInSignsFinalBody(t *testing.T) { + var seenBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + seenBody = bytes.Clone(body) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"id":"msg_1","type":"message","model":"claude-3-5-sonnet","role":"assistant","content":[{"type":"text","text":"ok"}],"usage":{"input_tokens":1,"output_tokens":1}}`)) + })) + defer server.Close() + + executor := NewClaudeExecutor(&config.Config{ + ClaudeKey: []config.ClaudeKey{{ + APIKey: "key-123", + BaseURL: server.URL, + 
ExperimentalCCHSigning: true, + }}, + }) + auth := &cliproxyauth.Auth{Attributes: map[string]string{ + "api_key": "key-123", + "base_url": server.URL, + }} + const messageText = "please keep literal cch=00000 in this message" + payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"please keep literal cch=00000 in this message"}]}]}`) + + _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "claude-3-5-sonnet-20241022", + Payload: payload, + }, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + if len(seenBody) == 0 { + t.Fatal("expected request body to be captured") + } + if got := gjson.GetBytes(seenBody, "messages.0.content.0.text").String(); got != messageText { + t.Fatalf("message text = %q, want %q", got, messageText) + } + + billingPattern := regexp.MustCompile(`(x-anthropic-billing-header:[^"]*?\bcch=)([0-9a-f]{5})(;)`) + match := billingPattern.FindSubmatch(seenBody) + if match == nil { + t.Fatalf("expected signed billing header in body: %s", string(seenBody)) + } + actualCCH := string(match[2]) + unsignedBody := billingPattern.ReplaceAll(seenBody, []byte(`${1}00000${3}`)) + wantCCH := fmt.Sprintf("%05x", xxHash64.Checksum(unsignedBody, 0x6E52736AC806831E)&0xFFFFF) + if actualCCH != wantCCH { + t.Fatalf("cch = %q, want %q\nbody: %s", actualCCH, wantCCH, string(seenBody)) + } +} diff --git a/internal/runtime/executor/claude_signing.go b/internal/runtime/executor/claude_signing.go new file mode 100644 index 00000000..c52aef49 --- /dev/null +++ b/internal/runtime/executor/claude_signing.go @@ -0,0 +1,64 @@ +package executor + +import ( + "fmt" + "regexp" + "strings" + + xxHash64 "github.com/pierrec/xxHash/xxHash64" + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" +) + +const claudeCCHSeed uint64 = 0x6E52736AC806831E + +var 
claudeBillingHeaderPlaceholderPattern = regexp.MustCompile(`(x-anthropic-billing-header:[^"]*?\bcch=)(00000)(;)`) + +func signAnthropicMessagesBody(body []byte) []byte { + if !claudeBillingHeaderPlaceholderPattern.Match(body) { + return body + } + + cch := fmt.Sprintf("%05x", xxHash64.Checksum(body, claudeCCHSeed)&0xFFFFF) + return claudeBillingHeaderPlaceholderPattern.ReplaceAll(body, []byte("${1}"+cch+"${3}")) +} + +func resolveClaudeKeyConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.ClaudeKey { + if cfg == nil || auth == nil { + return nil + } + + apiKey, baseURL := claudeCreds(auth) + if apiKey == "" { + return nil + } + + for i := range cfg.ClaudeKey { + entry := &cfg.ClaudeKey[i] + cfgKey := strings.TrimSpace(entry.APIKey) + cfgBase := strings.TrimSpace(entry.BaseURL) + if !strings.EqualFold(cfgKey, apiKey) { + continue + } + if baseURL != "" && cfgBase != "" && !strings.EqualFold(cfgBase, baseURL) { + continue + } + return entry + } + + return nil +} + +// resolveClaudeKeyCloakConfig finds the matching ClaudeKey config and returns its CloakConfig. 
+func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.CloakConfig { + entry := resolveClaudeKeyConfig(cfg, auth) + if entry == nil { + return nil + } + return entry.Cloak +} + +func experimentalCCHSigningEnabled(cfg *config.Config, auth *cliproxyauth.Auth) bool { + entry := resolveClaudeKeyConfig(cfg, auth) + return entry != nil && entry.ExperimentalCCHSigning +} From 15c2f274ea690c9a7c9db22f9f454af869db5375 Mon Sep 17 00:00:00 2001 From: edlsh Date: Wed, 1 Apr 2026 13:20:11 -0400 Subject: [PATCH 38/42] fix: preserve cloak config defaults when mode omitted --- internal/runtime/executor/claude_executor.go | 30 +++++++------------ .../runtime/executor/claude_executor_test.go | 24 +++++++++++++++ 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index fed21044..b0834d62 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -1241,36 +1241,28 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A useExperimentalCCHSigning := experimentalCCHSigningEnabled(cfg, auth) // Get cloak config from ClaudeKey configuration - cloakCfg := resolveClaudeKeyCloakConfig(cfg, auth) attrMode, attrStrict, attrWords, attrCache := getCloakConfigFromAuth(auth) // Determine cloak settings - var cloakMode string - var strictMode bool - var sensitiveWords []string - var cacheUserID bool + cloakMode := attrMode + strictMode := attrStrict + sensitiveWords := attrWords + cacheUserID := attrCache if cloakCfg != nil { - cloakMode = strings.TrimSpace(cloakCfg.Mode) - if cloakMode == "" { - cloakMode = attrMode - strictMode = attrStrict - sensitiveWords = attrWords - } else { - strictMode = cloakCfg.StrictMode + if mode := strings.TrimSpace(cloakCfg.Mode); mode != "" { + cloakMode = mode + } + if cloakCfg.StrictMode { + strictMode = true + } + if len(cloakCfg.SensitiveWords) > 0 
{ sensitiveWords = cloakCfg.SensitiveWords } if cloakCfg.CacheUserID != nil { cacheUserID = *cloakCfg.CacheUserID - } else { - cacheUserID = attrCache } - } else { - cloakMode = attrMode - strictMode = attrStrict - sensitiveWords = attrWords - cacheUserID = attrCache } // Determine if cloaking should be applied diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index c15d41cf..16edc390 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -1814,3 +1814,27 @@ func TestClaudeExecutor_ExperimentalCCHSigningOptInSignsFinalBody(t *testing.T) t.Fatalf("cch = %q, want %q\nbody: %s", actualCCH, wantCCH, string(seenBody)) } } + +func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmitted(t *testing.T) { + cfg := &config.Config{ + ClaudeKey: []config.ClaudeKey{{ + APIKey: "key-123", + Cloak: &config.CloakConfig{ + StrictMode: true, + SensitiveWords: []string{"proxy"}, + }, + }}, + } + auth := &cliproxyauth.Auth{Attributes: map[string]string{"api_key": "key-123"}} + payload := []byte(`{"system":"proxy rules","messages":[{"role":"user","content":[{"type":"text","text":"proxy access"}]}]}`) + + out := applyCloaking(context.Background(), cfg, auth, payload, "claude-3-5-sonnet-20241022", "key-123") + + blocks := gjson.GetBytes(out, "system").Array() + if len(blocks) != 2 { + t.Fatalf("expected strict mode to keep only injected system blocks, got %d", len(blocks)) + } + if got := gjson.GetBytes(out, "messages.0.content.0.text").String(); !strings.Contains(got, zeroWidthSpace) { + t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got) + } +} From 8435c3d7becbde32e0653a6636b9b0a687a8c64a Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 31 Mar 2026 11:54:45 +0800 Subject: [PATCH 39/42] feat(tui): show time in usage details --- internal/tui/i18n.go | 2 + internal/tui/usage_tab.go | 54 +++++++++++++ 
internal/tui/usage_tab_test.go | 134 +++++++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 internal/tui/usage_tab_test.go diff --git a/internal/tui/i18n.go b/internal/tui/i18n.go index 2964a6c6..f6a33ca4 100644 --- a/internal/tui/i18n.go +++ b/internal/tui/i18n.go @@ -201,6 +201,7 @@ var zhStrings = map[string]string{ "usage_output": "输出", "usage_cached": "缓存", "usage_reasoning": "思考", + "usage_time": "时间", // ── Logs ── "logs_title": "📋 日志", @@ -352,6 +353,7 @@ var enStrings = map[string]string{ "usage_output": "Output", "usage_cached": "Cached", "usage_reasoning": "Reasoning", + "usage_time": "Time", // ── Logs ── "logs_title": "📋 Logs", diff --git a/internal/tui/usage_tab.go b/internal/tui/usage_tab.go index 9e6da7f8..6b9fef5e 100644 --- a/internal/tui/usage_tab.go +++ b/internal/tui/usage_tab.go @@ -248,6 +248,9 @@ func (m usageTabModel) renderContent() string { // Token type breakdown from details sb.WriteString(m.renderTokenBreakdown(stats)) + + // Latency breakdown from details + sb.WriteString(m.renderLatencyBreakdown(stats)) } } } @@ -308,6 +311,57 @@ func (m usageTabModel) renderTokenBreakdown(modelStats map[string]any) string { lipgloss.NewStyle().Foreground(colorMuted).Render(strings.Join(parts, " "))) } +// renderLatencyBreakdown aggregates latency_ms from model details and displays avg/min/max. 
+func (m usageTabModel) renderLatencyBreakdown(modelStats map[string]any) string { + details, ok := modelStats["details"] + if !ok { + return "" + } + detailList, ok := details.([]any) + if !ok || len(detailList) == 0 { + return "" + } + + var totalLatency int64 + var count int + var minLatency, maxLatency int64 + first := true + + for _, d := range detailList { + dm, ok := d.(map[string]any) + if !ok { + continue + } + latencyMs := int64(getFloat(dm, "latency_ms")) + if latencyMs <= 0 { + continue + } + totalLatency += latencyMs + count++ + if first { + minLatency = latencyMs + maxLatency = latencyMs + first = false + } else { + if latencyMs < minLatency { + minLatency = latencyMs + } + if latencyMs > maxLatency { + maxLatency = latencyMs + } + } + } + + if count == 0 { + return "" + } + + avgLatency := totalLatency / int64(count) + return fmt.Sprintf(" │ %s: avg %dms min %dms max %dms\n", + lipgloss.NewStyle().Foreground(colorMuted).Render(T("usage_time")), + avgLatency, minLatency, maxLatency) +} + // renderBarChart renders a simple ASCII horizontal bar chart. 
func renderBarChart(data map[string]any, maxBarWidth int, barColor lipgloss.Color) string { if maxBarWidth < 10 { diff --git a/internal/tui/usage_tab_test.go b/internal/tui/usage_tab_test.go new file mode 100644 index 00000000..4fffcd98 --- /dev/null +++ b/internal/tui/usage_tab_test.go @@ -0,0 +1,134 @@ +package tui + +import ( + "strings" + "testing" +) + +func TestRenderLatencyBreakdown(t *testing.T) { + tests := []struct { + name string + modelStats map[string]any + wantEmpty bool + wantContains string + }{ + { + name: "no details", + modelStats: map[string]any{}, + wantEmpty: true, + }, + { + name: "empty details", + modelStats: map[string]any{ + "details": []any{}, + }, + wantEmpty: true, + }, + { + name: "details with zero latency", + modelStats: map[string]any{ + "details": []any{ + map[string]any{ + "latency_ms": float64(0), + }, + }, + }, + wantEmpty: true, + }, + { + name: "single request with latency", + modelStats: map[string]any{ + "details": []any{ + map[string]any{ + "latency_ms": float64(1500), + }, + }, + }, + wantEmpty: false, + wantContains: "avg 1500ms min 1500ms max 1500ms", + }, + { + name: "multiple requests with varying latency", + modelStats: map[string]any{ + "details": []any{ + map[string]any{ + "latency_ms": float64(100), + }, + map[string]any{ + "latency_ms": float64(200), + }, + map[string]any{ + "latency_ms": float64(300), + }, + }, + }, + wantEmpty: false, + wantContains: "avg 200ms min 100ms max 300ms", + }, + { + name: "mixed valid and invalid latency values", + modelStats: map[string]any{ + "details": []any{ + map[string]any{ + "latency_ms": float64(500), + }, + map[string]any{ + "latency_ms": float64(0), + }, + map[string]any{ + "latency_ms": float64(1500), + }, + }, + }, + wantEmpty: false, + wantContains: "avg 1000ms min 500ms max 1500ms", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := usageTabModel{} + result := m.renderLatencyBreakdown(tt.modelStats) + + if tt.wantEmpty { + if result != 
"" { + t.Errorf("renderLatencyBreakdown() = %q, want empty string", result) + } + return + } + + if result == "" { + t.Errorf("renderLatencyBreakdown() = empty, want non-empty string") + return + } + + if tt.wantContains != "" && !strings.Contains(result, tt.wantContains) { + t.Errorf("renderLatencyBreakdown() = %q, want to contain %q", result, tt.wantContains) + } + }) + } +} + +func TestUsageTimeTranslations(t *testing.T) { + prevLocale := CurrentLocale() + t.Cleanup(func() { + SetLocale(prevLocale) + }) + + tests := []struct { + locale string + want string + }{ + {locale: "en", want: "Time"}, + {locale: "zh", want: "时间"}, + } + + for _, tt := range tests { + t.Run(tt.locale, func(t *testing.T) { + SetLocale(tt.locale) + if got := T("usage_time"); got != tt.want { + t.Fatalf("T(usage_time) = %q, want %q", got, tt.want) + } + }) + } +} From 25d1c18a3f5c42bdaf246d4104f13ea2a2eb4929 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 2 Apr 2026 11:03:11 +0800 Subject: [PATCH 40/42] fix: scope experimental cch signing to billing header --- go.mod | 2 +- internal/runtime/executor/claude_signing.go | 25 +++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 9213f736..7ad363a7 100644 --- a/go.mod +++ b/go.mod @@ -81,7 +81,7 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect - github.com/pierrec/xxHash v0.1.5 // indirect + github.com/pierrec/xxHash v0.1.5 github.com/pjbgf/sha1cd v0.5.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rs/xid v1.5.0 // indirect diff --git a/internal/runtime/executor/claude_signing.go b/internal/runtime/executor/claude_signing.go index c52aef49..697a6882 100644 --- a/internal/runtime/executor/claude_signing.go +++ b/internal/runtime/executor/claude_signing.go @@ -8,19 +8,36 @@ import ( xxHash64 "github.com/pierrec/xxHash/xxHash64" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) const claudeCCHSeed uint64 = 0x6E52736AC806831E -var claudeBillingHeaderPlaceholderPattern = regexp.MustCompile(`(x-anthropic-billing-header:[^"]*?\bcch=)(00000)(;)`) +var claudeBillingHeaderCCHPattern = regexp.MustCompile(`\bcch=([0-9a-f]{5});`) func signAnthropicMessagesBody(body []byte) []byte { - if !claudeBillingHeaderPlaceholderPattern.Match(body) { + billingHeader := gjson.GetBytes(body, "system.0.text").String() + if !strings.HasPrefix(billingHeader, "x-anthropic-billing-header:") { + return body + } + if !claudeBillingHeaderCCHPattern.MatchString(billingHeader) { return body } - cch := fmt.Sprintf("%05x", xxHash64.Checksum(body, claudeCCHSeed)&0xFFFFF) - return claudeBillingHeaderPlaceholderPattern.ReplaceAll(body, []byte("${1}"+cch+"${3}")) + unsignedBillingHeader := claudeBillingHeaderCCHPattern.ReplaceAllString(billingHeader, "cch=00000;") + unsignedBody, err := sjson.SetBytes(body, "system.0.text", unsignedBillingHeader) + if err != nil { + return body + } + + cch := fmt.Sprintf("%05x", xxHash64.Checksum(unsignedBody, claudeCCHSeed)&0xFFFFF) + signedBillingHeader := claudeBillingHeaderCCHPattern.ReplaceAllString(unsignedBillingHeader, "cch="+cch+";") + signedBody, err := sjson.SetBytes(unsignedBody, "system.0.text", signedBillingHeader) + if err != nil { + return unsignedBody + } + return signedBody } func resolveClaudeKeyConfig(cfg *config.Config, auth *cliproxyauth.Auth) *config.ClaudeKey { From 913f4a9c5f2b772c881827bc4b5a607a9a4f72b6 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 2 Apr 2026 11:12:30 +0800 Subject: [PATCH 41/42] test: fix executor tests after helpers refactor --- .../runtime/executor/claude_executor_test.go | 18 +++++++----------- .../executor/helps/claude_device_profile.go | 8 ++++++++ 2 files changed, 15 insertions(+), 11 deletions(-) 
diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index 5cef9548..8e8173dd 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -567,7 +567,7 @@ func TestApplyClaudeHeaders_LegacyModeFallsBackToRuntimeOSArchWhenMissing(t *tes }) applyClaudeHeaders(req, auth, "key-legacy-runtime-os-arch", false, nil, cfg) - assertClaudeFingerprint(t, req.Header, "claude-cli/2.1.60 (external, cli)", "0.70.0", "v22.0.0", mapStainlessOS(), mapStainlessArch()) + assertClaudeFingerprint(t, req.Header, "claude-cli/2.1.60 (external, cli)", "0.70.0", "v22.0.0", helps.MapStainlessOS(), helps.MapStainlessArch()) } func TestApplyClaudeHeaders_UnsetStabilizationAlsoUsesLegacyRuntimeOSArchFallback(t *testing.T) { @@ -594,14 +594,14 @@ func TestApplyClaudeHeaders_UnsetStabilizationAlsoUsesLegacyRuntimeOSArchFallbac }) applyClaudeHeaders(req, auth, "key-unset-runtime-os-arch", false, nil, cfg) - assertClaudeFingerprint(t, req.Header, "claude-cli/2.1.60 (external, cli)", "0.70.0", "v22.0.0", mapStainlessOS(), mapStainlessArch()) + assertClaudeFingerprint(t, req.Header, "claude-cli/2.1.60 (external, cli)", "0.70.0", "v22.0.0", helps.MapStainlessOS(), helps.MapStainlessArch()) } func TestClaudeDeviceProfileStabilizationEnabled_DefaultFalse(t *testing.T) { - if claudeDeviceProfileStabilizationEnabled(nil) { + if helps.ClaudeDeviceProfileStabilizationEnabled(nil) { t.Fatal("expected nil config to default to disabled stabilization") } - if claudeDeviceProfileStabilizationEnabled(&config.Config{}) { + if helps.ClaudeDeviceProfileStabilizationEnabled(&config.Config{}) { t.Fatal("expected unset stabilize-device-profile to default to disabled stabilization") } } @@ -799,8 +799,6 @@ func TestApplyClaudeToolPrefix_NestedToolReference(t *testing.T) { } func TestClaudeExecutor_ReusesUserIDAcrossModelsWhenCacheEnabled(t *testing.T) { - resetUserIDCache() - var userIDs []string var 
requestModels []string server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -860,15 +858,13 @@ func TestClaudeExecutor_ReusesUserIDAcrossModelsWhenCacheEnabled(t *testing.T) { if userIDs[0] != userIDs[1] { t.Fatalf("expected user_id to be reused across models, got %q and %q", userIDs[0], userIDs[1]) } - if !isValidUserID(userIDs[0]) { + if !helps.IsValidUserID(userIDs[0]) { t.Fatalf("user_id %q is not valid", userIDs[0]) } t.Logf("✓ End-to-end test passed: Same user_id (%s) was used for both models", userIDs[0]) } func TestClaudeExecutor_GeneratesNewUserIDByDefault(t *testing.T) { - resetUserIDCache() - var userIDs []string server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { body, _ := io.ReadAll(r.Body) @@ -906,7 +902,7 @@ func TestClaudeExecutor_GeneratesNewUserIDByDefault(t *testing.T) { if userIDs[0] == userIDs[1] { t.Fatalf("expected user_id to change when caching is not enabled, got identical values %q", userIDs[0]) } - if !isValidUserID(userIDs[0]) || !isValidUserID(userIDs[1]) { + if !helps.IsValidUserID(userIDs[0]) || !helps.IsValidUserID(userIDs[1]) { t.Fatalf("user_ids should be valid, got %q and %q", userIDs[0], userIDs[1]) } } @@ -1833,7 +1829,7 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi if len(blocks) != 2 { t.Fatalf("expected strict mode to keep only injected system blocks, got %d", len(blocks)) } - if got := gjson.GetBytes(out, "messages.0.content.0.text").String(); !strings.Contains(got, zeroWidthSpace) { + if got := gjson.GetBytes(out, "messages.0.content.0.text").String(); !strings.Contains(got, "\u200B") { t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got) } } diff --git a/internal/runtime/executor/helps/claude_device_profile.go b/internal/runtime/executor/helps/claude_device_profile.go index 2cf4d917..f7b9c1f2 100644 --- a/internal/runtime/executor/helps/claude_device_profile.go +++ 
b/internal/runtime/executor/helps/claude_device_profile.go @@ -91,6 +91,14 @@ func ResetClaudeDeviceProfileCache() { claudeDeviceProfileCacheMu.Unlock() } +func MapStainlessOS() string { + return mapStainlessOS() +} + +func MapStainlessArch() string { + return mapStainlessArch() +} + func defaultClaudeDeviceProfile(cfg *config.Config) ClaudeDeviceProfile { hdrDefault := func(cfgVal, fallback string) string { if strings.TrimSpace(cfgVal) != "" { From 4f99bc54f1d7760903e86c09b32f081126558a28 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Thu, 2 Apr 2026 11:19:37 +0800 Subject: [PATCH 42/42] test: update codex header expectations --- .../executor/codex_executor_cache_test.go | 4 ++-- .../codex_websockets_executor_test.go | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/internal/runtime/executor/codex_executor_cache_test.go b/internal/runtime/executor/codex_executor_cache_test.go index d6dca031..7a24fd96 100644 --- a/internal/runtime/executor/codex_executor_cache_test.go +++ b/internal/runtime/executor/codex_executor_cache_test.go @@ -42,8 +42,8 @@ func TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFrom if gotKey != expectedKey { t.Fatalf("prompt_cache_key = %q, want %q", gotKey, expectedKey) } - if gotConversation := httpReq.Header.Get("Conversation_id"); gotConversation != expectedKey { - t.Fatalf("Conversation_id = %q, want %q", gotConversation, expectedKey) + if gotConversation := httpReq.Header.Get("Conversation_id"); gotConversation != "" { + t.Fatalf("Conversation_id = %q, want empty", gotConversation) } if gotSession := httpReq.Header.Get("Session_id"); gotSession != expectedKey { t.Fatalf("Session_id = %q, want %q", gotSession, expectedKey) diff --git a/internal/runtime/executor/codex_websockets_executor_test.go b/internal/runtime/executor/codex_websockets_executor_test.go index d34e7c39..dec356de 100644 --- a/internal/runtime/executor/codex_websockets_executor_test.go +++ 
b/internal/runtime/executor/codex_websockets_executor_test.go @@ -38,8 +38,8 @@ func TestApplyCodexWebsocketHeadersDefaultsToCurrentResponsesBeta(t *testing.T) if got := headers.Get("OpenAI-Beta"); got != codexResponsesWebsocketBetaHeaderValue { t.Fatalf("OpenAI-Beta = %s, want %s", got, codexResponsesWebsocketBetaHeaderValue) } - if got := headers.Get("User-Agent"); got != codexUserAgent { - t.Fatalf("User-Agent = %s, want %s", got, codexUserAgent) + if got := headers.Get("User-Agent"); got != "" { + t.Fatalf("User-Agent = %s, want empty", got) } if got := headers.Get("Version"); got != "" { t.Fatalf("Version = %q, want empty", got) @@ -97,8 +97,8 @@ func TestApplyCodexWebsocketHeadersUsesConfigDefaultsForOAuth(t *testing.T) { headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, auth, "", cfg) - if got := headers.Get("User-Agent"); got != "my-codex-client/1.0" { - t.Fatalf("User-Agent = %s, want %s", got, "my-codex-client/1.0") + if got := headers.Get("User-Agent"); got != "" { + t.Fatalf("User-Agent = %s, want empty", got) } if got := headers.Get("x-codex-beta-features"); got != "feature-a,feature-b" { t.Fatalf("x-codex-beta-features = %s, want %s", got, "feature-a,feature-b") @@ -129,8 +129,8 @@ func TestApplyCodexWebsocketHeadersPrefersExistingHeadersOverClientAndConfig(t * got := applyCodexWebsocketHeaders(ctx, headers, auth, "", cfg) - if gotVal := got.Get("User-Agent"); gotVal != "existing-ua" { - t.Fatalf("User-Agent = %s, want %s", gotVal, "existing-ua") + if gotVal := got.Get("User-Agent"); gotVal != "" { + t.Fatalf("User-Agent = %s, want empty", gotVal) } if gotVal := got.Get("x-codex-beta-features"); gotVal != "existing-beta" { t.Fatalf("x-codex-beta-features = %s, want %s", gotVal, "existing-beta") @@ -155,8 +155,8 @@ func TestApplyCodexWebsocketHeadersConfigUserAgentOverridesClientHeader(t *testi headers := applyCodexWebsocketHeaders(ctx, http.Header{}, auth, "", cfg) - if got := headers.Get("User-Agent"); got != "config-ua" { - 
t.Fatalf("User-Agent = %s, want %s", got, "config-ua") + if got := headers.Get("User-Agent"); got != "" { + t.Fatalf("User-Agent = %s, want empty", got) } if got := headers.Get("x-codex-beta-features"); got != "client-beta" { t.Fatalf("x-codex-beta-features = %s, want %s", got, "client-beta") @@ -177,8 +177,8 @@ func TestApplyCodexWebsocketHeadersIgnoresConfigForAPIKeyAuth(t *testing.T) { headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, auth, "sk-test", cfg) - if got := headers.Get("User-Agent"); got != codexUserAgent { - t.Fatalf("User-Agent = %s, want %s", got, codexUserAgent) + if got := headers.Get("User-Agent"); got != "" { + t.Fatalf("User-Agent = %s, want empty", got) } if got := headers.Get("x-codex-beta-features"); got != "" { t.Fatalf("x-codex-beta-features = %q, want empty", got)