mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-05-13 23:01:40 +00:00
fix(executor): implement immediate retry with token refresh on 429 for Qwen and add associated tests
Closes: #2661
This commit is contained in:
@@ -153,6 +153,17 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
|
|||||||
return errCode, retryAfter
|
return errCode, retryAfter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func qwenShouldAttemptImmediateRefreshRetry(auth *cliproxyauth.Auth) bool {
|
||||||
|
if auth == nil || auth.Metadata == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if provider := strings.TrimSpace(auth.Provider); provider != "" && !strings.EqualFold(provider, "qwen") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
refreshToken, _ := auth.Metadata["refresh_token"].(string)
|
||||||
|
return strings.TrimSpace(refreshToken) != ""
|
||||||
|
}
|
||||||
|
|
||||||
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
|
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
|
||||||
// It always injects the default system prompt and merges any user-provided system messages
|
// It always injects the default system prompt and merges any user-provided system messages
|
||||||
// into the injected system message content to satisfy Qwen's strict message ordering rules.
|
// into the injected system message content to satisfy Qwen's strict message ordering rules.
|
||||||
@@ -255,7 +266,8 @@ func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
|
|||||||
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
|
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
|
||||||
// If access token is unavailable, it falls back to legacy via ClientAdapter.
|
// If access token is unavailable, it falls back to legacy via ClientAdapter.
|
||||||
type QwenExecutor struct {
|
type QwenExecutor struct {
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
|
refreshForImmediateRetry func(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewQwenExecutor(cfg *config.Config) *QwenExecutor { return &QwenExecutor{cfg: cfg} }
|
func NewQwenExecutor(cfg *config.Config) *QwenExecutor { return &QwenExecutor{cfg: cfg} }
|
||||||
@@ -295,23 +307,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
|||||||
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check rate limit before proceeding
|
|
||||||
var authID string
|
var authID string
|
||||||
if auth != nil {
|
if auth != nil {
|
||||||
authID = auth.ID
|
authID = auth.ID
|
||||||
}
|
}
|
||||||
if err := checkQwenRateLimit(authID); err != nil {
|
|
||||||
helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
|
|
||||||
return resp, err
|
|
||||||
}
|
|
||||||
|
|
||||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||||
|
|
||||||
token, baseURL := qwenCreds(auth)
|
|
||||||
if baseURL == "" {
|
|
||||||
baseURL = "https://portal.qwen.ai/v1"
|
|
||||||
}
|
|
||||||
|
|
||||||
reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||||
defer reporter.TrackFailure(ctx, &err)
|
defer reporter.TrackFailure(ctx, &err)
|
||||||
|
|
||||||
@@ -338,68 +340,107 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
|||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
qwenImmediateRetryAttempted := false
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
for {
|
||||||
if err != nil {
|
if errRate := checkQwenRateLimit(authID); errRate != nil {
|
||||||
return resp, err
|
helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
|
||||||
}
|
return resp, errRate
|
||||||
applyQwenHeaders(httpReq, token, false)
|
}
|
||||||
var attrs map[string]string
|
|
||||||
if auth != nil {
|
|
||||||
attrs = auth.Attributes
|
|
||||||
}
|
|
||||||
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
|
|
||||||
var authLabel, authType, authValue string
|
|
||||||
if auth != nil {
|
|
||||||
authLabel = auth.Label
|
|
||||||
authType, authValue = auth.AccountInfo()
|
|
||||||
}
|
|
||||||
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
|
|
||||||
URL: url,
|
|
||||||
Method: http.MethodPost,
|
|
||||||
Headers: httpReq.Header.Clone(),
|
|
||||||
Body: body,
|
|
||||||
Provider: e.Identifier(),
|
|
||||||
AuthID: authID,
|
|
||||||
AuthLabel: authLabel,
|
|
||||||
AuthType: authType,
|
|
||||||
AuthValue: authValue,
|
|
||||||
})
|
|
||||||
|
|
||||||
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
token, baseURL := qwenCreds(auth)
|
||||||
httpResp, err := httpClient.Do(httpReq)
|
if baseURL == "" {
|
||||||
if err != nil {
|
baseURL = "https://portal.qwen.ai/v1"
|
||||||
helps.RecordAPIResponseError(ctx, e.cfg, err)
|
}
|
||||||
return resp, err
|
|
||||||
}
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
defer func() {
|
httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||||
|
if errReq != nil {
|
||||||
|
return resp, errReq
|
||||||
|
}
|
||||||
|
applyQwenHeaders(httpReq, token, false)
|
||||||
|
var attrs map[string]string
|
||||||
|
if auth != nil {
|
||||||
|
attrs = auth.Attributes
|
||||||
|
}
|
||||||
|
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
|
||||||
|
var authLabel, authType, authValue string
|
||||||
|
if auth != nil {
|
||||||
|
authLabel = auth.Label
|
||||||
|
authType, authValue = auth.AccountInfo()
|
||||||
|
}
|
||||||
|
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
|
||||||
|
URL: url,
|
||||||
|
Method: http.MethodPost,
|
||||||
|
Headers: httpReq.Header.Clone(),
|
||||||
|
Body: body,
|
||||||
|
Provider: e.Identifier(),
|
||||||
|
AuthID: authID,
|
||||||
|
AuthLabel: authLabel,
|
||||||
|
AuthType: authType,
|
||||||
|
AuthValue: authValue,
|
||||||
|
})
|
||||||
|
|
||||||
|
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
|
httpResp, errDo := httpClient.Do(httpReq)
|
||||||
|
if errDo != nil {
|
||||||
|
helps.RecordAPIResponseError(ctx, e.cfg, errDo)
|
||||||
|
return resp, errDo
|
||||||
|
}
|
||||||
|
|
||||||
|
helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||||
|
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||||
|
b, _ := io.ReadAll(httpResp.Body)
|
||||||
|
helps.AppendAPIResponseChunk(ctx, e.cfg, b)
|
||||||
|
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||||
|
log.Errorf("qwen executor: close response body error: %v", errClose)
|
||||||
|
}
|
||||||
|
|
||||||
|
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
|
||||||
|
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||||
|
|
||||||
|
if errCode == http.StatusTooManyRequests && !qwenImmediateRetryAttempted && qwenShouldAttemptImmediateRefreshRetry(auth) {
|
||||||
|
helps.LogWithRequestID(ctx).WithFields(log.Fields{
|
||||||
|
"auth_id": redactAuthID(authID),
|
||||||
|
"model": req.Model,
|
||||||
|
}).Info("qwen 429 encountered, refreshing token for immediate retry")
|
||||||
|
|
||||||
|
qwenImmediateRetryAttempted = true
|
||||||
|
refreshFn := e.refreshForImmediateRetry
|
||||||
|
if refreshFn == nil {
|
||||||
|
refreshFn = e.Refresh
|
||||||
|
}
|
||||||
|
refreshedAuth, errRefresh := refreshFn(ctx, auth)
|
||||||
|
if errRefresh != nil {
|
||||||
|
helps.LogWithRequestID(ctx).WithError(errRefresh).WithField("auth_id", redactAuthID(authID)).Warn("qwen 429 refresh failed; skipping immediate retry")
|
||||||
|
} else if refreshedAuth != nil {
|
||||||
|
auth = refreshedAuth
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, errRead := io.ReadAll(httpResp.Body)
|
||||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||||
log.Errorf("qwen executor: close response body error: %v", errClose)
|
log.Errorf("qwen executor: close response body error: %v", errClose)
|
||||||
}
|
}
|
||||||
}()
|
if errRead != nil {
|
||||||
helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
helps.RecordAPIResponseError(ctx, e.cfg, errRead)
|
||||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
return resp, errRead
|
||||||
b, _ := io.ReadAll(httpResp.Body)
|
}
|
||||||
helps.AppendAPIResponseChunk(ctx, e.cfg, b)
|
|
||||||
|
|
||||||
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
|
helps.AppendAPIResponseChunk(ctx, e.cfg, data)
|
||||||
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
reporter.Publish(ctx, helps.ParseOpenAIUsage(data))
|
||||||
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
|
|
||||||
return resp, err
|
var param any
|
||||||
|
// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
|
||||||
|
// the original model name in the response for client compatibility.
|
||||||
|
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, ¶m)
|
||||||
|
resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
|
||||||
|
return resp, nil
|
||||||
}
|
}
|
||||||
data, err := io.ReadAll(httpResp.Body)
|
|
||||||
if err != nil {
|
|
||||||
helps.RecordAPIResponseError(ctx, e.cfg, err)
|
|
||||||
return resp, err
|
|
||||||
}
|
|
||||||
helps.AppendAPIResponseChunk(ctx, e.cfg, data)
|
|
||||||
reporter.Publish(ctx, helps.ParseOpenAIUsage(data))
|
|
||||||
var param any
|
|
||||||
// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
|
|
||||||
// the original model name in the response for client compatibility.
|
|
||||||
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, ¶m)
|
|
||||||
resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
|
|
||||||
return resp, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
|
func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
|
||||||
@@ -407,23 +448,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check rate limit before proceeding
|
|
||||||
var authID string
|
var authID string
|
||||||
if auth != nil {
|
if auth != nil {
|
||||||
authID = auth.ID
|
authID = auth.ID
|
||||||
}
|
}
|
||||||
if err := checkQwenRateLimit(authID); err != nil {
|
|
||||||
helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||||
|
|
||||||
token, baseURL := qwenCreds(auth)
|
|
||||||
if baseURL == "" {
|
|
||||||
baseURL = "https://portal.qwen.ai/v1"
|
|
||||||
}
|
|
||||||
|
|
||||||
reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
|
||||||
defer reporter.TrackFailure(ctx, &err)
|
defer reporter.TrackFailure(ctx, &err)
|
||||||
|
|
||||||
@@ -457,86 +488,122 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
qwenImmediateRetryAttempted := false
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
for {
|
||||||
if err != nil {
|
if errRate := checkQwenRateLimit(authID); errRate != nil {
|
||||||
return nil, err
|
helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
|
||||||
}
|
return nil, errRate
|
||||||
applyQwenHeaders(httpReq, token, true)
|
|
||||||
var attrs map[string]string
|
|
||||||
if auth != nil {
|
|
||||||
attrs = auth.Attributes
|
|
||||||
}
|
|
||||||
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
|
|
||||||
var authLabel, authType, authValue string
|
|
||||||
if auth != nil {
|
|
||||||
authLabel = auth.Label
|
|
||||||
authType, authValue = auth.AccountInfo()
|
|
||||||
}
|
|
||||||
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
|
|
||||||
URL: url,
|
|
||||||
Method: http.MethodPost,
|
|
||||||
Headers: httpReq.Header.Clone(),
|
|
||||||
Body: body,
|
|
||||||
Provider: e.Identifier(),
|
|
||||||
AuthID: authID,
|
|
||||||
AuthLabel: authLabel,
|
|
||||||
AuthType: authType,
|
|
||||||
AuthValue: authValue,
|
|
||||||
})
|
|
||||||
|
|
||||||
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
|
||||||
httpResp, err := httpClient.Do(httpReq)
|
|
||||||
if err != nil {
|
|
||||||
helps.RecordAPIResponseError(ctx, e.cfg, err)
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
|
||||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
|
||||||
b, _ := io.ReadAll(httpResp.Body)
|
|
||||||
helps.AppendAPIResponseChunk(ctx, e.cfg, b)
|
|
||||||
|
|
||||||
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
|
|
||||||
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
|
||||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
|
||||||
log.Errorf("qwen executor: close response body error: %v", errClose)
|
|
||||||
}
|
}
|
||||||
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
|
|
||||||
return nil, err
|
token, baseURL := qwenCreds(auth)
|
||||||
}
|
if baseURL == "" {
|
||||||
out := make(chan cliproxyexecutor.StreamChunk)
|
baseURL = "https://portal.qwen.ai/v1"
|
||||||
go func() {
|
}
|
||||||
defer close(out)
|
|
||||||
defer func() {
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
|
httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||||
|
if errReq != nil {
|
||||||
|
return nil, errReq
|
||||||
|
}
|
||||||
|
applyQwenHeaders(httpReq, token, true)
|
||||||
|
var attrs map[string]string
|
||||||
|
if auth != nil {
|
||||||
|
attrs = auth.Attributes
|
||||||
|
}
|
||||||
|
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
|
||||||
|
var authLabel, authType, authValue string
|
||||||
|
if auth != nil {
|
||||||
|
authLabel = auth.Label
|
||||||
|
authType, authValue = auth.AccountInfo()
|
||||||
|
}
|
||||||
|
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
|
||||||
|
URL: url,
|
||||||
|
Method: http.MethodPost,
|
||||||
|
Headers: httpReq.Header.Clone(),
|
||||||
|
Body: body,
|
||||||
|
Provider: e.Identifier(),
|
||||||
|
AuthID: authID,
|
||||||
|
AuthLabel: authLabel,
|
||||||
|
AuthType: authType,
|
||||||
|
AuthValue: authValue,
|
||||||
|
})
|
||||||
|
|
||||||
|
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||||
|
httpResp, errDo := httpClient.Do(httpReq)
|
||||||
|
if errDo != nil {
|
||||||
|
helps.RecordAPIResponseError(ctx, e.cfg, errDo)
|
||||||
|
return nil, errDo
|
||||||
|
}
|
||||||
|
|
||||||
|
helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||||
|
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||||
|
b, _ := io.ReadAll(httpResp.Body)
|
||||||
|
helps.AppendAPIResponseChunk(ctx, e.cfg, b)
|
||||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||||
log.Errorf("qwen executor: close response body error: %v", errClose)
|
log.Errorf("qwen executor: close response body error: %v", errClose)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
|
||||||
|
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||||
|
|
||||||
|
if errCode == http.StatusTooManyRequests && !qwenImmediateRetryAttempted && qwenShouldAttemptImmediateRefreshRetry(auth) {
|
||||||
|
helps.LogWithRequestID(ctx).WithFields(log.Fields{
|
||||||
|
"auth_id": redactAuthID(authID),
|
||||||
|
"model": req.Model,
|
||||||
|
}).Info("qwen 429 encountered, refreshing token for immediate retry (stream)")
|
||||||
|
|
||||||
|
qwenImmediateRetryAttempted = true
|
||||||
|
refreshFn := e.refreshForImmediateRetry
|
||||||
|
if refreshFn == nil {
|
||||||
|
refreshFn = e.Refresh
|
||||||
|
}
|
||||||
|
refreshedAuth, errRefresh := refreshFn(ctx, auth)
|
||||||
|
if errRefresh != nil {
|
||||||
|
helps.LogWithRequestID(ctx).WithError(errRefresh).WithField("auth_id", redactAuthID(authID)).Warn("qwen 429 refresh failed; skipping immediate retry (stream)")
|
||||||
|
} else if refreshedAuth != nil {
|
||||||
|
auth = refreshedAuth
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make(chan cliproxyexecutor.StreamChunk)
|
||||||
|
go func() {
|
||||||
|
defer close(out)
|
||||||
|
defer func() {
|
||||||
|
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||||
|
log.Errorf("qwen executor: close response body error: %v", errClose)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
scanner := bufio.NewScanner(httpResp.Body)
|
||||||
|
scanner.Buffer(nil, 52_428_800) // 50MB
|
||||||
|
var param any
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Bytes()
|
||||||
|
helps.AppendAPIResponseChunk(ctx, e.cfg, line)
|
||||||
|
if detail, ok := helps.ParseOpenAIStreamUsage(line); ok {
|
||||||
|
reporter.Publish(ctx, detail)
|
||||||
|
}
|
||||||
|
chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), ¶m)
|
||||||
|
for i := range chunks {
|
||||||
|
out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), ¶m)
|
||||||
|
for i := range doneChunks {
|
||||||
|
out <- cliproxyexecutor.StreamChunk{Payload: doneChunks[i]}
|
||||||
|
}
|
||||||
|
if errScan := scanner.Err(); errScan != nil {
|
||||||
|
helps.RecordAPIResponseError(ctx, e.cfg, errScan)
|
||||||
|
reporter.PublishFailure(ctx)
|
||||||
|
out <- cliproxyexecutor.StreamChunk{Err: errScan}
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
scanner := bufio.NewScanner(httpResp.Body)
|
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
|
||||||
scanner.Buffer(nil, 52_428_800) // 50MB
|
}
|
||||||
var param any
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := scanner.Bytes()
|
|
||||||
helps.AppendAPIResponseChunk(ctx, e.cfg, line)
|
|
||||||
if detail, ok := helps.ParseOpenAIStreamUsage(line); ok {
|
|
||||||
reporter.Publish(ctx, detail)
|
|
||||||
}
|
|
||||||
chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), ¶m)
|
|
||||||
for i := range chunks {
|
|
||||||
out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), ¶m)
|
|
||||||
for i := range doneChunks {
|
|
||||||
out <- cliproxyexecutor.StreamChunk{Payload: doneChunks[i]}
|
|
||||||
}
|
|
||||||
if errScan := scanner.Err(); errScan != nil {
|
|
||||||
helps.RecordAPIResponseError(ctx, e.cfg, errScan)
|
|
||||||
reporter.PublishFailure(ctx)
|
|
||||||
out <- cliproxyexecutor.StreamChunk{Err: errScan}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||||
|
|||||||
@@ -3,10 +3,16 @@ package executor
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -209,3 +215,168 @@ func TestQwenCreds_NormalizesResourceURL(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestQwenExecutorExecute_429RefreshAndRetry(t *testing.T) {
|
||||||
|
qwenRateLimiter.Lock()
|
||||||
|
qwenRateLimiter.requests = make(map[string][]time.Time)
|
||||||
|
qwenRateLimiter.Unlock()
|
||||||
|
|
||||||
|
var calls int32
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
atomic.AddInt32(&calls, 1)
|
||||||
|
if r.URL.Path != "/v1/chat/completions" {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch r.Header.Get("Authorization") {
|
||||||
|
case "Bearer old-token":
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusTooManyRequests)
|
||||||
|
_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
|
||||||
|
return
|
||||||
|
case "Bearer new-token":
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte(`{"id":"chatcmpl-test","object":"chat.completion","created":1,"model":"qwen-max","choices":[{"index":0,"message":{"role":"assistant","content":"hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`))
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
exec := NewQwenExecutor(&config.Config{})
|
||||||
|
auth := &cliproxyauth.Auth{
|
||||||
|
ID: "auth-test",
|
||||||
|
Provider: "qwen",
|
||||||
|
Attributes: map[string]string{
|
||||||
|
"base_url": srv.URL + "/v1",
|
||||||
|
},
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"access_token": "old-token",
|
||||||
|
"refresh_token": "refresh-token",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var refresherCalls int32
|
||||||
|
exec.refreshForImmediateRetry = func(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||||
|
atomic.AddInt32(&refresherCalls, 1)
|
||||||
|
refreshed := auth.Clone()
|
||||||
|
if refreshed.Metadata == nil {
|
||||||
|
refreshed.Metadata = make(map[string]any)
|
||||||
|
}
|
||||||
|
refreshed.Metadata["access_token"] = "new-token"
|
||||||
|
refreshed.Metadata["refresh_token"] = "refresh-token-2"
|
||||||
|
return refreshed, nil
|
||||||
|
}
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
resp, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
|
||||||
|
Model: "qwen-max",
|
||||||
|
Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("openai"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Execute() error = %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Payload) == 0 {
|
||||||
|
t.Fatalf("Execute() payload is empty")
|
||||||
|
}
|
||||||
|
if atomic.LoadInt32(&calls) != 2 {
|
||||||
|
t.Fatalf("upstream calls = %d, want 2", atomic.LoadInt32(&calls))
|
||||||
|
}
|
||||||
|
if atomic.LoadInt32(&refresherCalls) != 1 {
|
||||||
|
t.Fatalf("refresher calls = %d, want 1", atomic.LoadInt32(&refresherCalls))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQwenExecutorExecuteStream_429RefreshAndRetry(t *testing.T) {
|
||||||
|
qwenRateLimiter.Lock()
|
||||||
|
qwenRateLimiter.requests = make(map[string][]time.Time)
|
||||||
|
qwenRateLimiter.Unlock()
|
||||||
|
|
||||||
|
var calls int32
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
atomic.AddInt32(&calls, 1)
|
||||||
|
if r.URL.Path != "/v1/chat/completions" {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch r.Header.Get("Authorization") {
|
||||||
|
case "Bearer old-token":
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusTooManyRequests)
|
||||||
|
_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
|
||||||
|
return
|
||||||
|
case "Bearer new-token":
|
||||||
|
w.Header().Set("Content-Type", "text/event-stream")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-test\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"qwen-max\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hi\"},\"finish_reason\":null}]}\n"))
|
||||||
|
if flusher, ok := w.(http.Flusher); ok {
|
||||||
|
flusher.Flush()
|
||||||
|
}
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
exec := NewQwenExecutor(&config.Config{})
|
||||||
|
auth := &cliproxyauth.Auth{
|
||||||
|
ID: "auth-test",
|
||||||
|
Provider: "qwen",
|
||||||
|
Attributes: map[string]string{
|
||||||
|
"base_url": srv.URL + "/v1",
|
||||||
|
},
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"access_token": "old-token",
|
||||||
|
"refresh_token": "refresh-token",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var refresherCalls int32
|
||||||
|
exec.refreshForImmediateRetry = func(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
|
||||||
|
atomic.AddInt32(&refresherCalls, 1)
|
||||||
|
refreshed := auth.Clone()
|
||||||
|
if refreshed.Metadata == nil {
|
||||||
|
refreshed.Metadata = make(map[string]any)
|
||||||
|
}
|
||||||
|
refreshed.Metadata["access_token"] = "new-token"
|
||||||
|
refreshed.Metadata["refresh_token"] = "refresh-token-2"
|
||||||
|
return refreshed, nil
|
||||||
|
}
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
stream, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
|
||||||
|
Model: "qwen-max",
|
||||||
|
Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("openai"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ExecuteStream() error = %v", err)
|
||||||
|
}
|
||||||
|
if atomic.LoadInt32(&calls) != 2 {
|
||||||
|
t.Fatalf("upstream calls = %d, want 2", atomic.LoadInt32(&calls))
|
||||||
|
}
|
||||||
|
if atomic.LoadInt32(&refresherCalls) != 1 {
|
||||||
|
t.Fatalf("refresher calls = %d, want 1", atomic.LoadInt32(&refresherCalls))
|
||||||
|
}
|
||||||
|
|
||||||
|
var sawPayload bool
|
||||||
|
for chunk := range stream.Chunks {
|
||||||
|
if chunk.Err != nil {
|
||||||
|
t.Fatalf("stream chunk error = %v", chunk.Err)
|
||||||
|
}
|
||||||
|
if len(chunk.Payload) > 0 {
|
||||||
|
sawPayload = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !sawPayload {
|
||||||
|
t.Fatalf("stream did not produce any payload chunks")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user