Merge PR #525 (v6.9.27)

2026-06-08 04:02:12 +08:00 · 2026-04-16 03:16:28 +08:00
parent 1d8e68ad15 ba454dbfbf
commit 44c74d6ea2
68 changed files with 3075 additions and 3239 deletions
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -26,6 +26,7 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	antigravityclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/claude"
@@ -184,22 +185,24 @@ func newAntigravityHTTPClient(ctx context.Context, cfg *config.Config, auth *cli
 	return client
 }

-func validateAntigravityRequestSignatures(from sdktranslator.Format, rawJSON []byte) error {
+func validateAntigravityRequestSignatures(from sdktranslator.Format, rawJSON []byte) ([]byte, error) {
 	if from.String() != "claude" {
-		return nil
+		return rawJSON, nil
 	}
+	// Always strip thinking blocks with empty signatures (proxy-generated).
+	rawJSON = antigravityclaude.StripEmptySignatureThinkingBlocks(rawJSON)
 	if cache.SignatureCacheEnabled() {
-		return nil
+		return rawJSON, nil
 	}
 	if !cache.SignatureBypassStrictMode() {
 		// Non-strict bypass: let the translator handle invalid signatures
 		// by dropping unsigned thinking blocks silently (no 400).
-		return nil
+		return rawJSON, nil
 	}
 	if err := antigravityclaude.ValidateClaudeBypassSignatures(rawJSON); err != nil {
-		return statusErr{code: http.StatusBadRequest, msg: err.Error()}
+		return rawJSON, statusErr{code: http.StatusBadRequest, msg: err.Error()}
 	}
-	return nil
+	return rawJSON, nil
 }

 // Identifier returns the executor identifier.
@@ -695,9 +698,11 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 		originalPayloadSource = opts.OriginalRequest
 	}
 	originalPayload := originalPayloadSource
-	if errValidate := validateAntigravityRequestSignatures(from, originalPayload); errValidate != nil {
+	originalPayload, errValidate := validateAntigravityRequestSignatures(from, originalPayload)
+	if errValidate != nil {
 		return resp, errValidate
 	}
+	req.Payload = originalPayload
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return resp, errToken
@@ -907,9 +912,11 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 		originalPayloadSource = opts.OriginalRequest
 	}
 	originalPayload := originalPayloadSource
-	if errValidate := validateAntigravityRequestSignatures(from, originalPayload); errValidate != nil {
+	originalPayload, errValidate := validateAntigravityRequestSignatures(from, originalPayload)
+	if errValidate != nil {
 		return resp, errValidate
 	}
+	req.Payload = originalPayload
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return resp, errToken
@@ -1370,9 +1377,11 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 		originalPayloadSource = opts.OriginalRequest
 	}
 	originalPayload := originalPayloadSource
-	if errValidate := validateAntigravityRequestSignatures(from, originalPayload); errValidate != nil {
+	originalPayload, errValidate := validateAntigravityRequestSignatures(from, originalPayload)
+	if errValidate != nil {
 		return nil, errValidate
 	}
+	req.Payload = originalPayload
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return nil, errToken
@@ -1626,9 +1635,11 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	if len(opts.OriginalRequest) > 0 {
 		originalPayloadSource = opts.OriginalRequest
 	}
-	if errValidate := validateAntigravityRequestSignatures(from, originalPayloadSource); errValidate != nil {
+	originalPayloadSource, errValidate := validateAntigravityRequestSignatures(from, originalPayloadSource)
+	if errValidate != nil {
 		return cliproxyexecutor.Response{}, errValidate
 	}
+	req.Payload = originalPayloadSource
 	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
 	if errToken != nil {
 		return cliproxyexecutor.Response{}, errToken
@@ -1945,18 +1956,56 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	payload = geminiToAntigravity(modelName, payload, projectID)
 	payload, _ = sjson.SetBytes(payload, "model", modelName)

-	useAntigravitySchema := strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro") || strings.Contains(modelName, "gemini-3.1-pro")
-	payloadStr := string(payload)
-	paths := make([]string, 0)
-	util.Walk(gjson.Parse(payloadStr), "", "parametersJsonSchema", &paths)
-	for _, p := range paths {
-		payloadStr, _ = util.RenameKey(payloadStr, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
+	// Cap maxOutputTokens at the model's MaxCompletionTokens from the registry (only Claude requests keep the field; it is deleted below for other models).
+	if maxOut := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxOut.Exists() && maxOut.Type == gjson.Number {
+		if modelInfo := registry.LookupModelInfo(modelName, "antigravity"); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
+			if int(maxOut.Int()) > modelInfo.MaxCompletionTokens {
+				payload, _ = sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", modelInfo.MaxCompletionTokens)
+			}
+		}
 	}

-	if useAntigravitySchema {
-		payloadStr = util.CleanJSONSchemaForAntigravity(payloadStr)
+	useAntigravitySchema := strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro") || strings.Contains(modelName, "gemini-3.1-pro")
+	var (
+		bodyReader io.Reader
+		payloadLog []byte
+	)
+	if antigravityRequestNeedsSchemaSanitization(payload) {
+		payloadStr := string(payload)
+		paths := make([]string, 0)
+		util.Walk(gjson.Parse(payloadStr), "", "parametersJsonSchema", &paths)
+		for _, p := range paths {
+			payloadStr, _ = util.RenameKey(payloadStr, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
+		}
+
+		if useAntigravitySchema {
+			payloadStr = util.CleanJSONSchemaForAntigravity(payloadStr)
+		} else {
+			payloadStr = util.CleanJSONSchemaForGemini(payloadStr)
+		}
+
+		if strings.Contains(modelName, "claude") {
+			updated, _ := sjson.SetBytes([]byte(payloadStr), "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+			payloadStr = string(updated)
+		} else {
+			payloadStr, _ = sjson.Delete(payloadStr, "request.generationConfig.maxOutputTokens")
+		}
+
+		bodyReader = strings.NewReader(payloadStr)
+		if e.cfg != nil && e.cfg.RequestLog {
+			payloadLog = []byte(payloadStr)
+		}
 	} else {
-		payloadStr = util.CleanJSONSchemaForGemini(payloadStr)
+		if strings.Contains(modelName, "claude") {
+			payload, _ = sjson.SetBytes(payload, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+		} else {
+			payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.maxOutputTokens")
+		}
+
+		bodyReader = bytes.NewReader(payload)
+		if e.cfg != nil && e.cfg.RequestLog {
+			payloadLog = append([]byte(nil), payload...)
+		}
 	}

 	// if useAntigravitySchema {
@@ -1972,14 +2021,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	// 	}
 	// }

-	if strings.Contains(modelName, "claude") {
-		updated, _ := sjson.SetBytes([]byte(payloadStr), "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
-		payloadStr = string(updated)
-	} else {
-		payloadStr, _ = sjson.Delete(payloadStr, "request.generationConfig.maxOutputTokens")
-	}
-
-	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), strings.NewReader(payloadStr))
+	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bodyReader)
 	if errReq != nil {
 		return nil, errReq
 	}
@@ -2002,10 +2044,6 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	var payloadLog []byte
-	if e.cfg != nil && e.cfg.RequestLog {
-		payloadLog = []byte(payloadStr)
-	}
 	helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
 		URL:       requestURL.String(),
 		Method:    http.MethodPost,
@@ -2021,6 +2059,19 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	return httpReq, nil
 }

+func antigravityRequestNeedsSchemaSanitization(payload []byte) bool {
+	if gjson.GetBytes(payload, "request.tools.0").Exists() {
+		return true
+	}
+	if gjson.GetBytes(payload, "request.generationConfig.responseJsonSchema").Exists() {
+		return true
+	}
+	if gjson.GetBytes(payload, "request.generationConfig.responseSchema").Exists() {
+		return true
+	}
+	return false
+}
+
 func tokenExpiry(metadata map[string]any) time.Time {
 	if metadata == nil {
 		return time.Time{}
--- a/internal/runtime/executor/antigravity_executor_buildrequest_test.go
+++ b/internal/runtime/executor/antigravity_executor_buildrequest_test.go
@@ -35,12 +35,102 @@ func TestAntigravityBuildRequest_SanitizesAntigravityToolSchema(t *testing.T) {
 	assertSchemaSanitizedAndPropertyPreserved(t, params)
 }

-func buildRequestBodyFromPayload(t *testing.T, modelName string) map[string]any {
+func TestAntigravityBuildRequest_SkipsSchemaSanitizationWithoutToolsField(t *testing.T) {
+	body := buildRequestBodyFromRawPayload(t, "gemini-3.1-flash-image", []byte(`{
+		"request": {
+			"contents": [
+				{
+					"role": "user",
+					"x-debug": "keep-me",
+					"parts": [
+						{
+							"text": "hello"
+						}
+					]
+				}
+			],
+			"nonSchema": {
+				"nullable": true,
+				"x-extra": "keep-me"
+			},
+			"generationConfig": {
+				"maxOutputTokens": 128
+			}
+		}
+	}`))
+
+	assertNonSchemaRequestPreserved(t, body)
+}
+
+func TestAntigravityBuildRequest_SkipsSchemaSanitizationWithEmptyToolsArray(t *testing.T) {
+	body := buildRequestBodyFromRawPayload(t, "gemini-3.1-flash-image", []byte(`{
+		"request": {
+			"tools": [],
+			"contents": [
+				{
+					"role": "user",
+					"x-debug": "keep-me",
+					"parts": [
+						{
+							"text": "hello"
+						}
+					]
+				}
+			],
+			"nonSchema": {
+				"nullable": true,
+				"x-extra": "keep-me"
+			},
+			"generationConfig": {
+				"maxOutputTokens": 128
+			}
+		}
+	}`))
+
+	assertNonSchemaRequestPreserved(t, body)
+}
+
+func assertNonSchemaRequestPreserved(t *testing.T, body map[string]any) {
 	t.Helper()

-	executor := &AntigravityExecutor{}
-	auth := &cliproxyauth.Auth{}
-	payload := []byte(`{
+	request, ok := body["request"].(map[string]any)
+	if !ok {
+		t.Fatalf("request missing or invalid type")
+	}
+
+	contents, ok := request["contents"].([]any)
+	if !ok || len(contents) == 0 {
+		t.Fatalf("contents missing or empty")
+	}
+	content, ok := contents[0].(map[string]any)
+	if !ok {
+		t.Fatalf("content missing or invalid type")
+	}
+	if got, ok := content["x-debug"].(string); !ok || got != "keep-me" {
+		t.Fatalf("x-debug should be preserved when no tool schema exists, got=%v", content["x-debug"])
+	}
+
+	nonSchema, ok := request["nonSchema"].(map[string]any)
+	if !ok {
+		t.Fatalf("nonSchema missing or invalid type")
+	}
+	if _, ok := nonSchema["nullable"]; !ok {
+		t.Fatalf("nullable should be preserved outside schema cleanup path")
+	}
+	if got, ok := nonSchema["x-extra"].(string); !ok || got != "keep-me" {
+		t.Fatalf("x-extra should be preserved outside schema cleanup path, got=%v", nonSchema["x-extra"])
+	}
+
+	if generationConfig, ok := request["generationConfig"].(map[string]any); ok {
+		if _, ok := generationConfig["maxOutputTokens"]; ok {
+			t.Fatalf("maxOutputTokens should still be removed for non-Claude requests")
+		}
+	}
+}
+
+func buildRequestBodyFromPayload(t *testing.T, modelName string) map[string]any {
+	t.Helper()
+	return buildRequestBodyFromRawPayload(t, modelName, []byte(`{
 		"request": {
 			"tools": [
 				{
@@ -75,7 +165,14 @@ func buildRequestBodyFromPayload(t *testing.T, modelName string) map[string]any
 				}
 			]
 		}
-	}`)
+	}`))
+}
+
+func buildRequestBodyFromRawPayload(t *testing.T, modelName string, payload []byte) map[string]any {
+	t.Helper()
+
+	executor := &AntigravityExecutor{}
+	auth := &cliproxyauth.Auth{}

 	req, err := executor.buildRequest(context.Background(), auth, "token", modelName, payload, false, "", "https://example.com")
 	if err != nil {
--- a/internal/runtime/executor/antigravity_executor_signature_test.go
+++ b/internal/runtime/executor/antigravity_executor_signature_test.go
@@ -21,6 +21,14 @@ func testGeminiSignaturePayload() string {
 	return base64.StdEncoding.EncodeToString(payload)
 }

+// testFakeClaudeSignature returns a base64 string starting with 'E' that passes
+// the lightweight hasValidClaudeSignature check but has invalid protobuf content
+// (first decoded byte 0x12 is correct, but no valid protobuf field 2 follows),
+// so it fails deep validation in strict mode.
+func testFakeClaudeSignature() string {
+	return base64.StdEncoding.EncodeToString([]byte{0x12, 0xFF, 0xFE, 0xFD})
+}
+
 func testAntigravityAuth(baseURL string) *cliproxyauth.Auth {
 	return &cliproxyauth.Auth{
 		Attributes: map[string]string{
@@ -40,7 +48,7 @@ func invalidClaudeThinkingPayload() []byte {
 			{
 				"role": "assistant",
 				"content": [
-					{"type": "thinking", "thinking": "bad", "signature": "` + testGeminiSignaturePayload() + `"},
+					{"type": "thinking", "thinking": "bad", "signature": "` + testFakeClaudeSignature() + `"},
 					{"type": "text", "text": "hello"}
 				]
 			}
@@ -134,7 +142,7 @@ func TestAntigravityExecutor_NonStrictBypassSkipsPrecheck(t *testing.T) {
 	payload := invalidClaudeThinkingPayload()
 	from := sdktranslator.FromString("claude")

-	err := validateAntigravityRequestSignatures(from, payload)
+	_, err := validateAntigravityRequestSignatures(from, payload)
 	if err != nil {
 		t.Fatalf("non-strict bypass should skip precheck, got: %v", err)
 	}
@@ -150,7 +158,7 @@ func TestAntigravityExecutor_CacheModeSkipsPrecheck(t *testing.T) {
 	payload := invalidClaudeThinkingPayload()
 	from := sdktranslator.FromString("claude")

-	err := validateAntigravityRequestSignatures(from, payload)
+	_, err := validateAntigravityRequestSignatures(from, payload)
 	if err != nil {
 		t.Fatalf("cache mode should skip precheck, got: %v", err)
 	}
--- a/internal/runtime/executor/cursor_executor.go
+++ b/internal/runtime/executor/cursor_executor.go
@@ -4,11 +4,11 @@ import (
 	"bytes"
 	"context"
 	"crypto/sha256"
-	"errors"
 	"crypto/tls"
 	"encoding/base64"
 	"encoding/hex"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -30,14 +30,14 @@ import (
 )

 const (
-	cursorAPIURL        = "https://api2.cursor.sh"
-	cursorRunPath       = "/agent.v1.AgentService/Run"
-	cursorModelsPath    = "/agent.v1.AgentService/GetUsableModels"
-	cursorClientVersion = "cli-2026.02.13-41ac335"
-	cursorAuthType      = "cursor"
+	cursorAPIURL            = "https://api2.cursor.sh"
+	cursorRunPath           = "/agent.v1.AgentService/Run"
+	cursorModelsPath        = "/agent.v1.AgentService/GetUsableModels"
+	cursorClientVersion     = "cli-2026.02.13-41ac335"
+	cursorAuthType          = "cursor"
 	cursorHeartbeatInterval = 5 * time.Second
-	cursorSessionTTL      = 5 * time.Minute
-	cursorCheckpointTTL   = 30 * time.Minute
+	cursorSessionTTL        = 5 * time.Minute
+	cursorCheckpointTTL     = 30 * time.Minute
 )

 // CursorExecutor handles requests to the Cursor API via Connect+Protobuf protocol.
@@ -63,9 +63,9 @@ type cursorSession struct {
 	pending      []pendingMcpExec
 	cancel       context.CancelFunc // cancels the session-scoped heartbeat (NOT tied to HTTP request)
 	createdAt    time.Time
-	authID       string // auth file ID that created this session (for multi-account isolation)
-	toolResultCh chan []toolResultInfo                // receives tool results from the next HTTP request
-	resumeOutCh  chan cliproxyexecutor.StreamChunk    // output channel for resumed response
+	authID       string                                     // auth file ID that created this session (for multi-account isolation)
+	toolResultCh chan []toolResultInfo                      // receives tool results from the next HTTP request
+	resumeOutCh  chan cliproxyexecutor.StreamChunk          // output channel for resumed response
 	switchOutput func(ch chan cliproxyexecutor.StreamChunk) // callback to switch output channel
 }

@@ -148,7 +148,7 @@ type cursorStatusErr struct {
 	msg  string
 }

-func (e cursorStatusErr) Error() string             { return e.msg }
+func (e cursorStatusErr) Error() string              { return e.msg }
 func (e cursorStatusErr) StatusCode() int            { return e.code }
 func (e cursorStatusErr) RetryAfter() *time.Duration { return nil } // no retry-after info from Cursor; conductor uses exponential backoff

@@ -786,7 +786,7 @@ func (e *CursorExecutor) resumeWithToolResults(
 func openCursorH2Stream(accessToken string) (*cursorproto.H2Stream, error) {
 	headers := map[string]string{
 		":path":                    cursorRunPath,
-		"content-type":            "application/connect+proto",
+		"content-type":             "application/connect+proto",
 		"connect-protocol-version": "1",
 		"te":                       "trailers",
 		"authorization":            "Bearer " + accessToken,
@@ -876,21 +876,21 @@ func processH2SessionFrames(
 			buf.Write(data)
 			log.Debugf("cursor: processH2SessionFrames[%s]: buf total=%d", stream.ID(), buf.Len())

-		// Process all complete frames
-		for {
-			currentBuf := buf.Bytes()
-			if len(currentBuf) == 0 {
-				break
-			}
-			flags, payload, consumed, ok := cursorproto.ParseConnectFrame(currentBuf)
-			if !ok {
-				// Log detailed info about why parsing failed
-				previewLen := min(20, len(currentBuf))
-				log.Debugf("cursor: incomplete frame in buffer, waiting for more data (buf=%d bytes, first bytes: %x = %q)", len(currentBuf), currentBuf[:previewLen], string(currentBuf[:previewLen]))
-				break
-			}
-			buf.Next(consumed)
-			log.Debugf("cursor: parsed Connect frame flags=0x%02x payload=%d bytes consumed=%d", flags, len(payload), consumed)
+			// Process all complete frames
+			for {
+				currentBuf := buf.Bytes()
+				if len(currentBuf) == 0 {
+					break
+				}
+				flags, payload, consumed, ok := cursorproto.ParseConnectFrame(currentBuf)
+				if !ok {
+					// Log detailed info about why parsing failed
+					previewLen := min(20, len(currentBuf))
+					log.Debugf("cursor: incomplete frame in buffer, waiting for more data (buf=%d bytes, first bytes: %x = %q)", len(currentBuf), currentBuf[:previewLen], string(currentBuf[:previewLen]))
+					break
+				}
+				buf.Next(consumed)
+				log.Debugf("cursor: parsed Connect frame flags=0x%02x payload=%d bytes consumed=%d", flags, len(payload), consumed)

 				if flags&cursorproto.ConnectEndStreamFlag != 0 {
 					if err := cursorproto.ParseConnectEndStream(payload); err != nil {
@@ -1080,15 +1080,15 @@ func processH2SessionFrames(
 // --- OpenAI request parsing ---

 type parsedOpenAIRequest struct {
-	Model       string
-	Messages    []gjson.Result
-	Tools       []gjson.Result
-	Stream      bool
+	Model        string
+	Messages     []gjson.Result
+	Tools        []gjson.Result
+	Stream       bool
 	SystemPrompt string
-	UserText    string
-	Images      []cursorproto.ImageData
-	Turns       []cursorproto.TurnData
-	ToolResults []toolResultInfo
+	UserText     string
+	Images       []cursorproto.ImageData
+	Turns        []cursorproto.TurnData
+	ToolResults  []toolResultInfo
 }

 type toolResultInfo struct {
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -16,9 +16,9 @@ import (
 	copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	log "github.com/sirupsen/logrus"
--- a/internal/runtime/executor/gitlab_executor.go
+++ b/internal/runtime/executor/gitlab_executor.go
@@ -75,7 +75,7 @@ var gitLabAgenticCatalog = []gitLabCatalogModel{
 }

 var gitLabModelAliases = map[string]string{
-	"duo-chat-haiku-4-6":  "duo-chat-haiku-4-5",
+	"duo-chat-haiku-4-6": "duo-chat-haiku-4-5",
 }

 func NewGitLabExecutor(cfg *config.Config) *GitLabExecutor {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -215,7 +215,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	}

 	body = preserveReasoningContentInMessages(body)
-	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
+	// Ensure tools array exists to avoid provider quirks observed in some upstreams.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
 		body = ensureToolsArray(body)
--- a/internal/runtime/executor/kiro_executor_test.go
+++ b/internal/runtime/executor/kiro_executor_test.go
@@ -281,8 +281,8 @@ func TestGetAuthValue(t *testing.T) {
 			expected: "attribute_value",
 		},
 		{
-			name: "Both nil",
-			auth: &cliproxyauth.Auth{},
+			name:     "Both nil",
+			auth:     &cliproxyauth.Auth{},
 			key:      "test_key",
 			expected: "",
 		},
@@ -326,9 +326,9 @@ func TestGetAuthValue(t *testing.T) {

 func TestGetAccountKey(t *testing.T) {
 	tests := []struct {
-		name     string
-		auth     *cliproxyauth.Auth
-		checkFn  func(t *testing.T, result string)
+		name    string
+		auth    *cliproxyauth.Auth
+		checkFn func(t *testing.T, result string)
 	}{
 		{
 			name: "From client_id",
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -1,739 +0,0 @@
-package executor
-
-import (
-	"bufio"
-	"bytes"
-	"context"
-	"fmt"
-	"io"
-	"net/http"
-	"strconv"
-	"strings"
-	"sync"
-	"time"
-
-	qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
-	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
-	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
-	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
-	log "github.com/sirupsen/logrus"
-	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
-)
-
-const (
-	qwenUserAgent       = "QwenCode/0.14.2 (darwin; arm64)"
-	qwenRateLimitPerMin = 60          // 60 requests per minute per credential
-	qwenRateLimitWindow = time.Minute // sliding window duration
-)
-
-var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
-
-// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
-var qwenQuotaCodes = map[string]struct{}{
-	"insufficient_quota": {},
-	"quota_exceeded":     {},
-}
-
-// qwenRateLimiter tracks request timestamps per credential for rate limiting.
-// Qwen has a limit of 60 requests per minute per account.
-var qwenRateLimiter = struct {
-	sync.Mutex
-	requests map[string][]time.Time // authID -> request timestamps
-}{
-	requests: make(map[string][]time.Time),
-}
-
-// redactAuthID returns a redacted version of the auth ID for safe logging.
-// Keeps a small prefix/suffix to allow correlation across events.
-func redactAuthID(id string) string {
-	if id == "" {
-		return ""
-	}
-	if len(id) <= 8 {
-		return id
-	}
-	return id[:4] + "..." + id[len(id)-4:]
-}
-
-// checkQwenRateLimit checks if the credential has exceeded the rate limit.
-// Returns nil if allowed, or a statusErr with retryAfter if rate limited.
-func checkQwenRateLimit(authID string) error {
-	if authID == "" {
-		// Empty authID should not bypass rate limiting in production
-		// Use debug level to avoid log spam for certain auth flows
-		log.Debug("qwen rate limit check: empty authID, skipping rate limit")
-		return nil
-	}
-
-	now := time.Now()
-	windowStart := now.Add(-qwenRateLimitWindow)
-
-	qwenRateLimiter.Lock()
-	defer qwenRateLimiter.Unlock()
-
-	// Get and filter timestamps within the window
-	timestamps := qwenRateLimiter.requests[authID]
-	var validTimestamps []time.Time
-	for _, ts := range timestamps {
-		if ts.After(windowStart) {
-			validTimestamps = append(validTimestamps, ts)
-		}
-	}
-
-	// Always prune expired entries to prevent memory leak
-	// Delete empty entries, otherwise update with pruned slice
-	if len(validTimestamps) == 0 {
-		delete(qwenRateLimiter.requests, authID)
-	}
-
-	// Check if rate limit exceeded
-	if len(validTimestamps) >= qwenRateLimitPerMin {
-		// Calculate when the oldest request will expire
-		oldestInWindow := validTimestamps[0]
-		retryAfter := oldestInWindow.Add(qwenRateLimitWindow).Sub(now)
-		if retryAfter < time.Second {
-			retryAfter = time.Second
-		}
-		retryAfterSec := int(retryAfter.Seconds())
-		return statusErr{
-			code:       http.StatusTooManyRequests,
-			msg:        fmt.Sprintf(`{"error":{"code":"rate_limit_exceeded","message":"Qwen rate limit: %d requests/minute exceeded, retry after %ds","type":"rate_limit_exceeded"}}`, qwenRateLimitPerMin, retryAfterSec),
-			retryAfter: &retryAfter,
-		}
-	}
-
-	// Record this request and update the map with pruned timestamps
-	validTimestamps = append(validTimestamps, now)
-	qwenRateLimiter.requests[authID] = validTimestamps
-
-	return nil
-}
-
-// isQwenQuotaError checks if the error response indicates a quota exceeded error.
-// Qwen returns HTTP 403 with error.code="insufficient_quota" when daily quota is exhausted.
-func isQwenQuotaError(body []byte) bool {
-	code := strings.ToLower(gjson.GetBytes(body, "error.code").String())
-	errType := strings.ToLower(gjson.GetBytes(body, "error.type").String())
-
-	// Primary check: exact match on error.code or error.type (most reliable)
-	if _, ok := qwenQuotaCodes[code]; ok {
-		return true
-	}
-	if _, ok := qwenQuotaCodes[errType]; ok {
-		return true
-	}
-
-	// Fallback: check message only if code/type don't match (less reliable)
-	msg := strings.ToLower(gjson.GetBytes(body, "error.message").String())
-	if strings.Contains(msg, "insufficient_quota") || strings.Contains(msg, "quota exceeded") ||
-		strings.Contains(msg, "free allocated quota exceeded") {
-		return true
-	}
-
-	return false
-}
-
-// wrapQwenError wraps an HTTP error response, detecting quota errors and mapping them to 429.
-// Returns the appropriate status code and retryAfter duration for statusErr.
-// Only checks for quota errors when httpCode is 403 or 429 to avoid false positives.
-func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, retryAfter *time.Duration) {
-	errCode = httpCode
-	// Only check quota errors for expected status codes to avoid false positives
-	// Qwen returns 403 for quota errors, 429 for rate limits
-	if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) {
-		errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic
-		// Do not force an excessively long retry-after (e.g. until tomorrow), otherwise
-		// the global request-retry scheduler may skip retries due to max-retry-interval.
-		helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode)
-	}
-	return errCode, retryAfter
-}
-
-func qwenDisableCooling(cfg *config.Config, auth *cliproxyauth.Auth) bool {
-	if auth != nil {
-		if override, ok := auth.DisableCoolingOverride(); ok {
-			return override
-		}
-	}
-	if cfg == nil {
-		return false
-	}
-	return cfg.DisableCooling
-}
-
-func parseRetryAfterHeader(header http.Header, now time.Time) *time.Duration {
-	raw := strings.TrimSpace(header.Get("Retry-After"))
-	if raw == "" {
-		return nil
-	}
-	if seconds, err := strconv.Atoi(raw); err == nil {
-		if seconds <= 0 {
-			return nil
-		}
-		d := time.Duration(seconds) * time.Second
-		return &d
-	}
-	if at, err := http.ParseTime(raw); err == nil {
-		if !at.After(now) {
-			return nil
-		}
-		d := at.Sub(now)
-		return &d
-	}
-	return nil
-}
-
-// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
-// It always injects the default system prompt and merges any user-provided system messages
-// into the injected system message content to satisfy Qwen's strict message ordering rules.
-func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
-	isInjectedSystemPart := func(part gjson.Result) bool {
-		if !part.Exists() || !part.IsObject() {
-			return false
-		}
-		if !strings.EqualFold(part.Get("type").String(), "text") {
-			return false
-		}
-		if !strings.EqualFold(part.Get("cache_control.type").String(), "ephemeral") {
-			return false
-		}
-		text := part.Get("text").String()
-		return text == "" || text == "You are Qwen Code."
-	}
-
-	defaultParts := gjson.ParseBytes(qwenDefaultSystemMessage).Get("content")
-	var systemParts []any
-	if defaultParts.Exists() && defaultParts.IsArray() {
-		for _, part := range defaultParts.Array() {
-			systemParts = append(systemParts, part.Value())
-		}
-	}
-	if len(systemParts) == 0 {
-		systemParts = append(systemParts, map[string]any{
-			"type": "text",
-			"text": "You are Qwen Code.",
-			"cache_control": map[string]any{
-				"type": "ephemeral",
-			},
-		})
-	}
-
-	appendSystemContent := func(content gjson.Result) {
-		makeTextPart := func(text string) map[string]any {
-			return map[string]any{
-				"type": "text",
-				"text": text,
-			}
-		}
-
-		if !content.Exists() || content.Type == gjson.Null {
-			return
-		}
-		if content.IsArray() {
-			for _, part := range content.Array() {
-				if part.Type == gjson.String {
-					systemParts = append(systemParts, makeTextPart(part.String()))
-					continue
-				}
-				if isInjectedSystemPart(part) {
-					continue
-				}
-				systemParts = append(systemParts, part.Value())
-			}
-			return
-		}
-		if content.Type == gjson.String {
-			systemParts = append(systemParts, makeTextPart(content.String()))
-			return
-		}
-		if content.IsObject() {
-			if isInjectedSystemPart(content) {
-				return
-			}
-			systemParts = append(systemParts, content.Value())
-			return
-		}
-		systemParts = append(systemParts, makeTextPart(content.String()))
-	}
-
-	messages := gjson.GetBytes(payload, "messages")
-	var nonSystemMessages []any
-	if messages.Exists() && messages.IsArray() {
-		for _, msg := range messages.Array() {
-			if strings.EqualFold(msg.Get("role").String(), "system") {
-				appendSystemContent(msg.Get("content"))
-				continue
-			}
-			nonSystemMessages = append(nonSystemMessages, msg.Value())
-		}
-	}
-
-	newMessages := make([]any, 0, 1+len(nonSystemMessages))
-	newMessages = append(newMessages, map[string]any{
-		"role":    "system",
-		"content": systemParts,
-	})
-	newMessages = append(newMessages, nonSystemMessages...)
-
-	updated, errSet := sjson.SetBytes(payload, "messages", newMessages)
-	if errSet != nil {
-		return nil, fmt.Errorf("qwen executor: set system message failed: %w", errSet)
-	}
-	return updated, nil
-}
-
-// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
-// If access token is unavailable, it falls back to legacy via ClientAdapter.
-type QwenExecutor struct {
-	cfg                      *config.Config
-	refreshForImmediateRetry func(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error)
-}
-
-func NewQwenExecutor(cfg *config.Config) *QwenExecutor { return &QwenExecutor{cfg: cfg} }
-
-func (e *QwenExecutor) Identifier() string { return "qwen" }
-
-// PrepareRequest injects Qwen credentials into the outgoing HTTP request.
-func (e *QwenExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
-	if req == nil {
-		return nil
-	}
-	token, _ := qwenCreds(auth)
-	if strings.TrimSpace(token) != "" {
-		req.Header.Set("Authorization", "Bearer "+token)
-	}
-	return nil
-}
-
-// HttpRequest injects Qwen credentials into the request and executes it.
-func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, req *http.Request) (*http.Response, error) {
-	if req == nil {
-		return nil, fmt.Errorf("qwen executor: request is nil")
-	}
-	if ctx == nil {
-		ctx = req.Context()
-	}
-	httpReq := req.WithContext(ctx)
-	if err := e.PrepareRequest(httpReq, auth); err != nil {
-		return nil, err
-	}
-	httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-	return httpClient.Do(httpReq)
-}
-
-func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
-	if opts.Alt == "responses/compact" {
-		return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
-	}
-
-	var authID string
-	if auth != nil {
-		authID = auth.ID
-	}
-
-	baseModel := thinking.ParseSuffix(req.Model).ModelName
-
-	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.TrackFailure(ctx, &err)
-
-	from := opts.SourceFormat
-	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
-	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
-	}
-	originalPayload := originalPayloadSource
-	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
-	body, _ = sjson.SetBytes(body, "model", baseModel)
-
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
-	if err != nil {
-		return resp, err
-	}
-
-	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
-	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
-	body, err = ensureQwenSystemMessage(body)
-	if err != nil {
-		return resp, err
-	}
-
-	for {
-		if errRate := checkQwenRateLimit(authID); errRate != nil {
-			helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
-			return resp, errRate
-		}
-
-		token, baseURL := qwenCreds(auth)
-		if baseURL == "" {
-			baseURL = "https://portal.qwen.ai/v1"
-		}
-
-		url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
-		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
-		if errReq != nil {
-			return resp, errReq
-		}
-		applyQwenHeaders(httpReq, token, false)
-		var attrs map[string]string
-		if auth != nil {
-			attrs = auth.Attributes
-		}
-		util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
-		var authLabel, authType, authValue string
-		if auth != nil {
-			authLabel = auth.Label
-			authType, authValue = auth.AccountInfo()
-		}
-		helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
-			URL:       url,
-			Method:    http.MethodPost,
-			Headers:   httpReq.Header.Clone(),
-			Body:      body,
-			Provider:  e.Identifier(),
-			AuthID:    authID,
-			AuthLabel: authLabel,
-			AuthType:  authType,
-			AuthValue: authValue,
-		})
-
-		httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			helps.RecordAPIResponseError(ctx, e.cfg, errDo)
-			return resp, errDo
-		}
-
-		helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-			b, _ := io.ReadAll(httpResp.Body)
-			helps.AppendAPIResponseChunk(ctx, e.cfg, b)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("qwen executor: close response body error: %v", errClose)
-			}
-
-			errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
-			if errCode == http.StatusTooManyRequests && retryAfter == nil {
-				retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
-			}
-			if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
-				defaultRetryAfter := time.Second
-				retryAfter = &defaultRetryAfter
-			}
-			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
-
-			err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
-			return resp, err
-		}
-
-		data, errRead := io.ReadAll(httpResp.Body)
-		if errClose := httpResp.Body.Close(); errClose != nil {
-			log.Errorf("qwen executor: close response body error: %v", errClose)
-		}
-		if errRead != nil {
-			helps.RecordAPIResponseError(ctx, e.cfg, errRead)
-			return resp, errRead
-		}
-
-		helps.AppendAPIResponseChunk(ctx, e.cfg, data)
-		reporter.Publish(ctx, helps.ParseOpenAIUsage(data))
-
-		var param any
-		// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
-		// the original model name in the response for client compatibility.
-		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-		resp = cliproxyexecutor.Response{Payload: out, Headers: httpResp.Header.Clone()}
-		return resp, nil
-	}
-}
-
-func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
-	if opts.Alt == "responses/compact" {
-		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
-	}
-
-	var authID string
-	if auth != nil {
-		authID = auth.ID
-	}
-
-	baseModel := thinking.ParseSuffix(req.Model).ModelName
-
-	reporter := helps.NewUsageReporter(ctx, e.Identifier(), baseModel, auth)
-	defer reporter.TrackFailure(ctx, &err)
-
-	from := opts.SourceFormat
-	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
-	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
-	}
-	originalPayload := originalPayloadSource
-	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
-	body, _ = sjson.SetBytes(body, "model", baseModel)
-
-	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
-	if err != nil {
-		return nil, err
-	}
-
-	// toolsResult := gjson.GetBytes(body, "tools")
-	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
-	// This will have no real consequences. It's just to scare Qwen3.
-	// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
-	// 	body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
-	// }
-	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
-	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
-	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
-	body, err = ensureQwenSystemMessage(body)
-	if err != nil {
-		return nil, err
-	}
-
-	for {
-		if errRate := checkQwenRateLimit(authID); errRate != nil {
-			helps.LogWithRequestID(ctx).Warnf("qwen rate limit exceeded for credential %s", redactAuthID(authID))
-			return nil, errRate
-		}
-
-		token, baseURL := qwenCreds(auth)
-		if baseURL == "" {
-			baseURL = "https://portal.qwen.ai/v1"
-		}
-
-		url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
-		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
-		if errReq != nil {
-			return nil, errReq
-		}
-		applyQwenHeaders(httpReq, token, true)
-		var attrs map[string]string
-		if auth != nil {
-			attrs = auth.Attributes
-		}
-		util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
-		var authLabel, authType, authValue string
-		if auth != nil {
-			authLabel = auth.Label
-			authType, authValue = auth.AccountInfo()
-		}
-		helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
-			URL:       url,
-			Method:    http.MethodPost,
-			Headers:   httpReq.Header.Clone(),
-			Body:      body,
-			Provider:  e.Identifier(),
-			AuthID:    authID,
-			AuthLabel: authLabel,
-			AuthType:  authType,
-			AuthValue: authValue,
-		})
-
-		httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
-		httpResp, errDo := httpClient.Do(httpReq)
-		if errDo != nil {
-			helps.RecordAPIResponseError(ctx, e.cfg, errDo)
-			return nil, errDo
-		}
-
-		helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-			b, _ := io.ReadAll(httpResp.Body)
-			helps.AppendAPIResponseChunk(ctx, e.cfg, b)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("qwen executor: close response body error: %v", errClose)
-			}
-
-			errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
-			if errCode == http.StatusTooManyRequests && retryAfter == nil {
-				retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
-			}
-			if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
-				defaultRetryAfter := time.Second
-				retryAfter = &defaultRetryAfter
-			}
-			helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
-
-			err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
-			return nil, err
-		}
-
-		out := make(chan cliproxyexecutor.StreamChunk)
-		go func() {
-			defer close(out)
-			defer func() {
-				if errClose := httpResp.Body.Close(); errClose != nil {
-					log.Errorf("qwen executor: close response body error: %v", errClose)
-				}
-			}()
-			scanner := bufio.NewScanner(httpResp.Body)
-			scanner.Buffer(nil, 52_428_800) // 50MB
-			var param any
-			for scanner.Scan() {
-				line := scanner.Bytes()
-				helps.AppendAPIResponseChunk(ctx, e.cfg, line)
-				if detail, ok := helps.ParseOpenAIStreamUsage(line); ok {
-					reporter.Publish(ctx, detail)
-				}
-				chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
-				for i := range chunks {
-					out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
-				}
-			}
-			doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
-			for i := range doneChunks {
-				out <- cliproxyexecutor.StreamChunk{Payload: doneChunks[i]}
-			}
-			if errScan := scanner.Err(); errScan != nil {
-				helps.RecordAPIResponseError(ctx, e.cfg, errScan)
-				reporter.PublishFailure(ctx)
-				out <- cliproxyexecutor.StreamChunk{Err: errScan}
-			}
-		}()
-		return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
-	}
-}
-
-func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	baseModel := thinking.ParseSuffix(req.Model).ModelName
-
-	from := opts.SourceFormat
-	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
-
-	modelName := gjson.GetBytes(body, "model").String()
-	if strings.TrimSpace(modelName) == "" {
-		modelName = baseModel
-	}
-
-	enc, err := helps.TokenizerForModel(modelName)
-	if err != nil {
-		return cliproxyexecutor.Response{}, fmt.Errorf("qwen executor: tokenizer init failed: %w", err)
-	}
-
-	count, err := helps.CountOpenAIChatTokens(enc, body)
-	if err != nil {
-		return cliproxyexecutor.Response{}, fmt.Errorf("qwen executor: token counting failed: %w", err)
-	}
-
-	usageJSON := helps.BuildOpenAIUsageJSON(count)
-	translated := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
-	return cliproxyexecutor.Response{Payload: translated}, nil
-}
-
-func (e *QwenExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
-	log.Debugf("qwen executor: refresh called")
-	if auth == nil {
-		return nil, fmt.Errorf("qwen executor: auth is nil")
-	}
-	// Expect refresh_token in metadata for OAuth-based accounts
-	var refreshToken string
-	if auth.Metadata != nil {
-		if v, ok := auth.Metadata["refresh_token"].(string); ok && strings.TrimSpace(v) != "" {
-			refreshToken = v
-		}
-	}
-	if strings.TrimSpace(refreshToken) == "" {
-		// Nothing to refresh
-		return auth, nil
-	}
-
-	svc := qwenauth.NewQwenAuth(e.cfg)
-	td, err := svc.RefreshTokens(ctx, refreshToken)
-	if err != nil {
-		return nil, err
-	}
-	if auth.Metadata == nil {
-		auth.Metadata = make(map[string]any)
-	}
-	auth.Metadata["access_token"] = td.AccessToken
-	if td.RefreshToken != "" {
-		auth.Metadata["refresh_token"] = td.RefreshToken
-	}
-	if td.ResourceURL != "" {
-		auth.Metadata["resource_url"] = td.ResourceURL
-	}
-	// Use "expired" for consistency with existing file format
-	auth.Metadata["expired"] = td.Expire
-	auth.Metadata["type"] = "qwen"
-	now := time.Now().Format(time.RFC3339)
-	auth.Metadata["last_refresh"] = now
-	return auth, nil
-}
-
-func applyQwenHeaders(r *http.Request, token string, stream bool) {
-	r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0")
-	r.Header.Set("User-Agent", qwenUserAgent)
-	r.Header.Set("X-Stainless-Lang", "js")
-	r.Header.Set("Accept-Language", "*")
-	r.Header.Set("X-Dashscope-Cachecontrol", "enable")
-	r.Header.Set("X-Stainless-Os", "MacOS")
-	r.Header.Set("X-Dashscope-Authtype", "qwen-oauth")
-	r.Header.Set("X-Stainless-Arch", "arm64")
-	r.Header.Set("X-Stainless-Runtime", "node")
-	r.Header.Set("X-Stainless-Retry-Count", "0")
-	r.Header.Set("Accept-Encoding", "gzip, deflate")
-	r.Header.Set("Authorization", "Bearer "+token)
-	r.Header.Set("X-Stainless-Package-Version", "5.11.0")
-	r.Header.Set("Sec-Fetch-Mode", "cors")
-	r.Header.Set("Content-Type", "application/json")
-	r.Header.Set("Connection", "keep-alive")
-	r.Header.Set("X-Dashscope-Useragent", qwenUserAgent)
-
-	if stream {
-		r.Header.Set("Accept", "text/event-stream")
-		return
-	}
-	r.Header.Set("Accept", "application/json")
-}
-
-func normaliseQwenBaseURL(resourceURL string) string {
-	raw := strings.TrimSpace(resourceURL)
-	if raw == "" {
-		return ""
-	}
-
-	normalized := raw
-	lower := strings.ToLower(normalized)
-	if !strings.HasPrefix(lower, "http://") && !strings.HasPrefix(lower, "https://") {
-		normalized = "https://" + normalized
-	}
-
-	normalized = strings.TrimRight(normalized, "/")
-	if !strings.HasSuffix(strings.ToLower(normalized), "/v1") {
-		normalized += "/v1"
-	}
-
-	return normalized
-}
-
-func qwenCreds(a *cliproxyauth.Auth) (token, baseURL string) {
-	if a == nil {
-		return "", ""
-	}
-	if a.Attributes != nil {
-		if v := a.Attributes["api_key"]; v != "" {
-			token = v
-		}
-		if v := a.Attributes["base_url"]; v != "" {
-			baseURL = v
-		}
-	}
-	if token == "" && a.Metadata != nil {
-		if v, ok := a.Metadata["access_token"].(string); ok {
-			token = v
-		}
-		if v, ok := a.Metadata["resource_url"].(string); ok {
-			baseURL = normaliseQwenBaseURL(v)
-		}
-	}
-	return
-}
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -1,614 +0,0 @@
-package executor
-
-import (
-	"context"
-	"net/http"
-	"net/http/httptest"
-	"sync/atomic"
-	"testing"
-	"time"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
-	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
-	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
-	"github.com/tidwall/gjson"
-)
-
-func TestQwenExecutorParseSuffix(t *testing.T) {
-	tests := []struct {
-		name      string
-		model     string
-		wantBase  string
-		wantLevel string
-	}{
-		{"no suffix", "qwen-max", "qwen-max", ""},
-		{"with level suffix", "qwen-max(high)", "qwen-max", "high"},
-		{"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"},
-		{"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := thinking.ParseSuffix(tt.model)
-			if result.ModelName != tt.wantBase {
-				t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase)
-			}
-		})
-	}
-}
-
-func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
-	payload := []byte(`{
-		"model": "qwen3.6-plus",
-		"stream": true,
-		"messages": [
-			{ "role": "system", "content": "ABCDEFG" },
-			{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
-		]
-	}`)
-
-	out, err := ensureQwenSystemMessage(payload)
-	if err != nil {
-		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
-	}
-
-	msgs := gjson.GetBytes(out, "messages").Array()
-	if len(msgs) != 2 {
-		t.Fatalf("messages length = %d, want 2", len(msgs))
-	}
-	if msgs[0].Get("role").String() != "system" {
-		t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
-	}
-	parts := msgs[0].Get("content").Array()
-	if len(parts) != 2 {
-		t.Fatalf("messages[0].content length = %d, want 2", len(parts))
-	}
-	if parts[0].Get("type").String() != "text" || parts[0].Get("cache_control.type").String() != "ephemeral" {
-		t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
-	}
-	if text := parts[0].Get("text").String(); text != "" && text != "You are Qwen Code." {
-		t.Fatalf("messages[0].content[0].text = %q, want empty string or default prompt", text)
-	}
-	if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
-		t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
-	}
-	if msgs[1].Get("role").String() != "user" {
-		t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
-	}
-}
-
-func TestEnsureQwenSystemMessage_MergeObjectSystem(t *testing.T) {
-	payload := []byte(`{
-		"messages": [
-			{ "role": "system", "content": { "type": "text", "text": "ABCDEFG" } },
-			{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
-		]
-	}`)
-
-	out, err := ensureQwenSystemMessage(payload)
-	if err != nil {
-		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
-	}
-
-	msgs := gjson.GetBytes(out, "messages").Array()
-	if len(msgs) != 2 {
-		t.Fatalf("messages length = %d, want 2", len(msgs))
-	}
-	parts := msgs[0].Get("content").Array()
-	if len(parts) != 2 {
-		t.Fatalf("messages[0].content length = %d, want 2", len(parts))
-	}
-	if parts[1].Get("text").String() != "ABCDEFG" {
-		t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "ABCDEFG")
-	}
-}
-
-func TestEnsureQwenSystemMessage_PrependsWhenMissing(t *testing.T) {
-	payload := []byte(`{
-		"messages": [
-			{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
-		]
-	}`)
-
-	out, err := ensureQwenSystemMessage(payload)
-	if err != nil {
-		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
-	}
-
-	msgs := gjson.GetBytes(out, "messages").Array()
-	if len(msgs) != 2 {
-		t.Fatalf("messages length = %d, want 2", len(msgs))
-	}
-	if msgs[0].Get("role").String() != "system" {
-		t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
-	}
-	if !msgs[0].Get("content").IsArray() || len(msgs[0].Get("content").Array()) == 0 {
-		t.Fatalf("messages[0].content = %s, want non-empty array", msgs[0].Get("content").Raw)
-	}
-	if msgs[1].Get("role").String() != "user" {
-		t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
-	}
-}
-
-func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
-	payload := []byte(`{
-		"messages": [
-			{ "role": "system", "content": "A" },
-			{ "role": "user", "content": [ { "type": "text", "text": "hi" } ] },
-			{ "role": "system", "content": "B" }
-		]
-	}`)
-
-	out, err := ensureQwenSystemMessage(payload)
-	if err != nil {
-		t.Fatalf("ensureQwenSystemMessage() error = %v", err)
-	}
-
-	msgs := gjson.GetBytes(out, "messages").Array()
-	if len(msgs) != 2 {
-		t.Fatalf("messages length = %d, want 2", len(msgs))
-	}
-	parts := msgs[0].Get("content").Array()
-	if len(parts) != 3 {
-		t.Fatalf("messages[0].content length = %d, want 3", len(parts))
-	}
-	if parts[1].Get("text").String() != "A" {
-		t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "A")
-	}
-	if parts[2].Get("text").String() != "B" {
-		t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
-	}
-}
-
-func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) {
-	body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
-	code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body)
-	if code != http.StatusTooManyRequests {
-		t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
-	}
-	if retryAfter != nil {
-		t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
-	}
-}
-
-func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) {
-	body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
-	code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body)
-	if code != http.StatusTooManyRequests {
-		t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
-	}
-	if retryAfter != nil {
-		t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
-	}
-}
-
-func TestQwenCreds_NormalizesResourceURL(t *testing.T) {
-	tests := []struct {
-		name        string
-		resourceURL string
-		wantBaseURL string
-	}{
-		{"host only", "portal.qwen.ai", "https://portal.qwen.ai/v1"},
-		{"scheme no v1", "https://portal.qwen.ai", "https://portal.qwen.ai/v1"},
-		{"scheme with v1", "https://portal.qwen.ai/v1", "https://portal.qwen.ai/v1"},
-		{"scheme with v1 slash", "https://portal.qwen.ai/v1/", "https://portal.qwen.ai/v1"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			auth := &cliproxyauth.Auth{
-				Metadata: map[string]any{
-					"access_token": "test-token",
-					"resource_url": tt.resourceURL,
-				},
-			}
-
-			token, baseURL := qwenCreds(auth)
-			if token != "test-token" {
-				t.Fatalf("qwenCreds token = %q, want %q", token, "test-token")
-			}
-			if baseURL != tt.wantBaseURL {
-				t.Fatalf("qwenCreds baseURL = %q, want %q", baseURL, tt.wantBaseURL)
-			}
-		})
-	}
-}
-
-func TestQwenExecutorExecute_429DoesNotRefreshOrRetry(t *testing.T) {
-	qwenRateLimiter.Lock()
-	qwenRateLimiter.requests = make(map[string][]time.Time)
-	qwenRateLimiter.Unlock()
-
-	var calls int32
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		atomic.AddInt32(&calls, 1)
-		if r.URL.Path != "/v1/chat/completions" {
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		switch r.Header.Get("Authorization") {
-		case "Bearer old-token":
-			w.Header().Set("Content-Type", "application/json")
-			w.WriteHeader(http.StatusTooManyRequests)
-			_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
-			return
-		case "Bearer new-token":
-			w.Header().Set("Content-Type", "application/json")
-			w.WriteHeader(http.StatusOK)
-			_, _ = w.Write([]byte(`{"id":"chatcmpl-test","object":"chat.completion","created":1,"model":"qwen-max","choices":[{"index":0,"message":{"role":"assistant","content":"hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`))
-			return
-		default:
-			w.WriteHeader(http.StatusUnauthorized)
-			return
-		}
-	}))
-	defer srv.Close()
-
-	exec := NewQwenExecutor(&config.Config{})
-	auth := &cliproxyauth.Auth{
-		ID:       "auth-test",
-		Provider: "qwen",
-		Attributes: map[string]string{
-			"base_url": srv.URL + "/v1",
-		},
-		Metadata: map[string]any{
-			"access_token":  "old-token",
-			"refresh_token": "refresh-token",
-		},
-	}
-
-	var refresherCalls int32
-	exec.refreshForImmediateRetry = func(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
-		atomic.AddInt32(&refresherCalls, 1)
-		refreshed := auth.Clone()
-		if refreshed.Metadata == nil {
-			refreshed.Metadata = make(map[string]any)
-		}
-		refreshed.Metadata["access_token"] = "new-token"
-		refreshed.Metadata["refresh_token"] = "refresh-token-2"
-		return refreshed, nil
-	}
-	ctx := context.Background()
-
-	_, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
-		Model:   "qwen-max",
-		Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
-	}, cliproxyexecutor.Options{
-		SourceFormat: sdktranslator.FromString("openai"),
-	})
-	if err == nil {
-		t.Fatalf("Execute() expected error, got nil")
-	}
-	status, ok := err.(statusErr)
-	if !ok {
-		t.Fatalf("Execute() error type = %T, want statusErr", err)
-	}
-	if status.StatusCode() != http.StatusTooManyRequests {
-		t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
-	}
-	if atomic.LoadInt32(&calls) != 1 {
-		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
-	}
-	if atomic.LoadInt32(&refresherCalls) != 0 {
-		t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls))
-	}
-}
-
-func TestQwenExecutorExecuteStream_429DoesNotRefreshOrRetry(t *testing.T) {
-	qwenRateLimiter.Lock()
-	qwenRateLimiter.requests = make(map[string][]time.Time)
-	qwenRateLimiter.Unlock()
-
-	var calls int32
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		atomic.AddInt32(&calls, 1)
-		if r.URL.Path != "/v1/chat/completions" {
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		switch r.Header.Get("Authorization") {
-		case "Bearer old-token":
-			w.Header().Set("Content-Type", "application/json")
-			w.WriteHeader(http.StatusTooManyRequests)
-			_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
-			return
-		case "Bearer new-token":
-			w.Header().Set("Content-Type", "text/event-stream")
-			w.WriteHeader(http.StatusOK)
-			_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-test\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"qwen-max\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hi\"},\"finish_reason\":null}]}\n"))
-			if flusher, ok := w.(http.Flusher); ok {
-				flusher.Flush()
-			}
-			return
-		default:
-			w.WriteHeader(http.StatusUnauthorized)
-			return
-		}
-	}))
-	defer srv.Close()
-
-	exec := NewQwenExecutor(&config.Config{})
-	auth := &cliproxyauth.Auth{
-		ID:       "auth-test",
-		Provider: "qwen",
-		Attributes: map[string]string{
-			"base_url": srv.URL + "/v1",
-		},
-		Metadata: map[string]any{
-			"access_token":  "old-token",
-			"refresh_token": "refresh-token",
-		},
-	}
-
-	var refresherCalls int32
-	exec.refreshForImmediateRetry = func(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
-		atomic.AddInt32(&refresherCalls, 1)
-		refreshed := auth.Clone()
-		if refreshed.Metadata == nil {
-			refreshed.Metadata = make(map[string]any)
-		}
-		refreshed.Metadata["access_token"] = "new-token"
-		refreshed.Metadata["refresh_token"] = "refresh-token-2"
-		return refreshed, nil
-	}
-	ctx := context.Background()
-
-	_, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
-		Model:   "qwen-max",
-		Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
-	}, cliproxyexecutor.Options{
-		SourceFormat: sdktranslator.FromString("openai"),
-	})
-	if err == nil {
-		t.Fatalf("ExecuteStream() expected error, got nil")
-	}
-	status, ok := err.(statusErr)
-	if !ok {
-		t.Fatalf("ExecuteStream() error type = %T, want statusErr", err)
-	}
-	if status.StatusCode() != http.StatusTooManyRequests {
-		t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
-	}
-	if atomic.LoadInt32(&calls) != 1 {
-		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
-	}
-	if atomic.LoadInt32(&refresherCalls) != 0 {
-		t.Fatalf("refresher calls = %d, want 0", atomic.LoadInt32(&refresherCalls))
-	}
-}
-
-func TestQwenExecutorExecute_429RetryAfterHeaderPropagatesToStatusErr(t *testing.T) {
-	qwenRateLimiter.Lock()
-	qwenRateLimiter.requests = make(map[string][]time.Time)
-	qwenRateLimiter.Unlock()
-
-	var calls int32
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		atomic.AddInt32(&calls, 1)
-		if r.URL.Path != "/v1/chat/completions" {
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		w.Header().Set("Content-Type", "application/json")
-		w.Header().Set("Retry-After", "2")
-		w.WriteHeader(http.StatusTooManyRequests)
-		_, _ = w.Write([]byte(`{"error":{"code":"rate_limit_exceeded","message":"rate limited","type":"rate_limit_exceeded"}}`))
-	}))
-	defer srv.Close()
-
-	exec := NewQwenExecutor(&config.Config{})
-	auth := &cliproxyauth.Auth{
-		ID:       "auth-test",
-		Provider: "qwen",
-		Attributes: map[string]string{
-			"base_url": srv.URL + "/v1",
-		},
-		Metadata: map[string]any{
-			"access_token": "test-token",
-		},
-	}
-	ctx := context.Background()
-
-	_, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
-		Model:   "qwen-max",
-		Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
-	}, cliproxyexecutor.Options{
-		SourceFormat: sdktranslator.FromString("openai"),
-	})
-	if err == nil {
-		t.Fatalf("Execute() expected error, got nil")
-	}
-	status, ok := err.(statusErr)
-	if !ok {
-		t.Fatalf("Execute() error type = %T, want statusErr", err)
-	}
-	if status.StatusCode() != http.StatusTooManyRequests {
-		t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
-	}
-	if status.RetryAfter() == nil {
-		t.Fatalf("Execute() RetryAfter is nil, want non-nil")
-	}
-	if got := *status.RetryAfter(); got != 2*time.Second {
-		t.Fatalf("Execute() RetryAfter = %v, want %v", got, 2*time.Second)
-	}
-	if atomic.LoadInt32(&calls) != 1 {
-		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
-	}
-}
-
-func TestQwenExecutorExecuteStream_429RetryAfterHeaderPropagatesToStatusErr(t *testing.T) {
-	qwenRateLimiter.Lock()
-	qwenRateLimiter.requests = make(map[string][]time.Time)
-	qwenRateLimiter.Unlock()
-
-	var calls int32
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		atomic.AddInt32(&calls, 1)
-		if r.URL.Path != "/v1/chat/completions" {
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		w.Header().Set("Content-Type", "application/json")
-		w.Header().Set("Retry-After", "2")
-		w.WriteHeader(http.StatusTooManyRequests)
-		_, _ = w.Write([]byte(`{"error":{"code":"rate_limit_exceeded","message":"rate limited","type":"rate_limit_exceeded"}}`))
-	}))
-	defer srv.Close()
-
-	exec := NewQwenExecutor(&config.Config{})
-	auth := &cliproxyauth.Auth{
-		ID:       "auth-test",
-		Provider: "qwen",
-		Attributes: map[string]string{
-			"base_url": srv.URL + "/v1",
-		},
-		Metadata: map[string]any{
-			"access_token": "test-token",
-		},
-	}
-	ctx := context.Background()
-
-	_, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
-		Model:   "qwen-max",
-		Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
-	}, cliproxyexecutor.Options{
-		SourceFormat: sdktranslator.FromString("openai"),
-	})
-	if err == nil {
-		t.Fatalf("ExecuteStream() expected error, got nil")
-	}
-	status, ok := err.(statusErr)
-	if !ok {
-		t.Fatalf("ExecuteStream() error type = %T, want statusErr", err)
-	}
-	if status.StatusCode() != http.StatusTooManyRequests {
-		t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
-	}
-	if status.RetryAfter() == nil {
-		t.Fatalf("ExecuteStream() RetryAfter is nil, want non-nil")
-	}
-	if got := *status.RetryAfter(); got != 2*time.Second {
-		t.Fatalf("ExecuteStream() RetryAfter = %v, want %v", got, 2*time.Second)
-	}
-	if atomic.LoadInt32(&calls) != 1 {
-		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
-	}
-}
-
-func TestQwenExecutorExecute_429QuotaExhausted_DisableCoolingSetsDefaultRetryAfter(t *testing.T) {
-	qwenRateLimiter.Lock()
-	qwenRateLimiter.requests = make(map[string][]time.Time)
-	qwenRateLimiter.Unlock()
-
-	var calls int32
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		atomic.AddInt32(&calls, 1)
-		if r.URL.Path != "/v1/chat/completions" {
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(http.StatusTooManyRequests)
-		_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
-	}))
-	defer srv.Close()
-
-	exec := NewQwenExecutor(&config.Config{DisableCooling: true})
-	auth := &cliproxyauth.Auth{
-		ID:       "auth-test",
-		Provider: "qwen",
-		Attributes: map[string]string{
-			"base_url": srv.URL + "/v1",
-		},
-		Metadata: map[string]any{
-			"access_token": "test-token",
-		},
-	}
-	ctx := context.Background()
-
-	_, err := exec.Execute(ctx, auth, cliproxyexecutor.Request{
-		Model:   "qwen-max",
-		Payload: []byte(`{"model":"qwen-max","messages":[{"role":"user","content":"hi"}]}`),
-	}, cliproxyexecutor.Options{
-		SourceFormat: sdktranslator.FromString("openai"),
-	})
-	if err == nil {
-		t.Fatalf("Execute() expected error, got nil")
-	}
-	status, ok := err.(statusErr)
-	if !ok {
-		t.Fatalf("Execute() error type = %T, want statusErr", err)
-	}
-	if status.StatusCode() != http.StatusTooManyRequests {
-		t.Fatalf("Execute() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
-	}
-	if status.RetryAfter() == nil {
-		t.Fatalf("Execute() RetryAfter is nil, want non-nil")
-	}
-	if got := *status.RetryAfter(); got != time.Second {
-		t.Fatalf("Execute() RetryAfter = %v, want %v", got, time.Second)
-	}
-	if atomic.LoadInt32(&calls) != 1 {
-		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
-	}
-}
-
-func TestQwenExecutorExecuteStream_429QuotaExhausted_DisableCoolingSetsDefaultRetryAfter(t *testing.T) {
-	qwenRateLimiter.Lock()
-	qwenRateLimiter.requests = make(map[string][]time.Time)
-	qwenRateLimiter.Unlock()
-
-	var calls int32
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		atomic.AddInt32(&calls, 1)
-		if r.URL.Path != "/v1/chat/completions" {
-			w.WriteHeader(http.StatusNotFound)
-			return
-		}
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(http.StatusTooManyRequests)
-		_, _ = w.Write([]byte(`{"error":{"code":"quota_exceeded","message":"quota exceeded","type":"quota_exceeded"}}`))
-	}))
-	defer srv.Close()
-
-	exec := NewQwenExecutor(&config.Config{DisableCooling: true})
-	auth := &cliproxyauth.Auth{
-		ID:       "auth-test",
-		Provider: "qwen",
-		Attributes: map[string]string{
-			"base_url": srv.URL + "/v1",
-		},
-		Metadata: map[string]any{
-			"access_token": "test-token",
-		},
-	}
-	ctx := context.Background()
-
-	_, err := exec.ExecuteStream(ctx, auth, cliproxyexecutor.Request{
-		Model:   "qwen-max",
-		Payload: []byte(`{"model":"qwen-max","stream":true,"messages":[{"role":"user","content":"hi"}]}`),
-	}, cliproxyexecutor.Options{
-		SourceFormat: sdktranslator.FromString("openai"),
-	})
-	if err == nil {
-		t.Fatalf("ExecuteStream() expected error, got nil")
-	}
-	status, ok := err.(statusErr)
-	if !ok {
-		t.Fatalf("ExecuteStream() error type = %T, want statusErr", err)
-	}
-	if status.StatusCode() != http.StatusTooManyRequests {
-		t.Fatalf("ExecuteStream() status code = %d, want %d", status.StatusCode(), http.StatusTooManyRequests)
-	}
-	if status.RetryAfter() == nil {
-		t.Fatalf("ExecuteStream() RetryAfter is nil, want non-nil")
-	}
-	if got := *status.RetryAfter(); got != time.Second {
-		t.Fatalf("ExecuteStream() RetryAfter = %v, want %v", got, time.Second)
-	}
-	if atomic.LoadInt32(&calls) != 1 {
-		t.Fatalf("upstream calls = %d, want 1", atomic.LoadInt32(&calls))
-	}
-}