diff --git a/internal/redisqueue/plugin.go b/internal/redisqueue/plugin.go index 158b5ed5e..eb3c8c822 100644 --- a/internal/redisqueue/plugin.go +++ b/internal/redisqueue/plugin.go @@ -48,6 +48,10 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec } apiKey := strings.TrimSpace(record.APIKey) requestID := strings.TrimSpace(internallogging.GetRequestID(ctx)) + reasoningEffort := strings.TrimSpace(record.ReasoningEffort) + if reasoningEffort == "" { + reasoningEffort = coreusage.ReasoningEffortFromContext(ctx) + } tokens := tokenStats{ InputTokens: record.Detail.InputTokens, @@ -83,14 +87,15 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec } payload, err := json.Marshal(queuedUsageDetail{ - requestDetail: detail, - Provider: provider, - Model: modelName, - Alias: aliasName, - Endpoint: resolveEndpoint(ctx), - AuthType: authType, - APIKey: apiKey, - RequestID: requestID, + requestDetail: detail, + Provider: provider, + Model: modelName, + Alias: aliasName, + Endpoint: resolveEndpoint(ctx), + AuthType: authType, + APIKey: apiKey, + RequestID: requestID, + ReasoningEffort: reasoningEffort, }) if err != nil { return @@ -100,13 +105,14 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec type queuedUsageDetail struct { requestDetail - Provider string `json:"provider"` - Model string `json:"model"` - Alias string `json:"alias"` - Endpoint string `json:"endpoint"` - AuthType string `json:"auth_type"` - APIKey string `json:"api_key"` - RequestID string `json:"request_id"` + Provider string `json:"provider"` + Model string `json:"model"` + Alias string `json:"alias"` + Endpoint string `json:"endpoint"` + AuthType string `json:"auth_type"` + APIKey string `json:"api_key"` + RequestID string `json:"request_id"` + ReasoningEffort string `json:"reasoning_effort"` } type requestDetail struct { diff --git a/internal/redisqueue/plugin_test.go b/internal/redisqueue/plugin_test.go index a3358d163..4917955cd 100644 --- a/internal/redisqueue/plugin_test.go +++ b/internal/redisqueue/plugin_test.go @@ -25,15 +25,16 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) { plugin := &usageQueuePlugin{} plugin.HandleUsage(ctx, coreusage.Record{ - Provider: "openai", - Model: "gpt-5.4", - Alias: "client-gpt", - APIKey: "test-key", - AuthIndex: "0", - AuthType: "apikey", - Source: "user@example.com", - RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC), - Latency: 1500 * time.Millisecond, + Provider: "openai", + Model: "gpt-5.4", + Alias: "client-gpt", + APIKey: "test-key", + AuthIndex: "0", + AuthType: "apikey", + Source: "user@example.com", + ReasoningEffort: "medium", + RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC), + Latency: 1500 * time.Millisecond, Detail: coreusage.Detail{ InputTokens: 10, OutputTokens: 20, @@ -51,6 +52,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) { requireStringField(t, payload, "auth_type", "apikey") requireMissingField(t, payload, "user_api_key") requireStringField(t, payload, "request_id", "ctx-request-id") + requireStringField(t, payload, "reasoning_effort", "medium") requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"}) requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"}) requireBoolField(t, payload, "failed", false) diff --git a/internal/runtime/executor/helps/usage_helpers.go b/internal/runtime/executor/helps/usage_helpers.go index d711b91a7..f6958221c 100644 --- a/internal/runtime/executor/helps/usage_helpers.go +++ b/internal/runtime/executor/helps/usage_helpers.go @@ -26,6 +26,7 @@ type UsageReporter struct { authType string apiKey string source string + reasoning string requestedAt time.Time once sync.Once } @@ -44,6 +45,7 @@ func NewUsageReporter(ctx context.Context, provider, model string, auth *cliprox apiKey: apiKey, source: resolveUsageSource(auth, apiKey), authType: resolveUsageAuthType(auth), + reasoning: usage.ReasoningEffortFromContext(ctx), } if auth != nil { reporter.authID = auth.ID @@ -156,19 +158,20 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f return usage.Record{Model: model, Detail: detail, Failed: failed, Fail: fail} } return usage.Record{ - Provider: r.provider, - Model: model, - Alias: r.alias, - Source: r.source, - APIKey: r.apiKey, - AuthID: r.authID, - AuthIndex: r.authIndex, - AuthType: r.authType, - RequestedAt: r.requestedAt, - Latency: r.latency(), - Failed: failed, - Fail: fail, - Detail: detail, + Provider: r.provider, + Model: model, + Alias: r.alias, + Source: r.source, + APIKey: r.apiKey, + AuthID: r.authID, + AuthIndex: r.authIndex, + AuthType: r.authType, + ReasoningEffort: r.reasoning, + RequestedAt: r.requestedAt, + Latency: r.latency(), + Failed: failed, + Fail: fail, + Detail: detail, } } diff --git a/internal/runtime/executor/helps/usage_helpers_test.go b/internal/runtime/executor/helps/usage_helpers_test.go index bd0a9c21b..330641c61 100644 --- a/internal/runtime/executor/helps/usage_helpers_test.go +++ b/internal/runtime/executor/helps/usage_helpers_test.go @@ -159,6 +159,16 @@ func TestUsageReporterBuildRecordIncludesRequestedModelAlias(t *testing.T) { } } +func TestUsageReporterBuildRecordIncludesReasoningEffort(t *testing.T) { + ctx := usage.WithReasoningEffort(context.Background(), "medium") + reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil) + + record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false) + if record.ReasoningEffort != "medium" { + t.Fatalf("reasoning effort = %q, want %q", record.ReasoningEffort, "medium") + } +} + func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) { reporter := &UsageReporter{ provider: "codex", diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index e8a078319..614d15ca0 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -339,6 +339,56 @@ func hasThinkingConfig(config ThinkingConfig) bool { return config.Mode != ModeBudget || config.Budget != 0 || config.Level != "" } +// ExtractReasoningEffort returns the request's thinking setting as a canonical +// reasoning_effort label for usage logging. Model suffixes have the same +// priority as ApplyThinking: a valid suffix overrides body fields. +func ExtractReasoningEffort(body []byte, provider, model string) string { + if effort := reasoningEffortFromSuffix(ParseSuffix(model)); effort != "" { + return effort + } + + provider = strings.ToLower(strings.TrimSpace(provider)) + config := extractThinkingConfig(body, provider) + if !hasThinkingConfig(config) { + switch provider { + case "openai-response": + config = extractCodexConfig(body) + case "openai": + config = extractCodexConfig(body) + } + } + return reasoningEffortFromConfig(config) +} + +func reasoningEffortFromSuffix(suffix SuffixResult) string { + if !suffix.HasSuffix { + return "" + } + return reasoningEffortFromConfig(parseSuffixToConfig(suffix.RawSuffix, "", suffix.ModelName)) +} + +func reasoningEffortFromConfig(config ThinkingConfig) string { + if !hasThinkingConfig(config) { + return "" + } + switch config.Mode { + case ModeNone: + return string(LevelNone) + case ModeAuto: + return string(LevelAuto) + case ModeLevel: + return strings.ToLower(strings.TrimSpace(string(config.Level))) + case ModeBudget: + level, ok := ConvertBudgetToLevel(config.Budget) + if !ok { + return "" + } + return level + default: + return "" + } +} + // extractClaudeConfig extracts thinking configuration from Claude format request body. // // Claude API format: diff --git a/internal/thinking/reasoning_effort_test.go b/internal/thinking/reasoning_effort_test.go new file mode 100644 index 000000000..e529e115b --- /dev/null +++ b/internal/thinking/reasoning_effort_test.go @@ -0,0 +1,31 @@ +package thinking + +import "testing" + +func TestExtractReasoningEffortUsesSuffixOverBody(t *testing.T) { + got := ExtractReasoningEffort([]byte(`{"reasoning_effort":"low"}`), "openai", "gpt-5.4(high)") + if got != "high" { + t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, "high") + } +} + +func TestExtractReasoningEffortConvertsBudgetToLevel(t *testing.T) { + got := ExtractReasoningEffort([]byte(`{"thinking":{"type":"enabled","budget_tokens":8192}}`), "claude", "claude-sonnet-4-5") + if got != "medium" { + t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, "medium") + } +} + +func TestExtractReasoningEffortSupportsOpenAIResponses(t *testing.T) { + got := ExtractReasoningEffort([]byte(`{"reasoning":{"effort":"medium"}}`), "openai-response", "gpt-5.4") + if got != "medium" { + t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, "medium") + } +} + +func TestExtractReasoningEffortMissingConfigIsEmpty(t *testing.T) { + got := ExtractReasoningEffort([]byte(`{"messages":[{"role":"user","content":"hi"}]}`), "openai", "gpt-5.4") + if got != "" { + t.Fatalf("ExtractReasoningEffort() = %q, want empty", got) + } +} diff --git a/sdk/api/handlers/handlers.go b/sdk/api/handlers/handlers.go index 003859dcb..5a25681dc 100644 --- a/sdk/api/handlers/handlers.go +++ b/sdk/api/handlers/handlers.go @@ -231,6 +231,17 @@ func requestExecutionMetadata(ctx context.Context) map[string]any { return meta } +func setReasoningEffortMetadata(meta map[string]any, handlerType, model string, rawJSON []byte) { + if meta == nil { + return + } + effort := thinking.ExtractReasoningEffort(rawJSON, handlerType, model) + if effort == "" { + return + } + meta[coreexecutor.ReasoningEffortMetadataKey] = effort +} + // headersFromContext extracts the original HTTP request headers from the gin context // embedded in the provided context. This allows session affinity selectors to read // client headers like X-Amp-Thread-Id. @@ -550,6 +561,7 @@ func (h *BaseAPIHandler) executeWithAuthManager(ctx context.Context, handlerType } reqMeta := requestExecutionMetadata(ctx) reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName + setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON) payload := rawJSON if len(payload) == 0 { payload = nil @@ -598,6 +610,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle } reqMeta := requestExecutionMetadata(ctx) reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName + setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON) payload := rawJSON if len(payload) == 0 { payload = nil @@ -659,6 +672,7 @@ func (h *BaseAPIHandler) executeStreamWithAuthManager(ctx context.Context, handl } reqMeta := requestExecutionMetadata(ctx) reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName + setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON) payload := rawJSON if len(payload) == 0 { payload = nil diff --git a/sdk/api/handlers/handlers_metadata_test.go b/sdk/api/handlers/handlers_metadata_test.go index c5e94f963..d2bdab683 100644 --- a/sdk/api/handlers/handlers_metadata_test.go +++ b/sdk/api/handlers/handlers_metadata_test.go @@ -18,3 +18,23 @@ func TestRequestExecutionMetadataIncludesExecutionSessionWithoutIdempotencyKey(t t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey]) } } + +func TestSetReasoningEffortMetadataUsesSuffixOverBody(t *testing.T) { + meta := make(map[string]any) + + setReasoningEffortMetadata(meta, "openai", "gpt-5.4(high)", []byte(`{"reasoning_effort":"low"}`)) + + if got := meta[coreexecutor.ReasoningEffortMetadataKey]; got != "high" { + t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, "high") + } +} + +func TestSetReasoningEffortMetadataSupportsOpenAIResponses(t *testing.T) { + meta := make(map[string]any) + + setReasoningEffortMetadata(meta, "openai-response", "gpt-5.4", []byte(`{"reasoning":{"effort":"medium"}}`)) + + if got := meta[coreexecutor.ReasoningEffortMetadataKey]; got != "medium" { + t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, "medium") + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index fca26a9c2..537f182ac 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1632,7 +1632,11 @@ func hasRequestedModelMetadata(meta map[string]any) bool { func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context { alias := requestedModelAliasFromOptions(opts, fallback) - return coreusage.WithRequestedModelAlias(ctx, alias) + ctx = coreusage.WithRequestedModelAlias(ctx, alias) + if effort := reasoningEffortFromOptions(opts); effort != "" { + ctx = coreusage.WithReasoningEffort(ctx, effort) + } + return ctx } func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string { @@ -1660,6 +1664,24 @@ func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback stri } } +func reasoningEffortFromOptions(opts cliproxyexecutor.Options) string { + if len(opts.Metadata) == 0 { + return "" + } + raw, ok := opts.Metadata[cliproxyexecutor.ReasoningEffortMetadataKey] + if !ok || raw == nil { + return "" + } + switch value := raw.(type) { + case string: + return strings.TrimSpace(value) + case []byte: + return strings.TrimSpace(string(value)) + default: + return "" + } +} + func pinnedAuthIDFromMetadata(meta map[string]any) string { if len(meta) == 0 { return "" diff --git a/sdk/cliproxy/auth/conductor_usage_test.go b/sdk/cliproxy/auth/conductor_usage_test.go new file mode 100644 index 000000000..23a70ea28 --- /dev/null +++ b/sdk/cliproxy/auth/conductor_usage_test.go @@ -0,0 +1,25 @@ +package auth + +import ( + "context" + "testing" + + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor" + coreusage "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage" +) + +func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) { + ctx := contextWithRequestedModelAlias(context.Background(), cliproxyexecutor.Options{ + Metadata: map[string]any{ + cliproxyexecutor.RequestedModelMetadataKey: "client-model", + cliproxyexecutor.ReasoningEffortMetadataKey: "medium", + }, + }, "fallback-model") + + if got := coreusage.RequestedModelAliasFromContext(ctx); got != "client-model" { + t.Fatalf("requested model alias = %q, want %q", got, "client-model") + } + if got := coreusage.ReasoningEffortFromContext(ctx); got != "medium" { + t.Fatalf("reasoning effort = %q, want %q", got, "medium") + } +} diff --git a/sdk/cliproxy/executor/types.go b/sdk/cliproxy/executor/types.go index fd1da2e53..fc003540e 100644 --- a/sdk/cliproxy/executor/types.go +++ b/sdk/cliproxy/executor/types.go @@ -17,6 +17,9 @@ const RequestPathMetadataKey = "request_path" // DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials. const DisallowFreeAuthMetadataKey = "disallow_free_auth" +// ReasoningEffortMetadataKey stores the client-requested reasoning effort for usage logs. +const ReasoningEffortMetadataKey = "reasoning_effort" + const ( // PinnedAuthMetadataKey locks execution to a specific auth ID. PinnedAuthMetadataKey = "pinned_auth_id" diff --git a/sdk/cliproxy/usage/manager.go b/sdk/cliproxy/usage/manager.go index 2cdd34716..1bda0188a 100644 --- a/sdk/cliproxy/usage/manager.go +++ b/sdk/cliproxy/usage/manager.go @@ -12,19 +12,21 @@ import ( // Record contains the usage statistics captured for a single provider request. type Record struct { - Provider string - Model string - Alias string - APIKey string - AuthID string - AuthIndex string - AuthType string - Source string - RequestedAt time.Time - Latency time.Duration - Failed bool - Fail Failure - Detail Detail + Provider string + Model string + Alias string + APIKey string + AuthID string + AuthIndex string + AuthType string + Source string + // ReasoningEffort stores the client-requested thinking level for request event logs. + ReasoningEffort string + RequestedAt time.Time + Latency time.Duration + Failed bool + Fail Failure + Detail Detail // ResponseHeaders stores a snapshot of upstream response headers for usage sinks. ResponseHeaders http.Header } @@ -47,6 +49,7 @@ type Detail struct { } type requestedModelAliasContextKey struct{} +type reasoningEffortContextKey struct{} // WithRequestedModelAlias stores the client-requested model name for usage sinks. func WithRequestedModelAlias(ctx context.Context, alias string) context.Context { @@ -76,6 +79,34 @@ func RequestedModelAliasFromContext(ctx context.Context) string { } } +// WithReasoningEffort stores the client-requested reasoning effort for usage sinks. +func WithReasoningEffort(ctx context.Context, effort string) context.Context { + if ctx == nil { + ctx = context.Background() + } + effort = strings.TrimSpace(effort) + if effort == "" { + return ctx + } + return context.WithValue(ctx, reasoningEffortContextKey{}, effort) +} + +// ReasoningEffortFromContext returns the client-requested reasoning effort stored in ctx. +func ReasoningEffortFromContext(ctx context.Context) string { + if ctx == nil { + return "" + } + raw := ctx.Value(reasoningEffortContextKey{}) + switch value := raw.(type) { + case string: + return strings.TrimSpace(value) + case []byte: + return strings.TrimSpace(string(value)) + default: + return "" + } +} + // Plugin consumes usage records emitted by the proxy runtime. type Plugin interface { HandleUsage(ctx context.Context, record Record)