Merge pull request #3484 from yavon007/main

Add reasoning_effort to usage event payloads
This commit is contained in:
Luis Pater
2026-05-20 12:34:40 +08:00
committed by GitHub
12 changed files with 268 additions and 51 deletions

View File

@@ -48,6 +48,10 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
}
apiKey := strings.TrimSpace(record.APIKey)
requestID := strings.TrimSpace(internallogging.GetRequestID(ctx))
reasoningEffort := strings.TrimSpace(record.ReasoningEffort)
if reasoningEffort == "" {
reasoningEffort = coreusage.ReasoningEffortFromContext(ctx)
}
tokens := tokenStats{
InputTokens: record.Detail.InputTokens,
@@ -83,14 +87,15 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
}
payload, err := json.Marshal(queuedUsageDetail{
requestDetail: detail,
Provider: provider,
Model: modelName,
Alias: aliasName,
Endpoint: resolveEndpoint(ctx),
AuthType: authType,
APIKey: apiKey,
RequestID: requestID,
requestDetail: detail,
Provider: provider,
Model: modelName,
Alias: aliasName,
Endpoint: resolveEndpoint(ctx),
AuthType: authType,
APIKey: apiKey,
RequestID: requestID,
ReasoningEffort: reasoningEffort,
})
if err != nil {
return
@@ -100,13 +105,14 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
type queuedUsageDetail struct {
requestDetail
Provider string `json:"provider"`
Model string `json:"model"`
Alias string `json:"alias"`
Endpoint string `json:"endpoint"`
AuthType string `json:"auth_type"`
APIKey string `json:"api_key"`
RequestID string `json:"request_id"`
Provider string `json:"provider"`
Model string `json:"model"`
Alias string `json:"alias"`
Endpoint string `json:"endpoint"`
AuthType string `json:"auth_type"`
APIKey string `json:"api_key"`
RequestID string `json:"request_id"`
ReasoningEffort string `json:"reasoning_effort"`
}
type requestDetail struct {

View File

@@ -25,15 +25,16 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
plugin := &usageQueuePlugin{}
plugin.HandleUsage(ctx, coreusage.Record{
Provider: "openai",
Model: "gpt-5.4",
Alias: "client-gpt",
APIKey: "test-key",
AuthIndex: "0",
AuthType: "apikey",
Source: "user@example.com",
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
Latency: 1500 * time.Millisecond,
Provider: "openai",
Model: "gpt-5.4",
Alias: "client-gpt",
APIKey: "test-key",
AuthIndex: "0",
AuthType: "apikey",
Source: "user@example.com",
ReasoningEffort: "medium",
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
Latency: 1500 * time.Millisecond,
Detail: coreusage.Detail{
InputTokens: 10,
OutputTokens: 20,
@@ -51,6 +52,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
requireStringField(t, payload, "auth_type", "apikey")
requireMissingField(t, payload, "user_api_key")
requireStringField(t, payload, "request_id", "ctx-request-id")
requireStringField(t, payload, "reasoning_effort", "medium")
requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"})
requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"})
requireBoolField(t, payload, "failed", false)

View File

@@ -26,6 +26,7 @@ type UsageReporter struct {
authType string
apiKey string
source string
reasoning string
requestedAt time.Time
once sync.Once
}
@@ -44,6 +45,7 @@ func NewUsageReporter(ctx context.Context, provider, model string, auth *cliprox
apiKey: apiKey,
source: resolveUsageSource(auth, apiKey),
authType: resolveUsageAuthType(auth),
reasoning: usage.ReasoningEffortFromContext(ctx),
}
if auth != nil {
reporter.authID = auth.ID
@@ -156,19 +158,20 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f
return usage.Record{Model: model, Detail: detail, Failed: failed, Fail: fail}
}
return usage.Record{
Provider: r.provider,
Model: model,
Alias: r.alias,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
AuthType: r.authType,
RequestedAt: r.requestedAt,
Latency: r.latency(),
Failed: failed,
Fail: fail,
Detail: detail,
Provider: r.provider,
Model: model,
Alias: r.alias,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
AuthType: r.authType,
ReasoningEffort: r.reasoning,
RequestedAt: r.requestedAt,
Latency: r.latency(),
Failed: failed,
Fail: fail,
Detail: detail,
}
}

View File

@@ -159,6 +159,16 @@ func TestUsageReporterBuildRecordIncludesRequestedModelAlias(t *testing.T) {
}
}
func TestUsageReporterBuildRecordIncludesReasoningEffort(t *testing.T) {
ctx := usage.WithReasoningEffort(context.Background(), "medium")
reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil)
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
if record.ReasoningEffort != "medium" {
t.Fatalf("reasoning effort = %q, want %q", record.ReasoningEffort, "medium")
}
}
func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) {
reporter := &UsageReporter{
provider: "codex",

View File

@@ -339,6 +339,56 @@ func hasThinkingConfig(config ThinkingConfig) bool {
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
}
// ExtractReasoningEffort derives the canonical reasoning_effort label that is
// recorded in usage logs for a request.
//
// Precedence follows ApplyThinking: a valid thinking suffix on the model name
// wins over any field in the request body. When neither the suffix nor the body
// yields a thinking configuration, the result is the empty string.
func ExtractReasoningEffort(body []byte, provider, model string) string {
	fromSuffix := reasoningEffortFromSuffix(ParseSuffix(model))
	if fromSuffix != "" {
		return fromSuffix
	}

	provider = strings.ToLower(strings.TrimSpace(provider))
	cfg := extractThinkingConfig(body, provider)
	if hasThinkingConfig(cfg) {
		return reasoningEffortFromConfig(cfg)
	}

	// The provider-specific extractor found nothing; for the OpenAI handler
	// types, retry with the Codex-format extractor before giving up.
	if provider == "openai" || provider == "openai-response" {
		cfg = extractCodexConfig(body)
	}
	return reasoningEffortFromConfig(cfg)
}
// reasoningEffortFromSuffix converts a parsed model suffix into a
// reasoning_effort label. It returns "" when the model carried no suffix or
// when the suffix does not map to a thinking configuration.
func reasoningEffortFromSuffix(suffix SuffixResult) string {
	if suffix.HasSuffix {
		cfg := parseSuffixToConfig(suffix.RawSuffix, "", suffix.ModelName)
		return reasoningEffortFromConfig(cfg)
	}
	return ""
}
// reasoningEffortFromConfig maps a ThinkingConfig onto a reasoning_effort
// label. It returns "" when the config carries no thinking setting, when a
// budget cannot be converted to a level, or when the mode is not recognised.
func reasoningEffortFromConfig(config ThinkingConfig) string {
	var label string
	if !hasThinkingConfig(config) {
		return label
	}

	switch config.Mode {
	case ModeNone:
		label = string(LevelNone)
	case ModeAuto:
		label = string(LevelAuto)
	case ModeLevel:
		// Levels are normalised so logs always carry a lower-case label.
		label = strings.ToLower(strings.TrimSpace(string(config.Level)))
	case ModeBudget:
		if level, ok := ConvertBudgetToLevel(config.Budget); ok {
			label = level
		}
	}
	return label
}
// extractClaudeConfig extracts thinking configuration from Claude format request body.
//
// Claude API format:

View File

@@ -0,0 +1,31 @@
package thinking
import "testing"
// TestExtractReasoningEffortUsesSuffixOverBody verifies that a model suffix
// takes priority over a conflicting reasoning_effort field in the body.
func TestExtractReasoningEffortUsesSuffixOverBody(t *testing.T) {
	const want = "high"
	body := []byte(`{"reasoning_effort":"low"}`)

	if got := ExtractReasoningEffort(body, "openai", "gpt-5.4(high)"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortConvertsBudgetToLevel verifies that a Claude-style
// token budget in the body is reported as a named effort level.
func TestExtractReasoningEffortConvertsBudgetToLevel(t *testing.T) {
	const want = "medium"
	body := []byte(`{"thinking":{"type":"enabled","budget_tokens":8192}}`)

	if got := ExtractReasoningEffort(body, "claude", "claude-sonnet-4-5"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortSupportsOpenAIResponses verifies that the nested
// reasoning.effort field is honoured for the "openai-response" handler type.
func TestExtractReasoningEffortSupportsOpenAIResponses(t *testing.T) {
	const want = "medium"
	body := []byte(`{"reasoning":{"effort":"medium"}}`)

	if got := ExtractReasoningEffort(body, "openai-response", "gpt-5.4"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortMissingConfigIsEmpty verifies that a request with
// no thinking configuration and no model suffix yields an empty label.
func TestExtractReasoningEffortMissingConfigIsEmpty(t *testing.T) {
	body := []byte(`{"messages":[{"role":"user","content":"hi"}]}`)

	got := ExtractReasoningEffort(body, "openai", "gpt-5.4")
	if len(got) > 0 {
		t.Fatalf("ExtractReasoningEffort() = %q, want empty", got)
	}
}

View File

@@ -231,6 +231,17 @@ func requestExecutionMetadata(ctx context.Context) map[string]any {
return meta
}
// setReasoningEffortMetadata records the request's reasoning effort in meta
// under coreexecutor.ReasoningEffortMetadataKey. It is a no-op when meta is nil
// or when no reasoning effort can be extracted from the model name or body.
func setReasoningEffortMetadata(meta map[string]any, handlerType, model string, rawJSON []byte) {
	if meta == nil {
		return
	}
	if effort := thinking.ExtractReasoningEffort(rawJSON, handlerType, model); effort != "" {
		meta[coreexecutor.ReasoningEffortMetadataKey] = effort
	}
}
// headersFromContext extracts the original HTTP request headers from the gin context
// embedded in the provided context. This allows session affinity selectors to read
// client headers like X-Amp-Thread-Id.
@@ -550,6 +561,7 @@ func (h *BaseAPIHandler) executeWithAuthManager(ctx context.Context, handlerType
}
reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON
if len(payload) == 0 {
payload = nil
@@ -598,6 +610,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
}
reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON
if len(payload) == 0 {
payload = nil
@@ -659,6 +672,7 @@ func (h *BaseAPIHandler) executeStreamWithAuthManager(ctx context.Context, handl
}
reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON
if len(payload) == 0 {
payload = nil

View File

@@ -18,3 +18,23 @@ func TestRequestExecutionMetadataIncludesExecutionSessionWithoutIdempotencyKey(t
t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey])
}
}
// TestSetReasoningEffortMetadataUsesSuffixOverBody verifies that the metadata
// entry reflects the model suffix rather than the body's reasoning_effort.
func TestSetReasoningEffortMetadataUsesSuffixOverBody(t *testing.T) {
	const want = "high"
	meta := map[string]any{}

	setReasoningEffortMetadata(meta, "openai", "gpt-5.4(high)", []byte(`{"reasoning_effort":"low"}`))

	got := meta[coreexecutor.ReasoningEffortMetadataKey]
	if got != want {
		t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, want)
	}
}
// TestSetReasoningEffortMetadataSupportsOpenAIResponses verifies that the
// nested reasoning.effort field populates the metadata entry for the
// "openai-response" handler type.
func TestSetReasoningEffortMetadataSupportsOpenAIResponses(t *testing.T) {
	const want = "medium"
	meta := map[string]any{}

	setReasoningEffortMetadata(meta, "openai-response", "gpt-5.4", []byte(`{"reasoning":{"effort":"medium"}}`))

	got := meta[coreexecutor.ReasoningEffortMetadataKey]
	if got != want {
		t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, want)
	}
}

View File

@@ -1632,7 +1632,11 @@ func hasRequestedModelMetadata(meta map[string]any) bool {
func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context {
alias := requestedModelAliasFromOptions(opts, fallback)
return coreusage.WithRequestedModelAlias(ctx, alias)
ctx = coreusage.WithRequestedModelAlias(ctx, alias)
if effort := reasoningEffortFromOptions(opts); effort != "" {
ctx = coreusage.WithReasoningEffort(ctx, effort)
}
return ctx
}
func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string {
@@ -1660,6 +1664,24 @@ func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback stri
}
}
// reasoningEffortFromOptions reads the reasoning effort stored in opts.Metadata
// under cliproxyexecutor.ReasoningEffortMetadataKey. String and []byte values
// are returned whitespace-trimmed; a missing key, a nil value, or any other
// type yields "".
func reasoningEffortFromOptions(opts cliproxyexecutor.Options) string {
	var effort string
	// Indexing a nil or empty map yields a nil value, which matches no case
	// below, so no separate emptiness or presence check is required.
	switch v := opts.Metadata[cliproxyexecutor.ReasoningEffortMetadataKey].(type) {
	case string:
		effort = v
	case []byte:
		effort = string(v)
	}
	return strings.TrimSpace(effort)
}
func pinnedAuthIDFromMetadata(meta map[string]any) string {
if len(meta) == 0 {
return ""

View File

@@ -0,0 +1,25 @@
package auth
import (
"context"
"testing"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
coreusage "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage"
)
func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) {
ctx := contextWithRequestedModelAlias(context.Background(), cliproxyexecutor.Options{
Metadata: map[string]any{
cliproxyexecutor.RequestedModelMetadataKey: "client-model",
cliproxyexecutor.ReasoningEffortMetadataKey: "medium",
},
}, "fallback-model")
if got := coreusage.RequestedModelAliasFromContext(ctx); got != "client-model" {
t.Fatalf("requested model alias = %q, want %q", got, "client-model")
}
if got := coreusage.ReasoningEffortFromContext(ctx); got != "medium" {
t.Fatalf("reasoning effort = %q, want %q", got, "medium")
}
}

View File

@@ -17,6 +17,9 @@ const RequestPathMetadataKey = "request_path"
// DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials.
const DisallowFreeAuthMetadataKey = "disallow_free_auth"
// ReasoningEffortMetadataKey is the request-metadata key under which the
// client-requested reasoning effort is stored so that it can be reported in
// usage logs.
const ReasoningEffortMetadataKey = "reasoning_effort"
const (
// PinnedAuthMetadataKey locks execution to a specific auth ID.
PinnedAuthMetadataKey = "pinned_auth_id"

View File

@@ -12,19 +12,21 @@ import (
// Record contains the usage statistics captured for a single provider request.
type Record struct {
Provider string
Model string
Alias string
APIKey string
AuthID string
AuthIndex string
AuthType string
Source string
RequestedAt time.Time
Latency time.Duration
Failed bool
Fail Failure
Detail Detail
Provider string
Model string
Alias string
APIKey string
AuthID string
AuthIndex string
AuthType string
Source string
// ReasoningEffort stores the client-requested thinking level for request event logs.
ReasoningEffort string
RequestedAt time.Time
Latency time.Duration
Failed bool
Fail Failure
Detail Detail
// ResponseHeaders stores a snapshot of upstream response headers for usage sinks.
ResponseHeaders http.Header
}
@@ -47,6 +49,7 @@ type Detail struct {
}
type requestedModelAliasContextKey struct{}
type reasoningEffortContextKey struct{}
// WithRequestedModelAlias stores the client-requested model name for usage sinks.
func WithRequestedModelAlias(ctx context.Context, alias string) context.Context {
@@ -76,6 +79,34 @@ func RequestedModelAliasFromContext(ctx context.Context) string {
}
}
// WithReasoningEffort returns a context carrying the client-requested reasoning
// effort for usage sinks. A nil ctx is replaced with context.Background(). The
// effort is whitespace-trimmed, and when it is empty after trimming the context
// is returned without a stored value.
func WithReasoningEffort(ctx context.Context, effort string) context.Context {
	if ctx == nil {
		ctx = context.Background()
	}
	if trimmed := strings.TrimSpace(effort); trimmed != "" {
		return context.WithValue(ctx, reasoningEffortContextKey{}, trimmed)
	}
	return ctx
}
// ReasoningEffortFromContext reads the client-requested reasoning effort from
// ctx. It returns the whitespace-trimmed value when a string or []byte is
// stored, and "" when ctx is nil, nothing is stored, or the stored value has
// any other type.
func ReasoningEffortFromContext(ctx context.Context) string {
	if ctx == nil {
		return ""
	}

	var effort string
	// A missing value is a nil interface, which matches neither case.
	switch v := ctx.Value(reasoningEffortContextKey{}).(type) {
	case string:
		effort = v
	case []byte:
		effort = string(v)
	}
	return strings.TrimSpace(effort)
}
// Plugin consumes usage records emitted by the proxy runtime.
type Plugin interface {
HandleUsage(ctx context.Context, record Record)