mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-06-01 04:12:28 +08:00
Merge pull request #3484 from yavon007/main
Add reasoning_effort to usage event payloads
This commit is contained in:
@@ -48,6 +48,10 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
|
||||
}
|
||||
apiKey := strings.TrimSpace(record.APIKey)
|
||||
requestID := strings.TrimSpace(internallogging.GetRequestID(ctx))
|
||||
reasoningEffort := strings.TrimSpace(record.ReasoningEffort)
|
||||
if reasoningEffort == "" {
|
||||
reasoningEffort = coreusage.ReasoningEffortFromContext(ctx)
|
||||
}
|
||||
|
||||
tokens := tokenStats{
|
||||
InputTokens: record.Detail.InputTokens,
|
||||
@@ -83,14 +87,15 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
|
||||
}
|
||||
|
||||
payload, err := json.Marshal(queuedUsageDetail{
|
||||
requestDetail: detail,
|
||||
Provider: provider,
|
||||
Model: modelName,
|
||||
Alias: aliasName,
|
||||
Endpoint: resolveEndpoint(ctx),
|
||||
AuthType: authType,
|
||||
APIKey: apiKey,
|
||||
RequestID: requestID,
|
||||
requestDetail: detail,
|
||||
Provider: provider,
|
||||
Model: modelName,
|
||||
Alias: aliasName,
|
||||
Endpoint: resolveEndpoint(ctx),
|
||||
AuthType: authType,
|
||||
APIKey: apiKey,
|
||||
RequestID: requestID,
|
||||
ReasoningEffort: reasoningEffort,
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
@@ -100,13 +105,14 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
|
||||
|
||||
type queuedUsageDetail struct {
|
||||
requestDetail
|
||||
Provider string `json:"provider"`
|
||||
Model string `json:"model"`
|
||||
Alias string `json:"alias"`
|
||||
Endpoint string `json:"endpoint"`
|
||||
AuthType string `json:"auth_type"`
|
||||
APIKey string `json:"api_key"`
|
||||
RequestID string `json:"request_id"`
|
||||
Provider string `json:"provider"`
|
||||
Model string `json:"model"`
|
||||
Alias string `json:"alias"`
|
||||
Endpoint string `json:"endpoint"`
|
||||
AuthType string `json:"auth_type"`
|
||||
APIKey string `json:"api_key"`
|
||||
RequestID string `json:"request_id"`
|
||||
ReasoningEffort string `json:"reasoning_effort"`
|
||||
}
|
||||
|
||||
type requestDetail struct {
|
||||
|
||||
@@ -25,15 +25,16 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
|
||||
|
||||
plugin := &usageQueuePlugin{}
|
||||
plugin.HandleUsage(ctx, coreusage.Record{
|
||||
Provider: "openai",
|
||||
Model: "gpt-5.4",
|
||||
Alias: "client-gpt",
|
||||
APIKey: "test-key",
|
||||
AuthIndex: "0",
|
||||
AuthType: "apikey",
|
||||
Source: "user@example.com",
|
||||
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
|
||||
Latency: 1500 * time.Millisecond,
|
||||
Provider: "openai",
|
||||
Model: "gpt-5.4",
|
||||
Alias: "client-gpt",
|
||||
APIKey: "test-key",
|
||||
AuthIndex: "0",
|
||||
AuthType: "apikey",
|
||||
Source: "user@example.com",
|
||||
ReasoningEffort: "medium",
|
||||
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
|
||||
Latency: 1500 * time.Millisecond,
|
||||
Detail: coreusage.Detail{
|
||||
InputTokens: 10,
|
||||
OutputTokens: 20,
|
||||
@@ -51,6 +52,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
|
||||
requireStringField(t, payload, "auth_type", "apikey")
|
||||
requireMissingField(t, payload, "user_api_key")
|
||||
requireStringField(t, payload, "request_id", "ctx-request-id")
|
||||
requireStringField(t, payload, "reasoning_effort", "medium")
|
||||
requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"})
|
||||
requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"})
|
||||
requireBoolField(t, payload, "failed", false)
|
||||
|
||||
@@ -26,6 +26,7 @@ type UsageReporter struct {
|
||||
authType string
|
||||
apiKey string
|
||||
source string
|
||||
reasoning string
|
||||
requestedAt time.Time
|
||||
once sync.Once
|
||||
}
|
||||
@@ -44,6 +45,7 @@ func NewUsageReporter(ctx context.Context, provider, model string, auth *cliprox
|
||||
apiKey: apiKey,
|
||||
source: resolveUsageSource(auth, apiKey),
|
||||
authType: resolveUsageAuthType(auth),
|
||||
reasoning: usage.ReasoningEffortFromContext(ctx),
|
||||
}
|
||||
if auth != nil {
|
||||
reporter.authID = auth.ID
|
||||
@@ -156,19 +158,20 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f
|
||||
return usage.Record{Model: model, Detail: detail, Failed: failed, Fail: fail}
|
||||
}
|
||||
return usage.Record{
|
||||
Provider: r.provider,
|
||||
Model: model,
|
||||
Alias: r.alias,
|
||||
Source: r.source,
|
||||
APIKey: r.apiKey,
|
||||
AuthID: r.authID,
|
||||
AuthIndex: r.authIndex,
|
||||
AuthType: r.authType,
|
||||
RequestedAt: r.requestedAt,
|
||||
Latency: r.latency(),
|
||||
Failed: failed,
|
||||
Fail: fail,
|
||||
Detail: detail,
|
||||
Provider: r.provider,
|
||||
Model: model,
|
||||
Alias: r.alias,
|
||||
Source: r.source,
|
||||
APIKey: r.apiKey,
|
||||
AuthID: r.authID,
|
||||
AuthIndex: r.authIndex,
|
||||
AuthType: r.authType,
|
||||
ReasoningEffort: r.reasoning,
|
||||
RequestedAt: r.requestedAt,
|
||||
Latency: r.latency(),
|
||||
Failed: failed,
|
||||
Fail: fail,
|
||||
Detail: detail,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -159,6 +159,16 @@ func TestUsageReporterBuildRecordIncludesRequestedModelAlias(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsageReporterBuildRecordIncludesReasoningEffort(t *testing.T) {
|
||||
ctx := usage.WithReasoningEffort(context.Background(), "medium")
|
||||
reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil)
|
||||
|
||||
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
|
||||
if record.ReasoningEffort != "medium" {
|
||||
t.Fatalf("reasoning effort = %q, want %q", record.ReasoningEffort, "medium")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) {
|
||||
reporter := &UsageReporter{
|
||||
provider: "codex",
|
||||
|
||||
@@ -339,6 +339,56 @@ func hasThinkingConfig(config ThinkingConfig) bool {
|
||||
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
|
||||
}
|
||||
|
||||
// ExtractReasoningEffort returns the request's thinking setting as a canonical
|
||||
// reasoning_effort label for usage logging. Model suffixes have the same
|
||||
// priority as ApplyThinking: a valid suffix overrides body fields.
|
||||
func ExtractReasoningEffort(body []byte, provider, model string) string {
|
||||
if effort := reasoningEffortFromSuffix(ParseSuffix(model)); effort != "" {
|
||||
return effort
|
||||
}
|
||||
|
||||
provider = strings.ToLower(strings.TrimSpace(provider))
|
||||
config := extractThinkingConfig(body, provider)
|
||||
if !hasThinkingConfig(config) {
|
||||
switch provider {
|
||||
case "openai-response":
|
||||
config = extractCodexConfig(body)
|
||||
case "openai":
|
||||
config = extractCodexConfig(body)
|
||||
}
|
||||
}
|
||||
return reasoningEffortFromConfig(config)
|
||||
}
|
||||
|
||||
func reasoningEffortFromSuffix(suffix SuffixResult) string {
|
||||
if !suffix.HasSuffix {
|
||||
return ""
|
||||
}
|
||||
return reasoningEffortFromConfig(parseSuffixToConfig(suffix.RawSuffix, "", suffix.ModelName))
|
||||
}
|
||||
|
||||
func reasoningEffortFromConfig(config ThinkingConfig) string {
|
||||
if !hasThinkingConfig(config) {
|
||||
return ""
|
||||
}
|
||||
switch config.Mode {
|
||||
case ModeNone:
|
||||
return string(LevelNone)
|
||||
case ModeAuto:
|
||||
return string(LevelAuto)
|
||||
case ModeLevel:
|
||||
return strings.ToLower(strings.TrimSpace(string(config.Level)))
|
||||
case ModeBudget:
|
||||
level, ok := ConvertBudgetToLevel(config.Budget)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return level
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// extractClaudeConfig extracts thinking configuration from Claude format request body.
|
||||
//
|
||||
// Claude API format:
|
||||
|
||||
31
internal/thinking/reasoning_effort_test.go
Normal file
31
internal/thinking/reasoning_effort_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package thinking
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestExtractReasoningEffortUsesSuffixOverBody(t *testing.T) {
|
||||
got := ExtractReasoningEffort([]byte(`{"reasoning_effort":"low"}`), "openai", "gpt-5.4(high)")
|
||||
if got != "high" {
|
||||
t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, "high")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractReasoningEffortConvertsBudgetToLevel(t *testing.T) {
|
||||
got := ExtractReasoningEffort([]byte(`{"thinking":{"type":"enabled","budget_tokens":8192}}`), "claude", "claude-sonnet-4-5")
|
||||
if got != "medium" {
|
||||
t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, "medium")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractReasoningEffortSupportsOpenAIResponses(t *testing.T) {
|
||||
got := ExtractReasoningEffort([]byte(`{"reasoning":{"effort":"medium"}}`), "openai-response", "gpt-5.4")
|
||||
if got != "medium" {
|
||||
t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, "medium")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractReasoningEffortMissingConfigIsEmpty(t *testing.T) {
|
||||
got := ExtractReasoningEffort([]byte(`{"messages":[{"role":"user","content":"hi"}]}`), "openai", "gpt-5.4")
|
||||
if got != "" {
|
||||
t.Fatalf("ExtractReasoningEffort() = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
@@ -231,6 +231,17 @@ func requestExecutionMetadata(ctx context.Context) map[string]any {
|
||||
return meta
|
||||
}
|
||||
|
||||
func setReasoningEffortMetadata(meta map[string]any, handlerType, model string, rawJSON []byte) {
|
||||
if meta == nil {
|
||||
return
|
||||
}
|
||||
effort := thinking.ExtractReasoningEffort(rawJSON, handlerType, model)
|
||||
if effort == "" {
|
||||
return
|
||||
}
|
||||
meta[coreexecutor.ReasoningEffortMetadataKey] = effort
|
||||
}
|
||||
|
||||
// headersFromContext extracts the original HTTP request headers from the gin context
|
||||
// embedded in the provided context. This allows session affinity selectors to read
|
||||
// client headers like X-Amp-Thread-Id.
|
||||
@@ -550,6 +561,7 @@ func (h *BaseAPIHandler) executeWithAuthManager(ctx context.Context, handlerType
|
||||
}
|
||||
reqMeta := requestExecutionMetadata(ctx)
|
||||
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
|
||||
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
|
||||
payload := rawJSON
|
||||
if len(payload) == 0 {
|
||||
payload = nil
|
||||
@@ -598,6 +610,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
|
||||
}
|
||||
reqMeta := requestExecutionMetadata(ctx)
|
||||
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
|
||||
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
|
||||
payload := rawJSON
|
||||
if len(payload) == 0 {
|
||||
payload = nil
|
||||
@@ -659,6 +672,7 @@ func (h *BaseAPIHandler) executeStreamWithAuthManager(ctx context.Context, handl
|
||||
}
|
||||
reqMeta := requestExecutionMetadata(ctx)
|
||||
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
|
||||
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
|
||||
payload := rawJSON
|
||||
if len(payload) == 0 {
|
||||
payload = nil
|
||||
|
||||
@@ -18,3 +18,23 @@ func TestRequestExecutionMetadataIncludesExecutionSessionWithoutIdempotencyKey(t
|
||||
t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey])
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetReasoningEffortMetadataUsesSuffixOverBody(t *testing.T) {
|
||||
meta := make(map[string]any)
|
||||
|
||||
setReasoningEffortMetadata(meta, "openai", "gpt-5.4(high)", []byte(`{"reasoning_effort":"low"}`))
|
||||
|
||||
if got := meta[coreexecutor.ReasoningEffortMetadataKey]; got != "high" {
|
||||
t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, "high")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetReasoningEffortMetadataSupportsOpenAIResponses(t *testing.T) {
|
||||
meta := make(map[string]any)
|
||||
|
||||
setReasoningEffortMetadata(meta, "openai-response", "gpt-5.4", []byte(`{"reasoning":{"effort":"medium"}}`))
|
||||
|
||||
if got := meta[coreexecutor.ReasoningEffortMetadataKey]; got != "medium" {
|
||||
t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, "medium")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1632,7 +1632,11 @@ func hasRequestedModelMetadata(meta map[string]any) bool {
|
||||
|
||||
func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context {
|
||||
alias := requestedModelAliasFromOptions(opts, fallback)
|
||||
return coreusage.WithRequestedModelAlias(ctx, alias)
|
||||
ctx = coreusage.WithRequestedModelAlias(ctx, alias)
|
||||
if effort := reasoningEffortFromOptions(opts); effort != "" {
|
||||
ctx = coreusage.WithReasoningEffort(ctx, effort)
|
||||
}
|
||||
return ctx
|
||||
}
|
||||
|
||||
func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string {
|
||||
@@ -1660,6 +1664,24 @@ func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback stri
|
||||
}
|
||||
}
|
||||
|
||||
func reasoningEffortFromOptions(opts cliproxyexecutor.Options) string {
|
||||
if len(opts.Metadata) == 0 {
|
||||
return ""
|
||||
}
|
||||
raw, ok := opts.Metadata[cliproxyexecutor.ReasoningEffortMetadataKey]
|
||||
if !ok || raw == nil {
|
||||
return ""
|
||||
}
|
||||
switch value := raw.(type) {
|
||||
case string:
|
||||
return strings.TrimSpace(value)
|
||||
case []byte:
|
||||
return strings.TrimSpace(string(value))
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func pinnedAuthIDFromMetadata(meta map[string]any) string {
|
||||
if len(meta) == 0 {
|
||||
return ""
|
||||
|
||||
25
sdk/cliproxy/auth/conductor_usage_test.go
Normal file
25
sdk/cliproxy/auth/conductor_usage_test.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package auth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
|
||||
coreusage "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage"
|
||||
)
|
||||
|
||||
func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) {
|
||||
ctx := contextWithRequestedModelAlias(context.Background(), cliproxyexecutor.Options{
|
||||
Metadata: map[string]any{
|
||||
cliproxyexecutor.RequestedModelMetadataKey: "client-model",
|
||||
cliproxyexecutor.ReasoningEffortMetadataKey: "medium",
|
||||
},
|
||||
}, "fallback-model")
|
||||
|
||||
if got := coreusage.RequestedModelAliasFromContext(ctx); got != "client-model" {
|
||||
t.Fatalf("requested model alias = %q, want %q", got, "client-model")
|
||||
}
|
||||
if got := coreusage.ReasoningEffortFromContext(ctx); got != "medium" {
|
||||
t.Fatalf("reasoning effort = %q, want %q", got, "medium")
|
||||
}
|
||||
}
|
||||
@@ -17,6 +17,9 @@ const RequestPathMetadataKey = "request_path"
|
||||
// DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials.
|
||||
const DisallowFreeAuthMetadataKey = "disallow_free_auth"
|
||||
|
||||
// ReasoningEffortMetadataKey stores the client-requested reasoning effort for usage logs.
|
||||
const ReasoningEffortMetadataKey = "reasoning_effort"
|
||||
|
||||
const (
|
||||
// PinnedAuthMetadataKey locks execution to a specific auth ID.
|
||||
PinnedAuthMetadataKey = "pinned_auth_id"
|
||||
|
||||
@@ -12,19 +12,21 @@ import (
|
||||
|
||||
// Record contains the usage statistics captured for a single provider request.
|
||||
type Record struct {
|
||||
Provider string
|
||||
Model string
|
||||
Alias string
|
||||
APIKey string
|
||||
AuthID string
|
||||
AuthIndex string
|
||||
AuthType string
|
||||
Source string
|
||||
RequestedAt time.Time
|
||||
Latency time.Duration
|
||||
Failed bool
|
||||
Fail Failure
|
||||
Detail Detail
|
||||
Provider string
|
||||
Model string
|
||||
Alias string
|
||||
APIKey string
|
||||
AuthID string
|
||||
AuthIndex string
|
||||
AuthType string
|
||||
Source string
|
||||
// ReasoningEffort stores the client-requested thinking level for request event logs.
|
||||
ReasoningEffort string
|
||||
RequestedAt time.Time
|
||||
Latency time.Duration
|
||||
Failed bool
|
||||
Fail Failure
|
||||
Detail Detail
|
||||
// ResponseHeaders stores a snapshot of upstream response headers for usage sinks.
|
||||
ResponseHeaders http.Header
|
||||
}
|
||||
@@ -47,6 +49,7 @@ type Detail struct {
|
||||
}
|
||||
|
||||
type requestedModelAliasContextKey struct{}
|
||||
type reasoningEffortContextKey struct{}
|
||||
|
||||
// WithRequestedModelAlias stores the client-requested model name for usage sinks.
|
||||
func WithRequestedModelAlias(ctx context.Context, alias string) context.Context {
|
||||
@@ -76,6 +79,34 @@ func RequestedModelAliasFromContext(ctx context.Context) string {
|
||||
}
|
||||
}
|
||||
|
||||
// WithReasoningEffort stores the client-requested reasoning effort for usage sinks.
|
||||
func WithReasoningEffort(ctx context.Context, effort string) context.Context {
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
effort = strings.TrimSpace(effort)
|
||||
if effort == "" {
|
||||
return ctx
|
||||
}
|
||||
return context.WithValue(ctx, reasoningEffortContextKey{}, effort)
|
||||
}
|
||||
|
||||
// ReasoningEffortFromContext returns the client-requested reasoning effort stored in ctx.
|
||||
func ReasoningEffortFromContext(ctx context.Context) string {
|
||||
if ctx == nil {
|
||||
return ""
|
||||
}
|
||||
raw := ctx.Value(reasoningEffortContextKey{})
|
||||
switch value := raw.(type) {
|
||||
case string:
|
||||
return strings.TrimSpace(value)
|
||||
case []byte:
|
||||
return strings.TrimSpace(string(value))
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Plugin consumes usage records emitted by the proxy runtime.
|
||||
type Plugin interface {
|
||||
HandleUsage(ctx context.Context, record Record)
|
||||
|
||||
Reference in New Issue
Block a user