From f4fcfc586742849bc71fb969b86692ac49112fd9 Mon Sep 17 00:00:00 2001
From: ChrAlpha <53332481+ChrAlpha@users.noreply.github.com>
Date: Thu, 15 Jan 2026 14:14:09 +0800
Subject: [PATCH 1/4] feat(registry): add GPT-5.2-Codex model to GitHub Copilot provider

Add the gpt-5.2-codex model definition to GetGitHubCopilotModels(),
enabling access to OpenAI GPT-5.2 Codex through the GitHub Copilot API.
---
 internal/registry/model_definitions.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 6e7f4805..8359279c 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -901,6 +901,17 @@ func GetGitHubCopilotModels() []*ModelInfo {
             ContextLength: 200000,
             MaxCompletionTokens: 32768,
         },
+        {
+            ID: "gpt-5.2-codex",
+            Object: "model",
+            Created: now,
+            OwnedBy: "github-copilot",
+            Type: "github-copilot",
+            DisplayName: "GPT-5.2 Codex",
+            Description: "OpenAI GPT-5.2 Codex via GitHub Copilot",
+            ContextLength: 200000,
+            MaxCompletionTokens: 32768,
+        },
         {
             ID: "claude-haiku-4.5",
             Object: "model",
             Created: now,
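For orientation, here is a minimal, self-contained sketch of how a registry entry like the one above is consumed. The ModelInfo struct is a trimmed mirror of the real one in internal/registry/model_registry.go, and findModel is a hypothetical stand-in for the registry's lookup:

package main

import "fmt"

// ModelInfo mirrors a few fields of the registry's ModelInfo
// (internal/registry/model_registry.go); trimmed for illustration.
type ModelInfo struct {
	ID                  string
	DisplayName         string
	ContextLength       int
	MaxCompletionTokens int
}

// findModel is a hypothetical stand-in for the registry's lookup.
func findModel(models []ModelInfo, id string) *ModelInfo {
	for i := range models {
		if models[i].ID == id {
			return &models[i]
		}
	}
	return nil
}

func main() {
	models := []ModelInfo{{
		ID:                  "gpt-5.2-codex",
		DisplayName:         "GPT-5.2 Codex",
		ContextLength:       200000,
		MaxCompletionTokens: 32768,
	}}
	if m := findModel(models, "gpt-5.2-codex"); m != nil {
		fmt.Printf("%s: %d-token context, %d-token completions\n",
			m.DisplayName, m.ContextLength, m.MaxCompletionTokens)
	}
}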
+ ID: "gpt-5.1-codex-max", + Object: "model", + Created: now, + OwnedBy: "github-copilot", + Type: "github-copilot", + DisplayName: "GPT-5.1 Codex Max", + Description: "OpenAI GPT-5.1 Codex Max via GitHub Copilot", + ContextLength: 200000, + MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/responses"}, }, { ID: "gpt-5.2", @@ -900,6 +918,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5.2 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, }, { ID: "gpt-5.2-codex", @@ -911,6 +930,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5.2 Codex via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/responses"}, }, { ID: "claude-haiku-4.5", @@ -922,6 +942,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Haiku 4.5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-opus-4.1", @@ -933,6 +954,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Opus 4.1 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-opus-4.5", @@ -944,6 +966,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Opus 4.5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-sonnet-4", @@ -955,6 +978,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Sonnet 4 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-sonnet-4.5", @@ -966,6 +990,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Sonnet 4.5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "gemini-2.5-pro", @@ -979,13 +1004,24 @@ func GetGitHubCopilotModels() []*ModelInfo { MaxCompletionTokens: 65536, }, { - ID: "gemini-3-pro", + ID: "gemini-3-pro-preview", Object: "model", Created: now, OwnedBy: "github-copilot", Type: "github-copilot", - DisplayName: "Gemini 3 Pro", - Description: "Google Gemini 3 Pro via GitHub Copilot", + DisplayName: "Gemini 3 Pro (Preview)", + Description: "Google Gemini 3 Pro Preview via GitHub Copilot", + ContextLength: 1048576, + MaxCompletionTokens: 65536, + }, + { + ID: "gemini-3-flash-preview", + Object: "model", + Created: now, + OwnedBy: "github-copilot", + Type: "github-copilot", + DisplayName: "Gemini 3 Flash (Preview)", + Description: "Google Gemini 3 Flash Preview via GitHub Copilot", ContextLength: 1048576, MaxCompletionTokens: 65536, }, @@ -1001,15 +1037,16 @@ func GetGitHubCopilotModels() []*ModelInfo { MaxCompletionTokens: 16384, }, { - ID: "raptor-mini", + ID: "oswe-vscode-prime", Object: "model", Created: now, OwnedBy: "github-copilot", Type: "github-copilot", - DisplayName: "Raptor Mini", - Description: "Raptor Mini via GitHub Copilot", + DisplayName: "Raptor mini (Preview)", + Description: "Raptor mini via GitHub Copilot", ContextLength: 128000, MaxCompletionTokens: 16384, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, }, } } diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 537b03c2..13e2e699 
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index 537b03c2..13e2e699 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -47,6 +47,8 @@ type ModelInfo struct {
     MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
     // SupportedParameters lists supported parameters
     SupportedParameters []string `json:"supported_parameters,omitempty"`
+    // SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
+    SupportedEndpoints []string `json:"supported_endpoints,omitempty"`
 
     // Thinking holds provider-specific reasoning/thinking budget capabilities.
     // This is optional and currently used for Gemini thinking budget normalization.
@@ -456,6 +458,9 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
     if len(model.SupportedParameters) > 0 {
         copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
     }
+    if len(model.SupportedEndpoints) > 0 {
+        copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...)
+    }
     return &copyModel
 }
@@ -968,6 +973,9 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
         if len(model.SupportedParameters) > 0 {
             result["supported_parameters"] = model.SupportedParameters
         }
+        if len(model.SupportedEndpoints) > 0 {
+            result["supported_endpoints"] = model.SupportedEndpoints
+        }
         return result
     case "claude", "kiro", "antigravity":
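A note on the cloning idiom above: append([]string(nil), src...) allocates a fresh backing array, so the ModelInfo copy handed out by the registry cannot be mutated back into the canonical entry. A tiny demonstration:

package main

import "fmt"

func main() {
	// append([]string(nil), src...) yields an independent copy of the slice,
	// so writes through the clone never reach the registry's original.
	src := []string{"/chat/completions", "/responses"}
	dst := append([]string(nil), src...)
	dst[0] = "/something-else"
	fmt.Println(src[0]) // "/chat/completions" (the original is untouched)
}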
diff --git a/internal/runtime/executor/github_copilot_executor.go b/internal/runtime/executor/github_copilot_executor.go
index f29af146..74e3fa6c 100644
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -23,6 +23,7 @@ import (
 const (
     githubCopilotBaseURL  = "https://api.githubcopilot.com"
     githubCopilotChatPath = "/chat/completions"
+    githubCopilotResponsesPath = "/responses"
     githubCopilotAuthType = "github-copilot"
     githubCopilotTokenCacheTTL = 25 * time.Minute
     // tokenExpiryBuffer is the time before expiry when we should refresh the token.
@@ -106,7 +107,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
     defer reporter.trackFailure(ctx, &err)
 
     from := opts.SourceFormat
+    useResponses := useGitHubCopilotResponsesEndpoint(from)
     to := sdktranslator.FromString("openai")
+    if useResponses {
+        to = sdktranslator.FromString("openai-response")
+    }
     originalPayload := bytes.Clone(req.Payload)
     if len(opts.OriginalRequest) > 0 {
         originalPayload = bytes.Clone(opts.OriginalRequest)
     }
@@ -117,7 +122,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
     body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
     body, _ = sjson.SetBytes(body, "stream", false)
 
-    url := githubCopilotBaseURL + githubCopilotChatPath
+    path := githubCopilotChatPath
+    if useResponses {
+        path = githubCopilotResponsesPath
+    }
+    url := githubCopilotBaseURL + path
     httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
     if err != nil {
         return resp, err
     }
@@ -172,6 +181,9 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
     appendAPIResponseChunk(ctx, e.cfg, data)
     detail := parseOpenAIUsage(data)
+    if useResponses && detail.TotalTokens == 0 {
+        detail = parseOpenAIResponsesUsage(data)
+    }
     if detail.TotalTokens > 0 {
         reporter.publish(ctx, detail)
     }
@@ -194,7 +206,11 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
     defer reporter.trackFailure(ctx, &err)
 
     from := opts.SourceFormat
+    useResponses := useGitHubCopilotResponsesEndpoint(from)
     to := sdktranslator.FromString("openai")
+    if useResponses {
+        to = sdktranslator.FromString("openai-response")
+    }
     originalPayload := bytes.Clone(req.Payload)
     if len(opts.OriginalRequest) > 0 {
         originalPayload = bytes.Clone(opts.OriginalRequest)
     }
@@ -205,9 +221,15 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
     body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
     body, _ = sjson.SetBytes(body, "stream", true)
     // Enable stream options for usage stats in stream
-    body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+    if !useResponses {
+        body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+    }
 
-    url := githubCopilotBaseURL + githubCopilotChatPath
+    path := githubCopilotChatPath
+    if useResponses {
+        path = githubCopilotResponsesPath
+    }
+    url := githubCopilotBaseURL + path
     httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
     if err != nil {
         return nil, err
     }
@@ -283,6 +305,10 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
             }
             if detail, ok := parseOpenAIStreamUsage(line); ok {
                 reporter.publish(ctx, detail)
+            } else if useResponses {
+                if detail, ok := parseOpenAIResponsesStreamUsage(line); ok {
+                    reporter.publish(ctx, detail)
+                }
             }
         }
 
@@ -393,6 +419,10 @@ func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte {
     return body
 }
 
+func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format) bool {
+    return sourceFormat.String() == "openai-response"
+}
+
 // isHTTPSuccess checks if the status code indicates success (2xx).
 func isHTTPSuccess(statusCode int) bool {
     return statusCode >= 200 && statusCode < 300
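The executor's branching reduces to a pure decision: Responses-format traffic targets /responses with the "openai-response" translator and skips stream_options (a Chat-Completions-only knob); everything else targets /chat/completions with the "openai" translator. A hedged, standalone restatement (endpointFor is an illustrative name; the real code works with sdktranslator.Format values, not strings):

package main

import "fmt"

// endpointFor mirrors the executor's branch: Responses-format requests go to
// /responses with the "openai-response" translator, everything else to
// /chat/completions with the "openai" translator.
func endpointFor(sourceFormat string) (path, translator string) {
	if sourceFormat == "openai-response" {
		return "/responses", "openai-response"
	}
	return "/chat/completions", "openai"
}

func main() {
	fmt.Println(endpointFor("openai-response")) // /responses openai-response
	fmt.Println(endpointFor("openai"))          // /chat/completions openai
}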
diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go
index a3ce270c..7d8d345e 100644
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -236,6 +236,54 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
     return detail, true
 }
 
+func parseOpenAIResponsesUsage(data []byte) usage.Detail {
+    usageNode := gjson.ParseBytes(data).Get("usage")
+    if !usageNode.Exists() {
+        return usage.Detail{}
+    }
+    detail := usage.Detail{
+        InputTokens:  usageNode.Get("input_tokens").Int(),
+        OutputTokens: usageNode.Get("output_tokens").Int(),
+        TotalTokens:  usageNode.Get("total_tokens").Int(),
+    }
+    if detail.TotalTokens == 0 {
+        detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+    }
+    if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
+        detail.CachedTokens = cached.Int()
+    }
+    if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
+        detail.ReasoningTokens = reasoning.Int()
+    }
+    return detail
+}
+
+func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
+    payload := jsonPayload(line)
+    if len(payload) == 0 || !gjson.ValidBytes(payload) {
+        return usage.Detail{}, false
+    }
+    usageNode := gjson.GetBytes(payload, "usage")
+    if !usageNode.Exists() {
+        return usage.Detail{}, false
+    }
+    detail := usage.Detail{
+        InputTokens:  usageNode.Get("input_tokens").Int(),
+        OutputTokens: usageNode.Get("output_tokens").Int(),
+        TotalTokens:  usageNode.Get("total_tokens").Int(),
+    }
+    if detail.TotalTokens == 0 {
+        detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+    }
+    if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
+        detail.CachedTokens = cached.Int()
+    }
+    if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
+        detail.ReasoningTokens = reasoning.Int()
+    }
+    return detail, true
+}
+
 func parseClaudeUsage(data []byte) usage.Detail {
     usageNode := gjson.ParseBytes(data).Get("usage")
     if !usageNode.Exists() {
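The Responses API reports usage as input_tokens/output_tokens rather than Chat Completions' prompt_tokens/completion_tokens, which is why these dedicated parsers exist. A runnable sketch of the same gjson extraction against a sample payload (values are illustrative):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	// Shape of a Responses-API usage block (sample values for illustration).
	data := `{"usage":{"input_tokens":120,"output_tokens":48,"total_tokens":168,
		"output_tokens_details":{"reasoning_tokens":16}}}`
	u := gjson.Get(data, "usage")
	in, out := u.Get("input_tokens").Int(), u.Get("output_tokens").Int()
	total := u.Get("total_tokens").Int()
	if total == 0 { // same fallback the helper applies
		total = in + out
	}
	fmt.Println(in, out, total, u.Get("output_tokens_details.reasoning_tokens").Int())
}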
From 8950d92682324b0b3d9cff38041fd6b7f7919ee2 Mon Sep 17 00:00:00 2001
From: ChrAlpha <53332481+ChrAlpha@users.noreply.github.com>
Date: Thu, 15 Jan 2026 18:30:01 +0800
Subject: [PATCH 3/4] feat(openai): implement endpoint resolution and response handling for Chat and Responses models
---
 sdk/api/handlers/openai/endpoint_compat.go          |  37 ++++
 sdk/api/handlers/openai/openai_handlers.go          | 168 ++++++++++++++++++
 .../openai/openai_responses_handlers.go             | 149 +++++++++++++++-
 3 files changed, 353 insertions(+), 1 deletion(-)
 create mode 100644 sdk/api/handlers/openai/endpoint_compat.go

diff --git a/sdk/api/handlers/openai/endpoint_compat.go b/sdk/api/handlers/openai/endpoint_compat.go
new file mode 100644
index 00000000..56fac508
--- /dev/null
+++ b/sdk/api/handlers/openai/endpoint_compat.go
@@ -0,0 +1,37 @@
+package openai
+
+import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+
+const (
+    openAIChatEndpoint      = "/chat/completions"
+    openAIResponsesEndpoint = "/responses"
+)
+
+func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) {
+    if modelName == "" {
+        return "", false
+    }
+    info := registry.GetGlobalRegistry().GetModelInfo(modelName)
+    if info == nil || len(info.SupportedEndpoints) == 0 {
+        return "", false
+    }
+    if endpointListContains(info.SupportedEndpoints, requestedEndpoint) {
+        return "", false
+    }
+    if requestedEndpoint == openAIChatEndpoint && endpointListContains(info.SupportedEndpoints, openAIResponsesEndpoint) {
+        return openAIResponsesEndpoint, true
+    }
+    if requestedEndpoint == openAIResponsesEndpoint && endpointListContains(info.SupportedEndpoints, openAIChatEndpoint) {
+        return openAIChatEndpoint, true
+    }
+    return "", false
+}
+
+func endpointListContains(items []string, value string) bool {
+    for _, item := range items {
+        if item == value {
+            return true
+        }
+    }
+    return false
+}
\ No newline at end of file
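resolveEndpointOverride returns an override only when the requested endpoint is absent from the model's supported list but the counterpart endpoint is present. A registry-free restatement of that contract, runnable in isolation (resolveOverride is an illustrative name; the real function consults the global model registry):

package main

import "fmt"

// resolveOverride restates resolveEndpointOverride above without the
// registry: override only when the requested endpoint is unsupported
// but the counterpart endpoint is supported.
func resolveOverride(supported []string, requested string) (string, bool) {
	contains := func(list []string, v string) bool {
		for _, item := range list {
			if item == v {
				return true
			}
		}
		return false
	}
	if len(supported) == 0 || contains(supported, requested) {
		return "", false
	}
	const chat, responses = "/chat/completions", "/responses"
	if requested == chat && contains(supported, responses) {
		return responses, true
	}
	if requested == responses && contains(supported, chat) {
		return chat, true
	}
	return "", false
}

func main() {
	fmt.Println(resolveOverride([]string{"/responses"}, "/chat/completions"))                 // /responses true
	fmt.Println(resolveOverride([]string{"/chat/completions"}, "/responses"))                 // /chat/completions true
	fmt.Println(resolveOverride([]string{"/chat/completions", "/responses"}, "/responses")) // "" false
	fmt.Println(resolveOverride(nil, "/responses"))                                           // "" false
}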
diff --git a/sdk/api/handlers/openai/openai_handlers.go b/sdk/api/handlers/openai/openai_handlers.go
index 09471ce1..c8aaba78 100644
--- a/sdk/api/handlers/openai/openai_handlers.go
+++ b/sdk/api/handlers/openai/openai_handlers.go
@@ -17,6 +17,7 @@ import (
     . "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
     "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+    codexconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/openai/chat-completions"
     responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses"
     "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
     "github.com/tidwall/gjson"
@@ -112,6 +113,23 @@ func (h *OpenAIAPIHandler) ChatCompletions(c *gin.Context) {
     streamResult := gjson.GetBytes(rawJSON, "stream")
     stream := streamResult.Type == gjson.True
 
+    modelName := gjson.GetBytes(rawJSON, "model").String()
+    if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIChatEndpoint); ok && overrideEndpoint == openAIResponsesEndpoint {
+        originalChat := rawJSON
+        if shouldTreatAsResponsesFormat(rawJSON) {
+            // Already responses-style payload; no conversion needed.
+        } else {
+            rawJSON = codexconverter.ConvertOpenAIRequestToCodex(modelName, rawJSON, stream)
+        }
+        stream = gjson.GetBytes(rawJSON, "stream").Bool()
+        if stream {
+            h.handleStreamingResponseViaResponses(c, rawJSON, originalChat)
+        } else {
+            h.handleNonStreamingResponseViaResponses(c, rawJSON, originalChat)
+        }
+        return
+    }
+
     // Some clients send OpenAI Responses-format payloads to /v1/chat/completions.
     // Convert them to Chat Completions so downstream translators preserve tool metadata.
     if shouldTreatAsResponsesFormat(rawJSON) {
@@ -245,6 +263,76 @@ func convertCompletionsRequestToChatCompletions(rawJSON []byte) []byte {
     return []byte(out)
 }
 
+func convertResponsesObjectToChatCompletion(ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, responsesPayload []byte) []byte {
+    if len(responsesPayload) == 0 {
+        return nil
+    }
+    wrapped := wrapResponsesPayloadAsCompleted(responsesPayload)
+    if len(wrapped) == 0 {
+        return nil
+    }
+    var param any
+    converted := codexconverter.ConvertCodexResponseToOpenAINonStream(ctx, modelName, originalChatJSON, responsesRequestJSON, wrapped, &param)
+    if converted == "" {
+        return nil
+    }
+    return []byte(converted)
+}
+
+func wrapResponsesPayloadAsCompleted(payload []byte) []byte {
+    if gjson.GetBytes(payload, "type").Exists() {
+        return payload
+    }
+    if gjson.GetBytes(payload, "object").String() != "response" {
+        return payload
+    }
+    wrapped := `{"type":"response.completed","response":{}}`
+    wrapped, _ = sjson.SetRaw(wrapped, "response", string(payload))
+    return []byte(wrapped)
+}
+
+func writeConvertedResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, chunk []byte, param *any) {
+    outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param)
+    for _, out := range outputs {
+        if out == "" {
+            continue
+        }
+        _, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out)
+    }
+}
+
+func (h *OpenAIAPIHandler) forwardResponsesAsChatStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON []byte, param *any) {
+    h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+        WriteChunk: func(chunk []byte) {
+            outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param)
+            for _, out := range outputs {
+                if out == "" {
+                    continue
+                }
+                _, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out)
+            }
+        },
+        WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+            if errMsg == nil {
+                return
+            }
+            status := http.StatusInternalServerError
+            if errMsg.StatusCode > 0 {
+                status = errMsg.StatusCode
+            }
+            errText := http.StatusText(status)
+            if errMsg.Error != nil && errMsg.Error.Error() != "" {
+                errText = errMsg.Error.Error()
+            }
+            body := handlers.BuildErrorResponseBody(status, errText)
+            _, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(body))
+        },
+        WriteDone: func() {
+            _, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+        },
+    })
+}
+
 // convertChatCompletionsResponseToCompletions converts chat completions API response back to completions format.
 // This ensures the completions endpoint returns data in the expected format.
 //
@@ -435,6 +523,25 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
     cliCancel()
 }
 
+func (h *OpenAIAPIHandler) handleNonStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) {
+    c.Header("Content-Type", "application/json")
+
+    modelName := gjson.GetBytes(rawJSON, "model").String()
+    cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+    resp, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c))
+    if errMsg != nil {
+        h.WriteErrorResponse(c, errMsg)
+        cliCancel(errMsg.Error)
+        return
+    }
+    converted := convertResponsesObjectToChatCompletion(cliCtx, modelName, originalChatJSON, rawJSON, resp)
+    if converted == nil {
+        converted = resp
+    }
+    _, _ = c.Writer.Write(converted)
+    cliCancel()
+}
+
 // handleStreamingResponse handles streaming responses for Gemini models.
 // It establishes a streaming connection with the backend service and forwards
 // the response chunks to the client in real-time using Server-Sent Events.
@@ -509,6 +616,67 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
     }
 }
 
+func (h *OpenAIAPIHandler) handleStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) {
+    flusher, ok := c.Writer.(http.Flusher)
+    if !ok {
+        c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+            Error: handlers.ErrorDetail{
+                Message: "Streaming not supported",
+                Type:    "server_error",
+            },
+        })
+        return
+    }
+
+    modelName := gjson.GetBytes(rawJSON, "model").String()
+    cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+    dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c))
+    var param any
+
+    setSSEHeaders := func() {
+        c.Header("Content-Type", "text/event-stream")
+        c.Header("Cache-Control", "no-cache")
+        c.Header("Connection", "keep-alive")
+        c.Header("Access-Control-Allow-Origin", "*")
+    }
+
+    // Peek for first usable chunk
+    for {
+        select {
+        case <-c.Request.Context().Done():
+            cliCancel(c.Request.Context().Err())
+            return
+        case errMsg, ok := <-errChan:
+            if !ok {
+                errChan = nil
+                continue
+            }
+            h.WriteErrorResponse(c, errMsg)
+            if errMsg != nil {
+                cliCancel(errMsg.Error)
+            } else {
+                cliCancel(nil)
+            }
+            return
+        case chunk, ok := <-dataChan:
+            if !ok {
+                setSSEHeaders()
+                _, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+                flusher.Flush()
+                cliCancel(nil)
+                return
+            }
+
+            setSSEHeaders()
+            writeConvertedResponsesChunk(c, cliCtx, modelName, originalChatJSON, rawJSON, chunk, &param)
+            flusher.Flush()
+
+            h.forwardResponsesAsChatStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalChatJSON, rawJSON, &param)
+            return
+        }
+    }
+}
+
 // handleCompletionsNonStreamingResponse handles non-streaming completions responses.
 // It converts completions request to chat completions format, sends to backend,
 // then converts the response back to completions format before sending to client.
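One design choice worth calling out: wrapResponsesPayloadAsCompleted lets the non-streaming path reuse the stream-oriented converter by packaging a bare Responses object (object == "response") into a response.completed event envelope. A self-contained sketch of the same wrapping (wrap is an illustrative name; the real helper is shown in the diff above):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// wrap mirrors wrapResponsesPayloadAsCompleted: a bare Responses object is
// wrapped into a response.completed event so one code path can consume both
// streaming events and complete response objects.
func wrap(payload string) string {
	if gjson.Get(payload, "type").Exists() {
		return payload // already an event envelope
	}
	if gjson.Get(payload, "object").String() != "response" {
		return payload // not a Responses object; pass through
	}
	wrapped := `{"type":"response.completed","response":{}}`
	wrapped, _ = sjson.SetRaw(wrapped, "response", payload)
	return wrapped
}

func main() {
	fmt.Println(wrap(`{"object":"response","id":"resp_123"}`))
	// {"type":"response.completed","response":{"object":"response","id":"resp_123"}}
}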
"github.com/router-for-me/CLIProxyAPI/v6/internal/constant" "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers" "github.com/tidwall/gjson" ) @@ -83,7 +84,21 @@ func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) { // Check if the client requested a streaming response. streamResult := gjson.GetBytes(rawJSON, "stream") - if streamResult.Type == gjson.True { + stream := streamResult.Type == gjson.True + + modelName := gjson.GetBytes(rawJSON, "model").String() + if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIResponsesEndpoint); ok && overrideEndpoint == openAIChatEndpoint { + chatJSON := responsesconverter.ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName, rawJSON, stream) + stream = gjson.GetBytes(chatJSON, "stream").Bool() + if stream { + h.handleStreamingResponseViaChat(c, rawJSON, chatJSON) + } else { + h.handleNonStreamingResponseViaChat(c, rawJSON, chatJSON) + } + return + } + + if stream { h.handleStreamingResponse(c, rawJSON) } else { h.handleNonStreamingResponse(c, rawJSON) @@ -116,6 +131,28 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r cliCancel() } +func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) { + c.Header("Content-Type", "application/json") + + modelName := gjson.GetBytes(chatJSON, "model").String() + cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background()) + resp, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "") + if errMsg != nil { + h.WriteErrorResponse(c, errMsg) + cliCancel(errMsg.Error) + return + } + var param any + converted := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(cliCtx, modelName, originalResponsesJSON, originalResponsesJSON, resp, ¶m) + if converted == "" { + _, _ = c.Writer.Write(resp) + cliCancel() + return + } + _, _ = c.Writer.Write([]byte(converted)) + cliCancel() +} + // handleStreamingResponse handles streaming responses for Gemini models. // It establishes a streaming connection with the backend service and forwards // the response chunks to the client in real-time using Server-Sent Events. 
@@ -196,6 +233,116 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
     }
 }
 
+func (h *OpenAIResponsesAPIHandler) handleStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) {
+    flusher, ok := c.Writer.(http.Flusher)
+    if !ok {
+        c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+            Error: handlers.ErrorDetail{
+                Message: "Streaming not supported",
+                Type:    "server_error",
+            },
+        })
+        return
+    }
+
+    modelName := gjson.GetBytes(chatJSON, "model").String()
+    cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+    dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "")
+    var param any
+
+    setSSEHeaders := func() {
+        c.Header("Content-Type", "text/event-stream")
+        c.Header("Cache-Control", "no-cache")
+        c.Header("Connection", "keep-alive")
+        c.Header("Access-Control-Allow-Origin", "*")
+    }
+
+    for {
+        select {
+        case <-c.Request.Context().Done():
+            cliCancel(c.Request.Context().Err())
+            return
+        case errMsg, ok := <-errChan:
+            if !ok {
+                errChan = nil
+                continue
+            }
+            h.WriteErrorResponse(c, errMsg)
+            if errMsg != nil {
+                cliCancel(errMsg.Error)
+            } else {
+                cliCancel(nil)
+            }
+            return
+        case chunk, ok := <-dataChan:
+            if !ok {
+                setSSEHeaders()
+                _, _ = c.Writer.Write([]byte("\n"))
+                flusher.Flush()
+                cliCancel(nil)
+                return
+            }
+
+            setSSEHeaders()
+            writeChatAsResponsesChunk(c, cliCtx, modelName, originalResponsesJSON, chunk, &param)
+            flusher.Flush()
+
+            h.forwardChatAsResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalResponsesJSON, &param)
+            return
+        }
+    }
+}
+
+func writeChatAsResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalResponsesJSON, chunk []byte, param *any) {
+    outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param)
+    for _, out := range outputs {
+        if out == "" {
+            continue
+        }
+        if bytes.HasPrefix([]byte(out), []byte("event:")) {
+            _, _ = c.Writer.Write([]byte("\n"))
+        }
+        _, _ = c.Writer.Write([]byte(out))
+        _, _ = c.Writer.Write([]byte("\n"))
+    }
+}
+
+func (h *OpenAIResponsesAPIHandler) forwardChatAsResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalResponsesJSON []byte, param *any) {
+    h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+        WriteChunk: func(chunk []byte) {
+            outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param)
+            for _, out := range outputs {
+                if out == "" {
+                    continue
+                }
+                if bytes.HasPrefix([]byte(out), []byte("event:")) {
+                    _, _ = c.Writer.Write([]byte("\n"))
+                }
+                _, _ = c.Writer.Write([]byte(out))
+                _, _ = c.Writer.Write([]byte("\n"))
+            }
+        },
+        WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+            if errMsg == nil {
+                return
+            }
+            status := http.StatusInternalServerError
+            if errMsg.StatusCode > 0 {
+                status = errMsg.StatusCode
+            }
+            errText := http.StatusText(status)
+            if errMsg.Error != nil && errMsg.Error.Error() != "" {
+                errText = errMsg.Error.Error()
+            }
+            body := handlers.BuildErrorResponseBody(status, errText)
+            _, _ = fmt.Fprintf(c.Writer, "\nevent: error\ndata: %s\n\n", string(body))
+        },
+        WriteDone: func() {
+            _, _ = c.Writer.Write([]byte("\n"))
+        },
+    })
+}
+
 func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
     h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
         WriteChunk: func(chunk []byte) {
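The blank-line and "event:" handling above reflects the two SSE framings being bridged: Chat Completions streams bare "data:" lines closed by "data: [DONE]", while the Responses API streams named events, each "event:" line followed by its "data:" payload. An illustrative side-by-side (event names and payloads are samples, not taken from this patch):

package main

import "fmt"

func main() {
	// Chat Completions style: JSON chunks behind "data:", closed by [DONE].
	fmt.Print("data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n")
	fmt.Print("data: [DONE]\n\n")

	// Responses style: named events, each followed by a data payload.
	fmt.Print("event: response.output_text.delta\ndata: {\"delta\":\"hi\"}\n\n")
	fmt.Print("event: response.completed\ndata: {\"response\":{\"id\":\"resp_1\"}}\n\n")
}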
From 18daa023cb56e2ed99f33c2fd2a6f22f65d9a025 Mon Sep 17 00:00:00 2001
From: ChrAlpha <53332481+ChrAlpha@users.noreply.github.com>
Date: Thu, 15 Jan 2026 19:13:54 +0800
Subject: [PATCH 4/4] fix(openai): improve error handling for response conversion failures
---
 internal/runtime/executor/usage_helpers.go          | 30 ++++++-----------
 sdk/api/handlers/openai/openai_handlers.go          |  7 ++++-
 .../openai/openai_responses_handlers.go             |  7 +++--
 3 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go
index 7d8d345e..3aa1e7ff 100644
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -236,11 +236,7 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
     return detail, true
 }
 
-func parseOpenAIResponsesUsage(data []byte) usage.Detail {
-    usageNode := gjson.ParseBytes(data).Get("usage")
-    if !usageNode.Exists() {
-        return usage.Detail{}
-    }
+func parseOpenAIResponsesUsageDetail(usageNode gjson.Result) usage.Detail {
     detail := usage.Detail{
         InputTokens:  usageNode.Get("input_tokens").Int(),
         OutputTokens: usageNode.Get("output_tokens").Int(),
@@ -258,6 +254,14 @@ func parseOpenAIResponsesUsage(data []byte) usage.Detail {
     return detail
 }
 
+func parseOpenAIResponsesUsage(data []byte) usage.Detail {
+    usageNode := gjson.ParseBytes(data).Get("usage")
+    if !usageNode.Exists() {
+        return usage.Detail{}
+    }
+    return parseOpenAIResponsesUsageDetail(usageNode)
+}
+
 func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
     payload := jsonPayload(line)
     if len(payload) == 0 || !gjson.ValidBytes(payload) {
@@ -267,21 +271,7 @@ func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
     if !usageNode.Exists() {
         return usage.Detail{}, false
     }
-    detail := usage.Detail{
-        InputTokens:  usageNode.Get("input_tokens").Int(),
-        OutputTokens: usageNode.Get("output_tokens").Int(),
-        TotalTokens:  usageNode.Get("total_tokens").Int(),
-    }
-    if detail.TotalTokens == 0 {
-        detail.TotalTokens = detail.InputTokens + detail.OutputTokens
-    }
-    if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
-        detail.CachedTokens = cached.Int()
-    }
-    if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
-        detail.ReasoningTokens = reasoning.Int()
-    }
-    return detail, true
+    return parseOpenAIResponsesUsageDetail(usageNode), true
 }
 
 func parseClaudeUsage(data []byte) usage.Detail {
     usageNode := gjson.ParseBytes(data).Get("usage")
     if !usageNode.Exists() {

diff --git a/sdk/api/handlers/openai/openai_handlers.go b/sdk/api/handlers/openai/openai_handlers.go
index c8aaba78..f1dd5a07 100644
--- a/sdk/api/handlers/openai/openai_handlers.go
+++ b/sdk/api/handlers/openai/openai_handlers.go
@@ -536,7 +536,12 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponseViaResponses(c *gin.Context
     }
     converted := convertResponsesObjectToChatCompletion(cliCtx, modelName, originalChatJSON, rawJSON, resp)
     if converted == nil {
-        converted = resp
+        h.WriteErrorResponse(c, &interfaces.ErrorMessage{
+            StatusCode: http.StatusInternalServerError,
+            Error:      fmt.Errorf("failed to convert response to chat completion format"),
+        })
+        cliCancel(fmt.Errorf("response conversion failed"))
+        return
     }
     _, _ = c.Writer.Write(converted)
     cliCancel()
diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go
index e6c29001..952e44e0 100644
--- a/sdk/api/handlers/openai/openai_responses_handlers.go
+++ b/sdk/api/handlers/openai/openai_responses_handlers.go
@@ -145,8 +145,11 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponseViaChat(c *gin.Con
     var param any
     converted := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(cliCtx, modelName, originalResponsesJSON, originalResponsesJSON, resp, &param)
     if converted == "" {
-        _, _ = c.Writer.Write(resp)
-        cliCancel()
+        h.WriteErrorResponse(c, &interfaces.ErrorMessage{
+            StatusCode: http.StatusInternalServerError,
+            Error:      fmt.Errorf("failed to convert chat completion response to responses format"),
+        })
+        cliCancel(fmt.Errorf("response conversion failed"))
         return
     }
     _, _ = c.Writer.Write([]byte(converted))
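The pattern patch 4 adopts, in isolation: a failed format conversion is surfaced as an explicit 500 instead of writing the unconverted upstream body, which would otherwise hand the client a payload in the wrong schema. A hedged sketch (ErrorMessage here is a trimmed stand-in for internal/interfaces.ErrorMessage; respond is an illustrative helper, not code from the patch):

package main

import (
	"errors"
	"fmt"
	"net/http"
)

// ErrorMessage is a trimmed stand-in for internal/interfaces.ErrorMessage.
type ErrorMessage struct {
	StatusCode int
	Err        error
}

// respond treats a nil conversion result as a server error rather than
// passing the unconverted body through, mirroring patch 4's change.
func respond(converted []byte) (*ErrorMessage, []byte) {
	if converted == nil {
		return &ErrorMessage{
			StatusCode: http.StatusInternalServerError,
			Err:        errors.New("failed to convert response to chat completion format"),
		}, nil
	}
	return nil, converted
}

func main() {
	if errMsg, _ := respond(nil); errMsg != nil {
		fmt.Println(errMsg.StatusCode, errMsg.Err)
	}
}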