diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index fc323c9f..d47e50a5 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -847,6 +847,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, }, { ID: "gpt-5-mini", @@ -858,6 +859,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5 Mini via GitHub Copilot", ContextLength: 128000, MaxCompletionTokens: 16384, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, }, { ID: "gpt-5-codex", @@ -869,6 +871,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5 Codex via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/responses"}, }, { ID: "gpt-5.1", @@ -880,6 +883,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5.1 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, }, { ID: "gpt-5.1-codex", @@ -891,6 +895,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5.1 Codex via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/responses"}, }, { ID: "gpt-5.1-codex-mini", @@ -902,6 +907,19 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5.1 Codex Mini via GitHub Copilot", ContextLength: 128000, MaxCompletionTokens: 16384, + SupportedEndpoints: []string{"/responses"}, + }, + { + ID: "gpt-5.1-codex-max", + Object: "model", + Created: now, + OwnedBy: "github-copilot", + Type: "github-copilot", + DisplayName: "GPT-5.1 Codex Max", + Description: "OpenAI GPT-5.1 Codex Max via GitHub Copilot", + ContextLength: 200000, + MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/responses"}, }, { ID: "gpt-5.2", @@ -913,6 +931,19 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "OpenAI GPT-5.2 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, + }, + { + ID: "gpt-5.2-codex", + Object: "model", + Created: now, + OwnedBy: "github-copilot", + Type: "github-copilot", + DisplayName: "GPT-5.2 Codex", + Description: "OpenAI GPT-5.2 Codex via GitHub Copilot", + ContextLength: 200000, + MaxCompletionTokens: 32768, + SupportedEndpoints: []string{"/responses"}, }, { ID: "claude-haiku-4.5", @@ -924,6 +955,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Haiku 4.5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-opus-4.1", @@ -935,6 +967,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Opus 4.1 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 32000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-opus-4.5", @@ -946,6 +979,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Opus 4.5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-sonnet-4", @@ -957,6 +991,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Sonnet 4 via GitHub Copilot", ContextLength: 200000, 
MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "claude-sonnet-4.5", @@ -968,6 +1003,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Description: "Anthropic Claude Sonnet 4.5 via GitHub Copilot", ContextLength: 200000, MaxCompletionTokens: 64000, + SupportedEndpoints: []string{"/chat/completions"}, }, { ID: "gemini-2.5-pro", @@ -981,13 +1017,24 @@ func GetGitHubCopilotModels() []*ModelInfo { MaxCompletionTokens: 65536, }, { - ID: "gemini-3-pro", + ID: "gemini-3-pro-preview", Object: "model", Created: now, OwnedBy: "github-copilot", Type: "github-copilot", - DisplayName: "Gemini 3 Pro", - Description: "Google Gemini 3 Pro via GitHub Copilot", + DisplayName: "Gemini 3 Pro (Preview)", + Description: "Google Gemini 3 Pro Preview via GitHub Copilot", + ContextLength: 1048576, + MaxCompletionTokens: 65536, + }, + { + ID: "gemini-3-flash-preview", + Object: "model", + Created: now, + OwnedBy: "github-copilot", + Type: "github-copilot", + DisplayName: "Gemini 3 Flash (Preview)", + Description: "Google Gemini 3 Flash Preview via GitHub Copilot", ContextLength: 1048576, MaxCompletionTokens: 65536, }, @@ -1003,15 +1050,16 @@ func GetGitHubCopilotModels() []*ModelInfo { MaxCompletionTokens: 16384, }, { - ID: "raptor-mini", + ID: "oswe-vscode-prime", Object: "model", Created: now, OwnedBy: "github-copilot", Type: "github-copilot", - DisplayName: "Raptor Mini", - Description: "Raptor Mini via GitHub Copilot", + DisplayName: "Raptor mini (Preview)", + Description: "Raptor mini via GitHub Copilot", ContextLength: 128000, MaxCompletionTokens: 16384, + SupportedEndpoints: []string{"/chat/completions", "/responses"}, }, } } diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 5519d5ef..16f42fc9 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -47,6 +47,8 @@ type ModelInfo struct { MaxCompletionTokens int `json:"max_completion_tokens,omitempty"` // SupportedParameters lists supported parameters SupportedParameters []string `json:"supported_parameters,omitempty"` + // SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses"). + SupportedEndpoints []string `json:"supported_endpoints,omitempty"` // Thinking holds provider-specific reasoning/thinking budget capabilities. // This is optional and currently used for Gemini thinking budget normalization. @@ -476,6 +478,9 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo { if len(model.SupportedParameters) > 0 { copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...) } + if len(model.SupportedEndpoints) > 0 { + copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...) 
+ } return &copyModel } @@ -988,6 +993,9 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) if len(model.SupportedParameters) > 0 { result["supported_parameters"] = model.SupportedParameters } + if len(model.SupportedEndpoints) > 0 { + result["supported_endpoints"] = model.SupportedEndpoints + } return result case "claude", "kiro", "antigravity": diff --git a/internal/runtime/executor/github_copilot_executor.go b/internal/runtime/executor/github_copilot_executor.go index f29af146..74e3fa6c 100644 --- a/internal/runtime/executor/github_copilot_executor.go +++ b/internal/runtime/executor/github_copilot_executor.go @@ -23,6 +23,7 @@ import ( const ( githubCopilotBaseURL = "https://api.githubcopilot.com" githubCopilotChatPath = "/chat/completions" + githubCopilotResponsesPath = "/responses" githubCopilotAuthType = "github-copilot" githubCopilotTokenCacheTTL = 25 * time.Minute // tokenExpiryBuffer is the time before expiry when we should refresh the token. @@ -106,7 +107,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth. defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat + useResponses := useGitHubCopilotResponsesEndpoint(from) to := sdktranslator.FromString("openai") + if useResponses { + to = sdktranslator.FromString("openai-response") + } originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) @@ -117,7 +122,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth. body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "stream", false) - url := githubCopilotBaseURL + githubCopilotChatPath + path := githubCopilotChatPath + if useResponses { + path = githubCopilotResponsesPath + } + url := githubCopilotBaseURL + path httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return resp, err } @@ -172,6 +181,9 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
appendAPIResponseChunk(ctx, e.cfg, data) detail := parseOpenAIUsage(data) + if useResponses && detail.TotalTokens == 0 { + detail = parseOpenAIResponsesUsage(data) + } if detail.TotalTokens > 0 { reporter.publish(ctx, detail) } @@ -194,7 +206,11 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat + useResponses := useGitHubCopilotResponsesEndpoint(from) to := sdktranslator.FromString("openai") + if useResponses { + to = sdktranslator.FromString("openai-response") + } originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) @@ -205,9 +221,15 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) body, _ = sjson.SetBytes(body, "stream", true) // Enable stream options for usage stats in stream - body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) + if !useResponses { + body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) + } - url := githubCopilotBaseURL + githubCopilotChatPath + path := githubCopilotChatPath + if useResponses { + path = githubCopilotResponsesPath + } + url := githubCopilotBaseURL + path httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return nil, err @@ -283,6 +305,10 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox } if detail, ok := parseOpenAIStreamUsage(line); ok { reporter.publish(ctx, detail) + } else if useResponses { + if detail, ok := parseOpenAIResponsesStreamUsage(line); ok { + reporter.publish(ctx, detail) + } } } @@ -393,6 +419,10 @@ func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte { return body } +func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format) bool { + return sourceFormat.String() == "openai-response" +} + // isHTTPSuccess checks if the status code indicates success (2xx). 
func isHTTPSuccess(statusCode int) bool { return statusCode >= 200 && statusCode < 300 diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index a3ce270c..3aa1e7ff 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -236,6 +236,44 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) { return detail, true } +func parseOpenAIResponsesUsageDetail(usageNode gjson.Result) usage.Detail { + detail := usage.Detail{ + InputTokens: usageNode.Get("input_tokens").Int(), + OutputTokens: usageNode.Get("output_tokens").Int(), + TotalTokens: usageNode.Get("total_tokens").Int(), + } + if detail.TotalTokens == 0 { + detail.TotalTokens = detail.InputTokens + detail.OutputTokens + } + if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() { + detail.CachedTokens = cached.Int() + } + if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() { + detail.ReasoningTokens = reasoning.Int() + } + return detail +} + +func parseOpenAIResponsesUsage(data []byte) usage.Detail { + usageNode := gjson.ParseBytes(data).Get("usage") + if !usageNode.Exists() { + return usage.Detail{} + } + return parseOpenAIResponsesUsageDetail(usageNode) +} + +func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) { + payload := jsonPayload(line) + if len(payload) == 0 || !gjson.ValidBytes(payload) { + return usage.Detail{}, false + } + usageNode := gjson.GetBytes(payload, "usage") + if !usageNode.Exists() { + return usage.Detail{}, false + } + return parseOpenAIResponsesUsageDetail(usageNode), true +} + func parseClaudeUsage(data []byte) usage.Detail { usageNode := gjson.ParseBytes(data).Get("usage") if !usageNode.Exists() { diff --git a/sdk/api/handlers/openai/endpoint_compat.go b/sdk/api/handlers/openai/endpoint_compat.go new file mode 100644 index 00000000..56fac508 --- /dev/null +++ b/sdk/api/handlers/openai/endpoint_compat.go @@ -0,0 +1,37 @@ +package openai + +import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + +const ( + openAIChatEndpoint = "/chat/completions" + openAIResponsesEndpoint = "/responses" +) + +func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) { + if modelName == "" { + return "", false + } + info := registry.GetGlobalRegistry().GetModelInfo(modelName) + if info == nil || len(info.SupportedEndpoints) == 0 { + return "", false + } + if endpointListContains(info.SupportedEndpoints, requestedEndpoint) { + return "", false + } + if requestedEndpoint == openAIChatEndpoint && endpointListContains(info.SupportedEndpoints, openAIResponsesEndpoint) { + return openAIResponsesEndpoint, true + } + if requestedEndpoint == openAIResponsesEndpoint && endpointListContains(info.SupportedEndpoints, openAIChatEndpoint) { + return openAIChatEndpoint, true + } + return "", false +} + +func endpointListContains(items []string, value string) bool { + for _, item := range items { + if item == value { + return true + } + } + return false +} \ No newline at end of file diff --git a/sdk/api/handlers/openai/openai_handlers.go b/sdk/api/handlers/openai/openai_handlers.go index 09471ce1..f1dd5a07 100644 --- a/sdk/api/handlers/openai/openai_handlers.go +++ b/sdk/api/handlers/openai/openai_handlers.go @@ -17,6 +17,7 @@ import ( .
"github.com/router-for-me/CLIProxyAPI/v6/internal/constant" "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + codexconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/openai/chat-completions" responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers" "github.com/tidwall/gjson" @@ -112,6 +113,23 @@ func (h *OpenAIAPIHandler) ChatCompletions(c *gin.Context) { streamResult := gjson.GetBytes(rawJSON, "stream") stream := streamResult.Type == gjson.True + modelName := gjson.GetBytes(rawJSON, "model").String() + if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIChatEndpoint); ok && overrideEndpoint == openAIResponsesEndpoint { + originalChat := rawJSON + if shouldTreatAsResponsesFormat(rawJSON) { + // Already responses-style payload; no conversion needed. + } else { + rawJSON = codexconverter.ConvertOpenAIRequestToCodex(modelName, rawJSON, stream) + } + stream = gjson.GetBytes(rawJSON, "stream").Bool() + if stream { + h.handleStreamingResponseViaResponses(c, rawJSON, originalChat) + } else { + h.handleNonStreamingResponseViaResponses(c, rawJSON, originalChat) + } + return + } + // Some clients send OpenAI Responses-format payloads to /v1/chat/completions. // Convert them to Chat Completions so downstream translators preserve tool metadata. if shouldTreatAsResponsesFormat(rawJSON) { @@ -245,6 +263,76 @@ func convertCompletionsRequestToChatCompletions(rawJSON []byte) []byte { return []byte(out) } +func convertResponsesObjectToChatCompletion(ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, responsesPayload []byte) []byte { + if len(responsesPayload) == 0 { + return nil + } + wrapped := wrapResponsesPayloadAsCompleted(responsesPayload) + if len(wrapped) == 0 { + return nil + } + var param any + converted := codexconverter.ConvertCodexResponseToOpenAINonStream(ctx, modelName, originalChatJSON, responsesRequestJSON, wrapped, &param) + if converted == "" { + return nil + } + return []byte(converted) +} + +func wrapResponsesPayloadAsCompleted(payload []byte) []byte { + if gjson.GetBytes(payload, "type").Exists() { + return payload + } + if gjson.GetBytes(payload, "object").String() != "response" { + return payload + } + wrapped := `{"type":"response.completed","response":{}}` + wrapped, _ = sjson.SetRaw(wrapped, "response", string(payload)) + return []byte(wrapped) +} + +func writeConvertedResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, chunk []byte, param *any) { + outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param) + for _, out := range outputs { + if out == "" { + continue + } + _, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out) + } +} + +func (h *OpenAIAPIHandler) forwardResponsesAsChatStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON []byte, param *any) { + h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{ + WriteChunk: func(chunk []byte) { + outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param) + for _, out := range outputs { + if out == "" { + continue + } + _, _ =
fmt.Fprintf(c.Writer, "data: %s\n\n", out) + } + }, + WriteTerminalError: func(errMsg *interfaces.ErrorMessage) { + if errMsg == nil { + return + } + status := http.StatusInternalServerError + if errMsg.StatusCode > 0 { + status = errMsg.StatusCode + } + errText := http.StatusText(status) + if errMsg.Error != nil && errMsg.Error.Error() != "" { + errText = errMsg.Error.Error() + } + body := handlers.BuildErrorResponseBody(status, errText) + _, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(body)) + }, + WriteDone: func() { + _, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n") + }, + }) +} + // convertChatCompletionsResponseToCompletions converts chat completions API response back to completions format. // This ensures the completions endpoint returns data in the expected format. // @@ -435,6 +523,30 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON [] cliCancel() } +func (h *OpenAIAPIHandler) handleNonStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) { + c.Header("Content-Type", "application/json") + + modelName := gjson.GetBytes(rawJSON, "model").String() + cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background()) + resp, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c)) + if errMsg != nil { + h.WriteErrorResponse(c, errMsg) + cliCancel(errMsg.Error) + return + } + converted := convertResponsesObjectToChatCompletion(cliCtx, modelName, originalChatJSON, rawJSON, resp) + if converted == nil { + h.WriteErrorResponse(c, &interfaces.ErrorMessage{ + StatusCode: http.StatusInternalServerError, + Error: fmt.Errorf("failed to convert response to chat completion format"), + }) + cliCancel(fmt.Errorf("response conversion failed")) + return + } + _, _ = c.Writer.Write(converted) + cliCancel() +} + // handleStreamingResponse handles streaming responses for Gemini models. // It establishes a streaming connection with the backend service and forwards // the response chunks to the client in real-time using Server-Sent Events. 
@@ -509,6 +621,67 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt } } +func (h *OpenAIAPIHandler) handleStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) { + flusher, ok := c.Writer.(http.Flusher) + if !ok { + c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{ + Error: handlers.ErrorDetail{ + Message: "Streaming not supported", + Type: "server_error", + }, + }) + return + } + + modelName := gjson.GetBytes(rawJSON, "model").String() + cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background()) + dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c)) + var param any + + setSSEHeaders := func() { + c.Header("Content-Type", "text/event-stream") + c.Header("Cache-Control", "no-cache") + c.Header("Connection", "keep-alive") + c.Header("Access-Control-Allow-Origin", "*") + } + + // Peek for first usable chunk + for { + select { + case <-c.Request.Context().Done(): + cliCancel(c.Request.Context().Err()) + return + case errMsg, ok := <-errChan: + if !ok { + errChan = nil + continue + } + h.WriteErrorResponse(c, errMsg) + if errMsg != nil { + cliCancel(errMsg.Error) + } else { + cliCancel(nil) + } + return + case chunk, ok := <-dataChan: + if !ok { + setSSEHeaders() + _, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n") + flusher.Flush() + cliCancel(nil) + return + } + + setSSEHeaders() + writeConvertedResponsesChunk(c, cliCtx, modelName, originalChatJSON, rawJSON, chunk, &param) + flusher.Flush() + + h.forwardResponsesAsChatStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalChatJSON, rawJSON, &param) + return + } + } +} + // handleCompletionsNonStreamingResponse handles non-streaming completions responses. // It converts completions request to chat completions format, sends to backend, // then converts the response back to completions format before sending to client. diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go index 31099f81..952e44e0 100644 --- a/sdk/api/handlers/openai/openai_responses_handlers.go +++ b/sdk/api/handlers/openai/openai_responses_handlers.go @@ -16,6 +16,7 @@ import ( . "github.com/router-for-me/CLIProxyAPI/v6/internal/constant" "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses" "github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers" "github.com/tidwall/gjson" ) @@ -83,7 +84,21 @@ func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) { // Check if the client requested a streaming response.
streamResult := gjson.GetBytes(rawJSON, "stream") - if streamResult.Type == gjson.True { + stream := streamResult.Type == gjson.True + + modelName := gjson.GetBytes(rawJSON, "model").String() + if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIResponsesEndpoint); ok && overrideEndpoint == openAIChatEndpoint { + chatJSON := responsesconverter.ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName, rawJSON, stream) + stream = gjson.GetBytes(chatJSON, "stream").Bool() + if stream { + h.handleStreamingResponseViaChat(c, rawJSON, chatJSON) + } else { + h.handleNonStreamingResponseViaChat(c, rawJSON, chatJSON) + } + return + } + + if stream { h.handleStreamingResponse(c, rawJSON) } else { h.handleNonStreamingResponse(c, rawJSON) @@ -116,6 +131,31 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r cliCancel() } +func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) { + c.Header("Content-Type", "application/json") + + modelName := gjson.GetBytes(chatJSON, "model").String() + cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background()) + resp, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "") + if errMsg != nil { + h.WriteErrorResponse(c, errMsg) + cliCancel(errMsg.Error) + return + } + var param any + converted := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(cliCtx, modelName, originalResponsesJSON, originalResponsesJSON, resp, &param) + if converted == "" { + h.WriteErrorResponse(c, &interfaces.ErrorMessage{ + StatusCode: http.StatusInternalServerError, + Error: fmt.Errorf("failed to convert chat completion response to responses format"), + }) + cliCancel(fmt.Errorf("response conversion failed")) + return + } + _, _ = c.Writer.Write([]byte(converted)) + cliCancel() +} + // handleStreamingResponse handles streaming responses for Gemini models. // It establishes a streaming connection with the backend service and forwards // the response chunks to the client in real-time using Server-Sent Events.
@@ -196,6 +236,116 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ } } +func (h *OpenAIResponsesAPIHandler) handleStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) { + flusher, ok := c.Writer.(http.Flusher) + if !ok { + c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{ + Error: handlers.ErrorDetail{ + Message: "Streaming not supported", + Type: "server_error", + }, + }) + return + } + + modelName := gjson.GetBytes(chatJSON, "model").String() + cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background()) + dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "") + var param any + + setSSEHeaders := func() { + c.Header("Content-Type", "text/event-stream") + c.Header("Cache-Control", "no-cache") + c.Header("Connection", "keep-alive") + c.Header("Access-Control-Allow-Origin", "*") + } + + for { + select { + case <-c.Request.Context().Done(): + cliCancel(c.Request.Context().Err()) + return + case errMsg, ok := <-errChan: + if !ok { + errChan = nil + continue + } + h.WriteErrorResponse(c, errMsg) + if errMsg != nil { + cliCancel(errMsg.Error) + } else { + cliCancel(nil) + } + return + case chunk, ok := <-dataChan: + if !ok { + setSSEHeaders() + _, _ = c.Writer.Write([]byte("\n")) + flusher.Flush() + cliCancel(nil) + return + } + + setSSEHeaders() + writeChatAsResponsesChunk(c, cliCtx, modelName, originalResponsesJSON, chunk, &param) + flusher.Flush() + + h.forwardChatAsResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalResponsesJSON, &param) + return + } + } +} + +func writeChatAsResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalResponsesJSON, chunk []byte, param *any) { + outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param) + for _, out := range outputs { + if out == "" { + continue + } + if bytes.HasPrefix([]byte(out), []byte("event:")) { + _, _ = c.Writer.Write([]byte("\n")) + } + _, _ = c.Writer.Write([]byte(out)) + _, _ = c.Writer.Write([]byte("\n")) + } +} + +func (h *OpenAIResponsesAPIHandler) forwardChatAsResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalResponsesJSON []byte, param *any) { + h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{ + WriteChunk: func(chunk []byte) { + outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param) + for _, out := range outputs { + if out == "" { + continue + } + if bytes.HasPrefix([]byte(out), []byte("event:")) { + _, _ = c.Writer.Write([]byte("\n")) + } + _, _ = c.Writer.Write([]byte(out)) + _, _ = c.Writer.Write([]byte("\n")) + } + }, + WriteTerminalError: func(errMsg *interfaces.ErrorMessage) { + if errMsg == nil { + return + } + status := http.StatusInternalServerError + if errMsg.StatusCode > 0 { + status = errMsg.StatusCode + } + errText := http.StatusText(status) + if errMsg.Error != nil && errMsg.Error.Error() != "" { + errText = errMsg.Error.Error() + } + body := handlers.BuildErrorResponseBody(status, errText) + _, _ = fmt.Fprintf(c.Writer, "\nevent: error\ndata: %s\n\n", string(body)) + }, + WriteDone: func() { + _, _ = c.Writer.Write([]byte("\n")) + }, + }) +} + func (h
*OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) { h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{ WriteChunk: func(chunk []byte) {