diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 8359279c..8ec49304 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -834,6 +834,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5-mini",
@@ -845,6 +846,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 Mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5-codex",
@@ -856,6 +858,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.1",
@@ -867,6 +870,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5.1-codex",
@@ -878,6 +882,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.1-codex-mini",
@@ -889,6 +894,19 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 Codex Mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/responses"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "GPT-5.1 Codex Max",
+			Description:         "OpenAI GPT-5.1 Codex Max via GitHub Copilot",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.2",
@@ -900,6 +918,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.2 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5.2-codex",
@@ -911,6 +930,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.2 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "claude-haiku-4.5",
@@ -922,6 +942,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Haiku 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-opus-4.1",
@@ -933,6 +954,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Opus 4.1 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-opus-4.5",
@@ -944,6 +966,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Opus 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-sonnet-4",
@@ -955,6 +978,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Sonnet 4 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-sonnet-4.5",
@@ -966,6 +990,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Sonnet 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "gemini-2.5-pro",
@@ -979,13 +1004,24 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			MaxCompletionTokens: 65536,
 		},
 		{
-			ID:                  "gemini-3-pro",
+			ID:                  "gemini-3-pro-preview",
 			Object:              "model",
 			Created:             now,
 			OwnedBy:             "github-copilot",
 			Type:                "github-copilot",
-			DisplayName:         "Gemini 3 Pro",
-			Description:         "Google Gemini 3 Pro via GitHub Copilot",
+			DisplayName:         "Gemini 3 Pro (Preview)",
+			Description:         "Google Gemini 3 Pro Preview via GitHub Copilot",
+			ContextLength:       1048576,
+			MaxCompletionTokens: 65536,
+		},
+		{
+			ID:                  "gemini-3-flash-preview",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "Gemini 3 Flash (Preview)",
+			Description:         "Google Gemini 3 Flash Preview via GitHub Copilot",
 			ContextLength:       1048576,
 			MaxCompletionTokens: 65536,
 		},
@@ -1001,15 +1037,16 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			MaxCompletionTokens: 16384,
 		},
 		{
-			ID:                  "raptor-mini",
+			ID:                  "oswe-vscode-prime",
 			Object:              "model",
 			Created:             now,
 			OwnedBy:             "github-copilot",
 			Type:                "github-copilot",
-			DisplayName:         "Raptor Mini",
-			Description:         "Raptor Mini via GitHub Copilot",
+			DisplayName:         "Raptor mini (Preview)",
+			Description:         "Raptor mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 	}
 }
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index 537b03c2..13e2e699 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -47,6 +47,8 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+	// SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
+	SupportedEndpoints  []string `json:"supported_endpoints,omitempty"`
 
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
@@ -456,6 +458,9 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if len(model.SupportedParameters) > 0 {
 		copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
 	}
+	if len(model.SupportedEndpoints) > 0 {
+		copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...)
+	}
 	return &copyModel
 }
 
@@ -968,6 +973,9 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if len(model.SupportedParameters) > 0 {
 			result["supported_parameters"] = model.SupportedParameters
 		}
+		if len(model.SupportedEndpoints) > 0 {
+			result["supported_endpoints"] = model.SupportedEndpoints
+		}
 		return result
 	case "claude", "kiro", "antigravity":
diff --git a/internal/runtime/executor/github_copilot_executor.go b/internal/runtime/executor/github_copilot_executor.go
index f29af146..74e3fa6c 100644
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -23,6 +23,7 @@ import (
 const (
 	githubCopilotBaseURL       = "https://api.githubcopilot.com"
 	githubCopilotChatPath      = "/chat/completions"
+	githubCopilotResponsesPath = "/responses"
 	githubCopilotAuthType      = "github-copilot"
 	githubCopilotTokenCacheTTL = 25 * time.Minute
 	// tokenExpiryBuffer is the time before expiry when we should refresh the token.
@@ -106,7 +107,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	defer reporter.trackFailure(ctx, &err)
 
 	from := opts.SourceFormat
+	useResponses := useGitHubCopilotResponsesEndpoint(from)
 	to := sdktranslator.FromString("openai")
+	if useResponses {
+		to = sdktranslator.FromString("openai-response")
+	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
@@ -117,7 +122,11 @@
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", false)
 
-	url := githubCopilotBaseURL + githubCopilotChatPath
+	path := githubCopilotChatPath
+	if useResponses {
+		path = githubCopilotResponsesPath
+	}
+	url := githubCopilotBaseURL + path
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return resp, err
@@ -172,6 +181,9 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	appendAPIResponseChunk(ctx, e.cfg, data)
 
 	detail := parseOpenAIUsage(data)
+	if useResponses && detail.TotalTokens == 0 {
+		detail = parseOpenAIResponsesUsage(data)
+	}
 	if detail.TotalTokens > 0 {
 		reporter.publish(ctx, detail)
 	}
@@ -194,7 +206,11 @@
 	defer reporter.trackFailure(ctx, &err)
 
 	from := opts.SourceFormat
+	useResponses := useGitHubCopilotResponsesEndpoint(from)
 	to := sdktranslator.FromString("openai")
+	if useResponses {
+		to = sdktranslator.FromString("openai-response")
+	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
@@ -205,9 +221,15 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	// Enable stream options for usage stats in stream
-	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+	if !useResponses {
+		body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+	}
 
-	url := githubCopilotBaseURL + githubCopilotChatPath
+	path := githubCopilotChatPath
+	if useResponses {
+		path = githubCopilotResponsesPath
+	}
+	url := githubCopilotBaseURL + path
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return nil, err
@@ -283,6 +305,10 @@
 		}
 		if detail, ok := parseOpenAIStreamUsage(line); ok {
 			reporter.publish(ctx, detail)
+		} else if useResponses {
+			if detail, ok := parseOpenAIResponsesStreamUsage(line); ok {
+				reporter.publish(ctx, detail)
+			}
 		}
 	}
 
@@ -393,6 +419,10 @@ func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte {
 	return body
 }
 
+func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format) bool {
+	return sourceFormat.String() == "openai-response"
+}
+
 // isHTTPSuccess checks if the status code indicates success (2xx).
 func isHTTPSuccess(statusCode int) bool {
 	return statusCode >= 200 && statusCode < 300
diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go
index a3ce270c..7d8d345e 100644
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -236,6 +236,54 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
 	return detail, true
 }
 
+func parseOpenAIResponsesUsage(data []byte) usage.Detail {
+	usageNode := gjson.ParseBytes(data).Get("usage")
+	if !usageNode.Exists() {
+		return usage.Detail{}
+	}
+	detail := usage.Detail{
+		InputTokens:  usageNode.Get("input_tokens").Int(),
+		OutputTokens: usageNode.Get("output_tokens").Int(),
+		TotalTokens:  usageNode.Get("total_tokens").Int(),
+	}
+	if detail.TotalTokens == 0 {
+		detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+	}
+	if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
+		detail.CachedTokens = cached.Int()
+	}
+	if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
+		detail.ReasoningTokens = reasoning.Int()
+	}
+	return detail
+}
+
+func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
+	payload := jsonPayload(line)
+	if len(payload) == 0 || !gjson.ValidBytes(payload) {
+		return usage.Detail{}, false
+	}
+	usageNode := gjson.GetBytes(payload, "usage")
+	if !usageNode.Exists() {
+		return usage.Detail{}, false
+	}
+	detail := usage.Detail{
+		InputTokens:  usageNode.Get("input_tokens").Int(),
+		OutputTokens: usageNode.Get("output_tokens").Int(),
+		TotalTokens:  usageNode.Get("total_tokens").Int(),
+	}
+	if detail.TotalTokens == 0 {
+		detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+	}
+	if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
+		detail.CachedTokens = cached.Int()
+	}
+	if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
+		detail.ReasoningTokens = reasoning.Int()
+	}
+	return detail, true
+}
+
 func parseClaudeUsage(data []byte) usage.Detail {
 	usageNode := gjson.ParseBytes(data).Get("usage")
 	if !usageNode.Exists() {
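
Reviewer note: the new supported_endpoints field flows through cloneModelInfo and convertModelToMap above, so it surfaces in OpenAI-style model listings. Below is a minimal standalone sketch of how a downstream client might read it. The {"data": [...]} envelope is the conventional models-list shape and is an assumption here; only the "supported_endpoints" key and the sample IDs and paths come from this diff.

package main

import (
	"encoding/json"
	"fmt"
	"slices"
)

// modelEntry keeps only the fields this sketch needs from one models-list entry;
// "supported_endpoints" is the key emitted by convertModelToMap in this diff.
type modelEntry struct {
	ID                 string   `json:"id"`
	SupportedEndpoints []string `json:"supported_endpoints"`
}

func main() {
	// Illustrative listing only; the envelope shape is assumed, not defined here.
	raw := `{"data": [
		{"id": "gpt-5.1-codex", "supported_endpoints": ["/responses"]},
		{"id": "claude-opus-4.5", "supported_endpoints": ["/chat/completions"]}
	]}`
	var listing struct {
		Data []modelEntry `json:"data"`
	}
	if err := json.Unmarshal([]byte(raw), &listing); err != nil {
		panic(err)
	}
	for _, m := range listing.Data {
		fmt.Printf("%s supports /responses: %v\n",
			m.ID, slices.Contains(m.SupportedEndpoints, "/responses"))
	}
}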
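Similarly, a hedged sketch of the usage payload shape that parseOpenAIResponsesUsage expects from the /responses endpoint, including the total_tokens fallback. The gjson paths mirror the helper's lookups; the sample values and the main harness are invented for illustration.

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

// A non-streaming Responses body carrying the usage fields the helper reads.
// total_tokens is deliberately omitted here to exercise the fallback path.
const sample = `{
	"usage": {
		"input_tokens": 1200,
		"output_tokens": 300,
		"input_tokens_details": {"cached_tokens": 1024},
		"output_tokens_details": {"reasoning_tokens": 128}
	}
}`

func main() {
	u := gjson.Get(sample, "usage")
	in := u.Get("input_tokens").Int()
	out := u.Get("output_tokens").Int()
	total := u.Get("total_tokens").Int()
	// Same fallback as parseOpenAIResponsesUsage: derive the total when the
	// provider omits total_tokens.
	if total == 0 {
		total = in + out
	}
	fmt.Println("input:", in, "output:", out, "total:", total,
		"cached:", u.Get("input_tokens_details.cached_tokens").Int(),
		"reasoning:", u.Get("output_tokens_details.reasoning_tokens").Int())
}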