fix(copilot): lower static Claude context limits and expose them to Claude Code

The Copilot API enforces per-account prompt token limits (128K individual,
168K business) that are lower than the total context window (200K). When
the dynamic /models API fetch fails or returns no capabilities.limits,
the static fallback of 200K exceeds the real enforced limit, causing
intermittent "prompt token count exceeds the limit" errors.

Two complementary fixes:

1. Lower static Copilot Claude model ContextLength from 200000 to 128000
   (the conservative default matching defaultCopilotContextLength). Dynamic
   API limits override this when available.

2. Add context_length and max_completion_tokens to Claude-format model
   responses so Claude Code CLI can learn the actual Copilot limit instead
   of relying on its built-in 1M context configuration.
This commit is contained in:
kunish
2026-04-08 16:44:58 +08:00
parent 6bb9bf3132
commit 578c312660
2 changed files with 24 additions and 7 deletions

View File

@@ -323,6 +323,13 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
return nil
}
// defaultCopilotClaudeContextLength is the conservative prompt token limit for
// Claude models accessed via the GitHub Copilot API. Individual accounts are
// capped at 128K; business accounts at 168K. When the dynamic /models API fetch
// succeeds, the real per-account limit overrides this value. This constant is
// only used as a safe fallback.
const defaultCopilotClaudeContextLength = 128000
// GetGitHubCopilotModels returns the available models for GitHub Copilot.
// These models are available through the GitHub Copilot API at api.githubcopilot.com.
func GetGitHubCopilotModels() []*ModelInfo {
@@ -534,7 +541,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Haiku 4.5",
Description: "Anthropic Claude Haiku 4.5 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
},
@@ -546,7 +553,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Opus 4.1",
Description: "Anthropic Claude Opus 4.1 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 32000,
SupportedEndpoints: []string{"/chat/completions"},
},
@@ -558,7 +565,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Opus 4.5",
Description: "Anthropic Claude Opus 4.5 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -571,7 +578,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Opus 4.6",
Description: "Anthropic Claude Opus 4.6 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -584,7 +591,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Sonnet 4",
Description: "Anthropic Claude Sonnet 4 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -597,7 +604,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Sonnet 4.5",
Description: "Anthropic Claude Sonnet 4.5 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
@@ -610,7 +617,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Type: "github-copilot",
DisplayName: "Claude Sonnet 4.6",
Description: "Anthropic Claude Sonnet 4.6 via GitHub Copilot",
ContextLength: 200000,
ContextLength: defaultCopilotClaudeContextLength,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},

View File

@@ -1177,6 +1177,16 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
"dynamic_allowed": model.Thinking.DynamicAllowed,
}
}
// Include context limits so Claude Code can manage conversation
// context correctly, especially for Copilot-proxied models whose
// real prompt limit (128K-168K) is much lower than the 1M window
// that Claude Code may assume for Opus 4.6 with 1M context enabled.
if model.ContextLength > 0 {
result["context_length"] = model.ContextLength
}
if model.MaxCompletionTokens > 0 {
result["max_completion_tokens"] = model.MaxCompletionTokens
}
return result
case "gemini":