From 578c312660637e3a3329305295f0cd2204a228ad Mon Sep 17 00:00:00 2001 From: kunish Date: Wed, 8 Apr 2026 16:44:58 +0800 Subject: [PATCH] fix(copilot): lower static Claude context limits and expose them to Claude Code The Copilot API enforces per-account prompt token limits (128K individual, 168K business) that are lower than the total context window (200K). When the dynamic /models API fetch fails or returns no capabilities.limits, the static fallback of 200K exceeds the real enforced limit, causing intermittent "prompt token count exceeds the limit" errors. Two complementary fixes: 1. Lower static Copilot Claude model ContextLength from 200000 to 128000 (the conservative default matching defaultCopilotClaudeContextLength). Dynamic API limits override this when available. 2. Add context_length and max_completion_tokens to Claude-format model responses so Claude Code CLI can learn the actual Copilot limit instead of relying on its built-in 1M context configuration. --- internal/registry/model_definitions.go | 21 ++++++++++++++------- internal/registry/model_registry.go | 10 ++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 48f64af1..f718e084 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -323,6 +323,13 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { return nil } +// defaultCopilotClaudeContextLength is the conservative prompt token limit for +// Claude models accessed via the GitHub Copilot API. Individual accounts are +// capped at 128K; business accounts at 168K. When the dynamic /models API fetch +// succeeds, the real per-account limit overrides this value. This constant is +// only used as a safe fallback. +const defaultCopilotClaudeContextLength = 128000 + // GetGitHubCopilotModels returns the available models for GitHub Copilot. 
// These models are available through the GitHub Copilot API at api.githubcopilot.com. func GetGitHubCopilotModels() []*ModelInfo { @@ -534,7 +541,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Haiku 4.5", Description: "Anthropic Claude Haiku 4.5 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, }, @@ -546,7 +553,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Opus 4.1", Description: "Anthropic Claude Opus 4.1 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 32000, SupportedEndpoints: []string{"/chat/completions"}, }, @@ -558,7 +565,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Opus 4.5", Description: "Anthropic Claude Opus 4.5 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, @@ -571,7 +578,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Opus 4.6", Description: "Anthropic Claude Opus 4.6 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, @@ -584,7 +591,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Sonnet 4", Description: "Anthropic Claude Sonnet 4 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, Thinking: 
&ThinkingSupport{Levels: []string{"low", "medium", "high"}}, @@ -597,7 +604,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Sonnet 4.5", Description: "Anthropic Claude Sonnet 4.5 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, @@ -610,7 +617,7 @@ func GetGitHubCopilotModels() []*ModelInfo { Type: "github-copilot", DisplayName: "Claude Sonnet 4.6", Description: "Anthropic Claude Sonnet 4.6 via GitHub Copilot", - ContextLength: 200000, + ContextLength: defaultCopilotClaudeContextLength, MaxCompletionTokens: 64000, SupportedEndpoints: []string{"/chat/completions"}, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 9bfcd326..ebc02cf0 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -1177,6 +1177,16 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) "dynamic_allowed": model.Thinking.DynamicAllowed, } } + // Include context limits so Claude Code can manage conversation + // context correctly, especially for Copilot-proxied models whose + // real prompt limit (128K-168K) is much lower than the 1M window + // that Claude Code may assume for Opus 4.6 with 1M context enabled. + if model.ContextLength > 0 { + result["context_length"] = model.ContextLength + } + if model.MaxCompletionTokens > 0 { + result["max_completion_tokens"] = model.MaxCompletionTokens + } return result case "gemini":