feat(videos): add video authentication binding and update handler behavior

- Introduced `videoAuthBindingStore` for managing mappings of video IDs to credentials with TTL support.
- Updated video creation and retrieval handlers to bind and utilize credentials for authentication.
- Enhanced response models to include upstream models and adjusted request preparation logic.
- Added test coverage for video auth binding, TTL configuration, and expiration handling.
This commit is contained in:
Luis Pater
2026-06-15 13:38:40 +08:00
parent f85768eef3
commit bbef8da454
4 changed files with 369 additions and 26 deletions

View File

@@ -129,6 +129,10 @@ disable-image-generation: false
# Must start with "gpt-" (case-insensitive). If unset or invalid, defaults to "gpt-5.4-mini".
# gpt-image-2-base-model: "gpt-5.4-mini"
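# How long video IDs returned by /openai/v1/videos and xAI video creation stay bound
# to the credential that created them. Accepts duration strings such as "30m" or "3h".
# Empty, invalid, or non-positive values fall back to the default of 3h.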
video-result-auth-cache-ttl: "3h"
# Core auth auto-refresh worker pool size (OAuth/file-based auth token refresh).
# When > 0, overrides the default worker count (16).
# auth-auto-refresh-workers: 16

View File

@@ -28,6 +28,11 @@ type SDKConfig struct {
// default base model ("gpt-5.4-mini") is used.
GPTImage2BaseModel string `yaml:"gpt-image-2-base-model,omitempty" json:"gpt-image-2-base-model,omitempty"`
// VideoResultAuthCacheTTL controls how long video IDs stay pinned to the credential
// that created them. Accepts duration strings like "30m" or "3h".
// Empty, invalid, or non-positive values use the default 3h.
VideoResultAuthCacheTTL string `yaml:"video-result-auth-cache-ttl,omitempty" json:"video-result-auth-cache-ttl,omitempty"`
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
// Default is false for safety; when false, /v1internal:* requests are rejected.
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`

View File

@@ -9,6 +9,7 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/gin-gonic/gin"
@@ -36,12 +37,89 @@ const (
maxXAIVideoReferences = 7
)
// defaultVideoAuthBindingTTL is the binding lifetime used whenever no positive
// TTL is supplied to the store or configured on the handler.
const defaultVideoAuthBindingTTL = 3 * time.Hour
// videoAuthBindings is the package-level store that maps video IDs to the ID of
// the credential that created them.
var videoAuthBindings = newVideoAuthBindingStore()
type xaiVideoCreateMetadata struct {
Model string
Prompt string
Seconds string
Size string
CreatedAt int64
Model string
UpstreamModel string
Prompt string
Seconds string
Size string
CreatedAt int64
}
// videoAuthBinding records which credential created a video and until when
// that association is honored.
type videoAuthBinding struct {
	authID    string    // ID of the credential that created the video
	expiresAt time.Time // instant after which the binding is treated as absent
}

// videoAuthBindingStore is a mutex-guarded in-memory map from video ID to the
// credential that created it, with a per-entry expiry time.
//
// The zero value is usable: the map is allocated lazily on the first write.
// A nil *videoAuthBindingStore is also tolerated; set is a no-op and get
// reports "not found".
type videoAuthBindingStore struct {
	mu      sync.RWMutex
	entries map[string]videoAuthBinding
}

// newVideoAuthBindingStore returns an empty store with its map pre-allocated.
func newVideoAuthBindingStore() *videoAuthBindingStore {
	return &videoAuthBindingStore{
		entries: make(map[string]videoAuthBinding),
	}
}

// set binds videoID to authID for ttl, replacing any previous binding.
//
// Both identifiers are whitespace-trimmed; if either is empty afterwards the
// call does nothing. A non-positive ttl is replaced by
// defaultVideoAuthBindingTTL. Every call also drops all entries that have
// already expired, so the map does not grow without bound.
func (s *videoAuthBindingStore) set(videoID string, authID string, ttl time.Duration) {
	if s == nil {
		return
	}
	videoID = strings.TrimSpace(videoID)
	authID = strings.TrimSpace(authID)
	if videoID == "" || authID == "" {
		return
	}
	if ttl <= 0 {
		ttl = defaultVideoAuthBindingTTL
	}
	now := time.Now()

	s.mu.Lock()
	defer s.mu.Unlock()
	// Allocate on demand so a zero-value store does not panic on first write.
	if s.entries == nil {
		s.entries = make(map[string]videoAuthBinding)
	}
	s.cleanupExpiredLocked(now)
	s.entries[videoID] = videoAuthBinding{
		authID:    authID,
		expiresAt: now.Add(ttl),
	}
}

// get returns the auth ID bound to videoID and true when a non-expired
// binding exists. It returns "", false for a nil store, an empty (after
// trimming) videoID, an unknown videoID, or an expired binding. An expired
// binding is deleted as a side effect.
func (s *videoAuthBindingStore) get(videoID string) (string, bool) {
	if s == nil {
		return "", false
	}
	videoID = strings.TrimSpace(videoID)
	if videoID == "" {
		return "", false
	}
	now := time.Now()

	s.mu.RLock()
	entry, ok := s.entries[videoID]
	s.mu.RUnlock()
	if !ok {
		return "", false
	}
	if now.After(entry.expiresAt) {
		s.mu.Lock()
		// Re-check under the write lock: the entry may have been replaced by
		// a fresh binding between releasing the read lock and getting here.
		if current, exists := s.entries[videoID]; exists && now.After(current.expiresAt) {
			delete(s.entries, videoID)
		}
		s.mu.Unlock()
		return "", false
	}
	return entry.authID, true
}

// cleanupExpiredLocked deletes every entry whose expiry lies before now.
// The caller must hold s.mu for writing.
func (s *videoAuthBindingStore) cleanupExpiredLocked(now time.Time) {
	for videoID, entry := range s.entries {
		if now.After(entry.expiresAt) {
			delete(s.entries, videoID)
		}
	}
}
func videosModelBase(model string) string {
@@ -111,11 +189,6 @@ func canonicalXAIVideosModel(model string) string {
}
func responseVideosModel(model string) string {
_, baseModel := imagesModelParts(model)
baseModel = strings.TrimSpace(baseModel)
if isSoraVideosModel(baseModel) {
return baseModel
}
return canonicalXAIVideosModel(model)
}
@@ -179,6 +252,41 @@ func firstPostForm(c *gin.Context, keys ...string) string {
return ""
}
// videoAuthBindingTTL returns the configured lifetime of a video-to-credential
// binding. It falls back to defaultVideoAuthBindingTTL when the handler or its
// config is nil, or when VideoResultAuthCacheTTL is blank, unparsable, or not
// strictly positive.
func (h *OpenAIAPIHandler) videoAuthBindingTTL() time.Duration {
	if h == nil || h.BaseAPIHandler == nil || h.Cfg == nil {
		return defaultVideoAuthBindingTTL
	}
	configured := strings.TrimSpace(h.Cfg.VideoResultAuthCacheTTL)
	if configured == "" {
		return defaultVideoAuthBindingTTL
	}
	parsed, err := time.ParseDuration(configured)
	if err != nil || parsed <= 0 {
		return defaultVideoAuthBindingTTL
	}
	return parsed
}
// videoIDFromPayload extracts the video identifier from a JSON payload. The
// "request_id" field takes precedence; "id" is consulted only when
// "request_id" is missing or blank. The result is whitespace-trimmed and is
// empty when neither field yields a value.
func videoIDFromPayload(payload []byte) string {
	for _, key := range []string{"request_id", "id"} {
		if id := strings.TrimSpace(gjson.GetBytes(payload, key).String()); id != "" {
			return id
		}
	}
	return ""
}
// bindVideoAuthIDFromPayload records, in the package-level binding store, that
// the video identified in payload was created with authID. Nothing is stored
// when the payload carries no video ID. The binding lifetime comes from
// h.videoAuthBindingTTL().
func (h *OpenAIAPIHandler) bindVideoAuthIDFromPayload(payload []byte, authID string) {
	if id := videoIDFromPayload(payload); id != "" {
		videoAuthBindings.set(id, authID, h.videoAuthBindingTTL())
	}
}
// contextWithVideoAuthBinding returns ctx wrapped by handlers.WithPinnedAuthID
// when the binding store holds a live binding for videoID. Otherwise ctx is
// returned unchanged.
func (h *OpenAIAPIHandler) contextWithVideoAuthBinding(ctx context.Context, videoID string) context.Context {
	pinnedAuthID, found := videoAuthBindings.get(videoID)
	if !found {
		return ctx
	}
	return handlers.WithPinnedAuthID(ctx, pinnedAuthID)
}
func buildXAIVideosCreateRequest(rawJSON []byte, model string) ([]byte, xaiVideoCreateMetadata, error) {
prompt := strings.TrimSpace(gjson.GetBytes(rawJSON, "prompt").String())
if prompt == "" {
@@ -232,11 +340,12 @@ func buildXAIVideosCreateRequest(rawJSON []byte, model string) ([]byte, xaiVideo
}
meta := xaiVideoCreateMetadata{
Model: responseVideosModel(model),
Prompt: prompt,
Seconds: seconds,
Size: size,
CreatedAt: time.Now().Unix(),
Model: responseVideosModel(model),
UpstreamModel: videoModel,
Prompt: prompt,
Seconds: seconds,
Size: size,
CreatedAt: time.Now().Unix(),
}
return req, meta, nil
}
@@ -398,7 +507,7 @@ func buildVideosCreateAPIResponseFromXAI(payload []byte, meta xaiVideoCreateMeta
func buildVideosFailedAPIResponse(model string, code string, message string) []byte {
model = strings.TrimSpace(model)
if model == "" {
model = defaultOpenAIVideosModel
model = defaultXAIVideosModel
}
code = strings.TrimSpace(code)
if code == "" {
@@ -533,7 +642,7 @@ func openAIVideoStatus(status string) string {
func (h *OpenAIAPIHandler) VideosCreate(c *gin.Context) {
rawJSON, err := readVideosCreateRequest(c)
if err != nil {
writeVideosFailedError(c, http.StatusBadRequest, defaultOpenAIVideosModel, "invalid_request_error", fmt.Sprintf("Invalid request: %v", err))
writeVideosFailedError(c, http.StatusBadRequest, defaultXAIVideosModel, "invalid_request_error", fmt.Sprintf("Invalid request: %v", err))
return
}
@@ -547,7 +656,7 @@ func (h *OpenAIAPIHandler) VideosCreate(c *gin.Context) {
xaiReq, meta, err := buildXAIVideosCreateRequest(rawJSON, videoModel)
if err != nil {
writeVideosFailedError(c, http.StatusBadRequest, videoModel, "invalid_request_error", fmt.Sprintf("Invalid request: %v", err))
writeVideosFailedError(c, http.StatusBadRequest, responseVideosModel(videoModel), "invalid_request_error", fmt.Sprintf("Invalid request: %v", err))
return
}
@@ -586,7 +695,7 @@ func (h *OpenAIAPIHandler) handleXAIVideosNativePost(c *gin.Context) {
return
}
h.collectXAIVideosNative(c, rawJSON, videoModel)
h.collectXAIVideosNative(c, rawJSON, videoModel, true)
}
func (h *OpenAIAPIHandler) XAIVideosRetrieve(c *gin.Context) {
@@ -606,7 +715,7 @@ func (h *OpenAIAPIHandler) XAIVideosRetrieve(c *gin.Context) {
payload := []byte(`{}`)
payload, _ = sjson.SetBytes(payload, "request_id", requestID)
h.collectXAIVideosNative(c, payload, defaultXAIVideosModel)
h.collectXAIVideosNative(c, payload, defaultXAIVideosModel, false)
}
func (h *OpenAIAPIHandler) VideosRetrieve(c *gin.Context) {
@@ -626,6 +735,7 @@ func (h *OpenAIAPIHandler) VideosRetrieve(c *gin.Context) {
c.Header("Content-Type", "application/json")
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
cliCtx = h.contextWithVideoAuthBinding(cliCtx, videoID)
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, xaiVideosHandlerType, defaultXAIVideosModel, payload, "")
stopKeepAlive()
@@ -682,6 +792,7 @@ func (h *OpenAIAPIHandler) VideosContent(c *gin.Context) {
payload, _ = sjson.SetBytes(payload, "request_id", videoID)
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
cliCtx = h.contextWithVideoAuthBinding(cliCtx, videoID)
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, _, errMsg := h.ExecuteWithAuthManager(cliCtx, xaiVideosHandlerType, defaultXAIVideosModel, payload, "")
stopKeepAlive()
@@ -758,10 +869,18 @@ func copyVideoContentHeaders(dst http.Header, src http.Header) {
}
}
func (h *OpenAIAPIHandler) collectXAIVideosNative(c *gin.Context, rawJSON []byte, model string) {
func (h *OpenAIAPIHandler) collectXAIVideosNative(c *gin.Context, rawJSON []byte, model string, bindCreatedVideoAuth bool) {
c.Header("Content-Type", "application/json")
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
selectedAuthID := ""
if bindCreatedVideoAuth {
cliCtx = handlers.WithSelectedAuthIDCallback(cliCtx, func(authID string) {
selectedAuthID = authID
})
} else {
cliCtx = h.contextWithVideoAuthBinding(cliCtx, videoIDFromPayload(rawJSON))
}
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, xaiVideosHandlerType, model, rawJSON, "")
stopKeepAlive()
@@ -775,6 +894,9 @@ func (h *OpenAIAPIHandler) collectXAIVideosNative(c *gin.Context, rawJSON []byte
return
}
if bindCreatedVideoAuth {
h.bindVideoAuthIDFromPayload(resp, selectedAuthID)
}
handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
_, _ = c.Writer.Write(resp)
cliCancel(nil)
@@ -784,8 +906,16 @@ func (h *OpenAIAPIHandler) collectXAIVideosCreate(c *gin.Context, xaiReq []byte,
c.Header("Content-Type", "application/json")
cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
selectedAuthID := ""
cliCtx = handlers.WithSelectedAuthIDCallback(cliCtx, func(authID string) {
selectedAuthID = authID
})
upstreamModel := strings.TrimSpace(meta.UpstreamModel)
if upstreamModel == "" {
upstreamModel = meta.Model
}
stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, xaiVideosHandlerType, meta.Model, xaiReq, "")
resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, xaiVideosHandlerType, upstreamModel, xaiReq, "")
stopKeepAlive()
if errMsg != nil {
h.WriteErrorResponse(c, errMsg)
@@ -805,6 +935,7 @@ func (h *OpenAIAPIHandler) collectXAIVideosCreate(c *gin.Context, xaiReq []byte,
return
}
h.bindVideoAuthIDFromPayload(out, selectedAuthID)
handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
_, _ = c.Writer.Write(out)
cliCancel(nil)

View File

@@ -1,13 +1,21 @@
package openai
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync"
"testing"
"time"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v7/internal/registry"
apihandlers "github.com/router-for-me/CLIProxyAPI/v7/sdk/api/handlers"
coreauth "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/auth"
coreexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
sdkconfig "github.com/router-for-me/CLIProxyAPI/v7/sdk/config"
"github.com/tidwall/gjson"
)
@@ -32,6 +40,113 @@ func performVideosEndpointRequest(t *testing.T, method string, endpointPath stri
return resp
}
// performVideosRouteRequest serves one request through a fresh gin router and
// returns the recorded response. The handler is mounted at routePath (which may
// contain path parameters) as a GET route when method is GET and as a POST
// route otherwise; the request itself targets requestPath. The Content-Type
// header is set only when contentType is non-empty.
func performVideosRouteRequest(t *testing.T, method string, routePath string, requestPath string, contentType string, body io.Reader, handler gin.HandlerFunc) *httptest.ResponseRecorder {
	t.Helper()
	gin.SetMode(gin.TestMode)

	engine := gin.New()
	if method == http.MethodGet {
		engine.GET(routePath, handler)
	} else {
		engine.POST(routePath, handler)
	}

	request := httptest.NewRequest(method, requestPath, body)
	if contentType != "" {
		request.Header.Set("Content-Type", contentType)
	}

	recorder := httptest.NewRecorder()
	engine.ServeHTTP(recorder, request)
	return recorder
}
// videoAuthCaptureExecutor is a test executor that records the ID of the auth
// passed to each Execute call and answers with a canned "completed" video
// payload.
type videoAuthCaptureExecutor struct {
	mu        sync.Mutex // guards authIDs
	requestID string     // request_id echoed back when the request carries none
	authIDs   []string   // auth ID seen by each Execute call, in call order
}

// Identifier reports the provider key "xai".
func (e *videoAuthCaptureExecutor) Identifier() string { return "xai" }

// Execute appends the auth's ID (empty for a nil auth) to the capture log and
// returns a completed-video JSON payload. The payload's request_id is the
// trimmed "request_id" from the request payload when present, otherwise the
// executor's own requestID.
func (e *videoAuthCaptureExecutor) Execute(_ context.Context, auth *coreauth.Auth, req coreexecutor.Request, _ coreexecutor.Options) (coreexecutor.Response, error) {
	var usedAuthID string
	if auth != nil {
		usedAuthID = auth.ID
	}

	e.mu.Lock()
	e.authIDs = append(e.authIDs, usedAuthID)
	e.mu.Unlock()

	echoedID := strings.TrimSpace(gjson.GetBytes(req.Payload, "request_id").String())
	if echoedID == "" {
		echoedID = e.requestID
	}
	body := `{"request_id":"` + echoedID + `","status":"completed","progress":100,"video":{"url":"https://vidgen.x.ai/video.mp4","duration":4}}`
	return coreexecutor.Response{Payload: []byte(body)}, nil
}

// ExecuteStream always fails with a "not_implemented" error.
func (e *videoAuthCaptureExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (*coreexecutor.StreamResult, error) {
	return nil, &coreauth.Error{Code: "not_implemented", Message: "ExecuteStream not implemented"}
}

// Refresh returns the auth unchanged.
func (e *videoAuthCaptureExecutor) Refresh(_ context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
	return auth, nil
}

// CountTokens always fails with a "not_implemented" error.
func (e *videoAuthCaptureExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
}

// HttpRequest always fails with a "not_implemented" error.
func (e *videoAuthCaptureExecutor) HttpRequest(context.Context, *coreauth.Auth, *http.Request) (*http.Response, error) {
	return nil, &coreauth.Error{Code: "not_implemented", Message: "HttpRequest not implemented"}
}

// AuthIDs returns a copy of the auth IDs captured so far, in call order.
func (e *videoAuthCaptureExecutor) AuthIDs() []string {
	e.mu.Lock()
	defer e.mu.Unlock()

	snapshot := make([]string, len(e.authIDs))
	copy(snapshot, e.authIDs)
	return snapshot
}
// resetVideoAuthBindingsForTest swaps the package-level binding store for a
// fresh empty one and restores the previous store when the test finishes.
func resetVideoAuthBindingsForTest(t *testing.T) {
	t.Helper()

	saved := videoAuthBindings
	t.Cleanup(func() { videoAuthBindings = saved })
	videoAuthBindings = newVideoAuthBindingStore()
}
// newVideoAuthBindingTestHandler builds an OpenAIAPIHandler backed by an auth
// manager with a round-robin selector, the given executor, and two active "xai"
// auths named "<requestID>-auth-a" and "<requestID>-auth-b". Each auth is also
// registered in the global model registry for defaultXAIVideosModel and is
// unregistered again when the test ends.
func newVideoAuthBindingTestHandler(t *testing.T, executor *videoAuthCaptureExecutor) *OpenAIAPIHandler {
	t.Helper()

	manager := coreauth.NewManager(nil, &coreauth.RoundRobinSelector{}, nil)
	manager.RegisterExecutor(executor)

	ids := []string{executor.requestID + "-auth-a", executor.requestID + "-auth-b"}
	for i := range ids {
		id := ids[i]
		credential := &coreauth.Auth{
			ID:       id,
			Provider: "xai",
			Status:   coreauth.StatusActive,
		}
		_, errRegister := manager.Register(context.Background(), credential)
		if errRegister != nil {
			t.Fatalf("manager.Register(%s): %v", id, errRegister)
		}
		models := []*registry.ModelInfo{{ID: defaultXAIVideosModel}}
		registry.GetGlobalRegistry().RegisterClient(id, credential.Provider, models)
	}

	// Registered only after every auth was set up, as in the original flow.
	t.Cleanup(func() {
		for _, id := range ids {
			registry.GetGlobalRegistry().UnregisterClient(id)
		}
	})

	cfg := &sdkconfig.SDKConfig{}
	return NewOpenAIAPIHandler(apihandlers.NewBaseAPIHandlers(cfg, manager))
}
func TestVideosModelValidationAllowsXAIVideoModel(t *testing.T) {
for _, model := range []string{
"grok-imagine-video",
@@ -72,8 +187,8 @@ func TestBuildXAIVideosCreateRequestMapsSoraModelToXAIBackend(t *testing.T) {
if got := gjson.GetBytes(req, "model").String(); got != defaultXAIVideosModel {
t.Fatalf("upstream model = %q, want %s", got, defaultXAIVideosModel)
}
if meta.Model != "sora-2" {
t.Fatalf("response model = %q, want sora-2", meta.Model)
if meta.Model != defaultXAIVideosModel {
t.Fatalf("response model = %q, want %s", meta.Model, defaultXAIVideosModel)
}
}
@@ -343,8 +458,8 @@ func TestVideosCreateInvalidSizeReturnsFailedVideoResource(t *testing.T) {
if got := gjson.GetBytes(resp.Body.Bytes(), "object").String(); got != "video" {
t.Fatalf("object = %q, want video", got)
}
if got := gjson.GetBytes(resp.Body.Bytes(), "model").String(); got != "sora-2" {
t.Fatalf("model = %q, want sora-2", got)
if got := gjson.GetBytes(resp.Body.Bytes(), "model").String(); got != defaultXAIVideosModel {
t.Fatalf("model = %q, want %s", got, defaultXAIVideosModel)
}
if got := gjson.GetBytes(resp.Body.Bytes(), "status").String(); got != "failed" {
t.Fatalf("status = %q, want failed", got)
@@ -394,6 +509,94 @@ func TestXAIVideosNativeRejectsInvalidJSON(t *testing.T) {
}
}
// TestVideosCreateBindsRetrieveToSelectedAuth creates a video through the
// OpenAI-style endpoint and then retrieves it, asserting that the retrieve
// call reaches the executor with the same auth ID as the create call.
func TestVideosCreateBindsRetrieveToSelectedAuth(t *testing.T) {
	resetVideoAuthBindingsForTest(t)
	capture := &videoAuthCaptureExecutor{requestID: "video-openai-bound"}
	h := newVideoAuthBindingTestHandler(t, capture)

	createBody := strings.NewReader(`{"model":"sora-2","prompt":"make a video"}`)
	created := performVideosEndpointRequest(t, http.MethodPost, openAIVideosPath, "application/json", createBody, h.VideosCreate)
	if created.Code != http.StatusOK {
		t.Fatalf("create status = %d, want %d: %s", created.Code, http.StatusOK, created.Body.String())
	}

	createdJSON := created.Body.Bytes()
	videoID := gjson.GetBytes(createdJSON, "id").String()
	if videoID != capture.requestID {
		t.Fatalf("created video id = %q, want %q", videoID, capture.requestID)
	}
	if got := gjson.GetBytes(createdJSON, "model").String(); got != defaultXAIVideosModel {
		t.Fatalf("created model = %q, want %s", got, defaultXAIVideosModel)
	}

	routePattern := openAIVideosPath + "/:video_id"
	requestURL := openAIVideosPath + "/" + videoID
	retrieved := performVideosRouteRequest(t, http.MethodGet, routePattern, requestURL, "", nil, h.VideosRetrieve)
	if retrieved.Code != http.StatusOK {
		t.Fatalf("retrieve status = %d, want %d: %s", retrieved.Code, http.StatusOK, retrieved.Body.String())
	}

	seen := capture.AuthIDs()
	if len(seen) != 2 {
		t.Fatalf("authIDs = %v, want two calls", seen)
	}
	createAuth, retrieveAuth := seen[0], seen[1]
	if retrieveAuth != createAuth {
		t.Fatalf("retrieve auth = %q, want create auth %q; sequence=%v", retrieveAuth, createAuth, seen)
	}
}
// TestXAIVideosNativeCreateBindsRetrieveToSelectedAuth creates a video through
// the native xAI generations endpoint and then retrieves it, asserting that
// the retrieve call reaches the executor with the same auth ID as the create
// call.
func TestXAIVideosNativeCreateBindsRetrieveToSelectedAuth(t *testing.T) {
	resetVideoAuthBindingsForTest(t)
	capture := &videoAuthCaptureExecutor{requestID: "video-xai-bound"}
	h := newVideoAuthBindingTestHandler(t, capture)

	createBody := strings.NewReader(`{"model":"grok-imagine-video","prompt":"make a video"}`)
	created := performVideosEndpointRequest(t, http.MethodPost, xaiVideosGenerationsAPI, "application/json", createBody, h.XAIVideosGenerations)
	if created.Code != http.StatusOK {
		t.Fatalf("create status = %d, want %d: %s", created.Code, http.StatusOK, created.Body.String())
	}

	videoID := gjson.GetBytes(created.Body.Bytes(), "request_id").String()
	if videoID != capture.requestID {
		t.Fatalf("created request_id = %q, want %q", videoID, capture.requestID)
	}

	routePattern := videosPath + "/:request_id"
	requestURL := videosPath + "/" + videoID
	retrieved := performVideosRouteRequest(t, http.MethodGet, routePattern, requestURL, "", nil, h.XAIVideosRetrieve)
	if retrieved.Code != http.StatusOK {
		t.Fatalf("retrieve status = %d, want %d: %s", retrieved.Code, http.StatusOK, retrieved.Body.String())
	}

	seen := capture.AuthIDs()
	if len(seen) != 2 {
		t.Fatalf("authIDs = %v, want two calls", seen)
	}
	createAuth, retrieveAuth := seen[0], seen[1]
	if retrieveAuth != createAuth {
		t.Fatalf("retrieve auth = %q, want create auth %q; sequence=%v", retrieveAuth, createAuth, seen)
	}
}
func TestVideoAuthBindingTTLUsesConfig(t *testing.T) {
base := apihandlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{VideoResultAuthCacheTTL: "45m"}, nil)
handler := NewOpenAIAPIHandler(base)
if got := handler.videoAuthBindingTTL(); got != 45*time.Minute {
t.Fatalf("videoAuthBindingTTL() = %v, want 45m", got)
}
base = apihandlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{VideoResultAuthCacheTTL: "invalid"}, nil)
handler = NewOpenAIAPIHandler(base)
if got := handler.videoAuthBindingTTL(); got != defaultVideoAuthBindingTTL {
t.Fatalf("invalid videoAuthBindingTTL() = %v, want %v", got, defaultVideoAuthBindingTTL)
}
}
// TestVideoAuthBindingStoreExpiresEntries seeds a binding that is already past
// its expiry and checks that get both hides it and deletes it from the map.
func TestVideoAuthBindingStoreExpiresEntries(t *testing.T) {
	const key = "video-expired"

	store := newVideoAuthBindingStore()
	store.entries[key] = videoAuthBinding{
		authID:    "auth-expired",
		expiresAt: time.Now().Add(-time.Second),
	}

	authID, ok := store.get(key)
	if ok {
		t.Fatalf("expired binding returned authID=%q", authID)
	}
	if _, exists := store.entries[key]; exists {
		t.Fatal("expired binding was not removed")
	}
}
func TestVideosCreateFormRequest(t *testing.T) {
rawJSON, err := videosCreateRequestFromFormContext("model=grok-imagine-video&prompt=make+a+video&seconds=4&size=720x1280&input_reference%5Bimage_url%5D=https%3A%2F%2Fexample.com%2Fa.png")
if err != nil {