feat(api, xai): add xAI Grok video model support with API integration

- Introduced the new xAI `grok-imagine-video` model for video generation with configurable options (e.g., duration, size, resolution).
- Implemented video-specific API endpoints (`/v1/videos`, `/v1/videos/generations`, `/v1/videos/edits`, `/v1/videos/extensions`), including request validation and model handling.
- Enhanced model registry with `xaiBuiltinVideoModelID` and metadata for video capabilities.
- Added unit tests to validate video model support, request structures, and API response handling.
- Extended `XAIExecutor` to integrate video generation and retrieval via runtime requests.
This commit is contained in:
Luis Pater
2026-05-17 02:53:50 +08:00
parent 2ff9e33e26
commit 53d1fd6c5c
9 changed files with 1130 additions and 2 deletions

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"sort"
"strings"
"time"
@@ -29,9 +30,15 @@ var xaiDataTag = []byte("data:")
// Handler type identifiers. xaiVideoHandlerType is compared against the
// request's source format to recognise video requests; xaiImageHandlerType is
// presumably its image counterpart (its use is outside this view).
const (
	xaiImageHandlerType = "openai-image"
	xaiVideoHandlerType = "openai-video"
)

// Upstream image endpoint paths, relative to the API base URL.
const (
	xaiImagesGenerationsPath    = "/images/generations"
	xaiImagesEditsPath          = "/images/edits"
	xaiDefaultImageEndpointPath = xaiImagesGenerationsPath
)

// Upstream video endpoint paths, relative to the API base URL. xaiVideosPath
// is the collection root under which a single video request is fetched by ID.
const (
	xaiVideosGenerationsPath = "/videos/generations"
	xaiVideosEditsPath       = "/videos/edits"
	xaiVideosExtensionsPath  = "/videos/extensions"
	xaiVideosPath            = "/videos"
)

// xaiIdempotencyKeyMetaKey is the request-metadata key under which an
// idempotency key for video POST requests is looked up.
const xaiIdempotencyKeyMetaKey = "idempotency_key"
// XAIExecutor is a stateless executor for xAI Grok's Responses API.
@@ -86,6 +93,9 @@ func (e *XAIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
if endpointPath := xaiImageEndpointPath(opts); endpointPath != "" {
return e.executeImages(ctx, auth, req, endpointPath)
}
if xaiIsVideoRequest(opts) {
return e.executeVideos(ctx, auth, req, opts)
}
token, baseURL := xaiCreds(auth)
if baseURL == "" {
@@ -207,6 +217,71 @@ func (e *XAIExecutor) executeImages(ctx context.Context, auth *cliproxyauth.Auth
return cliproxyexecutor.Response{Payload: data, Headers: httpResp.Header.Clone()}, nil
}
// executeVideos sends a video request to the xAI API and returns the raw
// upstream response body together with a copy of the response headers.
//
// Routing:
//   - When the originating request path resolves to a native video endpoint
//     (generations, edits or extensions), the payload is POSTed to that path.
//   - Otherwise, when the payload carries a non-blank "request_id", the call
//     becomes a body-less GET of /videos/{request_id}.
//   - Otherwise the payload is POSTed to /videos/generations.
//
// For POST requests an idempotency key is forwarded in the
// "x-idempotency-key" header, taken from opts.Metadata first and from
// opts.Headers as a fallback. A non-2xx upstream status is reported as a
// statusErr holding the status code and the response body.
func (e *XAIExecutor) executeVideos(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
	token, baseURL := xaiCreds(auth)
	if baseURL == "" {
		baseURL = xaiauth.DefaultAPIBaseURL
	}

	// Start from the default (create a generation) and override below.
	verb := http.MethodPost
	target := xaiVideosGenerationsPath
	var reqBody io.Reader = bytes.NewReader(req.Payload)

	nativePath := xaiVideoEndpointPath(opts)
	isNative := nativePath == xaiVideosGenerationsPath ||
		nativePath == xaiVideosEditsPath ||
		nativePath == xaiVideosExtensionsPath
	if isNative {
		target = nativePath
	} else if videoID := strings.TrimSpace(gjson.GetBytes(req.Payload, "request_id").String()); videoID != "" {
		// No native endpoint was requested but an ID is present: treat the
		// call as a retrieval, which carries no request body.
		verb = http.MethodGet
		target = xaiVideosPath + "/" + url.PathEscape(videoID)
		reqBody = nil
	}

	requestURL := strings.TrimSuffix(baseURL, "/") + target
	httpReq, err := http.NewRequestWithContext(ctx, verb, requestURL, reqBody)
	if err != nil {
		return resp, err
	}
	applyXAIHeaders(httpReq, auth, token, false, "")

	if verb == http.MethodPost {
		// Metadata takes precedence; the inbound header is only a fallback.
		idemKey := xaiMetadataString(opts.Metadata, xaiIdempotencyKeyMetaKey)
		if idemKey == "" && opts.Headers != nil {
			idemKey = strings.TrimSpace(opts.Headers.Get("x-idempotency-key"))
		}
		if idemKey != "" {
			httpReq.Header.Set("x-idempotency-key", idemKey)
		}
	}

	e.recordXAIRequest(ctx, auth, requestURL, httpReq.Header.Clone(), req.Payload)

	client := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
	httpResp, err := client.Do(httpReq)
	if err != nil {
		helps.RecordAPIResponseError(ctx, e.cfg, err)
		return resp, err
	}
	defer func() {
		if errClose := httpResp.Body.Close(); errClose != nil {
			log.Errorf("xai executor: close response body error: %v", errClose)
		}
	}()
	helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())

	data, err := io.ReadAll(httpResp.Body)
	if err != nil {
		helps.RecordAPIResponseError(ctx, e.cfg, err)
		return resp, err
	}
	helps.AppendAPIResponseChunk(ctx, e.cfg, data)

	status := httpResp.StatusCode
	if succeeded := status >= 200 && status < 300; !succeeded {
		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", status, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
		return resp, statusErr{code: status, msg: string(data)}
	}

	resp = cliproxyexecutor.Response{Payload: data, Headers: httpResp.Header.Clone()}
	return resp, nil
}
func (e *XAIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
token, baseURL := xaiCreds(auth)
if baseURL == "" {
@@ -525,6 +600,27 @@ func xaiImageEndpointPath(opts cliproxyexecutor.Options) string {
return xaiDefaultImageEndpointPath
}
// xaiIsVideoRequest reports whether the request's source format is the video
// handler type.
func xaiIsVideoRequest(opts cliproxyexecutor.Options) bool {
	sourceFormat := opts.SourceFormat.String()
	return sourceFormat == xaiVideoHandlerType
}
// xaiVideoEndpointPath maps the originating request path, read from the
// request metadata, to the matching upstream video endpoint path.
//
// It returns "" when the request is not a video request or when the request
// path does not end in one of the edits, extensions or generations endpoints.
func xaiVideoEndpointPath(opts cliproxyexecutor.Options) string {
	if !xaiIsVideoRequest(opts) {
		return ""
	}

	requestPath := xaiMetadataString(opts.Metadata, cliproxyexecutor.RequestPathMetadataKey)

	// Checked in the same order as before: edits, extensions, generations.
	nativePaths := [...]string{
		xaiVideosEditsPath,
		xaiVideosExtensionsPath,
		xaiVideosGenerationsPath,
	}
	for _, native := range nativePaths {
		if strings.HasSuffix(requestPath, native) {
			return native
		}
	}
	return ""
}
func xaiMetadataString(meta map[string]any, key string) string {
if len(meta) == 0 || key == "" {
return ""

View File

@@ -229,3 +229,168 @@ func TestXAIExecutorExecuteImagesUsesEditsEndpoint(t *testing.T) {
t.Fatalf("path = %q, want /images/edits", gotPath)
}
}
// TestXAIExecutorExecuteVideosCreate verifies that a video request without a
// request path or request_id is POSTed to /videos/generations with the bearer
// token, the idempotency key taken from the options metadata, and the payload
// forwarded unchanged, and that the upstream response body is returned as-is.
func TestXAIExecutorExecuteVideosCreate(t *testing.T) {
	const requestBody = `{"model":"grok-imagine-video","prompt":"animate","duration":4}`

	var gotPath string
	var gotMethod string
	var gotAuth string
	var gotIdempotencyKey string
	var gotBody []byte
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		gotPath = r.URL.Path
		gotMethod = r.Method
		gotAuth = r.Header.Get("Authorization")
		gotIdempotencyKey = r.Header.Get("x-idempotency-key")
		body, errRead := io.ReadAll(r.Body)
		if errRead != nil {
			// The handler runs on the server's goroutine, where t.Fatalf
			// (FailNow) must not be called. Record the failure and answer
			// with an error status so Execute fails on the test goroutine.
			t.Errorf("read body: %v", errRead)
			http.Error(w, "read body failed", http.StatusInternalServerError)
			return
		}
		gotBody = body
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"request_id":"vid_123"}`))
	}))
	defer server.Close()

	exec := NewXAIExecutor(&config.Config{})
	auth := &cliproxyauth.Auth{
		Provider:   "xai",
		Attributes: map[string]string{"base_url": server.URL},
		Metadata:   map[string]any{"access_token": "xai-token"},
	}
	resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
		Model:   "grok-imagine-video",
		Payload: []byte(requestBody),
	}, cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("openai-video"),
		Metadata: map[string]any{
			"idempotency_key": "idem-123",
		},
	})
	if err != nil {
		t.Fatalf("Execute() error = %v", err)
	}

	if gotMethod != http.MethodPost {
		t.Fatalf("method = %q, want POST", gotMethod)
	}
	if gotPath != "/videos/generations" {
		t.Fatalf("path = %q, want /videos/generations", gotPath)
	}
	if gotAuth != "Bearer xai-token" {
		t.Fatalf("Authorization = %q, want Bearer xai-token", gotAuth)
	}
	if gotIdempotencyKey != "idem-123" {
		t.Fatalf("x-idempotency-key = %q, want idem-123", gotIdempotencyKey)
	}
	if string(gotBody) != requestBody {
		t.Fatalf("body = %s", string(gotBody))
	}
	if gjson.GetBytes(resp.Payload, "request_id").String() != "vid_123" {
		t.Fatalf("payload = %s", string(resp.Payload))
	}
}
// TestXAIExecutorExecuteVideosRetrieve verifies that a video request whose
// payload only carries a request_id is sent upstream as GET /videos/{id} and
// that the upstream response body is returned unchanged.
func TestXAIExecutorExecuteVideosRetrieve(t *testing.T) {
	// seen holds what the fake upstream observed for the single request.
	var seen struct {
		method string
		path   string
	}
	upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		seen.path = r.URL.Path
		seen.method = r.Method
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"status":"done","video":{"url":"https://vidgen.x.ai/video.mp4","duration":6},"model":"grok-imagine-video","progress":100}`))
	}))
	defer upstream.Close()

	executor := NewXAIExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{
		Provider:   "xai",
		Attributes: map[string]string{"base_url": upstream.URL},
		Metadata:   map[string]any{"access_token": "xai-token"},
	}
	request := cliproxyexecutor.Request{
		Model:   "grok-imagine-video",
		Payload: []byte(`{"request_id":"vid_123"}`),
	}
	options := cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("openai-video"),
	}

	resp, err := executor.Execute(context.Background(), credentials, request, options)
	if err != nil {
		t.Fatalf("Execute() error = %v", err)
	}

	if seen.method != http.MethodGet {
		t.Fatalf("method = %q, want GET", seen.method)
	}
	if seen.path != "/videos/vid_123" {
		t.Fatalf("path = %q, want /videos/vid_123", seen.path)
	}
	if videoURL := gjson.GetBytes(resp.Payload, "video.url").String(); videoURL != "https://vidgen.x.ai/video.mp4" {
		t.Fatalf("payload = %s", string(resp.Payload))
	}
}
// TestXAIExecutorExecuteVideosUsesNativeEndpointFromRequestPath verifies that
// the request path recorded in the options metadata selects the matching
// upstream video endpoint and that the request is sent as a POST.
func TestXAIExecutorExecuteVideosUsesNativeEndpointFromRequestPath(t *testing.T) {
	type routeCase struct {
		name     string
		inbound  string // request path stored in the options metadata
		upstream string // path the fake xAI server is expected to receive
	}
	cases := []routeCase{
		{name: "generations", inbound: "/v1/videos/generations", upstream: "/videos/generations"},
		{name: "edits", inbound: "/v1/videos/edits", upstream: "/videos/edits"},
		{name: "extensions", inbound: "/v1/videos/extensions", upstream: "/videos/extensions"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			var seenPath, seenMethod string
			fake := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				seenPath = r.URL.Path
				seenMethod = r.Method
				w.Header().Set("Content-Type", "application/json")
				_, _ = w.Write([]byte(`{"request_id":"vid_123"}`))
			}))
			t.Cleanup(fake.Close)

			executor := NewXAIExecutor(&config.Config{})
			credentials := &cliproxyauth.Auth{
				Provider:   "xai",
				Attributes: map[string]string{"base_url": fake.URL},
				Metadata:   map[string]any{"access_token": "xai-token"},
			}
			request := cliproxyexecutor.Request{
				Model:   "grok-imagine-video",
				Payload: []byte(`{"model":"grok-imagine-video","prompt":"animate"}`),
			}
			options := cliproxyexecutor.Options{
				SourceFormat: sdktranslator.FromString("openai-video"),
				Metadata: map[string]any{
					cliproxyexecutor.RequestPathMetadataKey: tc.inbound,
				},
			}

			if _, err := executor.Execute(context.Background(), credentials, request, options); err != nil {
				t.Fatalf("Execute() error = %v", err)
			}

			if seenMethod != http.MethodPost {
				t.Fatalf("method = %q, want POST", seenMethod)
			}
			if seenPath != tc.upstream {
				t.Fatalf("path = %q, want %s", seenPath, tc.upstream)
			}
		})
	}
}