feat(i2v): enhance image-to-video documentation and single/dual frame support

- Update README.md to clarify single vs dual frame image support with automatic model_key selection
- Add detailed explanation of automatic adaptation system for different image counts
- Include clear distinction between single frame mode (1 image) and dual frame mode (2 images) with proper model selection

fix(browser-captcha): increase page load timeout from 15s to 60s

- Extend retry range from 15 to 60 attempts for page loading
- Update debug logging to reflect new timeout values
- Improve reliability of captcha service by allowing more time for page load completion

fix(api): correct I2V API endpoint URL for single frame generation

- Change URL from batchAsyncGenerateVideoStartAndEndImage to batchAsyncGenerateVideoStartImage for single frame scenarios

fix(model-config): correct model_key for I2V single frame mode

- Fix typo in the model_key of veo_3_1_i2v_s_fast_fl_landscape: remove the stray 'landscape_' segment (veo_3_1_i2v_s_fast_landscape_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_fl_ultra_relaxed)
- Implement automatic model_key transformation for single frame mode by replacing '_fl_' with '_' in model keys
- Add debug logging for model key transformation process
2026-06-02 21:02:36 +08:00 · 2026-01-08 11:45:24 +08:00
parent 9291512bd2
commit 91dcd8fd85
4 changed files with 17 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -117,7 +117,11 @@ python main.py
 | `veo_2_0_t2v_landscape` | 文生视频 | 横屏 |

 #### 首尾帧模型 (I2V - Image to Video)
-📸 **支持1-2张图片：首尾帧**
+📸 **支持1-2张图片：1张作为首帧，2张作为首尾帧**
+
+> 💡 **自动适配**：系统会根据图片数量自动选择对应的 model_key
+> - **单帧模式**（1张图）：使用首帧生成视频
+> - **双帧模式**（2张图）：使用首帧+尾帧生成过渡视频

 | 模型名称 | 说明| 尺寸 |
 |---------|---------|--------|
--- a/src/services/browser_captcha_personal.py
+++ b/src/services/browser_captcha_personal.py
@@ -131,11 +131,11 @@ class BrowserCaptchaService:
        
        # 等待页面加载完成（带重试机制）
        page_loaded = False
-        for retry in range(15):
+        for retry in range(60):
            try:
                await asyncio.sleep(1)
                ready_state = await self.resident_tab.evaluate("document.readyState")
-                debug_logger.log_info(f"[BrowserCaptcha] 页面状态: {ready_state} (重试 {retry + 1}/15)")
+                debug_logger.log_info(f"[BrowserCaptcha] 页面状态: {ready_state} (重试 {retry + 1}/60)")
                if ready_state == "complete":
                    page_loaded = True
                    break
@@ -391,7 +391,7 @@ class BrowserCaptchaService:
            
            # 等待页面加载完成
            page_loaded = False
-            for retry in range(15):
+            for retry in range(60):
                try:
                    await asyncio.sleep(1)
                    ready_state = await tab.evaluate("document.readyState")
@@ -402,7 +402,7 @@ class BrowserCaptchaService:
                    debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
                    return None
                except Exception as e:
-                    debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e}，重试 {retry + 1}/15...")
+                    debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e}，重试 {retry + 1}/60...")
                    await asyncio.sleep(1)
            
            if not page_loaded:
--- a/src/services/flow_client.py
+++ b/src/services/flow_client.py
@@ -572,7 +572,7 @@ class FlowClient:
        Returns:
            同 generate_video_text
        """
-        url = f"{self.api_base_url}/video:batchAsyncGenerateVideoStartAndEndImage"
+        url = f"{self.api_base_url}/video:batchAsyncGenerateVideoStartImage"

        # 获取 reCAPTCHA token
        recaptcha_token = await self._get_recaptcha_token(project_id) or ""
--- a/src/services/generation_handler.py
+++ b/src/services/generation_handler.py
@@ -118,7 +118,7 @@ MODEL_CONFIG = {
    "veo_3_1_i2v_s_fast_fl_landscape": {
        "type": "video",
        "video_type": "i2v",
-        "model_key": "veo_3_1_i2v_s_fast_landscape_fl_ultra_relaxed",
+        "model_key": "veo_3_1_i2v_s_fast_fl_ultra_relaxed",
        "aspect_ratio": "VIDEO_ASPECT_RATIO_LANDSCAPE",
        "supports_images": True,
        "min_images": 1,
@@ -594,12 +594,16 @@ class GenerationHandler:
                        user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
                    )
                else:
-                    # 只有首帧
+                    # 只有首帧 - 需要将 model_key 中的 _fl_ 替换为 _
+                    # 例如: veo_3_1_i2v_s_fast_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_ultra_relaxed
+                    #       veo_3_1_i2v_s_fast_portrait_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_portrait_ultra_relaxed
+                    actual_model_key = model_config["model_key"].replace("_fl_", "_")
+                    debug_logger.log_info(f"[I2V] 单帧模式，model_key: {model_config['model_key']} -> {actual_model_key}")
                    result = await self.flow_client.generate_video_start_image(
                        at=token.at,
                        project_id=project_id,
                        prompt=prompt,
-                        model_key=model_config["model_key"],
+                        model_key=actual_model_key,
                        aspect_ratio=model_config["aspect_ratio"],
                        start_media_id=start_media_id,
                        user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"