diff --git a/README.md b/README.md index a7e1392..c33059e 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,18 @@ BILIVE -*7 x 24 小时无人监守录制、渲染弹幕、识别字幕、自动上传,启动项目,人人都是录播员。* +*7 x 24 小时无人监守录制、渲染弹幕、识别字幕、自动切片、自动上传,启动项目,人人都是录播员。* [:page_facing_up: Documentation](https://timerring.github.io/bilive/) | [:gear: Installation](#quick-start) | [:thinking: Reporting Issues](https://github.com/timerring/bilive/issues/new/choose) +支持模型 + + OpenAI whisper + Zhipu GLM-4V-PLUS + Google Gemini 1.5 Pro + ## 1. Introduction @@ -29,6 +35,7 @@ - **自动渲染弹幕**:自动转换xml为ass弹幕文件并且渲染到视频中形成**有弹幕版视频**并自动上传。 - **硬件要求极低**:无需GPU,只需最基础的单核CPU搭配最低的运存即可完成录制,弹幕渲染,上传等等全部过程,无最低配置要求,10年前的电脑或服务器依然可以使用! - **( :tada: NEW)自动渲染字幕**(如需使用本功能,则需保证有 Nvidia 显卡):采用 OpenAI 的开源模型 [`whisper`](https://github.com/openai/whisper),自动识别视频内语音并转换为字幕渲染至视频中。 +- **( :tada: NEW)自动切片上传**:根据弹幕密度计算寻找高能片段并切片,结合多模态视频理解大模型 [`GLM-4V-PLUS`](https://bigmodel.cn/dev/api/normal-model/glm-4) 自动生成有意思的切片标题及内容,并且自动上传。 项目架构流程如下: @@ -46,8 +53,13 @@ graph TD ifDanmaku -->|有弹幕| DanmakuFactory[DanmakuFactory] ifDanmaku -->|无弹幕| ffmpeg1[ffmpeg] DanmakuFactory[DanmakuFactory] --根据分辨率转换弹幕--> ffmpeg1[ffmpeg] + ffmpeg1[ffmpeg] --渲染弹幕及字幕 --> Video[视频文件] + Video[视频文件] --计算弹幕密度并切片--> GLM[多模态视频理解模型] + GLM[多模态视频理解模型] --生成切片信息--> slice[视频切片] end - ffmpeg1[ffmpeg] --渲染弹幕及字幕 --> uploadQueue[(上传队列)] + + slice[视频切片] --> uploadQueue[(上传队列)] + Video[视频文件] --> uploadQueue[(上传队列)] User((用户))--upload-->startUpload(启动视频上传进程) startUpload(启动视频上传进程) <--扫描队列并上传视频--> uploadQueue[(上传队列)] @@ -110,8 +122,9 @@ pip install -r requirements.txt ./setPath.sh && source ~/.bashrc ``` -#### 3. 配置 whisper 模型 +#### 3. 
配置 whisper 模型及 GLM-4V-PLUS 模型 +##### 3.1 whisper 模型 项目默认采用 [`small`](https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt) 模型,请点击下载所需文件,并放置在 `src/subtitle/models` 文件夹中。 > [!TIP] @@ -119,6 +132,11 @@ pip install -r requirements.txt > + 更多模型请参考 [whisper 参数模型](https://timerring.github.io/bilive/models.html) 部分。 > + 更换模型方法请参考 [更换模型方法](https://timerring.github.io/bilive/models.html#更换模型方法) 部分。 +##### 3.2 GLM-4V-PLUS 模型 + +> 此功能默认关闭,如果需要打开请将 `src/config.py` 文件中的 `AUTO_SLICE` 参数设置为 `True`。 + +在配置文件 `src/config.py` 中,`SLICE_DURATION` 以秒为单位设置切片时长(不建议超过 1 分钟)。项目的自动切片功能需要使用智谱的 [`GLM-4V-PLUS`](https://bigmodel.cn/dev/api/normal-model/glm-4) 模型,请自行[注册账号](https://www.bigmodel.cn/invite?icode=shBtZUfNE6FfdMH1R6NybGczbXFgPRGIalpycrEwJ28%3D)并申请 API Key,填写到 `src/config.py` 文件中对应的 `Your_API_KEY` 中。 #### 4. biliup-rs 登录 @@ -176,7 +194,7 @@ logs # 日志文件夹 ``` ### Installation(无 GPU 版本) -无 GPU 版本过程基本同上,可以跳过步骤 3,需要注意在执行步骤 5 **之前**完成以下设置将确保完全用 CPU 渲染视频弹幕。 +无 GPU 版本过程基本同上,可以跳过步骤 3 配置 whisper 的部分,需要注意在执行步骤 5 **之前**完成以下设置将确保完全用 CPU 渲染视频弹幕。 1. 请将 `src/config.py` 文件中的 `GPU_EXIST` 参数设置为 `False`。(若不置为 `False`,则会使用 CPU 推理,不推荐,可自行根据硬件条件进行尝试。) 2. 
将 `MODEL_TYPE` 调整为 `merge` 或者 `append`。 diff --git a/assets/gemini-brand-color.svg b/assets/gemini-brand-color.svg new file mode 100644 index 0000000..33a2934 --- /dev/null +++ b/assets/gemini-brand-color.svg @@ -0,0 +1 @@ +Gemini \ No newline at end of file diff --git a/assets/openai.svg b/assets/openai.svg new file mode 100644 index 0000000..50d94d6 --- /dev/null +++ b/assets/openai.svg @@ -0,0 +1 @@ +OpenAI \ No newline at end of file diff --git a/assets/zhipu-color.svg b/assets/zhipu-color.svg new file mode 100644 index 0000000..0c6e61c --- /dev/null +++ b/assets/zhipu-color.svg @@ -0,0 +1 @@ +Zhipu \ No newline at end of file diff --git a/src/autoslice/__init__.py b/src/autoslice/__init__.py new file mode 100644 index 0000000..3b36319 --- /dev/null +++ b/src/autoslice/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2024 bilive. + +import sys +import os +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) \ No newline at end of file diff --git a/src/autoslice/calculate_density.py b/src/autoslice/calculate_density.py new file mode 100644 index 0000000..dff9dc6 --- /dev/null +++ b/src/autoslice/calculate_density.py @@ -0,0 +1,51 @@ +# Copyright (c) 2024 bilive. 
def parse_time(time_str):
    """Convert an ASS timestamp (``h:mm:ss.cc``) to seconds.

    The fractional part is scaled by the number of digits actually present,
    so ``.5``, ``.50`` and ``.500`` all mean half a second. A timestamp
    without a fractional part is accepted as well.

    Args:
        time_str: Timestamp text such as ``"0:01:02.50"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the text does not have three ``:``-separated fields
            or a field is not numeric.
    """
    hours, minutes, seconds = time_str.split(':')
    whole, _, fraction = seconds.partition('.')
    total = int(hours) * 3600 + int(minutes) * 60 + int(whole)
    if fraction:
        # Do not assume exactly two digits: "5" is 0.5 s, not 0.05 s.
        total += int(fraction) / 10 ** len(fraction)
    return float(total)


def format_time(seconds):
    """Format a number of seconds as ``hh:mm:ss.cc`` (centisecond precision).

    The value is rounded to the nearest centisecond *before* it is split
    into fields. Truncating the float fraction instead turns values such as
    0.57 into ``.56`` because 0.57 * 100 is 56.99999999999999 in binary
    floating point.

    Args:
        seconds: Non-negative duration or offset in seconds (int or float).

    Returns:
        A string such as ``"01:02:03.45"``.
    """
    total_centis = int(round(seconds * 100))
    total_seconds, centis = divmod(total_centis, 100)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}.{centis:02}"


def extract_dialogues(file_path):
    """Collect the start time of every ``Dialogue:`` line in an ASS file.

    Args:
        file_path: Path of the ASS file, read as UTF-8.

    Returns:
        A list of start times in seconds, in file order.

    Raises:
        IndexError: If a ``Dialogue:`` line has no second comma-separated field.
        ValueError: If a start time cannot be parsed by ``parse_time``.
    """
    with open(file_path, 'r', encoding='utf-8') as ass_file:
        return [
            # The start time is the second comma-separated field; later
            # fields (including the text, which may contain commas) are
            # not needed, so stop splitting after it.
            parse_time(line.split(',', 2)[1].strip())
            for line in ass_file
            if line.startswith('Dialogue:')
        ]


def calculate_density(dialogues, window_size=None):
    """Find the window start time that covers the most dialogues.

    Every dialogue time is tried as a window start; the window is the
    half-open interval ``[start, start + window_size)``. When several
    windows tie, the earliest start wins.

    Args:
        dialogues: Iterable of dialogue start times in seconds.
        window_size: Window length in seconds. ``None`` (the default) uses
            ``SLICE_DURATION`` from ``src.config``, looked up at call time.

    Returns:
        A ``(max_start_time, max_density)`` tuple; ``(0, 0)`` when there
        are no dialogues.
    """
    if window_size is None:
        from src.config import SLICE_DURATION
        window_size = SLICE_DURATION

    times = sorted(dialogues)
    total = len(times)

    max_density = 0
    max_start_time = 0

    # Two-pointer sliding window: because ``times`` is sorted, the right
    # edge only ever moves forward, so the scan after sorting is linear
    # instead of re-counting every timestamp for every candidate start.
    right = 0
    for left, start_time in enumerate(times):
        window_end = start_time + window_size
        while right < total and times[right] < window_end:
            right += 1
        current_density = right - left
        if current_density > max_density:
            max_density = current_density
            max_start_time = start_time

    return max_start_time, max_density
def zhipu_glm_4v_plus_generate_title(video_path, artist):
    """Ask the Zhipu ``glm-4v-plus`` model to write a title for a clip.

    The whole video file is read into memory, base64-encoded and sent as
    the ``video_url`` part of a single user message, together with a text
    prompt that names the streamer.

    Args:
        video_path: Path of the sliced video to describe.
        artist: Streamer name interpolated into the prompt.

    Returns:
        The model's reply with the book-title marks ``《`` and ``》`` removed.
    """
    with open(video_path, 'rb') as video_file:
        video_base = base64.b64encode(video_file.read()).decode('utf-8')

    client = ZhipuAI(api_key=Your_API_KEY)
    response = client.chat.completions.create(
        model="glm-4v-plus",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "video_url",
                        "video_url": {
                            "url" : video_base
                        }
                    },
                    {
                        "type": "text",
                        "text": f"视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,注意标题中如果有“主播”请替换成{artist}。"
                    }
                ]
            }
        ]
    )
    generated = response.choices[0].message.content
    # The model tends to wrap titles in book-title marks; drop them.
    return generated.replace("《", "").replace("》", "")


# https://stackoverflow.com/questions/64849478/cant-insert-stream-metadata-into-mp4
def inject_metadata(video_path, generate_title, output_path):
    """Copy a video with ffmpeg and store a title in its global metadata.

    Streams are copied without re-encoding; the title is written under the
    global metadata key ``generate`` of the output file.

    Args:
        video_path: Input video.
        generate_title: Title text to store under the ``generate`` key.
        output_path: Output file; the container is chosen by ffmpeg from
            this path.

    NOTE(review): the ffmpeg exit status is not checked, so a failed run
    is silent here — confirm that callers can tolerate a missing output.
    """
    command = [
        'ffmpeg',
        '-i', video_path,
        '-metadata:g', f'generate={generate_title}',
        '-c:v', 'copy',
        '-c:a', 'copy',
        output_path
    ]
    subprocess.run(command)


def slice_video(video_path, start_time, output_path, duration=str(SLICE_DURATION)):
    """Cut a clip out of a video with ffmpeg, without re-encoding.

    Args:
        video_path: Input video.
        start_time: Clip start in seconds; formatted with ``format_time``
            and passed as ``-ss`` before ``-i``.
        output_path: Output file for the clip.
        duration: Clip length in any ffmpeg duration syntax. Defaults to
            ``SLICE_DURATION`` as a plain number of seconds, which stays
            valid for 60 and above, whereas ``00:00:<n>`` would put an
            out-of-range value in the seconds field.

    NOTE(review): the ffmpeg exit status is not checked, so a failed run
    is silent here — confirm that callers can tolerate a missing output.
    """
    command = [
        'ffmpeg',
        '-ss', format_time(start_time),
        '-i', video_path,
        '-t', duration,
        # Drop the source metadata so the clip starts from a clean slate.
        '-map_metadata', '-1',
        '-c:v', 'copy',
        '-c:a', 'copy',
        output_path
    ]
    subprocess.run(command)
src.burn.generate_subtitles import generate_subtitles from src.burn.render_video import render_video +from src.autoslice.slice_video import slice_video, inject_metadata, zhipu_glm_4v_plus_generate_title +from src.autoslice.calculate_density import extract_dialogues, calculate_density, format_time +from src.upload.extract_video_info import get_video_info import queue import threading import time @@ -52,7 +55,20 @@ def render_video_only(video_path): render_video(original_video_path, format_video_path, subtitle_font_size, subtitle_margin_v) print("complete danamku burning and wait for uploading!", flush=True) - # # Delete relative files + if AUTO_SLICE: + title, artist, date = get_video_info(format_video_path) + slice_video_path = format_video_path[:-4] + '_slice.mp4' + dialogues = extract_dialogues(ass_path) + max_start_time, max_density = calculate_density(dialogues) + formatted_time = format_time(max_start_time) + print(f"The 30-second window with the highest density starts at {formatted_time} seconds with {max_density} danmakus.", flush=True) + slice_video(format_video_path, max_start_time, slice_video_path) + glm_title = zhipu_glm_4v_plus_generate_title(slice_video_path, artist) + slice_video_flv_path = slice_video_path[:-4] + '.flv' + inject_metadata(slice_video_path, glm_title, slice_video_flv_path) + os.remove(slice_video_path) + + # Delete relative files for remove_path in [original_video_path, xml_path, ass_path, srt_path, jsonl_path]: if os.path.exists(remove_path): os.remove(remove_path) @@ -63,6 +79,9 @@ def render_video_only(video_path): with open(f"{SRC_DIR}/upload/uploadVideoQueue.txt", "a") as file: file.write(f"{format_video_path}\n") + if AUTO_SLICE: + print("complete slice video and wait for uploading!", flush=True) + file.write(f"{slice_video_flv_path}\n") class VideoRenderQueue: def __init__(self): diff --git a/src/config.py b/src/config.py index c4465a4..3d258ed 100644 --- a/src/config.py +++ b/src/config.py @@ -10,7 +10,10 @@ GPU_EXIST=True # 
def generate_slice_yaml_template(video_path):
    """Build the upload YAML for a sliced clip.

    The title is read with ``ffprobe`` from the ``generate`` tag in the
    file's format-level metadata; every other field is a fixed value.

    Args:
        video_path: Path of the slice; also used as the key under
            ``streamers``.

    Returns:
        The YAML document as a string, with keys in insertion order.

    Raises:
        subprocess.CalledProcessError: If ``ffprobe`` exits non-zero.
        KeyError: If the probe output has no ``format.tags.generate`` entry.
    """
    probe_command = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        video_path,
    ]
    raw_output = subprocess.check_output(probe_command, stderr=subprocess.STDOUT)
    probe_result = json.loads(raw_output.decode('utf-8'))
    title = probe_result["format"]["tags"]["generate"]

    # Per-video upload settings; only the title varies between slices.
    streamer_entry = {
        "copyright": 1,
        "source": "",
        "tid": 138,
        "cover": "",
        "title": title,
        "desc_format_id": 0,
        "desc": "",
        "dynamic": "",
        "tag": "直播切片",
    }
    template = {
        "line": "bda2",
        "limit": 5,
        "streamers": {video_path: streamer_entry},
    }
    return yaml.dump(template, default_flow_style=False, sort_keys=False)
src.upload.generate_yaml import generate_yaml_template +from src.upload.generate_yaml import generate_yaml_template, generate_slice_yaml_template from src.upload.extract_video_info import generate_title import time import fcntl @@ -87,23 +87,32 @@ def read_append_and_delete_lines(file_path): print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} " + "deal with " + upload_video_path, flush=True) # check if the live is already uploaded - query = generate_title(upload_video_path) - result = subprocess.check_output(f"{SRC_DIR}/upload/biliup" + " -u " + f"{SRC_DIR}/upload/cookies.json" + " list", shell=True) - upload_list = result.decode("utf-8").splitlines() - limit_list = upload_list[:30] - bv_result = find_bv_number(query, limit_list) - if bv_result: - print(f"BV number is: {bv_result}", flush=True) - append_upload(upload_video_path, bv_result) - else: - print("First upload this live", flush=True) - # generate the yaml template - yaml_template = generate_yaml_template(upload_video_path) + if upload_video_path.endswith('.flv'): + # upload slice video + yaml_template = generate_slice_yaml_template(upload_video_path) yaml_file_path = SRC_DIR + "/upload/upload.yaml" with open(yaml_file_path, 'w', encoding='utf-8') as file: file.write(yaml_template) upload_video(upload_video_path, yaml_file_path) return + else: + query = generate_title(upload_video_path) + result = subprocess.check_output(f"{SRC_DIR}/upload/biliup" + " -u " + f"{SRC_DIR}/upload/cookies.json" + " list", shell=True) + upload_list = result.decode("utf-8").splitlines() + limit_list = upload_list[:30] + bv_result = find_bv_number(query, limit_list) + if bv_result: + print(f"BV number is: {bv_result}", flush=True) + append_upload(upload_video_path, bv_result) + else: + print("First upload this live", flush=True) + # generate the yaml template + yaml_template = generate_yaml_template(upload_video_path) + yaml_file_path = SRC_DIR + "/upload/upload.yaml" + with open(yaml_file_path, 'w', encoding='utf-8') as file: 
+ file.write(yaml_template) + upload_video(upload_video_path, yaml_file_path) + return except subprocess.CalledProcessError: print("Fail to upload, the files will be reserved.")