240 lines
7.1 KiB
Python
240 lines
7.1 KiB
Python
# pip install scenedetect opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple
|
||
|
||
from scenedetect.video_manager import VideoManager
|
||
from scenedetect.scene_manager import SceneManager
|
||
from scenedetect.stats_manager import StatsManager
|
||
from scenedetect.detectors.content_detector import ContentDetector
|
||
import os
|
||
import sys
|
||
import subprocess
|
||
from huggingface_hub import hf_hub_download
|
||
from faster_whisper import WhisperModel
|
||
import public_tools
|
||
from pathlib import Path
|
||
|
||
|
||
# 获取智能画面分割的时间或者秒数
|
||
def find_scenes(video_path, sensitivity):
    """Detect scene cuts in a video and return their timecode ranges.

    Runs PySceneDetect's ContentDetector over *video_path*; *sensitivity*
    is used as the detector threshold (higher = bigger content change
    needed to register a cut).

    Returns a list of ``[start_timecode, end_timecode]`` string pairs,
    one per detected scene (empty if detection found nothing).
    """
    print(
        "正在计算分镜数据" + "sensitivity:" + str(sensitivity) + "path : " + video_path
    )
    sys.stdout.flush()
    manager = VideoManager([video_path])
    detector_manager = SceneManager(StatsManager())

    # Content-based detector; threshold controls cut sensitivity.
    detector_manager.add_detector(ContentDetector(threshold=float(sensitivity)))

    timecode_pairs = []

    try:
        # Downscale frames for faster processing, then run detection.
        manager.set_downscale_factor()
        manager.start()
        detector_manager.detect_scenes(frame_source=manager)
        print("分镜数据列表:")
        sys.stdout.flush()
        for idx, (start, end) in enumerate(detector_manager.get_scene_list(), 1):
            timecode_pairs.append([start.get_timecode(), end.get_timecode()])
            print(
                "Scene %2d: Start %s / Frame %d, End %s / Frame %d"
                % (
                    idx,
                    start.get_timecode(),
                    start.get_frames(),
                    end.get_timecode(),
                    end.get_frames(),
                )
            )
            sys.stdout.flush()
    finally:
        # Always release the capture, even when detection raises.
        manager.release()

    return timecode_pairs
|
||
|
||
|
||
# 如果不存在就创建
|
||
def createDir(file_dir):
    """Create directory *file_dir* if it does not already exist.

    Uses os.makedirs instead of the original isdir-check + os.mkdir:
    makedirs also creates missing intermediate directories (os.mkdir
    raises FileNotFoundError for those), and exist_ok=True removes the
    race between the existence check and the creation.
    """
    os.makedirs(file_dir, exist_ok=True)
|
||
|
||
|
||
# 切分一个视频
|
||
def ClipVideo(video_path, out_folder, image_out_folder, sensitivity, gpu_type):
    """Split *video_path* into one clip per detected scene, then export a
    mid-point frame from every clip.

    Parameters
    ----------
    video_path : source video file.
    out_folder : directory receiving the numbered ``00001.mp4`` clips.
    image_out_folder : directory receiving the per-clip ``.png`` frames.
    sensitivity : ContentDetector threshold forwarded to find_scenes().
    gpu_type : "NVIDIA" / "AMD" picks a hardware H.264 encoder, anything
        else falls back to software libx264.

    Returns the list of per-clip dicts with keys ``start_time_str``,
    ``end_time_str``, ``out_video_file`` and ``video_name``.

    Bug fixes vs. the original: ``-loglevel error`` was placed AFTER the
    output file in both ffmpeg commands, where ffmpeg discards it as a
    trailing option (so it never took effect) — it now precedes the
    input; and the frame-extraction run now uses ``check=True`` like the
    clip cut, so a failed extraction raises instead of passing silently.
    """
    shijian_list = find_scenes(video_path, sensitivity)  # scene time ranges
    shijian_list_len = len(shijian_list)

    print("总共有%s个场景" % str(shijian_list_len))
    sys.stdout.flush()
    video_list = []
    for i in range(0, shijian_list_len):
        start_time_str = shijian_list[i][0]
        end_time_str = shijian_list[i][1]

        print("开始输出第" + str(i + 1) + "个分镜")
        video_name = "{:05d}".format(i + 1)
        out_video_file = os.path.join(out_folder, video_name + ".mp4")
        sys.stdout.flush()
        video_list.append(
            {
                "start_time_str": start_time_str,
                "end_time_str": end_time_str,
                "out_video_file": out_video_file,
                "video_name": video_name,
            }
        )

        # Cut the clip with ffmpeg. Global options (-loglevel) come first,
        # -ss/-to select the scene range, audio is stream-copied.
        command = [
            "ffmpeg",
            "-loglevel", "error",
            "-i", video_path,
            "-ss", start_time_str,
            "-to", end_time_str,
            "-c:v", _select_encoder(gpu_type),
            "-preset", "fast",
            "-c:a", "copy",
            out_video_file,
        ]
        subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
        )

    print("分镜输出完成。开始抽帧")
    sys.stdout.flush()
    for vi in video_list:
        start_seconds = _timecode_to_seconds(vi["start_time_str"])
        end_seconds = _timecode_to_seconds(vi["end_time_str"])
        print("正在抽帧:" + vi["video_name"])
        sys.stdout.flush()
        # Seek to the clip's midpoint (clip timestamps start at 0, so the
        # midpoint offset is half the clip duration) and grab one frame.
        subprocess.run(
            [
                "ffmpeg",
                "-loglevel", "error",
                "-ss", str((end_seconds - start_seconds) / 2),
                "-i", vi["out_video_file"],
                "-frames:v", "1",
                os.path.join(image_out_folder, vi["video_name"] + ".png"),
            ],
            check=True,
        )

    print("抽帧完成,开始识别文案")
    sys.stdout.flush()
    return video_list


def _select_encoder(gpu_type):
    """Map a GPU vendor string to an ffmpeg H.264 encoder name."""
    if gpu_type == "NVIDIA":
        return "h264_nvenc"
    if gpu_type == "AMD":
        # NOTE(review): h264_vaapi usually also needs -vaapi_device and a
        # hwupload filter chain — confirm this encoder works as invoked.
        return "h264_vaapi"
    return "libx264"


def _timecode_to_seconds(timecode):
    """Convert an ``HH:MM:SS[.fff]`` timecode string to float seconds."""
    h, m, s = timecode.split(":")
    return int(h) * 3600 + int(m) * 60 + float(s)
|
||
|
||
|
||
def SplitAudio(video_out_folder, video_list):
    """Extract a 128 kbps MP3 audio track from every clip in *video_list*.

    Equivalent shell command: ``ffmpeg -i in.mp4 -vn -ab 128k out.mp3``.

    Parameters
    ----------
    video_out_folder : directory the ``.mp3`` files are written to.
    video_list : dicts as produced by ClipVideo (needs ``out_video_file``
        and ``video_name`` keys).

    Returns the list of generated mp3 paths, in video_list order.

    Bug fix vs. the original: ``-loglevel error`` was placed after the
    output file, where ffmpeg discards it as a trailing option; it now
    precedes the input so it actually takes effect.
    """
    print("正在分离音频!!")
    mp3_list = []
    sys.stdout.flush()
    for v in video_list:
        mp3_path = os.path.join(video_out_folder, v["video_name"] + ".mp3")
        mp3_list.append(mp3_path)
        subprocess.run(
            [
                "ffmpeg",
                "-loglevel", "error",
                "-i", v["out_video_file"],
                "-vn",           # drop the video stream
                "-ab", "128k",   # audio bitrate
                mp3_path,
            ],
            check=True,
        )
    return mp3_list
|
||
|
||
|
||
def GetText(out_folder, mp3_list):
    """Transcribe every mp3 in *mp3_list* with faster-whisper and write
    the combined text to ``<out_folder>/文案.txt``.

    Downloads (or reuses the local cache of) the
    Systran/faster-whisper-large-v3 model files first, then transcribes
    each file as Chinese with VAD filtering enabled.
    """
    recognized = []

    # Ensure all model files are present locally; hf_hub_download returns
    # the cached path without re-downloading when the file already exists.
    print("正在下载或加载模型")
    sys.stdout.flush()
    repo = "Systran/faster-whisper-large-v3"
    model_bin = Path(hf_hub_download(repo_id=repo, filename="model.bin"))
    for aux_file in (
        "config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "vocabulary.json",
    ):
        hf_hub_download(repo_id=repo, filename=aux_file)

    # The model directory is wherever model.bin landed in the cache.
    model = WhisperModel(
        model_size_or_path=os.path.dirname(model_bin),
        device="auto",
        local_files_only=True,
    )
    print("模型加载成功,开始识别")
    sys.stdout.flush()
    for mp3_file in mp3_list:
        segments, info = model.transcribe(
            mp3_file,
            beam_size=5,
            language="zh",
            vad_filter=True,  # skip long silences
            vad_parameters=dict(min_silence_duration_ms=1000),
        )
        # Join segment texts, terminating each with a full-width period.
        clip_text = "".join(segment.text + "。" for segment in segments)
        print(mp3_file + "识别完成")
        sys.stdout.flush()
        recognized.append(clip_text)

    # Persist all recognized text in one file.
    print("文本全部识别成功,正在写出")
    sys.stdout.flush()
    public_tools.PublicTools().write_to_file(
        recognized, os.path.join(out_folder, "文案.txt")
    )
    print("写出完成")
    sys.stdout.flush()
|
||
|
||
|
||
def init(video_path, video_out_folder, image_out_folder, sensitivity, gpu_type):
    """Run the full pipeline: scene-split the video, extract per-clip
    audio, then transcribe it to a text file.

    The transcript is written one directory above *video_out_folder*.
    """
    clips = ClipVideo(
        video_path, video_out_folder, image_out_folder, sensitivity, gpu_type
    )

    # Rip an mp3 from every clip.
    audio_files = SplitAudio(video_out_folder, clips)
    # Recognize speech and write 文案.txt next to the clip folder.
    GetText(os.path.dirname(video_out_folder), audio_files)
|