LaiTool/resources/scripts/iamge_to_video.py

491 lines
17 KiB
Python
Raw Normal View History

2024-05-15 12:57:15 +08:00
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import os
import glob
import public_tools
import subprocess
import json
class ImageToVideo:
2024-06-01 15:08:22 +08:00
def __init__(self, gpu_type) -> None:
    """Set up the converter state.

    Args:
        gpu_type: Encoder selector. ``GenerateVideoAllImage`` compares it
            with ``"NVIDIA"`` and ``"AMD"`` to pick the ffmpeg video
            encoder; any other value selects ``libx264``.
    """
    # Encoder selection and the running total of frames requested so far.
    self.gpu_type = gpu_type
    self.frames = 0
    # Relative paths to the bundled ffmpeg binaries; they are resolved
    # against the process working directory when the tools are launched.
    self.ffmpeg_path = "../package/ffmpeg/win/ffmpeg"
    self.ffprobe_path = "../package/ffmpeg/win/ffprobe"
    # Project helper object (used later to list generated .mp4 files).
    self.public_tools = public_tools.PublicTools()
def create_video_from_image_with_center_offset(
    self,
    image_path,
    duration,
    start_offset,
    end_offset,
    fps=60,
    video_size=(1440, 1080),
    offest_type="KFTypePositionY",
):
    """Turn one still image into a clip animated by a key-frame effect.

    Args:
        image_path: Path of the image file to read.
        duration: Clip length in seconds.
        start_offset: Offset (or extra scale) applied on the first frame.
        end_offset: Offset (or extra scale) reached at the end of the clip.
        fps: Frames per second of the written video.
        video_size: ``(width, height)`` of the written video.
        offest_type: ``"KFTypePositionY"`` (vertical pan),
            ``"KFTypePositionX"`` (horizontal pan) or ``"KFTypeScale"``
            (zoom).

    Returns:
        The video file name returned by the selected renderer.

    Raises:
        ValueError: If ``offest_type`` is not one of the supported values,
            or if the file content cannot be decoded as an image.
    """
    # Reject unknown effects up front, before any file or image work.
    if offest_type not in ("KFTypePositionY", "KFTypePositionX", "KFTypeScale"):
        raise ValueError("关键帧没有设置正确的参数")

    # Read the raw bytes with open() and decode them from memory instead of
    # handing the path to OpenCV.
    with open(image_path, "rb") as file:
        img_bytes = file.read()
    img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError(f"Unable to decode image: {image_path}")

    # IMREAD_UNCHANGED may yield grayscale or BGRA data, but the renderers
    # paste the image into a 3-channel canvas, so normalise to BGR here.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    if offest_type == "KFTypeScale":
        # The zoom renderer resizes the original image itself on every
        # frame, so no pre-scaled copy is needed.
        return self.create_video_from_image_scale(
            image_path,
            fps,
            video_size,
            duration,
            start_offset,
            end_offset,
            img,
        )

    # Pan effects work on a copy enlarged enough to cover the frame
    # throughout the requested travel.
    img_resized = self.get_scaled_image(
        img, video_size, offest_type, start_offset, end_offset
    )
    if offest_type == "KFTypePositionY":
        render = self.create_video_from_image_Y
    else:
        render = self.create_video_from_image_X
    return render(
        image_path,
        fps,
        video_size,
        duration,
        start_offset,
        end_offset,
        img_resized,
    )
def create_video_from_image_scale(
    self,
    image_path,
    fps,
    video_size,
    duration,
    start_scale,
    end_scale,
    img,
):
    """Write a clip in which the centred image is progressively rescaled.

    Each frame resizes ``img`` by ``cover_scale + current_scale``, where
    ``cover_scale`` is the factor at which the image just covers the frame,
    and pastes the centred, cropped result onto a black canvas.

    Args:
        image_path: Source image path; only used to derive the output name.
        fps: Frames per second of the written video.
        video_size: ``(width, height)`` of the written video.
        duration: Clip length in seconds.
        start_scale: Extra scale on the first frame; negative values are
            clamped to 0.
        end_scale: Extra scale that determines the per-frame change.
        img: Decoded image array (3 channels, to match the canvas).

    Returns:
        The name of the written ``.mp4`` file.
    """
    frame_width, frame_height = video_size[0], video_size[1]
    # Smallest factor at which the image covers the whole frame.
    default_scale = max(frame_width / img.shape[1], frame_height / img.shape[0])

    # NOTE(review): only "/" is treated as a path separator and the name is
    # cut at the first "."; kept as-is because the output location depends
    # on it.
    video_name = f"{image_path.split('/')[-1].split('.')[0]}.mp4"

    total_frames = round(duration * fps)
    self.frames += total_frames

    # True division is required here: floor division turned small rates
    # into 0.0 (no zoom) or -1.0 (non-positive resize factor).
    # NOTE(review): the rate is halved, presumably because the caller passes
    # symmetric -v/+v pairs while the negative end is clamped to 0 - confirm.
    if total_frames:
        scale_change_per_frame = float(end_scale - start_scale) / total_frames / 2
    else:
        scale_change_per_frame = 0.0
    current_scale = max(start_scale, 0)

    center_x, center_y = frame_width // 2, frame_height // 2

    out = cv2.VideoWriter(
        video_name, cv2.VideoWriter_fourcc(*"mp4v"), fps, video_size
    )
    try:
        for _ in range(int(duration * fps)):
            # Start from a black frame.
            canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
            factor = default_scale + current_scale
            img_resized = cv2.resize(img, None, fx=factor, fy=factor)

            # Top-left corner of the centred image in canvas coordinates.
            start_x = center_x - img_resized.shape[1] // 2
            start_y = center_y - img_resized.shape[0] // 2

            # Clip the copy region so neither array is indexed out of range.
            src_x1 = max(-start_x, 0)
            dst_x1 = max(start_x, 0)
            copy_width = min(img_resized.shape[1] - src_x1, frame_width - dst_x1)
            src_y1 = max(-start_y, 0)
            dst_y1 = max(start_y, 0)
            copy_height = min(img_resized.shape[0] - src_y1, frame_height - dst_y1)

            if copy_height > 0 and copy_width > 0:
                canvas[dst_y1 : dst_y1 + copy_height, dst_x1 : dst_x1 + copy_width] = (
                    img_resized[
                        src_y1 : src_y1 + copy_height, src_x1 : src_x1 + copy_width
                    ]
                )
            out.write(canvas)
            current_scale += scale_change_per_frame
    finally:
        # Always close the writer, even if a frame fails.
        out.release()
    return video_name
def create_video_from_image_X(
    self,
    image_path,
    fps,
    video_size,
    duration,
    start_offset,
    end_offset,
    img_resized,
):
    """Write a clip in which the image slides horizontally across the frame.

    The image stays vertically centred while its horizontal offset from the
    centre moves linearly from ``start_offset`` towards ``end_offset``.

    Args:
        image_path: Source image path; only used to derive the output name.
        fps: Frames per second of the written video.
        video_size: ``(width, height)`` of the written video.
        duration: Clip length in seconds.
        start_offset: Horizontal offset in pixels on the first frame.
        end_offset: Horizontal offset in pixels the motion heads towards.
        img_resized: Image array already scaled for the pan (3 channels, to
            match the canvas).

    Returns:
        The name of the written ``.mp4`` file.
    """
    # NOTE(review): only "/" is treated as a path separator and the name is
    # cut at the first "."; kept as-is because the output location depends
    # on it.
    video_name = f"{image_path.split('/')[-1].split('.')[0]}.mp4"

    total_frames = round(duration * fps)
    self.frames += total_frames
    # Guard against clips too short to contain a single frame.
    if total_frames:
        offset_change_per_frame = float(end_offset - start_offset) / total_frames
    else:
        offset_change_per_frame = 0.0

    frame_width, frame_height = video_size[0], video_size[1]
    img_height, img_width = img_resized.shape[0], img_resized.shape[1]

    # The vertical placement is identical on every frame, so compute the
    # clipped row range once.
    start_y = frame_height // 2 - img_height // 2
    src_y1 = max(-start_y, 0)
    dst_y1 = max(start_y, 0)
    copy_height = min(img_height - src_y1, frame_height - dst_y1)
    # Left edge of the image when it is horizontally centred.
    centered_x = frame_width // 2 - img_width // 2

    current_offset = start_offset
    out = cv2.VideoWriter(
        video_name, cv2.VideoWriter_fourcc(*"mp4v"), fps, video_size
    )
    try:
        for _ in range(int(duration * fps)):
            # Start from a black frame.
            canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
            start_x = centered_x + int(current_offset)

            # Clip the column range so neither array is indexed out of range.
            src_x1 = max(-start_x, 0)
            dst_x1 = max(start_x, 0)
            copy_width = min(img_width - src_x1, frame_width - dst_x1)

            if copy_height > 0 and copy_width > 0:
                canvas[dst_y1 : dst_y1 + copy_height, dst_x1 : dst_x1 + copy_width] = (
                    img_resized[
                        src_y1 : src_y1 + copy_height, src_x1 : src_x1 + copy_width
                    ]
                )
            out.write(canvas)
            current_offset += offset_change_per_frame
    finally:
        # Always close the writer, even if a frame fails.
        out.release()
    return video_name
def create_video_from_image_Y(
    self,
    image_path,
    fps,
    video_size,
    duration,
    start_offset,
    end_offset,
    img_resized,
):
    """Write a clip in which the image slides vertically across the frame.

    The image stays horizontally centred while its vertical offset from the
    centre moves linearly from ``start_offset`` towards ``end_offset``.

    Args:
        image_path: Source image path; only used to derive the output name.
        fps: Frames per second of the written video.
        video_size: ``(width, height)`` of the written video.
        duration: Clip length in seconds.
        start_offset: Vertical offset in pixels on the first frame.
        end_offset: Vertical offset in pixels the motion heads towards.
        img_resized: Image array already scaled for the pan (3 channels, to
            match the canvas).

    Returns:
        The name of the written ``.mp4`` file.
    """
    # NOTE(review): only "/" is treated as a path separator and the name is
    # cut at the first "."; kept as-is because the output location depends
    # on it.
    video_name = f"{image_path.split('/')[-1].split('.')[0]}.mp4"

    total_frames = round(duration * fps)
    self.frames += total_frames
    # Guard against clips too short to contain a single frame.
    if total_frames:
        offset_change_per_frame = float(end_offset - start_offset) / total_frames
    else:
        offset_change_per_frame = 0.0

    frame_width, frame_height = video_size[0], video_size[1]
    img_height, img_width = img_resized.shape[0], img_resized.shape[1]

    # The horizontal placement is identical on every frame, so compute the
    # clipped column range once.
    start_x = frame_width // 2 - img_width // 2
    src_x1 = max(-start_x, 0)
    dst_x1 = max(start_x, 0)
    copy_width = min(img_width - src_x1, frame_width - dst_x1)
    # Top edge of the image when it is vertically centred.
    centered_y = frame_height // 2 - img_height // 2

    current_offset = start_offset
    out = cv2.VideoWriter(
        video_name, cv2.VideoWriter_fourcc(*"mp4v"), fps, video_size
    )
    try:
        for _ in range(int(duration * fps)):
            # Start from a black frame.
            canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
            start_y = centered_y + int(current_offset)

            # Clip the row range so neither array is indexed out of range.
            src_y1 = max(-start_y, 0)
            dst_y1 = max(start_y, 0)
            copy_height = min(img_height - src_y1, frame_height - dst_y1)

            if copy_height > 0 and copy_width > 0:
                canvas[dst_y1 : dst_y1 + copy_height, dst_x1 : dst_x1 + copy_width] = (
                    img_resized[
                        src_y1 : src_y1 + copy_height, src_x1 : src_x1 + copy_width
                    ]
                )
            out.write(canvas)
            current_offset += offset_change_per_frame
    finally:
        # Always close the writer, even if a frame fails.
        out.release()
    return video_name
def get_sorted_images(self, folder_path, image_extensions=(".jpg", ".png")):
    """Return the image files directly inside ``folder_path``, sorted.

    Args:
        folder_path: Directory to search (not recursive).
        image_extensions: Iterable of file-name suffixes to match; each is
            appended to ``*`` to build a glob pattern.

    Returns:
        A list of matching paths sorted as plain strings, so ``x_10`` sorts
        before ``x_2``.
    """
    image_files = []
    for ext in image_extensions:
        # One glob per extension, e.g. "<folder>/*.jpg".
        image_files.extend(glob.glob(os.path.join(folder_path, "*" + ext)))
    image_files.sort()
    return image_files
def get_scaled_image(self, img, video_size, offest_type, start_offest, end_offest):
    """Resize ``img`` so it covers the frame for the requested key-frame type.

    The base factor is the one at which the image just covers
    ``video_size``. For pan effects the factor is raised further when the
    frame length along the pan axis plus both absolute offsets exceeds the
    scaled image length along that axis.

    Args:
        img: Decoded image array (``shape[0]`` is height, ``shape[1]`` width).
        video_size: ``(width, height)`` of the target video.
        offest_type: ``"KFTypePositionY"``, ``"KFTypePositionX"`` or
            ``"KFTypeScale"`` (no extra enlargement).
        start_offest: Offset at the start of the clip.
        end_offest: Offset at the end of the clip.

    Returns:
        The resized image array.

    Raises:
        ValueError: If ``offest_type`` is not one of the supported values.
    """
    img_height, img_width = img.shape[0], img.shape[1]
    # Smallest factor at which the image covers the whole frame.
    scale = max(video_size[0] / img_width, video_size[1] / img_height)

    if offest_type == "KFTypePositionY":
        # Frame height plus the travel on both sides of the centre.
        all_offset = abs(start_offest) + abs(end_offest) + video_size[1]
        if all_offset > img_height * scale:
            scale = max(scale, all_offset / img_height)
    elif offest_type == "KFTypePositionX":
        # Frame width plus the travel on both sides of the centre.
        all_offset = abs(start_offest) + abs(end_offest) + video_size[0]
        if all_offset > img_width * scale:
            scale = max(scale, all_offset / img_width)
    elif offest_type != "KFTypeScale":
        raise ValueError("关键帧没有设置正确的参数")
    # A further "offset larger than the image" adjustment would divide the
    # frame length by a negative number and so could never raise the scale;
    # it is intentionally not applied.

    new_width = int(img_width * scale)
    new_height = int(img_height * scale)
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
def GenerateVideoAllImage(self, image_dir, offset, config_json):
    """
    Render a clip for every image in ``image_dir``, then retime each
    ``.mp4`` found there with ffmpeg so its length matches the timing entry
    with the same number.

    Args:
        image_dir: Directory holding the source images; file names must end
            in ``_<number>`` before the extension.
        offset: Mapping read with the key ``"name"`` (key-frame type, or
            ``"KFTypeRandom"``) and, depending on the type, ``"up_down"``,
            ``"left_right"`` or ``"scale"``.
        config_json: Mapping whose ``"srt_time_information"`` entry is a list
            of items read with the keys ``"no"``, ``"start_time"`` and
            ``"end_time"`` (times in milliseconds).

    Returns:
        A ``ValueError`` instance (returned, not raised) when an image has no
        timing entry or the key-frame type is unknown. No other return value
        is visible in this part of the file.

    Reads ``self.fps``, ``self.video_size`` and ``self.bitRate``, which are
    not assigned in the visible ``__init__``.
    NOTE(review): presumably set by the caller before this method runs - confirm.
    """
    config_data = config_json["srt_time_information"]
    # Toggled after every image so consecutive clips move in opposite directions.
    isDirection = False
    sort_images = self.get_sorted_images(image_dir)
    # Render one clip per image.
    for image_file in sort_images:
        filename = os.path.splitext(os.path.basename(image_file))[
            0
        ]  # file name without the extension
        number = int(filename.split("_")[-1])
        # NOTE(review): looks like a leftover debugging hook for image 188.
        if number == 188:
            print(number)
        filtered_data = [item for item in config_data if item["no"] == number]
        # NOTE(review): the original comment said "skip when empty", but the
        # code stops the whole run and returns a ValueError instance - confirm
        # which behaviour is intended.
        if len(filtered_data) == 0:
            return ValueError("没有找到对应的关键帧")
        print(filtered_data)
        # NOTE(review): re-created for every image, so it never holds more
        # than the current clip.
        video_arr = []
        # Scale the configured offsets relative to a 3200-pixel reference.
        with open(image_file, "rb") as file:
            img_bytes = file.read()
        # Decode the byte buffer into an image.
        img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_UNCHANGED)
        img_height, img_width = img.shape[:2]
        proportion_height = img_height / 3200
        # NOTE(review): uses img_height, not img_width - confirm this is intended.
        proportion_width = img_height / 3200
        key_frame = offset["name"]
        offset_list = ["KFTypePositionY", "KFTypePositionX"]
        real_key_frame = key_frame
        if key_frame == "KFTypeRandom":
            # Pick one of the two pan types at random.
            real_key_frame = offset_list[np.random.randint(0, 2)]
        if real_key_frame == "KFTypePositionY":
            offsetValue = offset["up_down"] * proportion_height
        elif real_key_frame == "KFTypePositionX":
            offsetValue = offset["left_right"] * proportion_width
        elif real_key_frame == "KFTypeScale":
            offsetValue = offset["scale"]
        else:
            return ValueError("关键帧没有设置正确的参数")
        # offsetValue = offset
        # Alternate the direction of travel between consecutive images.
        if isDirection:
            start_offset = offsetValue
            end_offset = -offsetValue
            isDirection = False
        else:
            start_offset = -offsetValue
            end_offset = offsetValue
            isDirection = True
        # The clip duration is the timing entry's span converted from ms to s.
        video_path = self.create_video_from_image_with_center_offset(
            image_file,
            (filtered_data[0]["end_time"] - filtered_data[0]["start_time"]) / 1000,
            start_offset,
            end_offset,
            self.fps,
            self.video_size,
            real_key_frame,
        )
        video_arr.append(video_path)
        print(video_path)
    # Fine-tune the duration of every generated clip.
    mp4_folder = self.public_tools.list_files_by_extension(image_dir, ".mp4")
    for mp4_path in mp4_folder:
        filename = os.path.splitext(os.path.basename(mp4_path))[
            0
        ]  # file name without the extension
        number = int(filename.split("_")[-1])
        if number == 188:
            print(number)
        # NOTE(review): unlike the first loop there is no empty check here, so
        # filtered_data[0] below raises IndexError for an unmatched number.
        filtered_data = [item for item in config_data if item["no"] == number]

        # print(filtered_data)
        # Ask ffprobe for the duration of the first video stream, as JSON.
        cmd = [
            self.ffprobe_path,
            "-v",
            "error",
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=duration",
            "-of",
            "json",
            mp4_path,
        ]
        # stderr is merged into stdout, so any ffprobe message ends up in the
        # text handed to json.loads.
        result = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        duration_sec = json.loads(result.stdout)["streams"][0]["duration"]
        duration_ms = int(float(duration_sec) * 1000)  # seconds to milliseconds
        print(
            duration_ms,
            (filtered_data[0]["end_time"] - filtered_data[0]["start_time"]),
        )
        temp_mp4_path = os.path.join(image_dir, "temp_" + str(number) + ".mp4")
        # Re-encode with the timestamps stretched by target / actual duration.
        cmd = []
        cmd.append(self.ffmpeg_path)
        cmd.append("-i")
        cmd.append(mp4_path)
        cmd.append("-filter:v")
        cmd.append(
            "setpts=PTS*"
            + str(
                (filtered_data[0]["end_time"] - filtered_data[0]["start_time"])
                / duration_ms
            )
        )
        cmd.append("-c:v")
        # Choose the encoder from the GPU type given to the constructor.
        if self.gpu_type == "NVIDIA":
            cmd.append("h264_nvenc")
        elif self.gpu_type == "AMD":
            cmd.append("h264_amf")
        else:
            cmd.append("libx264")
        cmd.append("-preset")
        cmd.append("fast")
        cmd.append("-rc:v")
        cmd.append("cbr")
        cmd.append("-b:v")
        cmd.append(str(self.bitRate) + "k")
        cmd.append(temp_mp4_path)
        # NOTE(review): these options come after the output path - confirm
        # ffmpeg applies them in this position.
        cmd.append("-loglevel")
        cmd.append("error")
        cmd.append("-an")
        subprocess.run(cmd, check=True)
        # Replace the original clip with the retimed one.
        os.remove(mp4_path)
        os.rename(temp_mp4_path, mp4_path)
    print(self.frames)