# -*- coding: utf-8 -*-
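"""Transcribe audio to Chinese text with faster-whisper (Systran/faster-whisper-large-v3).

Intended to be run as a LAITool helper script (see the example command below) with either:
  -ts <out_folder> <mp3_folder>          transcribe every .mp3 under mp3_folder into 文案.txt
  -t  <out_folder> <audio_file> <name>   transcribe a single audio/video file into <name>.txt
"""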
import io
import os
import sys
import public_tools
from pathlib import Path
from huggingface_hub import hf_hub_download
from faster_whisper import WhisperModel
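# Re-wrap stdout as UTF-8 so Chinese log messages print correctly even when the
# console's default encoding is not UTF-8 (e.g. GBK on Windows).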
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
# Check len(sys.argv): if it is less than 2, no arguments were passed in, so the
# block below can set default arguments for local debugging. Example invocation:
# "C:\\Users\\27698\\Desktop\\LAITool\\resources\\scripts\\Lai.exe" -c "D:/来推项目集/7.4/娱乐:江湖大哥退休,去拍电影/scripts/output_crop_00001.json" "NVIDIA"
# if len(sys.argv) < 2:
#     sys.argv = [
#         "C:\\Users\\27698\\Desktop\\LAITool\\resources\\scripts\\Lai.exe",
#         "-w",
#         "C:\\Users\\27698\\Desktop\\测试\\test\\mjTestoutput_crop_00001.mp4",
#         "C:\\Users\\27698\\Desktop\\测试\\test\data\\frame",
#         "C:\\Users\\27698\\Desktop\\测试\\test\\tmp\\input_crop",
#         30,
#         "NVIDIA",
#     ]
print(sys.argv)
if len(sys.argv) < 2:
    print("Params: -ts <out_folder> <mp3_folder> | -t <out_folder> <audio_file> <name>")
    exit(0)
# Resolve the directory containing the frozen executable (PyInstaller sets sys.frozen)
# or, when run as a plain script, this source file.
if getattr(sys, "frozen", False):
    script_directory = os.path.dirname(sys.executable)
elif __file__:
    script_directory = os.path.dirname(__file__)
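# NOTE: script_directory is computed here but is not referenced again in this file.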
def GetText(out_folder, mp3_folder):
    text = []
    # Fetch the model first (downloads on first run, afterwards served from the local HF cache).
    print("Downloading or loading the model")
    sys.stdout.flush()
    model_path = Path(
        hf_hub_download(repo_id="Systran/faster-whisper-large-v3", filename="model.bin")
    )
    # Pull the remaining model files into the same cache directory so that
    # WhisperModel can load everything with local_files_only=True.
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="config.json",
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="preprocessor_config.json",
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="tokenizer.json",
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="vocabulary.json",
    )
    model = WhisperModel(
        model_size_or_path=os.path.dirname(model_path),
        device="auto",
        local_files_only=True,
    )
    print("Model loaded, starting transcription")
    sys.stdout.flush()
    # Collect every .mp3 file under the given folder (recursively).
    mp3_list = []
    for root, dirs, files in os.walk(mp3_folder):
        for file in files:
            if file.endswith(".mp3"):
                mp3_list.append(os.path.join(root, file))
    for mp in mp3_list:
        segments, info = model.transcribe(
            mp,
            beam_size=5,
            language="zh",
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=1000),
        )
        tmp_text = ""
        for segment in segments:
            tmp_text += segment.text
        print(mp + " transcribed")
        sys.stdout.flush()
        text.append(tmp_text)
    # Write the results out.
    print("All text transcribed, writing output")
    sys.stdout.flush()
    # public_tools is a project-local helper module; write_to_file is assumed to
    # write each entry of the list to the given path.
    tools = public_tools.PublicTools()
    tools.write_to_file(text, os.path.join(out_folder, "文案.txt"))  # 文案 = "copy text"
    print("Write complete")
    sys.stdout.flush()
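
# Single-file variant of GetText: transcribe one audio/video file and write <name>.txt into out_folder.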
def GetTextTask(out_folder, mp, name):
    text = []
    # Fetch the model first (downloads on first run, afterwards served from the local HF cache).
    print("Downloading or loading the model")
    sys.stdout.flush()
    model_path = Path(
        hf_hub_download(repo_id="Systran/faster-whisper-large-v3", filename="model.bin")
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="config.json",
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="preprocessor_config.json",
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="tokenizer.json",
    )
    hf_hub_download(
        repo_id="Systran/faster-whisper-large-v3",
        filename="vocabulary.json",
    )
    model = WhisperModel(
        model_size_or_path=os.path.dirname(model_path),
        device="auto",
        local_files_only=True,
    )
    print("Model loaded, starting transcription")
    sys.stdout.flush()
    segments, info = model.transcribe(
        mp,
        beam_size=5,
        language="zh",
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=1000),
    )
    tmp_text = ""
    for segment in segments:
        tmp_text += segment.text
    print(mp + " transcribed")
    sys.stdout.flush()
    text.append(tmp_text)
    # Write the result out.
    sys.stdout.flush()
    tools = public_tools.PublicTools()
    tools.write_to_file(text, os.path.join(out_folder, name + ".txt"))
    sys.stdout.flush()
# GetTextTask(
#     "C:\\Users\\27698\\Desktop\\测试\\mjTest",
#     "C:\\Users\\27698\\Desktop\\测试\\mjTest\\data\\frame\\00001.mp4",
#     "00001",
# )
if sys.argv[1] == "-ts":
GetText(
sys.argv[2],
sys.argv[3],
)
elif sys.argv[1] == "-t":
GetTextTask(
sys.argv[2],
sys.argv[3],
sys.argv[4],
)
else:
print("Params: <runtime-config.json>")
exit(0)
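
# Usage sketch (the script filename and paths below are placeholders, not from the original project):
#   python whisper_transcribe.py -ts "D:\output" "D:\audio\mp3s"
#   python whisper_transcribe.py -t  "D:\output" "D:\audio\clip.mp3" "00001"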