171 lines
4.8 KiB
Python
171 lines
4.8 KiB
Python
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
import io
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import public_tools
|
|||
|
|
from pathlib import Path
|
|||
|
|
from huggingface_hub import hf_hub_download
|
|||
|
|
from faster_whisper import WhisperModel
|
|||
|
|
|
|||
|
|
# Re-wrap stdout so that everything printed by this script is encoded as UTF-8
# (the progress messages below contain Chinese text).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")

# Debug helper (kept commented out): if sys.argv has fewer than 2 entries, no
# arguments were passed, so install a default argument list.
# "C:\\Users\\27698\\Desktop\\LAITool\\resources\\scripts\\Lai.exe" -c "D:/来推项目集/7.4/娱乐:江湖大哥退休,去拍电影/scripts/output_crop_00001.json" "NVIDIA"
# if len(sys.argv) < 2:
#     sys.argv = [
#         "C:\\Users\\27698\\Desktop\\LAITool\\resources\\scripts\\Lai.exe",
#         "-w",
#         "C:\\Users\\27698\\Desktop\\测试\\test\\mjTestoutput_crop_00001.mp4",
#         "C:\\Users\\27698\\Desktop\\测试\\test\\data\\frame",
#         "C:\\Users\\27698\\Desktop\\测试\\test\\tmp\\input_crop",
#         30,
#         "NVIDIA",
#     ]

# Echo the raw command line.
print(sys.argv)

# Without at least a mode flag there is nothing to do: print the usage line and
# stop with status 0.
if len(sys.argv) < 2:
    print("Params: <runtime-config.json>")
    # sys.exit instead of the bare ``exit`` helper: ``exit`` is installed by the
    # ``site`` module for interactive use and is not guaranteed to exist in a
    # frozen executable, which this script explicitly supports (see below).
    sys.exit(0)

# Directory that contains the running program: the executable when frozen
# (``sys.frozen`` is set), otherwise this source file.
# NOTE(review): ``cript_directory`` looks like a typo for ``script_directory``;
# the name is kept unchanged in case other code refers to it.
if getattr(sys, "frozen", False):
    cript_directory = os.path.dirname(sys.executable)
elif __file__:
    cript_directory = os.path.dirname(__file__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _load_whisper_model():
    """Fetch the faster-whisper large-v3 files and return a ``WhisperModel``.

    ``model.bin`` and its four companion files are requested from the
    ``Systran/faster-whisper-large-v3`` repository through
    ``hf_hub_download``; the directory that holds ``model.bin`` is then handed
    to ``WhisperModel`` with ``local_files_only=True``.
    Progress messages are printed and flushed before and after loading.

    Returns:
        The constructed ``WhisperModel`` instance.
    """
    repo_id = "Systran/faster-whisper-large-v3"

    print("正在下载或加载模型")
    sys.stdout.flush()

    model_path = Path(hf_hub_download(repo_id=repo_id, filename="model.bin"))
    # The return values are not needed; these calls only make sure the files
    # are present (presumably next to model.bin — verify against the
    # huggingface_hub cache layout).
    for filename in (
        "config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "vocabulary.json",
    ):
        hf_hub_download(repo_id=repo_id, filename=filename)

    model = WhisperModel(
        model_size_or_path=str(model_path.parent),
        device="auto",
        local_files_only=True,
    )

    print("模型加载成功,开始识别")
    sys.stdout.flush()
    return model


def _transcribe_file(model, audio_path):
    """Transcribe one audio file and return its text as a single string.

    Args:
        model: Object exposing ``transcribe`` (a ``WhisperModel``).
        audio_path: Path of the file to transcribe, as a ``str`` (it is
            concatenated into the progress message).

    Returns:
        The text of every segment, each followed by the full stop "。",
        joined without any other separator.
    """
    # The second element of the returned pair (transcription info) is unused.
    segments, _ = model.transcribe(
        audio_path,
        beam_size=5,
        language="zh",
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=1000),
    )
    transcript = "".join(f"{segment.text}。" for segment in segments)

    print(audio_path + "识别完成")
    sys.stdout.flush()
    return transcript


def GetText(out_folder, mp3_folder):
    """Transcribe every MP3 under ``mp3_folder`` into ``out_folder/文案.txt``.

    The folder is walked recursively. Directories and file names are visited
    in sorted order so the order of the transcripts does not depend on the
    file system's enumeration order, and the ``.mp3`` extension is matched
    case-insensitively.

    Args:
        out_folder: Directory in which ``文案.txt`` is written.
        mp3_folder: Root directory searched for MP3 files.

    The list of transcripts (one string per file) is passed to
    ``public_tools.PublicTools().write_to_file``; the on-disk format is decided
    by that helper.
    """
    model = _load_whisper_model()

    # Collect all MP3 files below the given folder.
    mp3_list = []
    for root, dirs, files in os.walk(mp3_folder):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            if file.lower().endswith(".mp3"):
                mp3_list.append(os.path.join(root, file))

    text = [_transcribe_file(model, mp) for mp in mp3_list]

    # Write the results out.
    print("文本全部识别成功,正在写出")
    sys.stdout.flush()
    tools = public_tools.PublicTools()
    tools.write_to_file(text, os.path.join(out_folder, "文案.txt"))
    print("写出完成")
    sys.stdout.flush()


def GetTextTask(out_folder, mp, name):
    """Transcribe the single file ``mp`` into ``out_folder/<name>.txt``.

    Args:
        out_folder: Directory in which the text file is written.
        mp: Path of the file to transcribe (``str``).
        name: Base name of the output file; ``.txt`` is appended.

    The transcript is wrapped in a one-element list before being passed to
    ``public_tools.PublicTools().write_to_file``, the same shape ``GetText``
    passes.
    """
    model = _load_whisper_model()
    text = [_transcribe_file(model, mp)]

    # Write the result out.
    tools = public_tools.PublicTools()
    tools.write_to_file(text, os.path.join(out_folder, name + ".txt"))
    sys.stdout.flush()
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Example manual invocation (kept for debugging):
# GetTextTask(
#     "C:\\Users\\27698\\Desktop\\测试\\mjTest",
#     "C:\\Users\\27698\\Desktop\\测试\\mjTest\\data\\frame\\00001.mp4",
#     "00001",
# )
|
|||
|
|
|
|||
|
|
# Command-line dispatch on the mode flag in sys.argv[1]:
#   -ts <out_folder> <mp3_folder>        transcribe every MP3 in a folder
#   -t  <out_folder> <audio_file> <name> transcribe a single file
# Any other flag prints the generic usage line and exits with status 0.
# A known flag with too few arguments used to die with an uncaught IndexError;
# it now prints a usage line and exits with status 1, so the process still
# reports failure to whatever launched it.
if sys.argv[1] == "-ts":
    if len(sys.argv) < 4:
        print("Params: -ts <out_folder> <mp3_folder>")
        sys.exit(1)
    GetText(
        sys.argv[2],
        sys.argv[3],
    )
elif sys.argv[1] == "-t":
    if len(sys.argv) < 5:
        print("Params: -t <out_folder> <audio_file> <name>")
        sys.exit(1)
    GetTextTask(
        sys.argv[2],
        sys.argv[3],
        sys.argv[4],
    )
else:
    print("Params: <runtime-config.json>")
    # sys.exit rather than the ``site``-provided ``exit`` helper, which is not
    # guaranteed to be defined in a frozen executable.
    sys.exit(0)
|