2024-02-29 16:21:25 +08:00
|
|
|
|
package dto
|
|
|
|
|
|
|
2025-08-14 20:05:06 +08:00
|
|
|
|
import (
|
2025-10-18 01:13:54 +08:00
|
|
|
|
"encoding/json"
|
2025-12-13 17:24:23 +08:00
|
|
|
|
"strings"
|
2025-10-18 01:13:54 +08:00
|
|
|
|
|
2025-10-11 15:30:09 +08:00
|
|
|
|
"github.com/QuantumNous/new-api/types"
|
2025-08-14 20:05:06 +08:00
|
|
|
|
|
|
|
|
|
|
"github.com/gin-gonic/gin"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2024-07-16 22:07:10 +08:00
|
|
|
|
// AudioRequest is the JSON request body for audio (speech) generation.
//
// Model, Input and Voice are always serialized; every other field is
// optional and is omitted from the encoded JSON when empty.
type AudioRequest struct {
	Model          string `json:"model"`
	Input          string `json:"input"`
	Voice          string `json:"voice"`
	Instructions   string `json:"instructions,omitempty"`
	ResponseFormat string `json:"response_format,omitempty"`
	// Speed is a pointer so that a missing value (nil) is omitted on encode.
	Speed *float64 `json:"speed,omitempty"`
	// StreamFormat is compared against "sse" by IsStream.
	StreamFormat string `json:"stream_format,omitempty"`
	// Metadata is kept as raw JSON and is not decoded here.
	Metadata json.RawMessage `json:"metadata,omitempty"`

	// vllm-omni parameters, kept as raw JSON.
	TaskType                json.RawMessage `json:"task_type,omitempty"`
	Language                json.RawMessage `json:"language,omitempty"`
	RefAudio                json.RawMessage `json:"ref_audio,omitempty"`
	RefText                 json.RawMessage `json:"ref_text,omitempty"`
	XVectorOnlyMode         json.RawMessage `json:"x_vector_only_mode,omitempty"`
	MaxNewTokens            json.RawMessage `json:"max_new_tokens,omitempty"`
	InitialCodecChunkFrames json.RawMessage `json:"initial_codec_chunk_frames,omitempty"`
	// TODO: ensure that the logic remains correct after the stream is started.
	//Stream json.RawMessage `json:"stream,omitempty"`
}
|
|
|
|
|
|
|
2025-08-14 20:05:06 +08:00
|
|
|
|
func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
|
|
|
|
|
|
meta := &types.TokenCountMeta{
|
|
|
|
|
|
CombineText: r.Input,
|
|
|
|
|
|
TokenType: types.TokenTypeTextNumber,
|
|
|
|
|
|
}
|
2025-12-13 17:24:23 +08:00
|
|
|
|
if strings.Contains(r.Model, "gpt") {
|
|
|
|
|
|
meta.TokenType = types.TokenTypeTokenizer
|
|
|
|
|
|
}
|
2025-08-14 20:05:06 +08:00
|
|
|
|
return meta
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (r *AudioRequest) IsStream(c *gin.Context) bool {
|
2025-12-13 17:24:23 +08:00
|
|
|
|
return r.StreamFormat == "sse"
|
2025-08-14 20:05:06 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-08-23 13:12:15 +08:00
|
|
|
|
func (r *AudioRequest) SetModelName(modelName string) {
|
|
|
|
|
|
if modelName != "" {
|
|
|
|
|
|
r.Model = modelName
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2024-02-29 16:21:25 +08:00
|
|
|
|
// AudioResponse is a JSON audio response carrying a single text field.
// Text is always serialized, even when empty.
type AudioResponse struct {
	Text string `json:"text"`
}
|
2024-07-16 22:07:10 +08:00
|
|
|
|
|
|
|
|
|
|
type WhisperVerboseJSONResponse struct {
|
|
|
|
|
|
Task string `json:"task,omitempty"`
|
|
|
|
|
|
Language string `json:"language,omitempty"`
|
|
|
|
|
|
Duration float64 `json:"duration,omitempty"`
|
|
|
|
|
|
Text string `json:"text,omitempty"`
|
|
|
|
|
|
Segments []Segment `json:"segments,omitempty"`
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Segment is one entry of WhisperVerboseJSONResponse.Segments.
//
// All fields are always serialized; none use omitempty.
type Segment struct {
	Id               int     `json:"id"`
	Seek             int     `json:"seek"`
	Start            float64 `json:"start"`
	End              float64 `json:"end"`
	Text             string  `json:"text"`
	Tokens           []int   `json:"tokens"`
	Temperature      float64 `json:"temperature"`
	AvgLogprob       float64 `json:"avg_logprob"`
	CompressionRatio float64 `json:"compression_ratio"`
	NoSpeechProb     float64 `json:"no_speech_prob"`
}
|