new-api/dto/audio.go

78 lines
2.5 KiB
Go
Raw Normal View History

2024-02-29 16:21:25 +08:00
package dto
import (
"encoding/json"
"strings"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
2024-07-16 22:07:10 +08:00
// AudioRequest describes the JSON body of an audio request: the model to use,
// the input text, the voice, and a set of optional tuning parameters.
//
// Optional string fields are omitted from the encoded JSON when empty. Speed
// is a pointer so that an unset value is omitted, while an explicitly
// provided value (including 0) is preserved.
type AudioRequest struct {
	Model          string          `json:"model"`
	Input          string          `json:"input"`
	Voice          string          `json:"voice"`
	Instructions   string          `json:"instructions,omitempty"`
	ResponseFormat string          `json:"response_format,omitempty"`
	Speed          *float64        `json:"speed,omitempty"`
	StreamFormat   string          `json:"stream_format,omitempty"`
	Metadata       json.RawMessage `json:"metadata,omitempty"`

	// vllm-omni specific fields. They are kept as raw JSON, so whatever
	// value the client sent is stored undecoded.
	TaskType                json.RawMessage `json:"task_type,omitempty"`
	Language                json.RawMessage `json:"language,omitempty"`
	RefAudio                json.RawMessage `json:"ref_audio,omitempty"`
	RefText                 json.RawMessage `json:"ref_text,omitempty"`
	XVectorOnlyMode         json.RawMessage `json:"x_vector_only_mode,omitempty"`
	MaxNewTokens            json.RawMessage `json:"max_new_tokens,omitempty"`
	InitialCodecChunkFrames json.RawMessage `json:"initial_codec_chunk_frames,omitempty"`

	// TODO: ensure that the logic remains correct after the stream is started.
	//Stream json.RawMessage `json:"stream,omitempty"`
}
// GetTokenCountMeta builds the token-counting metadata for this request.
//
// The request's Input is used as the text to count. The token type is
// types.TokenTypeTokenizer when the model name contains "gpt", and
// types.TokenTypeTextNumber for every other model.
func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
	if strings.Contains(r.Model, "gpt") {
		return &types.TokenCountMeta{
			CombineText: r.Input,
			TokenType:   types.TokenTypeTokenizer,
		}
	}
	return &types.TokenCountMeta{
		CombineText: r.Input,
		TokenType:   types.TokenTypeTextNumber,
	}
}
// IsStream reports whether the request's stream_format is "sse".
// The gin context argument is not consulted.
func (r *AudioRequest) IsStream(c *gin.Context) bool {
	const sseFormat = "sse"
	return r.StreamFormat == sseFormat
}
// SetModelName replaces the request's model with modelName.
// An empty modelName leaves the current model untouched.
func (r *AudioRequest) SetModelName(modelName string) {
	if modelName == "" {
		return
	}
	r.Model = modelName
}
2024-02-29 16:21:25 +08:00
// AudioResponse is a JSON payload carrying a single "text" field.
// The field is always emitted, even when the text is empty.
type AudioResponse struct {
	Text string `json:"text"`
}
2024-07-16 22:07:10 +08:00
// WhisperVerboseJSONResponse is the verbose JSON form of a response: the
// task, language, duration and text, plus a list of segments.
// Every field is omitted from the encoded JSON when it holds its zero value.
type WhisperVerboseJSONResponse struct {
	Task     string    `json:"task,omitempty"`
	Language string    `json:"language,omitempty"`
	Duration float64   `json:"duration,omitempty"`
	Text     string    `json:"text,omitempty"`
	Segments []Segment `json:"segments,omitempty"`
}
// Segment is one entry of WhisperVerboseJSONResponse.Segments.
// All fields are always present in the encoded JSON; none use omitempty.
type Segment struct {
	Id               int     `json:"id"`
	Seek             int     `json:"seek"`
	Start            float64 `json:"start"`
	End              float64 `json:"end"`
	Text             string  `json:"text"`
	Tokens           []int   `json:"tokens"`
	Temperature      float64 `json:"temperature"`
	AvgLogprob       float64 `json:"avg_logprob"`
	CompressionRatio float64 `json:"compression_ratio"`
	NoSpeechProb     float64 `json:"no_speech_prob"`
}