2025-02-20 16:41:46 +08:00
package helper
import (
2025-02-28 19:17:15 +08:00
"fmt"
2026-04-24 16:39:12 +08:00
"strings"
2025-10-11 15:30:09 +08:00
"github.com/QuantumNous/new-api/common"
2025-10-28 23:25:43 +08:00
"github.com/QuantumNous/new-api/logger"
2026-04-11 17:19:38 +08:00
"github.com/QuantumNous/new-api/model"
2026-03-16 16:00:22 +08:00
"github.com/QuantumNous/new-api/pkg/billingexpr"
2025-10-11 15:30:09 +08:00
relaycommon "github.com/QuantumNous/new-api/relay/common"
2026-03-16 16:00:22 +08:00
"github.com/QuantumNous/new-api/setting/billing_setting"
2025-10-12 13:31:03 +08:00
"github.com/QuantumNous/new-api/setting/operation_setting"
2025-10-11 15:30:09 +08:00
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/types"
2025-06-11 23:46:59 +08:00
"github.com/gin-gonic/gin"
2025-02-20 16:41:46 +08:00
)
2026-04-11 17:19:38 +08:00
// modelPriceNotConfiguredError builds the bilingual (Chinese + English) error
// returned when a model has no usable price configuration.
//
// Administrators (as reported by model.IsAdmin) get instructions on where to
// configure the price or enable self-use mode; every other user is told to
// contact the site administrator. modelName is substituted into both language
// halves of the message.
func modelPriceNotConfiguredError(modelName string, userId int) error {
	const (
		adminFormat = "模型 %s 的价格未配置。请前往「系统设置 → 运营设置」开启自用模式,或在「系统设置 → 分组与模型定价设置」中为该模型配置价格;" +
			"Model %s price not configured. Go to System Settings → Operation Settings to enable self-use mode, or configure the model price in System Settings → Group & Model Pricing."
		userFormat = "模型 %s 的价格尚未由管理员配置,暂时无法使用,请联系站点管理员开启该模型;" +
			"Model %s has not been priced by the administrator yet. Please contact the site administrator to enable this model."
	)

	format := userFormat
	if model.IsAdmin(userId) {
		format = adminFormat
	}
	return fmt.Errorf(format, modelName, modelName)
}
2025-11-04 00:20:50 +08:00
// claudeCacheCreation1hMultiplier is the factor applied to a model's base
// cache-creation ratio to derive the ratio for 1-hour cache writes (see its
// use in ModelPriceHelper). The constant expression 6 / 3.75 evaluates to 1.6.
// NOTE(review): 6 and 3.75 presumably are the 1-hour and 5-minute cache write
// prices from the pricing table linked below — confirm against the docs.
// https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
const claudeCacheCreation1hMultiplier = 6 / 3.75
2025-06-22 17:52:48 +08:00
// HandleGroupRatio checks for "auto_group" in the context and updates the group ratio and relayInfo.UsingGroup if present
2025-08-14 20:05:06 +08:00
func HandleGroupRatio ( ctx * gin . Context , relayInfo * relaycommon . RelayInfo ) types . GroupRatioInfo {
groupRatioInfo := types . GroupRatioInfo {
2025-06-17 21:05:35 +08:00
GroupRatio : 1.0 , // default ratio
2025-06-19 15:36:06 +08:00
GroupSpecialRatio : - 1 ,
2025-06-17 21:05:35 +08:00
}
// check auto group
autoGroup , exists := ctx . Get ( "auto_group" )
2025-06-16 22:15:12 +08:00
if exists {
2025-10-28 23:25:43 +08:00
logger . LogDebug ( ctx , fmt . Sprintf ( "final group: %s" , autoGroup ) )
2025-06-22 17:52:48 +08:00
relayInfo . UsingGroup = autoGroup . ( string )
2025-06-16 22:15:12 +08:00
}
2025-06-17 21:05:35 +08:00
// check user group special ratio
2025-06-22 17:52:48 +08:00
userGroupRatio , ok := ratio_setting . GetGroupGroupRatio ( relayInfo . UserGroup , relayInfo . UsingGroup )
2025-06-11 23:46:59 +08:00
if ok {
2025-06-17 21:05:35 +08:00
// user group special ratio
groupRatioInfo . GroupSpecialRatio = userGroupRatio
groupRatioInfo . GroupRatio = userGroupRatio
2025-06-22 17:52:48 +08:00
groupRatioInfo . HasSpecialRatio = true
2025-06-17 21:05:35 +08:00
} else {
// normal group ratio
2025-06-22 17:52:48 +08:00
groupRatioInfo . GroupRatio = ratio_setting . GetGroupRatio ( relayInfo . UsingGroup )
2025-06-11 23:46:59 +08:00
}
2025-06-17 21:05:35 +08:00
return groupRatioInfo
}
2025-08-14 20:05:06 +08:00
func ModelPriceHelper ( c * gin . Context , info * relaycommon . RelayInfo , promptTokens int , meta * types . TokenCountMeta ) ( types . PriceData , error ) {
2025-06-18 18:00:49 +08:00
modelPrice , usePrice := ratio_setting . GetModelPrice ( info . OriginModelName , false )
2025-06-17 21:05:35 +08:00
groupRatioInfo := HandleGroupRatio ( c , info )
2026-03-16 16:00:22 +08:00
// Check if this model uses tiered_expr billing
if billing_setting . GetBillingMode ( info . OriginModelName ) == billing_setting . BillingModeTieredExpr {
return modelPriceHelperTiered ( c , info , promptTokens , meta , groupRatioInfo )
}
2025-02-20 16:41:46 +08:00
var preConsumedQuota int
var modelRatio float64
2025-03-02 15:47:12 +08:00
var completionRatio float64
2025-03-08 01:30:50 +08:00
var cacheRatio float64
2025-04-24 19:25:08 +08:00
var imageRatio float64
2025-03-12 21:31:46 +08:00
var cacheCreationRatio float64
2025-11-04 00:20:50 +08:00
var cacheCreationRatio5m float64
var cacheCreationRatio1h float64
2025-08-30 23:28:09 +08:00
var audioRatio float64
var audioCompletionRatio float64
2025-10-12 13:31:03 +08:00
var freeModel bool
2025-02-20 16:41:46 +08:00
if ! usePrice {
2025-08-14 21:10:04 +08:00
preConsumedTokens := common . Max ( promptTokens , common . PreConsumedQuota )
2025-08-14 20:05:06 +08:00
if meta . MaxTokens != 0 {
2025-08-14 21:10:04 +08:00
preConsumedTokens += meta . MaxTokens
2025-02-20 16:41:46 +08:00
}
2025-02-28 20:28:44 +08:00
var success bool
2025-07-20 10:12:36 +08:00
var matchName string
modelRatio , success , matchName = ratio_setting . GetModelRatio ( info . OriginModelName )
2025-02-28 19:17:15 +08:00
if ! success {
2025-04-03 17:32:48 +08:00
acceptUnsetRatio := false
2025-07-07 14:26:37 +08:00
if info . UserSetting . AcceptUnsetRatioModel {
acceptUnsetRatio = true
2025-04-03 17:32:48 +08:00
}
if ! acceptUnsetRatio {
2026-04-11 17:19:38 +08:00
return types . PriceData { } , modelPriceNotConfiguredError ( matchName , info . UserId )
2025-03-01 21:13:48 +08:00
}
2025-02-28 19:17:15 +08:00
}
2025-06-18 18:00:49 +08:00
completionRatio = ratio_setting . GetCompletionRatio ( info . OriginModelName )
cacheRatio , _ = ratio_setting . GetCacheRatio ( info . OriginModelName )
cacheCreationRatio , _ = ratio_setting . GetCreateCacheRatio ( info . OriginModelName )
2025-11-04 00:20:50 +08:00
cacheCreationRatio5m = cacheCreationRatio
// 固定1h和5min缓存写入价格的比例
cacheCreationRatio1h = cacheCreationRatio * claudeCacheCreation1hMultiplier
2025-06-18 18:00:49 +08:00
imageRatio , _ = ratio_setting . GetImageRatio ( info . OriginModelName )
2025-08-30 23:28:09 +08:00
audioRatio = ratio_setting . GetAudioRatio ( info . OriginModelName )
audioCompletionRatio = ratio_setting . GetAudioCompletionRatio ( info . OriginModelName )
2025-06-17 21:05:35 +08:00
ratio := modelRatio * groupRatioInfo . GroupRatio
2025-02-20 16:41:46 +08:00
preConsumedQuota = int ( float64 ( preConsumedTokens ) * ratio )
} else {
2025-08-14 20:05:06 +08:00
if meta . ImagePriceRatio != 0 {
modelPrice = modelPrice * meta . ImagePriceRatio
}
2025-06-17 21:05:35 +08:00
preConsumedQuota = int ( modelPrice * common . QuotaPerUnit * groupRatioInfo . GroupRatio )
2025-02-20 16:41:46 +08:00
}
2025-03-17 17:52:54 +08:00
2025-10-12 13:31:03 +08:00
// check if free model pre-consume is disabled
if ! operation_setting . GetQuotaSetting ( ) . EnableFreeModelPreConsume {
// if model price or ratio is 0, do not pre-consume quota
2025-12-02 22:09:48 +08:00
if groupRatioInfo . GroupRatio == 0 {
preConsumedQuota = 0
freeModel = true
} else if usePrice {
2025-10-12 13:31:03 +08:00
if modelPrice == 0 {
preConsumedQuota = 0
freeModel = true
}
} else {
if modelRatio == 0 {
preConsumedQuota = 0
freeModel = true
}
}
}
2025-08-14 20:05:06 +08:00
priceData := types . PriceData {
2025-10-12 13:31:03 +08:00
FreeModel : freeModel ,
ModelPrice : modelPrice ,
ModelRatio : modelRatio ,
CompletionRatio : completionRatio ,
GroupRatioInfo : groupRatioInfo ,
UsePrice : usePrice ,
CacheRatio : cacheRatio ,
ImageRatio : imageRatio ,
AudioRatio : audioRatio ,
AudioCompletionRatio : audioCompletionRatio ,
CacheCreationRatio : cacheCreationRatio ,
2025-11-04 00:20:50 +08:00
CacheCreation5mRatio : cacheCreationRatio5m ,
CacheCreation1hRatio : cacheCreationRatio1h ,
2025-10-12 13:31:03 +08:00
QuotaToPreConsume : preConsumedQuota ,
2025-03-17 17:52:54 +08:00
}
if common . DebugEnabled {
println ( fmt . Sprintf ( "model_price_helper result: %s" , priceData . ToSetting ( ) ) )
}
2025-08-14 20:05:06 +08:00
info . PriceData = priceData
2025-03-17 17:52:54 +08:00
return priceData , nil
2025-02-20 16:41:46 +08:00
}
2025-04-18 19:36:18 +08:00
2026-04-05 20:07:48 +08:00
// ModelPriceHelperPerCall 按次/按量计费的 PriceHelper (MJ、Task)
2026-03-02 19:09:48 +08:00
func ModelPriceHelperPerCall ( c * gin . Context , info * relaycommon . RelayInfo ) ( types . PriceData , error ) {
2025-08-14 21:10:04 +08:00
groupRatioInfo := HandleGroupRatio ( c , info )
modelPrice , success := ratio_setting . GetModelPrice ( info . OriginModelName , true )
2026-04-05 20:07:48 +08:00
usePrice := success
var modelRatio float64
2026-03-02 19:09:48 +08:00
2026-04-05 20:07:48 +08:00
if ! success {
2025-10-10 23:56:36 +08:00
defaultPrice , ok := ratio_setting . GetDefaultModelPriceMap ( ) [ info . OriginModelName ]
2026-03-03 10:56:57 +08:00
if ok {
2025-08-14 21:10:04 +08:00
modelPrice = defaultPrice
2026-04-05 20:07:48 +08:00
usePrice = true
2026-03-03 10:56:57 +08:00
} else {
2026-04-05 20:07:48 +08:00
var ratioSuccess bool
var matchName string
modelRatio , ratioSuccess , matchName = ratio_setting . GetModelRatio ( info . OriginModelName )
2026-03-02 19:09:48 +08:00
acceptUnsetRatio := false
if info . UserSetting . AcceptUnsetRatioModel {
acceptUnsetRatio = true
}
2026-03-03 10:56:57 +08:00
if ! ratioSuccess && ! acceptUnsetRatio {
2026-04-11 17:19:38 +08:00
return types . PriceData { } , modelPriceNotConfiguredError ( matchName , info . UserId )
2026-03-02 19:09:48 +08:00
}
}
2025-08-14 21:10:04 +08:00
}
refactor(task): extract billing and polling logic from controller to service layer
Restructure the task relay system for better separation of concerns:
- Extract task billing into service/task_billing.go with unified settlement flow
- Move task polling loop from controller to service/task_polling.go (supports Suno + video platforms)
- Split RelayTask into fetch/submit paths with dedicated retry logic (taskSubmitWithRetry)
- Add TaskDto, TaskResponse generics, and FetchReq to dto/task.go
- Add taskcommon/helpers.go for shared task adaptor utilities
- Remove controller/task_video.go (logic consolidated into service layer)
- Update all task adaptors (ali, doubao, gemini, hailuo, jimeng, kling, sora, suno, vertex, vidu)
- Simplify frontend task logs to use new TaskDto response format
2026-02-10 20:40:33 +08:00
2026-04-05 20:07:48 +08:00
var quota int
refactor(task): extract billing and polling logic from controller to service layer
Restructure the task relay system for better separation of concerns:
- Extract task billing into service/task_billing.go with unified settlement flow
- Move task polling loop from controller to service/task_polling.go (supports Suno + video platforms)
- Split RelayTask into fetch/submit paths with dedicated retry logic (taskSubmitWithRetry)
- Add TaskDto, TaskResponse generics, and FetchReq to dto/task.go
- Add taskcommon/helpers.go for shared task adaptor utilities
- Remove controller/task_video.go (logic consolidated into service layer)
- Update all task adaptors (ali, doubao, gemini, hailuo, jimeng, kling, sora, suno, vertex, vidu)
- Simplify frontend task logs to use new TaskDto response format
2026-02-10 20:40:33 +08:00
freeModel := false
2026-04-05 20:07:48 +08:00
if usePrice {
quota = int ( modelPrice * common . QuotaPerUnit * groupRatioInfo . GroupRatio )
if ! operation_setting . GetQuotaSetting ( ) . EnableFreeModelPreConsume {
if groupRatioInfo . GroupRatio == 0 || modelPrice == 0 {
quota = 0
freeModel = true
}
}
} else {
// 按量计费:以模型倍率的一半作为预扣额度
quota = int ( modelRatio / 2 * common . QuotaPerUnit * groupRatioInfo . GroupRatio )
modelPrice = - 1
if ! operation_setting . GetQuotaSetting ( ) . EnableFreeModelPreConsume {
if groupRatioInfo . GroupRatio == 0 || modelRatio == 0 {
quota = 0
freeModel = true
}
refactor(task): extract billing and polling logic from controller to service layer
Restructure the task relay system for better separation of concerns:
- Extract task billing into service/task_billing.go with unified settlement flow
- Move task polling loop from controller to service/task_polling.go (supports Suno + video platforms)
- Split RelayTask into fetch/submit paths with dedicated retry logic (taskSubmitWithRetry)
- Add TaskDto, TaskResponse generics, and FetchReq to dto/task.go
- Add taskcommon/helpers.go for shared task adaptor utilities
- Remove controller/task_video.go (logic consolidated into service layer)
- Update all task adaptors (ali, doubao, gemini, hailuo, jimeng, kling, sora, suno, vertex, vidu)
- Simplify frontend task logs to use new TaskDto response format
2026-02-10 20:40:33 +08:00
}
}
priceData := types . PriceData {
FreeModel : freeModel ,
2025-08-14 21:10:04 +08:00
ModelPrice : modelPrice ,
2026-04-05 20:07:48 +08:00
ModelRatio : modelRatio ,
UsePrice : usePrice ,
2025-08-14 21:10:04 +08:00
Quota : quota ,
GroupRatioInfo : groupRatioInfo ,
}
2026-03-02 19:09:48 +08:00
return priceData , nil
2025-08-14 21:10:04 +08:00
}
2025-06-22 17:52:48 +08:00
2026-04-24 16:39:12 +08:00
func HasModelBillingConfig ( modelName string ) bool {
if _ , ok := ratio_setting . GetModelPrice ( modelName , false ) ; ok {
return true
}
if _ , ok , _ := ratio_setting . GetModelRatio ( modelName ) ; ok {
return true
}
if billing_setting . GetBillingMode ( modelName ) != billing_setting . BillingModeTieredExpr {
return false
}
expr , ok := billing_setting . GetBillingExpr ( modelName )
return ok && strings . TrimSpace ( expr ) != ""
2025-04-18 19:36:18 +08:00
}
2026-03-16 16:00:22 +08:00
func modelPriceHelperTiered ( c * gin . Context , info * relaycommon . RelayInfo , promptTokens int , meta * types . TokenCountMeta , groupRatioInfo types . GroupRatioInfo ) ( types . PriceData , error ) {
exprStr , ok := billing_setting . GetBillingExpr ( info . OriginModelName )
if ! ok {
return types . PriceData { } , fmt . Errorf ( "model %s is configured as tiered_expr but has no billing expression" , info . OriginModelName )
}
estimatedCompletionTokens := 0
if meta . MaxTokens != 0 {
estimatedCompletionTokens = meta . MaxTokens
}
requestInput , err := ResolveIncomingBillingExprRequestInput ( c , info )
if err != nil {
return types . PriceData { } , err
}
2026-03-16 20:11:55 +08:00
rawCost , trace , err := billingexpr . RunExprWithRequest ( exprStr , billingexpr . TokenParams {
2026-04-25 13:24:20 +08:00
P : float64 ( promptTokens ) ,
C : float64 ( estimatedCompletionTokens ) ,
Len : float64 ( promptTokens ) ,
2026-03-16 16:00:22 +08:00
} , requestInput )
if err != nil {
return types . PriceData { } , fmt . Errorf ( "model %s tiered expr run failed: %w" , info . OriginModelName , err )
}
2026-03-16 20:11:55 +08:00
// Expression coefficients are $/1M tokens prices; convert to quota the same way per-call billing does.
quotaBeforeGroup := rawCost / 1_000_000 * common . QuotaPerUnit
preConsumedQuota := billingexpr . QuotaRound ( quotaBeforeGroup * groupRatioInfo . GroupRatio )
2026-03-16 16:00:22 +08:00
freeModel := false
if ! operation_setting . GetQuotaSetting ( ) . EnableFreeModelPreConsume {
2026-04-24 00:33:54 +08:00
if groupRatioInfo . GroupRatio == 0 {
2026-03-16 16:00:22 +08:00
preConsumedQuota = 0
freeModel = true
}
}
exprHash := billingexpr . ExprHashString ( exprStr )
snapshot := & billingexpr . BillingSnapshot {
BillingMode : billing_setting . BillingModeTieredExpr ,
ModelName : info . OriginModelName ,
ExprString : exprStr ,
ExprHash : exprHash ,
GroupRatio : groupRatioInfo . GroupRatio ,
EstimatedPromptTokens : promptTokens ,
EstimatedCompletionTokens : estimatedCompletionTokens ,
2026-03-16 20:11:55 +08:00
EstimatedQuotaBeforeGroup : quotaBeforeGroup ,
2026-03-16 16:00:22 +08:00
EstimatedQuotaAfterGroup : preConsumedQuota ,
EstimatedTier : trace . MatchedTier ,
2026-03-16 20:11:55 +08:00
QuotaPerUnit : common . QuotaPerUnit ,
2026-03-17 15:29:43 +08:00
ExprVersion : billingexpr . ExprVersion ( exprStr ) ,
2026-03-16 16:00:22 +08:00
}
info . TieredBillingSnapshot = snapshot
info . BillingRequestInput = & requestInput
priceData := types . PriceData {
FreeModel : freeModel ,
GroupRatioInfo : groupRatioInfo ,
QuotaToPreConsume : preConsumedQuota ,
}
if common . DebugEnabled {
2026-03-16 20:11:55 +08:00
println ( fmt . Sprintf ( "model_price_helper_tiered result: model=%s preConsume=%d quotaBeforeGroup=%.2f groupRatio=%.2f tier=%s" , info . OriginModelName , preConsumedQuota , quotaBeforeGroup , groupRatioInfo . GroupRatio , trace . MatchedTier ) )
2026-03-16 16:00:22 +08:00
}
info . PriceData = priceData
return priceData , nil
}