new-api/relay/helper/price.go
CaIon 4d2993e4cc
Some checks failed
Release (Linux, macOS, Windows) / Linux Release (push) Has been cancelled
Release (Linux, macOS, Windows) / macOS Release (push) Has been cancelled
Release (Linux, macOS, Windows) / Windows Release (push) Has been cancelled
Merge remote-tracking branch 'origin/main' into nightly
# Conflicts:
#	web/src/helpers/render.jsx
#	web/src/hooks/usage-logs/useUsageLogsData.jsx
#	web/src/i18n/locales/en.json
2026-04-09 17:12:21 +08:00

293 lines
9.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package helper
import (
"fmt"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/pkg/billingexpr"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/setting/billing_setting"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
const claudeCacheCreation1hMultiplier = 6 / 3.75
// HandleGroupRatio checks for "auto_group" in the context and updates the group ratio and relayInfo.UsingGroup if present
func HandleGroupRatio(ctx *gin.Context, relayInfo *relaycommon.RelayInfo) types.GroupRatioInfo {
groupRatioInfo := types.GroupRatioInfo{
GroupRatio: 1.0, // default ratio
GroupSpecialRatio: -1,
}
// check auto group
autoGroup, exists := ctx.Get("auto_group")
if exists {
logger.LogDebug(ctx, fmt.Sprintf("final group: %s", autoGroup))
relayInfo.UsingGroup = autoGroup.(string)
}
// check user group special ratio
userGroupRatio, ok := ratio_setting.GetGroupGroupRatio(relayInfo.UserGroup, relayInfo.UsingGroup)
if ok {
// user group special ratio
groupRatioInfo.GroupSpecialRatio = userGroupRatio
groupRatioInfo.GroupRatio = userGroupRatio
groupRatioInfo.HasSpecialRatio = true
} else {
// normal group ratio
groupRatioInfo.GroupRatio = ratio_setting.GetGroupRatio(relayInfo.UsingGroup)
}
return groupRatioInfo
}
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, meta *types.TokenCountMeta) (types.PriceData, error) {
modelPrice, usePrice := ratio_setting.GetModelPrice(info.OriginModelName, false)
groupRatioInfo := HandleGroupRatio(c, info)
// Check if this model uses tiered_expr billing
if billing_setting.GetBillingMode(info.OriginModelName) == billing_setting.BillingModeTieredExpr {
return modelPriceHelperTiered(c, info, promptTokens, meta, groupRatioInfo)
}
var preConsumedQuota int
var modelRatio float64
var completionRatio float64
var cacheRatio float64
var imageRatio float64
var cacheCreationRatio float64
var cacheCreationRatio5m float64
var cacheCreationRatio1h float64
var audioRatio float64
var audioCompletionRatio float64
var freeModel bool
if !usePrice {
preConsumedTokens := common.Max(promptTokens, common.PreConsumedQuota)
if meta.MaxTokens != 0 {
preConsumedTokens += meta.MaxTokens
}
var success bool
var matchName string
modelRatio, success, matchName = ratio_setting.GetModelRatio(info.OriginModelName)
if !success {
acceptUnsetRatio := false
if info.UserSetting.AcceptUnsetRatioModel {
acceptUnsetRatio = true
}
if !acceptUnsetRatio {
return types.PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置请联系管理员设置或开始自用模式Model %s ratio or price not set, please set or start self-use mode", matchName, matchName)
}
}
completionRatio = ratio_setting.GetCompletionRatio(info.OriginModelName)
cacheRatio, _ = ratio_setting.GetCacheRatio(info.OriginModelName)
cacheCreationRatio, _ = ratio_setting.GetCreateCacheRatio(info.OriginModelName)
cacheCreationRatio5m = cacheCreationRatio
// 固定1h和5min缓存写入价格的比例
cacheCreationRatio1h = cacheCreationRatio * claudeCacheCreation1hMultiplier
imageRatio, _ = ratio_setting.GetImageRatio(info.OriginModelName)
audioRatio = ratio_setting.GetAudioRatio(info.OriginModelName)
audioCompletionRatio = ratio_setting.GetAudioCompletionRatio(info.OriginModelName)
ratio := modelRatio * groupRatioInfo.GroupRatio
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
} else {
if meta.ImagePriceRatio != 0 {
modelPrice = modelPrice * meta.ImagePriceRatio
}
preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatioInfo.GroupRatio)
}
// check if free model pre-consume is disabled
if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume {
// if model price or ratio is 0, do not pre-consume quota
if groupRatioInfo.GroupRatio == 0 {
preConsumedQuota = 0
freeModel = true
} else if usePrice {
if modelPrice == 0 {
preConsumedQuota = 0
freeModel = true
}
} else {
if modelRatio == 0 {
preConsumedQuota = 0
freeModel = true
}
}
}
priceData := types.PriceData{
FreeModel: freeModel,
ModelPrice: modelPrice,
ModelRatio: modelRatio,
CompletionRatio: completionRatio,
GroupRatioInfo: groupRatioInfo,
UsePrice: usePrice,
CacheRatio: cacheRatio,
ImageRatio: imageRatio,
AudioRatio: audioRatio,
AudioCompletionRatio: audioCompletionRatio,
CacheCreationRatio: cacheCreationRatio,
CacheCreation5mRatio: cacheCreationRatio5m,
CacheCreation1hRatio: cacheCreationRatio1h,
QuotaToPreConsume: preConsumedQuota,
}
if common.DebugEnabled {
println(fmt.Sprintf("model_price_helper result: %s", priceData.ToSetting()))
}
info.PriceData = priceData
return priceData, nil
}
// ModelPriceHelperPerCall 按次/按量计费的 PriceHelper (MJ、Task)
func ModelPriceHelperPerCall(c *gin.Context, info *relaycommon.RelayInfo) (types.PriceData, error) {
groupRatioInfo := HandleGroupRatio(c, info)
modelPrice, success := ratio_setting.GetModelPrice(info.OriginModelName, true)
usePrice := success
var modelRatio float64
if !success {
defaultPrice, ok := ratio_setting.GetDefaultModelPriceMap()[info.OriginModelName]
if ok {
modelPrice = defaultPrice
usePrice = true
} else {
var ratioSuccess bool
var matchName string
modelRatio, ratioSuccess, matchName = ratio_setting.GetModelRatio(info.OriginModelName)
acceptUnsetRatio := false
if info.UserSetting.AcceptUnsetRatioModel {
acceptUnsetRatio = true
}
if !ratioSuccess && !acceptUnsetRatio {
return types.PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置请联系管理员设置或开始自用模式Model %s ratio or price not set, please set or start self-use mode", matchName, matchName)
}
}
}
var quota int
freeModel := false
if usePrice {
quota = int(modelPrice * common.QuotaPerUnit * groupRatioInfo.GroupRatio)
if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume {
if groupRatioInfo.GroupRatio == 0 || modelPrice == 0 {
quota = 0
freeModel = true
}
}
} else {
// 按量计费:以模型倍率的一半作为预扣额度
quota = int(modelRatio / 2 * common.QuotaPerUnit * groupRatioInfo.GroupRatio)
modelPrice = -1
if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume {
if groupRatioInfo.GroupRatio == 0 || modelRatio == 0 {
quota = 0
freeModel = true
}
}
}
priceData := types.PriceData{
FreeModel: freeModel,
ModelPrice: modelPrice,
ModelRatio: modelRatio,
UsePrice: usePrice,
Quota: quota,
GroupRatioInfo: groupRatioInfo,
}
return priceData, nil
}
func ContainPriceOrRatio(modelName string) bool {
_, ok := ratio_setting.GetModelPrice(modelName, false)
if ok {
return true
}
_, ok, _ = ratio_setting.GetModelRatio(modelName)
if ok {
return true
}
if billing_setting.GetBillingMode(modelName) == billing_setting.BillingModeTieredExpr {
_, ok = billing_setting.GetBillingExpr(modelName)
return ok
}
return false
}
func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, meta *types.TokenCountMeta, groupRatioInfo types.GroupRatioInfo) (types.PriceData, error) {
exprStr, ok := billing_setting.GetBillingExpr(info.OriginModelName)
if !ok {
return types.PriceData{}, fmt.Errorf("model %s is configured as tiered_expr but has no billing expression", info.OriginModelName)
}
estimatedCompletionTokens := 0
if meta.MaxTokens != 0 {
estimatedCompletionTokens = meta.MaxTokens
}
requestInput, err := ResolveIncomingBillingExprRequestInput(c, info)
if err != nil {
return types.PriceData{}, err
}
rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
P: float64(promptTokens),
C: float64(estimatedCompletionTokens),
}, requestInput)
if err != nil {
return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err)
}
// Expression coefficients are $/1M tokens prices; convert to quota the same way per-call billing does.
quotaBeforeGroup := rawCost / 1_000_000 * common.QuotaPerUnit
preConsumedQuota := billingexpr.QuotaRound(quotaBeforeGroup * groupRatioInfo.GroupRatio)
freeModel := false
if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume {
if groupRatioInfo.GroupRatio == 0 || quotaBeforeGroup == 0 {
preConsumedQuota = 0
freeModel = true
}
}
exprHash := billingexpr.ExprHashString(exprStr)
snapshot := &billingexpr.BillingSnapshot{
BillingMode: billing_setting.BillingModeTieredExpr,
ModelName: info.OriginModelName,
ExprString: exprStr,
ExprHash: exprHash,
GroupRatio: groupRatioInfo.GroupRatio,
EstimatedPromptTokens: promptTokens,
EstimatedCompletionTokens: estimatedCompletionTokens,
EstimatedQuotaBeforeGroup: quotaBeforeGroup,
EstimatedQuotaAfterGroup: preConsumedQuota,
EstimatedTier: trace.MatchedTier,
QuotaPerUnit: common.QuotaPerUnit,
ExprVersion: billingexpr.ExprVersion(exprStr),
}
info.TieredBillingSnapshot = snapshot
info.BillingRequestInput = &requestInput
priceData := types.PriceData{
FreeModel: freeModel,
GroupRatioInfo: groupRatioInfo,
QuotaToPreConsume: preConsumedQuota,
}
if common.DebugEnabled {
println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d quotaBeforeGroup=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, quotaBeforeGroup, groupRatioInfo.GroupRatio, trace.MatchedTier))
}
info.PriceData = priceData
return priceData, nil
}