fix: restore pre-3400 OpenRouter billing semantics
This commit is contained in:
parent
dbf900a531
commit
9f61407bf0
@ -113,8 +113,10 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
|
|||||||
summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
|
summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
|
||||||
summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
|
summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
|
||||||
legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
|
legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
|
||||||
|
isOpenRouter := relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter
|
||||||
|
isOpenRouterClaudeBilling := isOpenRouter && summary.IsClaudeUsageSemantic
|
||||||
|
|
||||||
if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
|
if isOpenRouterClaudeBilling {
|
||||||
summary.PromptTokens -= summary.CacheTokens
|
summary.PromptTokens -= summary.CacheTokens
|
||||||
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
|
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
|
||||||
if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
||||||
@ -197,7 +199,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
|
|||||||
|
|
||||||
var cachedTokensWithRatio decimal.Decimal
|
var cachedTokensWithRatio decimal.Decimal
|
||||||
if !dCacheTokens.IsZero() {
|
if !dCacheTokens.IsZero() {
|
||||||
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
|
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter {
|
||||||
baseTokens = baseTokens.Sub(dCacheTokens)
|
baseTokens = baseTokens.Sub(dCacheTokens)
|
||||||
}
|
}
|
||||||
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
|
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
|
||||||
@ -206,7 +208,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
|
|||||||
var cachedCreationTokensWithRatio decimal.Decimal
|
var cachedCreationTokensWithRatio decimal.Decimal
|
||||||
hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
|
hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
|
||||||
if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
|
if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
|
||||||
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
|
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter {
|
||||||
baseTokens = baseTokens.Sub(dCachedCreationTokens)
|
baseTokens = baseTokens.Sub(dCachedCreationTokens)
|
||||||
cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
|
cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/constant"
|
||||||
"github.com/QuantumNous/new-api/dto"
|
"github.com/QuantumNous/new-api/dto"
|
||||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
"github.com/QuantumNous/new-api/types"
|
"github.com/QuantumNous/new-api/types"
|
||||||
@ -204,3 +205,113 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi
|
|||||||
// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
|
// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
|
||||||
require.Equal(t, 1624, summary.Quota)
|
require.Equal(t, 1624, summary.Quota)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
relayInfo := &relaycommon.RelayInfo{
|
||||||
|
OriginModelName: "openai/gpt-4.1",
|
||||||
|
ChannelMeta: &relaycommon.ChannelMeta{
|
||||||
|
ChannelType: constant.ChannelTypeOpenRouter,
|
||||||
|
},
|
||||||
|
PriceData: types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 1,
|
||||||
|
CacheRatio: 0.1,
|
||||||
|
CacheCreationRatio: 1.25,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||||
|
},
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 2604,
|
||||||
|
CompletionTokens: 383,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 2432,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
// OpenRouter usage is already normalized. prompt_tokens should stay intact.
|
||||||
|
// quota = 2604 + 2432*0.1 + 383 = 3230.2 => 3230
|
||||||
|
require.Equal(t, 2604, summary.PromptTokens)
|
||||||
|
require.Equal(t, 3230, summary.Quota)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFromPrompt(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
relayInfo := &relaycommon.RelayInfo{
|
||||||
|
OriginModelName: "openai/gpt-4.1",
|
||||||
|
ChannelMeta: &relaycommon.ChannelMeta{
|
||||||
|
ChannelType: constant.ChannelTypeOpenRouter,
|
||||||
|
},
|
||||||
|
PriceData: types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 1,
|
||||||
|
CacheCreationRatio: 1.25,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||||
|
},
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 2604,
|
||||||
|
CompletionTokens: 383,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedCreationTokens: 100,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
// OpenRouter usage is already normalized. prompt_tokens should stay intact.
|
||||||
|
// quota = 2604 + 100*1.25 + 383 = 3112
|
||||||
|
require.Equal(t, 2604, summary.PromptTokens)
|
||||||
|
require.Equal(t, 3112, summary.Quota)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
relayInfo := &relaycommon.RelayInfo{
|
||||||
|
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||||
|
OriginModelName: "anthropic/claude-3.7-sonnet",
|
||||||
|
ChannelMeta: &relaycommon.ChannelMeta{
|
||||||
|
ChannelType: constant.ChannelTypeOpenRouter,
|
||||||
|
},
|
||||||
|
PriceData: types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 1,
|
||||||
|
CacheRatio: 0.1,
|
||||||
|
CacheCreationRatio: 1.25,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||||
|
},
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 2604,
|
||||||
|
CompletionTokens: 383,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 2432,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
// Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
|
||||||
|
// prompt = 2604 - 2432 = 172
|
||||||
|
// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
|
||||||
|
require.True(t, summary.IsClaudeUsageSemantic)
|
||||||
|
require.Equal(t, 172, summary.PromptTokens)
|
||||||
|
require.Equal(t, 798, summary.Quota)
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user