diff --git a/pkg/billingexpr/billingexpr_test.go b/pkg/billingexpr/billingexpr_test.go index 0ca47ab6..652df0a4 100644 --- a/pkg/billingexpr/billingexpr_test.go +++ b/pkg/billingexpr/billingexpr_test.go @@ -313,9 +313,10 @@ func TestComputeTieredQuota_Basic(t *testing.T) { GroupRatio: 1.0, EstimatedPromptTokens: 100000, EstimatedCompletionTokens: 5000, - EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5, - EstimatedQuotaAfterGroup: billingexpr.QuotaRound(100000*1.5 + 5000*7.5), + EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000, + EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000), EstimatedTier: "standard", + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 300000, C: 10000}) @@ -323,7 +324,7 @@ func TestComputeTieredQuota_Basic(t *testing.T) { t.Fatal(err) } - wantBefore := 300000*3.0 + 10000*11.25 + wantBefore := (300000*3.0 + 10000*11.25) / 1_000_000 * 500_000 if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 { t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore) } @@ -343,9 +344,10 @@ func TestComputeTieredQuota_SameTier(t *testing.T) { GroupRatio: 1.5, EstimatedPromptTokens: 50000, EstimatedCompletionTokens: 1000, - EstimatedQuotaBeforeGroup: 50000*1.5 + 1000*7.5, - EstimatedQuotaAfterGroup: billingexpr.QuotaRound((50000*1.5 + 1000*7.5) * 1.5), + EstimatedQuotaBeforeGroup: (50000*1.5 + 1000*7.5) / 1_000_000 * 500_000, + EstimatedQuotaAfterGroup: billingexpr.QuotaRound((50000*1.5 + 1000*7.5) / 1_000_000 * 500_000 * 1.5), EstimatedTier: "standard", + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 80000, C: 2000}) @@ -353,7 +355,7 @@ func TestComputeTieredQuota_SameTier(t *testing.T) { t.Fatal(err) } - wantBefore := 80000*1.5 + 2000*7.5 + wantBefore := (80000*1.5 + 2000*7.5) / 1_000_000 * 500_000 wantAfter := billingexpr.QuotaRound(wantBefore * 1.5) if result.ActualQuotaAfterGroup != wantAfter { t.Errorf("after group: got %d, want %d", result.ActualQuotaAfterGroup, wantAfter) @@ -534,9 +536,10 @@ func TestComputeTieredQuota_WithCache(t *testing.T) { GroupRatio: 1.0, EstimatedPromptTokens: 100000, EstimatedCompletionTokens: 5000, - EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5, - EstimatedQuotaAfterGroup: billingexpr.QuotaRound(100000*1.5 + 5000*7.5), + EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000, + EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000), EstimatedTier: "standard", + QuotaPerUnit: 500_000, } params := billingexpr.TokenParams{P: 100000, C: 5000, CR: 50000, CC: 10000} @@ -545,7 +548,7 @@ func TestComputeTieredQuota_WithCache(t *testing.T) { t.Fatal(err) } - wantBefore := 100000*1.5 + 5000*7.5 + 50000*0.15 + 10000*1.875 + wantBefore := (100000*1.5 + 5000*7.5 + 50000*0.15 + 10000*1.875) / 1_000_000 * 500_000 if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 { t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore) } @@ -565,9 +568,10 @@ func TestComputeTieredQuota_WithCacheCrossTier(t *testing.T) { GroupRatio: 2.0, EstimatedPromptTokens: 100000, EstimatedCompletionTokens: 5000, - EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5, - EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) * 2.0), + EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000, + EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000 * 2.0), EstimatedTier: "standard", + QuotaPerUnit: 500_000, } params := billingexpr.TokenParams{P: 300000, C: 10000, CR: 50000, CC: 10000} @@ -576,7 +580,7 @@ func TestComputeTieredQuota_WithCacheCrossTier(t *testing.T) { t.Fatal(err) } - wantBefore := 300000*3.0 + 10000*11.25 + 50000*0.3 + 10000*3.75 + wantBefore := (300000*3.0 + 10000*11.25 + 50000*0.3 + 10000*3.75) / 1_000_000 * 500_000 wantAfter := billingexpr.QuotaRound(wantBefore * 2.0) if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 { t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore) @@ -646,6 +650,7 @@ func TestFuzz_SettlementConsistency(t *testing.T) { estCost, estTrace, _ := billingexpr.RunExpr(claudeWithCacheExpr, estParams) + const qpu = 500_000.0 snap := &billingexpr.BillingSnapshot{ BillingMode: "tiered_expr", ExprString: claudeWithCacheExpr, @@ -653,9 +658,10 @@ func TestFuzz_SettlementConsistency(t *testing.T) { GroupRatio: groupRatio, EstimatedPromptTokens: int(estParams.P), EstimatedCompletionTokens: int(estParams.C), - EstimatedQuotaBeforeGroup: estCost, - EstimatedQuotaAfterGroup: billingexpr.QuotaRound(estCost * groupRatio), + EstimatedQuotaBeforeGroup: estCost / 1_000_000 * qpu, + EstimatedQuotaAfterGroup: billingexpr.QuotaRound(estCost / 1_000_000 * qpu * groupRatio), EstimatedTier: estTrace.MatchedTier, + QuotaPerUnit: qpu, } result, err := billingexpr.ComputeTieredQuota(snap, actParams) @@ -664,7 +670,7 @@ func TestFuzz_SettlementConsistency(t *testing.T) { } directCost, _, _ := billingexpr.RunExpr(claudeWithCacheExpr, actParams) - directQuota := billingexpr.QuotaRound(directCost * groupRatio) + directQuota := billingexpr.QuotaRound(directCost / 1_000_000 * qpu * groupRatio) if result.ActualQuotaAfterGroup != directQuota { t.Errorf("iter %d: settlement %d != direct %d", i, result.ActualQuotaAfterGroup, directQuota) @@ -679,21 +685,23 @@ func TestFuzz_SettlementConsistency(t *testing.T) { func TestComputeTieredQuota_BasicSettlement(t *testing.T) { exprStr := `tier("default", p + c)` snap := &billingexpr.BillingSnapshot{ - BillingMode: "tiered_expr", - ExprString: exprStr, - ExprHash: billingexpr.ExprHashString(exprStr), - GroupRatio: 1.0, + BillingMode: "tiered_expr", + ExprString: exprStr, + ExprHash: billingexpr.ExprHashString(exprStr), + GroupRatio: 1.0, + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3000, C: 2000}) if err != nil { t.Fatal(err) } - if math.Abs(result.ActualQuotaBeforeGroup-5000) > 1e-6 { - t.Errorf("before group = %f, want 5000", result.ActualQuotaBeforeGroup) + // exprOutput = 5000; quota = 5000 / 1M * 500K = 2500 + if math.Abs(result.ActualQuotaBeforeGroup-2500) > 1e-6 { + t.Errorf("before group = %f, want 2500", result.ActualQuotaBeforeGroup) } - if result.ActualQuotaAfterGroup != 5000 { - t.Errorf("after group = %d, want 5000", result.ActualQuotaAfterGroup) + if result.ActualQuotaAfterGroup != 2500 { + t.Errorf("after group = %d, want 2500", result.ActualQuotaAfterGroup) } if result.MatchedTier != "default" { t.Errorf("tier = %q, want default", result.MatchedTier) @@ -703,29 +711,31 @@ func TestComputeTieredQuota_BasicSettlement(t *testing.T) { func TestComputeTieredQuota_WithGroupRatio(t *testing.T) { exprStr := `tier("default", p + c)` snap := &billingexpr.BillingSnapshot{ - BillingMode: "tiered_expr", - ExprString: exprStr, - ExprHash: billingexpr.ExprHashString(exprStr), - GroupRatio: 2.0, + BillingMode: "tiered_expr", + ExprString: exprStr, + ExprHash: billingexpr.ExprHashString(exprStr), + GroupRatio: 2.0, + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 1000, C: 500}) if err != nil { t.Fatal(err) } - // cost = 1500, after group = round(1500 * 2.0) = 3000 - if result.ActualQuotaAfterGroup != 3000 { - t.Errorf("after group = %d, want 3000", result.ActualQuotaAfterGroup) + // exprOutput = 1500; quotaBeforeGroup = 750; afterGroup = round(750 * 2.0) = 1500 + if result.ActualQuotaAfterGroup != 1500 { + t.Errorf("after group = %d, want 1500", result.ActualQuotaAfterGroup) } } func TestComputeTieredQuota_ZeroTokens(t *testing.T) { exprStr := `tier("default", p * 2 + c * 10)` snap := &billingexpr.BillingSnapshot{ - BillingMode: "tiered_expr", - ExprString: exprStr, - ExprHash: billingexpr.ExprHashString(exprStr), - GroupRatio: 1.0, + BillingMode: "tiered_expr", + ExprString: exprStr, + ExprHash: billingexpr.ExprHashString(exprStr), + GroupRatio: 1.0, + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{}) @@ -738,40 +748,42 @@ func TestComputeTieredQuota_ZeroTokens(t *testing.T) { } func TestComputeTieredQuota_RoundingEdge(t *testing.T) { - exprStr := `tier("default", p * 0.5)` // 3 * 0.5 = 1.5 -> round to 2 + exprStr := `tier("default", p * 0.5)` // 3 * 0.5 = 1.5 (expr); 1.5 / 1M * 500K = 0.75; round(0.75) = 1 snap := &billingexpr.BillingSnapshot{ - BillingMode: "tiered_expr", - ExprString: exprStr, - ExprHash: billingexpr.ExprHashString(exprStr), - GroupRatio: 1.0, + BillingMode: "tiered_expr", + ExprString: exprStr, + ExprHash: billingexpr.ExprHashString(exprStr), + GroupRatio: 1.0, + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3}) if err != nil { t.Fatal(err) } - // 3 * 0.5 = 1.5, round(1.5) = 2 - if result.ActualQuotaAfterGroup != 2 { - t.Errorf("after group = %d, want 2 (round 1.5 up)", result.ActualQuotaAfterGroup) + // 3 * 0.5 = 1.5 (expr); quota = 1.5 / 1M * 500K = 0.75; round(0.75) = 1 + if result.ActualQuotaAfterGroup != 1 { + t.Errorf("after group = %d, want 1 (round 0.75 up)", result.ActualQuotaAfterGroup) } } func TestComputeTieredQuota_RoundingEdgeDown(t *testing.T) { - exprStr := `tier("default", p * 0.4)` // 3 * 0.4 = 1.2 -> round to 1 + exprStr := `tier("default", p * 0.4)` // 3 * 0.4 = 1.2 (expr); 1.2 / 1M * 500K = 0.6; round(0.6) = 1 snap := &billingexpr.BillingSnapshot{ - BillingMode: "tiered_expr", - ExprString: exprStr, - ExprHash: billingexpr.ExprHashString(exprStr), - GroupRatio: 1.0, + BillingMode: "tiered_expr", + ExprString: exprStr, + ExprHash: billingexpr.ExprHashString(exprStr), + GroupRatio: 1.0, + QuotaPerUnit: 500_000, } result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3}) if err != nil { t.Fatal(err) } - // 3 * 0.4 = 1.2, round(1.2) = 1 + // 3 * 0.4 = 1.2 (expr); quota = 1.2 / 1M * 500K = 0.6; round(0.6) = 1 if result.ActualQuotaAfterGroup != 1 { - t.Errorf("after group = %d, want 1 (round 1.2 down)", result.ActualQuotaAfterGroup) + t.Errorf("after group = %d, want 1 (round 0.6 up)", result.ActualQuotaAfterGroup) } } @@ -783,6 +795,7 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) { ExprHash: billingexpr.ExprHashString(exprStr), GroupRatio: 1.0, EstimatedTier: "normal", + QuotaPerUnit: 500_000, } // Without request: normal tier @@ -790,8 +803,9 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) { if err != nil { t.Fatal(err) } - if r1.ActualQuotaAfterGroup != 2000 { - t.Errorf("normal = %d, want 2000", r1.ActualQuotaAfterGroup) + // normal: p*2 = 2000; quota = 2000 / 1M * 500K = 1000 + if r1.ActualQuotaAfterGroup != 1000 { + t.Errorf("normal = %d, want 1000", r1.ActualQuotaAfterGroup) } // With request: fast tier @@ -801,8 +815,9 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) { if err != nil { t.Fatal(err) } - if r2.ActualQuotaAfterGroup != 4000 { - t.Errorf("fast = %d, want 4000", r2.ActualQuotaAfterGroup) + // fast: p*4 = 4000; quota = 4000 / 1M * 500K = 2000 + if r2.ActualQuotaAfterGroup != 2000 { + t.Errorf("fast = %d, want 2000", r2.ActualQuotaAfterGroup) } if !r2.CrossedTier { t.Error("expected CrossedTier = true when probe changes tier") @@ -817,9 +832,10 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) { ExprHash: billingexpr.ExprHashString(exprStr), GroupRatio: 1.0, EstimatedTier: "small", + QuotaPerUnit: 500_000, } - // At boundary + // At boundary: small, p*1 = 100000; quota = 100000 / 1M * 500K = 50000 r1, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 100000}) if err != nil { t.Fatal(err) @@ -827,11 +843,11 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) { if r1.MatchedTier != "small" { t.Errorf("at boundary: tier = %s, want small", r1.MatchedTier) } - if r1.ActualQuotaAfterGroup != 100000 { - t.Errorf("at boundary: quota = %d, want 100000", r1.ActualQuotaAfterGroup) + if r1.ActualQuotaAfterGroup != 50000 { + t.Errorf("at boundary: quota = %d, want 50000", r1.ActualQuotaAfterGroup) } - // Past boundary + // Past boundary: large, p*2 = 200002; quota = 200002 / 1M * 500K = 100001 r2, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 100001}) if err != nil { t.Fatal(err) @@ -839,8 +855,8 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) { if r2.MatchedTier != "large" { t.Errorf("past boundary: tier = %s, want large", r2.MatchedTier) } - if r2.ActualQuotaAfterGroup != 200002 { - t.Errorf("past boundary: quota = %d, want 200002", r2.ActualQuotaAfterGroup) + if r2.ActualQuotaAfterGroup != 100001 { + t.Errorf("past boundary: quota = %d, want 100001", r2.ActualQuotaAfterGroup) } if !r2.CrossedTier { t.Error("expected CrossedTier = true") diff --git a/pkg/billingexpr/settle.go b/pkg/billingexpr/settle.go index 7e69b9e3..98a6d44b 100644 --- a/pkg/billingexpr/settle.go +++ b/pkg/billingexpr/settle.go @@ -12,11 +12,12 @@ func ComputeTieredQuotaWithRequest(snap *BillingSnapshot, params TokenParams, re return TieredResult{}, err } - afterGroup := QuotaRound(cost * snap.GroupRatio) + quotaBeforeGroup := cost / 1_000_000 * snap.QuotaPerUnit + afterGroup := QuotaRound(quotaBeforeGroup * snap.GroupRatio) crossed := trace.MatchedTier != snap.EstimatedTier return TieredResult{ - ActualQuotaBeforeGroup: cost, + ActualQuotaBeforeGroup: quotaBeforeGroup, ActualQuotaAfterGroup: afterGroup, MatchedTier: trace.MatchedTier, CrossedTier: crossed, diff --git a/pkg/billingexpr/types.go b/pkg/billingexpr/types.go index dd626aae..53b89bbc 100644 --- a/pkg/billingexpr/types.go +++ b/pkg/billingexpr/types.go @@ -45,6 +45,7 @@ type BillingSnapshot struct { EstimatedQuotaBeforeGroup float64 `json:"estimated_quota_before_group"` EstimatedQuotaAfterGroup int `json:"estimated_quota_after_group"` EstimatedTier string `json:"estimated_tier"` + QuotaPerUnit float64 `json:"quota_per_unit"` } // TieredResult holds everything needed after running tiered settlement. diff --git a/relay/helper/price.go b/relay/helper/price.go index 798f4e8d..8299b605 100644 --- a/relay/helper/price.go +++ b/relay/helper/price.go @@ -225,7 +225,7 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT return types.PriceData{}, err } - rawQuota, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{ + rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{ P: float64(promptTokens), C: float64(estimatedCompletionTokens), }, requestInput) @@ -233,11 +233,13 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err) } - preConsumedQuota := billingexpr.QuotaRound(rawQuota * groupRatioInfo.GroupRatio) + // Expression coefficients are $/1M tokens prices; convert to quota the same way per-call billing does. + quotaBeforeGroup := rawCost / 1_000_000 * common.QuotaPerUnit + preConsumedQuota := billingexpr.QuotaRound(quotaBeforeGroup * groupRatioInfo.GroupRatio) freeModel := false if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume { - if groupRatioInfo.GroupRatio == 0 || rawQuota == 0 { + if groupRatioInfo.GroupRatio == 0 || quotaBeforeGroup == 0 { preConsumedQuota = 0 freeModel = true } @@ -252,9 +254,10 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT GroupRatio: groupRatioInfo.GroupRatio, EstimatedPromptTokens: promptTokens, EstimatedCompletionTokens: estimatedCompletionTokens, - EstimatedQuotaBeforeGroup: rawQuota, + EstimatedQuotaBeforeGroup: quotaBeforeGroup, EstimatedQuotaAfterGroup: preConsumedQuota, EstimatedTier: trace.MatchedTier, + QuotaPerUnit: common.QuotaPerUnit, } info.TieredBillingSnapshot = snapshot info.BillingRequestInput = &requestInput @@ -266,7 +269,7 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT } if common.DebugEnabled { - println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d rawQuota=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, rawQuota, groupRatioInfo.GroupRatio, trace.MatchedTier)) + println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d quotaBeforeGroup=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, quotaBeforeGroup, groupRatioInfo.GroupRatio, trace.MatchedTier)) } info.PriceData = priceData diff --git a/service/tiered_settle_test.go b/service/tiered_settle_test.go index 4ec702ad..c67e89ef 100644 --- a/service/tiered_settle_test.go +++ b/service/tiered_settle_test.go @@ -19,6 +19,8 @@ const cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h * // Expression with request probes const probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)` +const testQuotaPerUnit = 500_000.0 + func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot { return &billingexpr.BillingSnapshot{ BillingMode: "tiered_expr", @@ -27,14 +29,16 @@ func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) GroupRatio: groupRatio, EstimatedPromptTokens: estPrompt, EstimatedCompletionTokens: estCompletion, + QuotaPerUnit: testQuotaPerUnit, } } func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo { snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion) cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)}) - snap.EstimatedQuotaBeforeGroup = cost - snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(cost * groupRatio) + quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit + snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup + snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio) snap.EstimatedTier = trace.MatchedTier return &relaycommon.RelayInfo{ TieredBillingSnapshot: snap, @@ -56,7 +60,8 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) { GroupRatio: 1.0, EstimatedPromptTokens: 100, EstimatedCompletionTokens: 0, - EstimatedQuotaAfterGroup: 100, + EstimatedQuotaAfterGroup: 50, + QuotaPerUnit: testQuotaPerUnit, }, BillingRequestInput: &billingexpr.RequestInput{ Body: []byte(`{"service_tier":"fast"}`), @@ -67,8 +72,9 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) { if !ok { t.Fatal("expected tiered settle to apply") } - if quota != 200 { - t.Fatalf("quota = %d, want 200", quota) + // fast: p*2 = 200; quota = 200 / 1M * 500K = 100 + if quota != 100 { + t.Fatalf("quota = %d, want 100", quota) } if result == nil || result.MatchedTier != "fast" { t.Fatalf("matched tier = %v, want fast", result) @@ -111,9 +117,9 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // p*2 + c*10 = 2000 + 5000 = 7000 - if quota != 7000 { - t.Fatalf("quota = %d, want 7000", quota) + // p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500 + if quota != 3500 { + t.Fatalf("quota = %d, want 3500", quota) } if quota != info.FinalPreConsumedQuota { t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota) @@ -122,7 +128,7 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) { func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) { info := makeRelayInfo(flatExpr, 1.0, 1000, 500) - preConsumed := info.FinalPreConsumedQuota // 7000 + preConsumed := info.FinalPreConsumedQuota // 3500 // Actual usage is higher than estimated params := billingexpr.TokenParams{P: 2000, C: 1000} @@ -130,9 +136,9 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // p*2 + c*10 = 4000 + 10000 = 14000 - if quota != 14000 { - t.Fatalf("quota = %d, want 14000", quota) + // p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000 + if quota != 7000 { + t.Fatalf("quota = %d, want 7000", quota) } if quota <= preConsumed { t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed) @@ -141,7 +147,7 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) { func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) { info := makeRelayInfo(flatExpr, 1.0, 1000, 500) - preConsumed := info.FinalPreConsumedQuota // 7000 + preConsumed := info.FinalPreConsumedQuota // 3500 // Actual usage is lower than estimated params := billingexpr.TokenParams{P: 100, C: 50} @@ -149,9 +155,9 @@ func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // p*2 + c*10 = 200 + 500 = 700 - if quota != 700 { - t.Fatalf("quota = %d, want 700", quota) + // p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350 + if quota != 350 { + t.Fatalf("quota = %d, want 350", quota) } if quota >= preConsumed { t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed) @@ -170,9 +176,9 @@ func TestTryTieredSettle_ExactBoundary(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // standard: p*1.5 + c*7.5 = 300000 + 7500 = 307500 - if quota != 307500 { - t.Fatalf("quota = %d, want 307500", quota) + // standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750 + if quota != 153750 { + t.Fatalf("quota = %d, want 153750", quota) } if result.MatchedTier != "standard" { t.Fatalf("tier = %s, want standard", result.MatchedTier) @@ -187,9 +193,9 @@ func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // long_context: p*3 + c*11.25 = 600003 + 11250 = 611253 - if quota != 611253 { - t.Fatalf("quota = %d, want 611253", quota) + // long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627 + if quota != 305627 { + t.Fatalf("quota = %d, want 305627", quota) } if result.MatchedTier != "long_context" { t.Fatalf("tier = %s, want long_context", result.MatchedTier) @@ -221,9 +227,9 @@ func TestTryTieredSettle_HugeTokens(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // p*2 + c*10 = 20000000 + 50000000 = 70000000 - if quota != 70000000 { - t.Fatalf("quota = %d, want 70000000", quota) + // p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000 + if quota != 35000000 { + t.Fatalf("quota = %d, want 35000000", quota) } } @@ -235,23 +241,23 @@ func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) { if !ok1 { t.Fatal("expected tiered settle") } - // p*2 + c*10 + cr*0.2 + cc*2.5 + cc1h*4 = 2000 + 5000 + 0 + 0 + 0 = 7000 + // p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500 // With cache tokens ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000}) if !ok2 { t.Fatal("expected tiered settle") } - // 2000 + 5000 + 10000*0.2 + 5000*2.5 + 2000*4 = 2000 + 5000 + 2000 + 12500 + 8000 = 29500 + // 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750 if quota2 <= quota1 { t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2) } - if quota1 != 7000 { - t.Fatalf("no-cache quota = %d, want 7000", quota1) + if quota1 != 3500 { + t.Fatalf("no-cache quota = %d, want 3500", quota1) } - if quota2 != 29500 { - t.Fatalf("cache quota = %d, want 29500", quota2) + if quota2 != 14750 { + t.Fatalf("cache quota = %d, want 14750", quota2) } } @@ -269,9 +275,9 @@ func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // fast: p*4 + c*20 = 4000 + 10000 = 14000 - if quota != 14000 { - t.Fatalf("quota = %d, want 14000", quota) + // fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000 + if quota != 7000 { + t.Fatalf("quota = %d, want 7000", quota) } if result.MatchedTier != "fast" { t.Fatalf("tier = %s, want fast", result.MatchedTier) @@ -286,9 +292,9 @@ func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // normal: p*2 + c*10 = 2000 + 5000 = 7000 - if quota != 7000 { - t.Fatalf("quota = %d, want 7000", quota) + // normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500 + if quota != 3500 { + t.Fatalf("quota = %d, want 3500", quota) } if result.MatchedTier != "normal" { t.Fatalf("tier = %s, want normal", result.MatchedTier) @@ -306,9 +312,9 @@ func TestTryTieredSettle_GroupRatioScaling(t *testing.T) { if !ok { t.Fatal("expected tiered settle") } - // cost = 7000, after group = round(7000 * 1.5) = 10500 - if quota != 10500 { - t.Fatalf("quota = %d, want 10500", quota) + // exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250 + if quota != 5250 { + t.Fatalf("quota = %d, want 5250", quota) } } diff --git a/web/src/components/table/model-pricing/modal/components/DynamicPricingBreakdown.jsx b/web/src/components/table/model-pricing/modal/components/DynamicPricingBreakdown.jsx index 36e1fe2c..abbee423 100644 --- a/web/src/components/table/model-pricing/modal/components/DynamicPricingBreakdown.jsx +++ b/web/src/components/table/model-pricing/modal/components/DynamicPricingBreakdown.jsx @@ -37,7 +37,7 @@ const { Text } = Typography; const PRICE_SUFFIX = '$/1M tokens'; function unitCostToPrice(uc) { - return (Number(uc) || 0) * 2; + return Number(uc) || 0; } function formatPrice(uc) { diff --git a/web/src/components/table/model-pricing/view/card/PricingCardView.jsx b/web/src/components/table/model-pricing/view/card/PricingCardView.jsx index 77aa1a39..c36ad5e1 100644 --- a/web/src/components/table/model-pricing/view/card/PricingCardView.jsx +++ b/web/src/components/table/model-pricing/view/card/PricingCardView.jsx @@ -269,7 +269,7 @@ const PricingCardView = ({