diff --git a/controller/perf_metrics.go b/controller/perf_metrics.go
index fada3f29..2ffc2b91 100644
--- a/controller/perf_metrics.go
+++ b/controller/perf_metrics.go
@@ -9,6 +9,30 @@ import (
 	"github.com/gin-gonic/gin"
 )
 
+// GetPerfMetricsSummary serves the per-model performance summary that is
+// registered as GET "/summary" on the perf-metrics route group. The optional
+// "hours" query parameter selects the look-back window; when it is missing or
+// not an integer the default of 24 is used. The response is a JSON envelope:
+// {"success": true, "data": ...} on success, or HTTP 500 with
+// {"success": false, "message": ...} when the query fails.
+func GetPerfMetricsSummary(c *gin.Context) {
+	windowHours := 24
+	if raw := c.Query("hours"); raw != "" {
+		// A value that does not parse is ignored; the default window stays.
+		if n, convErr := strconv.Atoi(raw); convErr == nil {
+			windowHours = n
+		}
+	}
+
+	summary, err := perfmetrics.QuerySummaryAll(windowHours)
+	if err != nil {
+		failure := gin.H{"success": false, "message": err.Error()}
+		c.JSON(http.StatusInternalServerError, failure)
+		return
+	}
+	c.JSON(http.StatusOK, gin.H{"success": true, "data": summary})
+}
+
 func GetPerfMetrics(c *gin.Context) {
 	modelName := c.Query("model")
 	if modelName == "" {
diff --git a/model/perf_metric.go b/model/perf_metric.go
index 8ef85438..b4a6a924 100644
--- a/model/perf_metric.go
+++ b/model/perf_metric.go
@@ -59,6 +59,36 @@ func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64)
 	return metrics, err
 }
 
+// PerfMetricSummary is one aggregated row returned by
+// GetPerfMetricsSummaryAll: the counters of a single model summed over all
+// buckets matched by the query.
+type PerfMetricSummary struct {
+	ModelName      string `json:"model_name"`
+	RequestCount   int64  `json:"request_count"`
+	SuccessCount   int64  `json:"success_count"`
+	TotalLatencyMs int64  `json:"total_latency_ms"`
+	OutputTokens   int64  `json:"output_tokens"`
+	GenerationMs   int64  `json:"generation_ms"`
+}
+
+// GetPerfMetricsSummaryAll sums the PerfMetric counters per model_name over
+// the buckets whose bucket_ts lies in [startTs, endTs] (both ends inclusive).
+// Models whose summed request_count is not greater than zero are left out.
+// It returns the aggregated rows together with the query error, if any.
+func GetPerfMetricsSummaryAll(startTs int64, endTs int64) ([]PerfMetricSummary, error) {
+	// One output row per model; every counter column is summed.
+	const columns = "model_name, SUM(request_count) as request_count, SUM(success_count) as success_count, SUM(total_latency_ms) as total_latency_ms, SUM(output_tokens) as output_tokens, SUM(generation_ms) as generation_ms"
+
+	var rows []PerfMetricSummary
+	query := DB.Model(&PerfMetric{}).
+		Select(columns).
+		Where("bucket_ts >= ? AND bucket_ts <= ?", startTs, endTs).
+		Group("model_name").
+		Having("SUM(request_count) > 0")
+	err := query.Find(&rows).Error
+	return rows, err
+}
+
 func DeletePerfMetricsBefore(cutoffTs int64) error {
 	if cutoffTs <= 0 {
 		return nil
diff --git a/pkg/perf_metrics/metrics.go b/pkg/perf_metrics/metrics.go
index 7e8648af..c61d17ea 100644
--- a/pkg/perf_metrics/metrics.go
+++ b/pkg/perf_metrics/metrics.go
@@ -3,6 +3,7 @@ package perfmetrics
 import (
 	"context"
 	"fmt"
+	"math"
 	"sort"
 	"sync"
 	"time"
@@ -121,6 +122,90 @@ func Query(params QueryParams) (QueryResult, error) {
 	return buildQueryResult(params.Model, merged), nil
 }
 
+// QuerySummaryAll returns one summary entry per model for the last `hours`
+// hours, measured back from the current time. A non-positive `hours` falls
+// back to 24 and anything above 24*30 is capped at 24*30. The rows returned by
+// model.GetPerfMetricsSummaryAll are merged with the hotBuckets entries whose
+// bucketTs lies inside the same window; models that end up with zero requests
+// are skipped and the result is sorted by model name.
+func QuerySummaryAll(hours int) (SummaryAllResult, error) {
+	const defaultHours, maxHours = 24, 24 * 30
+	switch {
+	case hours <= 0:
+		hours = defaultHours
+	case hours > maxHours:
+		hours = maxHours
+	}
+
+	windowEnd := time.Now().Unix()
+	windowStart := windowEnd - int64(hours)*3600
+
+	persisted, err := model.GetPerfMetricsSummaryAll(windowStart, windowEnd)
+	if err != nil {
+		return SummaryAllResult{}, err
+	}
+
+	// Start from the totals read through the model package.
+	perModel := make(map[string]counters, len(persisted))
+	for i := range persisted {
+		p := &persisted[i]
+		perModel[p.ModelName] = counters{
+			requestCount:   p.RequestCount,
+			successCount:   p.SuccessCount,
+			totalLatencyMs: p.TotalLatencyMs,
+			outputTokens:   p.OutputTokens,
+			generationMs:   p.GenerationMs,
+		}
+	}
+
+	// Add every hot bucket inside the window to its model's running total.
+	hotBuckets.Range(func(rawKey, rawBucket any) bool {
+		key := rawKey.(bucketKey)
+		if key.bucketTs < windowStart || key.bucketTs > windowEnd {
+			return true
+		}
+		snap := rawBucket.(*atomicBucket).snapshot()
+		if snap.requestCount == 0 {
+			return true
+		}
+		acc := perModel[key.model]
+		acc.requestCount += snap.requestCount
+		acc.successCount += snap.successCount
+		acc.totalLatencyMs += snap.totalLatencyMs
+		acc.outputTokens += snap.outputTokens
+		acc.generationMs += snap.generationMs
+		perModel[key.model] = acc
+		return true
+	})
+
+	summaries := make([]ModelSummary, 0, len(perModel))
+	for modelName, sum := range perModel {
+		if sum.requestCount == 0 {
+			continue
+		}
+		// Throughput stays 0 when no generation time was recorded.
+		var tps float64
+		if sum.generationMs > 0 {
+			seconds := float64(sum.generationMs) / 1000.0
+			tps = float64(sum.outputTokens) / seconds
+		}
+		// Success rate is a percentage; both ratios are rounded to two decimals.
+		successPct := float64(sum.successCount) / float64(sum.requestCount) * 100
+		summaries = append(summaries, ModelSummary{
+			ModelName:    modelName,
+			AvgLatencyMs: sum.totalLatencyMs / sum.requestCount,
+			SuccessRate:  math.Round(successPct*100) / 100,
+			AvgTps:       math.Round(tps*100) / 100,
+			RequestCount: sum.requestCount,
+		})
+	}
+	sort.Slice(summaries, func(a, b int) bool {
+		return summaries[a].ModelName < summaries[b].ModelName
+	})
+
+	return SummaryAllResult{Models: summaries}, nil
+}
+
 func bucketStart(ts int64) int64 {
 	bucketSeconds := perf_metrics_setting.GetBucketSeconds()
 	if bucketSeconds <= 0 {
diff --git a/pkg/perf_metrics/types.go b/pkg/perf_metrics/types.go
index 25d07868..f4f8a949 100644
--- a/pkg/perf_metrics/types.go
+++ b/pkg/perf_metrics/types.go
@@ -47,6 +47,25 @@ type QueryResult struct {
 	Groups []GroupResult `json:"groups"`
 }
 
+// ModelSummary is the per-model entry of SummaryAllResult.
+type ModelSummary struct {
+	ModelName string `json:"model_name"`
+	// AvgLatencyMs is the total latency divided by the request count
+	// (integer division).
+	AvgLatencyMs int64 `json:"avg_latency_ms"`
+	// SuccessRate is a percentage rounded to two decimals.
+	SuccessRate float64 `json:"success_rate"`
+	// AvgTps is output tokens per second of generation time, rounded to two
+	// decimals; it is 0 when no generation time was recorded.
+	AvgTps       float64 `json:"avg_tps"`
+	RequestCount int64   `json:"request_count"`
+}
+
+// SummaryAllResult is the payload returned by QuerySummaryAll.
+type SummaryAllResult struct {
+	Models []ModelSummary `json:"models"`
+}
+
 type bucketKey struct {
 	model string
 	group string
diff --git a/router/api-router.go b/router/api-router.go
index d756185e..72b55856 100644
--- a/router/api-router.go
+++ b/router/api-router.go
@@ -31,7 +31,12 @@ func SetApiRouter(router *gin.Engine) {
 		//apiRouter.GET("/midjourney", controller.GetMidjourney)
 		apiRouter.GET("/home_page_content", controller.GetHomePageContent)
 		apiRouter.GET("/pricing", middleware.TryUserAuth(), controller.GetPricing)
-		apiRouter.GET("/perf-metrics", middleware.TryUserAuth(), controller.GetPerfMetrics)
+		perfMetricsRoute := apiRouter.Group("/perf-metrics")
+		perfMetricsRoute.Use(middleware.TryUserAuth())
+		{
+			perfMetricsRoute.GET("/summary", controller.GetPerfMetricsSummary)
+			perfMetricsRoute.GET("", controller.GetPerfMetrics)
+		}
 		apiRouter.GET("/rankings", controller.GetRankings)
 		apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(),
middleware.TurnstileCheck(), controller.SendEmailVerification) apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail) diff --git a/web/default/src/features/auth/otp/components/otp-form.tsx b/web/default/src/features/auth/otp/components/otp-form.tsx index bb356ce8..b8e7e8e5 100644 --- a/web/default/src/features/auth/otp/components/otp-form.tsx +++ b/web/default/src/features/auth/otp/components/otp-form.tsx @@ -183,7 +183,7 @@ export function OtpForm({ className, ...props }: OtpFormProps) { )} /> - diff --git a/web/default/src/features/pricing/api.ts b/web/default/src/features/pricing/api.ts index a20f1702..ac9a88de 100644 --- a/web/default/src/features/pricing/api.ts +++ b/web/default/src/features/pricing/api.ts @@ -38,6 +38,29 @@ export type PerformanceMetricsData = { } } +export type PerfModelSummary = { + model_name: string + avg_latency_ms: number + success_rate: number + avg_tps: number + request_count: number +} + +export type PerfSummaryAllData = { + success: boolean + message?: string + data: { + models: PerfModelSummary[] + } +} + +export async function getPerfMetricsSummary( + hours = 24 +): Promise { + const res = await api.get(`/api/perf-metrics/summary?hours=${hours}`) + return res.data +} + export async function getPerfMetrics( modelName: string, hours = 24 diff --git a/web/default/src/features/pricing/components/model-card-grid.tsx b/web/default/src/features/pricing/components/model-card-grid.tsx index 45010131..3ee63c81 100644 --- a/web/default/src/features/pricing/components/model-card-grid.tsx +++ b/web/default/src/features/pricing/components/model-card-grid.tsx @@ -1,10 +1,13 @@ import { useEffect, useMemo, useState } from 'react' +import { useQuery } from '@tanstack/react-query' import { ChevronLeft, ChevronRight } from 'lucide-react' import { useTranslation } from 'react-i18next' import { Button } from '@/components/ui/button' +import { getPerfMetricsSummary } from 
'../api' import { DEFAULT_PRICING_PAGE_SIZE, DEFAULT_TOKEN_UNIT } from '../constants' import type { PricingModel, TokenUnit } from '../types' import { ModelCard } from './model-card' +import type { ModelPerfBadgeData } from './model-perf-badge' export interface ModelCardGridProps { models: PricingModel[] @@ -22,6 +25,13 @@ export function ModelCardGrid(props: ModelCardGridProps) { const tokenUnit = props.tokenUnit ?? DEFAULT_TOKEN_UNIT const totalPages = Math.max(1, Math.ceil(props.models.length / pageSize)) + const perfQuery = useQuery({ + queryKey: ['perf-metrics-summary'], + queryFn: () => getPerfMetricsSummary(24), + staleTime: 60 * 1000, + retry: false, + }) + useEffect(() => { setPage(1) }, [props.models]) @@ -31,6 +41,16 @@ export function ModelCardGrid(props: ModelCardGridProps) { return props.models.slice(start, start + pageSize) }, [page, pageSize, props.models]) + const perfMap = useMemo(() => { + const map = new Map() + for (const model of perfQuery.data?.data?.models ?? []) { + if (model.request_count > 0) { + map.set(model.model_name, model) + } + } + return map + }, [perfQuery.data]) + if (props.models.length === 0) { return null } @@ -46,6 +66,7 @@ export function ModelCardGrid(props: ModelCardGridProps) { priceRate={props.priceRate} usdExchangeRate={props.usdExchangeRate} showRechargePrice={props.showRechargePrice} + perf={perfMap.get(model.model_name || '')} onClick={() => props.onModelClick(model.model_name || '')} /> ))} diff --git a/web/default/src/features/pricing/components/model-card.tsx b/web/default/src/features/pricing/components/model-card.tsx index 36d459eb..b5c64300 100644 --- a/web/default/src/features/pricing/components/model-card.tsx +++ b/web/default/src/features/pricing/components/model-card.tsx @@ -14,6 +14,8 @@ import { parseTags } from '../lib/filters' import { isTokenBasedModel } from '../lib/model-helpers' import { formatPrice, formatRequestPrice } from '../lib/price' import type { PricingModel, TokenUnit } from '../types' 
+import { ModelPerfBadge } from './model-perf-badge' +import type { ModelPerfBadgeData } from './model-perf-badge' export interface ModelCardProps { model: PricingModel @@ -22,6 +24,7 @@ export interface ModelCardProps { usdExchangeRate?: number tokenUnit?: TokenUnit showRechargePrice?: boolean + perf?: ModelPerfBadgeData } export const ModelCard = memo(function ModelCard(props: ModelCardProps) { @@ -69,7 +72,7 @@ export const ModelCard = memo(function ModelCard(props: ModelCardProps) { return (
@@ -206,41 +209,43 @@ export const ModelCard = memo(function ModelCard(props: ModelCardProps) { {props.model.description || t('No description available.')}

- {/* Footer row 1: group + billing type */} -
- {primaryGroup && ( + {/* Footer: left metadata and right performance summary share row alignment */} +
+
+ {primaryGroup && ( + + {primaryGroup} {t('Groups')} + + )} - {primaryGroup} {t('Groups')} + {isTokenBased ? t('Token-based') : t('Per Request')} - )} - - {isTokenBased ? t('Token-based') : t('Per Request')} - - {isDynamicPricing && ( - - )} -
+ {isDynamicPricing && ( + + )} +
+ - {/* Footer row 2: endpoint + tag chips */} -
- {bottomTags.map((item) => ( - - {item} +
+ {bottomTags.map((item) => ( + + {item} + + ))} + + {tokenUnitLabel} - ))} - - {tokenUnitLabel} - - {hiddenCount > 0 && ( - - +{hiddenCount} - - )} + {hiddenCount > 0 && ( + + +{hiddenCount} + + )} +
) diff --git a/web/default/src/features/pricing/components/model-perf-badge.tsx b/web/default/src/features/pricing/components/model-perf-badge.tsx new file mode 100644 index 00000000..3b05be56 --- /dev/null +++ b/web/default/src/features/pricing/components/model-perf-badge.tsx @@ -0,0 +1,77 @@ +import { memo } from 'react' +import { useTranslation } from 'react-i18next' +import { cn } from '@/lib/utils' +import { formatLatency, formatThroughput } from '../lib/mock-stats' + +export type ModelPerfBadgeData = { + avg_latency_ms: number + success_rate: number + avg_tps: number +} + +export interface ModelPerfBadgeProps + extends React.HTMLAttributes { + perf: ModelPerfBadgeData | undefined +} + +function formatCompactThroughput(tps: number): string { + return formatThroughput(tps).replace(' t/s', 'tps') +} + +export const ModelPerfBadge = memo(function ModelPerfBadge( + props: ModelPerfBadgeProps +) { + const { t } = useTranslation() + + if (!props.perf) { + return null + } + + const { avg_latency_ms, avg_tps, success_rate } = props.perf + + let statusColor = 'bg-emerald-500' + if (success_rate < 99) { + statusColor = 'bg-red-500' + } else if (success_rate < 99.9) { + statusColor = 'bg-amber-500' + } + + return ( +
+
+
+ {t('Latency short')} +
+
+ {avg_latency_ms > 0 ? formatLatency(avg_latency_ms) : '—'} +
+
+
+
+ {t('Throughput short')} +
+
+ {formatCompactThroughput(avg_tps)} +
+
+
+
+ {t('Status short')} +
+
+ + + +
+
+
+ ) +}) diff --git a/web/default/src/i18n/locales/en.json b/web/default/src/i18n/locales/en.json index 7d470eef..ee07cc06 100644 --- a/web/default/src/i18n/locales/en.json +++ b/web/default/src/i18n/locales/en.json @@ -446,6 +446,8 @@ "Available Models": "Available Models", "Available Rewards": "Available Rewards", "Average latency": "Average latency", + "Latency": "Latency", + "Latency short": "Lat.", "Average latency, TTFT, and success rate by group": "Average latency, TTFT, and success rate by group", "Average RPM": "Average RPM", "Average time-to-first-token (TTFT) by group": "Average time-to-first-token (TTFT) by group", @@ -3603,6 +3605,7 @@ "Statistical tokens": "Statistical tokens", "Statistics reset": "Statistics reset", "Status": "Status", + "Status short": "Status", "Status & Sync": "Status & Sync", "Status Code": "Status Code", "Status Code Mapping": "Status Code Mapping", @@ -3834,6 +3837,7 @@ "This year": "This year", "Three steps to get started": "Three steps to get started", "Throughput": "Throughput", + "Throughput short": "TPS", "Throughput by group": "Throughput by group", "Throughput trend": "Throughput trend", "Tier": "Tier", diff --git a/web/default/src/i18n/locales/fr.json b/web/default/src/i18n/locales/fr.json index 2cbb521d..7230fabc 100644 --- a/web/default/src/i18n/locales/fr.json +++ b/web/default/src/i18n/locales/fr.json @@ -446,6 +446,8 @@ "Available Models": "Modèles disponibles", "Available Rewards": "Récompenses disponibles", "Average latency": "Latence moyenne", + "Latency": "Latence", + "Latency short": "Lat.", "Average latency, TTFT, and success rate by group": "Latence moyenne, TTFT et taux de réussite par groupe", "Average RPM": "RPM moyen", "Average time-to-first-token (TTFT) by group": "Temps moyen jusqu’au premier token (TTFT) par groupe", @@ -3603,6 +3605,7 @@ "Statistical tokens": "Jetons statistiques", "Statistics reset": "Statistiques réinitialisées", "Status": "Statut", + "Status short": "État", "Status & Sync": "Statut 
et synchronisation", "Status Code": "Code de statut", "Status Code Mapping": "Mappage des codes d'état", @@ -3834,6 +3837,7 @@ "This year": "Cette année", "Three steps to get started": "Trois étapes pour commencer", "Throughput": "Débit", + "Throughput short": "TPS", "Throughput by group": "Débit par groupe", "Throughput trend": "Tendance du débit", "Tier": "Palier", diff --git a/web/default/src/i18n/locales/ja.json b/web/default/src/i18n/locales/ja.json index 2e4b7ad7..41a2ab4c 100644 --- a/web/default/src/i18n/locales/ja.json +++ b/web/default/src/i18n/locales/ja.json @@ -446,6 +446,8 @@ "Available Models": "利用可能なモデル", "Available Rewards": "利用可能な報酬", "Average latency": "平均レイテンシ", + "Latency": "レイテンシ", + "Latency short": "遅延", "Average latency, TTFT, and success rate by group": "グループ別の平均レイテンシ、TTFT、成功率", "Average RPM": "平均RPM", "Average time-to-first-token (TTFT) by group": "グループ別の平均 Time to First Token(TTFT)", @@ -3603,6 +3605,7 @@ "Statistical tokens": "統計トークン", "Statistics reset": "統計をリセットしました", "Status": "ステータス", + "Status short": "状態", "Status & Sync": "ステータスと同期", "Status Code": "ステータスコード", "Status Code Mapping": "ステータスコードマッピング", @@ -3834,6 +3837,7 @@ "This year": "今年", "Three steps to get started": "3ステップで始める", "Throughput": "スループット", + "Throughput short": "TPS", "Throughput by group": "グループ別スループット", "Throughput trend": "スループット推移", "Tier": "ティア", diff --git a/web/default/src/i18n/locales/ru.json b/web/default/src/i18n/locales/ru.json index c6177c27..0d555257 100644 --- a/web/default/src/i18n/locales/ru.json +++ b/web/default/src/i18n/locales/ru.json @@ -446,6 +446,8 @@ "Available Models": "Доступные модели", "Available Rewards": "Доступные награды", "Average latency": "Средняя задержка", + "Latency": "Задержка", + "Latency short": "Зад.", "Average latency, TTFT, and success rate by group": "Средняя задержка, TTFT и доля успешных запросов по группам", "Average RPM": "Среднее число оборотов в минуту", "Average time-to-first-token (TTFT) by group": "Среднее 
время до первого токена (TTFT) по группам", @@ -3603,6 +3605,7 @@ "Statistical tokens": "Статистические токены", "Statistics reset": "Статистика сброшена", "Status": "Статус", + "Status short": "Стат.", "Status & Sync": "Статус и синхронизация", "Status Code": "Код статуса", "Status Code Mapping": "Сопоставление кодов состояния", @@ -3834,6 +3837,7 @@ "This year": "Этот год", "Three steps to get started": "Три шага для начала работы", "Throughput": "Пропускная способность", + "Throughput short": "TPS", "Throughput by group": "Пропускная способность по группам", "Throughput trend": "Тренд пропускной способности", "Tier": "Уровень", diff --git a/web/default/src/i18n/locales/vi.json b/web/default/src/i18n/locales/vi.json index 34e4dec4..e8c77955 100644 --- a/web/default/src/i18n/locales/vi.json +++ b/web/default/src/i18n/locales/vi.json @@ -446,6 +446,8 @@ "Available Models": "Mô hình khả dụng", "Available Rewards": "Phần thưởng hiện có", "Average latency": "Độ trễ trung bình", + "Latency": "Độ trễ", + "Latency short": "Trễ", "Average latency, TTFT, and success rate by group": "Độ trễ trung bình, TTFT và tỷ lệ thành công theo nhóm", "Average RPM": "RPM trung bình", "Average time-to-first-token (TTFT) by group": "Thời gian trung bình tới token đầu tiên (TTFT) theo nhóm", @@ -3603,6 +3605,7 @@ "Statistical tokens": "Mã thông báo thống kê", "Statistics reset": "Đã đặt lại thống kê", "Status": "Trạng thái", + "Status short": "TT", "Status & Sync": "Trạng thái & Đồng bộ", "Status Code": "Mã trạng thái", "Status Code Mapping": "Ánh xạ mã trạng thái", @@ -3834,6 +3837,7 @@ "This year": "Năm nay", "Three steps to get started": "Ba bước để bắt đầu", "Throughput": "Thông lượng", + "Throughput short": "TPS", "Throughput by group": "Thông lượng theo nhóm", "Throughput trend": "Xu hướng thông lượng", "Tier": "Bậc", diff --git a/web/default/src/i18n/locales/zh.json b/web/default/src/i18n/locales/zh.json index 0b25e366..db52a494 100644 --- a/web/default/src/i18n/locales/zh.json +++ 
b/web/default/src/i18n/locales/zh.json @@ -446,6 +446,8 @@ "Available Models": "可用模型", "Available Rewards": "可用奖励", "Average latency": "平均延迟", + "Latency": "延迟", + "Latency short": "延迟", "Average latency, TTFT, and success rate by group": "各分组的平均延迟、首 Token 延迟和成功率", "Average RPM": "平均 RPM", "Average time-to-first-token (TTFT) by group": "各分组的平均首 Token 延迟(TTFT)", @@ -3603,6 +3605,7 @@ "Statistical tokens": "统计 Token 数", "Statistics reset": "统计已重置", "Status": "状态", + "Status short": "状态", "Status & Sync": "状态与同步", "Status Code": "状态码", "Status Code Mapping": "状态码映射", @@ -3834,6 +3837,7 @@ "This year": "本年", "Three steps to get started": "三步快速上手", "Throughput": "吞吐量", + "Throughput short": "吞吐", "Throughput by group": "各分组吞吐量", "Throughput trend": "吞吐量趋势", "Tier": "档位",