feat(default): add model performance badges
Some checks failed
Publish Docker image (Multi-arch) / Build & push (amd64) (push) Has been cancelled
Publish Docker image (Multi-arch) / Build & push (arm64) (push) Has been cancelled
Publish Docker image (Multi-arch) / Create multi-arch manifests (push) Has been cancelled
Release (Linux, macOS, Windows) / Linux Release (push) Has been cancelled
Release (Linux, macOS, Windows) / macOS Release (push) Has been cancelled
Release (Linux, macOS, Windows) / Windows Release (push) Has been cancelled

Add a batched performance summary API for model square cards and show compact latency, throughput, and status metrics without increasing card size. Also fix OTP verification form submission.
This commit is contained in:
CaIon 2026-05-06 22:20:43 +08:00
parent d98f0e8ac3
commit e8cfb546fa
No known key found for this signature in database
GPG Key ID: 0CFA613529A9921D
16 changed files with 316 additions and 34 deletions

View File

@ -9,6 +9,29 @@ import (
"github.com/gin-gonic/gin"
)
// GetPerfMetricsSummary serves the batched per-model performance summary.
//
// The optional "hours" query parameter selects the look-back window. When it
// is missing or is not a valid integer, 24 is passed on instead; any further
// range handling is left to perfmetrics.QuerySummaryAll.
//
// The response is a JSON envelope: {"success": true, "data": ...} with HTTP
// 200 on success, or {"success": false, "message": ...} with HTTP 500 when
// the summary query fails.
func GetPerfMetricsSummary(c *gin.Context) {
	windowHours := 24
	// Atoi rejects the empty string, so an absent parameter keeps the default
	// exactly like a malformed one does.
	if n, convErr := strconv.Atoi(c.Query("hours")); convErr == nil {
		windowHours = n
	}

	summary, err := perfmetrics.QuerySummaryAll(windowHours)
	if err != nil {
		failure := gin.H{"success": false, "message": err.Error()}
		c.JSON(http.StatusInternalServerError, failure)
		return
	}

	c.JSON(http.StatusOK, gin.H{"success": true, "data": summary})
}
func GetPerfMetrics(c *gin.Context) {
modelName := c.Query("model")
if modelName == "" {

View File

@ -59,6 +59,26 @@ func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64)
return metrics, err
}
// PerfMetricSummary is one row of the per-model aggregate returned by
// GetPerfMetricsSummaryAll. Every numeric field holds the SUM of the
// same-named column over the buckets selected by that query.
type PerfMetricSummary struct {
// ModelName is the model_name value the row was grouped by.
ModelName string `json:"model_name"`
// RequestCount is the summed request_count; the query only returns rows
// where this sum is greater than zero.
RequestCount int64 `json:"request_count"`
// SuccessCount is the summed success_count.
SuccessCount int64 `json:"success_count"`
// TotalLatencyMs is the summed total_latency_ms (presumably milliseconds,
// going by the column name).
TotalLatencyMs int64 `json:"total_latency_ms"`
// OutputTokens is the summed output_tokens.
OutputTokens int64 `json:"output_tokens"`
// GenerationMs is the summed generation_ms (presumably milliseconds, going
// by the column name).
GenerationMs int64 `json:"generation_ms"`
}
// GetPerfMetricsSummaryAll sums the PerfMetric counters per model over every
// bucket whose bucket_ts lies in the inclusive range [startTs, endTs].
//
// Rows are grouped by model_name, and groups whose summed request_count is
// not greater than zero are dropped by the HAVING clause. The scanned rows
// are returned together with any error reported by the query.
func GetPerfMetricsSummaryAll(startTs int64, endTs int64) ([]PerfMetricSummary, error) {
	// The aliases match the column names so the result scans straight into
	// PerfMetricSummary.
	const aggregateColumns = "model_name, SUM(request_count) as request_count, SUM(success_count) as success_count, SUM(total_latency_ms) as total_latency_ms, SUM(output_tokens) as output_tokens, SUM(generation_ms) as generation_ms"

	var rows []PerfMetricSummary
	query := DB.Model(&PerfMetric{}).
		Select(aggregateColumns).
		Where("bucket_ts >= ? AND bucket_ts <= ?", startTs, endTs).
		Group("model_name").
		Having("SUM(request_count) > 0")

	if err := query.Find(&rows).Error; err != nil {
		return rows, err
	}
	return rows, nil
}
func DeletePerfMetricsBefore(cutoffTs int64) error {
if cutoffTs <= 0 {
return nil

View File

@ -3,6 +3,7 @@ package perfmetrics
import (
"context"
"fmt"
"math"
"sort"
"sync"
"time"
@ -121,6 +122,77 @@ func Query(params QueryParams) (QueryResult, error) {
return buildQueryResult(params.Model, merged), nil
}
// QuerySummaryAll builds a per-model performance summary for the window that
// ends now and reaches back the given number of hours.
//
// A non-positive hours value falls back to 24, and values above 30 days
// (24*30 hours) are capped at that limit. The totals start from the rows
// returned by model.GetPerfMetricsSummaryAll and are then increased by every
// hotBuckets entry whose bucketTs lies inside the same inclusive window and
// whose snapshot has a non-zero request count.
//
// For each model with at least one request the result reports:
//   - AvgLatencyMs: total latency divided by request count (integer division)
//   - SuccessRate:  success percentage, rounded to two decimals
//   - AvgTps:       output tokens per second of generation time, rounded to
//     two decimals, or 0 when no generation time was recorded
//   - RequestCount: the combined request count
//
// Models are sorted by name. An error from the model layer is returned
// together with an empty SummaryAllResult.
func QuerySummaryAll(hours int) (SummaryAllResult, error) {
	const maxHours = 24 * 30
	switch {
	case hours <= 0:
		hours = 24
	case hours > maxHours:
		hours = maxHours
	}

	windowEnd := time.Now().Unix()
	windowStart := windowEnd - int64(hours)*3600

	persisted, err := model.GetPerfMetricsSummaryAll(windowStart, windowEnd)
	if err != nil {
		return SummaryAllResult{}, err
	}

	// Seed the per-model totals from the aggregated rows.
	perModel := map[string]counters{}
	for i := range persisted {
		r := &persisted[i]
		perModel[r.ModelName] = counters{
			requestCount:   r.RequestCount,
			successCount:   r.SuccessCount,
			totalLatencyMs: r.TotalLatencyMs,
			outputTokens:   r.OutputTokens,
			generationMs:   r.GenerationMs,
		}
	}

	// Fold in the hotBuckets entries that fall inside the window.
	hotBuckets.Range(func(rawKey, rawBucket any) bool {
		bk := rawKey.(bucketKey)
		inWindow := bk.bucketTs >= windowStart && bk.bucketTs <= windowEnd
		if !inWindow {
			return true
		}
		live := rawBucket.(*atomicBucket).snapshot()
		if live.requestCount != 0 {
			acc := perModel[bk.model]
			acc.requestCount += live.requestCount
			acc.successCount += live.successCount
			acc.totalLatencyMs += live.totalLatencyMs
			acc.outputTokens += live.outputTokens
			acc.generationMs += live.generationMs
			perModel[bk.model] = acc
		}
		return true
	})

	summaries := make([]ModelSummary, 0, len(perModel))
	for modelName, agg := range perModel {
		if agg.requestCount == 0 {
			continue
		}

		var tps float64
		if agg.generationMs > 0 {
			// generationMs is divided by 1000 to get tokens per second.
			tps = float64(agg.outputTokens) / (float64(agg.generationMs) / 1000.0)
		}
		successPct := float64(agg.successCount) / float64(agg.requestCount) * 100

		summaries = append(summaries, ModelSummary{
			ModelName:    modelName,
			AvgLatencyMs: agg.totalLatencyMs / agg.requestCount,
			SuccessRate:  math.Round(successPct*100) / 100,
			AvgTps:       math.Round(tps*100) / 100,
			RequestCount: agg.requestCount,
		})
	}

	// Map iteration order is random; sort for a stable response.
	sort.Slice(summaries, func(a, b int) bool {
		return summaries[a].ModelName < summaries[b].ModelName
	})

	return SummaryAllResult{Models: summaries}, nil
}
func bucketStart(ts int64) int64 {
bucketSeconds := perf_metrics_setting.GetBucketSeconds()
if bucketSeconds <= 0 {

View File

@ -47,6 +47,18 @@ type QueryResult struct {
Groups []GroupResult `json:"groups"`
}
// ModelSummary is the per-model entry of a SummaryAllResult, as built by
// QuerySummaryAll.
type ModelSummary struct {
// ModelName identifies the model the figures belong to.
ModelName string `json:"model_name"`
// AvgLatencyMs is total latency divided by request count (integer division).
AvgLatencyMs int64 `json:"avg_latency_ms"`
// SuccessRate is the success percentage (successes / requests * 100),
// rounded to two decimal places.
SuccessRate float64 `json:"success_rate"`
// AvgTps is output tokens per second of generation time, rounded to two
// decimal places; it is 0 when no generation time was recorded.
AvgTps float64 `json:"avg_tps"`
// RequestCount is the number of requests the averages are based on.
RequestCount int64 `json:"request_count"`
}
// SummaryAllResult is the value returned by QuerySummaryAll.
type SummaryAllResult struct {
// Models holds one summary per model; QuerySummaryAll sorts it by ModelName.
Models []ModelSummary `json:"models"`
}
type bucketKey struct {
model string
group string

View File

@ -31,7 +31,12 @@ func SetApiRouter(router *gin.Engine) {
//apiRouter.GET("/midjourney", controller.GetMidjourney)
apiRouter.GET("/home_page_content", controller.GetHomePageContent)
apiRouter.GET("/pricing", middleware.TryUserAuth(), controller.GetPricing)
apiRouter.GET("/perf-metrics", middleware.TryUserAuth(), controller.GetPerfMetrics)
perfMetricsRoute := apiRouter.Group("/perf-metrics")
perfMetricsRoute.Use(middleware.TryUserAuth())
{
perfMetricsRoute.GET("/summary", controller.GetPerfMetricsSummary)
perfMetricsRoute.GET("", controller.GetPerfMetrics)
}
apiRouter.GET("/rankings", controller.GetRankings)
apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(), middleware.TurnstileCheck(), controller.SendEmailVerification)
apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail)

View File

@ -183,7 +183,7 @@ export function OtpForm({ className, ...props }: OtpFormProps) {
)}
/>
<Button className='mt-2 w-full' disabled={!isFormValid || isLoading}>
<Button type='submit' className='mt-2 w-full' disabled={!isFormValid || isLoading}>
{isLoading ? <Loader2 className='h-4 w-4 animate-spin' /> : null}
{t('Verify and Sign In')}
</Button>

View File

@ -38,6 +38,29 @@ export type PerformanceMetricsData = {
}
}
/** One model's entry in the `/api/perf-metrics/summary` response. */
export type PerfModelSummary = {
/** Name of the model the figures belong to. */
model_name: string
/** Average latency (milliseconds, per the field name). */
avg_latency_ms: number
/** Success rate; presumably a percentage in the 0-100 range (confirm against the backend). */
success_rate: number
/** Average throughput; presumably tokens per second (confirm against the backend). */
avg_tps: number
/** Number of requests behind the averages. */
request_count: number
}
/** Response envelope returned by `getPerfMetricsSummary`. */
export type PerfSummaryAllData = {
/** Whether the request succeeded. */
success: boolean
/** Optional message from the server (presumably set on failure). */
message?: string
/** Payload carrying the per-model summaries. */
data: {
models: PerfModelSummary[]
}
}
/**
 * Fetch the batched per-model performance summary.
 *
 * @param hours Look-back window sent as the `hours` query parameter (default 24).
 * @returns The response body of `GET /api/perf-metrics/summary`.
 */
export async function getPerfMetricsSummary(
  hours = 24
): Promise<PerfSummaryAllData> {
  const endpoint = `/api/perf-metrics/summary?hours=${hours}`
  const { data } = await api.get(endpoint)
  return data
}
export async function getPerfMetrics(
modelName: string,
hours = 24

View File

@ -1,10 +1,13 @@
import { useEffect, useMemo, useState } from 'react'
import { useQuery } from '@tanstack/react-query'
import { ChevronLeft, ChevronRight } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import { Button } from '@/components/ui/button'
import { getPerfMetricsSummary } from '../api'
import { DEFAULT_PRICING_PAGE_SIZE, DEFAULT_TOKEN_UNIT } from '../constants'
import type { PricingModel, TokenUnit } from '../types'
import { ModelCard } from './model-card'
import type { ModelPerfBadgeData } from './model-perf-badge'
export interface ModelCardGridProps {
models: PricingModel[]
@ -22,6 +25,13 @@ export function ModelCardGrid(props: ModelCardGridProps) {
const tokenUnit = props.tokenUnit ?? DEFAULT_TOKEN_UNIT
const totalPages = Math.max(1, Math.ceil(props.models.length / pageSize))
const perfQuery = useQuery({
queryKey: ['perf-metrics-summary'],
queryFn: () => getPerfMetricsSummary(24),
staleTime: 60 * 1000,
retry: false,
})
useEffect(() => {
setPage(1)
}, [props.models])
@ -31,6 +41,16 @@ export function ModelCardGrid(props: ModelCardGridProps) {
return props.models.slice(start, start + pageSize)
}, [page, pageSize, props.models])
const perfMap = useMemo(() => {
const map = new Map<string, ModelPerfBadgeData>()
for (const model of perfQuery.data?.data?.models ?? []) {
if (model.request_count > 0) {
map.set(model.model_name, model)
}
}
return map
}, [perfQuery.data])
if (props.models.length === 0) {
return null
}
@ -46,6 +66,7 @@ export function ModelCardGrid(props: ModelCardGridProps) {
priceRate={props.priceRate}
usdExchangeRate={props.usdExchangeRate}
showRechargePrice={props.showRechargePrice}
perf={perfMap.get(model.model_name || '')}
onClick={() => props.onModelClick(model.model_name || '')}
/>
))}

View File

@ -14,6 +14,8 @@ import { parseTags } from '../lib/filters'
import { isTokenBasedModel } from '../lib/model-helpers'
import { formatPrice, formatRequestPrice } from '../lib/price'
import type { PricingModel, TokenUnit } from '../types'
import { ModelPerfBadge } from './model-perf-badge'
import type { ModelPerfBadgeData } from './model-perf-badge'
export interface ModelCardProps {
model: PricingModel
@ -22,6 +24,7 @@ export interface ModelCardProps {
usdExchangeRate?: number
tokenUnit?: TokenUnit
showRechargePrice?: boolean
perf?: ModelPerfBadgeData
}
export const ModelCard = memo(function ModelCard(props: ModelCardProps) {
@ -69,7 +72,7 @@ export const ModelCard = memo(function ModelCard(props: ModelCardProps) {
return (
<div
className={cn(
'group flex flex-col rounded-xl border p-3 transition-colors sm:p-5',
'group relative flex flex-col rounded-xl border p-3 transition-colors sm:p-5',
'hover:bg-muted/20'
)}
>
@ -206,41 +209,43 @@ export const ModelCard = memo(function ModelCard(props: ModelCardProps) {
{props.model.description || t('No description available.')}
</p>
{/* Footer row 1: group + billing type */}
<div className='mt-2 flex flex-wrap items-center gap-x-2 gap-y-1 sm:mt-4'>
{primaryGroup && (
{/* Footer: left metadata and right performance summary share row alignment */}
<div className='mt-2 grid grid-cols-[minmax(0,1fr)_auto] items-start gap-x-2 gap-y-1 sm:mt-4'>
<div className='flex min-w-0 flex-wrap items-center gap-x-2 gap-y-1'>
{primaryGroup && (
<span className='text-muted-foreground text-xs font-medium'>
{primaryGroup} {t('Groups')}
</span>
)}
<span className='text-muted-foreground text-xs font-medium'>
{primaryGroup} {t('Groups')}
{isTokenBased ? t('Token-based') : t('Per Request')}
</span>
)}
<span className='text-muted-foreground text-xs font-medium'>
{isTokenBased ? t('Token-based') : t('Per Request')}
</span>
{isDynamicPricing && (
<StatusBadge
label={t('Dynamic Pricing')}
variant='warning'
copyable={false}
size='sm'
/>
)}
</div>
{isDynamicPricing && (
<StatusBadge
label={t('Dynamic Pricing')}
variant='warning'
copyable={false}
size='sm'
/>
)}
</div>
<ModelPerfBadge perf={props.perf} className='row-span-2 self-start' />
{/* Footer row 2: endpoint + tag chips */}
<div className='mt-1.5 flex flex-wrap items-center gap-x-2.5 gap-y-0.5 sm:mt-2 sm:gap-x-3 sm:gap-y-1'>
{bottomTags.map((item) => (
<span key={item} className='text-muted-foreground/70 text-xs'>
{item}
<div className='flex min-w-0 flex-wrap items-center gap-x-2.5 gap-y-0.5 sm:gap-x-3 sm:gap-y-1'>
{bottomTags.map((item) => (
<span key={item} className='text-muted-foreground/70 text-xs'>
{item}
</span>
))}
<span className='text-muted-foreground/50 text-xs'>
{tokenUnitLabel}
</span>
))}
<span className='text-muted-foreground/50 text-xs'>
{tokenUnitLabel}
</span>
{hiddenCount > 0 && (
<span className='text-muted-foreground/40 text-xs'>
+{hiddenCount}
</span>
)}
{hiddenCount > 0 && (
<span className='text-muted-foreground/40 text-xs'>
+{hiddenCount}
</span>
)}
</div>
</div>
</div>
)

View File

@ -0,0 +1,77 @@
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import { cn } from '@/lib/utils'
import { formatLatency, formatThroughput } from '../lib/mock-stats'
/** Metrics rendered by `ModelPerfBadge`. */
export type ModelPerfBadgeData = {
/** Average latency; shown as an em dash when not greater than 0. */
avg_latency_ms: number
/** Success rate, displayed with a `%` suffix; drives the status color (below 99 red, below 99.9 amber, otherwise green). */
success_rate: number
/** Average throughput, rendered through `formatThroughput`. */
avg_tps: number
}
/**
 * Props for `ModelPerfBadge`.
 *
 * NOTE(review): the interface inherits every div attribute, but the component
 * currently reads only `className` from them.
 */
export interface ModelPerfBadgeProps
extends React.HTMLAttributes<HTMLDivElement> {
/** Metrics to display; when undefined the badge renders nothing. */
perf: ModelPerfBadgeData | undefined
}
/**
 * Format a throughput value for the narrow badge column: the output of
 * `formatThroughput` with its first ' t/s' occurrence collapsed to 'tps'.
 */
function formatCompactThroughput(tps: number): string {
  const verbose = formatThroughput(tps)
  return verbose.replace(' t/s', 'tps')
}
/**
 * Compact three-column badge (latency, throughput, status) for a model card.
 *
 * Renders nothing when `perf` is undefined. The status bar is red when the
 * success rate is below 99, amber when below 99.9, and green otherwise.
 * Latency is shown as an em dash when it is not greater than 0.
 */
export const ModelPerfBadge = memo(function ModelPerfBadge(
  props: ModelPerfBadgeProps
) {
  const { t } = useTranslation()
  const { perf, className } = props

  if (!perf) {
    return null
  }

  // Thresholds use `<` so any value that fails both checks stays green.
  const statusColor =
    perf.success_rate < 99
      ? 'bg-red-500'
      : perf.success_rate < 99.9
        ? 'bg-amber-500'
        : 'bg-emerald-500'

  const latencyText =
    perf.avg_latency_ms > 0 ? formatLatency(perf.avg_latency_ms) : '—'
  const throughputText = formatCompactThroughput(perf.avg_tps)
  const successTitle = `${t('Success rate')}: ${perf.success_rate.toFixed(1)}%`

  const labelClass = 'text-muted-foreground/55 truncate text-[10px] leading-4'
  const valueClass =
    'text-muted-foreground/80 whitespace-nowrap font-mono text-xs leading-4'

  return (
    <div
      className={cn(
        'hidden w-[132px] grid-cols-[38px_48px_30px] gap-x-2 text-right tabular-nums min-[460px]:grid',
        className
      )}
    >
      <div title={t('Average latency')} className='min-w-0'>
        <div className='text-muted-foreground/55 text-[10px] leading-4'>
          {t('Latency short')}
        </div>
        <div className={valueClass}>{latencyText}</div>
      </div>
      <div title={t('Throughput')} className='min-w-0'>
        <div className={labelClass}>{t('Throughput short')}</div>
        <div className={valueClass}>{throughputText}</div>
      </div>
      <div title={successTitle} className='min-w-0'>
        <div className={labelClass}>{t('Status short')}</div>
        <div className='flex h-4 items-center justify-end gap-0.5'>
          <span className='bg-muted-foreground/10 h-2 w-1 rounded-full' />
          <span className='bg-muted-foreground/15 h-2.5 w-1 rounded-full' />
          <span className={cn('h-3 w-1 rounded-full', statusColor)} />
        </div>
      </div>
    </div>
  )
})

View File

@ -446,6 +446,8 @@
"Available Models": "Available Models",
"Available Rewards": "Available Rewards",
"Average latency": "Average latency",
"Latency": "Latency",
"Latency short": "Lat.",
"Average latency, TTFT, and success rate by group": "Average latency, TTFT, and success rate by group",
"Average RPM": "Average RPM",
"Average time-to-first-token (TTFT) by group": "Average time-to-first-token (TTFT) by group",
@ -3603,6 +3605,7 @@
"Statistical tokens": "Statistical tokens",
"Statistics reset": "Statistics reset",
"Status": "Status",
"Status short": "Status",
"Status & Sync": "Status & Sync",
"Status Code": "Status Code",
"Status Code Mapping": "Status Code Mapping",
@ -3834,6 +3837,7 @@
"This year": "This year",
"Three steps to get started": "Three steps to get started",
"Throughput": "Throughput",
"Throughput short": "TPS",
"Throughput by group": "Throughput by group",
"Throughput trend": "Throughput trend",
"Tier": "Tier",

View File

@ -446,6 +446,8 @@
"Available Models": "Modèles disponibles",
"Available Rewards": "Récompenses disponibles",
"Average latency": "Latence moyenne",
"Latency": "Latence",
"Latency short": "Lat.",
"Average latency, TTFT, and success rate by group": "Latence moyenne, TTFT et taux de réussite par groupe",
"Average RPM": "RPM moyen",
"Average time-to-first-token (TTFT) by group": "Temps moyen jusqu'au premier token (TTFT) par groupe",
@ -3603,6 +3605,7 @@
"Statistical tokens": "Jetons statistiques",
"Statistics reset": "Statistiques réinitialisées",
"Status": "Statut",
"Status short": "État",
"Status & Sync": "Statut et synchronisation",
"Status Code": "Code de statut",
"Status Code Mapping": "Mappage des codes d'état",
@ -3834,6 +3837,7 @@
"This year": "Cette année",
"Three steps to get started": "Trois étapes pour commencer",
"Throughput": "Débit",
"Throughput short": "TPS",
"Throughput by group": "Débit par groupe",
"Throughput trend": "Tendance du débit",
"Tier": "Palier",

View File

@ -446,6 +446,8 @@
"Available Models": "利用可能なモデル",
"Available Rewards": "利用可能な報酬",
"Average latency": "平均レイテンシ",
"Latency": "レイテンシ",
"Latency short": "遅延",
"Average latency, TTFT, and success rate by group": "グループ別の平均レイテンシ、TTFT、成功率",
"Average RPM": "平均RPM",
"Average time-to-first-token (TTFT) by group": "グループ別の平均 Time to First Token（TTFT）",
@ -3603,6 +3605,7 @@
"Statistical tokens": "統計トークン",
"Statistics reset": "統計をリセットしました",
"Status": "ステータス",
"Status short": "状態",
"Status & Sync": "ステータスと同期",
"Status Code": "ステータスコード",
"Status Code Mapping": "ステータスコードマッピング",
@ -3834,6 +3837,7 @@
"This year": "今年",
"Three steps to get started": "3ステップで始める",
"Throughput": "スループット",
"Throughput short": "TPS",
"Throughput by group": "グループ別スループット",
"Throughput trend": "スループット推移",
"Tier": "ティア",

View File

@ -446,6 +446,8 @@
"Available Models": "Доступные модели",
"Available Rewards": "Доступные награды",
"Average latency": "Средняя задержка",
"Latency": "Задержка",
"Latency short": "Зад.",
"Average latency, TTFT, and success rate by group": "Средняя задержка, TTFT и доля успешных запросов по группам",
"Average RPM": "Среднее число запросов в минуту",
"Average time-to-first-token (TTFT) by group": "Среднее время до первого токена (TTFT) по группам",
@ -3603,6 +3605,7 @@
"Statistical tokens": "Статистические токены",
"Statistics reset": "Статистика сброшена",
"Status": "Статус",
"Status short": "Стат.",
"Status & Sync": "Статус и синхронизация",
"Status Code": "Код статуса",
"Status Code Mapping": "Сопоставление кодов состояния",
@ -3834,6 +3837,7 @@
"This year": "Этот год",
"Three steps to get started": "Три шага для начала работы",
"Throughput": "Пропускная способность",
"Throughput short": "TPS",
"Throughput by group": "Пропускная способность по группам",
"Throughput trend": "Тренд пропускной способности",
"Tier": "Уровень",

View File

@ -446,6 +446,8 @@
"Available Models": "Mô hình khả dụng",
"Available Rewards": "Phần thưởng hiện có",
"Average latency": "Độ trễ trung bình",
"Latency": "Độ trễ",
"Latency short": "Trễ",
"Average latency, TTFT, and success rate by group": "Độ trễ trung bình, TTFT và tỷ lệ thành công theo nhóm",
"Average RPM": "RPM trung bình",
"Average time-to-first-token (TTFT) by group": "Thời gian trung bình tới token đầu tiên (TTFT) theo nhóm",
@ -3603,6 +3605,7 @@
"Statistical tokens": "Mã thông báo thống kê",
"Statistics reset": "Đã đặt lại thống kê",
"Status": "Trạng thái",
"Status short": "TT",
"Status & Sync": "Trạng thái & Đồng bộ",
"Status Code": "Mã trạng thái",
"Status Code Mapping": "Ánh xạ mã trạng thái",
@ -3834,6 +3837,7 @@
"This year": "Năm nay",
"Three steps to get started": "Ba bước để bắt đầu",
"Throughput": "Thông lượng",
"Throughput short": "TPS",
"Throughput by group": "Thông lượng theo nhóm",
"Throughput trend": "Xu hướng thông lượng",
"Tier": "Bậc",

View File

@ -446,6 +446,8 @@
"Available Models": "可用模型",
"Available Rewards": "可用奖励",
"Average latency": "平均延迟",
"Latency": "延迟",
"Latency short": "延迟",
"Average latency, TTFT, and success rate by group": "各分组的平均延迟、首 Token 延迟和成功率",
"Average RPM": "平均 RPM",
"Average time-to-first-token (TTFT) by group": "各分组的平均首 Token 延迟（TTFT）",
@ -3603,6 +3605,7 @@
"Statistical tokens": "统计 Token 数",
"Statistics reset": "统计已重置",
"Status": "状态",
"Status short": "状态",
"Status & Sync": "状态与同步",
"Status Code": "状态码",
"Status Code Mapping": "状态码映射",
@ -3834,6 +3837,7 @@
"This year": "本年",
"Three steps to get started": "三步快速上手",
"Throughput": "吞吐量",
"Throughput short": "吞吐",
"Throughput by group": "各分组吞吐量",
"Throughput trend": "吞吐量趋势",
"Tier": "档位",