new-api/model/perf_metric.go
CaIon e8cfb546fa
Some checks failed
Publish Docker image (Multi-arch) / Build & push (amd64) (push) Has been cancelled
Publish Docker image (Multi-arch) / Build & push (arm64) (push) Has been cancelled
Publish Docker image (Multi-arch) / Create multi-arch manifests (push) Has been cancelled
Release (Linux, macOS, Windows) / Linux Release (push) Has been cancelled
Release (Linux, macOS, Windows) / macOS Release (push) Has been cancelled
Release (Linux, macOS, Windows) / Windows Release (push) Has been cancelled
feat(default): add model performance badges
Add a batched performance summary API for model square cards and show compact latency, throughput, and status metrics without increasing card size. Also fix OTP verification form submission.
2026-05-06 22:21:00 +08:00

95 lines
3.4 KiB
Go

package model
import (
"time"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
// PerfMetric is one row of aggregated relay performance counters for the
// model square. Rows are unique per (ModelName, Group, BucketTs); the counter
// fields are accumulated by UpsertPerfMetric and are hidden from JSON output.
type PerfMetric struct {
	Id int `json:"id" gorm:"primaryKey"`

	// Identity of the bucket: the three fields below form the unique index
	// idx_perf_model_group_bucket, in this priority order.
	ModelName string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
	Group     string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
	// BucketTs also carries its own index (idx_perf_bucket_ts) for range scans.
	// NOTE(review): presumably a Unix timestamp in seconds — confirm with the writer.
	BucketTs int64 `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`

	// Request totals for the bucket.
	RequestCount   int64 `json:"-" gorm:"default:0"`
	SuccessCount   int64 `json:"-" gorm:"default:0"`
	TotalLatencyMs int64 `json:"-" gorm:"default:0"`

	// Sum and sample count kept separately so an average can be derived.
	// NOTE(review): "Ttft" presumably means time-to-first-token — confirm.
	TtftSumMs int64 `json:"-" gorm:"default:0"`
	TtftCount int64 `json:"-" gorm:"default:0"`

	// Output volume and the time spent producing it.
	OutputTokens int64 `json:"-" gorm:"default:0"`
	GenerationMs int64 `json:"-" gorm:"default:0"`
}
// TableName returns the fixed table name used for PerfMetric rows. GORM
// consults this method when resolving the model's table.
func (PerfMetric) TableName() string {
	const table = "perf_metrics"
	return table
}
// UpsertPerfMetric merges the counters carried by metric into the stored row
// with the same (model_name, group, bucket_ts) key, inserting a new row when
// none exists yet.
//
// A nil metric, or one whose RequestCount is zero, is ignored and nil is
// returned. On a key conflict every counter column is incremented by the
// corresponding field of metric instead of being overwritten. The returned
// error is whatever the underlying Create call reports.
func UpsertPerfMetric(metric *PerfMetric) error {
	if metric == nil || metric.RequestCount == 0 {
		return nil
	}

	// Refer to the existing row's columns through the table name. Inside
	// ON CONFLICT ... DO UPDATE, PostgreSQL makes both the target row and the
	// proposed "excluded" row visible, so a bare "request_count + ?" is
	// rejected as an ambiguous column reference. The table-qualified form is
	// accepted by PostgreSQL, MySQL and SQLite alike.
	table := metric.TableName()
	increment := func(column string, delta int64) clause.Expr {
		return gorm.Expr(table+"."+column+" + ?", delta)
	}

	return DB.Clauses(clause.OnConflict{
		Columns: []clause.Column{
			{Name: "model_name"},
			{Name: "group"},
			{Name: "bucket_ts"},
		},
		DoUpdates: clause.Assignments(map[string]interface{}{
			"request_count":    increment("request_count", metric.RequestCount),
			"success_count":    increment("success_count", metric.SuccessCount),
			"total_latency_ms": increment("total_latency_ms", metric.TotalLatencyMs),
			"ttft_sum_ms":      increment("ttft_sum_ms", metric.TtftSumMs),
			"ttft_count":       increment("ttft_count", metric.TtftCount),
			"output_tokens":    increment("output_tokens", metric.OutputTokens),
			"generation_ms":    increment("generation_ms", metric.GenerationMs),
		}),
	}).Create(metric).Error
}
// GetPerfMetrics loads the buckets recorded for modelName whose bucket_ts
// falls in the inclusive range [startTs, endTs], sorted by bucket_ts
// ascending. A non-empty group restricts the result to that group; an empty
// group applies no group filter.
func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
	tx := DB.Model(&PerfMetric{}).Where(
		"model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?",
		modelName, startTs, endTs,
	)
	if len(group) > 0 {
		// commonGroupCol is declared elsewhere in the package and supplies the
		// column expression used for the group filter.
		tx = tx.Where(commonGroupCol+" = ?", group)
	}

	var rows []PerfMetric
	result := tx.Order("bucket_ts ASC").Find(&rows)
	return rows, result.Error
}
// PerfMetricSummary holds per-model counter totals as produced by
// GetPerfMetricsSummaryAll. Unlike PerfMetric, every counter is exposed in
// the JSON encoding.
type PerfMetricSummary struct {
	// ModelName is the grouping key of the summary row.
	ModelName string `json:"model_name"`

	// Summed request counters.
	RequestCount   int64 `json:"request_count"`
	SuccessCount   int64 `json:"success_count"`
	TotalLatencyMs int64 `json:"total_latency_ms"`

	// Summed output volume and generation time.
	OutputTokens int64 `json:"output_tokens"`
	GenerationMs int64 `json:"generation_ms"`
}
// GetPerfMetricsSummaryAll sums the stored counters per model over all
// buckets whose bucket_ts lies in the inclusive range [startTs, endTs].
// Rows are grouped by model_name only, so every group contributes to a
// model's totals; models whose summed request_count is not positive are
// left out.
func GetPerfMetricsSummaryAll(startTs int64, endTs int64) ([]PerfMetricSummary, error) {
	// One aggregate per PerfMetricSummary field; the aliases match the column
	// names the result rows are scanned by.
	const columns = "model_name, " +
		"SUM(request_count) as request_count, " +
		"SUM(success_count) as success_count, " +
		"SUM(total_latency_ms) as total_latency_ms, " +
		"SUM(output_tokens) as output_tokens, " +
		"SUM(generation_ms) as generation_ms"

	var totals []PerfMetricSummary
	result := DB.Model(&PerfMetric{}).
		Select(columns).
		Where("bucket_ts >= ? AND bucket_ts <= ?", startTs, endTs).
		Group("model_name").
		Having("SUM(request_count) > 0").
		Find(&totals)
	return totals, result.Error
}
// DeletePerfMetricsBefore removes every stored bucket whose bucket_ts is
// strictly less than cutoffTs. A non-positive cutoffTs is a no-op that
// returns nil without touching the database.
func DeletePerfMetricsBefore(cutoffTs int64) error {
	if cutoffTs > 0 {
		result := DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{})
		return result.Error
	}
	return nil
}
// PerfMetricStartTime returns the Unix timestamp, in seconds, that lies the
// given number of hours before the current time. A non-positive hours value
// falls back to 24.
//
// The offset is computed in whole seconds rather than as a time.Duration.
// A Duration counts nanoseconds in an int64, so multiplying by time.Hour
// overflows once hours exceeds roughly 2.5 million, which could wrap around
// and produce a start time in the future. Look-backs too large to express in
// int64 seconds are clamped to the largest representable value.
func PerfMetricStartTime(hours int) int64 {
	const secondsPerHour = int64(time.Hour / time.Second)
	// maxHours is the largest look-back whose second count still fits in int64.
	const maxHours = (1<<63 - 1) / secondsPerHour

	lookback := int64(hours)
	switch {
	case lookback <= 0:
		lookback = 24
	case lookback > maxHours:
		lookback = maxHours
	}
	return time.Now().Unix() - lookback*secondsPerHour
}