Some checks failed
Publish Docker image (Multi-arch) / Build & push (amd64) (push) Has been cancelled
Publish Docker image (Multi-arch) / Build & push (arm64) (push) Has been cancelled
Publish Docker image (Multi-arch) / Create multi-arch manifests (push) Has been cancelled
Release (Linux, macOS, Windows) / Linux Release (push) Has been cancelled
Release (Linux, macOS, Windows) / macOS Release (push) Has been cancelled
Release (Linux, macOS, Windows) / Windows Release (push) Has been cancelled
Add a batched performance summary API for model square cards and show compact latency, throughput, and status metrics without increasing card size. Also fix OTP verification form submission.
95 lines
3.4 KiB
Go
95 lines
3.4 KiB
Go
package model
|
|
|
|
import (
|
|
"time"
|
|
|
|
"gorm.io/gorm"
|
|
"gorm.io/gorm/clause"
|
|
)
|
|
|
|
// PerfMetric stores aggregated relay performance metrics for the model square.
//
// A row is uniquely identified by (ModelName, Group, BucketTs) through the
// composite unique index idx_perf_model_group_bucket. The counter fields are
// excluded from JSON output (json:"-"); UpsertPerfMetric adds them onto the
// stored row when a row with the same key already exists.
type PerfMetric struct {
	Id int `json:"id" gorm:"primaryKey"`

	// Key columns of idx_perf_model_group_bucket, in index priority order.
	// BucketTs additionally has its own index, idx_perf_bucket_ts.
	ModelName string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
	Group     string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
	BucketTs  int64  `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`

	// Request totals for the bucket.
	RequestCount   int64 `json:"-" gorm:"default:0"`
	SuccessCount   int64 `json:"-" gorm:"default:0"`
	TotalLatencyMs int64 `json:"-" gorm:"default:0"`

	// Time-to-first-token sum and sample count (names suggest milliseconds;
	// confirm against the code that records them).
	TtftSumMs int64 `json:"-" gorm:"default:0"`
	TtftCount int64 `json:"-" gorm:"default:0"`

	// Output volume and the generation time it was produced in.
	OutputTokens int64 `json:"-" gorm:"default:0"`
	GenerationMs int64 `json:"-" gorm:"default:0"`
}
|
|
|
|
func (PerfMetric) TableName() string {
|
|
return "perf_metrics"
|
|
}
|
|
|
|
func UpsertPerfMetric(metric *PerfMetric) error {
|
|
if metric == nil || metric.RequestCount == 0 {
|
|
return nil
|
|
}
|
|
return DB.Clauses(clause.OnConflict{
|
|
Columns: []clause.Column{
|
|
{Name: "model_name"},
|
|
{Name: "group"},
|
|
{Name: "bucket_ts"},
|
|
},
|
|
DoUpdates: clause.Assignments(map[string]interface{}{
|
|
"request_count": gorm.Expr("request_count + ?", metric.RequestCount),
|
|
"success_count": gorm.Expr("success_count + ?", metric.SuccessCount),
|
|
"total_latency_ms": gorm.Expr("total_latency_ms + ?", metric.TotalLatencyMs),
|
|
"ttft_sum_ms": gorm.Expr("ttft_sum_ms + ?", metric.TtftSumMs),
|
|
"ttft_count": gorm.Expr("ttft_count + ?", metric.TtftCount),
|
|
"output_tokens": gorm.Expr("output_tokens + ?", metric.OutputTokens),
|
|
"generation_ms": gorm.Expr("generation_ms + ?", metric.GenerationMs),
|
|
}),
|
|
}).Create(metric).Error
|
|
}
|
|
|
|
func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
|
|
var metrics []PerfMetric
|
|
query := DB.Model(&PerfMetric{}).
|
|
Where("model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?", modelName, startTs, endTs)
|
|
if group != "" {
|
|
query = query.Where(commonGroupCol+" = ?", group)
|
|
}
|
|
err := query.Order("bucket_ts ASC").Find(&metrics).Error
|
|
return metrics, err
|
|
}
|
|
|
|
// PerfMetricSummary holds the per-model totals produced by
// GetPerfMetricsSummaryAll: each numeric field is the SUM of the PerfMetric
// column of the same name over the queried time range.
type PerfMetricSummary struct {
	// ModelName is the grouping key of the summary row.
	ModelName string `json:"model_name"`

	// Request totals.
	RequestCount   int64 `json:"request_count"`
	SuccessCount   int64 `json:"success_count"`
	TotalLatencyMs int64 `json:"total_latency_ms"`

	// Output totals.
	OutputTokens int64 `json:"output_tokens"`
	GenerationMs int64 `json:"generation_ms"`
}
|
|
|
|
func GetPerfMetricsSummaryAll(startTs int64, endTs int64) ([]PerfMetricSummary, error) {
|
|
var summaries []PerfMetricSummary
|
|
err := DB.Model(&PerfMetric{}).
|
|
Select("model_name, SUM(request_count) as request_count, SUM(success_count) as success_count, SUM(total_latency_ms) as total_latency_ms, SUM(output_tokens) as output_tokens, SUM(generation_ms) as generation_ms").
|
|
Where("bucket_ts >= ? AND bucket_ts <= ?", startTs, endTs).
|
|
Group("model_name").
|
|
Having("SUM(request_count) > 0").
|
|
Find(&summaries).Error
|
|
return summaries, err
|
|
}
|
|
|
|
func DeletePerfMetricsBefore(cutoffTs int64) error {
|
|
if cutoffTs <= 0 {
|
|
return nil
|
|
}
|
|
return DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{}).Error
|
|
}
|
|
|
|
// PerfMetricStartTime returns the Unix timestamp, in seconds, of the moment
// `hours` hours before now. A non-positive hours falls back to 24.
func PerfMetricStartTime(hours int) int64 {
	if hours <= 0 {
		hours = 24
	}
	// Subtract in whole seconds rather than building a time.Duration:
	// time.Duration(hours)*time.Hour overflows int64 nanoseconds once hours
	// exceeds roughly 2.56 million, which would yield a time in the future.
	const secondsPerHour = int64(time.Hour / time.Second)
	return time.Now().Unix() - int64(hours)*secondsPerHour
}
|