new-api/relay/chat_completions_via_responses.go
CaIon fddf54ccc5
perf: reduce heap residency for large base64 relay requests
Three layered optimizations targeting Gemini-style 5MB base64 payloads where
RSS could balloon to tens of GB under concurrent load:

1. Byte-based param override (relay/common/override.go)
   - Switch legacy/operations hot paths from common.Marshal round-trips and
     map[string]any conversions to gjson/sjson on []byte directly.
   - Avoids cloning 5MB strings during each Set/Delete operation.

2. strings.Builder for Gemini response markdown (relay/channel/gemini/relay-gemini.go)
   - Replace string concatenation + strings.Join when assembling
     "![image](data:...;base64,DATA)" content for inline image responses.
   - Pre-allocates capacity from inline_data byte sizes.

3. Outbound BodyStorage + streaming Decoder (this commit's core)
   - New relay/common/outbound_body.go helper wraps marshaled upstream bodies
     in common.BodyStorage, allowing disk-cache mode to offload jsonData to
     a temp file while waiting for upstream TTFB. The original []byte can
     then be GC'd, removing ~5MB/req of heap residency during the longest
     window of a request.
   - All 7 relay handlers (gemini/claude/responses/embedding/image/compatible/
     rerank) plus chat_completions_via_responses adopt the helper with
     defer closer.Close() and explicit jsonData = nil.
   - relay/common/relay_info.go: new UpstreamRequestBodySize so
     relay/channel/api_request.go can populate req.ContentLength (lost when
     body becomes a type-erased io.Reader).
   - common/gin.go UnmarshalBodyReusable: when storage is disk-backed and
     content-type is JSON, decode via DecodeJson(storage) instead of
     storage.Bytes()+Unmarshal, removing one transient 5MB copy per request.
     Memory mode and form/multipart paths are unchanged.
2026-05-22 19:08:38 +08:00

171 lines
5.5 KiB
Go

package relay
import (
"io"
"net/http"
"strings"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/channel"
openaichannel "github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// applySystemPromptIfNeeded merges info.ChannelSetting.SystemPrompt into
// request.Messages.
//
// It is a no-op when info or request is nil, or when the configured prompt is
// empty. Otherwise:
//   - if no message carries the request's system role, a new system message
//     holding the prompt is inserted at the front of request.Messages;
//   - if a system message already exists and
//     info.ChannelSetting.SystemPromptOverride is false, nothing changes;
//   - if a system message exists and the override flag is set, the context key
//     constant.ContextKeySystemPromptOverride is set to true and the prompt is
//     placed in front of the first system message's content (joined with "\n"
//     for string content, or as a leading text part otherwise).
//
// Only the first system-role message is ever modified.
func applySystemPromptIfNeeded(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) {
	if info == nil || request == nil {
		return
	}
	prompt := info.ChannelSetting.SystemPrompt
	if prompt == "" {
		return
	}

	role := request.GetSystemRoleName()

	// Locate the first message that already uses the system role, if any.
	firstSystem := -1
	for idx := range request.Messages {
		if request.Messages[idx].Role == role {
			firstSystem = idx
			break
		}
	}

	if firstSystem < 0 {
		// No system message yet: put the configured prompt at the front.
		prepended := []dto.Message{{Role: role, Content: prompt}}
		request.Messages = append(prepended, request.Messages...)
		return
	}
	if !info.ChannelSetting.SystemPromptOverride {
		return
	}

	common.SetContextKey(c, constant.ContextKeySystemPromptOverride, true)

	// Work on a copy of the message for the read-side helpers so that anything
	// they do to their receiver does not touch the slice element; writes go
	// through request.Messages[firstSystem] explicitly.
	existing := request.Messages[firstSystem]
	if existing.IsStringContent() {
		merged := prompt + "\n" + existing.StringContent()
		request.Messages[firstSystem].SetStringContent(merged)
		return
	}

	parts := []dto.MediaContent{{
		Type: dto.ContentTypeText,
		Text: prompt,
	}}
	parts = append(parts, existing.ParseContent()...)
	request.Messages[firstSystem].Content = parts
}
// chatCompletionsViaResponses relays a chat-completions request to the
// upstream using the Responses request format.
//
// The steps, in order:
//  1. Marshal request, strip disabled fields, and (when info.ParamOverride is
//     non-empty) apply the param overrides to the JSON.
//  2. Decode the resulting JSON back into a dto.GeneralOpenAIRequest and
//     convert it with service.ChatCompletionsRequestToResponsesRequest.
//  3. Temporarily set info.RelayMode to relayconstant.RelayModeResponses and
//     info.RequestURLPath to "/v1/responses"; both are restored when the
//     function returns.
//  4. Let the adaptor convert the Responses request, marshal it, strip
//     disabled fields again, and wrap the bytes with
//     relaycommon.NewOutboundJSONBody.
//  5. Send the request via adaptor.DoRequest and hand the HTTP response to
//     the streaming or non-streaming Responses-to-chat handler.
//
// It returns the usage produced by the response handler, or a
// *types.NewAPIError describing the first failure. Errors coming from the
// upstream response have their status code passed through
// service.ResetStatusCode with the "status_code_mapping" context value.
func chatCompletionsViaResponses(c *gin.Context, info *relaycommon.RelayInfo, adaptor channel.Adaptor, request *dto.GeneralOpenAIRequest) (*dto.Usage, *types.NewAPIError) {
	chatJSON, err := common.Marshal(request)
	if err != nil {
		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
	}

	chatJSON, err = relaycommon.RemoveDisabledFields(chatJSON, info.ChannelOtherSettings, info.ChannelSetting.PassThroughBodyEnabled)
	if err != nil {
		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
	}

	if len(info.ParamOverride) > 0 {
		chatJSON, err = relaycommon.ApplyParamOverrideWithRelayInfo(chatJSON, info)
		if err != nil {
			return nil, newAPIErrorFromParamOverride(err)
		}
	}

	// Re-decode so the conversion below sees the request as it looks after
	// field removal and param overrides.
	var overriddenChatReq dto.GeneralOpenAIRequest
	if err := common.Unmarshal(chatJSON, &overriddenChatReq); err != nil {
		return nil, types.NewError(err, types.ErrorCodeChannelParamOverrideInvalid, types.ErrOptionWithSkipRetry())
	}

	responsesReq, err := service.ChatCompletionsRequestToResponsesRequest(&overriddenChatReq)
	if err != nil {
		return nil, types.NewErrorWithStatusCode(err, types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
	}
	info.AppendRequestConversion(types.RelayFormatOpenAIResponses)

	// Switch info to the Responses mode/path for the duration of this call
	// and put the caller's values back on every return path.
	savedRelayMode := info.RelayMode
	savedRequestURLPath := info.RequestURLPath
	defer func() {
		info.RelayMode = savedRelayMode
		info.RequestURLPath = savedRequestURLPath
	}()
	info.RelayMode = relayconstant.RelayModeResponses
	info.RequestURLPath = "/v1/responses"

	convertedRequest, err := adaptor.ConvertOpenAIResponsesRequest(c, info, *responsesReq)
	if err != nil {
		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
	}
	relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)

	jsonData, err := common.Marshal(convertedRequest)
	if err != nil {
		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
	}

	jsonData, err = relaycommon.RemoveDisabledFields(jsonData, info.ChannelOtherSettings, info.ChannelSetting.PassThroughBodyEnabled)
	if err != nil {
		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
	}

	body, size, closer, err := relaycommon.NewOutboundJSONBody(jsonData)
	if err != nil {
		return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
	}
	defer closer.Close()
	// Drop this function's reference to the marshaled bytes; the outbound
	// body is the only thing that should keep them (or a copy) alive now.
	jsonData = nil
	// The body is passed on as a plain io.Reader, so its length is recorded
	// on info separately.
	info.UpstreamRequestBodySize = size

	var requestBody io.Reader = body
	resp, err := adaptor.DoRequest(c, info, requestBody)
	if err != nil {
		return nil, types.NewOpenAIError(err, types.ErrorCodeDoRequestFailed, http.StatusInternalServerError)
	}

	// Use the comma-ok form: a single-value assertion would panic if the
	// adaptor returned some other dynamic type. The extra nil check covers a
	// typed-nil *http.Response boxed in the interface, which a plain
	// `resp == nil` comparison does not catch. A nil interface also lands
	// here because the assertion reports ok == false for it.
	// NOTE(review): the nil error argument is kept from the original
	// nil-response path; confirm types.NewOpenAIError tolerates a nil error.
	httpResp, ok := resp.(*http.Response)
	if !ok || httpResp == nil {
		return nil, types.NewOpenAIError(nil, types.ErrorCodeBadResponse, http.StatusInternalServerError)
	}

	statusCodeMappingStr := c.GetString("status_code_mapping")

	// Treat the response as a stream if either the request asked for one or
	// the upstream answered with an event-stream content type.
	info.IsStream = info.IsStream || strings.HasPrefix(httpResp.Header.Get("Content-Type"), "text/event-stream")

	if httpResp.StatusCode != http.StatusOK {
		newApiErr := service.RelayErrorHandler(c.Request.Context(), httpResp, false)
		service.ResetStatusCode(newApiErr, statusCodeMappingStr)
		return nil, newApiErr
	}

	if info.IsStream {
		usage, newApiErr := openaichannel.OaiResponsesToChatStreamHandler(c, info, httpResp)
		if newApiErr != nil {
			service.ResetStatusCode(newApiErr, statusCodeMappingStr)
			return nil, newApiErr
		}
		return usage, nil
	}

	usage, newApiErr := openaichannel.OaiResponsesToChatHandler(c, info, httpResp)
	if newApiErr != nil {
		service.ResetStatusCode(newApiErr, statusCodeMappingStr)
		return nil, newApiErr
	}
	return usage, nil
}