Three layered optimizations targeting Gemini-style 5MB base64 payloads where
RSS could balloon to tens of GB under concurrent load:
1. Byte-based param override (relay/common/override.go)
- Switch legacy/operations hot paths from common.Marshal round-trips and
map[string]any conversions to gjson/sjson on []byte directly.
- Avoids cloning 5MB strings during each Set/Delete operation.
2. strings.Builder for Gemini response markdown (relay/channel/gemini/relay-gemini.go)
- Replace string concatenation + strings.Join when assembling
"![image](data:<mime>;base64,<payload>)" markdown content for inline image responses.
- Pre-allocates capacity from inline_data byte sizes.
3. Outbound BodyStorage + streaming Decoder (this commit's core)
- New relay/common/outbound_body.go helper wraps marshaled upstream bodies
in common.BodyStorage, allowing disk-cache mode to offload jsonData to
a temp file while waiting for upstream TTFB. The original []byte can
then be GC'd, removing ~5MB/req of heap residency during the longest
window of a request.
- All 7 relay handlers (gemini/claude/responses/embedding/image/compatible/
rerank) plus chat_completions_via_responses adopt the helper with
defer closer.Close() and explicit jsonData = nil.
- relay/common/relay_info.go: new UpstreamRequestBodySize so
relay/channel/api_request.go can populate req.ContentLength (lost when
body becomes a type-erased io.Reader).
- common/gin.go UnmarshalBodyReusable: when storage is disk-backed and
content-type is JSON, decode via DecodeJson(storage) instead of
storage.Bytes()+Unmarshal, removing one transient 5MB copy per request.
Memory mode and the form/multipart paths are unchanged.
167 lines
5.6 KiB
Go
167 lines
5.6 KiB
Go
package relay
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"github.com/QuantumNous/new-api/common"
|
|
appconstant "github.com/QuantumNous/new-api/constant"
|
|
"github.com/QuantumNous/new-api/dto"
|
|
"github.com/QuantumNous/new-api/logger"
|
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
|
relayconstant "github.com/QuantumNous/new-api/relay/constant"
|
|
"github.com/QuantumNous/new-api/relay/helper"
|
|
"github.com/QuantumNous/new-api/service"
|
|
"github.com/QuantumNous/new-api/setting/model_setting"
|
|
"github.com/QuantumNous/new-api/types"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
)
|
|
|
|
func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types.NewAPIError) {
|
|
info.InitChannelMeta(c)
|
|
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
|
|
switch info.ApiType {
|
|
case appconstant.APITypeOpenAI, appconstant.APITypeCodex:
|
|
default:
|
|
return types.NewErrorWithStatusCode(
|
|
fmt.Errorf("unsupported endpoint %q for api type %d", "/v1/responses/compact", info.ApiType),
|
|
types.ErrorCodeInvalidRequest,
|
|
http.StatusBadRequest,
|
|
types.ErrOptionWithSkipRetry(),
|
|
)
|
|
}
|
|
}
|
|
|
|
var responsesReq *dto.OpenAIResponsesRequest
|
|
switch req := info.Request.(type) {
|
|
case *dto.OpenAIResponsesRequest:
|
|
responsesReq = req
|
|
case *dto.OpenAIResponsesCompactionRequest:
|
|
responsesReq = &dto.OpenAIResponsesRequest{
|
|
Model: req.Model,
|
|
Input: req.Input,
|
|
Instructions: req.Instructions,
|
|
PreviousResponseID: req.PreviousResponseID,
|
|
}
|
|
default:
|
|
return types.NewErrorWithStatusCode(
|
|
fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest or dto.OpenAIResponsesCompactionRequest, got %T", info.Request),
|
|
types.ErrorCodeInvalidRequest,
|
|
http.StatusBadRequest,
|
|
types.ErrOptionWithSkipRetry(),
|
|
)
|
|
}
|
|
|
|
request, err := common.DeepCopy(responsesReq)
|
|
if err != nil {
|
|
return types.NewError(fmt.Errorf("failed to copy request to GeneralOpenAIRequest: %w", err), types.ErrorCodeInvalidRequest, types.ErrOptionWithSkipRetry())
|
|
}
|
|
|
|
err = helper.ModelMappedHelper(c, info, request)
|
|
if err != nil {
|
|
return types.NewError(err, types.ErrorCodeChannelModelMappedError, types.ErrOptionWithSkipRetry())
|
|
}
|
|
|
|
adaptor := GetAdaptor(info.ApiType)
|
|
if adaptor == nil {
|
|
return types.NewError(fmt.Errorf("invalid api type: %d", info.ApiType), types.ErrorCodeInvalidApiType, types.ErrOptionWithSkipRetry())
|
|
}
|
|
adaptor.Init(info)
|
|
var requestBody io.Reader
|
|
if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled {
|
|
storage, err := common.GetBodyStorage(c)
|
|
if err != nil {
|
|
return types.NewError(err, types.ErrorCodeReadRequestBodyFailed, types.ErrOptionWithSkipRetry())
|
|
}
|
|
requestBody = common.ReaderOnly(storage)
|
|
} else {
|
|
convertedRequest, err := adaptor.ConvertOpenAIResponsesRequest(c, info, *request)
|
|
if err != nil {
|
|
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
|
|
}
|
|
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
|
|
jsonData, err := common.Marshal(convertedRequest)
|
|
if err != nil {
|
|
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
|
|
}
|
|
|
|
// remove disabled fields for OpenAI Responses API
|
|
jsonData, err = relaycommon.RemoveDisabledFields(jsonData, info.ChannelOtherSettings, info.ChannelSetting.PassThroughBodyEnabled)
|
|
if err != nil {
|
|
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
|
|
}
|
|
|
|
// apply param override
|
|
if len(info.ParamOverride) > 0 {
|
|
jsonData, err = relaycommon.ApplyParamOverrideWithRelayInfo(jsonData, info)
|
|
if err != nil {
|
|
return newAPIErrorFromParamOverride(err)
|
|
}
|
|
}
|
|
|
|
logger.LogDebug(c, "requestBody: %s", jsonData)
|
|
body, size, closer, err := relaycommon.NewOutboundJSONBody(jsonData)
|
|
if err != nil {
|
|
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
|
|
}
|
|
defer closer.Close()
|
|
jsonData = nil
|
|
info.UpstreamRequestBodySize = size
|
|
requestBody = body
|
|
}
|
|
|
|
var httpResp *http.Response
|
|
resp, err := adaptor.DoRequest(c, info, requestBody)
|
|
if err != nil {
|
|
return types.NewOpenAIError(err, types.ErrorCodeDoRequestFailed, http.StatusInternalServerError)
|
|
}
|
|
|
|
statusCodeMappingStr := c.GetString("status_code_mapping")
|
|
|
|
if resp != nil {
|
|
httpResp = resp.(*http.Response)
|
|
|
|
if httpResp.StatusCode != http.StatusOK {
|
|
newAPIError = service.RelayErrorHandler(c.Request.Context(), httpResp, false)
|
|
// reset status code 重置状态码
|
|
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
|
|
return newAPIError
|
|
}
|
|
}
|
|
|
|
usage, newAPIError := adaptor.DoResponse(c, httpResp, info)
|
|
if newAPIError != nil {
|
|
// reset status code 重置状态码
|
|
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
|
|
return newAPIError
|
|
}
|
|
|
|
usageDto := usage.(*dto.Usage)
|
|
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
|
|
originModelName := info.OriginModelName
|
|
originPriceData := info.PriceData
|
|
|
|
_, err := helper.ModelPriceHelper(c, info, info.GetEstimatePromptTokens(), &types.TokenCountMeta{})
|
|
if err != nil {
|
|
info.OriginModelName = originModelName
|
|
info.PriceData = originPriceData
|
|
return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry(), types.ErrOptionWithStatusCode(http.StatusBadRequest))
|
|
}
|
|
service.PostTextConsumeQuota(c, info, usageDto, nil)
|
|
|
|
info.OriginModelName = originModelName
|
|
info.PriceData = originPriceData
|
|
return nil
|
|
}
|
|
|
|
if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
|
|
service.PostAudioConsumeQuota(c, info, usageDto, "")
|
|
} else {
|
|
service.PostTextConsumeQuota(c, info, usageDto, nil)
|
|
}
|
|
return nil
|
|
}
|