new-api/relay/embedding_handler.go

95 lines
3.2 KiB
Go
Raw Normal View History

2025-01-23 05:54:39 +08:00
package relay
import (
"fmt"
"io"
2025-01-23 05:54:39 +08:00
"net/http"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
2025-10-15 21:48:36 +08:00
"github.com/QuantumNous/new-api/logger"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
2025-01-23 05:54:39 +08:00
)
func EmbeddingHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types.NewAPIError) {
info.InitChannelMeta(c)
2025-01-23 05:54:39 +08:00
embeddingReq, ok := info.Request.(*dto.EmbeddingRequest)
if !ok {
return types.NewErrorWithStatusCode(fmt.Errorf("invalid request type, expected *dto.EmbeddingRequest, got %T", info.Request), types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
2025-01-23 05:54:39 +08:00
}
request, err := common.DeepCopy(embeddingReq)
if err != nil {
return types.NewError(fmt.Errorf("failed to copy request to EmbeddingRequest: %w", err), types.ErrorCodeInvalidRequest, types.ErrOptionWithSkipRetry())
}
err = helper.ModelMappedHelper(c, info, request)
if err != nil {
2025-07-30 22:35:31 +08:00
return types.NewError(err, types.ErrorCodeChannelModelMappedError, types.ErrOptionWithSkipRetry())
2025-01-23 05:54:39 +08:00
}
adaptor := GetAdaptor(info.ApiType)
2025-01-23 05:54:39 +08:00
if adaptor == nil {
return types.NewError(fmt.Errorf("invalid api type: %d", info.ApiType), types.ErrorCodeInvalidApiType, types.ErrOptionWithSkipRetry())
2025-01-23 05:54:39 +08:00
}
adaptor.Init(info)
2025-01-23 05:54:39 +08:00
convertedRequest, err := adaptor.ConvertEmbeddingRequest(c, info, *request)
2025-01-23 05:54:39 +08:00
if err != nil {
2025-07-30 22:35:31 +08:00
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
2025-01-23 05:54:39 +08:00
}
2026-01-20 23:43:29 +08:00
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := common.Marshal(convertedRequest)
2025-01-23 05:54:39 +08:00
if err != nil {
2025-07-30 22:35:31 +08:00
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
2025-01-23 05:54:39 +08:00
}
if len(info.ParamOverride) > 0 {
jsonData, err = relaycommon.ApplyParamOverrideWithRelayInfo(jsonData, info)
if err != nil {
return newAPIErrorFromParamOverride(err)
}
}
logger.LogDebug(c, "converted embedding request body: %s", jsonData)
perf: reduce heap residency for large base64 relay requests Three layered optimizations targeting Gemini-style 5MB base64 payloads where RSS could balloon to tens of GB under concurrent load: 1. Byte-based param override (relay/common/override.go) - Switch legacy/operations hot paths from common.Marshal round-trips and map[string]any conversions to gjson/sjson on []byte directly. - Avoids cloning 5MB strings during each Set/Delete operation. 2. strings.Builder for Gemini response markdown (relay/channel/gemini/relay-gemini.go) - Replace string concatenation + strings.Join when assembling "![image](data:...;base64,DATA)" content for inline image responses. - Pre-allocates capacity from inline_data byte sizes. 3. Outbound BodyStorage + streaming Decoder (this commit's core) - New relay/common/outbound_body.go helper wraps marshaled upstream bodies in common.BodyStorage, allowing disk-cache mode to offload jsonData to a temp file while waiting for upstream TTFB. The original []byte can then be GC'd, removing ~5MB/req of heap residency during the longest window of a request. - All 7 relay handlers (gemini/claude/responses/embedding/image/compatible/ rerank) plus chat_completions_via_responses adopt the helper with defer closer.Close() and explicit jsonData = nil. - relay/common/relay_info.go: new UpstreamRequestBodySize so relay/channel/api_request.go can populate req.ContentLength (lost when body becomes a type-erased io.Reader). - common/gin.go UnmarshalBodyReusable: when storage is disk-backed and content-type is JSON, decode via DecodeJson(storage) instead of storage.Bytes()+Unmarshal, removing one transient 5MB copy per request. memory mode and form/multipart paths unchanged.
2026-05-22 19:08:38 +08:00
body, size, closer, err := relaycommon.NewOutboundJSONBody(jsonData)
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
defer closer.Close()
jsonData = nil
info.UpstreamRequestBodySize = size
var requestBody io.Reader = body
2025-01-23 05:54:39 +08:00
statusCodeMappingStr := c.GetString("status_code_mapping")
resp, err := adaptor.DoRequest(c, info, requestBody)
2025-01-23 05:54:39 +08:00
if err != nil {
return types.NewOpenAIError(err, types.ErrorCodeDoRequestFailed, http.StatusInternalServerError)
2025-01-23 05:54:39 +08:00
}
var httpResp *http.Response
if resp != nil {
httpResp = resp.(*http.Response)
if httpResp.StatusCode != http.StatusOK {
newAPIError = service.RelayErrorHandler(c.Request.Context(), httpResp, false)
2025-01-23 05:54:39 +08:00
// reset status code 重置状态码
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
return newAPIError
2025-01-23 05:54:39 +08:00
}
}
usage, newAPIError := adaptor.DoResponse(c, httpResp, info)
if newAPIError != nil {
2025-01-23 05:54:39 +08:00
// reset status code 重置状态码
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
return newAPIError
2025-01-23 05:54:39 +08:00
}
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
2025-01-23 05:54:39 +08:00
return nil
}