diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md index 80b7c2a890..d0a4505ab7 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README.md +++ b/plugins/wasm-go/extensions/ai-proxy/README.md @@ -243,6 +243,9 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下: Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。 +#### Together-AI +Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。 + ## 用法示例 ### 使用 OpenAI 协议代理 Azure OpenAI 服务 @@ -1496,6 +1499,61 @@ provider: } ``` +### 使用 OpenAI 协议代理 Together-AI 服务 + +**配置信息** +```yaml +provider: + type: together-ai + apiTokens: + - "YOUR_TOGETHER_AI_API_TOKEN" + modelMapping: + "*": "Qwen/Qwen2.5-72B-Instruct-Turbo" +``` + +**请求示例** +```json +{ + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} +``` + +**响应示例** +```json +{ + "id": "8f5809d54b73efac", + "object": "chat.completion", + "created": 1734785851, + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "prompt": [], + "choices": [ + { + "finish_reason": "eos", + "seed": 12830868308626506000, + "logprobs": null, + "index": 0, + "message": { + "role": "assistant", + "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. 
How can I assist you today?", + "tool_calls": [] + } + } + ], + "usage": { + "prompt_tokens": 33, + "completion_tokens": 61, + "total_tokens": 94 + } +} +``` + + ## 完整配置示例 ### Kubernetes 示例 diff --git a/plugins/wasm-go/extensions/ai-proxy/README_EN.md b/plugins/wasm-go/extensions/ai-proxy/README_EN.md index e34546a4e5..4400e248d0 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md +++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md @@ -1356,6 +1356,60 @@ Here, `model` denotes the service tier of DeepL and can only be either `Free` or } ``` +### Utilizing OpenAI Protocol Proxy for Together-AI Services + +**Configuration Information** +```yaml +provider: + type: together-ai + apiTokens: + - "YOUR_TOGETHER_AI_API_TOKEN" + modelMapping: + "*": "Qwen/Qwen2.5-72B-Instruct-Turbo" +``` + +**Request Example** +```json +{ + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} +``` + +**Response Example** +```json +{ + "id": "8f5809d54b73efac", + "object": "chat.completion", + "created": 1734785851, + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "prompt": [], + "choices": [ + { + "finish_reason": "eos", + "seed": 12830868308626506000, + "logprobs": null, + "index": 0, + "message": { + "role": "assistant", + "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. 
How can I assist you today?", + "tool_calls": [] + } + } + ], + "usage": { + "prompt_tokens": 33, + "completion_tokens": 61, + "total_tokens": 94 + } +} +``` + ## Full Configuration Example ### Kubernetes Example diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go index 0f482732aa..0dc70428fd 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go @@ -46,6 +46,7 @@ const ( providerTypeCohere = "cohere" providerTypeDoubao = "doubao" providerTypeCoze = "coze" + providerTypeTogetherAI = "together-ai" protocolOpenAI = "openai" protocolOriginal = "original" @@ -106,6 +107,7 @@ var ( providerTypeCohere: &cohereProviderInitializer{}, providerTypeDoubao: &doubaoProviderInitializer{}, providerTypeCoze: &cozeProviderInitializer{}, + providerTypeTogetherAI: &togetherAIProviderInitializer{}, } ) diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go b/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go new file mode 100644 index 0000000000..523e9842f4 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go @@ -0,0 +1,88 @@
+package provider
+
+import (
+	"errors"
+	"net/http"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+)
+
+// Host and path that TransformRequestHeaders writes into outgoing requests.
+const (
+	togetherAIDomain         = "api.together.xyz"
+	togetherAICompletionPath = "/v1/chat/completions"
+)
+
+// togetherAIProviderInitializer validates provider config and creates
+// togetherAIProvider instances.
+type togetherAIProviderInitializer struct{}
+
+// ValidateConfig returns an error when config carries no API token.
+func (m *togetherAIProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	// len of a nil slice is 0, so no separate nil check is needed.
+	if len(config.apiTokens) == 0 {
+		return errors.New("no apiToken found in provider config")
+	}
+	return nil
+}
+
+// CreateProvider wraps config and a context cache built from it in a
+// togetherAIProvider; the returned error is always nil.
+func (m *togetherAIProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	return &togetherAIProvider{
+		config:       config,
+		contextCache: createContextCache(&config),
+	}, nil
+}
+
+// togetherAIProvider stores the provider config and the context cache created from it.
+type togetherAIProvider struct {
+	config       ProviderConfig
+	contextCache *contextCache
+}
+
+// GetProviderType returns the provider type constant for Together-AI.
+func (m *togetherAIProvider) GetProviderType() string {
+	return providerTypeTogetherAI
+}
+
+// OnRequestHeaders returns errUnsupportedApiName for anything other than
+// chat completion; otherwise it hands off to config.handleRequestHeaders.
+func (m *togetherAIProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
+	if apiName != ApiNameChatCompletion {
+		return errUnsupportedApiName
+	}
+	m.config.handleRequestHeaders(m, ctx, apiName, log)
+	return nil
+}
+
+// OnRequestBody returns errUnsupportedApiName for anything other than chat
+// completion; otherwise it returns the result of config.handleRequestBody.
+func (m *togetherAIProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
+}
+
+// TransformRequestHeaders overwrites the path and host headers with the
+// Together-AI chat completion endpoint, sets a Bearer authorization header
+// from config.GetApiTokenInUse(ctx), and deletes Content-Length.
+func (m *togetherAIProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
+	util.OverwriteRequestPathHeader(headers, togetherAICompletionPath)
+	util.OverwriteRequestHostHeader(headers, togetherAIDomain)
+	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
+	headers.Del("Content-Length")
+}
+
+// GetApiName reports ApiNameChatCompletion when path contains the chat
+// completion path, and the empty ApiName otherwise.
+func (m *togetherAIProvider) GetApiName(path string) ApiName {
+	if strings.Contains(path, togetherAICompletionPath) {
+		return ApiNameChatCompletion
+	}
+	return ""
+}