Merge branch 'main' into enhance-failover

alibaba · Dec 23, 2024 · 9e2779c · 9e2779c
2 parents 351ada4 + 8f3723f
commit 9e2779c
Show file tree

Hide file tree

Showing 8 changed files with 195 additions and 6 deletions.
diff --git a/helm/core/templates/daemonset.yaml b/helm/core/templates/daemonset.yaml
@@ -1,16 +1,18 @@
 {{- if eq .Values.gateway.kind "DaemonSet" -}}
 {{- $o11y := .Values.global.o11y  }}
-{{- $unprivilegedPortSupported := true }}
-{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
+{{- if eq .Values.gateway.unprivilegedPortSupported nil -}}
+  {{- $unprivilegedPortSupported := true }}
+  {{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
     {{- $kernelVersion := $node.status.nodeInfo.kernelVersion }}
     {{- if $kernelVersion }}
       {{- $kernelVersion = regexFind "^(\\d+\\.\\d+\\.\\d+)" $kernelVersion }}
       {{- if and $kernelVersion (semverCompare "<4.11.0" $kernelVersion) }}
       {{- $unprivilegedPortSupported = false }}
       {{- end }}
     {{- end }}
+  {{- end -}}
+  {{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
 {{- end -}}
-{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
 
 apiVersion: apps/v1
 kind: DaemonSet

diff --git a/helm/core/templates/deployment.yaml b/helm/core/templates/deployment.yaml
@@ -1,15 +1,17 @@
 {{- if eq .Values.gateway.kind "Deployment" -}}
-{{- $unprivilegedPortSupported := true }}
-{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
+{{- if eq .Values.gateway.unprivilegedPortSupported nil -}}
+  {{- $unprivilegedPortSupported := true }}
+  {{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
     {{- $kernelVersion := $node.status.nodeInfo.kernelVersion }}
     {{- if $kernelVersion }}
       {{- $kernelVersion = regexFind "^(\\d+\\.\\d+\\.\\d+)" $kernelVersion }}
       {{- if and $kernelVersion (semverCompare "<4.11.0" $kernelVersion) }}
       {{- $unprivilegedPortSupported = false }}
       {{- end }}
     {{- end }}
+  {{- end -}}
+  {{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
 {{- end -}}
-{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
 
 apiVersion: apps/v1
 kind: Deployment

diff --git a/helm/core/values.yaml b/helm/core/values.yaml
@@ -465,6 +465,7 @@ gateway:
   # On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl.
   securityContext: ~
   containerSecurityContext: ~
+  # -- Overrides the automatic detection of unprivileged port support. When left unset (~), the gateway
+  # DaemonSet/Deployment templates detect it from the cluster nodes: false if any node reports a kernel
+  # older than 4.11.0, true otherwise.
+  unprivilegedPortSupported: ~
 
   service:
     # -- Type of service. Set to "None" to disable the service entirely

diff --git a/helm/higress/README.md b/helm/higress/README.md
@@ -149,6 +149,7 @@ The command removes all the Kubernetes components associated with the chart and
 | gateway.serviceAccount.name | string | `""` | The name of the service account to use. If not set, the release name is used |
 | gateway.tag | string | `""` |  |
 | gateway.tolerations | list | `[]` |  |
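+| gateway.unprivilegedPortSupported | bool | `nil` | Overrides the automatic detection of unprivileged port support. When unset, the gateway templates detect it from the cluster nodes: false if any node reports a kernel older than 4.11.0, true otherwise. |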
 | global.autoscalingv2API | bool | `true` | whether to use autoscaling/v2 template for HPA settings for internal usage only, not to be configured by users. |
 | global.caAddress | string | `""` | The customized CA address to retrieve certificates for the pods in the cluster. CSR clients such as the Istio Agent and ingress gateways can use this to specify the CA endpoint. If not set explicitly, default to the Istio discovery address. |
 | global.caName | string | `""` | The name of the CA for workload certificates. For example, when caName=GkeWorkloadCertificate, GKE workload certificates will be used as the certificates for workloads. The default value is "" and when caName="", the CA will be configured by other mechanisms (e.g., environmental variable CA_PROVIDER). |

diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -252,6 +252,9 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下：
 
 Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。
 
+#### Together-AI
+Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。
+
 ## 用法示例
 
 ### 使用 OpenAI 协议代理 Azure OpenAI 服务
@@ -1505,6 +1508,61 @@ provider:
 }
 ```
 
+### 使用 OpenAI 协议代理 Together-AI 服务
+
+**配置信息**
+```yaml
+provider:
+  type: together-ai
+  apiTokens:
+    - "YOUR_TOGETHER_AI_API_TOKEN"
+  modelMapping:
+    "*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
+```
+
+**请求示例**
+```json
+{
+    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Who are you?"
+        }
+    ]
+}
+```
+
+**响应示例**
+```json
+{
+  "id": "8f5809d54b73efac",
+  "object": "chat.completion",
+  "created": 1734785851,
+  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+  "prompt": [],
+  "choices": [
+    {
+      "finish_reason": "eos",
+      "seed": 12830868308626506000,
+      "logprobs": null,
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. How can I assist you today?",
+        "tool_calls": []
+      }
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 33,
+    "completion_tokens": 61,
+    "total_tokens": 94
+  }
+}
+```
+
+
 ## 完整配置示例
 
 ### Kubernetes 示例

diff --git a/plugins/wasm-go/extensions/ai-proxy/README_EN.md b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -1356,6 +1356,60 @@ Here, `model` denotes the service tier of DeepL and can only be either `Free` or
 }
 ```
 
+### Utilizing OpenAI Protocol Proxy for Together-AI Services
+
+**Configuration Information**
+```yaml
+provider:
+  type: together-ai
+  apiTokens:
+    - "YOUR_TOGETHER_AI_API_TOKEN"
+  modelMapping:
+    "*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
+```
+
+**Request Example**
+```json
+{
+    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Who are you?"
+        }
+    ]
+}
+```
+
+**Response Example**
+```json
+{
+  "id": "8f5809d54b73efac",
+  "object": "chat.completion",
+  "created": 1734785851,
+  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+  "prompt": [],
+  "choices": [
+    {
+      "finish_reason": "eos",
+      "seed": 12830868308626506000,
+      "logprobs": null,
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. How can I assist you today?",
+        "tool_calls": []
+      }
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 33,
+    "completion_tokens": 61,
+    "total_tokens": 94
+  }
+}
+```
+
 ## Full Configuration Example
 
 ### Kubernetes Example

diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -46,6 +46,7 @@ const (
 	providerTypeCohere     = "cohere"
 	providerTypeDoubao     = "doubao"
 	providerTypeCoze       = "coze"
+	providerTypeTogetherAI = "together-ai"
 
 	protocolOpenAI   = "openai"
 	protocolOriginal = "original"
@@ -108,6 +109,7 @@ var (
 		providerTypeCohere:     &cohereProviderInitializer{},
 		providerTypeDoubao:     &doubaoProviderInitializer{},
 		providerTypeCoze:       &cozeProviderInitializer{},
+		providerTypeTogetherAI: &togetherAIProviderInitializer{},
 	}
 )
 

diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go b/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
@@ -0,0 +1,69 @@
+package provider
+
+import (
+	"errors"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"net/http"
+	"strings"
+)
+
+// Together-AI endpoint constants: the upstream host and the chat-completion
+// path that TransformRequestHeaders writes into outgoing requests. GetApiName
+// also matches incoming request paths against the completion path.
+const (
+	togetherAIDomain         = "api.together.xyz"
+	togetherAICompletionPath = "/v1/chat/completions"
+)
+
+// togetherAIProviderInitializer validates provider configuration and creates
+// togetherAIProvider instances for the "together-ai" provider type.
+type togetherAIProviderInitializer struct{}
+
+// ValidateConfig reports an error when the provider config carries no API
+// token; otherwise it returns nil.
+func (m *togetherAIProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	// len of a nil slice is 0, so a separate nil check is unnecessary.
+	if len(config.apiTokens) == 0 {
+		return errors.New("no apiToken found in provider config")
+	}
+	return nil
+}
+
+// CreateProvider builds a togetherAIProvider from config, pairing it with a
+// context cache created from the same config. The returned error is always nil.
+func (m *togetherAIProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	provider := &togetherAIProvider{
+		config:       config,
+		contextCache: createContextCache(&config),
+	}
+	return provider, nil
+}
+
+// togetherAIProvider is the Provider implementation for Together-AI chat
+// completions.
+type togetherAIProvider struct {
+	config       ProviderConfig // provider settings; supplies the API token and the shared request handlers
+	contextCache *contextCache  // built by createContextCache; handed to handleRequestBody
+}
+
+// GetProviderType returns the provider type identifier, "together-ai".
+func (m *togetherAIProvider) GetProviderType() string {
+	return providerTypeTogetherAI
+}
+
+// OnRequestHeaders handles the request-header phase. Only the chat-completion
+// API is accepted; any other apiName yields errUnsupportedApiName. Accepted
+// requests are delegated to the shared config.handleRequestHeaders.
+func (m *togetherAIProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
+	switch apiName {
+	case ApiNameChatCompletion:
+		m.config.handleRequestHeaders(m, ctx, apiName, log)
+		return nil
+	default:
+		return errUnsupportedApiName
+	}
+}
+
+// OnRequestBody handles the request-body phase. Chat-completion requests are
+// delegated to the shared config.handleRequestBody together with the
+// provider's context cache; any other apiName returns types.ActionContinue
+// and errUnsupportedApiName.
+func (m *togetherAIProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName == ApiNameChatCompletion {
+		return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
+	}
+	return types.ActionContinue, errUnsupportedApiName
+}
+
+// TransformRequestHeaders rewrites the outgoing request headers for
+// Together-AI: it overwrites the path and host with the Together-AI
+// chat-completion endpoint, sets a Bearer authorization header from the API
+// token currently in use, and drops Content-Length.
+func (m *togetherAIProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
+	util.OverwriteRequestPathHeader(headers, togetherAICompletionPath)
+	util.OverwriteRequestHostHeader(headers, togetherAIDomain)
+
+	authorization := "Bearer " + m.config.GetApiTokenInUse(ctx)
+	util.OverwriteRequestAuthorizationHeader(headers, authorization)
+
+	// NOTE(review): presumably removed because the body may be rewritten later — confirm.
+	headers.Del("Content-Length")
+}
+
+// GetApiName maps a request path to an ApiName. A path containing the
+// chat-completion path resolves to ApiNameChatCompletion; every other path
+// resolves to the empty ApiName.
+func (m *togetherAIProvider) GetApiName(path string) ApiName {
+	if !strings.Contains(path, togetherAICompletionPath) {
+		return ""
+	}
+	return ApiNameChatCompletion
+}