Skip to content

Commit

Permalink
Merge branch 'main' into enhance-failover
Browse files Browse the repository at this point in the history
  • Loading branch information
cr7258 authored Dec 23, 2024
2 parents 351ada4 + 8f3723f commit 9e2779c
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 6 deletions.
8 changes: 5 additions & 3 deletions helm/core/templates/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
{{- if eq .Values.gateway.kind "DaemonSet" -}}
{{- $o11y := .Values.global.o11y }}
{{- $unprivilegedPortSupported := true }}
{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
{{- if eq .Values.gateway.unprivilegedPortSupported nil -}}
{{- $unprivilegedPortSupported := true }}
{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
{{- $kernelVersion := $node.status.nodeInfo.kernelVersion }}
{{- if $kernelVersion }}
{{- $kernelVersion = regexFind "^(\\d+\\.\\d+\\.\\d+)" $kernelVersion }}
{{- if and $kernelVersion (semverCompare "<4.11.0" $kernelVersion) }}
{{- $unprivilegedPortSupported = false }}
{{- end }}
{{- end }}
{{- end -}}
{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
{{- end -}}
{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}

apiVersion: apps/v1
kind: DaemonSet
Expand Down
8 changes: 5 additions & 3 deletions helm/core/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
{{- if eq .Values.gateway.kind "Deployment" -}}
{{- $unprivilegedPortSupported := true }}
{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
{{- if eq .Values.gateway.unprivilegedPortSupported nil -}}
{{- $unprivilegedPortSupported := true }}
{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
{{- $kernelVersion := $node.status.nodeInfo.kernelVersion }}
{{- if $kernelVersion }}
{{- $kernelVersion = regexFind "^(\\d+\\.\\d+\\.\\d+)" $kernelVersion }}
{{- if and $kernelVersion (semverCompare "<4.11.0" $kernelVersion) }}
{{- $unprivilegedPortSupported = false }}
{{- end }}
{{- end }}
{{- end -}}
{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
{{- end -}}
{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}

apiVersion: apps/v1
kind: Deployment
Expand Down
1 change: 1 addition & 0 deletions helm/core/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ gateway:
# On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl.
securityContext: ~
containerSecurityContext: ~
unprivilegedPortSupported: ~

service:
# -- Type of service. Set to "None" to disable the service entirely
Expand Down
1 change: 1 addition & 0 deletions helm/higress/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ The command removes all the Kubernetes components associated with the chart and
| gateway.serviceAccount.name | string | `""` | The name of the service account to use. If not set, the release name is used |
| gateway.tag | string | `""` | |
| gateway.tolerations | list | `[]` | |
| gateway.unprivilegedPortSupported | string | `nil` | |
| global.autoscalingv2API | bool | `true` | whether to use autoscaling/v2 template for HPA settings for internal usage only, not to be configured by users. |
| global.caAddress | string | `""` | The customized CA address to retrieve certificates for the pods in the cluster. CSR clients such as the Istio Agent and ingress gateways can use this to specify the CA endpoint. If not set explicitly, default to the Istio discovery address. |
| global.caName | string | `""` | The name of the CA for workload certificates. For example, when caName=GkeWorkloadCertificate, GKE workload certificates will be used as the certificates for workloads. The default value is "" and when caName="", the CA will be configured by other mechanisms (e.g., environmental variable CA_PROVIDER). |
Expand Down
58 changes: 58 additions & 0 deletions plugins/wasm-go/extensions/ai-proxy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下:

Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。

#### Together-AI
Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。

## 用法示例

### 使用 OpenAI 协议代理 Azure OpenAI 服务
Expand Down Expand Up @@ -1505,6 +1508,61 @@ provider:
}
```

### 使用 OpenAI 协议代理 Together-AI 服务

**配置信息**
```yaml
provider:
type: together-ai
apiTokens:
- "YOUR_TOGETHER_AI_API_TOKEN"
modelMapping:
"*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
```

**请求示例**
```json
{
"model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
"messages": [
{
"role": "user",
"content": "Who are you?"
}
]
}
```

**响应示例**
```json
{
"id": "8f5809d54b73efac",
"object": "chat.completion",
"created": 1734785851,
"model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
"prompt": [],
"choices": [
{
"finish_reason": "eos",
"seed": 12830868308626506000,
"logprobs": null,
"index": 0,
"message": {
"role": "assistant",
"content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. How can I assist you today?",
"tool_calls": []
}
}
],
"usage": {
"prompt_tokens": 33,
"completion_tokens": 61,
"total_tokens": 94
}
}
```


## 完整配置示例

### Kubernetes 示例
Expand Down
54 changes: 54 additions & 0 deletions plugins/wasm-go/extensions/ai-proxy/README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,60 @@ Here, `model` denotes the service tier of DeepL and can only be either `Free` or
}
```

### Utilizing OpenAI Protocol Proxy for Together-AI Services

**Configuration Information**
```yaml
provider:
type: together-ai
apiTokens:
- "YOUR_TOGETHER_AI_API_TOKEN"
modelMapping:
"*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
```

**Request Example**
```json
{
"model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
"messages": [
{
"role": "user",
"content": "Who are you?"
}
]
}
```

**Response Example**
```json
{
"id": "8f5809d54b73efac",
"object": "chat.completion",
"created": 1734785851,
"model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
"prompt": [],
"choices": [
{
"finish_reason": "eos",
"seed": 12830868308626506000,
"logprobs": null,
"index": 0,
"message": {
"role": "assistant",
"content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. How can I assist you today?",
"tool_calls": []
}
}
],
"usage": {
"prompt_tokens": 33,
"completion_tokens": 61,
"total_tokens": 94
}
}
```

## Full Configuration Example

### Kubernetes Example
Expand Down
2 changes: 2 additions & 0 deletions plugins/wasm-go/extensions/ai-proxy/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const (
providerTypeCohere = "cohere"
providerTypeDoubao = "doubao"
providerTypeCoze = "coze"
providerTypeTogetherAI = "together-ai"

protocolOpenAI = "openai"
protocolOriginal = "original"
Expand Down Expand Up @@ -108,6 +109,7 @@ var (
providerTypeCohere: &cohereProviderInitializer{},
providerTypeDoubao: &doubaoProviderInitializer{},
providerTypeCoze: &cozeProviderInitializer{},
providerTypeTogetherAI: &togetherAIProviderInitializer{},
}
)

Expand Down
69 changes: 69 additions & 0 deletions plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package provider

import (
"errors"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"net/http"
"strings"
)

const (
// togetherAIDomain is the host that TransformRequestHeaders writes into the
// outgoing request's host header.
togetherAIDomain = "api.together.xyz"
// togetherAICompletionPath is the chat completion path. It is the path that
// outgoing requests are rewritten to, and also the substring GetApiName
// looks for when classifying an incoming request path.
togetherAICompletionPath = "/v1/chat/completions"
)

// togetherAIProviderInitializer validates provider configuration for the
// Together-AI provider and creates togetherAIProvider instances from it.
type togetherAIProviderInitializer struct{}

// ValidateConfig checks that the provider configuration carries at least one
// API token. It returns an error when apiTokens is nil or empty, and nil
// otherwise.
func (m *togetherAIProviderInitializer) ValidateConfig(config ProviderConfig) error {
	// len of a nil slice is 0, so a separate nil comparison is unnecessary.
	if len(config.apiTokens) == 0 {
		return errors.New("no apiToken found in provider config")
	}
	return nil
}

// CreateProvider builds a togetherAIProvider from the given configuration.
// The provider stores a copy of config together with the context cache that
// createContextCache derives from it. The returned error is always nil.
func (m *togetherAIProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
	provider := &togetherAIProvider{
		config:       config,
		contextCache: createContextCache(&config),
	}
	return provider, nil
}

// togetherAIProvider is the Provider implementation for the "together-ai"
// provider type. It only handles the chat completion API.
type togetherAIProvider struct {
// config is the provider configuration this instance was created with; the
// request handling methods delegate to it and read the API token from it.
config ProviderConfig
// contextCache is the value returned by createContextCache for config; it
// is passed through to config.handleRequestBody.
contextCache *contextCache
}

// GetProviderType returns the provider type identifier, providerTypeTogetherAI.
func (m *togetherAIProvider) GetProviderType() string {
return providerTypeTogetherAI
}

// OnRequestHeaders handles the request headers phase. Only the chat
// completion API is supported: for it, header processing is delegated to
// config.handleRequestHeaders and nil is returned. Any other API name yields
// errUnsupportedApiName.
func (m *togetherAIProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
	switch apiName {
	case ApiNameChatCompletion:
		m.config.handleRequestHeaders(m, ctx, apiName, log)
		return nil
	default:
		return errUnsupportedApiName
	}
}

// OnRequestBody handles the request body phase. For the chat completion API
// it returns whatever config.handleRequestBody returns; for any other API
// name it returns types.ActionContinue together with errUnsupportedApiName.
func (m *togetherAIProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
	switch apiName {
	case ApiNameChatCompletion:
		return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
	default:
		return types.ActionContinue, errUnsupportedApiName
	}
}

// TransformRequestHeaders rewrites the outgoing request headers so the
// request targets Together-AI: the path becomes togetherAICompletionPath, the
// host becomes togetherAIDomain, and the Authorization header is set to a
// Bearer token taken from config.GetApiTokenInUse.
func (m *togetherAIProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
	util.OverwriteRequestPathHeader(headers, togetherAICompletionPath)
	util.OverwriteRequestHostHeader(headers, togetherAIDomain)

	bearer := "Bearer " + m.config.GetApiTokenInUse(ctx)
	util.OverwriteRequestAuthorizationHeader(headers, bearer)

	// NOTE(review): Content-Length is presumably dropped because the body may
	// be rewritten later and its size can change — confirm.
	headers.Del("Content-Length")
}

// GetApiName classifies a request path. It returns ApiNameChatCompletion when
// path contains togetherAICompletionPath, and the empty ApiName otherwise.
func (m *togetherAIProvider) GetApiName(path string) ApiName {
	if !strings.Contains(path, togetherAICompletionPath) {
		return ""
	}
	return ApiNameChatCompletion
}

0 comments on commit 9e2779c

Please sign in to comment.