From b709ee3983ee410981302c2f35e02a89f34ce959 Mon Sep 17 00:00:00 2001 From: EvanWu <850123119@qq.com> Date: Mon, 24 Feb 2025 20:18:07 +0800 Subject: [PATCH 1/3] feat(alibaba): Added alibaba vision model and omni model support --- app/client/api.ts | 5 +++++ app/client/platforms/alibaba.ts | 38 ++++++++++++++++++++++----------- app/constant.ts | 10 ++++++++- app/utils/chat.ts | 22 +++++++++++++++++++ 4 files changed, 62 insertions(+), 13 deletions(-) diff --git a/app/client/api.ts b/app/client/api.ts index 64ac82b2a61..f5288593d32 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -40,6 +40,11 @@ export interface MultimodalContent { }; } +export interface MultimodalContentForAlibaba { + text?: string; + image?: string; +} + export interface RequestMessage { role: MessageRole; content: string | MultimodalContent[]; diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts index 88511768cd3..4875e5c02d9 100644 --- a/app/client/platforms/alibaba.ts +++ b/app/client/platforms/alibaba.ts @@ -7,7 +7,10 @@ import { ChatMessageTool, usePluginStore, } from "@/app/store"; -import { streamWithThink } from "@/app/utils/chat"; +import { + preProcessImageContentForAlibabaDashScope, + streamWithThink, +} from "@/app/utils/chat"; import { ChatOptions, getHeaders, @@ -15,12 +18,14 @@ import { LLMModel, SpeechOptions, MultimodalContent, + MultimodalContentForAlibaba, } from "../api"; import { getClientConfig } from "@/app/config/client"; import { getMessageTextContent, getMessageTextContentWithoutThinking, getTimeoutMSByModel, + isVisionModel, } from "@/app/utils"; import { fetch } from "@/app/utils/stream"; @@ -89,14 +94,6 @@ export class QwenApi implements LLMApi { } async chat(options: ChatOptions) { - const messages = options.messages.map((v) => ({ - role: v.role, - content: - v.role === "assistant" - ? getMessageTextContentWithoutThinking(v) - : getMessageTextContent(v), - })); - const modelConfig = { ...useAppConfig.getState().modelConfig, ...useChatStore.getState().currentSession().mask.modelConfig, @@ -105,6 +102,21 @@ export class QwenApi implements LLMApi { }, }; + const visionModel = isVisionModel(options.config.model); + + const messages: ChatOptions["messages"] = []; + for (const v of options.messages) { + const content = ( + visionModel + ? await preProcessImageContentForAlibabaDashScope(v.content) + : v.role === "assistant" + ? getMessageTextContentWithoutThinking(v) + : getMessageTextContent(v) + ) as any; + + messages.push({ role: v.role, content }); + } + const shouldStream = !!options.config.stream; const requestPayload: RequestPayload = { model: modelConfig.model, @@ -129,7 +141,7 @@ export class QwenApi implements LLMApi { "X-DashScope-SSE": shouldStream ? "enable" : "disable", }; - const chatPath = this.path(Alibaba.ChatPath); + const chatPath = this.path(Alibaba.ChatPath(modelConfig.model)); const chatPayload = { method: "POST", body: JSON.stringify(requestPayload), @@ -162,7 +174,7 @@ export class QwenApi implements LLMApi { const json = JSON.parse(text); const choices = json.output.choices as Array<{ message: { - content: string | null; + content: string | null | MultimodalContentForAlibaba[]; tool_calls: ChatMessageTool[]; reasoning_content: string | null; }; @@ -212,7 +224,9 @@ export class QwenApi implements LLMApi { } else if (content && content.length > 0) { return { isThinking: false, - content: content, + content: Array.isArray(content) + ? content.map((item) => item.text).join(",") + : content, }; } diff --git a/app/constant.ts b/app/constant.ts index 50aaf7921b9..358467c635a 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -221,7 +221,12 @@ export const ByteDance = { export const Alibaba = { ExampleEndpoint: ALIBABA_BASE_URL, - ChatPath: "v1/services/aigc/text-generation/generation", + ChatPath: (modelName: string) => { + if (modelName.includes("vl") || modelName.includes("omni")) { + return "v1/services/aigc/multimodal-generation/generation"; + } + return `v1/services/aigc/text-generation/generation`; + }, }; export const Tencent = { @@ -568,6 +573,9 @@ const alibabaModes = [ "qwen-max-0403", "qwen-max-0107", "qwen-max-longcontext", + "qwen-omni-turbo", + "qwen-vl-plus", + "qwen-vl-max", ]; const tencentModels = [ diff --git a/app/utils/chat.ts b/app/utils/chat.ts index efc496f2c32..ecb2fa46872 100644 --- a/app/utils/chat.ts +++ b/app/utils/chat.ts @@ -92,6 +92,28 @@ export async function preProcessImageContent( return result; } +export async function preProcessImageContentForAlibabaDashScope( + content: RequestMessage["content"], +) { + if (typeof content === "string") { + return content; + } + const result = []; + for (const part of content) { + if (part?.type == "image_url" && part?.image_url?.url) { + try { + const url = await cacheImageToBase64Image(part?.image_url?.url); + result.push({ image: url }); + } catch (error) { + console.error("Error processing image URL:", error); + } + } else { + result.push({ ...part }); + } + } + return result; +} + const imageCaches: Record = {}; export function cacheImageToBase64Image(imageUrl: string) { if (imageUrl.includes(CACHE_URL_PREFIX)) { From 0a25a1a8cbfde5ba8536afda5624195ab1708cbc Mon Sep 17 00:00:00 2001 From: EvanWu <850123119@qq.com> Date: Tue, 25 Feb 2025 09:22:47 +0800 Subject: [PATCH 2/3] refacto(app/utils/chat.ts)r: optimize function preProcessImageContentBase --- app/utils/chat.ts | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/app/utils/chat.ts b/app/utils/chat.ts index ecb2fa46872..879d3d198cb 100644 --- a/app/utils/chat.ts +++ b/app/utils/chat.ts @@ -70,8 +70,9 @@ export function compressImage(file: Blob, maxSize: number): Promise { }); } -export async function preProcessImageContent( +export async function preProcessImageContentBase( content: RequestMessage["content"], + transformImageUrl: (url: string) => Promise<{ [key: string]: any }>, ) { if (typeof content === "string") { return content; @@ -81,7 +82,7 @@ export async function preProcessImageContent( if (part?.type == "image_url" && part?.image_url?.url) { try { const url = await cacheImageToBase64Image(part?.image_url?.url); - result.push({ type: part.type, image_url: { url } }); + result.push(await transformImageUrl(url)); } catch (error) { console.error("Error processing image URL:", error); } @@ -92,26 +93,21 @@ export async function preProcessImageContent( return result; } +export async function preProcessImageContent( + content: RequestMessage["content"], +) { + return preProcessImageContentBase(content, async (url) => ({ + type: "image_url", + image_url: { url }, + })); +} + export async function preProcessImageContentForAlibabaDashScope( content: RequestMessage["content"], ) { - if (typeof content === "string") { - return content; - } - const result = []; - for (const part of content) { - if (part?.type == "image_url" && part?.image_url?.url) { - try { - const url = await cacheImageToBase64Image(part?.image_url?.url); - result.push({ image: url }); - } catch (error) { - console.error("Error processing image URL:", error); - } - } else { - result.push({ ...part }); - } - } - return result; + return preProcessImageContentBase(content, async (url) => ({ + image: url, + })); } const imageCaches: Record = {}; From a2c4e468a08cfe7108d30ac0e63fe43c63fb4bef Mon Sep 17 00:00:00 2001 From: EvanWu <850123119@qq.com> Date: Wed, 26 Feb 2025 19:58:32 +0800 Subject: [PATCH 3/3] fix(app/utils/chat.ts): fix type error --- app/utils/chat.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/utils/chat.ts b/app/utils/chat.ts index 879d3d198cb..cae775512ad 100644 --- a/app/utils/chat.ts +++ b/app/utils/chat.ts @@ -3,7 +3,7 @@ import { UPLOAD_URL, REQUEST_TIMEOUT_MS, } from "@/app/constant"; -import { RequestMessage } from "@/app/client/api"; +import { MultimodalContent, RequestMessage } from "@/app/client/api"; import Locale from "@/app/locales"; import { EventStreamContentType, @@ -99,7 +99,7 @@ export async function preProcessImageContent( return preProcessImageContentBase(content, async (url) => ({ type: "image_url", image_url: { url }, - })); + })) as Promise; } export async function preProcessImageContentForAlibabaDashScope(