From 5cc81b98f79174365984316dc0379400e5badfbb Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Wed, 29 May 2024 23:20:09 -0400 Subject: [PATCH 1/3] Update model ids to match HF repo name Co-authored-by: Nestor Qin --- README.md | 4 +- examples/cache-usage/src/cache_usage.ts | 15 +- .../src/popup.ts | 6 +- examples/chrome-extension/src/popup.ts | 217 +++++++++------- examples/get-started-web-worker/src/main.ts | 60 +++-- examples/get-started/src/get_started.ts | 10 +- examples/json-mode/src/json_mode.ts | 2 +- examples/json-schema/src/json_schema.ts | 4 +- .../logit-processor/src/logit_processor.ts | 45 ++-- examples/logit-processor/src/worker.ts | 2 +- .../multi-round-chat/src/multi_round_chat.ts | 31 ++- .../next-simple-chat/src/utils/chat_ui.ts | 235 +++++++++-------- examples/seed-to-reproduce/src/seed.ts | 87 ++++--- examples/service-worker/src/main.ts | 6 +- examples/streaming/src/streaming.ts | 13 +- src/config.ts | 96 +++---- src/engine.ts | 12 +- tests/openai_chat_completion.test.ts | 241 +++++++++--------- 18 files changed, 570 insertions(+), 516 deletions(-) diff --git a/README.md b/README.md index 61755b22..00e9111e 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ async function main() { const label = document.getElementById("init-label"); label.innerText = report.text; }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, /*engineConfig=*/ { initProgressCallback: initProgressCallback }, @@ -96,7 +96,7 @@ async function main() { const initProgressCallback = (report) => { console.log(report.text); }; - const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; + const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k"; const engine = await webllm.CreateMLCEngine(selectedModel, { initProgressCallback: initProgressCallback, }); diff --git a/examples/cache-usage/src/cache_usage.ts b/examples/cache-usage/src/cache_usage.ts index e9dc7af6..bbf9730e 100644 --- a/examples/cache-usage/src/cache_usage.ts +++ b/examples/cache-usage/src/cache_usage.ts @@ -24,16 +24,19 @@ async function main() { } // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache - const selectedModel = "Phi2-q4f16_1" + const selectedModel = "phi-2-q4f16_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - "Phi2-q4f16_1", - { initProgressCallback: initProgressCallback, appConfig: appConfig } + selectedModel, + { initProgressCallback: initProgressCallback, appConfig: appConfig }, ); const request: webllm.ChatCompletionRequest = { stream: false, messages: [ - { "role": "user", "content": "Write an analogy between mathematics and a lighthouse." }, + { + role: "user", + content: "Write an analogy between mathematics and a lighthouse.", + }, ], n: 1, }; @@ -60,7 +63,9 @@ async function main() { modelCached = await webllm.hasModelInCache(selectedModel, appConfig); console.log("After deletion, hasModelInCache: ", modelCached); if (modelCached) { - throw Error("Expect hasModelInCache() to be false, but got: " + modelCached); + throw Error( + "Expect hasModelInCache() to be false, but got: " + modelCached, + ); } // 5. 
If we reload, we should expect the model to start downloading again diff --git a/examples/chrome-extension-webgpu-service-worker/src/popup.ts b/examples/chrome-extension-webgpu-service-worker/src/popup.ts index 630486ce..e8aae139 100644 --- a/examples/chrome-extension-webgpu-service-worker/src/popup.ts +++ b/examples/chrome-extension-webgpu-service-worker/src/popup.ts @@ -47,8 +47,8 @@ const initProgressCallback = (report: InitProgressReport) => { }; const engine: MLCEngineInterface = await CreateExtensionServiceWorkerMLCEngine( - "Mistral-7B-Instruct-v0.2-q4f16_1", - { initProgressCallback: initProgressCallback } + "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", + { initProgressCallback: initProgressCallback }, ); const chatHistory: ChatCompletionMessageParam[] = []; @@ -150,7 +150,7 @@ function updateAnswer(answer: string) { function fetchPageContents() { chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) { if (tabs[0]?.id) { - var port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); + const port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); port.postMessage({}); port.onMessage.addListener(function (msg) { console.log("Page contents:", msg.contents); diff --git a/examples/chrome-extension/src/popup.ts b/examples/chrome-extension/src/popup.ts index ce17b70e..0f8c99d6 100644 --- a/examples/chrome-extension/src/popup.ts +++ b/examples/chrome-extension/src/popup.ts @@ -1,12 +1,17 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ -'use strict'; +"use strict"; // This code is partially adapted from the openai-chatgpt-chrome-extension repo: // https://github.com/jessedi0n/openai-chatgpt-chrome-extension -import './popup.css'; +import "./popup.css"; -import { MLCEngineInterface, InitProgressReport, CreateMLCEngine, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; +import { + MLCEngineInterface, + InitProgressReport, + CreateMLCEngine, + ChatCompletionMessageParam, +} from "@mlc-ai/web-llm"; import { ProgressBar, Line } from "progressbar.js"; const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); @@ -21,135 +26,149 @@ fetchPageContents(); (submitButton).disabled = true; -const progressBar: ProgressBar = new Line('#loadingContainer', { - strokeWidth: 4, - easing: 'easeInOut', - duration: 1400, - color: '#ffd166', - trailColor: '#eee', - trailWidth: 1, - svgStyle: { width: '100%', height: '100%' } +const progressBar: ProgressBar = new Line("#loadingContainer", { + strokeWidth: 4, + easing: "easeInOut", + duration: 1400, + color: "#ffd166", + trailColor: "#eee", + trailWidth: 1, + svgStyle: { width: "100%", height: "100%" }, }); const initProgressCallback = (report: InitProgressReport) => { - console.log(report.text, report.progress); - progressBar.animate(report.progress, { - duration: 50 - }); - if (report.progress == 1.0) { - enableInputs(); - } + console.log(report.text, report.progress); + progressBar.animate(report.progress, { + duration: 50, + }); + if (report.progress == 1.0) { + enableInputs(); + } }; -// const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; -const selectedModel = "Mistral-7B-Instruct-v0.2-q4f16_1"; -const engine: MLCEngineInterface = await CreateMLCEngine( - selectedModel, - { initProgressCallback: initProgressCallback } -); +// const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k"; +const selectedModel = "Mistral-7B-Instruct-v0.2-q4f16_1-MLC"; +const engine: MLCEngineInterface = await CreateMLCEngine(selectedModel, { + initProgressCallback: initProgressCallback, +}); const 
chatHistory: ChatCompletionMessageParam[] = []; isLoadingParams = true; function enableInputs() { - if (isLoadingParams) { - sleep(500); - (submitButton).disabled = false; - const loadingBarContainer = document.getElementById("loadingContainer")!; - loadingBarContainer.remove(); - queryInput.focus(); - isLoadingParams = false; - } + if (isLoadingParams) { + sleep(500); + (submitButton).disabled = false; + const loadingBarContainer = document.getElementById("loadingContainer")!; + loadingBarContainer.remove(); + queryInput.focus(); + isLoadingParams = false; + } } // Disable submit button if input field is empty queryInput.addEventListener("keyup", () => { - if ((queryInput).value === "") { - (submitButton).disabled = true; - } else { - (submitButton).disabled = false; - } + if ((queryInput).value === "") { + (submitButton).disabled = true; + } else { + (submitButton).disabled = false; + } }); // If user presses enter, click submit button queryInput.addEventListener("keyup", (event) => { - if (event.code === "Enter") { - event.preventDefault(); - submitButton.click(); - } + if (event.code === "Enter") { + event.preventDefault(); + submitButton.click(); + } }); // Listen for clicks on submit button async function handleClick() { - // Get the message from the input field - const message = (queryInput).value; - console.log("message", message); - // Clear the answer - document.getElementById("answer")!.innerHTML = ""; - // Hide the answer - document.getElementById("answerWrapper")!.style.display = "none"; - // Show the loading indicator - document.getElementById("loading-indicator")!.style.display = "block"; - - // Generate response - let inp = message; - if (context.length > 0) { - inp = "Use only the following context when answering the question at the end. Don't use any other knowledge.\n" + context + "\n\nQuestion: " + message + "\n\nHelpful Answer: "; - } - console.log("Input:", inp); - chatHistory.push({ "role": "user", "content": inp }); - - let curMessage = ""; - const completion = await engine.chat.completions.create({ stream: true, messages: chatHistory }); - for await (const chunk of completion) { - const curDelta = chunk.choices[0].delta.content; - if (curDelta) { - curMessage += curDelta; - } - updateAnswer(curMessage); + // Get the message from the input field + const message = (queryInput).value; + console.log("message", message); + // Clear the answer + document.getElementById("answer")!.innerHTML = ""; + // Hide the answer + document.getElementById("answerWrapper")!.style.display = "none"; + // Show the loading indicator + document.getElementById("loading-indicator")!.style.display = "block"; + + // Generate response + let inp = message; + if (context.length > 0) { + inp = + "Use only the following context when answering the question at the end. 
Don't use any other knowledge.\n" + + context + + "\n\nQuestion: " + + message + + "\n\nHelpful Answer: "; + } + console.log("Input:", inp); + chatHistory.push({ role: "user", content: inp }); + + let curMessage = ""; + const completion = await engine.chat.completions.create({ + stream: true, + messages: chatHistory, + }); + for await (const chunk of completion) { + const curDelta = chunk.choices[0].delta.content; + if (curDelta) { + curMessage += curDelta; } - const response = await engine.getMessage(); - chatHistory.push({ "role": "assistant", "content": await engine.getMessage() }); - console.log("response", response); + updateAnswer(curMessage); + } + const response = await engine.getMessage(); + chatHistory.push({ role: "assistant", content: await engine.getMessage() }); + console.log("response", response); } submitButton.addEventListener("click", handleClick); // Listen for messages from the background script chrome.runtime.onMessage.addListener(({ answer, error }) => { - if (answer) { - updateAnswer(answer); - } + if (answer) { + updateAnswer(answer); + } }); function updateAnswer(answer: string) { - // Show answer - document.getElementById("answerWrapper")!.style.display = "block"; - const answerWithBreaks = answer.replace(/\n/g, '
'); - document.getElementById("answer")!.innerHTML = answerWithBreaks; - // Add event listener to copy button - document.getElementById("copyAnswer")!.addEventListener("click", () => { - // Get the answer text - const answerText = answer; - // Copy the answer text to the clipboard - navigator.clipboard.writeText(answerText) - .then(() => console.log("Answer text copied to clipboard")) - .catch((err) => console.error("Could not copy text: ", err)); - }); - const options: Intl.DateTimeFormatOptions = { month: 'short', day: '2-digit', hour: '2-digit', minute: '2-digit', second: '2-digit' }; - const time = new Date().toLocaleString('en-US', options); - // Update timestamp - document.getElementById("timestamp")!.innerText = time; - // Hide loading indicator - document.getElementById("loading-indicator")!.style.display = "none"; + // Show answer + document.getElementById("answerWrapper")!.style.display = "block"; + const answerWithBreaks = answer.replace(/\n/g, "
"); + document.getElementById("answer")!.innerHTML = answerWithBreaks; + // Add event listener to copy button + document.getElementById("copyAnswer")!.addEventListener("click", () => { + // Get the answer text + const answerText = answer; + // Copy the answer text to the clipboard + navigator.clipboard + .writeText(answerText) + .then(() => console.log("Answer text copied to clipboard")) + .catch((err) => console.error("Could not copy text: ", err)); + }); + const options: Intl.DateTimeFormatOptions = { + month: "short", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + }; + const time = new Date().toLocaleString("en-US", options); + // Update timestamp + document.getElementById("timestamp")!.innerText = time; + // Hide loading indicator + document.getElementById("loading-indicator")!.style.display = "none"; } function fetchPageContents() { - chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) { - var port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); - port.postMessage({}); - port.onMessage.addListener(function (msg) { - console.log("Page contents:", msg.contents); - context = msg.contents - }); + chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) { + const port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); + port.postMessage({}); + port.onMessage.addListener(function (msg) { + console.log("Page contents:", msg.contents); + context = msg.contents; }); + }); } diff --git a/examples/get-started-web-worker/src/main.ts b/examples/get-started-web-worker/src/main.ts index 0c89d7a8..ebff6c2b 100644 --- a/examples/get-started-web-worker/src/main.ts +++ b/examples/get-started-web-worker/src/main.ts @@ -17,27 +17,26 @@ async function mainNonStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; - const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( - new Worker( - new URL('./worker.ts', import.meta.url), - { type: 'module' } - ), - selectedModel, - { initProgressCallback: initProgressCallback } - ); + const engine: webllm.MLCEngineInterface = + await webllm.CreateWebWorkerMLCEngine( + new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), + selectedModel, + { initProgressCallback: initProgressCallback }, + ); const request: webllm.ChatCompletionRequest = { messages: [ { - "role": "system", - "content": "You are a helpful, respectful and honest assistant. " + - "Be as happy as you can when speaking please. " + role: "system", + content: + "You are a helpful, respectful and honest assistant. " + + "Be as happy as you can when speaking please. ", }, - { "role": "user", "content": "Provide me three US states." }, - { "role": "assistant", "content": "California, New York, Pennsylvania." }, - { "role": "user", "content": "Two more please!" }, + { role: "user", content: "Provide me three US states." }, + { role: "assistant", content: "California, New York, Pennsylvania." }, + { role: "user", content: "Two more please!" 
}, ], n: 3, temperature: 1.5, @@ -57,28 +56,27 @@ async function mainStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; - const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( - new Worker( - new URL('./worker.ts', import.meta.url), - { type: 'module' } - ), - selectedModel, - { initProgressCallback: initProgressCallback } - ); + const engine: webllm.MLCEngineInterface = + await webllm.CreateWebWorkerMLCEngine( + new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), + selectedModel, + { initProgressCallback: initProgressCallback }, + ); const request: webllm.ChatCompletionRequest = { stream: true, messages: [ { - "role": "system", - "content": "You are a helpful, respectful and honest assistant. " + - "Be as happy as you can when speaking please. " + role: "system", + content: + "You are a helpful, respectful and honest assistant. " + + "Be as happy as you can when speaking please. ", }, - { "role": "user", "content": "Provide me three US states." }, - { "role": "assistant", "content": "California, New York, Pennsylvania." }, - { "role": "user", "content": "Two more please!" }, + { role: "user", content: "Provide me three US states." }, + { role: "assistant", content: "California, New York, Pennsylvania." }, + { role: "user", content: "Two more please!" }, ], temperature: 1.5, max_gen_len: 256, @@ -95,7 +93,7 @@ async function mainStreaming() { setLabel("generate-label", message); // engine.interruptGenerate(); // works with interrupt as well } - console.log("Final message:\n", await engine.getMessage()); // the concatenated message + console.log("Final message:\n", await engine.getMessage()); // the concatenated message console.log(await engine.runtimeStatsText()); } diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index 9b39ef68..e083a57c 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -13,10 +13,10 @@ async function main() { setLabel("init-label", report.text); }; // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { initProgressCallback: initProgressCallback } + { initProgressCallback: initProgressCallback }, ); // Option 2: Specify your own model other than the prebuilt ones @@ -24,7 +24,7 @@ async function main() { // model_list: [ // { // "model_url": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", - // "model_id": "Llama-3-8B-Instruct-q4f32_1", + // "model_id": "Llama-3-8B-Instruct-q4f32_1-MLC", // "model_lib_url": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // }, // ] @@ -35,9 +35,7 @@ async function main() { // ); const reply0 = await engine.chat.completions.create({ - messages: [ - { "role": "user", "content": "List three US states." }, - ], + messages: [{ role: "user", content: "List three US states." 
}], // below configurations are all optional n: 3, temperature: 1.5, diff --git a/examples/json-mode/src/json_mode.ts b/examples/json-mode/src/json_mode.ts index 9ad834d2..f85e8509 100644 --- a/examples/json-mode/src/json_mode.ts +++ b/examples/json-mode/src/json_mode.ts @@ -12,7 +12,7 @@ async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback }, diff --git a/examples/json-schema/src/json_schema.ts b/examples/json-schema/src/json_schema.ts index 8c55ab7f..321e80d6 100644 --- a/examples/json-schema/src/json_schema.ts +++ b/examples/json-schema/src/json_schema.ts @@ -38,7 +38,7 @@ async function simpleStructuredTextExample() { setLabel("init-label", report.text); }; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - "Llama-3-8B-Instruct-q4f16_1", + "Llama-3-8B-Instruct-q4f16_1-MLC", { initProgressCallback: initProgressCallback }, ); @@ -105,7 +105,7 @@ async function harryPotterExample() { }; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - "Llama-3-8B-Instruct-q4f16_1", + "Llama-3-8B-Instruct-q4f16_1-MLC", { initProgressCallback: initProgressCallback }, ); diff --git a/examples/logit-processor/src/logit_processor.ts b/examples/logit-processor/src/logit_processor.ts index 7941e24f..b43aa4cb 100644 --- a/examples/logit-processor/src/logit_processor.ts +++ b/examples/logit-processor/src/logit_processor.ts @@ -1,8 +1,8 @@ import * as webllm from "@mlc-ai/web-llm"; import { MyLogitProcessor } from "./my_logit_processor"; -const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker -const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test +const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker +const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test function setLabel(id: string, text: string) { const label = document.getElementById(id); @@ -15,11 +15,11 @@ function setLabel(id: string, text: string) { async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); - } + }; // Instantiate myLogitProcessor, registering in the logitProcessorRegistry const myLogitProcessor = new MyLogitProcessor(); const logitProcessorRegistry = new Map(); - logitProcessorRegistry.set("Phi2-q4f32_1", myLogitProcessor); + logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); let engine: webllm.MLCEngineInterface; @@ -27,43 +27,46 @@ async function main() { if (USE_WEB_WORKER) { // see worker.ts on how LogitProcessor plays a role there engine = await webllm.CreateWebWorkerMLCEngine( - new Worker( - new URL('./worker.ts', import.meta.url), - { type: 'module' } - ), - "Phi2-q4f32_1", - { initProgressCallback: initProgressCallback } + new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), + "phi-2-q4f32_1-MLC", + { initProgressCallback: initProgressCallback }, ); } else { - engine = await webllm.CreateMLCEngine( - "Phi2-q4f32_1", - { - initProgressCallback: initProgressCallback, - logitProcessorRegistry: logitProcessorRegistry, - } - ); + engine = await webllm.CreateMLCEngine("phi-2-q4f32_1-MLC", { + initProgressCallback: initProgressCallback, + 
logitProcessorRegistry: logitProcessorRegistry, + }); } // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()` const prompt: Array = [42]; - let nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/true); + let nextToken = await engine.forwardTokensAndSample( + prompt, + /*isPrefill=*/ true, + ); console.log(nextToken); let counter = prompt.length; while (counter < AUTOREGRESS_LIMIT) { counter += 1; - nextToken = await engine.forwardTokensAndSample([nextToken], /*isPrefill=*/false); + nextToken = await engine.forwardTokensAndSample( + [nextToken], + /*isPrefill=*/ false, + ); console.log(nextToken); } // By calling `engine.resetChat()`, we triggers MyLogitProcessor.resetState() engine.resetChat(); counter = prompt.length; - nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/true); + nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/ true); console.log(nextToken); while (counter < AUTOREGRESS_LIMIT) { counter += 1; - nextToken = await engine.forwardTokensAndSample([nextToken], /*isPrefill=*/false); + nextToken = await engine.forwardTokensAndSample( + [nextToken], + /*isPrefill=*/ false, + ); console.log(nextToken); } diff --git a/examples/logit-processor/src/worker.ts b/examples/logit-processor/src/worker.ts index ec7f6d44..ac0f9c05 100644 --- a/examples/logit-processor/src/worker.ts +++ b/examples/logit-processor/src/worker.ts @@ -6,7 +6,7 @@ console.log("Use web worker for logit processor"); const myLogitProcessor = new MyLogitProcessor(); const logitProcessorRegistry = new Map(); -logitProcessorRegistry.set("Phi2-q4f32_1", myLogitProcessor); +logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); const engine = new webllm.MLCEngine(); engine.setLogitProcessorRegistry(logitProcessorRegistry); diff --git a/examples/multi-round-chat/src/multi_round_chat.ts b/examples/multi-round-chat/src/multi_round_chat.ts index 15d69ac4..a9dd657d 100644 --- a/examples/multi-round-chat/src/multi_round_chat.ts +++ b/examples/multi-round-chat/src/multi_round_chat.ts @@ -17,25 +17,25 @@ async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { initProgressCallback: initProgressCallback } + { initProgressCallback: initProgressCallback }, ); - // Round 0 const messages: webllm.ChatCompletionMessageParam[] = [ { - "role": "system", - "content": "You are a helpful, respectful and honest assistant. " + - "Be as happy as you can when speaking please. " + role: "system", + content: + "You are a helpful, respectful and honest assistant. " + + "Be as happy as you can when speaking please. ", }, - { "role": "user", "content": "Provide me three US states." }, + { role: "user", content: "Provide me three US states." }, ]; const request0: webllm.ChatCompletionRequest = { - stream: false, // can be streaming, same behavior + stream: false, // can be streaming, same behavior messages: messages, }; @@ -46,16 +46,16 @@ async function main() { // Round 1 // Append generated response to messages - messages.push({ "role": "assistant", "content": replyMessage0 }); + messages.push({ role: "assistant", content: replyMessage0 }); // Append new user input - messages.push({ "role": "user", "content": "Two more please!" 
}); + messages.push({ role: "user", content: "Two more please!" }); // Below line would cause an internal reset (clear KV cache, etc.) since the history no longer // matches the new request // messages[0].content = "Another system prompt"; const request1: webllm.ChatCompletionRequest = { - stream: false, // can be streaming, same behavior - messages: messages + stream: false, // can be streaming, same behavior + messages: messages, }; const reply1 = await engine.chat.completions.create(request1); @@ -68,8 +68,11 @@ async function main() { const prefillTokens1 = reply1.usage?.prompt_tokens; console.log("Requset 0 prompt tokens: ", prefillTokens0); console.log("Requset 1 prompt tokens: ", prefillTokens1); - if (prefillTokens0 === undefined || prefillTokens1 === undefined || - prefillTokens1 > prefillTokens0) { + if ( + prefillTokens0 === undefined || + prefillTokens1 === undefined || + prefillTokens1 > prefillTokens0 + ) { throw Error("Multi-round chat is not triggered as expected."); } diff --git a/examples/next-simple-chat/src/utils/chat_ui.ts b/examples/next-simple-chat/src/utils/chat_ui.ts index d8856ba3..791c49f1 100644 --- a/examples/next-simple-chat/src/utils/chat_ui.ts +++ b/examples/next-simple-chat/src/utils/chat_ui.ts @@ -1,122 +1,145 @@ -import { MLCEngineInterface, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; +import { + MLCEngineInterface, + ChatCompletionMessageParam, +} from "@mlc-ai/web-llm"; export default class ChatUI { - private engine: MLCEngineInterface; - private chatLoaded = false; - private requestInProgress = false; - // We use a request chain to ensure that - // all requests send to chat are sequentialized - private chatRequestChain: Promise = Promise.resolve(); - private chatHistory: ChatCompletionMessageParam[] = []; + private engine: MLCEngineInterface; + private chatLoaded = false; + private requestInProgress = false; + // We use a request chain to ensure that + // all requests send to chat are sequentialized + private chatRequestChain: Promise = Promise.resolve(); + private chatHistory: ChatCompletionMessageParam[] = []; - constructor(engine: MLCEngineInterface) { - this.engine = engine; - } - /** - * Push a task to the execution queue. - * - * @param task The task to be executed; - */ - private pushTask(task: () => Promise) { - const lastEvent = this.chatRequestChain; - this.chatRequestChain = lastEvent.then(task); - } - // Event handlers - // all event handler pushes the tasks to a queue - // that get executed sequentially - // the tasks previous tasks, which causes them to early stop - // can be interrupted by chat.interruptGenerate - async onGenerate(prompt: string, messageUpdate: (kind: string, text: string, append: boolean) => void, setRuntimeStats: (runtimeStats: string) => void) { - if (this.requestInProgress) { - return; - } - this.pushTask(async () => { - await this.asyncGenerate(prompt, messageUpdate, setRuntimeStats); - }); - return this.chatRequestChain + constructor(engine: MLCEngineInterface) { + this.engine = engine; + } + /** + * Push a task to the execution queue. 
+ * + * @param task The task to be executed; + */ + private pushTask(task: () => Promise) { + const lastEvent = this.chatRequestChain; + this.chatRequestChain = lastEvent.then(task); + } + // Event handlers + // all event handler pushes the tasks to a queue + // that get executed sequentially + // the tasks previous tasks, which causes them to early stop + // can be interrupted by chat.interruptGenerate + async onGenerate( + prompt: string, + messageUpdate: (kind: string, text: string, append: boolean) => void, + setRuntimeStats: (runtimeStats: string) => void, + ) { + if (this.requestInProgress) { + return; } + this.pushTask(async () => { + await this.asyncGenerate(prompt, messageUpdate, setRuntimeStats); + }); + return this.chatRequestChain; + } - async onReset(clearMessages: () => void) { - if (this.requestInProgress) { - // interrupt previous generation if any - this.engine.interruptGenerate(); - } - this.chatHistory = []; - // try reset after previous requests finishes - this.pushTask(async () => { - await this.engine.resetChat(); - clearMessages(); - }); - return this.chatRequestChain + async onReset(clearMessages: () => void) { + if (this.requestInProgress) { + // interrupt previous generation if any + this.engine.interruptGenerate(); } + this.chatHistory = []; + // try reset after previous requests finishes + this.pushTask(async () => { + await this.engine.resetChat(); + clearMessages(); + }); + return this.chatRequestChain; + } - async asyncInitChat(messageUpdate: (kind: string, text: string, append: boolean) => void) { - if (this.chatLoaded) return; - this.requestInProgress = true; - messageUpdate("init", "", true); - const initProgressCallback = (report: { text: string }) => { - messageUpdate("init", report.text, false); - } - this.engine.setInitProgressCallback(initProgressCallback); + async asyncInitChat( + messageUpdate: (kind: string, text: string, append: boolean) => void, + ) { + if (this.chatLoaded) return; + this.requestInProgress = true; + messageUpdate("init", "", true); + const initProgressCallback = (report: { text: string }) => { + messageUpdate("init", report.text, false); + }; + this.engine.setInitProgressCallback(initProgressCallback); - try { - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - // const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; - await this.engine.reload(selectedModel); - } catch (err: unknown) { - messageUpdate("error", "Init error, " + (err?.toString() ?? ""), true); - console.log(err); - await this.unloadChat(); - this.requestInProgress = false; - return; - } - this.requestInProgress = false; - this.chatLoaded = true; + try { + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; + // const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k"; + await this.engine.reload(selectedModel); + } catch (err: unknown) { + messageUpdate("error", "Init error, " + (err?.toString() ?? 
""), true); + console.log(err); + await this.unloadChat(); + this.requestInProgress = false; + return; } + this.requestInProgress = false; + this.chatLoaded = true; + } - private async unloadChat() { - await this.engine.unload(); - this.chatLoaded = false; - } + private async unloadChat() { + await this.engine.unload(); + this.chatLoaded = false; + } - /** - * Run generate - */ - private async asyncGenerate(prompt: string, messageUpdate: (kind: string, text: string, append: boolean) => void, setRuntimeStats: (runtimeStats: string) => void) { - await this.asyncInitChat(messageUpdate); - this.requestInProgress = true; - // const prompt = this.uiChatInput.value; - if (prompt == "") { - this.requestInProgress = false; - return; - } + /** + * Run generate + */ + private async asyncGenerate( + prompt: string, + messageUpdate: (kind: string, text: string, append: boolean) => void, + setRuntimeStats: (runtimeStats: string) => void, + ) { + await this.asyncInitChat(messageUpdate); + this.requestInProgress = true; + // const prompt = this.uiChatInput.value; + if (prompt == "") { + this.requestInProgress = false; + return; + } - messageUpdate("right", prompt, true); - // this.uiChatInput.value = ""; - // this.uiChatInput.setAttribute("placeholder", "Generating..."); + messageUpdate("right", prompt, true); + // this.uiChatInput.value = ""; + // this.uiChatInput.setAttribute("placeholder", "Generating..."); - messageUpdate("left", "", true); + messageUpdate("left", "", true); - try { - this.chatHistory.push({ "role": "user", "content": prompt }); - let curMessage = ""; - const completion = await this.engine.chat.completions.create({ stream: true, messages: this.chatHistory }); - for await (const chunk of completion) { - const curDelta = chunk.choices[0].delta.content; - if (curDelta) { - curMessage += curDelta; - } - messageUpdate("left", curMessage, false); - } - const output = await this.engine.getMessage(); - this.chatHistory.push({ "role": "assistant", "content": output }); - messageUpdate("left", output, false); - this.engine.runtimeStatsText().then(stats => setRuntimeStats(stats)).catch(error => console.log(error)); - } catch (err: unknown) { - messageUpdate("error", "Generate error, " + (err?.toString() ?? ""), true); - console.log(err); - await this.unloadChat(); + try { + this.chatHistory.push({ role: "user", content: prompt }); + let curMessage = ""; + const completion = await this.engine.chat.completions.create({ + stream: true, + messages: this.chatHistory, + }); + for await (const chunk of completion) { + const curDelta = chunk.choices[0].delta.content; + if (curDelta) { + curMessage += curDelta; } - this.requestInProgress = false; + messageUpdate("left", curMessage, false); + } + const output = await this.engine.getMessage(); + this.chatHistory.push({ role: "assistant", content: output }); + messageUpdate("left", output, false); + this.engine + .runtimeStatsText() + .then((stats) => setRuntimeStats(stats)) + .catch((error) => console.log(error)); + } catch (err: unknown) { + messageUpdate( + "error", + "Generate error, " + (err?.toString() ?? 
""), + true, + ); + console.log(err); + await this.unloadChat(); } -} \ No newline at end of file + this.requestInProgress = false; + } +} diff --git a/examples/seed-to-reproduce/src/seed.ts b/examples/seed-to-reproduce/src/seed.ts index c4eb3109..ac2547f3 100644 --- a/examples/seed-to-reproduce/src/seed.ts +++ b/examples/seed-to-reproduce/src/seed.ts @@ -1,11 +1,11 @@ import * as webllm from "@mlc-ai/web-llm"; function setLabel(id: string, text: string) { - const label = document.getElementById(id); - if (label == null) { - throw Error("Cannot find label " + id); - } - label.innerText = text; + const label = document.getElementById(id); + if (label == null) { + throw Error("Cannot find label " + id); + } + label.innerText = text; } /** @@ -15,45 +15,48 @@ function setLabel(id: string, text: string) { * With `n > 1`, all choices should also be exactly the same. */ async function main() { - const initProgressCallback = (report: webllm.InitProgressReport) => { - setLabel("init-label", report.text); - }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - selectedModel, - { initProgressCallback: initProgressCallback } - ); - - - const request: webllm.ChatCompletionRequest = { - stream: false, // works with streaming as well - messages: [ - { "role": "user", "content": "Write a creative Haiku about Pittsburgh" } - ], - n: 3, - temperature: 1.2, // high temperature gives much more random results - max_gen_len: 128, // To save time; enough to demonstrate the effect - seed: 42, - }; - - const reply0 = await engine.chat.completions.create(request); - console.log(reply0); - console.log("First reply's last choice:\n" + await engine.getMessage()); - - const reply1 = await engine.chat.completions.create(request); - console.log(reply1); - console.log("Second reply's last choice:\n" + await engine.getMessage()); - - // Rigorously check the generation results of each choice for the two requests - for (const choice0 of reply0.choices) { - const id = choice0.index; - const choice1 = reply1.choices[id]; - if (choice0.message.content !== choice1.message.content) { - throw Error("Chocie " + id + " of the two generations are different despite seeding"); - } + const initProgressCallback = (report: webllm.InitProgressReport) => { + setLabel("init-label", report.text); + }; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( + selectedModel, + { initProgressCallback: initProgressCallback }, + ); + + const request: webllm.ChatCompletionRequest = { + stream: false, // works with streaming as well + messages: [ + { role: "user", content: "Write a creative Haiku about Pittsburgh" }, + ], + n: 3, + temperature: 1.2, // high temperature gives much more random results + max_gen_len: 128, // To save time; enough to demonstrate the effect + seed: 42, + }; + + const reply0 = await engine.chat.completions.create(request); + console.log(reply0); + console.log("First reply's last choice:\n" + (await engine.getMessage())); + + const reply1 = await engine.chat.completions.create(request); + console.log(reply1); + console.log("Second reply's last choice:\n" + (await engine.getMessage())); + + // Rigorously check the generation results of each choice for the two requests + for (const choice0 of reply0.choices) { + const id = choice0.index; + const choice1 = reply1.choices[id]; + if (choice0.message.content !== choice1.message.content) { + throw Error( + "Chocie " + + id + 
+ " of the two generations are different despite seeding", + ); } + } - console.log(await engine.runtimeStatsText()); + console.log(await engine.runtimeStatsText()); } // Run one of the functions diff --git a/examples/service-worker/src/main.ts b/examples/service-worker/src/main.ts index 437cc58b..290ed1d2 100644 --- a/examples/service-worker/src/main.ts +++ b/examples/service-worker/src/main.ts @@ -5,7 +5,7 @@ const registerServiceWorker = async () => { try { const registration = await navigator.serviceWorker.register( new URL("sw.ts", import.meta.url), - { type: "module" } + { type: "module" }, ); if (registration.installing) { console.log("Service worker installing"); @@ -37,7 +37,7 @@ async function mainNonStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateServiceWorkerMLCEngine(selectedModel, { @@ -75,7 +75,7 @@ async function mainStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.ServiceWorkerMLCEngine = await webllm.CreateServiceWorkerMLCEngine(selectedModel, { diff --git a/examples/streaming/src/streaming.ts b/examples/streaming/src/streaming.ts index a2b7bbd5..b09c8cb4 100644 --- a/examples/streaming/src/streaming.ts +++ b/examples/streaming/src/streaming.ts @@ -15,20 +15,21 @@ async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { initProgressCallback: initProgressCallback } + { initProgressCallback: initProgressCallback }, ); const request: webllm.ChatCompletionRequest = { stream: true, messages: [ { - "role": "system", - "content": "You are a pirate chatbot who always responds in pirate speak!" + role: "system", + content: + "You are a pirate chatbot who always responds in pirate speak!", }, - { "role": "user", "content": "Who are you?" }, + { role: "user", content: "Who are you?" 
}, ], logprobs: true, top_logprobs: 2, @@ -45,7 +46,7 @@ async function main() { setLabel("generate-label", message); // engine.interruptGenerate(); // works with interrupt as well } - console.log("Final message:\n", await engine.getMessage()); // the concatenated message + console.log("Final message:\n", await engine.getMessage()); // the concatenated message console.log(await engine.runtimeStatsText()); } diff --git a/src/config.ts b/src/config.ts index 7f32fb31..93bb2ce2 100644 --- a/src/config.ts +++ b/src/config.ts @@ -295,7 +295,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f32_1-1k", + model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -306,7 +306,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f16_1-1k", + model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -317,7 +317,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f32_1", + model_id: "Llama-3-8B-Instruct-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -328,7 +328,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f16_1", + model_id: "Llama-3-8B-Instruct-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -339,7 +339,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC/resolve/main/", - model_id: "Llama-3-70B-Instruct-q3f16_1", + model_id: "Llama-3-70B-Instruct-q3f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -351,7 +351,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f16_1", + model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -362,7 +362,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f32_1", + model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -373,7 +373,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f16_1-1k", + model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -384,7 +384,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f32_1-1k", + model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -396,7 +396,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f32_1-1k", + model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix 
+ modelVersion + @@ -407,7 +407,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f16_1-1k", + model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -419,7 +419,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f32_1", + model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -430,7 +430,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f16_1", + model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -455,7 +455,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/", - model_id: "WizardMath-7B-V1.1-q4f16_1", + model_id: "WizardMath-7B-V1.1-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -467,7 +467,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/", - model_id: "Mistral-7B-Instruct-v0.2-q4f16_1", + model_id: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -479,7 +479,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", - model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1", + model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -491,7 +491,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", - model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1", + model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -504,7 +504,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/", - model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1", + model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -515,7 +515,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC/resolve/main/", - model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1", + model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -526,7 +526,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/", - model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1", + model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -539,7 +539,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f16_1", + model_id: "gemma-2b-it-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -552,7 +552,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f32_1", + model_id: 
"gemma-2b-it-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -564,7 +564,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f16_1-1k", + model_id: "gemma-2b-it-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -577,7 +577,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f32_1-1k", + model_id: "gemma-2b-it-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -590,7 +590,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f16_1", + model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -601,7 +601,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f32_1", + model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -612,7 +612,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f16_1-1k", + model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -623,7 +623,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f32_1-1k", + model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -635,7 +635,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f16_1", + model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -646,7 +646,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f32_1", + model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -657,7 +657,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f16_1-1k", + model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -668,7 +668,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f32_1-1k", + model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -680,7 +680,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", - model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -692,7 +692,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", - 
model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -703,7 +703,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", - model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -715,7 +715,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", - model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -727,7 +727,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", - model_id: "Phi2-q4f16_1", + model_id: "phi-2-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -739,7 +739,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", - model_id: "Phi2-q4f32_1", + model_id: "phi-2-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -750,7 +750,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", - model_id: "Phi2-q4f16_1-1k", + model_id: "phi-2-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -762,7 +762,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", - model_id: "Phi2-q4f32_1-1k", + model_id: "phi-2-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -774,7 +774,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f16_1", + model_id: "phi-1_5-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -786,7 +786,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f32_1", + model_id: "phi-1_5-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -797,7 +797,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f16_1-1k", + model_id: "phi-1_5-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -809,7 +809,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f32_1-1k", + model_id: "phi-1_5-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -821,7 +821,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -833,7 +833,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -844,7 +844,7 @@ export 
const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -856,7 +856,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-1k", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + diff --git a/src/engine.ts b/src/engine.ts index 65823ea7..316f91f2 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -587,12 +587,12 @@ export class MLCEngine implements MLCEngineInterface { `WARNING: the current maxStorageBufferBindingSize ` + `(${computeMB(maxStorageBufferBindingSize)}) ` + `may only work for a limited number of models, e.g.: \n` + - `- Llama-3-8B-Instruct-q4f16_1-1k \n` + - `- Llama-2-7b-chat-hf-q4f16_1-1k \n` + - `- RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k \n` + - `- RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k \n` + - `- TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k \n` + - `- TinyLlama-1.1B-Chat-v0.4-q4f32_1-1k`, + `- Llama-3-8B-Instruct-q4f16_1-MLC-1k \n` + + `- Llama-2-7b-chat-hf-q4f16_1-MLC-1k \n` + + `- RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k \n` + + `- RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k \n` + + `- TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k \n` + + `- TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k`, ); } return maxStorageBufferBindingSize; diff --git a/tests/openai_chat_completion.test.ts b/tests/openai_chat_completion.test.ts index 5e719125..8f7ce8bf 100644 --- a/tests/openai_chat_completion.test.ts +++ b/tests/openai_chat_completion.test.ts @@ -1,129 +1,130 @@ -import { postInitAndCheckFields, ChatCompletionRequest } from "../src/openai_api_protocols/chat_completion" -import { describe, expect, test } from '@jest/globals'; +import { + postInitAndCheckFields, + ChatCompletionRequest, +} from "../src/openai_api_protocols/chat_completion"; +import { describe, expect, test } from "@jest/globals"; -describe('Check chat completion unsupported requests', () => { - test('High-level unsupported fields', () => { - expect(() => { - const request: ChatCompletionRequest = { - model: "Phi2-q4f32_1", // this raises error - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Hello! " }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("The following fields in ChatCompletionRequest are not yet supported"); - }); +describe("Check chat completion unsupported requests", () => { + test("High-level unsupported fields", () => { + expect(() => { + const request: ChatCompletionRequest = { + model: "phi-2-q4f32_1-MLC", // this raises error + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello! " }, + ], + }; + postInitAndCheckFields(request); + }).toThrow( + "The following fields in ChatCompletionRequest are not yet supported", + ); + }); - test('Last message should be from user', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Hello! " }, - { role: "assistant", content: "Hello! How may I help you today?" 
}, - ], - }; - postInitAndCheckFields(request) - }).toThrow("Last message should be from `user`."); - }); + test("Last message should be from user", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello! " }, + { role: "assistant", content: "Hello! How may I help you today?" }, + ], + }; + postInitAndCheckFields(request); + }).toThrow("Last message should be from `user`."); + }); - test('System prompt should always be the first one in `messages`', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "user", content: "Hello! " }, - { role: "assistant", content: "Hello! How may I help you today?" }, - { role: "user", content: "Tell me about Pittsburgh" }, - { role: "system", content: "You are a helpful assistant." }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("System prompt should always be the first one in `messages`."); - }); + test("System prompt should always be the first one in `messages`", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [ + { role: "user", content: "Hello! " }, + { role: "assistant", content: "Hello! How may I help you today?" }, + { role: "user", content: "Tell me about Pittsburgh" }, + { role: "system", content: "You are a helpful assistant." }, + ], + }; + postInitAndCheckFields(request); + }).toThrow("System prompt should always be the first one in `messages`."); + }); - test('When streaming `n` needs to be 1', () => { - expect(() => { - const request: ChatCompletionRequest = { - stream: true, - n: 2, - messages: [ - { role: "user", content: "Hello! " }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("When streaming, `n` cannot be > 1."); - }); + test("When streaming `n` needs to be 1", () => { + expect(() => { + const request: ChatCompletionRequest = { + stream: true, + n: 2, + messages: [{ role: "user", content: "Hello! " }], + }; + postInitAndCheckFields(request); + }).toThrow("When streaming, `n` cannot be > 1."); + }); - test('Non-integer seed', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "user", content: "Hello! " }, - ], - max_gen_len: 10, - seed: 42.2, // Note that Number.isInteger(42.0) is true - }; - postInitAndCheckFields(request) - }).toThrow("`seed` should be an integer, but got"); - }); + test("Non-integer seed", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [{ role: "user", content: "Hello! " }], + max_gen_len: 10, + seed: 42.2, // Note that Number.isInteger(42.0) is true + }; + postInitAndCheckFields(request); + }).toThrow("`seed` should be an integer, but got"); + }); - test('Schema without type json object', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "user", content: "Hello! " }, - ], - response_format: { schema: "some json schema" }, - }; - postInitAndCheckFields(request) - }).toThrow("JSON schema is only supported with `json_object` response format."); - }); + test("Schema without type json object", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [{ role: "user", content: "Hello! " }], + response_format: { schema: "some json schema" }, + }; + postInitAndCheckFields(request); + }).toThrow( + "JSON schema is only supported with `json_object` response format.", + ); + }); - // Remove when we support image input (e.g. 
LlaVA model) - test('Image input is unsupported', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { - role: "user", - content: [ - { type: "text", text: "What is in this image?" }, - { - type: "image_url", - image_url: { url: "https://url_here.jpg" }, - }, - ], - }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("User message only supports string `content` for now"); - }); + // Remove when we support image input (e.g. LlaVA model) + test("Image input is unsupported", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "What is in this image?" }, + { + type: "image_url", + image_url: { url: "https://url_here.jpg" }, + }, + ], + }, + ], + }; + postInitAndCheckFields(request); + }).toThrow("User message only supports string `content` for now"); + }); }); -describe('Supported requests', () => { - test('Supproted chat completion request', () => { - const request: ChatCompletionRequest = { - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Hello! " }, - { role: "assistant", content: "How can I help you? " }, - { role: "user", content: "Give me 5 US states. " }, - ], - n: 3, - temperature: 1.5, - max_gen_len: 25, - frequency_penalty: 0.2, - seed: 42, - logprobs: true, - top_logprobs: 2, - logit_bias: { - "13813": -100, - "10319": 5, - "7660": 5, - }, - }; - postInitAndCheckFields(request) - }); -}) +describe("Supported requests", () => { + test("Supproted chat completion request", () => { + const request: ChatCompletionRequest = { + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello! " }, + { role: "assistant", content: "How can I help you? " }, + { role: "user", content: "Give me 5 US states. " }, + ], + n: 3, + temperature: 1.5, + max_gen_len: 25, + frequency_penalty: 0.2, + seed: 42, + logprobs: true, + top_logprobs: 2, + logit_bias: { + "13813": -100, + "10319": 5, + "7660": 5, + }, + }; + postInitAndCheckFields(request); + }); +}); From 45f41e6c2c06f23715fae1001dd4630fc1b2e999 Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Wed, 29 May 2024 23:23:41 -0400 Subject: [PATCH 2/3] [ModelRecord] Update model_lib_url to model_lib, and model_url to model --- README.md | 8 +- .../function-calling/src/function_calling.ts | 44 ++-- examples/get-started/src/get_started.ts | 4 +- src/cache_util.ts | 20 +- src/config.ts | 214 +++++++++--------- src/engine.ts | 8 +- src/utils.ts | 4 +- .../src/vram_requirements.ts | 2 +- 8 files changed, 148 insertions(+), 156 deletions(-) diff --git a/README.md b/README.md index 00e9111e..5c240c90 100644 --- a/README.md +++ b/README.md @@ -247,8 +247,8 @@ on how to add new model weights and libraries to WebLLM. Here, we go over the high-level idea. There are two elements of the WebLLM package that enables new models and weight variants. -- `model_url`: Contains a URL to model artifacts, such as weights and meta-data. -- `model_lib_url`: A URL to the web assembly library (i.e. wasm file) that contains the executables to accelerate the model computations. +- `model`: Contains a URL to model artifacts, such as weights and meta-data. +- `model_lib`: A URL to the web assembly library (i.e. wasm file) that contains the executables to accelerate the model computations. Both are customizable in the WebLLM. 
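The README's own configuration snippet is updated to the new field names in the hunk below. As a minimal companion sketch (the `/url/to/...` paths and the `MyLlama-3b-v1-q4f32_0` id are placeholders taken from that snippet, not real artifacts), this is how such a record is passed to the engine once the fields are renamed; the sketch also includes the comma after `model_id` that the README example omits:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function main() {
  // Placeholder paths and id, mirroring the README snippet; point them at
  // your own compiled weights and wasm library.
  const appConfig: webllm.AppConfig = {
    model_list: [
      {
        model: "/url/to/my/llama", // was `model_url`
        model_id: "MyLlama-3b-v1-q4f32_0",
        model_lib: "/url/to/myllama3b.wasm", // was `model_lib_url`
      },
    ],
  };
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    "MyLlama-3b-v1-q4f32_0",
    { appConfig: appConfig },
  );
  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Hello!" }],
  });
  console.log(reply.choices[0].message.content);
}

main();
```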
@@ -257,9 +257,9 @@ async main() { const appConfig = { "model_list": [ { - "model_url": "/url/to/my/llama", + "model": "/url/to/my/llama", "model_id": "MyLlama-3b-v1-q4f32_0" - "model_lib_url": "/url/to/myllama3b.wasm", + "model_lib": "/url/to/myllama3b.wasm", } ], }; diff --git a/examples/function-calling/src/function_calling.ts b/examples/function-calling/src/function_calling.ts index 21b9605a..a2dd2b84 100644 --- a/examples/function-calling/src/function_calling.ts +++ b/examples/function-calling/src/function_calling.ts @@ -8,25 +8,25 @@ function setLabel(id: string, text: string) { label.innerText = text; } - async function main() { - const myAppConfig: webllm.AppConfig = { model_list: [ { - "model_url": "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC/resolve/main/", - "model_id": "gorilla-openfunctions-v2-q4f16_1", - "model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gorilla-openfunctions-v2/gorilla-openfunctions-v2-q4f16_1.wasm", + model: + "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC/resolve/main/", + model_id: "gorilla-openfunctions-v2-q4f16_1", + model_lib: + "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gorilla-openfunctions-v2/gorilla-openfunctions-v2-q4f16_1.wasm", }, - ] - } + ], + }; const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "gorilla-openfunctions-v2-q4f16_1" + const selectedModel = "gorilla-openfunctions-v2-q4f16_1"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { appConfig: myAppConfig, initProgressCallback: initProgressCallback } + { appConfig: myAppConfig, initProgressCallback: initProgressCallback }, ); const tools: Array = [ @@ -36,26 +36,30 @@ async function main() { name: "get_current_weather", description: "Get the current weather in a given location", parameters: { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", }, - "unit": { "type": "string", "enum": ["celsius", "fahrenheit"] }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, }, - "required": ["location"], + required: ["location"], }, }, - } - ] + }, + ]; const request: webllm.ChatCompletionRequest = { stream: false, messages: [ - { "role": "user", "content": "What is the current weather in celsius in Pittsburgh and Tokyo?" 
}, + { + role: "user", + content: + "What is the current weather in celsius in Pittsburgh and Tokyo?", + }, ], - tool_choice: 'auto', + tool_choice: "auto", tools: tools, }; diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index e083a57c..0f0e6476 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -23,9 +23,9 @@ async function main() { // const appConfig: webllm.AppConfig = { // model_list: [ // { - // "model_url": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + // "model": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", // "model_id": "Llama-3-8B-Instruct-q4f32_1-MLC", - // "model_lib_url": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", + // "model_lib": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // }, // ] // }; diff --git a/src/cache_util.ts b/src/cache_util.ts index 5fe766fe..8aa2cace 100644 --- a/src/cache_util.ts +++ b/src/cache_util.ts @@ -8,7 +8,7 @@ function findModelRecord(modelId: string, appConfig?: AppConfig): ModelRecord { if (matchedItem !== undefined) { return matchedItem; } - throw Error("Cannot find model_url for " + modelId); + throw Error("Cannot find model record in appConfig for " + modelId); } export async function hasModelInCache( @@ -19,7 +19,7 @@ export async function hasModelInCache( appConfig = prebuiltAppConfig; } const modelRecord = findModelRecord(modelId, appConfig); - const modelUrl = modelRecord.model_url; + const modelUrl = modelRecord.model; const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache"; return tvmjs.hasNDArrayInCache(modelUrl, "webllm/model", cacheType); } @@ -51,21 +51,17 @@ export async function deleteModelInCache( const modelRecord = findModelRecord(modelId, appConfig); let modelCache: tvmjs.ArtifactCacheTemplate; if (appConfig.useIndexedDBCache) { - tvmjs.deleteNDArrayCache( - modelRecord.model_url, - "webllm/model", - "indexeddb", - ); + tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "indexeddb"); modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model"); } else { - tvmjs.deleteNDArrayCache(modelRecord.model_url, "webllm/model", "cache"); + tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "cache"); modelCache = new tvmjs.ArtifactCache("webllm/model"); } await modelCache.deleteInCache( - new URL("tokenizer.model", modelRecord.model_url).href, + new URL("tokenizer.model", modelRecord.model).href, ); await modelCache.deleteInCache( - new URL("tokenizer.json", modelRecord.model_url).href, + new URL("tokenizer.json", modelRecord.model).href, ); } @@ -84,7 +80,7 @@ export async function deleteChatConfigInCache( } else { configCache = new tvmjs.ArtifactCache("webllm/config"); } - const configUrl = new URL("mlc-chat-config.json", modelRecord.model_url).href; + const configUrl = new URL("mlc-chat-config.json", modelRecord.model).href; await configCache.deleteInCache(configUrl); } @@ -103,5 +99,5 @@ export async function deleteModelWasmInCache( } else { wasmCache = new tvmjs.ArtifactCache("webllm/wasm"); } - await wasmCache.deleteInCache(modelRecord.model_lib_url); + await wasmCache.deleteInCache(modelRecord.model_lib); } diff --git a/src/config.ts b/src/config.ts index 93bb2ce2..170a3765 100644 --- a/src/config.ts +++ b/src/config.ts @@ -236,9 +236,9 @@ export function postInitAndCheckGenerationConfigValues( /** * Information for a 
model. - * @param model_url: the huggingface link to download the model weights. + * @param model: the huggingface link to download the model weights. * @param model_id: what we call the model. - * @param model_lib_url: link to the model library (wasm file) the model uses. + * @param model_lib: link to the model library (wasm file) the model uses. * @param vram_required_MB: amount of vram in MB required to run the model (can use * `utils/vram_requirements` to calculate). * @param low_resource_required: whether the model can run on limited devices (e.g. Android phone). @@ -246,9 +246,9 @@ export function postInitAndCheckGenerationConfigValues( * @param required_features: feature needed to run this model (e.g. shader-f16). */ export interface ModelRecord { - model_url: string; + model: string; model_id: string; - model_lib_url: string; + model_lib: string; vram_required_MB?: number; low_resource_required?: boolean; buffer_size_required_bytes?: number; @@ -273,7 +273,7 @@ export interface AppConfig { /** * modelVersion: the prebuilt model libraries that the current npm is compatible with, affects the - * `model_lib_url`s in `prebuiltAppConfig`. + * `model_lib`s in `prebuiltAppConfig`. * * @note The model version does not have to match the npm version, since not each npm update * requires an update of the model libraries. @@ -293,10 +293,10 @@ export const prebuiltAppConfig: AppConfig = { model_list: [ // Llama-3 { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -304,10 +304,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -315,10 +315,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -326,10 +326,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -337,10 +337,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC/resolve/main/", model_id: "Llama-3-70B-Instruct-q3f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm", @@ -349,10 +349,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi3-mini-instruct { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", 
@@ -360,10 +360,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -371,10 +371,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -382,10 +382,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -394,10 +394,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Llama-2 { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -405,10 +405,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -417,10 +417,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -428,10 +428,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -440,10 +440,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-13b-chat-hf-q4f16_1-MLC/resolve/main/", model_id: "Llama-2-13b-chat-hf-q4f16_1", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-13b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -453,10 +453,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Mistral variants { - model_url: + model: "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/", model_id: "WizardMath-7B-V1.1-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -465,10 +465,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/", model_id: 
"Mistral-7B-Instruct-v0.2-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -477,10 +477,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -489,10 +489,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -502,10 +502,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Hermes-2 Pro { - model_url: + model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/", model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -513,10 +513,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC/resolve/main/", model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -524,10 +524,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/", model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -537,10 +537,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Gemma-2B { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -550,10 +550,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -562,10 +562,10 @@ export const prebuiltAppConfig: AppConfig = { buffer_size_required_bytes: 262144000, }, { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -575,10 +575,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -588,10 +588,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Qwen-1.5-1.8B { - 
model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -599,10 +599,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -610,10 +610,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -621,10 +621,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -633,10 +633,10 @@ export const prebuiltAppConfig: AppConfig = { }, // StableLM-zephyr-1.6B { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -644,10 +644,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -655,10 +655,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -666,10 +666,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -678,10 +678,10 @@ export const prebuiltAppConfig: AppConfig = { }, // RedPajama { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -690,10 +690,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", - model_lib_url: + model_lib: 
modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -701,10 +701,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -713,10 +713,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -725,10 +725,9 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-2 { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", model_id: "phi-2-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -737,10 +736,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", model_id: "phi-2-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -748,10 +746,9 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", model_id: "phi-2-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -760,10 +757,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", model_id: "phi-2-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -772,10 +768,9 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-1.5 { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", model_id: "phi-1_5-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -784,10 +779,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", model_id: "phi-1_5-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -795,10 +789,9 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", model_id: "phi-1_5-q4f16_1-MLC-1k", - 
model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -807,10 +800,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", model_id: "phi-1_5-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -819,10 +811,10 @@ export const prebuiltAppConfig: AppConfig = { }, // TinyLlama { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -831,10 +823,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -842,10 +834,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -854,10 +846,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx1k_cs1k-webgpu.wasm", diff --git a/src/engine.ts b/src/engine.ts index 316f91f2..de17c23a 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -144,7 +144,7 @@ export class MLCEngine implements MLCEngineInterface { typeof document !== "undefined" ? document.URL : globalThis.location.origin; - let modelUrl = modelRecord.model_url; + let modelUrl = modelRecord.model; if (!modelUrl.startsWith("http")) { modelUrl = new URL(modelUrl, baseUrl).href; } @@ -171,12 +171,12 @@ export class MLCEngine implements MLCEngineInterface { wasmCache = new tvmjs.ArtifactCache("webllm/wasm"); } - const wasmUrl = modelRecord.model_lib_url; + const wasmUrl = modelRecord.model_lib; if (wasmUrl === undefined) { throw Error( - 'Missing `model_lib_url` for the model with ID "' + + 'Missing `model_lib` for the model with ID "' + modelRecord.model_id + - '". Please ensure that `model_lib_url` is provided in `model_list` for each model. This URL is essential for downloading the WASM library necessary to run the model.', + '". Please ensure that `model_lib` is provided in `model_list` for each model. 
This URL is essential for downloading the WASM library necessary to run the model.', ); } const fetchWasmSource = async () => { diff --git a/src/utils.ts b/src/utils.ts index ad6cfcc0..7e2ce99c 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -35,9 +35,9 @@ export function areModelRecordsEqual( ): boolean { // Compare primitive fields if ( - record1.model_url !== record2.model_url || + record1.model !== record2.model || record1.model_id !== record2.model_id || - record1.model_lib_url !== record2.model_lib_url || + record1.model_lib !== record2.model_lib || record1.vram_required_MB !== record2.vram_required_MB || record1.low_resource_required !== record2.low_resource_required || record1.buffer_size_required_bytes !== record2.buffer_size_required_bytes diff --git a/utils/vram_requirements/src/vram_requirements.ts b/utils/vram_requirements/src/vram_requirements.ts index 14988fcc..0f622ef5 100644 --- a/utils/vram_requirements/src/vram_requirements.ts +++ b/utils/vram_requirements/src/vram_requirements.ts @@ -30,7 +30,7 @@ async function main() { const modelRecord: ModelRecord = config.model_list[i]; const model_id = modelRecord.model_id; // 2. Load the wasm - const wasmUrl = modelRecord.model_lib_url; + const wasmUrl = modelRecord.model_lib; const wasmSource = await (await fetch(wasmUrl)).arrayBuffer(); report += `${model_id}: \n`; // 3. Initialize tvmjs instance and virtual machine using the wasm From c5adc8c38b5166700a1f12907a04fcb22479ce35 Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Thu, 30 May 2024 00:26:52 -0400 Subject: [PATCH 3/3] Remove resolve/main from model record input --- .../function-calling/src/function_calling.ts | 2 +- examples/get-started/src/get_started.ts | 2 +- src/cache_util.ts | 19 ++- src/config.ts | 125 +++++++----------- src/engine.ts | 3 +- src/support.ts | 17 +++ tests/util.test.ts | 36 ++++- 7 files changed, 114 insertions(+), 90 deletions(-) diff --git a/examples/function-calling/src/function_calling.ts b/examples/function-calling/src/function_calling.ts index a2dd2b84..b2ef1ac5 100644 --- a/examples/function-calling/src/function_calling.ts +++ b/examples/function-calling/src/function_calling.ts @@ -13,7 +13,7 @@ async function main() { model_list: [ { model: - "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC", model_id: "gorilla-openfunctions-v2-q4f16_1", model_lib: "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gorilla-openfunctions-v2/gorilla-openfunctions-v2-q4f16_1.wasm", diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index 0f0e6476..4e209efc 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -23,7 +23,7 @@ async function main() { // const appConfig: webllm.AppConfig = { // model_list: [ // { - // "model": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + // "model": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC", // "model_id": "Llama-3-8B-Instruct-q4f32_1-MLC", // "model_lib": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // }, diff --git a/src/cache_util.ts b/src/cache_util.ts index 8aa2cace..2e60f932 100644 --- a/src/cache_util.ts +++ b/src/cache_util.ts @@ -1,5 +1,6 @@ import * as tvmjs from "tvmjs"; import { AppConfig, ModelRecord, prebuiltAppConfig } from "./config"; +import 
{ cleanModelUrl } from "./support"; function findModelRecord(modelId: string, appConfig?: AppConfig): ModelRecord { const matchedItem = appConfig?.model_list.find( @@ -19,7 +20,7 @@ export async function hasModelInCache( appConfig = prebuiltAppConfig; } const modelRecord = findModelRecord(modelId, appConfig); - const modelUrl = modelRecord.model; + const modelUrl = cleanModelUrl(modelRecord.model); const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache"; return tvmjs.hasNDArrayInCache(modelUrl, "webllm/model", cacheType); } @@ -49,20 +50,17 @@ export async function deleteModelInCache( appConfig = prebuiltAppConfig; } const modelRecord = findModelRecord(modelId, appConfig); + const modelUrl = cleanModelUrl(modelRecord.model); let modelCache: tvmjs.ArtifactCacheTemplate; if (appConfig.useIndexedDBCache) { - tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "indexeddb"); + tvmjs.deleteNDArrayCache(modelUrl, "webllm/model", "indexeddb"); modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model"); } else { - tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "cache"); + tvmjs.deleteNDArrayCache(modelUrl, "webllm/model", "cache"); modelCache = new tvmjs.ArtifactCache("webllm/model"); } - await modelCache.deleteInCache( - new URL("tokenizer.model", modelRecord.model).href, - ); - await modelCache.deleteInCache( - new URL("tokenizer.json", modelRecord.model).href, - ); + await modelCache.deleteInCache(new URL("tokenizer.model", modelUrl).href); + await modelCache.deleteInCache(new URL("tokenizer.json", modelUrl).href); } export async function deleteChatConfigInCache( @@ -80,7 +78,8 @@ export async function deleteChatConfigInCache( } else { configCache = new tvmjs.ArtifactCache("webllm/config"); } - const configUrl = new URL("mlc-chat-config.json", modelRecord.model).href; + const modelUrl = cleanModelUrl(modelRecord.model); + const configUrl = new URL("mlc-chat-config.json", modelUrl).href; await configCache.deleteInCache(configUrl); } diff --git a/src/config.ts b/src/config.ts index 170a3765..5eb049f9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -293,8 +293,7 @@ export const prebuiltAppConfig: AppConfig = { model_list: [ // Llama-3 { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -304,8 +303,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -315,8 +313,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -326,8 +323,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -337,8 +333,7 @@ export const prebuiltAppConfig: AppConfig = { 
low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC", model_id: "Llama-3-70B-Instruct-q3f16_1-MLC", model_lib: modelLibURLPrefix + @@ -349,8 +344,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi3-mini-instruct { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -360,8 +354,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -371,8 +364,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -382,8 +374,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -394,8 +385,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Llama-2 { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -405,8 +395,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -417,8 +406,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -428,8 +416,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -440,8 +427,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-13b-chat-hf-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-13b-chat-hf-q4f16_1-MLC", model_id: "Llama-2-13b-chat-hf-q4f16_1", model_lib: modelLibURLPrefix + @@ -453,8 +439,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Mistral variants { - model: - "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/", + model: 
"https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC", model_id: "WizardMath-7B-V1.1-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -466,7 +451,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC", model_id: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -478,7 +463,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -490,7 +475,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -503,7 +488,7 @@ export const prebuiltAppConfig: AppConfig = { // Hermes-2 Pro { model: - "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -514,7 +499,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -525,7 +510,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -537,8 +522,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Gemma-2B { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", model_id: "gemma-2b-it-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -550,8 +534,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", model_id: "gemma-2b-it-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -562,8 +545,7 @@ export const prebuiltAppConfig: AppConfig = { buffer_size_required_bytes: 262144000, }, { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", model_id: "gemma-2b-it-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -575,8 +557,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", model_id: "gemma-2b-it-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -588,8 +569,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Qwen-1.5-1.8B { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", + model: 
"https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -599,8 +579,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -610,8 +589,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -621,8 +599,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -633,8 +610,7 @@ export const prebuiltAppConfig: AppConfig = { }, // StableLM-zephyr-1.6B { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -644,8 +620,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -655,8 +630,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -666,8 +640,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -679,7 +652,7 @@ export const prebuiltAppConfig: AppConfig = { // RedPajama { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -691,7 +664,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -702,7 +675,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -714,7 +687,7 
@@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -725,7 +698,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-2 { - model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC", model_id: "phi-2-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -736,7 +709,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC", model_id: "phi-2-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -746,7 +719,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC", model_id: "phi-2-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -757,7 +730,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC", model_id: "phi-2-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -768,7 +741,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-1.5 { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC", model_id: "phi-1_5-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -779,7 +752,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC", model_id: "phi-1_5-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -789,7 +762,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC", model_id: "phi-1_5-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -800,7 +773,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC", model_id: "phi-1_5-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -812,7 +785,7 @@ export const prebuiltAppConfig: AppConfig = { // TinyLlama { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -824,7 +797,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -835,7 +808,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", + 
"https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -847,7 +820,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + diff --git a/src/engine.ts b/src/engine.ts index de17c23a..66785ecc 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -39,6 +39,7 @@ import { compareConversationObject, getConversation, } from "./conversation"; +import { cleanModelUrl } from "./support"; const ERROR_WEBGPU_NOT_AVAILABLE = new Error( "WebGPU is not supported in your current environment, but it is necessary to run the WebLLM engine. " + @@ -144,7 +145,7 @@ export class MLCEngine implements MLCEngineInterface { typeof document !== "undefined" ? document.URL : globalThis.location.origin; - let modelUrl = modelRecord.model; + let modelUrl = cleanModelUrl(modelRecord.model); if (!modelUrl.startsWith("http")) { modelUrl = new URL(modelUrl, baseUrl).href; } diff --git a/src/support.ts b/src/support.ts index 8502283a..6e2961df 100644 --- a/src/support.ts +++ b/src/support.ts @@ -63,3 +63,20 @@ export function getTokenTableFromTokenizer(tokenizer: Tokenizer): string[] { } return tokenTable; } + +/** + * Postprocess the suffix of ModelRecord.model to be "/resolve/main/". + * e.g. https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/ + * @return the href of the final URL. + */ +export function cleanModelUrl(modelUrl: string): string { + if (modelUrl.endsWith("resolve/main") || modelUrl.endsWith("resolve/main/")) { + throw Error( + "Expect ModelRecord.model to not include `resolve/main` suffix.", + ); + } + // https://huggingface.co/USER/MODEL -> https://huggingface.co/USER/MODEL/ + modelUrl += modelUrl.endsWith("/") ? 
"" : "/"; + // https://huggingface.co/USER/MODEL/ -> https://huggingface.co/USER/MODEL/resolve/main/ + return new URL("resolve/main/", modelUrl).href; +} diff --git a/tests/util.test.ts b/tests/util.test.ts index fe91d86e..69893ff3 100644 --- a/tests/util.test.ts +++ b/tests/util.test.ts @@ -1,4 +1,4 @@ -import { getTopProbs } from "../src/support"; +import { cleanModelUrl, getTopProbs } from "../src/support"; describe("Check getTopLogprobs correctness", () => { test("Correctness test 1", () => { @@ -26,3 +26,37 @@ describe("Check getTopLogprobs correctness", () => { expect(topLogProbs).toEqual([]); }); }); + +describe("Test clean model URL", () => { + test("Already have resolve/main, throw error", () => { + expect(() => { + const input = "https://huggingface.co/mlc-ai/model/resolve/main"; + cleanModelUrl(input); + }).toThrow( + "Expect ModelRecord.model to not include `resolve/main` suffix.", + ); + }); + + test("Already have resolve/main/, throw error", () => { + expect(() => { + const input = "https://huggingface.co/mlc-ai/model/resolve/main/"; + cleanModelUrl(input); + }).toThrow( + "Expect ModelRecord.model to not include `resolve/main` suffix.", + ); + }); + + test("Input does not have /", () => { + const input = "https://huggingface.co/mlc-ai/model"; + const output = cleanModelUrl(input); + const expected = "https://huggingface.co/mlc-ai/model/resolve/main/"; + expect(output).toEqual(expected); + }); + + test("Input has /", () => { + const input = "https://huggingface.co/mlc-ai/model/"; + const output = cleanModelUrl(input); + const expected = "https://huggingface.co/mlc-ai/model/resolve/main/"; + expect(output).toEqual(expected); + }); +});