From 5cc81b98f79174365984316dc0379400e5badfbb Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Wed, 29 May 2024 23:20:09 -0400 Subject: [PATCH 1/3] Update model ids to match HF repo name Co-authored-by: Nestor Qin --- README.md | 4 +- examples/cache-usage/src/cache_usage.ts | 15 +- .../src/popup.ts | 6 +- examples/chrome-extension/src/popup.ts | 217 +++++++++------- examples/get-started-web-worker/src/main.ts | 60 +++-- examples/get-started/src/get_started.ts | 10 +- examples/json-mode/src/json_mode.ts | 2 +- examples/json-schema/src/json_schema.ts | 4 +- .../logit-processor/src/logit_processor.ts | 45 ++-- examples/logit-processor/src/worker.ts | 2 +- .../multi-round-chat/src/multi_round_chat.ts | 31 ++- .../next-simple-chat/src/utils/chat_ui.ts | 235 +++++++++-------- examples/seed-to-reproduce/src/seed.ts | 87 ++++--- examples/service-worker/src/main.ts | 6 +- examples/streaming/src/streaming.ts | 13 +- src/config.ts | 96 +++---- src/engine.ts | 12 +- tests/openai_chat_completion.test.ts | 241 +++++++++--------- 18 files changed, 570 insertions(+), 516 deletions(-) diff --git a/README.md b/README.md index 61755b22..00e9111e 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ async function main() { const label = document.getElementById("init-label"); label.innerText = report.text; }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, /*engineConfig=*/ { initProgressCallback: initProgressCallback }, @@ -96,7 +96,7 @@ async function main() { const initProgressCallback = (report) => { console.log(report.text); }; - const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; + const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k"; const engine = await webllm.CreateMLCEngine(selectedModel, { initProgressCallback: initProgressCallback, }); diff --git a/examples/cache-usage/src/cache_usage.ts b/examples/cache-usage/src/cache_usage.ts index e9dc7af6..bbf9730e 100644 --- a/examples/cache-usage/src/cache_usage.ts +++ b/examples/cache-usage/src/cache_usage.ts @@ -24,16 +24,19 @@ async function main() { } // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache - const selectedModel = "Phi2-q4f16_1" + const selectedModel = "phi-2-q4f16_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - "Phi2-q4f16_1", - { initProgressCallback: initProgressCallback, appConfig: appConfig } + selectedModel, + { initProgressCallback: initProgressCallback, appConfig: appConfig }, ); const request: webllm.ChatCompletionRequest = { stream: false, messages: [ - { "role": "user", "content": "Write an analogy between mathematics and a lighthouse." }, + { + role: "user", + content: "Write an analogy between mathematics and a lighthouse.", + }, ], n: 1, }; @@ -60,7 +63,9 @@ async function main() { modelCached = await webllm.hasModelInCache(selectedModel, appConfig); console.log("After deletion, hasModelInCache: ", modelCached); if (modelCached) { - throw Error("Expect hasModelInCache() to be false, but got: " + modelCached); + throw Error( + "Expect hasModelInCache() to be false, but got: " + modelCached, + ); } // 5. 
If we reload, we should expect the model to start downloading again diff --git a/examples/chrome-extension-webgpu-service-worker/src/popup.ts b/examples/chrome-extension-webgpu-service-worker/src/popup.ts index 630486ce..e8aae139 100644 --- a/examples/chrome-extension-webgpu-service-worker/src/popup.ts +++ b/examples/chrome-extension-webgpu-service-worker/src/popup.ts @@ -47,8 +47,8 @@ const initProgressCallback = (report: InitProgressReport) => { }; const engine: MLCEngineInterface = await CreateExtensionServiceWorkerMLCEngine( - "Mistral-7B-Instruct-v0.2-q4f16_1", - { initProgressCallback: initProgressCallback } + "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", + { initProgressCallback: initProgressCallback }, ); const chatHistory: ChatCompletionMessageParam[] = []; @@ -150,7 +150,7 @@ function updateAnswer(answer: string) { function fetchPageContents() { chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) { if (tabs[0]?.id) { - var port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); + const port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); port.postMessage({}); port.onMessage.addListener(function (msg) { console.log("Page contents:", msg.contents); diff --git a/examples/chrome-extension/src/popup.ts b/examples/chrome-extension/src/popup.ts index ce17b70e..0f8c99d6 100644 --- a/examples/chrome-extension/src/popup.ts +++ b/examples/chrome-extension/src/popup.ts @@ -1,12 +1,17 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ -'use strict'; +"use strict"; // This code is partially adapted from the openai-chatgpt-chrome-extension repo: // https://github.com/jessedi0n/openai-chatgpt-chrome-extension -import './popup.css'; +import "./popup.css"; -import { MLCEngineInterface, InitProgressReport, CreateMLCEngine, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; +import { + MLCEngineInterface, + InitProgressReport, + CreateMLCEngine, + ChatCompletionMessageParam, +} from "@mlc-ai/web-llm"; import { ProgressBar, Line } from "progressbar.js"; const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); @@ -21,135 +26,149 @@ fetchPageContents(); (submitButton).disabled = true; -const progressBar: ProgressBar = new Line('#loadingContainer', { - strokeWidth: 4, - easing: 'easeInOut', - duration: 1400, - color: '#ffd166', - trailColor: '#eee', - trailWidth: 1, - svgStyle: { width: '100%', height: '100%' } +const progressBar: ProgressBar = new Line("#loadingContainer", { + strokeWidth: 4, + easing: "easeInOut", + duration: 1400, + color: "#ffd166", + trailColor: "#eee", + trailWidth: 1, + svgStyle: { width: "100%", height: "100%" }, }); const initProgressCallback = (report: InitProgressReport) => { - console.log(report.text, report.progress); - progressBar.animate(report.progress, { - duration: 50 - }); - if (report.progress == 1.0) { - enableInputs(); - } + console.log(report.text, report.progress); + progressBar.animate(report.progress, { + duration: 50, + }); + if (report.progress == 1.0) { + enableInputs(); + } }; -// const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; -const selectedModel = "Mistral-7B-Instruct-v0.2-q4f16_1"; -const engine: MLCEngineInterface = await CreateMLCEngine( - selectedModel, - { initProgressCallback: initProgressCallback } -); +// const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k"; +const selectedModel = "Mistral-7B-Instruct-v0.2-q4f16_1-MLC"; +const engine: MLCEngineInterface = await CreateMLCEngine(selectedModel, { + initProgressCallback: initProgressCallback, +}); const 
chatHistory: ChatCompletionMessageParam[] = []; isLoadingParams = true; function enableInputs() { - if (isLoadingParams) { - sleep(500); - (submitButton).disabled = false; - const loadingBarContainer = document.getElementById("loadingContainer")!; - loadingBarContainer.remove(); - queryInput.focus(); - isLoadingParams = false; - } + if (isLoadingParams) { + sleep(500); + (submitButton).disabled = false; + const loadingBarContainer = document.getElementById("loadingContainer")!; + loadingBarContainer.remove(); + queryInput.focus(); + isLoadingParams = false; + } } // Disable submit button if input field is empty queryInput.addEventListener("keyup", () => { - if ((queryInput).value === "") { - (submitButton).disabled = true; - } else { - (submitButton).disabled = false; - } + if ((queryInput).value === "") { + (submitButton).disabled = true; + } else { + (submitButton).disabled = false; + } }); // If user presses enter, click submit button queryInput.addEventListener("keyup", (event) => { - if (event.code === "Enter") { - event.preventDefault(); - submitButton.click(); - } + if (event.code === "Enter") { + event.preventDefault(); + submitButton.click(); + } }); // Listen for clicks on submit button async function handleClick() { - // Get the message from the input field - const message = (queryInput).value; - console.log("message", message); - // Clear the answer - document.getElementById("answer")!.innerHTML = ""; - // Hide the answer - document.getElementById("answerWrapper")!.style.display = "none"; - // Show the loading indicator - document.getElementById("loading-indicator")!.style.display = "block"; - - // Generate response - let inp = message; - if (context.length > 0) { - inp = "Use only the following context when answering the question at the end. Don't use any other knowledge.\n" + context + "\n\nQuestion: " + message + "\n\nHelpful Answer: "; - } - console.log("Input:", inp); - chatHistory.push({ "role": "user", "content": inp }); - - let curMessage = ""; - const completion = await engine.chat.completions.create({ stream: true, messages: chatHistory }); - for await (const chunk of completion) { - const curDelta = chunk.choices[0].delta.content; - if (curDelta) { - curMessage += curDelta; - } - updateAnswer(curMessage); + // Get the message from the input field + const message = (queryInput).value; + console.log("message", message); + // Clear the answer + document.getElementById("answer")!.innerHTML = ""; + // Hide the answer + document.getElementById("answerWrapper")!.style.display = "none"; + // Show the loading indicator + document.getElementById("loading-indicator")!.style.display = "block"; + + // Generate response + let inp = message; + if (context.length > 0) { + inp = + "Use only the following context when answering the question at the end. 
Don't use any other knowledge.\n" + + context + + "\n\nQuestion: " + + message + + "\n\nHelpful Answer: "; + } + console.log("Input:", inp); + chatHistory.push({ role: "user", content: inp }); + + let curMessage = ""; + const completion = await engine.chat.completions.create({ + stream: true, + messages: chatHistory, + }); + for await (const chunk of completion) { + const curDelta = chunk.choices[0].delta.content; + if (curDelta) { + curMessage += curDelta; } - const response = await engine.getMessage(); - chatHistory.push({ "role": "assistant", "content": await engine.getMessage() }); - console.log("response", response); + updateAnswer(curMessage); + } + const response = await engine.getMessage(); + chatHistory.push({ role: "assistant", content: await engine.getMessage() }); + console.log("response", response); } submitButton.addEventListener("click", handleClick); // Listen for messages from the background script chrome.runtime.onMessage.addListener(({ answer, error }) => { - if (answer) { - updateAnswer(answer); - } + if (answer) { + updateAnswer(answer); + } }); function updateAnswer(answer: string) { - // Show answer - document.getElementById("answerWrapper")!.style.display = "block"; - const answerWithBreaks = answer.replace(/\n/g, '
'); - document.getElementById("answer")!.innerHTML = answerWithBreaks; - // Add event listener to copy button - document.getElementById("copyAnswer")!.addEventListener("click", () => { - // Get the answer text - const answerText = answer; - // Copy the answer text to the clipboard - navigator.clipboard.writeText(answerText) - .then(() => console.log("Answer text copied to clipboard")) - .catch((err) => console.error("Could not copy text: ", err)); - }); - const options: Intl.DateTimeFormatOptions = { month: 'short', day: '2-digit', hour: '2-digit', minute: '2-digit', second: '2-digit' }; - const time = new Date().toLocaleString('en-US', options); - // Update timestamp - document.getElementById("timestamp")!.innerText = time; - // Hide loading indicator - document.getElementById("loading-indicator")!.style.display = "none"; + // Show answer + document.getElementById("answerWrapper")!.style.display = "block"; + const answerWithBreaks = answer.replace(/\n/g, "
"); + document.getElementById("answer")!.innerHTML = answerWithBreaks; + // Add event listener to copy button + document.getElementById("copyAnswer")!.addEventListener("click", () => { + // Get the answer text + const answerText = answer; + // Copy the answer text to the clipboard + navigator.clipboard + .writeText(answerText) + .then(() => console.log("Answer text copied to clipboard")) + .catch((err) => console.error("Could not copy text: ", err)); + }); + const options: Intl.DateTimeFormatOptions = { + month: "short", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + }; + const time = new Date().toLocaleString("en-US", options); + // Update timestamp + document.getElementById("timestamp")!.innerText = time; + // Hide loading indicator + document.getElementById("loading-indicator")!.style.display = "none"; } function fetchPageContents() { - chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) { - var port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); - port.postMessage({}); - port.onMessage.addListener(function (msg) { - console.log("Page contents:", msg.contents); - context = msg.contents - }); + chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) { + const port = chrome.tabs.connect(tabs[0].id, { name: "channelName" }); + port.postMessage({}); + port.onMessage.addListener(function (msg) { + console.log("Page contents:", msg.contents); + context = msg.contents; }); + }); } diff --git a/examples/get-started-web-worker/src/main.ts b/examples/get-started-web-worker/src/main.ts index 0c89d7a8..ebff6c2b 100644 --- a/examples/get-started-web-worker/src/main.ts +++ b/examples/get-started-web-worker/src/main.ts @@ -17,27 +17,26 @@ async function mainNonStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; - const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( - new Worker( - new URL('./worker.ts', import.meta.url), - { type: 'module' } - ), - selectedModel, - { initProgressCallback: initProgressCallback } - ); + const engine: webllm.MLCEngineInterface = + await webllm.CreateWebWorkerMLCEngine( + new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), + selectedModel, + { initProgressCallback: initProgressCallback }, + ); const request: webllm.ChatCompletionRequest = { messages: [ { - "role": "system", - "content": "You are a helpful, respectful and honest assistant. " + - "Be as happy as you can when speaking please. " + role: "system", + content: + "You are a helpful, respectful and honest assistant. " + + "Be as happy as you can when speaking please. ", }, - { "role": "user", "content": "Provide me three US states." }, - { "role": "assistant", "content": "California, New York, Pennsylvania." }, - { "role": "user", "content": "Two more please!" }, + { role: "user", content: "Provide me three US states." }, + { role: "assistant", content: "California, New York, Pennsylvania." }, + { role: "user", content: "Two more please!" 
}, ], n: 3, temperature: 1.5, @@ -57,28 +56,27 @@ async function mainStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; - const engine: webllm.MLCEngineInterface = await webllm.CreateWebWorkerMLCEngine( - new Worker( - new URL('./worker.ts', import.meta.url), - { type: 'module' } - ), - selectedModel, - { initProgressCallback: initProgressCallback } - ); + const engine: webllm.MLCEngineInterface = + await webllm.CreateWebWorkerMLCEngine( + new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), + selectedModel, + { initProgressCallback: initProgressCallback }, + ); const request: webllm.ChatCompletionRequest = { stream: true, messages: [ { - "role": "system", - "content": "You are a helpful, respectful and honest assistant. " + - "Be as happy as you can when speaking please. " + role: "system", + content: + "You are a helpful, respectful and honest assistant. " + + "Be as happy as you can when speaking please. ", }, - { "role": "user", "content": "Provide me three US states." }, - { "role": "assistant", "content": "California, New York, Pennsylvania." }, - { "role": "user", "content": "Two more please!" }, + { role: "user", content: "Provide me three US states." }, + { role: "assistant", content: "California, New York, Pennsylvania." }, + { role: "user", content: "Two more please!" }, ], temperature: 1.5, max_gen_len: 256, @@ -95,7 +93,7 @@ async function mainStreaming() { setLabel("generate-label", message); // engine.interruptGenerate(); // works with interrupt as well } - console.log("Final message:\n", await engine.getMessage()); // the concatenated message + console.log("Final message:\n", await engine.getMessage()); // the concatenated message console.log(await engine.runtimeStatsText()); } diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index 9b39ef68..e083a57c 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -13,10 +13,10 @@ async function main() { setLabel("init-label", report.text); }; // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts` - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { initProgressCallback: initProgressCallback } + { initProgressCallback: initProgressCallback }, ); // Option 2: Specify your own model other than the prebuilt ones @@ -24,7 +24,7 @@ async function main() { // model_list: [ // { // "model_url": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", - // "model_id": "Llama-3-8B-Instruct-q4f32_1", + // "model_id": "Llama-3-8B-Instruct-q4f32_1-MLC", // "model_lib_url": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // }, // ] @@ -35,9 +35,7 @@ async function main() { // ); const reply0 = await engine.chat.completions.create({ - messages: [ - { "role": "user", "content": "List three US states." }, - ], + messages: [{ role: "user", content: "List three US states." 
}], // below configurations are all optional n: 3, temperature: 1.5, diff --git a/examples/json-mode/src/json_mode.ts b/examples/json-mode/src/json_mode.ts index 9ad834d2..f85e8509 100644 --- a/examples/json-mode/src/json_mode.ts +++ b/examples/json-mode/src/json_mode.ts @@ -12,7 +12,7 @@ async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, { initProgressCallback: initProgressCallback }, diff --git a/examples/json-schema/src/json_schema.ts b/examples/json-schema/src/json_schema.ts index 8c55ab7f..321e80d6 100644 --- a/examples/json-schema/src/json_schema.ts +++ b/examples/json-schema/src/json_schema.ts @@ -38,7 +38,7 @@ async function simpleStructuredTextExample() { setLabel("init-label", report.text); }; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - "Llama-3-8B-Instruct-q4f16_1", + "Llama-3-8B-Instruct-q4f16_1-MLC", { initProgressCallback: initProgressCallback }, ); @@ -105,7 +105,7 @@ async function harryPotterExample() { }; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - "Llama-3-8B-Instruct-q4f16_1", + "Llama-3-8B-Instruct-q4f16_1-MLC", { initProgressCallback: initProgressCallback }, ); diff --git a/examples/logit-processor/src/logit_processor.ts b/examples/logit-processor/src/logit_processor.ts index 7941e24f..b43aa4cb 100644 --- a/examples/logit-processor/src/logit_processor.ts +++ b/examples/logit-processor/src/logit_processor.ts @@ -1,8 +1,8 @@ import * as webllm from "@mlc-ai/web-llm"; import { MyLogitProcessor } from "./my_logit_processor"; -const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker -const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test +const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker +const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test function setLabel(id: string, text: string) { const label = document.getElementById(id); @@ -15,11 +15,11 @@ function setLabel(id: string, text: string) { async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); - } + }; // Instantiate myLogitProcessor, registering in the logitProcessorRegistry const myLogitProcessor = new MyLogitProcessor(); const logitProcessorRegistry = new Map(); - logitProcessorRegistry.set("Phi2-q4f32_1", myLogitProcessor); + logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); let engine: webllm.MLCEngineInterface; @@ -27,43 +27,46 @@ async function main() { if (USE_WEB_WORKER) { // see worker.ts on how LogitProcessor plays a role there engine = await webllm.CreateWebWorkerMLCEngine( - new Worker( - new URL('./worker.ts', import.meta.url), - { type: 'module' } - ), - "Phi2-q4f32_1", - { initProgressCallback: initProgressCallback } + new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }), + "phi-2-q4f32_1-MLC", + { initProgressCallback: initProgressCallback }, ); } else { - engine = await webllm.CreateMLCEngine( - "Phi2-q4f32_1", - { - initProgressCallback: initProgressCallback, - logitProcessorRegistry: logitProcessorRegistry, - } - ); + engine = await webllm.CreateMLCEngine("phi-2-q4f32_1-MLC", { + initProgressCallback: initProgressCallback, + 
logitProcessorRegistry: logitProcessorRegistry, + }); } // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()` const prompt: Array = [42]; - let nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/true); + let nextToken = await engine.forwardTokensAndSample( + prompt, + /*isPrefill=*/ true, + ); console.log(nextToken); let counter = prompt.length; while (counter < AUTOREGRESS_LIMIT) { counter += 1; - nextToken = await engine.forwardTokensAndSample([nextToken], /*isPrefill=*/false); + nextToken = await engine.forwardTokensAndSample( + [nextToken], + /*isPrefill=*/ false, + ); console.log(nextToken); } // By calling `engine.resetChat()`, we triggers MyLogitProcessor.resetState() engine.resetChat(); counter = prompt.length; - nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/true); + nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/ true); console.log(nextToken); while (counter < AUTOREGRESS_LIMIT) { counter += 1; - nextToken = await engine.forwardTokensAndSample([nextToken], /*isPrefill=*/false); + nextToken = await engine.forwardTokensAndSample( + [nextToken], + /*isPrefill=*/ false, + ); console.log(nextToken); } diff --git a/examples/logit-processor/src/worker.ts b/examples/logit-processor/src/worker.ts index ec7f6d44..ac0f9c05 100644 --- a/examples/logit-processor/src/worker.ts +++ b/examples/logit-processor/src/worker.ts @@ -6,7 +6,7 @@ console.log("Use web worker for logit processor"); const myLogitProcessor = new MyLogitProcessor(); const logitProcessorRegistry = new Map(); -logitProcessorRegistry.set("Phi2-q4f32_1", myLogitProcessor); +logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor); const engine = new webllm.MLCEngine(); engine.setLogitProcessorRegistry(logitProcessorRegistry); diff --git a/examples/multi-round-chat/src/multi_round_chat.ts b/examples/multi-round-chat/src/multi_round_chat.ts index 15d69ac4..a9dd657d 100644 --- a/examples/multi-round-chat/src/multi_round_chat.ts +++ b/examples/multi-round-chat/src/multi_round_chat.ts @@ -17,25 +17,25 @@ async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { initProgressCallback: initProgressCallback } + { initProgressCallback: initProgressCallback }, ); - // Round 0 const messages: webllm.ChatCompletionMessageParam[] = [ { - "role": "system", - "content": "You are a helpful, respectful and honest assistant. " + - "Be as happy as you can when speaking please. " + role: "system", + content: + "You are a helpful, respectful and honest assistant. " + + "Be as happy as you can when speaking please. ", }, - { "role": "user", "content": "Provide me three US states." }, + { role: "user", content: "Provide me three US states." }, ]; const request0: webllm.ChatCompletionRequest = { - stream: false, // can be streaming, same behavior + stream: false, // can be streaming, same behavior messages: messages, }; @@ -46,16 +46,16 @@ async function main() { // Round 1 // Append generated response to messages - messages.push({ "role": "assistant", "content": replyMessage0 }); + messages.push({ role: "assistant", content: replyMessage0 }); // Append new user input - messages.push({ "role": "user", "content": "Two more please!" 
}); + messages.push({ role: "user", content: "Two more please!" }); // Below line would cause an internal reset (clear KV cache, etc.) since the history no longer // matches the new request // messages[0].content = "Another system prompt"; const request1: webllm.ChatCompletionRequest = { - stream: false, // can be streaming, same behavior - messages: messages + stream: false, // can be streaming, same behavior + messages: messages, }; const reply1 = await engine.chat.completions.create(request1); @@ -68,8 +68,11 @@ async function main() { const prefillTokens1 = reply1.usage?.prompt_tokens; console.log("Requset 0 prompt tokens: ", prefillTokens0); console.log("Requset 1 prompt tokens: ", prefillTokens1); - if (prefillTokens0 === undefined || prefillTokens1 === undefined || - prefillTokens1 > prefillTokens0) { + if ( + prefillTokens0 === undefined || + prefillTokens1 === undefined || + prefillTokens1 > prefillTokens0 + ) { throw Error("Multi-round chat is not triggered as expected."); } diff --git a/examples/next-simple-chat/src/utils/chat_ui.ts b/examples/next-simple-chat/src/utils/chat_ui.ts index d8856ba3..791c49f1 100644 --- a/examples/next-simple-chat/src/utils/chat_ui.ts +++ b/examples/next-simple-chat/src/utils/chat_ui.ts @@ -1,122 +1,145 @@ -import { MLCEngineInterface, ChatCompletionMessageParam } from "@mlc-ai/web-llm"; +import { + MLCEngineInterface, + ChatCompletionMessageParam, +} from "@mlc-ai/web-llm"; export default class ChatUI { - private engine: MLCEngineInterface; - private chatLoaded = false; - private requestInProgress = false; - // We use a request chain to ensure that - // all requests send to chat are sequentialized - private chatRequestChain: Promise = Promise.resolve(); - private chatHistory: ChatCompletionMessageParam[] = []; + private engine: MLCEngineInterface; + private chatLoaded = false; + private requestInProgress = false; + // We use a request chain to ensure that + // all requests send to chat are sequentialized + private chatRequestChain: Promise = Promise.resolve(); + private chatHistory: ChatCompletionMessageParam[] = []; - constructor(engine: MLCEngineInterface) { - this.engine = engine; - } - /** - * Push a task to the execution queue. - * - * @param task The task to be executed; - */ - private pushTask(task: () => Promise) { - const lastEvent = this.chatRequestChain; - this.chatRequestChain = lastEvent.then(task); - } - // Event handlers - // all event handler pushes the tasks to a queue - // that get executed sequentially - // the tasks previous tasks, which causes them to early stop - // can be interrupted by chat.interruptGenerate - async onGenerate(prompt: string, messageUpdate: (kind: string, text: string, append: boolean) => void, setRuntimeStats: (runtimeStats: string) => void) { - if (this.requestInProgress) { - return; - } - this.pushTask(async () => { - await this.asyncGenerate(prompt, messageUpdate, setRuntimeStats); - }); - return this.chatRequestChain + constructor(engine: MLCEngineInterface) { + this.engine = engine; + } + /** + * Push a task to the execution queue. 
+ * + * @param task The task to be executed; + */ + private pushTask(task: () => Promise) { + const lastEvent = this.chatRequestChain; + this.chatRequestChain = lastEvent.then(task); + } + // Event handlers + // all event handler pushes the tasks to a queue + // that get executed sequentially + // the tasks previous tasks, which causes them to early stop + // can be interrupted by chat.interruptGenerate + async onGenerate( + prompt: string, + messageUpdate: (kind: string, text: string, append: boolean) => void, + setRuntimeStats: (runtimeStats: string) => void, + ) { + if (this.requestInProgress) { + return; } + this.pushTask(async () => { + await this.asyncGenerate(prompt, messageUpdate, setRuntimeStats); + }); + return this.chatRequestChain; + } - async onReset(clearMessages: () => void) { - if (this.requestInProgress) { - // interrupt previous generation if any - this.engine.interruptGenerate(); - } - this.chatHistory = []; - // try reset after previous requests finishes - this.pushTask(async () => { - await this.engine.resetChat(); - clearMessages(); - }); - return this.chatRequestChain + async onReset(clearMessages: () => void) { + if (this.requestInProgress) { + // interrupt previous generation if any + this.engine.interruptGenerate(); } + this.chatHistory = []; + // try reset after previous requests finishes + this.pushTask(async () => { + await this.engine.resetChat(); + clearMessages(); + }); + return this.chatRequestChain; + } - async asyncInitChat(messageUpdate: (kind: string, text: string, append: boolean) => void) { - if (this.chatLoaded) return; - this.requestInProgress = true; - messageUpdate("init", "", true); - const initProgressCallback = (report: { text: string }) => { - messageUpdate("init", report.text, false); - } - this.engine.setInitProgressCallback(initProgressCallback); + async asyncInitChat( + messageUpdate: (kind: string, text: string, append: boolean) => void, + ) { + if (this.chatLoaded) return; + this.requestInProgress = true; + messageUpdate("init", "", true); + const initProgressCallback = (report: { text: string }) => { + messageUpdate("init", report.text, false); + }; + this.engine.setInitProgressCallback(initProgressCallback); - try { - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - // const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k"; - await this.engine.reload(selectedModel); - } catch (err: unknown) { - messageUpdate("error", "Init error, " + (err?.toString() ?? ""), true); - console.log(err); - await this.unloadChat(); - this.requestInProgress = false; - return; - } - this.requestInProgress = false; - this.chatLoaded = true; + try { + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; + // const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k"; + await this.engine.reload(selectedModel); + } catch (err: unknown) { + messageUpdate("error", "Init error, " + (err?.toString() ?? 
""), true); + console.log(err); + await this.unloadChat(); + this.requestInProgress = false; + return; } + this.requestInProgress = false; + this.chatLoaded = true; + } - private async unloadChat() { - await this.engine.unload(); - this.chatLoaded = false; - } + private async unloadChat() { + await this.engine.unload(); + this.chatLoaded = false; + } - /** - * Run generate - */ - private async asyncGenerate(prompt: string, messageUpdate: (kind: string, text: string, append: boolean) => void, setRuntimeStats: (runtimeStats: string) => void) { - await this.asyncInitChat(messageUpdate); - this.requestInProgress = true; - // const prompt = this.uiChatInput.value; - if (prompt == "") { - this.requestInProgress = false; - return; - } + /** + * Run generate + */ + private async asyncGenerate( + prompt: string, + messageUpdate: (kind: string, text: string, append: boolean) => void, + setRuntimeStats: (runtimeStats: string) => void, + ) { + await this.asyncInitChat(messageUpdate); + this.requestInProgress = true; + // const prompt = this.uiChatInput.value; + if (prompt == "") { + this.requestInProgress = false; + return; + } - messageUpdate("right", prompt, true); - // this.uiChatInput.value = ""; - // this.uiChatInput.setAttribute("placeholder", "Generating..."); + messageUpdate("right", prompt, true); + // this.uiChatInput.value = ""; + // this.uiChatInput.setAttribute("placeholder", "Generating..."); - messageUpdate("left", "", true); + messageUpdate("left", "", true); - try { - this.chatHistory.push({ "role": "user", "content": prompt }); - let curMessage = ""; - const completion = await this.engine.chat.completions.create({ stream: true, messages: this.chatHistory }); - for await (const chunk of completion) { - const curDelta = chunk.choices[0].delta.content; - if (curDelta) { - curMessage += curDelta; - } - messageUpdate("left", curMessage, false); - } - const output = await this.engine.getMessage(); - this.chatHistory.push({ "role": "assistant", "content": output }); - messageUpdate("left", output, false); - this.engine.runtimeStatsText().then(stats => setRuntimeStats(stats)).catch(error => console.log(error)); - } catch (err: unknown) { - messageUpdate("error", "Generate error, " + (err?.toString() ?? ""), true); - console.log(err); - await this.unloadChat(); + try { + this.chatHistory.push({ role: "user", content: prompt }); + let curMessage = ""; + const completion = await this.engine.chat.completions.create({ + stream: true, + messages: this.chatHistory, + }); + for await (const chunk of completion) { + const curDelta = chunk.choices[0].delta.content; + if (curDelta) { + curMessage += curDelta; } - this.requestInProgress = false; + messageUpdate("left", curMessage, false); + } + const output = await this.engine.getMessage(); + this.chatHistory.push({ role: "assistant", content: output }); + messageUpdate("left", output, false); + this.engine + .runtimeStatsText() + .then((stats) => setRuntimeStats(stats)) + .catch((error) => console.log(error)); + } catch (err: unknown) { + messageUpdate( + "error", + "Generate error, " + (err?.toString() ?? 
""), + true, + ); + console.log(err); + await this.unloadChat(); } -} \ No newline at end of file + this.requestInProgress = false; + } +} diff --git a/examples/seed-to-reproduce/src/seed.ts b/examples/seed-to-reproduce/src/seed.ts index c4eb3109..ac2547f3 100644 --- a/examples/seed-to-reproduce/src/seed.ts +++ b/examples/seed-to-reproduce/src/seed.ts @@ -1,11 +1,11 @@ import * as webllm from "@mlc-ai/web-llm"; function setLabel(id: string, text: string) { - const label = document.getElementById(id); - if (label == null) { - throw Error("Cannot find label " + id); - } - label.innerText = text; + const label = document.getElementById(id); + if (label == null) { + throw Error("Cannot find label " + id); + } + label.innerText = text; } /** @@ -15,45 +15,48 @@ function setLabel(id: string, text: string) { * With `n > 1`, all choices should also be exactly the same. */ async function main() { - const initProgressCallback = (report: webllm.InitProgressReport) => { - setLabel("init-label", report.text); - }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; - const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( - selectedModel, - { initProgressCallback: initProgressCallback } - ); - - - const request: webllm.ChatCompletionRequest = { - stream: false, // works with streaming as well - messages: [ - { "role": "user", "content": "Write a creative Haiku about Pittsburgh" } - ], - n: 3, - temperature: 1.2, // high temperature gives much more random results - max_gen_len: 128, // To save time; enough to demonstrate the effect - seed: 42, - }; - - const reply0 = await engine.chat.completions.create(request); - console.log(reply0); - console.log("First reply's last choice:\n" + await engine.getMessage()); - - const reply1 = await engine.chat.completions.create(request); - console.log(reply1); - console.log("Second reply's last choice:\n" + await engine.getMessage()); - - // Rigorously check the generation results of each choice for the two requests - for (const choice0 of reply0.choices) { - const id = choice0.index; - const choice1 = reply1.choices[id]; - if (choice0.message.content !== choice1.message.content) { - throw Error("Chocie " + id + " of the two generations are different despite seeding"); - } + const initProgressCallback = (report: webllm.InitProgressReport) => { + setLabel("init-label", report.text); + }; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; + const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( + selectedModel, + { initProgressCallback: initProgressCallback }, + ); + + const request: webllm.ChatCompletionRequest = { + stream: false, // works with streaming as well + messages: [ + { role: "user", content: "Write a creative Haiku about Pittsburgh" }, + ], + n: 3, + temperature: 1.2, // high temperature gives much more random results + max_gen_len: 128, // To save time; enough to demonstrate the effect + seed: 42, + }; + + const reply0 = await engine.chat.completions.create(request); + console.log(reply0); + console.log("First reply's last choice:\n" + (await engine.getMessage())); + + const reply1 = await engine.chat.completions.create(request); + console.log(reply1); + console.log("Second reply's last choice:\n" + (await engine.getMessage())); + + // Rigorously check the generation results of each choice for the two requests + for (const choice0 of reply0.choices) { + const id = choice0.index; + const choice1 = reply1.choices[id]; + if (choice0.message.content !== choice1.message.content) { + throw Error( + "Chocie " + + id + 
+ " of the two generations are different despite seeding", + ); } + } - console.log(await engine.runtimeStatsText()); + console.log(await engine.runtimeStatsText()); } // Run one of the functions diff --git a/examples/service-worker/src/main.ts b/examples/service-worker/src/main.ts index 437cc58b..290ed1d2 100644 --- a/examples/service-worker/src/main.ts +++ b/examples/service-worker/src/main.ts @@ -5,7 +5,7 @@ const registerServiceWorker = async () => { try { const registration = await navigator.serviceWorker.register( new URL("sw.ts", import.meta.url), - { type: "module" } + { type: "module" }, ); if (registration.installing) { console.log("Service worker installing"); @@ -37,7 +37,7 @@ async function mainNonStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateServiceWorkerMLCEngine(selectedModel, { @@ -75,7 +75,7 @@ async function mainStreaming() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.ServiceWorkerMLCEngine = await webllm.CreateServiceWorkerMLCEngine(selectedModel, { diff --git a/examples/streaming/src/streaming.ts b/examples/streaming/src/streaming.ts index a2b7bbd5..b09c8cb4 100644 --- a/examples/streaming/src/streaming.ts +++ b/examples/streaming/src/streaming.ts @@ -15,20 +15,21 @@ async function main() { const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "Llama-3-8B-Instruct-q4f32_1"; + const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { initProgressCallback: initProgressCallback } + { initProgressCallback: initProgressCallback }, ); const request: webllm.ChatCompletionRequest = { stream: true, messages: [ { - "role": "system", - "content": "You are a pirate chatbot who always responds in pirate speak!" + role: "system", + content: + "You are a pirate chatbot who always responds in pirate speak!", }, - { "role": "user", "content": "Who are you?" }, + { role: "user", content: "Who are you?" 
}, ], logprobs: true, top_logprobs: 2, @@ -45,7 +46,7 @@ async function main() { setLabel("generate-label", message); // engine.interruptGenerate(); // works with interrupt as well } - console.log("Final message:\n", await engine.getMessage()); // the concatenated message + console.log("Final message:\n", await engine.getMessage()); // the concatenated message console.log(await engine.runtimeStatsText()); } diff --git a/src/config.ts b/src/config.ts index 7f32fb31..93bb2ce2 100644 --- a/src/config.ts +++ b/src/config.ts @@ -295,7 +295,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f32_1-1k", + model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -306,7 +306,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f16_1-1k", + model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -317,7 +317,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f32_1", + model_id: "Llama-3-8B-Instruct-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -328,7 +328,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", - model_id: "Llama-3-8B-Instruct-q4f16_1", + model_id: "Llama-3-8B-Instruct-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -339,7 +339,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC/resolve/main/", - model_id: "Llama-3-70B-Instruct-q3f16_1", + model_id: "Llama-3-70B-Instruct-q3f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -351,7 +351,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f16_1", + model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -362,7 +362,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f32_1", + model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -373,7 +373,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f16_1-1k", + model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -384,7 +384,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", - model_id: "Phi-3-mini-4k-instruct-q4f32_1-1k", + model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -396,7 +396,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f32_1-1k", + model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix 
+ modelVersion + @@ -407,7 +407,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f16_1-1k", + model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -419,7 +419,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f32_1", + model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -430,7 +430,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", - model_id: "Llama-2-7b-chat-hf-q4f16_1", + model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -455,7 +455,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/", - model_id: "WizardMath-7B-V1.1-q4f16_1", + model_id: "WizardMath-7B-V1.1-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -467,7 +467,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/", - model_id: "Mistral-7B-Instruct-v0.2-q4f16_1", + model_id: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -479,7 +479,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", - model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1", + model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -491,7 +491,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", - model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1", + model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -504,7 +504,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/", - model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1", + model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -515,7 +515,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC/resolve/main/", - model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1", + model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -526,7 +526,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/", - model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1", + model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -539,7 +539,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f16_1", + model_id: "gemma-2b-it-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -552,7 +552,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f32_1", + model_id: 
"gemma-2b-it-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -564,7 +564,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f16_1-1k", + model_id: "gemma-2b-it-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -577,7 +577,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", - model_id: "gemma-2b-it-q4f32_1-1k", + model_id: "gemma-2b-it-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -590,7 +590,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f16_1", + model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -601,7 +601,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f32_1", + model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -612,7 +612,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f16_1-1k", + model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -623,7 +623,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", - model_id: "Qwen1.5-1.8B-Chat-q4f32_1-1k", + model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -635,7 +635,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f16_1", + model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -646,7 +646,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f32_1", + model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -657,7 +657,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f16_1-1k", + model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -668,7 +668,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", - model_id: "stablelm-2-zephyr-1_6b-q4f32_1-1k", + model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -680,7 +680,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", - model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -692,7 +692,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", - 
model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -703,7 +703,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", - model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -715,7 +715,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", - model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k", + model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -727,7 +727,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", - model_id: "Phi2-q4f16_1", + model_id: "phi-2-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -739,7 +739,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", - model_id: "Phi2-q4f32_1", + model_id: "phi-2-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -750,7 +750,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", - model_id: "Phi2-q4f16_1-1k", + model_id: "phi-2-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -762,7 +762,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", - model_id: "Phi2-q4f32_1-1k", + model_id: "phi-2-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -774,7 +774,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f16_1", + model_id: "phi-1_5-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -786,7 +786,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f32_1", + model_id: "phi-1_5-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -797,7 +797,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f16_1-1k", + model_id: "phi-1_5-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -809,7 +809,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", - model_id: "Phi1.5-q4f32_1-1k", + model_id: "phi-1_5-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -821,7 +821,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -833,7 +833,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_lib_url: modelLibURLPrefix + modelVersion + @@ -844,7 +844,7 @@ export 
const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + @@ -856,7 +856,7 @@ export const prebuiltAppConfig: AppConfig = { { model_url: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", - model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-1k", + model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", model_lib_url: modelLibURLPrefix + modelVersion + diff --git a/src/engine.ts b/src/engine.ts index 65823ea7..316f91f2 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -587,12 +587,12 @@ export class MLCEngine implements MLCEngineInterface { `WARNING: the current maxStorageBufferBindingSize ` + `(${computeMB(maxStorageBufferBindingSize)}) ` + `may only work for a limited number of models, e.g.: \n` + - `- Llama-3-8B-Instruct-q4f16_1-1k \n` + - `- Llama-2-7b-chat-hf-q4f16_1-1k \n` + - `- RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k \n` + - `- RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k \n` + - `- TinyLlama-1.1B-Chat-v0.4-q4f16_1-1k \n` + - `- TinyLlama-1.1B-Chat-v0.4-q4f32_1-1k`, + `- Llama-3-8B-Instruct-q4f16_1-MLC-1k \n` + + `- Llama-2-7b-chat-hf-q4f16_1-MLC-1k \n` + + `- RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k \n` + + `- RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k \n` + + `- TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k \n` + + `- TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k`, ); } return maxStorageBufferBindingSize; diff --git a/tests/openai_chat_completion.test.ts b/tests/openai_chat_completion.test.ts index 5e719125..8f7ce8bf 100644 --- a/tests/openai_chat_completion.test.ts +++ b/tests/openai_chat_completion.test.ts @@ -1,129 +1,130 @@ -import { postInitAndCheckFields, ChatCompletionRequest } from "../src/openai_api_protocols/chat_completion" -import { describe, expect, test } from '@jest/globals'; +import { + postInitAndCheckFields, + ChatCompletionRequest, +} from "../src/openai_api_protocols/chat_completion"; +import { describe, expect, test } from "@jest/globals"; -describe('Check chat completion unsupported requests', () => { - test('High-level unsupported fields', () => { - expect(() => { - const request: ChatCompletionRequest = { - model: "Phi2-q4f32_1", // this raises error - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Hello! " }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("The following fields in ChatCompletionRequest are not yet supported"); - }); +describe("Check chat completion unsupported requests", () => { + test("High-level unsupported fields", () => { + expect(() => { + const request: ChatCompletionRequest = { + model: "phi-2-q4f32_1-MLC", // this raises error + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello! " }, + ], + }; + postInitAndCheckFields(request); + }).toThrow( + "The following fields in ChatCompletionRequest are not yet supported", + ); + }); - test('Last message should be from user', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Hello! " }, - { role: "assistant", content: "Hello! How may I help you today?" 
}, - ], - }; - postInitAndCheckFields(request) - }).toThrow("Last message should be from `user`."); - }); + test("Last message should be from user", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello! " }, + { role: "assistant", content: "Hello! How may I help you today?" }, + ], + }; + postInitAndCheckFields(request); + }).toThrow("Last message should be from `user`."); + }); - test('System prompt should always be the first one in `messages`', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "user", content: "Hello! " }, - { role: "assistant", content: "Hello! How may I help you today?" }, - { role: "user", content: "Tell me about Pittsburgh" }, - { role: "system", content: "You are a helpful assistant." }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("System prompt should always be the first one in `messages`."); - }); + test("System prompt should always be the first one in `messages`", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [ + { role: "user", content: "Hello! " }, + { role: "assistant", content: "Hello! How may I help you today?" }, + { role: "user", content: "Tell me about Pittsburgh" }, + { role: "system", content: "You are a helpful assistant." }, + ], + }; + postInitAndCheckFields(request); + }).toThrow("System prompt should always be the first one in `messages`."); + }); - test('When streaming `n` needs to be 1', () => { - expect(() => { - const request: ChatCompletionRequest = { - stream: true, - n: 2, - messages: [ - { role: "user", content: "Hello! " }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("When streaming, `n` cannot be > 1."); - }); + test("When streaming `n` needs to be 1", () => { + expect(() => { + const request: ChatCompletionRequest = { + stream: true, + n: 2, + messages: [{ role: "user", content: "Hello! " }], + }; + postInitAndCheckFields(request); + }).toThrow("When streaming, `n` cannot be > 1."); + }); - test('Non-integer seed', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "user", content: "Hello! " }, - ], - max_gen_len: 10, - seed: 42.2, // Note that Number.isInteger(42.0) is true - }; - postInitAndCheckFields(request) - }).toThrow("`seed` should be an integer, but got"); - }); + test("Non-integer seed", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [{ role: "user", content: "Hello! " }], + max_gen_len: 10, + seed: 42.2, // Note that Number.isInteger(42.0) is true + }; + postInitAndCheckFields(request); + }).toThrow("`seed` should be an integer, but got"); + }); - test('Schema without type json object', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { role: "user", content: "Hello! " }, - ], - response_format: { schema: "some json schema" }, - }; - postInitAndCheckFields(request) - }).toThrow("JSON schema is only supported with `json_object` response format."); - }); + test("Schema without type json object", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [{ role: "user", content: "Hello! " }], + response_format: { schema: "some json schema" }, + }; + postInitAndCheckFields(request); + }).toThrow( + "JSON schema is only supported with `json_object` response format.", + ); + }); - // Remove when we support image input (e.g. 
LlaVA model) - test('Image input is unsupported', () => { - expect(() => { - const request: ChatCompletionRequest = { - messages: [ - { - role: "user", - content: [ - { type: "text", text: "What is in this image?" }, - { - type: "image_url", - image_url: { url: "https://url_here.jpg" }, - }, - ], - }, - ], - }; - postInitAndCheckFields(request) - }).toThrow("User message only supports string `content` for now"); - }); + // Remove when we support image input (e.g. LlaVA model) + test("Image input is unsupported", () => { + expect(() => { + const request: ChatCompletionRequest = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "What is in this image?" }, + { + type: "image_url", + image_url: { url: "https://url_here.jpg" }, + }, + ], + }, + ], + }; + postInitAndCheckFields(request); + }).toThrow("User message only supports string `content` for now"); + }); }); -describe('Supported requests', () => { - test('Supproted chat completion request', () => { - const request: ChatCompletionRequest = { - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "Hello! " }, - { role: "assistant", content: "How can I help you? " }, - { role: "user", content: "Give me 5 US states. " }, - ], - n: 3, - temperature: 1.5, - max_gen_len: 25, - frequency_penalty: 0.2, - seed: 42, - logprobs: true, - top_logprobs: 2, - logit_bias: { - "13813": -100, - "10319": 5, - "7660": 5, - }, - }; - postInitAndCheckFields(request) - }); -}) +describe("Supported requests", () => { + test("Supproted chat completion request", () => { + const request: ChatCompletionRequest = { + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello! " }, + { role: "assistant", content: "How can I help you? " }, + { role: "user", content: "Give me 5 US states. " }, + ], + n: 3, + temperature: 1.5, + max_gen_len: 25, + frequency_penalty: 0.2, + seed: 42, + logprobs: true, + top_logprobs: 2, + logit_bias: { + "13813": -100, + "10319": 5, + "7660": 5, + }, + }; + postInitAndCheckFields(request); + }); +}); From 45f41e6c2c06f23715fae1001dd4630fc1b2e999 Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Wed, 29 May 2024 23:23:41 -0400 Subject: [PATCH 2/3] [ModelRecord] Update model_lib_url to model_lib, and model_url to model --- README.md | 8 +- .../function-calling/src/function_calling.ts | 44 ++-- examples/get-started/src/get_started.ts | 4 +- src/cache_util.ts | 20 +- src/config.ts | 214 +++++++++--------- src/engine.ts | 8 +- src/utils.ts | 4 +- .../src/vram_requirements.ts | 2 +- 8 files changed, 148 insertions(+), 156 deletions(-) diff --git a/README.md b/README.md index 00e9111e..5c240c90 100644 --- a/README.md +++ b/README.md @@ -247,8 +247,8 @@ on how to add new model weights and libraries to WebLLM. Here, we go over the high-level idea. There are two elements of the WebLLM package that enables new models and weight variants. -- `model_url`: Contains a URL to model artifacts, such as weights and meta-data. -- `model_lib_url`: A URL to the web assembly library (i.e. wasm file) that contains the executables to accelerate the model computations. +- `model`: Contains a URL to model artifacts, such as weights and meta-data. +- `model_lib`: A URL to the web assembly library (i.e. wasm file) that contains the executables to accelerate the model computations. Both are customizable in the WebLLM. 
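The README's own configuration snippet is updated to the new field names in the hunk below. As a minimal companion sketch (the `/url/to/...` paths and the `MyLlama-3b-v1-q4f32_0` id are placeholders taken from that snippet, not real artifacts), this is how such a record is passed to the engine once the fields are renamed; the sketch also includes the comma after `model_id` that the README example omits:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function main() {
  // Placeholder paths and id, mirroring the README snippet; point them at
  // your own compiled weights and wasm library.
  const appConfig: webllm.AppConfig = {
    model_list: [
      {
        model: "/url/to/my/llama", // was `model_url`
        model_id: "MyLlama-3b-v1-q4f32_0",
        model_lib: "/url/to/myllama3b.wasm", // was `model_lib_url`
      },
    ],
  };
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    "MyLlama-3b-v1-q4f32_0",
    { appConfig: appConfig },
  );
  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Hello!" }],
  });
  console.log(reply.choices[0].message.content);
}

main();
```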
@@ -257,9 +257,9 @@ async main() { const appConfig = { "model_list": [ { - "model_url": "/url/to/my/llama", + "model": "/url/to/my/llama", "model_id": "MyLlama-3b-v1-q4f32_0" - "model_lib_url": "/url/to/myllama3b.wasm", + "model_lib": "/url/to/myllama3b.wasm", } ], }; diff --git a/examples/function-calling/src/function_calling.ts b/examples/function-calling/src/function_calling.ts index 21b9605a..a2dd2b84 100644 --- a/examples/function-calling/src/function_calling.ts +++ b/examples/function-calling/src/function_calling.ts @@ -8,25 +8,25 @@ function setLabel(id: string, text: string) { label.innerText = text; } - async function main() { - const myAppConfig: webllm.AppConfig = { model_list: [ { - "model_url": "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC/resolve/main/", - "model_id": "gorilla-openfunctions-v2-q4f16_1", - "model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gorilla-openfunctions-v2/gorilla-openfunctions-v2-q4f16_1.wasm", + model: + "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC/resolve/main/", + model_id: "gorilla-openfunctions-v2-q4f16_1", + model_lib: + "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gorilla-openfunctions-v2/gorilla-openfunctions-v2-q4f16_1.wasm", }, - ] - } + ], + }; const initProgressCallback = (report: webllm.InitProgressReport) => { setLabel("init-label", report.text); }; - const selectedModel = "gorilla-openfunctions-v2-q4f16_1" + const selectedModel = "gorilla-openfunctions-v2-q4f16_1"; const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( selectedModel, - { appConfig: myAppConfig, initProgressCallback: initProgressCallback } + { appConfig: myAppConfig, initProgressCallback: initProgressCallback }, ); const tools: Array = [ @@ -36,26 +36,30 @@ async function main() { name: "get_current_weather", description: "Get the current weather in a given location", parameters: { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", }, - "unit": { "type": "string", "enum": ["celsius", "fahrenheit"] }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, }, - "required": ["location"], + required: ["location"], }, }, - } - ] + }, + ]; const request: webllm.ChatCompletionRequest = { stream: false, messages: [ - { "role": "user", "content": "What is the current weather in celsius in Pittsburgh and Tokyo?" 
}, + { + role: "user", + content: + "What is the current weather in celsius in Pittsburgh and Tokyo?", + }, ], - tool_choice: 'auto', + tool_choice: "auto", tools: tools, }; diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index e083a57c..0f0e6476 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -23,9 +23,9 @@ async function main() { // const appConfig: webllm.AppConfig = { // model_list: [ // { - // "model_url": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + // "model": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", // "model_id": "Llama-3-8B-Instruct-q4f32_1-MLC", - // "model_lib_url": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", + // "model_lib": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // }, // ] // }; diff --git a/src/cache_util.ts b/src/cache_util.ts index 5fe766fe..8aa2cace 100644 --- a/src/cache_util.ts +++ b/src/cache_util.ts @@ -8,7 +8,7 @@ function findModelRecord(modelId: string, appConfig?: AppConfig): ModelRecord { if (matchedItem !== undefined) { return matchedItem; } - throw Error("Cannot find model_url for " + modelId); + throw Error("Cannot find model record in appConfig for " + modelId); } export async function hasModelInCache( @@ -19,7 +19,7 @@ export async function hasModelInCache( appConfig = prebuiltAppConfig; } const modelRecord = findModelRecord(modelId, appConfig); - const modelUrl = modelRecord.model_url; + const modelUrl = modelRecord.model; const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache"; return tvmjs.hasNDArrayInCache(modelUrl, "webllm/model", cacheType); } @@ -51,21 +51,17 @@ export async function deleteModelInCache( const modelRecord = findModelRecord(modelId, appConfig); let modelCache: tvmjs.ArtifactCacheTemplate; if (appConfig.useIndexedDBCache) { - tvmjs.deleteNDArrayCache( - modelRecord.model_url, - "webllm/model", - "indexeddb", - ); + tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "indexeddb"); modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model"); } else { - tvmjs.deleteNDArrayCache(modelRecord.model_url, "webllm/model", "cache"); + tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "cache"); modelCache = new tvmjs.ArtifactCache("webllm/model"); } await modelCache.deleteInCache( - new URL("tokenizer.model", modelRecord.model_url).href, + new URL("tokenizer.model", modelRecord.model).href, ); await modelCache.deleteInCache( - new URL("tokenizer.json", modelRecord.model_url).href, + new URL("tokenizer.json", modelRecord.model).href, ); } @@ -84,7 +80,7 @@ export async function deleteChatConfigInCache( } else { configCache = new tvmjs.ArtifactCache("webllm/config"); } - const configUrl = new URL("mlc-chat-config.json", modelRecord.model_url).href; + const configUrl = new URL("mlc-chat-config.json", modelRecord.model).href; await configCache.deleteInCache(configUrl); } @@ -103,5 +99,5 @@ export async function deleteModelWasmInCache( } else { wasmCache = new tvmjs.ArtifactCache("webllm/wasm"); } - await wasmCache.deleteInCache(modelRecord.model_lib_url); + await wasmCache.deleteInCache(modelRecord.model_lib); } diff --git a/src/config.ts b/src/config.ts index 93bb2ce2..170a3765 100644 --- a/src/config.ts +++ b/src/config.ts @@ -236,9 +236,9 @@ export function postInitAndCheckGenerationConfigValues( /** * Information for a 
model. - * @param model_url: the huggingface link to download the model weights. + * @param model: the huggingface link to download the model weights. * @param model_id: what we call the model. - * @param model_lib_url: link to the model library (wasm file) the model uses. + * @param model_lib: link to the model library (wasm file) the model uses. * @param vram_required_MB: amount of vram in MB required to run the model (can use * `utils/vram_requirements` to calculate). * @param low_resource_required: whether the model can run on limited devices (e.g. Android phone). @@ -246,9 +246,9 @@ export function postInitAndCheckGenerationConfigValues( * @param required_features: feature needed to run this model (e.g. shader-f16). */ export interface ModelRecord { - model_url: string; + model: string; model_id: string; - model_lib_url: string; + model_lib: string; vram_required_MB?: number; low_resource_required?: boolean; buffer_size_required_bytes?: number; @@ -273,7 +273,7 @@ export interface AppConfig { /** * modelVersion: the prebuilt model libraries that the current npm is compatible with, affects the - * `model_lib_url`s in `prebuiltAppConfig`. + * `model_lib`s in `prebuiltAppConfig`. * * @note The model version does not have to match the npm version, since not each npm update * requires an update of the model libraries. @@ -293,10 +293,10 @@ export const prebuiltAppConfig: AppConfig = { model_list: [ // Llama-3 { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -304,10 +304,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -315,10 +315,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -326,10 +326,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -337,10 +337,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC/resolve/main/", model_id: "Llama-3-70B-Instruct-q3f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm", @@ -349,10 +349,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi3-mini-instruct { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", 
@@ -360,10 +360,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -371,10 +371,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -382,10 +382,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Phi-3-mini-4k-instruct-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -394,10 +394,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Llama-2 { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -405,10 +405,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -417,10 +417,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -428,10 +428,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-7b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -440,10 +440,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/Llama-2-13b-chat-hf-q4f16_1-MLC/resolve/main/", model_id: "Llama-2-13b-chat-hf-q4f16_1", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-2-13b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -453,10 +453,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Mistral variants { - model_url: + model: "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/", model_id: "WizardMath-7B-V1.1-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -465,10 +465,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/", model_id: 
"Mistral-7B-Instruct-v0.2-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -477,10 +477,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -489,10 +489,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -502,10 +502,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Hermes-2 Pro { - model_url: + model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/", model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -513,10 +513,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC/resolve/main/", model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -524,10 +524,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/", model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.2-q4f16_1-sw4k_cs1k-webgpu.wasm", @@ -537,10 +537,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Gemma-2B { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -550,10 +550,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -562,10 +562,10 @@ export const prebuiltAppConfig: AppConfig = { buffer_size_required_bytes: 262144000, }, { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -575,10 +575,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", model_id: "gemma-2b-it-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/gemma-2b-it-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -588,10 +588,10 @@ export const prebuiltAppConfig: AppConfig = { }, // Qwen-1.5-1.8B { - 
model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -599,10 +599,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -610,10 +610,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -621,10 +621,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/Qwen1.5-1.8B-Chat-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -633,10 +633,10 @@ export const prebuiltAppConfig: AppConfig = { }, // StableLM-zephyr-1.6B { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f16_1-ctx4k_cs1k-webgpu.wasm", @@ -644,10 +644,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f32_1-ctx4k_cs1k-webgpu.wasm", @@ -655,10 +655,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -666,10 +666,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/stablelm-2-zephyr-1_6b-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -678,10 +678,10 @@ export const prebuiltAppConfig: AppConfig = { }, // RedPajama { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -690,10 +690,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", - model_lib_url: + model_lib: 
modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -701,10 +701,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -713,10 +713,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -725,10 +725,9 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-2 { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", model_id: "phi-2-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -737,10 +736,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", model_id: "phi-2-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -748,10 +746,9 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", model_id: "phi-2-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -760,10 +757,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", model_id: "phi-2-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-2-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -772,10 +768,9 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-1.5 { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", model_id: "phi-1_5-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -784,10 +779,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", model_id: "phi-1_5-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -795,10 +789,9 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", model_id: "phi-1_5-q4f16_1-MLC-1k", - 
model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -807,10 +800,9 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: - "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", model_id: "phi-1_5-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/phi-1_5-q4f32_1-ctx1k_cs1k-webgpu.wasm", @@ -819,10 +811,10 @@ export const prebuiltAppConfig: AppConfig = { }, // TinyLlama { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx2k_cs1k-webgpu.wasm", @@ -831,10 +823,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx2k_cs1k-webgpu.wasm", @@ -842,10 +834,10 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx1k_cs1k-webgpu.wasm", @@ -854,10 +846,10 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model_url: + model: "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", - model_lib_url: + model_lib: modelLibURLPrefix + modelVersion + "/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx1k_cs1k-webgpu.wasm", diff --git a/src/engine.ts b/src/engine.ts index 316f91f2..de17c23a 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -144,7 +144,7 @@ export class MLCEngine implements MLCEngineInterface { typeof document !== "undefined" ? document.URL : globalThis.location.origin; - let modelUrl = modelRecord.model_url; + let modelUrl = modelRecord.model; if (!modelUrl.startsWith("http")) { modelUrl = new URL(modelUrl, baseUrl).href; } @@ -171,12 +171,12 @@ export class MLCEngine implements MLCEngineInterface { wasmCache = new tvmjs.ArtifactCache("webllm/wasm"); } - const wasmUrl = modelRecord.model_lib_url; + const wasmUrl = modelRecord.model_lib; if (wasmUrl === undefined) { throw Error( - 'Missing `model_lib_url` for the model with ID "' + + 'Missing `model_lib` for the model with ID "' + modelRecord.model_id + - '". Please ensure that `model_lib_url` is provided in `model_list` for each model. This URL is essential for downloading the WASM library necessary to run the model.', + '". Please ensure that `model_lib` is provided in `model_list` for each model. 
This URL is essential for downloading the WASM library necessary to run the model.', ); } const fetchWasmSource = async () => { diff --git a/src/utils.ts b/src/utils.ts index ad6cfcc0..7e2ce99c 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -35,9 +35,9 @@ export function areModelRecordsEqual( ): boolean { // Compare primitive fields if ( - record1.model_url !== record2.model_url || + record1.model !== record2.model || record1.model_id !== record2.model_id || - record1.model_lib_url !== record2.model_lib_url || + record1.model_lib !== record2.model_lib || record1.vram_required_MB !== record2.vram_required_MB || record1.low_resource_required !== record2.low_resource_required || record1.buffer_size_required_bytes !== record2.buffer_size_required_bytes diff --git a/utils/vram_requirements/src/vram_requirements.ts b/utils/vram_requirements/src/vram_requirements.ts index 14988fcc..0f622ef5 100644 --- a/utils/vram_requirements/src/vram_requirements.ts +++ b/utils/vram_requirements/src/vram_requirements.ts @@ -30,7 +30,7 @@ async function main() { const modelRecord: ModelRecord = config.model_list[i]; const model_id = modelRecord.model_id; // 2. Load the wasm - const wasmUrl = modelRecord.model_lib_url; + const wasmUrl = modelRecord.model_lib; const wasmSource = await (await fetch(wasmUrl)).arrayBuffer(); report += `${model_id}: \n`; // 3. Initialize tvmjs instance and virtual machine using the wasm From c5adc8c38b5166700a1f12907a04fcb22479ce35 Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Thu, 30 May 2024 00:26:52 -0400 Subject: [PATCH 3/3] Remove resolve/main from model record input --- .../function-calling/src/function_calling.ts | 2 +- examples/get-started/src/get_started.ts | 2 +- src/cache_util.ts | 19 ++- src/config.ts | 125 +++++++----------- src/engine.ts | 3 +- src/support.ts | 17 +++ tests/util.test.ts | 36 ++++- 7 files changed, 114 insertions(+), 90 deletions(-) diff --git a/examples/function-calling/src/function_calling.ts b/examples/function-calling/src/function_calling.ts index a2dd2b84..b2ef1ac5 100644 --- a/examples/function-calling/src/function_calling.ts +++ b/examples/function-calling/src/function_calling.ts @@ -13,7 +13,7 @@ async function main() { model_list: [ { model: - "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/gorilla-openfunctions-v2-q4f16_1-MLC", model_id: "gorilla-openfunctions-v2-q4f16_1", model_lib: "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gorilla-openfunctions-v2/gorilla-openfunctions-v2-q4f16_1.wasm", diff --git a/examples/get-started/src/get_started.ts b/examples/get-started/src/get_started.ts index 0f0e6476..4e209efc 100644 --- a/examples/get-started/src/get_started.ts +++ b/examples/get-started/src/get_started.ts @@ -23,7 +23,7 @@ async function main() { // const appConfig: webllm.AppConfig = { // model_list: [ // { - // "model": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + // "model": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC", // "model_id": "Llama-3-8B-Instruct-q4f32_1-MLC", // "model_lib": webllm.modelLibURLPrefix + webllm.modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // }, diff --git a/src/cache_util.ts b/src/cache_util.ts index 8aa2cace..2e60f932 100644 --- a/src/cache_util.ts +++ b/src/cache_util.ts @@ -1,5 +1,6 @@ import * as tvmjs from "tvmjs"; import { AppConfig, ModelRecord, prebuiltAppConfig } from "./config"; +import 
{ cleanModelUrl } from "./support"; function findModelRecord(modelId: string, appConfig?: AppConfig): ModelRecord { const matchedItem = appConfig?.model_list.find( @@ -19,7 +20,7 @@ export async function hasModelInCache( appConfig = prebuiltAppConfig; } const modelRecord = findModelRecord(modelId, appConfig); - const modelUrl = modelRecord.model; + const modelUrl = cleanModelUrl(modelRecord.model); const cacheType = appConfig.useIndexedDBCache ? "indexeddb" : "cache"; return tvmjs.hasNDArrayInCache(modelUrl, "webllm/model", cacheType); } @@ -49,20 +50,17 @@ export async function deleteModelInCache( appConfig = prebuiltAppConfig; } const modelRecord = findModelRecord(modelId, appConfig); + const modelUrl = cleanModelUrl(modelRecord.model); let modelCache: tvmjs.ArtifactCacheTemplate; if (appConfig.useIndexedDBCache) { - tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "indexeddb"); + tvmjs.deleteNDArrayCache(modelUrl, "webllm/model", "indexeddb"); modelCache = new tvmjs.ArtifactIndexedDBCache("webllm/model"); } else { - tvmjs.deleteNDArrayCache(modelRecord.model, "webllm/model", "cache"); + tvmjs.deleteNDArrayCache(modelUrl, "webllm/model", "cache"); modelCache = new tvmjs.ArtifactCache("webllm/model"); } - await modelCache.deleteInCache( - new URL("tokenizer.model", modelRecord.model).href, - ); - await modelCache.deleteInCache( - new URL("tokenizer.json", modelRecord.model).href, - ); + await modelCache.deleteInCache(new URL("tokenizer.model", modelUrl).href); + await modelCache.deleteInCache(new URL("tokenizer.json", modelUrl).href); } export async function deleteChatConfigInCache( @@ -80,7 +78,8 @@ export async function deleteChatConfigInCache( } else { configCache = new tvmjs.ArtifactCache("webllm/config"); } - const configUrl = new URL("mlc-chat-config.json", modelRecord.model).href; + const modelUrl = cleanModelUrl(modelRecord.model); + const configUrl = new URL("mlc-chat-config.json", modelUrl).href; await configCache.deleteInCache(configUrl); } diff --git a/src/config.ts b/src/config.ts index 170a3765..5eb049f9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -293,8 +293,7 @@ export const prebuiltAppConfig: AppConfig = { model_list: [ // Llama-3 { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -304,8 +303,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -315,8 +313,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC", model_id: "Llama-3-8B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -326,8 +323,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC", model_id: "Llama-3-8B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -337,8 +333,7 @@ export const prebuiltAppConfig: AppConfig = { 
low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC", model_id: "Llama-3-70B-Instruct-q3f16_1-MLC", model_lib: modelLibURLPrefix + @@ -349,8 +344,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi3-mini-instruct { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -360,8 +354,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -371,8 +364,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -382,8 +374,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC", model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -394,8 +385,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Llama-2 { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -405,8 +395,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -417,8 +406,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -428,8 +416,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC", model_id: "Llama-2-7b-chat-hf-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -440,8 +427,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/Llama-2-13b-chat-hf-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Llama-2-13b-chat-hf-q4f16_1-MLC", model_id: "Llama-2-13b-chat-hf-q4f16_1", model_lib: modelLibURLPrefix + @@ -453,8 +439,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Mistral variants { - model: - "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/", + model: 
"https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC", model_id: "WizardMath-7B-V1.1-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -466,7 +451,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC", model_id: "Mistral-7B-Instruct-v0.2-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -478,7 +463,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", model_id: "OpenHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -490,7 +475,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", model_id: "NeuralHermes-2.5-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -503,7 +488,7 @@ export const prebuiltAppConfig: AppConfig = { // Hermes-2 Pro { model: - "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -514,7 +499,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -525,7 +510,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -537,8 +522,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Gemma-2B { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", model_id: "gemma-2b-it-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -550,8 +534,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", model_id: "gemma-2b-it-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -562,8 +545,7 @@ export const prebuiltAppConfig: AppConfig = { buffer_size_required_bytes: 262144000, }, { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC", model_id: "gemma-2b-it-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -575,8 +557,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: - "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC", model_id: "gemma-2b-it-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -588,8 +569,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Qwen-1.5-1.8B { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", + model: 
"https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -599,8 +579,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -610,8 +589,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f16_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -621,8 +599,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/Qwen1.5-1.8B-Chat-q4f32_1-MLC", model_id: "Qwen1.5-1.8B-Chat-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -633,8 +610,7 @@ export const prebuiltAppConfig: AppConfig = { }, // StableLM-zephyr-1.6B { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -644,8 +620,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -655,8 +630,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -666,8 +640,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: - "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC", model_id: "stablelm-2-zephyr-1_6b-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -679,7 +652,7 @@ export const prebuiltAppConfig: AppConfig = { // RedPajama { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -691,7 +664,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -702,7 +675,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -714,7 +687,7 
@@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC", model_id: "RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -725,7 +698,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-2 { - model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC", model_id: "phi-2-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -736,7 +709,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC", model_id: "phi-2-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -746,7 +719,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: false, }, { - model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC", model_id: "phi-2-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -757,7 +730,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC", model_id: "phi-2-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -768,7 +741,7 @@ export const prebuiltAppConfig: AppConfig = { }, // Phi-1.5 { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC", model_id: "phi-1_5-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -779,7 +752,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC", model_id: "phi-1_5-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -789,7 +762,7 @@ export const prebuiltAppConfig: AppConfig = { low_resource_required: true, }, { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC", model_id: "phi-1_5-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -800,7 +773,7 @@ export const prebuiltAppConfig: AppConfig = { required_features: ["shader-f16"], }, { - model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/", + model: "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC", model_id: "phi-1_5-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -812,7 +785,7 @@ export const prebuiltAppConfig: AppConfig = { // TinyLlama { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_lib: modelLibURLPrefix + @@ -824,7 +797,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_lib: modelLibURLPrefix + @@ -835,7 +808,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC/resolve/main/", + 
"https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + @@ -847,7 +820,7 @@ export const prebuiltAppConfig: AppConfig = { }, { model: - "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC/resolve/main/", + "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC", model_id: "TinyLlama-1.1B-Chat-v0.4-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + diff --git a/src/engine.ts b/src/engine.ts index de17c23a..66785ecc 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -39,6 +39,7 @@ import { compareConversationObject, getConversation, } from "./conversation"; +import { cleanModelUrl } from "./support"; const ERROR_WEBGPU_NOT_AVAILABLE = new Error( "WebGPU is not supported in your current environment, but it is necessary to run the WebLLM engine. " + @@ -144,7 +145,7 @@ export class MLCEngine implements MLCEngineInterface { typeof document !== "undefined" ? document.URL : globalThis.location.origin; - let modelUrl = modelRecord.model; + let modelUrl = cleanModelUrl(modelRecord.model); if (!modelUrl.startsWith("http")) { modelUrl = new URL(modelUrl, baseUrl).href; } diff --git a/src/support.ts b/src/support.ts index 8502283a..6e2961df 100644 --- a/src/support.ts +++ b/src/support.ts @@ -63,3 +63,20 @@ export function getTokenTableFromTokenizer(tokenizer: Tokenizer): string[] { } return tokenTable; } + +/** + * Postprocess the suffix of ModelRecord.model to be "/resolve/main/". + * e.g. https://huggingface.co/mlc-ai/OpenHermes-2.5-Mistral-7B-q4f16_1-MLC/resolve/main/ + * @return the href of the final URL. + */ +export function cleanModelUrl(modelUrl: string): string { + if (modelUrl.endsWith("resolve/main") || modelUrl.endsWith("resolve/main/")) { + throw Error( + "Expect ModelRecord.model to not include `resolve/main` suffix.", + ); + } + // https://huggingface.co/USER/MODEL -> https://huggingface.co/USER/MODEL/ + modelUrl += modelUrl.endsWith("/") ? 
"" : "/"; + // https://huggingface.co/USER/MODEL/ -> https://huggingface.co/USER/MODEL/resolve/main/ + return new URL("resolve/main/", modelUrl).href; +} diff --git a/tests/util.test.ts b/tests/util.test.ts index fe91d86e..69893ff3 100644 --- a/tests/util.test.ts +++ b/tests/util.test.ts @@ -1,4 +1,4 @@ -import { getTopProbs } from "../src/support"; +import { cleanModelUrl, getTopProbs } from "../src/support"; describe("Check getTopLogprobs correctness", () => { test("Correctness test 1", () => { @@ -26,3 +26,37 @@ describe("Check getTopLogprobs correctness", () => { expect(topLogProbs).toEqual([]); }); }); + +describe("Test clean model URL", () => { + test("Already have resolve/main, throw error", () => { + expect(() => { + const input = "https://huggingface.co/mlc-ai/model/resolve/main"; + cleanModelUrl(input); + }).toThrow( + "Expect ModelRecord.model to not include `resolve/main` suffix.", + ); + }); + + test("Already have resolve/main/, throw error", () => { + expect(() => { + const input = "https://huggingface.co/mlc-ai/model/resolve/main/"; + cleanModelUrl(input); + }).toThrow( + "Expect ModelRecord.model to not include `resolve/main` suffix.", + ); + }); + + test("Input does not have /", () => { + const input = "https://huggingface.co/mlc-ai/model"; + const output = cleanModelUrl(input); + const expected = "https://huggingface.co/mlc-ai/model/resolve/main/"; + expect(output).toEqual(expected); + }); + + test("Input has /", () => { + const input = "https://huggingface.co/mlc-ai/model/"; + const output = cleanModelUrl(input); + const expected = "https://huggingface.co/mlc-ai/model/resolve/main/"; + expect(output).toEqual(expected); + }); +});