diff --git a/examples/vision-model/README.md b/examples/vision-model/README.md
new file mode 100644
index 00000000..7450aad8
--- /dev/null
+++ b/examples/vision-model/README.md
@@ -0,0 +1,14 @@
+# WebLLM Get Started App
+
+This folder provides a minimal demo of using the WebLLM vision-model API in a web app setting.
+To try it out, run the following commands in this folder:
+
+```bash
+npm install
+npm start
+```
+
+If you would like to hack on the WebLLM core package, change the web-llm
+dependency to `"file:../.."` and follow the build-from-source instructions
+in the project to build WebLLM locally. This option is only recommended
+if you need to modify the WebLLM core package.
diff --git a/examples/vision-model/package.json b/examples/vision-model/package.json
new file mode 100644
index 00000000..7950e36b
--- /dev/null
+++ b/examples/vision-model/package.json
@@ -0,0 +1,20 @@
+{
+  "name": "get-started",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "start": "parcel src/vision_model.html --port 8888",
+    "build": "parcel build src/vision_model.html --dist-dir lib"
+  },
+  "devDependencies": {
+    "buffer": "^5.7.1",
+    "parcel": "^2.8.3",
+    "process": "^0.11.10",
+    "tslib": "^2.3.1",
+    "typescript": "^4.9.5",
+    "url": "^0.11.3"
+  },
+  "dependencies": {
+    "@mlc-ai/web-llm": "file:../.."
+  }
+}
diff --git a/examples/vision-model/src/vision_model.html b/examples/vision-model/src/vision_model.html
new file mode 100644
index 00000000..fb50b530
--- /dev/null
+++ b/examples/vision-model/src/vision_model.html
@@ -0,0 +1,23 @@
+<!doctype html>
+<html>
+  <script>
+    webLLMGlobal = {};
+  </script>
+  <body>
+    <h2>WebLLM Test Page</h2>
+    Open console to see output
+    <br />
+    <br />
+    <label id="init-label"> </label>
+
+    <h3>Prompt</h3>
+    <label id="prompt-label"> </label>
+
+    <h3>Response</h3>
+    <label id="generate-label"> </label>
+    <br />
+    <label id="stats-label"> </label>
+
+    <script type="module" src="./vision_model.ts"></script>
+  </body>
+</html>
diff --git a/examples/vision-model/src/vision_model.ts b/examples/vision-model/src/vision_model.ts
new file mode 100644
index 00000000..411e4a70
--- /dev/null
+++ b/examples/vision-model/src/vision_model.ts
@@ -0,0 +1,94 @@
+import * as webllm from "@mlc-ai/web-llm";
+
+function setLabel(id: string, text: string) {
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
+}
+
+async function main() {
+  const initProgressCallback = (report: webllm.InitProgressReport) => {
+    setLabel("init-label", report.text);
+  };
+  const selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC";
+  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
+    selectedModel,
+    {
+      initProgressCallback: initProgressCallback,
+      logLevel: "INFO", // specify the log level
+    },
+  );
+
+  // 1. Single image input (with choices)
+  const messages: webllm.ChatCompletionMessageParam[] = [
+    {
+      role: "system",
+      content:
+        "You are a helpful and honest assistant that answers question concisely.",
+    },
+    {
+      role: "user",
+      content: [
+        { type: "text", text: "List the items in the image concisely." },
+        {
+          type: "image_url",
+          image_url: {
+            url: "https://www.ilankelman.org/stopsigns/australia.jpg",
+          },
+        },
+      ],
+    },
+  ];
+  const request0: webllm.ChatCompletionRequest = {
+    stream: false, // can be streaming, same behavior
+    messages: messages,
+  };
+  const reply0 = await engine.chat.completions.create(request0);
+  const replyMessage0 = await engine.getMessage();
+  console.log(reply0);
+  console.log(replyMessage0);
+  console.log(reply0.usage);
+
+  // 2. A follow up text-only question
+  messages.push({ role: "assistant", content: replyMessage0 });
+  messages.push({ role: "user", content: "What is special about this image?" });
+  const request1: webllm.ChatCompletionRequest = {
+    stream: false, // can be streaming, same behavior
+    messages: messages,
+  };
+  const reply1 = await engine.chat.completions.create(request1);
+  const replyMessage1 = await engine.getMessage();
+  console.log(reply1);
+  console.log(replyMessage1);
+  console.log(reply1.usage);
+
+  // 3. A follow up multi-image question
+  messages.push({ role: "assistant", content: replyMessage1 });
+  messages.push({
+    role: "user",
+    content: [
+      { type: "text", text: "What about these two images? Answer concisely." },
+      {
+        type: "image_url",
+        image_url: { url: "https://www.ilankelman.org/eiffeltower.jpg" },
+      },
+      {
+        type: "image_url",
+        image_url: { url: "https://www.ilankelman.org/sunset.jpg" },
+      },
+    ],
+  });
+  const request2: webllm.ChatCompletionRequest = {
+    stream: false, // can be streaming, same behavior
+    messages: messages,
+  };
+  const reply2 = await engine.chat.completions.create(request2);
+  const replyMessage2 = await engine.getMessage();
+  console.log(reply2);
+  console.log(replyMessage2);
+  console.log(reply2.usage);
+}
+
+main();
diff --git a/src/config.ts b/src/config.ts
index 2e7430d1..6184dc37 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -229,6 +229,7 @@ export function postInitAndCheckGenerationConfigValues(
 export enum ModelType {
   "LLM",
   "embedding",
+  "VLM", // vision-language model
 }
 
 /**
@@ -512,6 +513,37 @@ export const prebuiltAppConfig: AppConfig = {
         context_window_size: 1024,
       },
     },
+    // Phi-3.5-vision-instruct
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Phi-3.5-vision-instruct-q4f16_1-MLC",
+      model_id: "Phi-3.5-vision-instruct-q4f16_1-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Phi-3.5-vision-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+      vram_required_MB: 3952.18,
+      low_resource_required: true,
+      overrides: {
+        context_window_size: 4096,
+      },
+      model_type: ModelType.VLM,
+    },
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Phi-3.5-vision-instruct-q4f32_1-MLC",
+      model_id: "Phi-3.5-vision-instruct-q4f32_1-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Phi-3.5-vision-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
+      vram_required_MB: 5879.84,
+      low_resource_required: true,
+      overrides: {
+        context_window_size: 4096,
+      },
+      model_type: ModelType.VLM,
+    },
     // Mistral variants
     {
       model:
diff --git a/src/conversation.ts b/src/conversation.ts
index 916f2a22..5fa0dd83 100644
--- a/src/conversation.ts
+++ b/src/conversation.ts
@@ -139,7 +139,7 @@ export class Conversation {
    */
   getPromptArrayLastRound() {
     if (this.isTextCompletion) {
-      throw new TextCompletionConversationError("getPromptyArrayLastRound");
+      throw new TextCompletionConversationError("getPromptArrayLastRound");
     }
     if (this.messages.length < 3) {
       throw Error("needs to call getPromptArray for the first message");
@@ -346,7 +346,7 @@ export function getConversationFromChatCompletionRequest(
  * encounter invalid request.
  *
  * @param request The chatCompletionRequest we are about to prefill for.
- * @returns The string used to set Conversatoin.function_string
+ * @returns The string used to set Conversation.function_string
  */
 export function getFunctionCallUsage(request: ChatCompletionRequest): string {
   if (
diff --git a/src/engine.ts b/src/engine.ts
index f201cbec..5a57325c 100644
--- a/src/engine.ts
+++ b/src/engine.ts
@@ -119,6 +119,8 @@ export class MLCEngine implements MLCEngineInterface {
   >;
   /** Maps each loaded model's modelId to its chatConfig */
   private loadedModelIdToChatConfig: Map<string, ChatConfig>;
+  /** Maps each loaded model's modelId to its modelType */
+  private loadedModelIdToModelType: Map<string, ModelType>;
   /** Maps each loaded model's modelId to a lock. Ensures
    * each model only processes one request at a time.
    */
@@ -141,6 +143,7 @@ export class MLCEngine implements MLCEngineInterface {
       LLMChatPipeline | EmbeddingPipeline
     >();
     this.loadedModelIdToChatConfig = new Map<string, ChatConfig>();
+    this.loadedModelIdToModelType = new Map<string, ModelType>();
     this.loadedModelIdToLock = new Map<string, CustomLock>();
     this.appConfig = engineConfig?.appConfig || prebuiltAppConfig;
     this.setLogLevel(engineConfig?.logLevel || DefaultLogLevel);
@@ -239,6 +242,7 @@ export class MLCEngine implements MLCEngineInterface {
     const logitProcessor = this.logitProcessorRegistry?.get(modelId);
     const tstart = performance.now();
 
+    // look up and parse model record, record model type
     const modelRecord = findModelRecord(modelId, this.appConfig);
     const baseUrl =
       typeof document !== "undefined"
@@ -248,7 +252,13 @@ export class MLCEngine implements MLCEngineInterface {
     if (!modelUrl.startsWith("http")) {
       modelUrl = new URL(modelUrl, baseUrl).href;
     }
+    const modelType =
+      modelRecord.model_type === undefined || modelRecord.model_type === null
+        ? ModelType.LLM
+        : modelRecord.model_type;
+    this.loadedModelIdToModelType.set(modelId, modelType);
 
+    // instantiate cache
     let configCache: tvmjs.ArtifactCacheTemplate;
     if (this.appConfig.useIndexedDBCache) {
       configCache = new tvmjs.ArtifactIndexedDBCache("webllm/config");
@@ -409,6 +419,7 @@ export class MLCEngine implements MLCEngineInterface {
     }
     this.loadedModelIdToPipeline.clear();
     this.loadedModelIdToChatConfig.clear();
+    this.loadedModelIdToModelType.clear();
     this.loadedModelIdToLock.clear();
     this.deviceLostIsError = true;
     if (this.reloadController) {
@@ -737,7 +748,13 @@ export class MLCEngine implements MLCEngineInterface {
     // 0. Check model loaded and preprocess inputs
     const [selectedModelId, selectedPipeline, selectedChatConfig] =
       this.getLLMStates("ChatCompletionRequest", request.model);
-    API.postInitAndCheckFieldsChatCompletion(request, selectedModelId);
+    const selectedModelType =
+      this.loadedModelIdToModelType.get(selectedModelId);
+    API.postInitAndCheckFieldsChatCompletion(
+      request,
+      selectedModelId,
+      selectedModelType!,
+    );
     const genConfig: GenerationConfig = {
       frequency_penalty: request.frequency_penalty,
       presence_penalty: request.presence_penalty,
diff --git a/src/error.ts b/src/error.ts
index ae09a90f..496a9dfc 100644
--- a/src/error.ts
+++ b/src/error.ts
@@ -124,19 +124,39 @@ export class ContentTypeError extends Error {
   }
 }
 
-export class UserMessageContentError extends Error {
-  constructor(content: any) {
+export class UnsupportedRoleError extends Error {
+  constructor(role: string) {
+    super(`Unsupported role of message: ${role}`);
+    this.name = "UnsupportedRoleError";
+  }
+}
+
+export class UserMessageContentErrorForNonVLM extends Error {
+  constructor(modelId: string, modelType: string, content: any) {
     super(
-      `User message only supports string content for now, but received: ${content}`,
+      `The model loaded is not of type ModelType.VLM (vision-language model). ` +
+        `Therefore, user message only supports string content, but received: ${content}\n` +
+        `Loaded modelId: ${modelId}, modelType: ${modelType}`,
     );
-    this.name = "UserMessageContentError";
+    this.name = "UserMessageContentErrorForNonVLM";
   }
 }
 
-export class UnsupportedRoleError extends Error {
-  constructor(role: string) {
-    super(`Unsupported role of message: ${role}`);
-    this.name = "UnsupportedRoleError";
+export class UnsupportedDetailError extends Error {
+  constructor(detail: string) {
+    super(
+      `Currently do not support field image_url.detail, but received: ${detail}`,
+    );
+    this.name = "UnsupportedDetailError";
+  }
+}
+
+export class MultipleTextContentError extends Error {
+  constructor(numTextContent: number) {
+    super(
+      `Each message can have at most one text contentPart, but received: ${numTextContent}`,
+    );
+    this.name = "MultipleTextContentError";
   }
 }
 
diff --git a/src/openai_api_protocols/chat_completion.ts b/src/openai_api_protocols/chat_completion.ts
index 492b869f..c0c927d6 100644
--- a/src/openai_api_protocols/chat_completion.ts
+++ b/src/openai_api_protocols/chat_completion.ts
@@ -16,7 +16,11 @@
  */
 
 import { MLCEngineInterface } from "../types";
-import { functionCallingModelIds, MessagePlaceholders } from "../config";
+import {
+  functionCallingModelIds,
+  MessagePlaceholders,
+  ModelType,
+} from "../config";
 import {
   officialHermes2FunctionCallSchemaArray,
   hermes2FunctionCallingSystemPrompt,
@@ -27,12 +31,14 @@ import {
   InvalidResponseFormatError,
   InvalidStreamOptionsError,
   MessageOrderError,
+  MultipleTextContentError,
   SeedTypeError,
   StreamingCountError,
   SystemMessageOrderError,
+  UnsupportedDetailError,
   UnsupportedFieldsError,
   UnsupportedModelIdError,
-  UserMessageContentError,
+  UserMessageContentErrorForNonVLM,
 } from "../error";
 
 /* eslint-disable @typescript-eslint/no-namespace */
@@ -371,10 +377,12 @@ export const ChatCompletionRequestUnsupportedFields: Array<string> = []; // all
  * error or in-place update request.
  * @param request User's input request.
  * @param currentModelId The current model loaded that will perform this request.
+ * @param currentModelType The type of the model loaded, which decides what requests can be handled.
  */
 export function postInitAndCheckFields(
   request: ChatCompletionRequest,
   currentModelId: string,
+  currentModelType: ModelType,
 ): void {
   // Generation-related checks and post inits are in `postInitAndCheckGenerationConfigValues()`
   // 1. Check unsupported fields in request
@@ -391,10 +399,33 @@ export function postInitAndCheckFields(
   // 2. Check unsupported messages
   request.messages.forEach(
     (message: ChatCompletionMessageParam, index: number) => {
+      // Check content array messages (that are not simple string)
       if (message.role === "user" && typeof message.content !== "string") {
-        // ChatCompletionUserMessageParam
-        // Remove this when we support image input
-        throw new UserMessageContentError(message.content);
+        if (currentModelType !== ModelType.VLM) {
+          // Only VLM can handle non-string content (i.e. message with image)
+          throw new UserMessageContentErrorForNonVLM(
+            currentModelId,
+            ModelType[currentModelType],
+            message.content,
+          );
+        }
+        let numTextContent = 0;
+        for (let i = 0; i < message.content.length; i++) {
+          const curContent = message.content[i];
+          if (curContent.type === "image_url") {
+            // Do not support image_url.detail
+            const detail = curContent.image_url.detail;
+            if (detail !== undefined && detail !== null) {
+              throw new UnsupportedDetailError(detail);
+            }
+          } else {
+            numTextContent += 1;
+          }
+        }
+        if (numTextContent > 1) {
+          // Only one text contentPart per message
+          throw new MultipleTextContentError(numTextContent);
+        }
       }
       if (message.role === "system" && index !== 0) {
         throw new SystemMessageOrderError();
diff --git a/tests/openai_chat_completion.test.ts b/tests/openai_chat_completion.test.ts
index f7176650..f8dcf27f 100644
--- a/tests/openai_chat_completion.test.ts
+++ b/tests/openai_chat_completion.test.ts
@@ -8,7 +8,7 @@ import {
   hermes2FunctionCallingSystemPrompt,
   officialHermes2FunctionCallSchemaArray,
 } from "../src/support";
-import { MessagePlaceholders } from "../src/config";
+import { MessagePlaceholders, ModelType } from "../src/config";
 import { describe, expect, test } from "@jest/globals";
 
 describe("Check chat completion unsupported requests", () => {
@@ -18,7 +18,11 @@ describe("Check chat completion unsupported requests", () => {
       messages: [{ role: "user", content: "Hello! " }],
       stream_options: { include_usage: true },
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow("Only specify stream_options when stream=True.");
   });
@@ -29,7 +33,11 @@ describe("Check chat completion unsupported requests", () => {
       messages: [{ role: "user", content: "Hello! " }],
       stream_options: { include_usage: true },
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow("Only specify stream_options when stream=True.");
   });
@@ -42,7 +50,11 @@ describe("Check chat completion unsupported requests", () => {
         { role: "assistant", content: "Hello! How may I help you today?" },
       ],
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow("Last message should be from either `user` or `tool`.");
   });
@@ -56,7 +68,11 @@ describe("Check chat completion unsupported requests", () => {
         { role: "system", content: "You are a helpful assistant." },
       ],
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow(
       "System prompt should always be the first message in `messages`.",
     );
   });
@@ -69,7 +85,11 @@ describe("Check chat completion unsupported requests", () => {
       n: 2,
       messages: [{ role: "user", content: "Hello! " }],
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow("When streaming, `n` cannot be > 1.");
   });
@@ -80,7 +100,11 @@ describe("Check chat completion unsupported requests", () => {
       max_tokens: 10,
       seed: 42.2, // Note that Number.isInteger(42.0) is true
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow("`seed` should be an integer, but got");
   });
@@ -90,14 +114,17 @@ describe("Check chat completion unsupported requests", () => {
       messages: [{ role: "user", content: "Hello! " }],
       response_format: { schema: "some json schema" },
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow(
       "JSON schema is only supported with `json_object` response format.",
     );
   });
 
-  // Remove when we support image input (e.g. LlaVA model)
-  test("Image input is unsupported", () => {
+  test("image_url.detail is unsupported", () => {
     expect(() => {
       const request: ChatCompletionRequest = {
         messages: [
@@ -107,15 +134,79 @@ describe("Check chat completion unsupported requests", () => {
               { type: "text", text: "What is in this image?" },
               {
                 type: "image_url",
-                image_url: { url: "https://url_here.jpg" },
+                image_url: {
+                  url: "https://url_here.jpg",
+                  detail: "high",
+                },
               },
             ],
           },
         ],
       };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Phi-3.5-vision-instruct-q4f16_1-MLC",
+        ModelType.VLM,
+      );
     }).toThrow(
-      "User message only supports string content for now, but received:",
+      "Currently do not support field image_url.detail, but received: high",
     );
   });
+
+  test("User content cannot have multiple text content parts", () => {
+    expect(() => {
+      const request: ChatCompletionRequest = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "What is in this image?" },
+              {
+                type: "image_url",
+                image_url: {
+                  url: "https://url_here.jpg",
+                },
+              },
+              { type: "text", text: "Thank you." },
+            ],
+          },
+        ],
+      };
+      postInitAndCheckFields(
+        request,
+        "Phi-3.5-vision-instruct-q4f16_1-MLC",
+        ModelType.VLM,
+      );
+    }).toThrow(
+      "Each message can have at most one text contentPart, but received: 2",
+    );
+  });
+
+  test("Non-VLM cannot support non-string content", () => {
+    expect(() => {
+      const request: ChatCompletionRequest = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "What is in this image?" },
+              {
+                type: "image_url",
+                image_url: {
+                  url: "https://url_here.jpg",
+                },
+              },
+            ],
+          },
+        ],
+      };
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
+    }).toThrow(
+      "The model loaded is not of type ModelType.VLM (vision-language model).",
+    );
+  });
 });
@@ -142,7 +233,37 @@ describe("Supported requests", () => {
         "7660": 5,
       },
     };
-    postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+    postInitAndCheckFields(
+      request,
+      "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+      ModelType.LLM,
+    );
+  });
+
+  test("Support image input, single or multiple images", () => {
+    const request: ChatCompletionRequest = {
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "What is in this image?" },
+            {
+              type: "image_url",
+              image_url: { url: "https://url_here1.jpg" },
+            },
+            {
+              type: "image_url",
+              image_url: { url: "https://url_here2.jpg" },
+            },
+          ],
+        },
+      ],
+    };
+    postInitAndCheckFields(
+      request,
+      "Phi-3.5-vision-instruct-q4f16_1-MLC",
+      ModelType.VLM,
+    );
   });
 });
@@ -167,7 +288,11 @@ describe("Manual function calling", () => {
       },
     ],
   };
-  postInitAndCheckFields(request, "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC");
+  postInitAndCheckFields(
+    request,
+    "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC",
+    ModelType.LLM,
+  );
   });
 });
@@ -204,7 +329,11 @@ describe("OpenAI API function calling", () => {
         },
       ],
     };
-      postInitAndCheckFields(request, "Llama-3.1-8B-Instruct-q4f32_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Llama-3.1-8B-Instruct-q4f32_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow(
       "Llama-3.1-8B-Instruct-q4f32_1-MLC is not supported for ChatCompletionRequest.tools.",
     );
   });
@@ -222,7 +351,11 @@ describe("OpenAI API function calling", () => {
       ],
       response_format: { type: "json_object" },
     };
-      postInitAndCheckFields(request, "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow(
       "When using Hermes-2-Pro function calling via ChatCompletionRequest.tools, " +
        "cannot specify customized response_format. We will set it for you internally.",
     );
   });
@@ -244,7 +377,11 @@ describe("OpenAI API function calling", () => {
        },
      ],
    };
-      postInitAndCheckFields(request, "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow(
       "When using Hermes-2-Pro function calling via ChatCompletionRequest.tools, cannot " +
        "specify customized system prompt.",
     );
   });
@@ -266,7 +403,11 @@ describe("OpenAI API function calling", () => {
        },
      ],
    };
-      postInitAndCheckFields(request, "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC");
+      postInitAndCheckFields(
+        request,
+        "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
+        ModelType.LLM,
+      );
     }).toThrow(
       "When using Hermes-2-Pro function calling via ChatCompletionRequest.tools, cannot " +
        "specify customized system prompt.",
     );
   });
@@ -283,7 +424,11 @@ describe("OpenAI API function calling", () => {
       },
     ],
   };
-    postInitAndCheckFields(request, "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC");
+    postInitAndCheckFields(
+      request,
+      "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
+      ModelType.LLM,
+    );
     expect(request.messages[0].role).toEqual("system");
     expect(request.messages[0].content).toEqual(
       hermes2FunctionCallingSystemPrompt.replace(