diff --git a/package-lock.json b/package-lock.json
index e33d27da..70a66922 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,6 +8,9 @@
       "name": "@mlc-ai/web-llm",
       "version": "0.2.38",
       "license": "Apache-2.0",
+      "dependencies": {
+        "loglevel": "^1.9.1"
+      },
       "devDependencies": {
         "@mlc-ai/web-tokenizers": "^0.1.3",
         "@rollup/plugin-commonjs": "^20.0.0",
@@ -6116,6 +6119,18 @@
       "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==",
       "dev": true
     },
+    "node_modules/loglevel": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/loglevel/-/loglevel-1.9.1.tgz",
+      "integrity": "sha512-hP3I3kCrDIMuRwAwHltphhDM1r8i55H33GgqjXbrisuJhF4kRhW1dNuxsRklp4bXl8DSdLaNLuiL4A/LWRfxvg==",
+      "engines": {
+        "node": ">= 0.6.0"
+      },
+      "funding": {
+        "type": "tidelift",
+        "url": "https://tidelift.com/funding/github/npm/loglevel"
+      }
+    },
     "node_modules/lru-cache": {
       "version": "6.0.0",
       "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
diff --git a/package.json b/package.json
index df4a331e..1184654c 100644
--- a/package.json
+++ b/package.json
@@ -51,5 +51,8 @@
     "tslib": "^2.3.1",
     "tvmjs": "file:./tvm_home/web",
     "typescript": "^4.9.5"
+  },
+  "dependencies": {
+    "loglevel": "^1.9.1"
   }
 }
diff --git a/src/config.ts b/src/config.ts
index 0a5b3fd1..b758baf7 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -1,7 +1,7 @@
 /* eslint-disable @typescript-eslint/no-non-null-assertion */
-
+import log from "loglevel";
 import { ResponseFormat } from "./openai_api_protocols";
-import { LogitProcessor, InitProgressCallback } from "./types";
+import { LogitProcessor, InitProgressCallback, LogLevel } from "./types";
 
 /**
  * Conversation template config
@@ -26,6 +26,8 @@ export enum Role {
   assistant = "assistant",
 }
 
+export const DefaultLogLevel: LogLevel = "WARN";
+
 /**
  * Place holders that can be used in role templates.
 * For example, a role template of
@@ -91,6 +93,7 @@ export interface MLCEngineConfig {
   appConfig?: AppConfig;
   initProgressCallback?: InitProgressCallback;
   logitProcessorRegistry?: Map<string, LogitProcessor>;
+  logLevel: LogLevel;
 }
 
 /**
@@ -167,16 +170,14 @@ export function postInitAndCheckGenerationConfigValues(
     !_hasValue(config.presence_penalty)
   ) {
     config.presence_penalty = 0.0;
-    console.log(
-      "Only frequency_penalty is set; we default presence_penaty to 0.",
-    );
+    log.warn("Only frequency_penalty is set; we default presence_penaty to 0.");
   }
   if (
     _hasValue(config.presence_penalty) &&
     !_hasValue(config.frequency_penalty)
   ) {
     config.frequency_penalty = 0.0;
-    console.log(
+    log.warn(
       "Only presence_penalty is set; we default frequency_penalty to 0.",
     );
   }
diff --git a/src/engine.ts b/src/engine.ts
index 3c76a721..65823ea7 100644
--- a/src/engine.ts
+++ b/src/engine.ts
@@ -1,4 +1,5 @@
 import * as tvmjs from "tvmjs";
+import log from "loglevel";
 import { Tokenizer } from "@mlc-ai/web-tokenizers";
 import * as API from "./openai_api_protocols/apis";
 import {
@@ -10,6 +11,7 @@ import {
   postInitAndCheckGenerationConfigValues,
   Role,
   MLCEngineConfig,
+  DefaultLogLevel,
 } from "./config";
 import { LLMChatPipeline } from "./llm_chat";
 import {
@@ -30,6 +32,7 @@ import {
   MLCEngineInterface,
   GenerateProgressCallback,
   LogitProcessor,
+  LogLevel,
 } from "./types";
 import {
   Conversation,
@@ -61,6 +64,7 @@ export async function CreateMLCEngine(
   engineConfig?: MLCEngineConfig,
 ): Promise<MLCEngine> {
   const engine = new MLCEngine();
+  engine.setLogLevel(engineConfig?.logLevel || DefaultLogLevel);
   engine.setInitProgressCallback(engineConfig?.initProgressCallback);
   engine.setLogitProcessorRegistry(engineConfig?.logitProcessorRegistry);
   await engine.reload(modelId, engineConfig?.chatOpts, engineConfig?.appConfig);
@@ -76,7 +80,7 @@ export class MLCEngine implements MLCEngineInterface {
   public chat: API.Chat;
 
   private currentModelId?: string = undefined; // Model current loaded, undefined if nothing is loaded
-  private logger: (msg: string) => void = console.log;
+  private logger: (msg: string) => void = log.info;
   private logitProcessorRegistry?: Map<string, LogitProcessor>;
   private logitProcessor?: LogitProcessor;
   private pipeline?: LLMChatPipeline;
@@ -238,7 +242,7 @@ export class MLCEngine implements MLCEngineInterface {
     let deviceLostInReload = false;
     gpuDetectOutput.device.lost.then((info: any) => {
       if (this.deviceLostIsError) {
-        console.error(
+        log.error(
           `Device was lost during reload. This can happen due to insufficient memory or other GPU constraints. Detailed error: ${info}. Please try to reload WebLLM with a less resource-intensive model.`,
         );
         this.unload();
@@ -291,7 +295,7 @@ export class MLCEngine implements MLCEngineInterface {
     streamInterval = 1,
     genConfig?: GenerationConfig,
   ): Promise<string> {
-    console.log(
+    log.warn(
       "WARNING: `generate()` will soon be deprecated. " +
         "Please use `engine.chat.completions.create()` instead. " +
         "For multi-round chatting, see `examples/multi-round-chat` on how to use " +
@@ -579,7 +583,7 @@ export class MLCEngine implements MLCEngineInterface {
       gpuDetectOutput.device.limits.maxStorageBufferBindingSize;
     const defaultMaxStorageBufferBindingSize = 1 << 30; // 1GB
     if (maxStorageBufferBindingSize < defaultMaxStorageBufferBindingSize) {
-      console.log(
+      log.warn(
         `WARNING: the current maxStorageBufferBindingSize ` +
           `(${computeMB(maxStorageBufferBindingSize)}) ` +
           `may only work for a limited number of models, e.g.: \n` +
@@ -636,6 +640,15 @@ export class MLCEngine implements MLCEngineInterface {
     return this.getPipeline().getMessage();
   }
 
+  /**
+   * Set MLCEngine logging output level
+   *
+   * @param logLevel The new log level
+   */
+  setLogLevel(logLevel: LogLevel) {
+    log.setLevel(logLevel);
+  }
+
   /**
    * Get a new Conversation object based on the chat completion request.
    *
@@ -792,7 +805,7 @@ export class MLCEngine implements MLCEngineInterface {
       this.resetChat();
       this.getPipeline().setConversation(newConv);
     } else {
-      console.log("Multiround chatting, reuse KVCache.");
+      log.info("Multiround chatting, reuse KVCache.");
     }
 
     // 2. Treat the last message as the usual input
diff --git a/src/extension_service_worker.ts b/src/extension_service_worker.ts
index 3e6aae71..1c575253 100644
--- a/src/extension_service_worker.ts
+++ b/src/extension_service_worker.ts
@@ -1,7 +1,8 @@
 import * as tvmjs from "tvmjs";
+import log from "loglevel";
 import { AppConfig, ChatOptions, MLCEngineConfig } from "./config";
 import { ReloadParams, WorkerRequest } from "./message";
-import { MLCEngineInterface } from "./types";
+import { LogLevel, MLCEngineInterface } from "./types";
 import {
   ChatWorker,
   MLCEngineWorkerHandler,
@@ -88,7 +89,7 @@ export class ServiceWorkerMLCEngineHandler extends MLCEngineWorkerHandler {
       areChatOptionsEqual(this.chatOpts, params.chatOpts) &&
       areAppConfigsEqual(this.appConfig, params.appConfig)
     ) {
-      console.log("Already loaded the model. Skip loading");
+      log.info("Already loaded the model. Skip loading");
       const gpuDetectOutput = await tvmjs.detectGPUDevice();
       if (gpuDetectOutput == undefined) {
         throw Error("Cannot find WebGPU in the environment");
       }
@@ -140,6 +141,9 @@ export async function CreateServiceWorkerMLCEngine(
   keepAliveMs = 10000,
 ): Promise<ServiceWorkerMLCEngine> {
   const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(keepAliveMs);
+  if (engineConfig?.logLevel) {
+    serviceWorkerMLCEngine.setLogLevel(engineConfig.logLevel);
+  }
   serviceWorkerMLCEngine.setInitProgressCallback(
     engineConfig?.initProgressCallback,
   );
diff --git a/src/index.ts b/src/index.ts
index 9f0387bd..0b6672f1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -14,6 +14,7 @@ export {
   InitProgressReport,
   MLCEngineInterface,
   LogitProcessor,
+  LogLevel,
 } from "./types";
 
 export { MLCEngine, CreateMLCEngine } from "./engine";
diff --git a/src/llm_chat.ts b/src/llm_chat.ts
index 945e1b5b..129d7e92 100644
--- a/src/llm_chat.ts
+++ b/src/llm_chat.ts
@@ -1,6 +1,7 @@
 /* eslint-disable @typescript-eslint/no-non-null-assertion */
 /* eslint-disable no-prototype-builtins */
 import * as tvmjs from "tvmjs";
+import log from "loglevel";
 import { Tokenizer } from "@mlc-ai/web-tokenizers";
 import { ChatConfig, GenerationConfig, Role } from "./config";
 import { getConversation, Conversation } from "./conversation";
@@ -72,9 +73,6 @@ export class LLMChatPipeline {
   private curRoundDecodingTotalTokens = 0;
   private curRoundPrefillTotalTokens = 0;
 
-  // logger
-  private logger = console.log;
-
   // LogitProcessor
   private logitProcessor?: LogitProcessor = undefined;
 
@@ -154,7 +152,7 @@ export class LLMChatPipeline {
 
     // 4. Read in compilation configurations from metadata
     this.prefillChunkSize = metadata.prefill_chunk_size;
-    this.logger("Using prefillChunkSize: ", this.prefillChunkSize);
+    log.info("Using prefillChunkSize: ", this.prefillChunkSize);
     if (this.prefillChunkSize <= 0) {
       throw Error("Prefill chunk size needs to be positive.");
     }
@@ -164,14 +162,14 @@ export class LLMChatPipeline {
       metadata.sliding_window_size != -1
     ) {
       this.slidingWindowSize = metadata.sliding_window_size;
-      this.logger("Using slidingWindowSize: ", this.slidingWindowSize);
+      log.info("Using slidingWindowSize: ", this.slidingWindowSize);
       // Parse attention sink size
       if (
         metadata.hasOwnProperty("attention_sink_size") &&
         metadata.attention_sink_size >= 0
       ) {
         this.attentionSinkSize = metadata.attention_sink_size;
-        this.logger("Using attentionSinkSize: ", this.attentionSinkSize);
+        log.info("Using attentionSinkSize: ", this.attentionSinkSize);
       } else {
         throw Error(
           "Need to specify non-negative attention_sink_size if using sliding window. " +
@@ -184,7 +182,7 @@ export class LLMChatPipeline {
       metadata.context_window_size != -1
     ) {
       this.maxWindowLength = metadata.context_window_size;
-      this.logger("Using maxWindowLength: ", this.maxWindowLength);
+      log.info("Using maxWindowLength: ", this.maxWindowLength);
     } else {
       throw Error(
         "Need to specify either sliding window size or max window size.",
       );
     }
@@ -905,7 +903,7 @@ export class LLMChatPipeline {
     }
 
     // need shift window and re-encode
-    this.logger("need shift window");
+    log.info("need shift window");
     this.filledKVCacheLength = 0;
     this.resetKVCache();
 
@@ -1056,8 +1054,8 @@ export class LLMChatPipeline {
       `decoding-time=${((decodingEnd - decodingStart) / 1000).toFixed(4)} sec`;
 
     // simply log tokens for eyeballing.
-    console.log("Logits:");
-    console.log(logitsOnCPU.toArray());
-    console.log(msg);
+    log.info("Logits:");
+    log.info(logitsOnCPU.toArray());
+    log.info(msg);
   }
 }
diff --git a/src/service_worker.ts b/src/service_worker.ts
index 0cd2707d..eba3e564 100644
--- a/src/service_worker.ts
+++ b/src/service_worker.ts
@@ -1,7 +1,8 @@
 import * as tvmjs from "tvmjs";
+import log from "loglevel";
 import { AppConfig, ChatOptions, MLCEngineConfig } from "./config";
 import { ReloadParams, WorkerRequest, WorkerResponse } from "./message";
-import { MLCEngineInterface, InitProgressReport } from "./types";
+import { MLCEngineInterface, InitProgressReport, LogLevel } from "./types";
 import {
   MLCEngineWorkerHandler,
   WebWorkerMLCEngine,
@@ -90,7 +91,7 @@ export class ServiceWorkerMLCEngineHandler extends MLCEngineWorkerHandler {
     onError?: () => void,
   ): void {
     const msg = event.data as WorkerRequest;
-    console.debug(
+    log.trace(
      `ServiceWorker message: [${msg.kind}] ${JSON.stringify(msg.content)}`,
     );
 
@@ -114,7 +115,7 @@ export class ServiceWorkerMLCEngineHandler extends MLCEngineWorkerHandler {
       areChatOptionsEqual(this.chatOpts, params.chatOpts) &&
       areAppConfigsEqual(this.appConfig, params.appConfig)
     ) {
-      console.log("Already loaded the model. Skip loading");
+      log.info("Already loaded the model. Skip loading");
       const gpuDetectOutput = await tvmjs.detectGPUDevice();
       if (gpuDetectOutput == undefined) {
         throw Error("Cannot find WebGPU in the environment");
       }
@@ -206,6 +207,9 @@ export async function CreateServiceWorkerMLCEngine(
     );
   }
   const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(serviceWorker);
+  if (engineConfig?.logLevel) {
+    serviceWorkerMLCEngine.setLogLevel(engineConfig.logLevel);
+  }
   serviceWorkerMLCEngine.setInitProgressCallback(
     engineConfig?.initProgressCallback,
   );
@@ -234,7 +238,7 @@ export class ServiceWorkerMLCEngine extends WebWorkerMLCEngine {
       "message",
       (event: MessageEvent) => {
         const msg = event.data;
-        console.debug(
+        log.trace(
           `MLC client message: [${msg.kind}] ${JSON.stringify(msg.content)}`,
         );
         try {
@@ -246,7 +250,7 @@
         } catch (err: any) {
           // This is expected to throw if user has multiple windows open
           if (!err.message.startsWith("return from a unknown uuid")) {
-            console.error("CreateWebServiceWorkerMLCEngine.onmessage", err);
+            log.error("CreateWebServiceWorkerMLCEngine.onmessage", err);
           }
         }
       },
@@ -255,7 +259,7 @@
     setInterval(() => {
       this.worker.postMessage({ kind: "keepAlive", uuid: crypto.randomUUID() });
       this.missedHeatbeat += 1;
-      console.debug("missedHeatbeat", this.missedHeatbeat);
+      log.trace("missedHeatbeat", this.missedHeatbeat);
     }, keepAliveMs);
   }
 
diff --git a/src/types.ts b/src/types.ts
index e2dfcec1..910c08e7 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -194,4 +194,21 @@ export interface MLCEngineInterface {
     inputIds: Array<number>,
     isPrefill: boolean,
   ): Promise<number>;
+
+  /**
+   * Set MLCEngine logging output level
+   *
+   * @param logLevel The new log level
+   */
+  setLogLevel(logLevel: LogLevel): void;
 }
+
+export const LOG_LEVELS = {
+  TRACE: 0,
+  DEBUG: 1,
+  INFO: 2,
+  WARN: 3,
+  ERROR: 4,
+  SILENT: 5,
+};
+export type LogLevel = keyof typeof LOG_LEVELS;
diff --git a/src/web_worker.ts b/src/web_worker.ts
index cc1c95b7..2ff79f95 100644
--- a/src/web_worker.ts
+++ b/src/web_worker.ts
@@ -9,6 +9,7 @@ import {
   GenerateProgressCallback,
   InitProgressCallback,
   InitProgressReport,
+  LogLevel,
 } from "./types";
 import {
   ChatCompletionRequest,
@@ -31,6 +32,7 @@ import {
   WorkerResponse,
   WorkerRequest,
 } from "./message";
+import log from "loglevel";
 
 export interface PostMessageHandler {
   postMessage: (message: any) => void;
@@ -624,4 +626,8 @@ export class WebWorkerMLCEngine implements MLCEngineInterface {
       }
     }
   }
+
+  setLogLevel(logLevel: LogLevel) {
+    log.setLevel(logLevel);
+  }
 }
diff --git a/utils/vram_requirements/src/vram_requirements.ts b/utils/vram_requirements/src/vram_requirements.ts
index 2e5c2a8c..14988fcc 100644
--- a/utils/vram_requirements/src/vram_requirements.ts
+++ b/utils/vram_requirements/src/vram_requirements.ts
@@ -1,6 +1,7 @@
 import ModelRecord from "@mlc-ai/web-llm";
-import appConfig from "./app-config"; // Modify this to inspect vram requirement for models of choice
+import appConfig from "./app-config"; // Modify this to inspect vram requirement for models of choice
 import * as tvmjs from "tvmjs";
+import log from "loglevel";
 
 function setLabel(id: string, text: string) {
   const label = document.getElementById(id);
@@ -14,16 +15,16 @@ interface AppConfig {
   model_list: Array<ModelRecord>;
 }
 
-let dtypeBytesMap = new Map([
+const dtypeBytesMap = new Map([
   ["uint32", 4],
   ["uint16", 2],
   ["float32", 4],
-  ["float16", 4]
+  ["float16", 4],
 ]);
 
 async function main() {
-  let config: AppConfig = appConfig;
-  let report: string = "";
+  const config: AppConfig = appConfig;
+  let report = "";
   for (let i = 0; i < config.model_list.length; ++i) {
     // 1. Read each model record
     const modelRecord: ModelRecord = config.model_list[i];
@@ -36,7 +37,7 @@ async function main() {
     const tvm = await tvmjs.instantiate(
       new Uint8Array(wasmSource),
       tvmjs.createPolyfillWASI(),
-      console.log
+      log.info,
     );
     const gpuDetectOutput = await tvmjs.detectGPUDevice();
     if (gpuDetectOutput == undefined) {
@@ -45,14 +46,17 @@ async function main() {
     tvm.initWebGPU(gpuDetectOutput.device);
     tvm.beginScope();
     const vm = tvm.detachFromCurrentScope(
-      tvm.createVirtualMachine(tvm.webgpu())
+      tvm.createVirtualMachine(tvm.webgpu()),
     );
     // 4. Get metadata from the vm
     let fgetMetadata: any;
     try {
       fgetMetadata = vm.getFunction("_metadata");
     } catch (err) {
-      console.error("The wasm needs to have function `_metadata` to inspect vram requirement.", err);
+      log.error(
+        "The wasm needs to have function `_metadata` to inspect vram requirement.",
+        err,
+      );
     }
     const ret_value = fgetMetadata();
     const metadataStr = tvm.detachFromCurrentScope(ret_value).toString();
@@ -65,33 +69,38 @@ async function main() {
         // Possible to have shape -1 signifying a dynamic shape -- we disregard them
         const dtypeBytes = dtypeBytesMap.get(param.dtype);
         if (dtypeBytes === undefined) {
-          throw Error("Cannot find size of " + param.dtype + ", add it to `dtypeBytesMap`.")
+          throw Error(
+            "Cannot find size of " +
+              param.dtype +
+              ", add it to `dtypeBytesMap`.",
+          );
         }
         const numParams = param.shape.reduce((a: number, b: number) => a * b);
         paramBytes += numParams * dtypeBytes;
       } else {
-        console.log(`${model_id}'s ${param.name} has dynamic shape; excluded from vRAM calculation.`)
+        log.info(
+          `${model_id}'s ${param.name} has dynamic shape; excluded from vRAM calculation.`,
+        );
       }
     });
     // 5.2. Get maximum bytes needed for temporary buffer across all functions
-    let maxTempFuncBytes: number = 0;
+    let maxTempFuncBytes = 0;
     Object.entries(metadata.memory_usage).forEach(([funcName, funcBytes]) => {
       if (typeof funcBytes !== "number") {
-        throw Error("`memory_usage` expects entry `funcName: funcBytes`.")
+        throw Error("`memory_usage` expects entry `funcName: funcBytes`.");
       }
       maxTempFuncBytes = Math.max(maxTempFuncBytes, funcBytes);
-    })
+    });
     // 5.3. Get kv cache bytes
     const kv_cache_bytes: number = metadata.kv_cache_bytes;
     // 5.4. Get total vRAM needed
     const totalBytes = paramBytes + maxTempFuncBytes + kv_cache_bytes;
     // 6. Report vRAM Requirement
-    report += (
+    report +=
       `totalBytes: ${(totalBytes / 1024 / 1024).toFixed(2)} MB\n` +
       `paramBytes: ${(paramBytes / 1024 / 1024).toFixed(2)} MB\n` +
       `maxTempFuncBytes: ${(maxTempFuncBytes / 1024 / 1024).toFixed(2)} MB\n` +
-      `kv_cache_bytes: ${(kv_cache_bytes / 1024 / 1024).toFixed(2)} MB\n\n`
-    );
+      `kv_cache_bytes: ${(kv_cache_bytes / 1024 / 1024).toFixed(2)} MB\n\n`;
     // 7. Dispose everything
     tvm.endScope();
     vm.dispose();
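
For reference, a minimal usage sketch of the logging surface this patch introduces (`logLevel` in `MLCEngineConfig`, `setLogLevel()` on the engine, and the exported `LogLevel` type). The model ID below is only a placeholder and the surrounding setup is an assumption, not part of the diff:

```ts
import { CreateMLCEngine } from "@mlc-ai/web-llm";

async function demo() {
  // "MODEL_ID" is a placeholder; use any model record from the app config.
  // logLevel is the new MLCEngineConfig field; when omitted, CreateMLCEngine
  // falls back to DefaultLogLevel ("WARN").
  const engine = await CreateMLCEngine("MODEL_ID", { logLevel: "INFO" });

  // The level can also be changed later through the new MLCEngineInterface
  // method, which forwards to loglevel's setLevel().
  engine.setLogLevel("DEBUG");
}
```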