[Log] Set log level using 'loglevel' package #427

Merged · 2 commits · May 27, 2024
15 changes: 15 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions package.json
@@ -51,5 +51,8 @@
"tslib": "^2.3.1",
"tvmjs": "file:./tvm_home/web",
"typescript": "^4.9.5"
},
"dependencies": {
"loglevel": "^1.9.1"
}
}
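
For context, a minimal sketch of how the newly added `loglevel` dependency is used across this PR (standard `loglevel` API; the specific messages are illustrative):

```ts
import log from "loglevel";

// Set the minimum severity that gets emitted; anything below it is dropped.
log.setLevel("WARN");

log.info("model loaded");             // suppressed at the WARN level
log.warn("falling back to default");  // printed
log.error("device lost");             // printed
```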
13 changes: 7 additions & 6 deletions src/config.ts
@@ -1,7 +1,7 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */

import log from "loglevel";
import { ResponseFormat } from "./openai_api_protocols";
import { LogitProcessor, InitProgressCallback } from "./types";
import { LogitProcessor, InitProgressCallback, LogLevel } from "./types";

/**
* Conversation template config
@@ -26,6 +26,8 @@ export enum Role {
assistant = "assistant",
}

export const DefaultLogLevel: LogLevel = "WARN";

/**
* Place holders that can be used in role templates.
* For example, a role template of
@@ -91,6 +93,7 @@ export interface MLCEngineConfig {
appConfig?: AppConfig;
initProgressCallback?: InitProgressCallback;
logitProcessorRegistry?: Map<string, LogitProcessor>;
logLevel?: LogLevel;
}

/**
@@ -167,16 +170,14 @@ export function postInitAndCheckGenerationConfigValues(
!_hasValue(config.presence_penalty)
) {
config.presence_penalty = 0.0;
console.log(
"Only frequency_penalty is set; we default presence_penaty to 0.",
);
log.warn("Only frequency_penalty is set; we default presence_penaty to 0.");
}
if (
_hasValue(config.presence_penalty) &&
!_hasValue(config.frequency_penalty)
) {
config.frequency_penalty = 0.0;
console.log(
log.warn(
"Only presence_penalty is set; we default frequency_penalty to 0.",
);
}
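
With `MLCEngineConfig` now carrying a `logLevel` and `DefaultLogLevel` set to `"WARN"`, a config object might look like the following sketch (the progress callback is a placeholder, not part of this diff; `CreateMLCEngine` falls back to `DefaultLogLevel` when no `logLevel` is given):

```ts
import { MLCEngineConfig } from "./config";

// Hypothetical config: only `logLevel` is introduced by this PR.
const engineConfig: MLCEngineConfig = {
  logLevel: "INFO",
  initProgressCallback: (report) => console.log(report.text),
};
```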
23 changes: 18 additions & 5 deletions src/engine.ts
@@ -1,4 +1,5 @@
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { Tokenizer } from "@mlc-ai/web-tokenizers";
import * as API from "./openai_api_protocols/apis";
import {
@@ -10,6 +11,7 @@ import {
postInitAndCheckGenerationConfigValues,
Role,
MLCEngineConfig,
DefaultLogLevel,
} from "./config";
import { LLMChatPipeline } from "./llm_chat";
import {
@@ -30,6 +32,7 @@ import {
MLCEngineInterface,
GenerateProgressCallback,
LogitProcessor,
LogLevel,
} from "./types";
import {
Conversation,
@@ -61,6 +64,7 @@ export async function CreateMLCEngine(
engineConfig?: MLCEngineConfig,
): Promise<MLCEngine> {
const engine = new MLCEngine();
engine.setLogLevel(engineConfig?.logLevel || DefaultLogLevel);
engine.setInitProgressCallback(engineConfig?.initProgressCallback);
engine.setLogitProcessorRegistry(engineConfig?.logitProcessorRegistry);
await engine.reload(modelId, engineConfig?.chatOpts, engineConfig?.appConfig);
@@ -76,7 +80,7 @@ export class MLCEngine implements MLCEngineInterface {
public chat: API.Chat;

private currentModelId?: string = undefined; // Model current loaded, undefined if nothing is loaded
private logger: (msg: string) => void = console.log;
private logger: (msg: string) => void = log.info;
private logitProcessorRegistry?: Map<string, LogitProcessor>;
private logitProcessor?: LogitProcessor;
private pipeline?: LLMChatPipeline;
@@ -238,7 +242,7 @@ export class MLCEngine implements MLCEngineInterface {
let deviceLostInReload = false;
gpuDetectOutput.device.lost.then((info: any) => {
if (this.deviceLostIsError) {
console.error(
log.error(
`Device was lost during reload. This can happen due to insufficient memory or other GPU constraints. Detailed error: ${info}. Please try to reload WebLLM with a less resource-intensive model.`,
);
this.unload();
@@ -291,7 +295,7 @@ export class MLCEngine implements MLCEngineInterface {
streamInterval = 1,
genConfig?: GenerationConfig,
): Promise<string> {
console.log(
log.warn(
"WARNING: `generate()` will soon be deprecated. " +
"Please use `engine.chat.completions.create()` instead. " +
"For multi-round chatting, see `examples/multi-round-chat` on how to use " +
@@ -579,7 +583,7 @@ export class MLCEngine implements MLCEngineInterface {
gpuDetectOutput.device.limits.maxStorageBufferBindingSize;
const defaultMaxStorageBufferBindingSize = 1 << 30; // 1GB
if (maxStorageBufferBindingSize < defaultMaxStorageBufferBindingSize) {
console.log(
log.warn(
`WARNING: the current maxStorageBufferBindingSize ` +
`(${computeMB(maxStorageBufferBindingSize)}) ` +
`may only work for a limited number of models, e.g.: \n` +
@@ -636,6 +640,15 @@ export class MLCEngine implements MLCEngineInterface {
return this.getPipeline().getMessage();
}

/**
* Set MLCEngine logging output level
*
* @param logLevel The new log level
*/
setLogLevel(logLevel: LogLevel) {
log.setLevel(logLevel);
}

/**
* Get a new Conversation object based on the chat completion request.
*
@@ -792,7 +805,7 @@ export class MLCEngine implements MLCEngineInterface {
this.resetChat();
this.getPipeline().setConversation(newConv);
} else {
console.log("Multiround chatting, reuse KVCache.");
log.info("Multiround chatting, reuse KVCache.");
}

// 2. Treat the last message as the usual input
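
Putting the engine-side changes together, a usage sketch (the model id and levels are illustrative, not taken from this diff):

```ts
import { CreateMLCEngine } from "./engine";

// Create an engine that logs at INFO and above from the start.
const engine = await CreateMLCEngine("Llama-3-8B-Instruct-q4f32_1-MLC", {
  logLevel: "INFO",
});

// Raise verbosity later without recreating the engine.
engine.setLogLevel("DEBUG");
```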
8 changes: 6 additions & 2 deletions src/extension_service_worker.ts
@@ -1,7 +1,8 @@
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { AppConfig, ChatOptions, MLCEngineConfig } from "./config";
import { ReloadParams, WorkerRequest } from "./message";
import { MLCEngineInterface } from "./types";
import { LogLevel, MLCEngineInterface } from "./types";

(GitHub Actions / lint: warning on line 5 — 'LogLevel' is defined but never used)
import {
ChatWorker,
MLCEngineWorkerHandler,
@@ -88,7 +89,7 @@
areChatOptionsEqual(this.chatOpts, params.chatOpts) &&
areAppConfigsEqual(this.appConfig, params.appConfig)
) {
console.log("Already loaded the model. Skip loading");
log.info("Already loaded the model. Skip loading");
const gpuDetectOutput = await tvmjs.detectGPUDevice();
if (gpuDetectOutput == undefined) {
throw Error("Cannot find WebGPU in the environment");
@@ -140,6 +141,9 @@
keepAliveMs = 10000,
): Promise<ServiceWorkerMLCEngine> {
const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(keepAliveMs);
if (engineConfig?.logLevel) {
serviceWorkerMLCEngine.setLogLevel(engineConfig.logLevel);
}
serviceWorkerMLCEngine.setInitProgressCallback(
engineConfig?.initProgressCallback,
);
1 change: 1 addition & 0 deletions src/index.ts
@@ -14,6 +14,7 @@ export {
InitProgressReport,
MLCEngineInterface,
LogitProcessor,
LogLevel,
} from "./types";

export { MLCEngine, CreateMLCEngine } from "./engine";
20 changes: 9 additions & 11 deletions src/llm_chat.ts
@@ -1,6 +1,7 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */
/* eslint-disable no-prototype-builtins */
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { Tokenizer } from "@mlc-ai/web-tokenizers";
import { ChatConfig, GenerationConfig, Role } from "./config";
import { getConversation, Conversation } from "./conversation";
@@ -72,9 +73,6 @@ export class LLMChatPipeline {
private curRoundDecodingTotalTokens = 0;
private curRoundPrefillTotalTokens = 0;

// logger
private logger = console.log;

// LogitProcessor
private logitProcessor?: LogitProcessor = undefined;

@@ -154,7 +152,7 @@ export class LLMChatPipeline {

// 4. Read in compilation configurations from metadata
this.prefillChunkSize = metadata.prefill_chunk_size;
this.logger("Using prefillChunkSize: ", this.prefillChunkSize);
log.info("Using prefillChunkSize: ", this.prefillChunkSize);
if (this.prefillChunkSize <= 0) {
throw Error("Prefill chunk size needs to be positive.");
}
@@ -164,14 +162,14 @@
metadata.sliding_window_size != -1
) {
this.slidingWindowSize = metadata.sliding_window_size;
this.logger("Using slidingWindowSize: ", this.slidingWindowSize);
log.info("Using slidingWindowSize: ", this.slidingWindowSize);
// Parse attention sink size
if (
metadata.hasOwnProperty("attention_sink_size") &&
metadata.attention_sink_size >= 0
) {
this.attentionSinkSize = metadata.attention_sink_size;
this.logger("Using attentionSinkSize: ", this.attentionSinkSize);
log.info("Using attentionSinkSize: ", this.attentionSinkSize);
} else {
throw Error(
"Need to specify non-negative attention_sink_size if using sliding window. " +
@@ -184,7 +182,7 @@
metadata.context_window_size != -1
) {
this.maxWindowLength = metadata.context_window_size;
this.logger("Using maxWindowLength: ", this.maxWindowLength);
log.info("Using maxWindowLength: ", this.maxWindowLength);
} else {
throw Error(
"Need to specify either sliding window size or max window size.",
@@ -905,7 +903,7 @@ export class LLMChatPipeline {
}

// need shift window and re-encode
this.logger("need shift window");
log.info("need shift window");
this.filledKVCacheLength = 0;
this.resetKVCache();

@@ -1056,8 +1054,8 @@ export class LLMChatPipeline {
`decoding-time=${((decodingEnd - decodingStart) / 1000).toFixed(4)} sec`;

// simply log tokens for eyeballing.
console.log("Logits:");
console.log(logitsOnCPU.toArray());
console.log(msg);
log.info("Logits:");
log.info(logitsOnCPU.toArray());
log.info(msg);
}
}
16 changes: 10 additions & 6 deletions src/service_worker.ts
@@ -1,7 +1,8 @@
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { AppConfig, ChatOptions, MLCEngineConfig } from "./config";
import { ReloadParams, WorkerRequest, WorkerResponse } from "./message";
import { MLCEngineInterface, InitProgressReport } from "./types";
import { MLCEngineInterface, InitProgressReport, LogLevel } from "./types";

(GitHub Actions / lint: warning on line 5 — 'LogLevel' is defined but never used)
import {
MLCEngineWorkerHandler,
WebWorkerMLCEngine,
@@ -90,7 +91,7 @@
onError?: () => void,
): void {
const msg = event.data as WorkerRequest;
console.debug(
log.trace(
`ServiceWorker message: [${msg.kind}] ${JSON.stringify(msg.content)}`,
);

@@ -114,7 +115,7 @@
areChatOptionsEqual(this.chatOpts, params.chatOpts) &&
areAppConfigsEqual(this.appConfig, params.appConfig)
) {
console.log("Already loaded the model. Skip loading");
log.info("Already loaded the model. Skip loading");
const gpuDetectOutput = await tvmjs.detectGPUDevice();
if (gpuDetectOutput == undefined) {
throw Error("Cannot find WebGPU in the environment");
@@ -206,6 +207,9 @@
);
}
const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(serviceWorker);
if (engineConfig?.logLevel) {
serviceWorkerMLCEngine.setLogLevel(engineConfig.logLevel);
}
serviceWorkerMLCEngine.setInitProgressCallback(
engineConfig?.initProgressCallback,
);
@@ -234,7 +238,7 @@
"message",
(event: MessageEvent) => {
const msg = event.data;
console.debug(
log.trace(
`MLC client message: [${msg.kind}] ${JSON.stringify(msg.content)}`,
);
try {
@@ -246,7 +250,7 @@
} catch (err: any) {
// This is expected to throw if user has multiple windows open
if (!err.message.startsWith("return from a unknown uuid")) {
console.error("CreateWebServiceWorkerMLCEngine.onmessage", err);
log.error("CreateWebServiceWorkerMLCEngine.onmessage", err);
}
}
},
@@ -255,7 +259,7 @@
setInterval(() => {
this.worker.postMessage({ kind: "keepAlive", uuid: crypto.randomUUID() });
this.missedHeatbeat += 1;
console.debug("missedHeatbeat", this.missedHeatbeat);
log.trace("missedHeatbeat", this.missedHeatbeat);
}, keepAliveMs);
}

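
The service-worker path gets the same treatment; a sketch assuming the `CreateServiceWorkerMLCEngine` factory exported from this file (the factory name and model id are assumptions, not shown in the hunks above):

```ts
import { CreateServiceWorkerMLCEngine } from "./service_worker";

// logLevel is forwarded to the worker-backed engine only when provided,
// matching the `if (engineConfig?.logLevel)` guard added above.
const engine = await CreateServiceWorkerMLCEngine("SOME_MODEL_ID", {
  logLevel: "WARN",
});
```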
17 changes: 17 additions & 0 deletions src/types.ts
@@ -194,4 +194,21 @@ export interface MLCEngineInterface {
inputIds: Array<number>,
isPrefill: boolean,
): Promise<number>;

/**
* Set MLCEngine logging output level
*
* @param logLevel The new log level
*/
setLogLevel(logLevel: LogLevel): void;
}

export const LOG_LEVELS = {
TRACE: 0,
DEBUG: 1,
INFO: 2,
WARN: 3,
ERROR: 4,
SILENT: 5,
};
export type LogLevel = keyof typeof LOG_LEVELS;
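
A small sketch of how the new `LogLevel` union and `LOG_LEVELS` map are intended to be used (the case-insensitive handling of string level names is `loglevel`'s documented behavior):

```ts
import log from "loglevel";
import { LogLevel, LOG_LEVELS } from "./types";

// Only the six keys of LOG_LEVELS type-check as a LogLevel.
const level: LogLevel = "DEBUG";

// loglevel accepts level names case-insensitively, so these uppercase keys
// map directly onto its trace/debug/info/warn/error/silent levels.
log.setLevel(level);

// The numeric values allow severity comparisons, e.g. gating expensive diagnostics.
if (LOG_LEVELS[level] <= LOG_LEVELS.INFO) {
  log.debug("verbose diagnostics enabled");
}
```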
6 changes: 6 additions & 0 deletions src/web_worker.ts
@@ -9,6 +9,7 @@ import {
GenerateProgressCallback,
InitProgressCallback,
InitProgressReport,
LogLevel,
} from "./types";
import {
ChatCompletionRequest,
@@ -31,6 +32,7 @@ import {
WorkerResponse,
WorkerRequest,
} from "./message";
import log from "loglevel";

export interface PostMessageHandler {
postMessage: (message: any) => void;
@@ -624,4 +626,8 @@ export class WebWorkerMLCEngine implements MLCEngineInterface {
}
}
}

setLogLevel(logLevel: LogLevel) {
log.setLevel(logLevel);
}
}