[Log] Set log level using 'loglevel' package #427

Merged · 2 commits · May 27, 2024
15 changes: 15 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions package.json
@@ -51,5 +51,8 @@
"tslib": "^2.3.1",
"tvmjs": "file:./tvm_home/web",
"typescript": "^4.9.5"
},
"dependencies": {
"loglevel": "^1.9.1"
}
}
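
For context, a minimal sketch of how the newly added `loglevel` dependency is used across this PR (standard `loglevel` API; the specific messages are illustrative):

```ts
import log from "loglevel";

// Set the minimum severity that gets emitted; anything below it is dropped.
log.setLevel("WARN");

log.info("model loaded");             // suppressed at the WARN level
log.warn("falling back to default");  // printed
log.error("device lost");             // printed
```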
13 changes: 7 additions & 6 deletions src/config.ts
@@ -1,7 +1,7 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */

import log from "loglevel";
import { ResponseFormat } from "./openai_api_protocols";
import { LogitProcessor, InitProgressCallback } from "./types";
import { LogitProcessor, InitProgressCallback, LogLevel } from "./types";

/**
* Conversation template config
@@ -26,6 +26,8 @@ export enum Role {
assistant = "assistant",
}

export const DefaultLogLevel: LogLevel = "WARN";

/**
* Place holders that can be used in role templates.
* For example, a role template of
@@ -91,6 +93,7 @@ export interface MLCEngineConfig {
appConfig?: AppConfig;
initProgressCallback?: InitProgressCallback;
logitProcessorRegistry?: Map<string, LogitProcessor>;
logLevel?: LogLevel;
}

/**
@@ -167,16 +170,14 @@ export function postInitAndCheckGenerationConfigValues(
!_hasValue(config.presence_penalty)
) {
config.presence_penalty = 0.0;
console.log(
"Only frequency_penalty is set; we default presence_penaty to 0.",
);
log.warn("Only frequency_penalty is set; we default presence_penaty to 0.");
}
if (
_hasValue(config.presence_penalty) &&
!_hasValue(config.frequency_penalty)
) {
config.frequency_penalty = 0.0;
console.log(
log.warn(
"Only presence_penalty is set; we default frequency_penalty to 0.",
);
}
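
With `MLCEngineConfig` now carrying a `logLevel` and `DefaultLogLevel` set to `"WARN"`, a config object might look like the following sketch (the progress callback is a placeholder, not part of this diff; `CreateMLCEngine` falls back to `DefaultLogLevel` when no `logLevel` is given):

```ts
import { MLCEngineConfig } from "./config";

// Hypothetical config: only `logLevel` is introduced by this PR.
const engineConfig: MLCEngineConfig = {
  logLevel: "INFO",
  initProgressCallback: (report) => console.log(report.text),
};
```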
23 changes: 18 additions & 5 deletions src/engine.ts
@@ -1,4 +1,5 @@
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { Tokenizer } from "@mlc-ai/web-tokenizers";
import * as API from "./openai_api_protocols/apis";
import {
@@ -10,6 +11,7 @@ import {
postInitAndCheckGenerationConfigValues,
Role,
MLCEngineConfig,
DefaultLogLevel,
} from "./config";
import { LLMChatPipeline } from "./llm_chat";
import {
@@ -30,6 +32,7 @@ import {
MLCEngineInterface,
GenerateProgressCallback,
LogitProcessor,
LogLevel,
} from "./types";
import {
Conversation,
@@ -61,6 +64,7 @@ export async function CreateMLCEngine(
engineConfig?: MLCEngineConfig,
): Promise<MLCEngine> {
const engine = new MLCEngine();
engine.setLogLevel(engineConfig?.logLevel || DefaultLogLevel);
engine.setInitProgressCallback(engineConfig?.initProgressCallback);
engine.setLogitProcessorRegistry(engineConfig?.logitProcessorRegistry);
await engine.reload(modelId, engineConfig?.chatOpts, engineConfig?.appConfig);
@@ -76,7 +80,7 @@ export class MLCEngine implements MLCEngineInterface {
public chat: API.Chat;

private currentModelId?: string = undefined; // Model current loaded, undefined if nothing is loaded
private logger: (msg: string) => void = console.log;
private logger: (msg: string) => void = log.info;
private logitProcessorRegistry?: Map<string, LogitProcessor>;
private logitProcessor?: LogitProcessor;
private pipeline?: LLMChatPipeline;
@@ -238,7 +242,7 @@ export class MLCEngine implements MLCEngineInterface {
let deviceLostInReload = false;
gpuDetectOutput.device.lost.then((info: any) => {
if (this.deviceLostIsError) {
console.error(
log.error(
`Device was lost during reload. This can happen due to insufficient memory or other GPU constraints. Detailed error: ${info}. Please try to reload WebLLM with a less resource-intensive model.`,
);
this.unload();
@@ -291,7 +295,7 @@ export class MLCEngine implements MLCEngineInterface {
streamInterval = 1,
genConfig?: GenerationConfig,
): Promise<string> {
console.log(
log.warn(
"WARNING: `generate()` will soon be deprecated. " +
"Please use `engine.chat.completions.create()` instead. " +
"For multi-round chatting, see `examples/multi-round-chat` on how to use " +
@@ -579,7 +583,7 @@ export class MLCEngine implements MLCEngineInterface {
gpuDetectOutput.device.limits.maxStorageBufferBindingSize;
const defaultMaxStorageBufferBindingSize = 1 << 30; // 1GB
if (maxStorageBufferBindingSize < defaultMaxStorageBufferBindingSize) {
console.log(
log.warn(
`WARNING: the current maxStorageBufferBindingSize ` +
`(${computeMB(maxStorageBufferBindingSize)}) ` +
`may only work for a limited number of models, e.g.: \n` +
@@ -636,6 +640,15 @@ export class MLCEngine implements MLCEngineInterface {
return this.getPipeline().getMessage();
}

/**
* Set MLCEngine logging output level
*
* @param logLevel The new log level
*/
setLogLevel(logLevel: LogLevel) {
log.setLevel(logLevel);
}

/**
* Get a new Conversation object based on the chat completion request.
*
@@ -792,7 +805,7 @@ export class MLCEngine implements MLCEngineInterface {
this.resetChat();
this.getPipeline().setConversation(newConv);
} else {
console.log("Multiround chatting, reuse KVCache.");
log.info("Multiround chatting, reuse KVCache.");
}

// 2. Treat the last message as the usual input
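
Putting the engine-side changes together, a usage sketch (the model id and levels are illustrative, not taken from this diff):

```ts
import { CreateMLCEngine } from "./engine";

// Create an engine that logs at INFO and above from the start.
const engine = await CreateMLCEngine("Llama-3-8B-Instruct-q4f32_1-MLC", {
  logLevel: "INFO",
});

// Raise verbosity later without recreating the engine.
engine.setLogLevel("DEBUG");
```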
8 changes: 6 additions & 2 deletions src/extension_service_worker.ts
@@ -1,7 +1,8 @@
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { AppConfig, ChatOptions, MLCEngineConfig } from "./config";
import { ReloadParams, WorkerRequest } from "./message";
import { MLCEngineInterface } from "./types";
import { LogLevel, MLCEngineInterface } from "./types";

(GitHub Actions / lint: warning on line 5 — 'LogLevel' is defined but never used)
import {
ChatWorker,
MLCEngineWorkerHandler,
@@ -88,7 +89,7 @@
areChatOptionsEqual(this.chatOpts, params.chatOpts) &&
areAppConfigsEqual(this.appConfig, params.appConfig)
) {
console.log("Already loaded the model. Skip loading");
log.info("Already loaded the model. Skip loading");
const gpuDetectOutput = await tvmjs.detectGPUDevice();
if (gpuDetectOutput == undefined) {
throw Error("Cannot find WebGPU in the environment");
@@ -140,6 +141,9 @@
keepAliveMs = 10000,
): Promise<ServiceWorkerMLCEngine> {
const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(keepAliveMs);
if (engineConfig?.logLevel) {
serviceWorkerMLCEngine.setLogLevel(engineConfig.logLevel);
}
serviceWorkerMLCEngine.setInitProgressCallback(
engineConfig?.initProgressCallback,
);
1 change: 1 addition & 0 deletions src/index.ts
@@ -14,6 +14,7 @@ export {
InitProgressReport,
MLCEngineInterface,
LogitProcessor,
LogLevel,
} from "./types";

export { MLCEngine, CreateMLCEngine } from "./engine";
20 changes: 9 additions & 11 deletions src/llm_chat.ts
@@ -1,6 +1,7 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */
/* eslint-disable no-prototype-builtins */
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { Tokenizer } from "@mlc-ai/web-tokenizers";
import { ChatConfig, GenerationConfig, Role } from "./config";
import { getConversation, Conversation } from "./conversation";
@@ -72,9 +73,6 @@ export class LLMChatPipeline {
private curRoundDecodingTotalTokens = 0;
private curRoundPrefillTotalTokens = 0;

// logger
private logger = console.log;

// LogitProcessor
private logitProcessor?: LogitProcessor = undefined;

@@ -154,7 +152,7 @@ export class LLMChatPipeline {

// 4. Read in compilation configurations from metadata
this.prefillChunkSize = metadata.prefill_chunk_size;
this.logger("Using prefillChunkSize: ", this.prefillChunkSize);
log.info("Using prefillChunkSize: ", this.prefillChunkSize);
if (this.prefillChunkSize <= 0) {
throw Error("Prefill chunk size needs to be positive.");
}
@@ -164,14 +162,14 @@
metadata.sliding_window_size != -1
) {
this.slidingWindowSize = metadata.sliding_window_size;
this.logger("Using slidingWindowSize: ", this.slidingWindowSize);
log.info("Using slidingWindowSize: ", this.slidingWindowSize);
// Parse attention sink size
if (
metadata.hasOwnProperty("attention_sink_size") &&
metadata.attention_sink_size >= 0
) {
this.attentionSinkSize = metadata.attention_sink_size;
this.logger("Using attentionSinkSize: ", this.attentionSinkSize);
log.info("Using attentionSinkSize: ", this.attentionSinkSize);
} else {
throw Error(
"Need to specify non-negative attention_sink_size if using sliding window. " +
@@ -184,7 +182,7 @@
metadata.context_window_size != -1
) {
this.maxWindowLength = metadata.context_window_size;
this.logger("Using maxWindowLength: ", this.maxWindowLength);
log.info("Using maxWindowLength: ", this.maxWindowLength);
} else {
throw Error(
"Need to specify either sliding window size or max window size.",
@@ -905,7 +903,7 @@ export class LLMChatPipeline {
}

// need shift window and re-encode
this.logger("need shift window");
log.info("need shift window");
this.filledKVCacheLength = 0;
this.resetKVCache();

@@ -1056,8 +1054,8 @@ export class LLMChatPipeline {
`decoding-time=${((decodingEnd - decodingStart) / 1000).toFixed(4)} sec`;

// simply log tokens for eyeballing.
console.log("Logits:");
console.log(logitsOnCPU.toArray());
console.log(msg);
log.info("Logits:");
log.info(logitsOnCPU.toArray());
log.info(msg);
}
}
16 changes: 10 additions & 6 deletions src/service_worker.ts
@@ -1,7 +1,8 @@
import * as tvmjs from "tvmjs";
import log from "loglevel";
import { AppConfig, ChatOptions, MLCEngineConfig } from "./config";
import { ReloadParams, WorkerRequest, WorkerResponse } from "./message";
import { MLCEngineInterface, InitProgressReport } from "./types";
import { MLCEngineInterface, InitProgressReport, LogLevel } from "./types";

(GitHub Actions / lint: warning on line 5 — 'LogLevel' is defined but never used)
import {
MLCEngineWorkerHandler,
WebWorkerMLCEngine,
@@ -90,7 +91,7 @@
onError?: () => void,
): void {
const msg = event.data as WorkerRequest;
console.debug(
log.trace(
`ServiceWorker message: [${msg.kind}] ${JSON.stringify(msg.content)}`,
);

@@ -114,7 +115,7 @@
areChatOptionsEqual(this.chatOpts, params.chatOpts) &&
areAppConfigsEqual(this.appConfig, params.appConfig)
) {
console.log("Already loaded the model. Skip loading");
log.info("Already loaded the model. Skip loading");
const gpuDetectOutput = await tvmjs.detectGPUDevice();
if (gpuDetectOutput == undefined) {
throw Error("Cannot find WebGPU in the environment");
@@ -206,6 +207,9 @@
);
}
const serviceWorkerMLCEngine = new ServiceWorkerMLCEngine(serviceWorker);
if (engineConfig?.logLevel) {
serviceWorkerMLCEngine.setLogLevel(engineConfig.logLevel);
}
serviceWorkerMLCEngine.setInitProgressCallback(
engineConfig?.initProgressCallback,
);
@@ -234,7 +238,7 @@
"message",
(event: MessageEvent) => {
const msg = event.data;
console.debug(
log.trace(
`MLC client message: [${msg.kind}] ${JSON.stringify(msg.content)}`,
);
try {
@@ -246,7 +250,7 @@
} catch (err: any) {
// This is expected to throw if user has multiple windows open
if (!err.message.startsWith("return from a unknown uuid")) {
console.error("CreateWebServiceWorkerMLCEngine.onmessage", err);
log.error("CreateWebServiceWorkerMLCEngine.onmessage", err);
}
}
},
@@ -255,7 +259,7 @@
setInterval(() => {
this.worker.postMessage({ kind: "keepAlive", uuid: crypto.randomUUID() });
this.missedHeatbeat += 1;
console.debug("missedHeatbeat", this.missedHeatbeat);
log.trace("missedHeatbeat", this.missedHeatbeat);
}, keepAliveMs);
}

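
The service-worker path gets the same treatment; a sketch assuming the `CreateServiceWorkerMLCEngine` factory exported from this file (the factory name and model id are assumptions, not shown in the hunks above):

```ts
import { CreateServiceWorkerMLCEngine } from "./service_worker";

// logLevel is forwarded to the worker-backed engine only when provided,
// matching the `if (engineConfig?.logLevel)` guard added above.
const engine = await CreateServiceWorkerMLCEngine("SOME_MODEL_ID", {
  logLevel: "WARN",
});
```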
17 changes: 17 additions & 0 deletions src/types.ts
@@ -194,4 +194,21 @@ export interface MLCEngineInterface {
inputIds: Array<number>,
isPrefill: boolean,
): Promise<number>;

/**
* Set MLCEngine logging output level
*
* @param logLevel The new log level
*/
setLogLevel(logLevel: LogLevel): void;
}

export const LOG_LEVELS = {
TRACE: 0,
DEBUG: 1,
INFO: 2,
WARN: 3,
ERROR: 4,
SILENT: 5,
};
export type LogLevel = keyof typeof LOG_LEVELS;
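
A small sketch of how the new `LogLevel` union and `LOG_LEVELS` map are intended to be used (the case-insensitive handling of string level names is `loglevel`'s documented behavior):

```ts
import log from "loglevel";
import { LogLevel, LOG_LEVELS } from "./types";

// Only the six keys of LOG_LEVELS type-check as a LogLevel.
const level: LogLevel = "DEBUG";

// loglevel accepts level names case-insensitively, so these uppercase keys
// map directly onto its trace/debug/info/warn/error/silent levels.
log.setLevel(level);

// The numeric values allow severity comparisons, e.g. gating expensive diagnostics.
if (LOG_LEVELS[level] <= LOG_LEVELS.INFO) {
  log.debug("verbose diagnostics enabled");
}
```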
6 changes: 6 additions & 0 deletions src/web_worker.ts
@@ -9,6 +9,7 @@ import {
GenerateProgressCallback,
InitProgressCallback,
InitProgressReport,
LogLevel,
} from "./types";
import {
ChatCompletionRequest,
@@ -31,6 +32,7 @@ import {
WorkerResponse,
WorkerRequest,
} from "./message";
import log from "loglevel";

export interface PostMessageHandler {
postMessage: (message: any) => void;
@@ -624,4 +626,8 @@ export class WebWorkerMLCEngine implements MLCEngineInterface {
}
}
}

setLogLevel(logLevel: LogLevel) {
log.setLevel(logLevel);
}
}