[Models] Add Phi3-mini, StableLM 1.6B, Qwen 1.8B, update MLC runtime (#433)

This PR updates models to v0.2.39, compiled with mlc-ai/binary-mlc-llm-libs#123.

The main change is the new MLC-LLM runtime, which supports grammar (i.e. JSON mode) for Llama 3.
- Hence we now read the field `tokenizer_info` (or the deprecated `token_table_postproc_method`) from `mlc-chat-config.json` when post-processing the token table for grammar.
- If neither is available, we use the default `byte_fallback`.

New prebuilt models introduced:
- Phi3-mini-4k
- Hermes-2-Pro-Llama-3-8B
- Qwen1.5-1.8B
- StableLM-2-zephyr_1.6B

Updates to examples:
- json-mode and json-schema now use Llama 3 to demonstrate.
- Function calling inside json-schema now uses `Hermes-2-Pro-Llama-3-8B` instead of `Hermes-2-Pro-Mistral`.
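A rough sketch of that fallback order, for illustration only: the names `tokenizer_info`, `token_table_postproc_method`, and `byte_fallback` come from the commit message, while the config shape and the helper name are hypothetical rather than the actual runtime code.

```ts
// Hypothetical sketch of the token-table post-processing selection described
// above. Only the names `tokenizer_info`, `token_table_postproc_method`, and
// "byte_fallback" come from the commit message; the config shape and helper
// name are assumptions for illustration.
interface ChatConfigLike {
  tokenizer_info?: { token_postproc_method?: string };
  token_table_postproc_method?: string; // deprecated predecessor field
}

function resolveTokenPostprocMethod(config: ChatConfigLike): string {
  // Prefer the new `tokenizer_info` entry from mlc-chat-config.json.
  const fromTokenizerInfo = config.tokenizer_info?.token_postproc_method;
  if (fromTokenizerInfo) {
    return fromTokenizerInfo;
  }
  // Otherwise fall back to the deprecated top-level field.
  if (config.token_table_postproc_method) {
    return config.token_table_postproc_method;
  }
  // Default when neither field is present.
  return "byte_fallback";
}
```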
1 parent 44edca1 · commit 3731fe6 · Showing 7 changed files with 327 additions and 190 deletions.
```diff
@@ -1,37 +1,40 @@
 import * as webllm from "@mlc-ai/web-llm";
 
 function setLabel(id: string, text: string) {
-    const label = document.getElementById(id);
-    if (label == null) {
-        throw Error("Cannot find label " + id);
-    }
-    label.innerText = text;
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
 }
 
 async function main() {
-    const initProgressCallback = (report: webllm.InitProgressReport) => {
-        setLabel("init-label", report.text);
-    };
-    const selectedModel = "Llama-2-7b-chat-hf-q4f32_1";
-    const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-        selectedModel,
-        { initProgressCallback: initProgressCallback }
-    );
+  const initProgressCallback = (report: webllm.InitProgressReport) => {
+    setLabel("init-label", report.text);
+  };
+  const selectedModel = "Llama-3-8B-Instruct-q4f32_1";
+  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
+    selectedModel,
+    { initProgressCallback: initProgressCallback },
+  );
 
-    const request: webllm.ChatCompletionRequest = {
-        stream: false, // works with streaming, logprobs, top_logprobs as well
-        messages: [
-            { "role": "user", "content": "Write a short JSON file introducing yourself." }
-        ],
-        n: 2,
-        max_gen_len: 128,
-        response_format: { type: "json_object" } as webllm.ResponseFormat
-    };
+  const request: webllm.ChatCompletionRequest = {
+    stream: false, // works with streaming, logprobs, top_logprobs as well
+    messages: [
+      {
+        role: "user",
+        content: "Write a short JSON file introducing yourself.",
+      },
+    ],
+    n: 2,
+    max_gen_len: 128,
+    response_format: { type: "json_object" } as webllm.ResponseFormat,
+  };
 
-    const reply0 = await engine.chatCompletion(request);
-    console.log(reply0);
-    console.log("First reply's last choice:\n" + await engine.getMessage());
-    console.log(await engine.runtimeStatsText());
+  const reply0 = await engine.chatCompletion(request);
+  console.log(reply0);
+  console.log("First reply's last choice:\n" + (await engine.getMessage()));
+  console.log(await engine.runtimeStatsText());
 }
 
 main();
```
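The json-schema example mentioned in the commit message constrains generation to a caller-supplied JSON schema. Below is a minimal sketch in the same style as the diff above; it assumes a `schema` string field on `webllm.ResponseFormat` and an engine created as in the example, so treat it as an illustration rather than the exact contents of that example.

```ts
import * as webllm from "@mlc-ai/web-llm";

// Illustrative only: the `schema` field on ResponseFormat is assumed here;
// see the json-schema example in this repository for the exact API.
async function schemaDemo(engine: webllm.MLCEngineInterface) {
  // A small JSON schema describing the expected reply shape.
  const schema = JSON.stringify({
    type: "object",
    properties: {
      name: { type: "string" },
      hobbies: { type: "array", items: { type: "string" } },
    },
    required: ["name", "hobbies"],
  });

  const request: webllm.ChatCompletionRequest = {
    messages: [
      { role: "user", content: "Introduce yourself as a JSON object." },
    ],
    max_gen_len: 128,
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  };

  const reply = await engine.chatCompletion(request);
  console.log(reply.choices[0].message.content);
}
```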