[Models] Add Phi3-mini, StableLM 1.6B, Qwen 1.8B, update MLC runtime (#433)

This PR updates the prebuilt models to v0.2.39, compiled with
mlc-ai/binary-mlc-llm-libs#123.

The main change is the new MLC-LLM runtime, which adds grammar support (i.e.
JSON mode) for Llama 3.
- Hence, we now read the field `tokenizer_info` (or the deprecated
`token_table_postproc_method`) from `mlc-chat-config.json` when
post-processing the token table for grammar; see the sketch after this list.
  - If neither is available, we use the default `byte_fallback`.
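
A minimal sketch of that fallback order, assuming the inner key under `tokenizer_info` is spelled `token_postproc_method` (only `tokenizer_info`, `token_table_postproc_method`, and the `byte_fallback` default are confirmed by this PR; the rest is illustrative):

```ts
// Hedged sketch, not the actual web-llm source: resolve the token-table
// post-processing method for grammar from a parsed mlc-chat-config.json.
interface MLCChatConfigSubset {
  tokenizer_info?: { token_postproc_method?: string }; // inner key name assumed
  token_table_postproc_method?: string; // deprecated field
}

function resolveTokenPostprocMethod(config: MLCChatConfigSubset): string {
  return (
    config.tokenizer_info?.token_postproc_method ??
    config.token_table_postproc_method ?? // fall back to the deprecated field
    "byte_fallback" // default when neither field is present
  );
}
```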

New prebuilt models introduced (see the lookup sketch after this list):
- Phi3-mini-4k
- Hermes-2-Pro-Llama-3-8B
- Qwen1.5-1.8B
- StableLM-2-zephyr_1.6B
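
The exact `model_id` strings these ship under can be inspected at runtime from web-llm's built-in model list; a sketch (the filter patterns are guesses, so check the full printed list):

```ts
import * as webllm from "@mlc-ai/web-llm";

// Print the prebuilt model ids, narrowed to the newly added families.
// The regex is illustrative; exact id strings may differ.
const ids = webllm.prebuiltAppConfig.model_list.map((m) => m.model_id);
console.log(ids.filter((id) => /Phi3|Qwen1\.5|stablelm|Hermes-2-Pro/i.test(id)));
```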

Updates to examples:
- json-mode and json-schema now use Llama 3 for their demonstrations
- Function calling inside json-schema now uses `Hermes-2-Pro-Llama-3-8B`
instead of `Hermes-2-Pro-Mistral`
CharlieFRuan committed May 29, 2024
1 parent 44edca1 commit 3731fe6
Showing 7 changed files with 327 additions and 190 deletions.
55 changes: 29 additions & 26 deletions examples/json-mode/src/json_mode.ts
@@ -1,37 +1,40 @@
 import * as webllm from "@mlc-ai/web-llm";
 
 function setLabel(id: string, text: string) {
-    const label = document.getElementById(id);
-    if (label == null) {
-        throw Error("Cannot find label " + id);
-    }
-    label.innerText = text;
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
 }
 
 async function main() {
-    const initProgressCallback = (report: webllm.InitProgressReport) => {
-        setLabel("init-label", report.text);
-    };
-    const selectedModel = "Llama-2-7b-chat-hf-q4f32_1";
-    const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-        selectedModel,
-        { initProgressCallback: initProgressCallback }
-    );
+  const initProgressCallback = (report: webllm.InitProgressReport) => {
+    setLabel("init-label", report.text);
+  };
+  const selectedModel = "Llama-3-8B-Instruct-q4f32_1";
+  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
+    selectedModel,
+    { initProgressCallback: initProgressCallback },
+  );
 
-    const request: webllm.ChatCompletionRequest = {
-        stream: false, // works with streaming, logprobs, top_logprobs as well
-        messages: [
-            { "role": "user", "content": "Write a short JSON file introducing yourself." }
-        ],
-        n: 2,
-        max_gen_len: 128,
-        response_format: { type: "json_object" } as webllm.ResponseFormat
-    };
+  const request: webllm.ChatCompletionRequest = {
+    stream: false, // works with streaming, logprobs, top_logprobs as well
+    messages: [
+      {
+        role: "user",
+        content: "Write a short JSON file introducing yourself.",
+      },
+    ],
+    n: 2,
+    max_gen_len: 128,
+    response_format: { type: "json_object" } as webllm.ResponseFormat,
+  };
 
-    const reply0 = await engine.chatCompletion(request);
-    console.log(reply0);
-    console.log("First reply's last choice:\n" + await engine.getMessage());
-    console.log(await engine.runtimeStatsText());
+  const reply0 = await engine.chatCompletion(request);
+  console.log(reply0);
+  console.log("First reply's last choice:\n" + (await engine.getMessage()));
+  console.log(await engine.runtimeStatsText());
 }
 
 main();
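
For reference, a hedged sketch of consuming the JSON-mode reply above, assuming the OpenAI-style `choices[].message.content` shape that web-llm mirrors:

```ts
// With response_format { type: "json_object" }, each choice's content is
// constrained to valid JSON, so JSON.parse should not throw; the empty-object
// fallback is purely defensive.
const reply = await engine.chatCompletion(request);
for (const choice of reply.choices) {
  console.log(JSON.parse(choice.message.content ?? "{}"));
}
```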
18 changes: 9 additions & 9 deletions examples/json-schema/src/json_schema.ts
@@ -38,8 +38,8 @@ async function simpleStructuredTextExample() {
     setLabel("init-label", report.text);
   };
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-    "Llama-2-7b-chat-hf-q4f16_1",
-    { initProgressCallback: initProgressCallback }
+    "Llama-3-8B-Instruct-q4f16_1",
+    { initProgressCallback: initProgressCallback },
   );
 
   const request: webllm.ChatCompletionRequest = {
@@ -105,8 +105,8 @@ async function harryPotterExample() {
   };
 
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-    "Llama-2-7b-chat-hf-q4f16_1",
-    { initProgressCallback: initProgressCallback }
+    "Llama-3-8B-Instruct-q4f16_1",
+    { initProgressCallback: initProgressCallback },
   );
 
   const request: webllm.ChatCompletionRequest = {
@@ -138,7 +138,7 @@ async function functionCallingExample() {
     Type.Object({
       arguments: Type.Any(),
       name: Type.String(),
-    })
+    }),
   ),
 });
 type T = Static<typeof T>;
@@ -170,12 +170,12 @@ async function functionCallingExample() {
     setLabel("init-label", report.text);
   };
 
-  const selectedModel = "Hermes-2-Pro-Mistral-7B-q4f16_1";
+  const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1";
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
     selectedModel,
     {
       initProgressCallback: initProgressCallback,
-    }
+    },
   );
 
   const request: webllm.ChatCompletionRequest = {
@@ -184,12 +184,12 @@ async function functionCallingExample() {
     {
       role: "system",
       content: `You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> ${JSON.stringify(
-        tools
+        tools,
       )} </tools>. Do not stop calling functions until the task has been accomplished or you've reached max iteration of 10.
       Calling multiple functions at once can overload the system and increase cost so call one function at a time please.
       If you plan to continue with analysis, always call another function.
       Return a valid json object (using double quotes) in the following schema: ${JSON.stringify(
-        schema
+        schema,
       )}.`,
     },
     {
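
A hedged sketch of the schema-constrained request this example builds toward, assuming `response_format` also accepts a stringified schema next to `type: "json_object"` (the typebox usage mirrors the diff above; the schema itself is illustrative):

```ts
import { Type, type Static } from "@sinclair/typebox";
import * as webllm from "@mlc-ai/web-llm";

// Illustrative schema; the real example constrains a tool_calls array instead.
const Student = Type.Object({
  name: Type.String(),
  house: Type.String(),
});
type Student = Static<typeof Student>;

const request: webllm.ChatCompletionRequest = {
  messages: [
    { role: "user", content: "Introduce a Hogwarts student as JSON." },
  ],
  response_format: {
    type: "json_object",
    schema: JSON.stringify(Student), // schema field name assumed
  } as webllm.ResponseFormat,
};
```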