-
Notifications
You must be signed in to change notification settings - Fork 797
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support image input in chatCompletionRequest
- Enable content being an array, which can have image_url - Introduce ModelType.VLM so that only VLM can handle non-string message content - Thus pass in loadedModelType to postInitCheck, hence add loadedModelIdToModelType in Engine - Change unit tests correspondingly
- Loading branch information
1 parent
429e719
commit fad3df9
Showing
10 changed files
with
432 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# WebLLM Get Started App | ||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting. | ||
To try it out, you can do the following steps under this folder | ||
|
||
```bash | ||
npm install | ||
npm start | ||
``` | ||
|
||
Note: if you would like to hack on the WebLLM core package, you can change the
web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only
recommended if you intend to modify the WebLLM core package itself. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"name": "get-started", | ||
"version": "0.1.0", | ||
"private": true, | ||
"scripts": { | ||
"start": "parcel src/vision_model.html --port 8888", | ||
"build": "parcel build src/vision_model.html --dist-dir lib" | ||
}, | ||
"devDependencies": { | ||
"buffer": "^5.7.1", | ||
"parcel": "^2.8.3", | ||
"process": "^0.11.10", | ||
"tslib": "^2.3.1", | ||
"typescript": "^4.9.5", | ||
"url": "^0.11.3" | ||
}, | ||
"dependencies": { | ||
"@mlc-ai/web-llm": "file:../.." | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<!doctype html> | ||
<html> | ||
<script> | ||
webLLMGlobal = {}; | ||
</script> | ||
<body> | ||
<h2>WebLLM Test Page</h2> | ||
Open console to see output | ||
<br /> | ||
<br /> | ||
<label id="init-label"> </label> | ||
|
||
<h3>Prompt</h3> | ||
<label id="prompt-label"> </label> | ||
|
||
<h3>Response</h3> | ||
<label id="generate-label"> </label> | ||
<br /> | ||
<label id="stats-label"> </label> | ||
|
||
<script type="module" src="./vision_model.ts"></script> | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import * as webllm from "@mlc-ai/web-llm"; | ||
|
||
function setLabel(id: string, text: string) { | ||
const label = document.getElementById(id); | ||
if (label == null) { | ||
throw Error("Cannot find label " + id); | ||
} | ||
label.innerText = text; | ||
} | ||
|
||
async function main() { | ||
const initProgressCallback = (report: webllm.InitProgressReport) => { | ||
setLabel("init-label", report.text); | ||
}; | ||
const selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC"; | ||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine( | ||
selectedModel, | ||
{ | ||
initProgressCallback: initProgressCallback, | ||
logLevel: "INFO", // specify the log level | ||
}, | ||
); | ||
|
||
// 1. Single image input (with choices) | ||
const messages: webllm.ChatCompletionMessageParam[] = [ | ||
{ | ||
role: "system", | ||
content: | ||
"You are a helpful and honest assistant that answers question concisely.", | ||
}, | ||
{ | ||
role: "user", | ||
content: [ | ||
{ type: "text", text: "List the items in the image concisely." }, | ||
{ | ||
type: "image_url", | ||
image_url: { | ||
url: "https://www.ilankelman.org/stopsigns/australia.jpg", | ||
}, | ||
}, | ||
], | ||
}, | ||
]; | ||
const request0: webllm.ChatCompletionRequest = { | ||
stream: false, // can be streaming, same behavior | ||
messages: messages, | ||
}; | ||
const reply0 = await engine.chat.completions.create(request0); | ||
const replyMessage0 = await engine.getMessage(); | ||
console.log(reply0); | ||
console.log(replyMessage0); | ||
console.log(reply0.usage); | ||
|
||
// 2. A follow up text-only question | ||
messages.push({ role: "assistant", content: replyMessage0 }); | ||
messages.push({ role: "user", content: "What is special about this image?" }); | ||
const request1: webllm.ChatCompletionRequest = { | ||
stream: false, // can be streaming, same behavior | ||
messages: messages, | ||
}; | ||
const reply1 = await engine.chat.completions.create(request1); | ||
const replyMessage1 = await engine.getMessage(); | ||
console.log(reply1); | ||
console.log(replyMessage1); | ||
console.log(reply1.usage); | ||
|
||
// 3. A follow up multi-image question | ||
messages.push({ role: "assistant", content: replyMessage1 }); | ||
messages.push({ | ||
role: "user", | ||
content: [ | ||
{ type: "text", text: "What about these two images? Answer concisely." }, | ||
{ | ||
type: "image_url", | ||
image_url: { url: "https://www.ilankelman.org/eiffeltower.jpg" }, | ||
}, | ||
{ | ||
type: "image_url", | ||
image_url: { url: "https://www.ilankelman.org/sunset.jpg" }, | ||
}, | ||
], | ||
}); | ||
const request2: webllm.ChatCompletionRequest = { | ||
stream: false, // can be streaming, same behavior | ||
messages: messages, | ||
}; | ||
const reply2 = await engine.chat.completions.create(request2); | ||
const replyMessage2 = await engine.getMessage(); | ||
console.log(reply2); | ||
console.log(replyMessage2); | ||
console.log(reply2.usage); | ||
} | ||
|
||
main(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.