Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add vision #329

Closed
wants to merge 9 commits into from
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ OPENAI_COMPLETION_MAX_TOKENS=
OPENAI_COMPLETION_FREQUENCY_PENALTY=
OPENAI_COMPLETION_PRESENCE_PENALTY=
OPENAI_IMAGE_GENERATION_SIZE=
OPENAI_IMAGE_GENERATION_MODEL=
OPENAI_IMAGE_GENERATION_QUALITY=

LINE_TIMEOUT=
LINE_CHANNEL_ACCESS_TOKEN=
Expand All @@ -43,3 +45,8 @@ LINE_CHANNEL_SECRET=
SERPAPI_TIMEOUT=
SERPAPI_API_KEY=
SERPAPI_LOCATION=

PROVIDER_BASE_URL=
PROVIDER_BASE_TOKEN=
PROVIDER_BASE_MODEL=

2 changes: 1 addition & 1 deletion app/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const handleEvents = async (events = []) => (
events
.map((event) => new Event(event))
.filter((event) => event.isMessage)
.filter((event) => event.isText || event.isAudio)
.filter((event) => event.isText || event.isAudio || event.isImage)
.map((event) => new Context(event))
.map((context) => context.initialize()),
))
Expand Down
20 changes: 20 additions & 0 deletions app/context.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
addMark,
convertText,
fetchAudio,
fetchImage,
fetchGroup,
fetchUser,
generateTranscription,
Expand Down Expand Up @@ -87,6 +88,9 @@ class Context {
const text = this.transcription.replace(config.BOT_NAME, '').trim();
return addMark(text);
}
if (this.event.isImage) {
return this.transcription.trim()
}
return '?';
}

Expand All @@ -99,6 +103,10 @@ class Context {
const text = this.transcription.toLowerCase();
return text.startsWith(config.BOT_NAME.toLowerCase());
}
if (this.event.isImage) {
const text = this.transcription.toLowerCase();
return text.startsWith(config.BOT_NAME.toLowerCase());
}
return false;
}

Expand All @@ -116,6 +124,13 @@ class Context {
return this.pushError(err);
}
}
if (this.event.isImage) {
try {
await this.saveImage();
} catch (err) {
return this.pushError(err);
}
}
updateHistory(this.id, (history) => history.write(this.source.name, this.trimmedText));
return this;
}
Expand Down Expand Up @@ -171,6 +186,11 @@ class Context {
this.transcription = convertText(text);
}

async saveImage() {
const base64String = await fetchImage(this.event.messageId);
this.transcription = (base64String);
}

/**
* @param {Object} param
* @param {string} param.text
Expand Down
30 changes: 23 additions & 7 deletions app/handlers/talk.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import config from '../../config/index.js';
import { t } from '../../locales/index.js';
import { ROLE_AI, ROLE_HUMAN } from '../../services/openai.js';
import { generateCompletion } from '../../utils/index.js';
import { COMMAND_BOT_CONTINUE, COMMAND_BOT_TALK } from '../commands/index.js';
import { COMMAND_BOT_CONTINUE, COMMAND_BOT_TALK, COMMAND_BOT_FORGET } from '../commands/index.js';
import Context from '../context.js';
import { updateHistory } from '../history/index.js';
import { getPrompt, setPrompt } from '../prompt/index.js';
Expand All @@ -24,14 +24,30 @@ const check = (context) => (
const exec = (context) => check(context) && (
async () => {
const prompt = getPrompt(context.userId);
prompt.write(ROLE_HUMAN, `${t('__COMPLETION_DEFAULT_AI_TONE')(config.BOT_TONE)}${context.trimmedText}`).write(ROLE_AI);
try {
const { text, isFinishReasonStop } = await generateCompletion({ prompt });
prompt.patch(text);
const obj = {
text: "",
actions: []
}

if (context.event.isImage) {
context.pushText('Get Image', [COMMAND_BOT_FORGET]);
obj.text = context.trimmedText
prompt.writeImageMsg(ROLE_HUMAN, obj.text).write(ROLE_AI);
prompt.patch('Get Image!!');
updateHistory(context.id, (history) => history.writeImageMsg(ROLE_HUMAN, obj.text));

} else {
prompt.write(ROLE_HUMAN, `${t('__COMPLETION_DEFAULT_AI_TONE')(config.BOT_TONE)}${context.trimmedText}`).write(ROLE_AI);
const { text, isFinishReasonStop } = await generateCompletion({ prompt });
obj.text = text;
obj.actions = isFinishReasonStop ? [COMMAND_BOT_FORGET] : [COMMAND_BOT_CONTINUE];
context.pushText(obj.text, obj.actions);
prompt.patch(obj.text);
updateHistory(context.id, (history) => history.write(config.BOT_NAME, obj.text));
}

setPrompt(context.userId, prompt);
updateHistory(context.id, (history) => history.write(config.BOT_NAME, text));
const actions = isFinishReasonStop ? [] : [COMMAND_BOT_CONTINUE];
context.pushText(text, actions);
} catch (err) {
context.pushError(err);
}
Expand Down
19 changes: 19 additions & 0 deletions app/history/history.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,25 @@ class History {
return this;
}

writeImageMsg(role, content = '') {
this.messages.push({
role: role,
content: [
{
type: 'text',
text: '這是一張圖片'
},
{
type: 'image',
image_url: {
url: content
}
}
]
});
return this;
}

/**
* @param {string} content
*/
Expand Down
3 changes: 2 additions & 1 deletion app/history/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const updateHistory = (contextId, callback) => {
const history = getHistory(contextId);
callback(history);
setHistory(contextId, history);
printHistories()
};

/**
Expand All @@ -37,7 +38,7 @@ const printHistories = () => {
.filter((contextId) => getHistory(contextId).messages.length > 0)
.map((contextId) => `\n=== ${contextId.slice(0, 6)} ===\n\n${getHistory(contextId).toString()}\n`);
if (messages.length < 1) return;
console.info(messages.join(''));
// console.info("printHistories", messages.join(''));
};

export {
Expand Down
5 changes: 5 additions & 0 deletions app/models/event.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
MESSAGE_TYPE_AUDIO,
MESSAGE_TYPE_STICKER,
MESSAGE_TYPE_TEXT,
MESSAGE_TYPE_IMAGE,
SOURCE_TYPE_GROUP,
} from '../../services/line.js';

Expand Down Expand Up @@ -62,6 +63,10 @@ class Event {
return this.message.type === MESSAGE_TYPE_AUDIO;
}

get isImage() {
return this.message.type === MESSAGE_TYPE_IMAGE;
}

/**
* @returns {string}
*/
Expand Down
8 changes: 6 additions & 2 deletions app/prompt/message.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@ class Message {

get isEnquiring() {
return this.content === TYPE_SUM
|| this.content === TYPE_ANALYZE
|| this.content === TYPE_TRANSLATE;
|| this.content === TYPE_ANALYZE
|| this.content === TYPE_TRANSLATE;
}

toString() {
if (Array.isArray(this.content)) {
return `\n${this.role}: ${this.content[0].text}`
};

return this.role ? `\n${this.role}: ${this.content}` : this.content;
}
}
Expand Down
18 changes: 18 additions & 0 deletions app/prompt/prompt.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,24 @@ class Prompt {
return this;
}

writeImageMsg(role, content = '') {
const tempContent = [
{
type: 'text',
text: '這是一張圖片'
},
{
type: 'image_url',
image_url: {
url: content
}
}
]

this.messages.push(new Message({ role, content: tempContent }));
return this;
}

/**
* @param {string} content
*/
Expand Down
9 changes: 7 additions & 2 deletions config/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,25 @@ const config = Object.freeze({
VERCEL_DEPLOY_HOOK_URL: env.VERCEL_DEPLOY_HOOK_URL || null,
OPENAI_TIMEOUT: env.OPENAI_TIMEOUT || env.APP_API_TIMEOUT,
OPENAI_API_KEY: env.OPENAI_API_KEY || null,
OPENAI_BASE_URL: env.OPENAI_BASE_URL || 'https://api.openai.com',
OPENAI_BASE_URL: env.OPENAI_BASE_URL || 'https://api.openai.com/v1',
OPENAI_COMPLETION_MODEL: env.OPENAI_COMPLETION_MODEL || 'gpt-3.5-turbo',
OPENAI_COMPLETION_TEMPERATURE: Number(env.OPENAI_COMPLETION_TEMPERATURE) || 1,
OPENAI_COMPLETION_MAX_TOKENS: Number(env.OPENAI_COMPLETION_MAX_TOKENS) || 64,
OPENAI_COMPLETION_MAX_TOKENS: Number(env.OPENAI_COMPLETION_MAX_TOKENS) || 200,
OPENAI_COMPLETION_FREQUENCY_PENALTY: Number(env.OPENAI_COMPLETION_FREQUENCY_PENALTY) || 0,
OPENAI_COMPLETION_PRESENCE_PENALTY: Number(env.OPENAI_COMPLETION_PRESENCE_PENALTY) || 0.6,
OPENAI_COMPLETION_STOP_SEQUENCES: env.OPENAI_COMPLETION_STOP_SEQUENCES ? String(env.OPENAI_COMPLETION_STOP_SEQUENCES).split(',') : [' assistant:', ' user:'],
OPENAI_IMAGE_GENERATION_SIZE: env.OPENAI_IMAGE_GENERATION_SIZE || '256x256',
OPENAI_IMAGE_GENERATION_MODEL: env.OPENAI_IMAGE_GENERATION_MODEL || 'dall-e-2',
OPENAI_IMAGE_GENERATION_QUALITY: env.OPENAI_IMAGE_GENERATION_QUALITY || 'standard',
LINE_TIMEOUT: env.LINE_TIMEOUT || env.APP_API_TIMEOUT,
LINE_CHANNEL_ACCESS_TOKEN: env.LINE_CHANNEL_ACCESS_TOKEN || null,
LINE_CHANNEL_SECRET: env.LINE_CHANNEL_SECRET || null,
SERPAPI_TIMEOUT: env.SERPAPI_TIMEOUT || env.APP_API_TIMEOUT,
SERPAPI_API_KEY: env.SERPAPI_API_KEY || null,
SERPAPI_LOCATION: env.SERPAPI_LOCATION || 'tw',
PROVIDER_BASE_URL: env.PROVIDER_BASE_URL || 'https://api.openai.com/v1',
PROVIDER_BASE_TOKEN: env.PROVIDER_BASE_TOKEN || null,
PROVIDER_BASE_MODEL: env.PROVIDER_BASE_MODEL || 'gpt-3.5-turbo',
});

export default config;
Loading