-
机器有 4 张 4090D。

from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
from lmdeploy.vl import load_image
# Hugging Face repository ID of the 40B InternVL2 multimodal model.
model = 'OpenGVLab/InternVL2-40B'
# Custom system prompt (Chinese): introduces the assistant as InternVL,
# developed by Shanghai AI Lab, Tsinghua University and partners.
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。'
# Use the Hermes-2 Chinese chat template and override its default
# system message with the custom prompt above.
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
chat_template_config.meta_instruction = system_prompt
def chat(instruction, img):
    """Run one multimodal query through an InternVL2 TurboMind pipeline.

    Args:
        instruction: Text prompt for the model.
        img: Image object (e.g. returned by ``lmdeploy.vl.load_image``).

    Returns:
        The generated response text (also printed to stdout).
    """
    # NOTE(review): the pipeline is rebuilt on every call, which is very
    # expensive; callers doing repeated queries should hoist it. Kept here
    # to preserve the original interface.
    # cache_max_entry_count is lowered from the default so the KV cache
    # fits alongside the 40B weights on 4x 24 GB GPUs (tp=4) -- this is
    # the accepted fix from the discussion ("降低 --cache-max-entry-count").
    pipe = pipeline(
        model,
        chat_template_config=chat_template_config,
        backend_config=TurbomindEngineConfig(
            session_len=8192,
            tp=4,
            cache_max_entry_count=0.2,
        ),
    )
    response = pipe((instruction, img))
    print(response.text)
    return response.text
def main():
    """Download a sample image and ask the model to describe it."""
    tiger_image = load_image(
        'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'
    )
    chat('describe this image', tiger_image)
# Script entry point. (The original post appended "运行后,错误:" here --
# i.e. "after running, the error is:" -- reporting a runtime failure;
# see the discussion's accepted answer about cache_max_entry_count.)
if __name__ == '__main__':
    main()
Beta Was this translation helpful? Give feedback.
Answered by
navono
Aug 12, 2024
Replies: 2 comments
-
正在下载 AWQ 版本
Beta Was this translation helpful? Give feedback.
0 replies
-
降低 --cache-max-entry-count 参数可运行
Beta Was this translation helpful? Give feedback.
0 replies
Answer selected by
navono
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
降低 --cache-max-entry-count 参数可运行