From d4d8c89f50044e42813f0fa4824247a3bffa1795 Mon Sep 17 00:00:00 2001
From: KevinHuSh
Date: Wed, 29 May 2024 10:40:07 +0800
Subject: [PATCH] limit the system context length of conversation messages. (#962)

### What problem does this PR solve?

#951

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/db/services/dialog_service.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index f068f9fa7a3..f5beb9480a5 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -130,9 +130,13 @@ def chat(dialog, messages, stream=True, **kwargs):
 
     kwargs["knowledge"] = "\n".join(knowledges)
     gen_conf = dialog.llm_setting
-    msg = [{"role": m["role"], "content": m["content"]}
-           for m in messages if m["role"] != "system"]
+
+    msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
+    msg.extend([{"role": m["role"], "content": m["content"]}
+                for m in messages if m["role"] != "system"])
     used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
+    assert len(msg) >= 2, f"message_fit_in has bug: {msg}"
+
     if "max_tokens" in gen_conf:
         gen_conf["max_tokens"] = min(
             gen_conf["max_tokens"],
@@ -165,14 +169,13 @@ def decorate_answer(answer):
 
     if stream:
         answer = ""
-        for ans in chat_mdl.chat_streamly(prompt_config["system"].format(**kwargs), msg, gen_conf):
+        for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf):
             answer = ans
             yield {"answer": answer, "reference": {}}
         yield decorate_answer(answer)
     else:
         answer = chat_mdl.chat(
-            prompt_config["system"].format(
-                **kwargs), msg, gen_conf)
+            msg[0]["content"], msg[1:], gen_conf)
         chat_logger.info("User: {}|Assistant: {}".format(
             msg[-1]["content"], answer))
         yield decorate_answer(answer)