From 2c0741e34b30d8f36ef4d822aaaebbdbc77d9703 Mon Sep 17 00:00:00 2001 From: Maksym Sobolyev <sobomax@sippysoft.com> Date: Sun, 12 Jan 2025 04:38:13 +0000 Subject: [PATCH] Allow system prompt to be configured instead of hardcoded. --- Apps/AIAttendant/AIAActor.py | 2 +- Apps/AIAttendant/AIAProfile.py | 3 +++ Apps/AIAttendant/AIASession.py | 7 ++++--- Cluster/InfernLLMActor.py | 6 +++--- Cluster/LLMSession.py | 13 +++++++------ examples/ai_attendant.yaml | 3 ++- examples/sippylabs.txt | 8 ++++++++ 7 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 examples/sippylabs.txt diff --git a/Apps/AIAttendant/AIAActor.py b/Apps/AIAttendant/AIAActor.py index 8beb7e4..36488e3 100644 --- a/Apps/AIAttendant/AIAActor.py +++ b/Apps/AIAttendant/AIAActor.py @@ -55,7 +55,7 @@ def start(self, aia_prof: 'AIAProfile', sip_actr:InfernSIPActor): self.thumbstones = [] def new_sip_session_received(self, new_sess:RemoteSessionOffer): - aia_sess = AIASession(self, new_sess) + aia_sess = AIASession(self, new_sess, self.aia_prof.llm_prompt) print(f'{aia_sess=}') self.sessions[aia_sess.id] = aia_sess diff --git a/Apps/AIAttendant/AIAProfile.py b/Apps/AIAttendant/AIAProfile.py index 9e9a282..9d09e77 100644 --- a/Apps/AIAttendant/AIAProfile.py +++ b/Apps/AIAttendant/AIAProfile.py @@ -16,18 +16,21 @@ class AIAProfile(): 'schema': { 'tts_lang': {'type': 'string'}, 'stt_lang': {'type': 'string'}, + 'llm_prompt': {'type': 'string'}, } } } } stt_lang: str = 'en' tts_lang: str = 'en' + llm_prompt: str actor: Optional[AIAActor] = None def __init__(self, name, conf): self.name = name self.tts_lang = conf['tts_lang'] self.stt_lang = conf['stt_lang'] + self.llm_prompt = open(conf['llm_prompt']).read() def finalize(self, iconf:'InfernConfig'): pass diff --git a/Apps/AIAttendant/AIASession.py b/Apps/AIAttendant/AIASession.py index aa9bef3..f4a9e74 100644 --- a/Apps/AIAttendant/AIASession.py +++ b/Apps/AIAttendant/AIASession.py @@ -7,7 +7,7 @@ from Cluster.TTSSession import TTSRequest from 
Cluster.STTSession import STTRequest, STTResult, STTSentinel -from Cluster.LLMSession import LLMRequest, LLMResult +from Cluster.LLMSession import LLMRequest, LLMResult, LLMSessionParams from Cluster.RemoteTTSSession import RemoteTTSSession from Cluster.InfernRTPActor import InfernRTPActor from Core.T2T.NumbersToWords import NumbersToWords @@ -68,7 +68,7 @@ class AIASession(): stt_sess_term: callable text_in_buffer: List[str] - def __init__(self, aiaa:'AIAActor', new_sess:RemoteSessionOffer): + def __init__(self, aiaa:'AIAActor', new_sess:RemoteSessionOffer, llm_prompt:str): self.id = uuid4() self.say_buffer = [] sess_term_alice = partial(_sess_term, sterm=aiaa.aia_actr.sess_term.remote, sess_id=self.id, sip_sess_id=new_sess.sip_sess_id) @@ -81,7 +81,8 @@ def __init__(self, aiaa:'AIAActor', new_sess:RemoteSessionOffer): return self.rtp_actr, self.rtp_sess_id = rtp_alice stt_sess = aiaa.stt_actr.new_stt_session.remote(keep_context=True) - llm_sess = aiaa.llm_actr.new_llm_session.remote() + llmp = LLMSessionParams(llm_prompt) + llm_sess = aiaa.llm_actr.new_llm_session.remote(llmp) self.tts_sess = RemoteTTSSession(aiaa.tts_actr) self.stt_sess_id, self.llm_sess_id = ray.get([stt_sess, llm_sess]) self.stt_sess_term = partial(aiaa.stt_actr.stt_session_end.remote, self.stt_sess_id) diff --git a/Cluster/InfernLLMActor.py b/Cluster/InfernLLMActor.py index a16b2ee..4b9f24f 100644 --- a/Cluster/InfernLLMActor.py +++ b/Cluster/InfernLLMActor.py @@ -5,7 +5,7 @@ import ray from Cluster.InfernLLMWorker import InfernLLMWorker -from Cluster.LLMSession import LLMSession, LLMRequest, LLMInferRequest +from Cluster.LLMSession import LLMSession, LLMRequest, LLMInferRequest, LLMSessionParams @ray.remote(num_gpus=1.0, resources={"llm": 1}) class InfernLLMActor(): @@ -42,9 +42,9 @@ def res_cb(result): tq.put(result) def stop(self): self.llm.stop() - def new_llm_session(self): + def new_llm_session(self, sconf:LLMSessionParams): if self.debug: print('InfernLLMActor.new_llm_session') - sess 
= LLMSession(self.llm) + sess = LLMSession(self.llm, sconf) self.sessions[sess.id] = sess return sess.id diff --git a/Cluster/LLMSession.py b/Cluster/LLMSession.py index 5120ca6..ad34690 100644 --- a/Cluster/LLMSession.py +++ b/Cluster/LLMSession.py @@ -26,17 +26,18 @@ class LLMInferRequest(): def __init__(self, req:LLMRequest, context:List[dict]): self.req, self.context = req, tuple(context) +class LLMSessionParams(): + system_prompt: str + def __init__(self, system_prompt:str): + self.system_prompt = system_prompt + class LLMSession(): id: UUID context: List[dict] debug: bool = False - def __init__(self, llm): + def __init__(self, llm:'InfernLLMWorker', params:LLMSessionParams): self.id = uuid4() - self.context = [{"role": "system", "content": "You are Qwen, created by Alibaba Cloud. " + - "You are a helpful voice auto-attendant for the company Sippy Software. " + - "Start by greeting the caller and asking how you can help. " + - "Keep your messages brief and concise to reduce latency." + - "The model output is fed into the dumb TTS system for audio output: DO not add any extended formatting."}] + self.context = [{"role": "system", "content": params.system_prompt}] self.llm = llm def context_add(self, content:str, role:str = "user"): diff --git a/examples/ai_attendant.yaml b/examples/ai_attendant.yaml index b2e4862..caf5961 100644 --- a/examples/ai_attendant.yaml +++ b/examples/ai_attendant.yaml @@ -3,7 +3,7 @@ sip: bind: 192.168.24.29:5060 profiles: foo: - sip_server: 192.168.23.109:5070 + sip_server: 192.168.24.1:5070 sink: apps/ai_attendant/configuration1 username: 'incoming' password: 'user' @@ -18,3 +18,4 @@ apps: configuration1: stt_lang: 'en' tts_lang: 'en' + llm_prompt: 'examples/sippylabs.txt' diff --git a/examples/sippylabs.txt b/examples/sippylabs.txt new file mode 100644 index 0000000..8b311b5 --- /dev/null +++ b/examples/sippylabs.txt @@ -0,0 +1,8 @@ +You are Glenn, created by Max. 
+You are a Max sidekick chatbot to help him during long hours of coding and streaming online and to keep him company. +You and Max are streaming online on YouTube in a video podcast called "SIP Chronicles". +Start by greeting everyone, asking what the stream is about and telling a joke. +Keep your messages brief and concise to reduce latency, and keep the conversation light. +The model output is fed into the dumb TTS system for audio output: do not add any extended formatting. +Your input is generated by the STT system: it might have mistakes, typos, etc. +You can keep silent if not specifically asked, or if you feel the need to avoid interrupting Max's speech, by outputting a <nothingtosay> sequence.