From 2c0741e34b30d8f36ef4d822aaaebbdbc77d9703 Mon Sep 17 00:00:00 2001 From: Maksym Sobolyev <sobomax@sippysoft.com> Date: Sun, 12 Jan 2025 04:38:13 +0000 Subject: [PATCH] Allow system prompt to be configured instead of hardcoded. --- Apps/AIAttendant/AIAActor.py | 2 +- Apps/AIAttendant/AIAProfile.py | 3 +++ Apps/AIAttendant/AIASession.py | 7 ++++--- Cluster/InfernLLMActor.py | 6 +++--- Cluster/LLMSession.py | 13 +++++++------ examples/ai_attendant.yaml | 3 ++- examples/sippylabs.txt | 8 ++++++++ 7 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 examples/sippylabs.txt diff --git a/Apps/AIAttendant/AIAActor.py b/Apps/AIAttendant/AIAActor.py index 8beb7e4..36488e3 100644 --- a/Apps/AIAttendant/AIAActor.py +++ b/Apps/AIAttendant/AIAActor.py @@ -55,7 +55,7 @@ def start(self, aia_prof: 'AIAProfile', sip_actr:InfernSIPActor): self.thumbstones = [] def new_sip_session_received(self, new_sess:RemoteSessionOffer): - aia_sess = AIASession(self, new_sess) + aia_sess = AIASession(self, new_sess, self.aia_prof.llm_prompt) print(f'{aia_sess=}') self.sessions[aia_sess.id] = aia_sess diff --git a/Apps/AIAttendant/AIAProfile.py b/Apps/AIAttendant/AIAProfile.py index 9e9a282..9d09e77 100644 --- a/Apps/AIAttendant/AIAProfile.py +++ b/Apps/AIAttendant/AIAProfile.py @@ -16,18 +16,21 @@ class AIAProfile(): 'schema': { 'tts_lang': {'type': 'string'}, 'stt_lang': {'type': 'string'}, + 'llm_prompt': {'type': 'string'}, } } } } stt_lang: str = 'en' tts_lang: str = 'en' + llm_prompt: str actor: Optional[AIAActor] = None def __init__(self, name, conf): self.name = name self.tts_lang = conf['tts_lang'] self.stt_lang = conf['stt_lang'] + self.llm_prompt = open(conf['llm_prompt']).read() def finalize(self, iconf:'InfernConfig'): pass diff --git a/Apps/AIAttendant/AIASession.py b/Apps/AIAttendant/AIASession.py index aa9bef3..f4a9e74 100644 --- a/Apps/AIAttendant/AIASession.py +++ b/Apps/AIAttendant/AIASession.py @@ -7,7 +7,7 @@ from Cluster.TTSSession import TTSRequest from 
Cluster.STTSession import STTRequest, STTResult, STTSentinel -from Cluster.LLMSession import LLMRequest, LLMResult +from Cluster.LLMSession import LLMRequest, LLMResult, LLMSessionParams from Cluster.RemoteTTSSession import RemoteTTSSession from Cluster.InfernRTPActor import InfernRTPActor from Core.T2T.NumbersToWords import NumbersToWords @@ -68,7 +68,7 @@ class AIASession(): stt_sess_term: callable text_in_buffer: List[str] - def __init__(self, aiaa:'AIAActor', new_sess:RemoteSessionOffer): + def __init__(self, aiaa:'AIAActor', new_sess:RemoteSessionOffer, llm_prompt:str): self.id = uuid4() self.say_buffer = [] sess_term_alice = partial(_sess_term, sterm=aiaa.aia_actr.sess_term.remote, sess_id=self.id, sip_sess_id=new_sess.sip_sess_id) @@ -81,7 +81,8 @@ def __init__(self, aiaa:'AIAActor', new_sess:RemoteSessionOffer): return self.rtp_actr, self.rtp_sess_id = rtp_alice stt_sess = aiaa.stt_actr.new_stt_session.remote(keep_context=True) - llm_sess = aiaa.llm_actr.new_llm_session.remote() + llmp = LLMSessionParams(llm_prompt) + llm_sess = aiaa.llm_actr.new_llm_session.remote(llmp) self.tts_sess = RemoteTTSSession(aiaa.tts_actr) self.stt_sess_id, self.llm_sess_id = ray.get([stt_sess, llm_sess]) self.stt_sess_term = partial(aiaa.stt_actr.stt_session_end.remote, self.stt_sess_id) diff --git a/Cluster/InfernLLMActor.py b/Cluster/InfernLLMActor.py index a16b2ee..4b9f24f 100644 --- a/Cluster/InfernLLMActor.py +++ b/Cluster/InfernLLMActor.py @@ -5,7 +5,7 @@ import ray from Cluster.InfernLLMWorker import InfernLLMWorker -from Cluster.LLMSession import LLMSession, LLMRequest, LLMInferRequest +from Cluster.LLMSession import LLMSession, LLMRequest, LLMInferRequest, LLMSessionParams @ray.remote(num_gpus=1.0, resources={"llm": 1}) class InfernLLMActor(): @@ -42,9 +42,9 @@ def res_cb(result): tq.put(result) def stop(self): self.llm.stop() - def new_llm_session(self): + def new_llm_session(self, sconf:LLMSessionParams): if self.debug: print('InfernLLMActor.new_llm_session') - sess 
= LLMSession(self.llm) + sess = LLMSession(self.llm, sconf) self.sessions[sess.id] = sess return sess.id diff --git a/Cluster/LLMSession.py b/Cluster/LLMSession.py index 5120ca6..ad34690 100644 --- a/Cluster/LLMSession.py +++ b/Cluster/LLMSession.py @@ -26,17 +26,18 @@ class LLMInferRequest(): def __init__(self, req:LLMRequest, context:List[dict]): self.req, self.context = req, tuple(context) +class LLMSessionParams(): + system_prompt: str + def __init__(self, system_prompt:str): + self.system_prompt = system_prompt + class LLMSession(): id: UUID context: List[dict] debug: bool = False - def __init__(self, llm): + def __init__(self, llm:'InfernLLMWorker', params:LLMSessionParams): self.id = uuid4() - self.context = [{"role": "system", "content": "You are Qwen, created by Alibaba Cloud. " + - "You are a helpful voice auto-attendant for the company Sippy Software. " + - "Start by greeting the caller and asking how you can help. " + - "Keep your messages brief and concise to reduce latency." + - "The model output is fed into the dumb TTS system for audio output: DO not add any extended formatting."}] + self.context = [{"role": "system", "content": params.system_prompt}] self.llm = llm def context_add(self, content:str, role:str = "user"): diff --git a/examples/ai_attendant.yaml b/examples/ai_attendant.yaml index b2e4862..caf5961 100644 --- a/examples/ai_attendant.yaml +++ b/examples/ai_attendant.yaml @@ -3,7 +3,7 @@ sip: bind: 192.168.24.29:5060 profiles: foo: - sip_server: 192.168.23.109:5070 + sip_server: 192.168.24.1:5070 sink: apps/ai_attendant/configuration1 username: 'incoming' password: 'user' @@ -18,3 +18,4 @@ apps: configuration1: stt_lang: 'en' tts_lang: 'en' + llm_prompt: 'examples/sippylabs.txt' diff --git a/examples/sippylabs.txt b/examples/sippylabs.txt new file mode 100644 index 0000000..8b311b5 --- /dev/null +++ b/examples/sippylabs.txt @@ -0,0 +1,8 @@ +You are Glenn, created by Max. 
+You are a Max sidekick chatbot to help him during long hours of coding and streaming online and to keep him company. +You and Max are streaming online on YouTube in a video podcast called "SIP Chronicles". +Start by greeting everyone, asking what the stream is about and telling a joke. +Keep your messages brief and concise to reduce latency, and keep the conversation light. +The model output is fed into the dumb TTS system for audio output: do not add any extended formatting. +Your input is generated by the STT system: it might have mistakes, typos, etc. +You can keep silent if not specifically asked, or if you feel the need to avoid interrupting Max's speech, by outputting a <nothingtosay> sequence.