Room: AI-Talk support dictation mode. v5.13.20
winlinvip committed Jan 30, 2024
1 parent fee6087 commit ba3af94
Showing 10 changed files with 1,076 additions and 49 deletions.
1 change: 1 addition & 0 deletions DEVELOPER.md
@@ -1128,6 +1128,7 @@ The following are the update records for the SRS Stack server.
* Room: AI-Talk support multiple assistant in a room. v5.13.18
* Room: AI-Talk support user different languages. v5.13.18
* Room: AI-Talk allow disable ASR/TTS, enable text. v5.13.19
* Room: AI-Talk support dictation mode. v5.13.20
* v5.12
* Refine local variable name conf to config. v5.12.1
* Add forced exit on timeout for program termination. v5.12.1
40 changes: 27 additions & 13 deletions platform/ai-talk.go
@@ -55,6 +55,8 @@ func (v *openaiASRService) RequestASR(ctx context.Context, inputFile, language,
outputFile := fmt.Sprintf("%v.mp4", inputFile)
defer os.Remove(outputFile)

// TODO: FIXME: Have the client set the codec and format so the server can skip this copy step,
// which takes about 1s on low-performance VPS instances.
// Transcode input audio from opus or aac to aac in m4a/mp4 format.
// If encoding to aac is needed, use:
// "-c:a", "aac", "-ac", "1", "-ar", "16000", "-ab", "30k",
@@ -179,6 +181,10 @@ func (v *openaiChatService) RequestChat(ctx context.Context, sreq *StageRequest,
Role: openai.ChatMessageRoleUser,
Content: user.previousAsrText,
})
messages = append(messages, openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleAssistant,
Content: user.previousAiText,
})

model := stage.chatModel
maxTokens := 1024
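
To make the intent of the appended messages concrete, here is a hedged, self-contained sketch of the request the window feeds into (illustrative only: the function name and the go-openai import path are assumptions inferred from the openai identifiers in this file; the string parameters correspond to stage.prompt, user.previousAsrText, user.previousAiText, and the new ASR text):

package sketch

import (
	"context"

	openai "github.com/sashabaranov/go-openai"
)

// requestChatSketch shows how the previous ASR text and the AI's previous reply (added by
// this commit) frame the new user input in the chat window before a streaming completion.
func requestChatSketch(ctx context.Context, client *openai.Client,
	model, systemPrompt, previousAsrText, previousAiText, asrText string) (*openai.ChatCompletionStream, error) {
	messages := []openai.ChatCompletionMessage{
		{Role: openai.ChatMessageRoleSystem, Content: systemPrompt},
		{Role: openai.ChatMessageRoleUser, Content: previousAsrText},
		{Role: openai.ChatMessageRoleAssistant, Content: previousAiText},
		{Role: openai.ChatMessageRoleUser, Content: asrText},
	}
	return client.CreateChatCompletionStream(ctx, openai.ChatCompletionRequest{
		Model:     model,
		Messages:  messages,
		MaxTokens: 1024,
		Stream:    true,
	})
}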
@@ -368,7 +374,7 @@ func (v *openaiChatService) handle(
stage.previousAssitant += sentence + " "
// We utilize user ASR and AI responses as prompts for the subsequent ASR, given that this is
// a chat-based scenario where the user converses with the AI, and the following audio should pertain to both user and AI text.
user.previousAsrText += " " + sentence
user.previousAiText += " " + sentence
// Commit the sentence to the TTS worker and callbacks.
commitAISentence(sentence, firstSentense)
// Reset the sentence, because we have committed it.
@@ -829,6 +835,8 @@ type StageUser struct {

// Previous ASR text, to use as prompt for next ASR.
previousAsrText string
// Previous AI response text, which can also be used as the prompt for the next ASR.
previousAiText string

// Last update of user.
update time.Time
@@ -869,6 +877,8 @@ type Stage struct {
prompt string
// The AI ASR language.
asrLanguage string
// The AI ASR prompt type: user or user-ai.
asrPrompt string
// The prefix for TTS for the first sentence if too short.
prefix string
// The welcome voice url.
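
The new asrPrompt field selects which of the two accumulated texts is offered to the next ASR request. The actual call site is elided from this diff, so the following is only a hedged sketch with an illustrative helper name:

// buildASRPrompt is illustrative: dictation mode ("user") reuses only the user's own prior
// speech as the ASR prompt, while chat mode ("user-ai") also includes the AI's previous
// reply, matching the comment in openaiChatService.handle above.
func buildASRPrompt(asrPrompt, previousAsrText, previousAiText string) string {
	if asrPrompt == "user" {
		return previousAsrText
	}
	// Default "user-ai": prompt with both sides of the conversation.
	return previousAsrText + " " + previousAiText
}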
@@ -916,7 +926,8 @@ func NewStage(opts ...func(*Stage)) *Stage {

func (v Stage) String() string {
var sb strings.Builder
sb.WriteString(fmt.Sprintf("sid:%v,asr:%v", v.sid, v.asrLanguage))
sb.WriteString(fmt.Sprintf("sid:%v,asr:%v,asrp:%v",
v.sid, v.asrLanguage, v.asrPrompt))
if v.prefix != "" {
sb.WriteString(fmt.Sprintf(",prefix:%v", v.prefix))
}
@@ -952,6 +963,7 @@ func (v *Stage) UpdateFromRoom(room *SrsLiveRoom) {
v.voice = helloVoiceFromLanguage(room.AIASRLanguage)
v.prompt = room.AIChatPrompt
v.asrLanguage = room.AIASRLanguage
v.asrPrompt = room.AIASRPrompt
v.replyLimit = room.AIChatMaxWords
v.chatModel = room.AIChatModel
v.chatWindow = room.AIChatMaxWindow
@@ -1472,7 +1484,7 @@ func handleAITalkService(ctx context.Context, handler *http.ServeMux) error {
}{
RequestUUID: sreq.rid,
})
logger.Tf(ctx, "srs ai-talk stage create conversation ok, rid=%v", sreq.rid)
logger.Tf(ctx, "ai-talk new conversation, room=%v, sid=%v, rid=%v", roomUUID, sid, sreq.rid)
return nil
}(); err != nil {
ohttp.WriteError(ctx, w, r, err)
@@ -1593,7 +1605,7 @@ func handleAITalkService(ctx context.Context, handler *http.ServeMux) error {
}

// Important trace log.
user.previousAsrText = sreq.asrText
user.previousAsrText, user.previousAiText = sreq.asrText, ""
logger.Tf(ctx, "You: %v", sreq.asrText)

// Notify all subscribers about the ASR text.
@@ -1605,15 +1617,17 @@
stage.KeepAlive()

// Do chat, get the response in stream.
chatService := &openaiChatService{
conf: stage.aiConfig,
onFirstResponse: func(ctx context.Context, text string) {
sreq.lastRequestChat = time.Now()
sreq.lastRobotFirstText = text
},
}
if err := chatService.RequestChat(ctx, sreq, stage, user); err != nil {
return errors.Wrapf(err, "chat")
if stage.aiChatEnabled {
chatService := &openaiChatService{
conf: stage.aiConfig,
onFirstResponse: func(ctx context.Context, text string) {
sreq.lastRequestChat = time.Now()
sreq.lastRobotFirstText = text
},
}
if err := chatService.RequestChat(ctx, sreq, stage, user); err != nil {
return errors.Wrapf(err, "chat")
}
}

// Respond with the request UUID and start pulling the response.
6 changes: 4 additions & 2 deletions platform/live-room.go
@@ -288,6 +288,8 @@ type SrsAssistant struct {
AIASREnabled bool `json:"aiAsrEnabled"`
// The AI asr language.
AIASRLanguage string `json:"aiAsrLanguage"`
// The AI ASR prompt type: user or user-ai.
AIASRPrompt string `json:"aiAsrPrompt"`

// Whether enable the AI processing.
AIChatEnabled bool `json:"aiChatEnabled"`
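
A hedged example of enabling dictation mode through the new field (values are illustrative; the option-function form follows NewAssistant in the next hunk):

// exampleDictationAssistant is illustrative only: it configures ASR with the prompt type
// added by this commit.
func exampleDictationAssistant() *SrsAssistant {
	return NewAssistant(func(a *SrsAssistant) {
		a.AIASREnabled = true
		a.AIASRLanguage = "en"
		a.AIASRPrompt = "user" // dictation mode; use "user-ai" for the chat-style prompt
	})
}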
@@ -315,8 +317,8 @@ func NewAssistant(opts ...func(*SrsAssistant)) *SrsAssistant {
}

func (v *SrsAssistant) String() string {
return fmt.Sprintf("assistant=%v, name=%v, provider=%v, secretKey=%vB, baseURL=%v, asr=<enabled=%v,language=%v>, chat=<enabled=%v,model=%v,prompt=%v,window=%v,words=%v>, tts=<%v>",
return fmt.Sprintf("assistant=%v, name=%v, provider=%v, secretKey=%vB, baseURL=%v, asr=<enabled=%v,language=%v,prompt=%v>, chat=<enabled=%v,model=%v,prompt=%v,window=%v,words=%v>, tts=<%v>",
v.Assistant, v.AIName, v.AIProvider, len(v.AISecretKey), v.AIBaseURL, v.AIASREnabled,
v.AIASRLanguage, v.AIChatEnabled, v.AIChatModel, v.AIChatPrompt, v.AIChatMaxWindow,
v.AIASRLanguage, v.AIASRPrompt, v.AIChatEnabled, v.AIChatModel, v.AIChatPrompt, v.AIChatMaxWindow,
v.AIChatMaxWords, v.AITTSEnabled)
}
9 changes: 7 additions & 2 deletions ui/src/ai-talk.css
@@ -48,7 +48,7 @@
width:20%; height:25%;
}

.ai-talk-trace-logs-pc, .ai-talk-trace-logs-pcfs, .ai-talk-trace-logs-mobile, .ai-talk-trace-logs-mobilefs, .ai-talk-trace-logs-chat-only{
.ai-talk-trace-logs-pc, .ai-talk-trace-logs-pcfs, .ai-talk-trace-logs-mobile, .ai-talk-trace-logs-mobilefs, .ai-talk-trace-logs-mobilefs-dictation, .ai-talk-trace-logs-chat-only{
overflow: scroll;
}

@@ -91,7 +91,7 @@
min-width: 30vw;
}

.ai-talk-trace-logs-pc, .ai-talk-trace-logs-pcfs, .ai-talk-trace-logs-mobile, .ai-talk-trace-logs-mobilefs, .ai-talk-trace-logs-chat-only{
.ai-talk-trace-logs-pc, .ai-talk-trace-logs-pcfs, .ai-talk-trace-logs-mobile, .ai-talk-trace-logs-mobilefs, .ai-talk-trace-logs-mobilefs-dictation, .ai-talk-trace-logs-chat-only{
margin-bottom: 0px;
}

@@ -117,6 +117,11 @@
max-height: 57vh;
}

.ai-talk-trace-logs-mobilefs-dictation {
min-height: 78vh;
max-height: 78vh;
}

.ai-talk-trace-logs-chat-only {
min-height: 50vh;
max-height: 96vh;