Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding tutorial in docs #6

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

This app is powered by the technology of Realtime Communication, Realtime Transcription, a Large Language Model (LLM), and Text to Speech extensions. The TEN Framework makes the workflow super easy! The iOS Demo resembles the web demo and acts as the mobile frontend to the AI Agent. You may ask the Agent any general question.

For more details of the project, view [the guide](docs/GUIDE.md).

![TENagentRun](https://github.com/user-attachments/assets/1e7136e0-ffef-48fd-9c29-6efa797e33d1)
## Prerequisites:
- Agora Developer account
Expand Down
16 changes: 16 additions & 0 deletions TENDemo.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
365EAAA72C759224002039E3 /* ServerNetworkingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 365EAAA62C759224002039E3 /* ServerNetworkingModel.swift */; };
365EAAB02C768A97002039E3 /* TranscriptionView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 365EAAAF2C768A97002039E3 /* TranscriptionView.swift */; };
365EAAB22C76A61F002039E3 /* StreamTextProcessor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 365EAAB12C76A61F002039E3 /* StreamTextProcessor.swift */; };
36BADB812CD2C3CD00B89787 /* GUIDE.md in Resources */ = {isa = PBXBuildFile; fileRef = 36BADB7F2CD2C3CD00B89787 /* GUIDE.md */; };
36BADB832CD2DB5D00B89787 /* STTStreamDecoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 36BADB822CD2DB5D00B89787 /* STTStreamDecoder.swift */; };
36E753032C7921450027E5EA /* SoundVisualizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 36E753022C7921450027E5EA /* SoundVisualizer.swift */; };
36E753282C9521D70027E5EA /* README.md in Resources */ = {isa = PBXBuildFile; fileRef = 36E753272C9521D70027E5EA /* README.md */; };
36E7532C2C98F8AE0027E5EA /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 36E7532B2C98F8AD0027E5EA /* SettingsView.swift */; };
Expand Down Expand Up @@ -55,6 +57,8 @@
365EAAA62C759224002039E3 /* ServerNetworkingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ServerNetworkingModel.swift; sourceTree = "<group>"; };
365EAAAF2C768A97002039E3 /* TranscriptionView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TranscriptionView.swift; sourceTree = "<group>"; };
365EAAB12C76A61F002039E3 /* StreamTextProcessor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamTextProcessor.swift; sourceTree = "<group>"; };
36BADB7F2CD2C3CD00B89787 /* GUIDE.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = GUIDE.md; sourceTree = "<group>"; };
36BADB822CD2DB5D00B89787 /* STTStreamDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = STTStreamDecoder.swift; sourceTree = "<group>"; };
36E753022C7921450027E5EA /* SoundVisualizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoundVisualizer.swift; sourceTree = "<group>"; };
36E753272C9521D70027E5EA /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
36E7532B2C98F8AD0027E5EA /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -112,10 +116,19 @@
isa = PBXGroup;
children = (
365EAAA62C759224002039E3 /* ServerNetworkingModel.swift */,
36BADB822CD2DB5D00B89787 /* STTStreamDecoder.swift */,
);
path = Models;
sourceTree = "<group>";
};
36BADB802CD2C3CD00B89787 /* docs */ = {
isa = PBXGroup;
children = (
36BADB7F2CD2C3CD00B89787 /* GUIDE.md */,
);
path = docs;
sourceTree = "<group>";
};
36E753262C926CDA0027E5EA /* Views */ = {
isa = PBXGroup;
children = (
Expand All @@ -131,6 +144,7 @@
isa = PBXGroup;
children = (
36E753272C9521D70027E5EA /* README.md */,
36BADB802CD2C3CD00B89787 /* docs */,
5C32D5592C6591430054DCD7 /* TENDemo */,
5C32D5582C6591430054DCD7 /* Products */,
);
Expand Down Expand Up @@ -286,6 +300,7 @@
files = (
5C32D5622C6591460054DCD7 /* Preview Assets.xcassets in Resources */,
5C32D5722C6592800054DCD7 /* config.json in Resources */,
36BADB812CD2C3CD00B89787 /* GUIDE.md in Resources */,
36E753282C9521D70027E5EA /* README.md in Resources */,
5C32D55F2C6591460054DCD7 /* Assets.xcassets in Resources */,
);
Expand Down Expand Up @@ -316,6 +331,7 @@
5C32D5BA2C6705370054DCD7 /* PlaceHolderUserView.swift in Sources */,
5C32D55B2C6591430054DCD7 /* TENDemoApp.swift in Sources */,
36E753032C7921450027E5EA /* SoundVisualizer.swift in Sources */,
36BADB832CD2DB5D00B89787 /* STTStreamDecoder.swift in Sources */,
36E7532C2C98F8AE0027E5EA /* SettingsView.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand Down
4 changes: 3 additions & 1 deletion TENDemo/AgoraManager/AgoraManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ open class AgoraManager: NSObject, ObservableObject {
@Published var label: String?
@Published var sessionStatus = SessionStatus.loading

let sttStreamDecoder = STTStreamDecoder()

/// Whether this client was launched for chatting with the AI (one user only)
var talkingWithAI = false

Expand Down Expand Up @@ -436,7 +438,7 @@ extension AgoraManager: AgoraRtcEngineDelegate {
/// - data: the data
open func rtcEngine(_ engine: AgoraRtcEngineKit, receiveStreamMessageFromUid uid: UInt, streamId: Int, data: Data) {
do {
let stt = try JSONDecoder().decode(STTStreamText.self, from: data)
let stt = try sttStreamDecoder.parseStream(data: data)
let msg = IChatItem(userId: uid, text: stt.text, time: stt.textTS, isFinal: stt.isFinal, isAgent: 0 == stt.streamID)
streamTextProcessor.addChatItem(item: msg)
} catch let error {
Expand Down
10 changes: 8 additions & 2 deletions TENDemo/AgoraManager/NetworkManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ open class NetworkManager {
// Create a ServiceStartRequest object with request ID, channel name, user UID, graph name, language, voice type, and start properties
let data = ServiceStartRequest(requestId: genUUID(),
channelName: config.channel,
remoteStreamId: uid,
graphName : "camera.va.openai.azure",
userUID: uid,
graphName : "camera_va_openai_azure",
language: config.agoraAsrLanguage,
voiceType: config.voiceType.description,
properties: startProperties
)
Expand Down Expand Up @@ -125,6 +126,11 @@ open class NetworkManager {

// Encode the data into JSON format
let body = try JSONEncoder().encode(data)

// Convert JSON data to a string
if let jsonString = String(data: body, encoding: .utf8) {
print(jsonString)
}

// Set the request body to the encoded JSON data
request.httpBody = body
Expand Down
62 changes: 62 additions & 0 deletions TENDemo/Models/STTStreamDecoder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//
// STTStreamDecoder.swift
// TENDemo
//
// Created by Rick Cheng on 10/30/24.
//

import Foundation

/// One packet of a speech-to-text message as received from the data stream.
///
/// Incoming stream is in Text Format
/// <id>|<index>|<total>|<content part>
/// sample: 0038dbd0|1|1|eyJpc19maW5hbCI6IHRydWUsICJzdHJlYW1faWQiOiAwLCAibWVzc2FnZV9pZCI6ICIwMDM4ZGJkMCIsICJkYXRhX3R5cGUiOiAidHJhbnNjcmliZSIsICJ0ZXh0X3RzIjogMTczMDMxOTcwNjIzMCwgInRleHQiOiAiVEVOIEFnZW50IGNvbm5lY3RlZC4gSG93IGNhbiBJIGhlbHAgeW91IHRvZGF5PyJ9
///
struct STTStreamMessage {
    /// Number of `|`-separated fields a packet is expected to carry.
    private static let fieldCount = 4

    /// First field: the message identifier.
    let messageId: String
    /// Second field: index of this part; `0` when the field is missing or not an integer.
    let partIndex: Int
    /// Third field: total number of parts; `0` when the field is missing or not an integer.
    let partsTotal: Int
    /// Fourth field: this part's share of the content; empty when the field is missing.
    let content: String

    /// Parses a packet of the form `<id>|<index>|<total>|<content part>`.
    ///
    /// Malformed input never traps: a missing field falls back to an empty string
    /// (for `messageId` and `content`) or `0` (for `partIndex` and `partsTotal`).
    ///
    /// - Parameter input: The packet text.
    init(from input: String) {
        // Keep empty fields so that positions stay aligned with the format;
        // the default `split` drops them, which shifts later fields and made
        // fixed-index access crash on packets such as "id|1|1|".
        let fields = input
            .split(separator: "|", omittingEmptySubsequences: false)
            .map(String.init)
        // Pad short input so every expected position exists.
        let missing = max(0, STTStreamMessage.fieldCount - fields.count)
        let padded = fields + Array(repeating: "", count: missing)

        self.messageId = padded[0]
        self.partIndex = Int(padded[1]) ?? 0
        self.partsTotal = Int(padded[2]) ?? 0
        self.content = padded[3]
    }
}

/// Errors raised while handling speech-to-text stream data.
enum SttError: Error {
    /// A failure described by a human-readable message.
    case runtimeError(String)
}

///
/// A decoder that takes the raw data from the data stream and returns the complete message.
/// If the message is too long to fit in one packet, its parts are sent in separate events.
/// The parts are assembled here to form a valid base64 string, which is then decoded
/// into the JSON representation of `STTStreamText`.
class STTStreamDecoder {
    /// Base64 content accumulated from the parts received so far for the message in progress.
    var contentBuffer : String = ""
    /// Identifier of the message whose parts are currently held in `contentBuffer`.
    private var bufferedMessageId: String?

    /// Feeds one packet into the decoder and returns the decoded message once its
    /// last part has arrived.
    ///
    /// - Parameter data: One raw packet, UTF-8 text of the form `<id>|<index>|<total>|<content part>`.
    /// - Returns: The decoded `STTStreamText` when this packet completes a message.
    /// - Throws: `SttError.runtimeError` when the packet is malformed, when the message is
    ///   not complete yet (more parts are expected), or when the assembled content is not
    ///   valid base64-encoded JSON for `STTStreamText`.
    public func parseStream(data : Data) throws -> STTStreamText {
        // Reject anything that does not carry all four fields before handing it to the parser.
        guard let str = String(data: data, encoding: .utf8),
              str.split(separator: "|").count >= 4 else {
            throw SttError.runtimeError("streamed data format can't be parsed.")
        }
        let message = STTStreamMessage(from: str)

        // A first part, or a part belonging to a different message, starts a fresh buffer.
        // Otherwise leftovers from a message whose final part never arrived would be
        // prepended to this one and corrupt it.
        if message.partIndex <= 1 || message.messageId != bufferedMessageId {
            contentBuffer = ""
        }
        bufferedMessageId = message.messageId
        contentBuffer += message.content

        guard message.partIndex == message.partsTotal else {
            throw SttError.runtimeError("streamed data is incomplete: received part \(message.partIndex) of \(message.partsTotal).")
        }

        // This is the last piece of the base64 content. Take it out of the buffer up front
        // so a decoding failure cannot leave stale content behind for the next message.
        let base64Content = contentBuffer
        contentBuffer = ""
        bufferedMessageId = nil

        guard let jsonString = decodeBase64(base64Content) else {
            throw SttError.runtimeError("streamed data format can't be parsed.")
        }

        do {
            return try JSONDecoder().decode(STTStreamText.self, from: Data(jsonString.utf8))
        } catch {
            // Surface the underlying reason instead of silently discarding it.
            throw SttError.runtimeError("streamed data format can't be parsed: \(error)")
        }
    }
}
12 changes: 8 additions & 4 deletions TENDemo/Models/ServerNetworkingModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,25 @@ struct ServiceStartRequest: Codable {
/// The name of the Agora channel.
let channelName: String
/// The remote stream ID.
let remoteStreamId: UInt
let userUID: UInt
/// The name of the graph.
let graphName : String
let graphName: String
/// The spoken language
let language: String
/// The type of voice.
let voiceType: String
/// The properties for starting the service.
let properties : ServerStartProperties
let properties: ServerStartProperties

/// Coding keys for the request.
enum CodingKeys: String, CodingKey {
case requestId = "request_id"
case channelName = "channel_name"
case remoteStreamId = "remote_stream_id"
case userUID = "user_uid"
case graphName = "graph_name"
case voiceType = "voice_type"
case properties = "properties"
case language
}
}

Expand Down Expand Up @@ -164,3 +167,4 @@ struct STTStreamText: Codable {
case textTS = "text_ts"
}
}

2 changes: 1 addition & 1 deletion TENDemo/TENDemoApp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import SwiftUI

// The version number basically matches the TEN-Framework
let _appVersion = "v0.4.1"
let _appVersion = "v0.5.0"

@main
struct TENDemoApp: App {
Expand Down
7 changes: 7 additions & 0 deletions TENDemo/Utilities/UtilFunctions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ func genUUID() -> String {
return UUID().uuidString
}

/// Decodes a base64 string into the UTF-8 text it encodes.
///
/// - Parameter base64String: The base64-encoded input.
/// - Returns: The decoded text, or `nil` when the input is not valid base64
///   or the decoded bytes are not valid UTF-8.
func decodeBase64(_ base64String: String) -> String? {
    return Data(base64Encoded: base64String).flatMap { rawBytes in
        String(data: rawBytes, encoding: .utf8)
    }
}

struct Environment {
func isRunningOnSimulator() -> Bool {
#if targetEnvironment(simulator)
Expand Down
Loading