diff --git a/apps/AppWithWearable/lib/backend/api_requests/api_calls.dart b/apps/AppWithWearable/lib/backend/api_requests/api_calls.dart
index fdf7b040f..ddda7af19 100644
--- a/apps/AppWithWearable/lib/backend/api_requests/api_calls.dart
+++ b/apps/AppWithWearable/lib/backend/api_requests/api_calls.dart
@@ -1,6 +1,7 @@
 import 'dart:convert';
 import 'dart:io';
 
+import 'package:deepgram_speech_to_text/deepgram_speech_to_text.dart';
 import 'package:flutter/foundation.dart';
 import 'package:flutter/material.dart';
 import 'package:friend_private/backend/database/memory.dart';
@@ -389,6 +390,75 @@ Future> transcribeAudioFile(File file, String uid) async
   }
 }
 
+/// Transcribes [file] with Deepgram and groups the returned words into
+/// speaker-attributed [TranscriptSegment]s.
+///
+/// Consecutive words that Deepgram assigns to the same speaker are merged
+/// into one segment: the words are joined with spaces and the segment's end
+/// time is taken from the last merged word. Every segment is created with
+/// `isUser: false`.
+///
+/// Returns an empty list when the response contains no channels,
+/// alternatives or words. Throws a [StateError] when no Deepgram API key is
+/// configured.
+Future<List<TranscriptSegment>> transcribeAudioFile2(File file) async {
+  final apiKey = Env.deepgramApiKey;
+  if (apiKey == null) {
+    throw StateError('Deepgram API key is not configured');
+  }
+
+  final stopwatch = Stopwatch()..start();
+  final deepgram = Deepgram(apiKey, baseQueryParams: {
+    'model': 'nova-2-general',
+    'detect_language': false,
+    'language': SharedPreferencesUtil().recordingsLanguage,
+    'filler_words': false,
+    'punctuate': true,
+    'diarize': true,
+    'smart_format': true,
+    // TODO: try more options, sentiment analysis, intent, topics
+  });
+
+  final res = await deepgram.transcribeFromFile(file);
+  debugPrint('transcribeAudioFile2 took: ${stopwatch.elapsed.inSeconds} seconds');
+
+  // Walk down to the first alternative of the first channel, returning an
+  // empty result instead of throwing when a level is missing or empty.
+  final data = jsonDecode(res.json);
+  final channels = data['results']?['channels'] as List?;
+  final alternatives = channels == null || channels.isEmpty
+      ? null
+      : channels.first['alternatives'] as List?;
+  final words = alternatives == null || alternatives.isEmpty
+      ? null
+      : alternatives.first['words'] as List?;
+  if (words == null) return [];
+
+  // NOTE(review): segments use the raw 'word' field; confirm whether a
+  // punctuated form from the response should be preferred instead.
+  final segments = <TranscriptSegment>[];
+  for (final word in words) {
+    // Words without a speaker index fall back to speaker 0 rather than
+    // producing a 'SPEAKER_null' label.
+    final speaker = (word['speaker'] as int?) ?? 0;
+    final last = segments.isEmpty ? null : segments.last;
+    if (last != null && last.speakerId == speaker) {
+      // Same speaker as the previous word: extend the open segment.
+      last.text += ' ${word['word']}';
+      last.end = word['end'];
+    } else {
+      segments.add(TranscriptSegment(
+        speaker: 'SPEAKER_$speaker',
+        start: word['start'],
+        end: word['end'],
+        text: word['word'],
+        isUser: false,
+      ));
+    }
+  }
+  return segments;
+}
+
 Future userHasSpeakerProfile(String uid) async {
   var response = await makeApiCall(
     url: '${Env.customTranscriptApiBaseUrl}profile?uid=$uid',
@@ -436,3 +506,18 @@ Future uploadSample(File file, String uid) async {
     throw Exception('An error occurred uploadSample: $e');
   }
 }
+
+// Example Deepgram request that transcribes audio hosted at a URL:
+//
+// curl \
+//   --request POST \
+//   --header 'Authorization: Token YOUR_DEEPGRAM_API_KEY' \
+//   --header 'Content-Type: application/json' \
+//   --data '{"url":"https://dpgr.am/spacewalk.wav"}' \
+//   --url 'https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true'
+
+/// Placeholder for transcribing the audio hosted at [url] via Deepgram.
+///
+/// TODO: not implemented yet; the returned future currently completes with
+/// `null` without performing any request.
+Future transcribeFromDeepgram(String url) async {}
diff --git a/apps/AppWithWearable/lib/pages/capture/widgets/transcript.dart b/apps/AppWithWearable/lib/pages/capture/widgets/transcript.dart
index e17824743..01a1c6194 100644
--- a/apps/AppWithWearable/lib/pages/capture/widgets/transcript.dart
+++ b/apps/AppWithWearable/lib/pages/capture/widgets/transcript.dart
@@ -101,7 +101,8 @@ class TranscriptWidgetState extends State {
         // var containsAudio = await vad.predict(f.readAsBytesSync());
         // debugPrint('Processing audio bytes: ${f.toString()}');
         try {
-          List segments = await transcribeAudioFile(f, SharedPreferencesUtil().uid);
+          // List segments = await transcribeAudioFile(f, SharedPreferencesUtil().uid);
+          List segments = await transcribeAudioFile2(f);
           processCustomTranscript(segments);
         } catch (e) {
           debugPrint(e.toString());
diff --git a/apps/AppWithWearable/pubspec.yaml b/apps/AppWithWearable/pubspec.yaml
index 357847a4d..7d9ed2db2 100644
--- a/apps/AppWithWearable/pubspec.yaml
+++ b/apps/AppWithWearable/pubspec.yaml
@@ -15,7 +15,7 @@ publish_to: 'none' # Remove this line if you wish to publish to pub.dev
 # In iOS, build-name is used as CFBundleShortVersionString while build-number used as CFBundleVersion.
 # Read more about iOS versioning at
 # https://developer.apple.com/library/archive/documentation/General/Reference/InfoPlistKeyReference/Articles/CoreFoundationKeys.html
-version: 1.0.5+17
+version: 1.0.6+19
 
 environment:
   sdk: ">=3.0.0 <4.0.0"
@@ -74,6 +74,7 @@ dependencies:
   objectbox: ^4.0.1
   objectbox_flutter_libs: any
   flutter_foreground_task: ^6.5.0
+  deepgram_speech_to_text: ^2.1.1
 
 dependency_overrides:
   http: ^1.2.1