opensrp · lincmba · Dec 18, 2024 · Dec 19, 2024 · Dec 19, 2024 · Dec 20, 2024
diff --git a/android/buildSrc/src/main/kotlin/project-properties.gradle.kts b/android/buildSrc/src/main/kotlin/project-properties.gradle.kts
@@ -27,7 +27,9 @@ val requiredFhirProperties =
     "OAUTH_SCOPE",
     "MAPBOX_SDK_TOKEN",
     "SENTRY_DSN",
-    "OPENSRP_APP_ID"
+    "OPENSRP_APP_ID",
+    "GEMINI_API_KEY",
+    "SPEECH_TO_TEXT_API_KEY",
   )
 
 val localProperties = readProperties((project.properties["localPropertiesFile"] ?: "${rootProject.projectDir}/local.properties").toString())

diff --git a/android/gradle/libs.versions.toml b/android/gradle/libs.versions.toml
@@ -32,8 +32,11 @@ fhir-sdk-engine = "1.1.0-preview4-SNAPSHOT"
 fhir-sdk-knowledge = "0.1.0-alpha03-preview5-rc2-SNAPSHOT"
 fhir-sdk-workflow = "0.1.0-alpha04-preview10-rc1-SNAPSHOT"
 fragment-ktx = "1.8.3"
+generativeai = "0.9.0"
 glide = "4.16.0"
+googleCloudSpeech = "4.50.0"
 gradle = "8.3.2"
+grpcOkHttp = "1.69.0"
 gson = "2.10.1"
 hilt = "1.2.0"
 java-jwt = "4.4.0"
@@ -58,6 +61,7 @@ log4j = "1.2.17"
 logback-android = "3.0.0"
 material = "1.12.0"
 mlkit-barcode-scanning = "17.3.0"
+mockito-inline = "4.0.0"
 mockk = "1.13.8"
 mockk-android = "1.13.8"
 msg-simple = "1.2"
@@ -82,6 +86,7 @@ slf4j-nop = "2.0.7"
 spotlessPluginGradle = "6.25.0"
 sqlcipher = "4.5.4"
 stax-api = "1.0-2"
+tasks-genai = "0.10.14"
 timber = "5.0.1"
 uiautomator = "2.3.0"
 work = "2.9.1"
@@ -136,9 +141,13 @@ fhir-sdk-common = { group = "org.smartregister", name = "common", version.ref =
 foundation = { group = "androidx.compose.foundation", name = "foundation", version.ref = "compose-ui" }
 fragment-ktx = { group = "androidx.fragment", name = "fragment-ktx", version.ref = "fragment-ktx" }
 fragment-testing = { group = "androidx.fragment", name = "fragment-testing", version.ref = "fragment-ktx" }
+generativeai = { module = "com.google.ai.client.generativeai:generativeai", version.ref = "generativeai" }
+tasks-genai = { module = "com.google.mediapipe:tasks-genai", version.ref = "tasks-genai" }
 glide = { group = "com.github.bumptech.glide", name = "glide", version.ref = "glide" }
 gms-play-services-location = { group = "com.google.android.gms", name = "play-services-location", version.ref = "playServicesLocation" }
+google-cloud-speech = { module = "com.google.cloud:google-cloud-speech", version.ref = "googleCloudSpeech" }
 gradle = { module = "com.android.tools.build:gradle", version.ref = "gradle" }
+grpc-okhttp = { module = "io.grpc:grpc-okhttp", version.ref = "grpcOkHttp" }
 gson = { group = "com.google.code.gson", name = "gson", version.ref = "gson" }
 hilt-compiler = { group = "androidx.hilt", name = "hilt-compiler", version.ref = "hilt" }
 hilt-work = { group = "androidx.hilt", name = "hilt-work", version.ref = "hilt" }
@@ -173,6 +182,7 @@ logback-android = { module = "com.github.tony19:logback-android", version.ref =
 mapbox-sdk-turf = { group = "com.mapbox.mapboxsdk", name = "mapbox-sdk-turf", version.ref = "kujaku-mapbox-sdk-turf" }
 material = { group = "com.google.android.material", name = "material", version.ref = "material" }
 mlkit-barcode-scanning = { group = "com.google.mlkit", name = "barcode-scanning", version.ref = "mlkit-barcode-scanning"}
+mockito-inline = { module = "org.mockito:mockito-inline", version.ref = "mockito-inline" }
 mockk = { group = "io.mockk", name = "mockk", version.ref = "mockk" }
 mockk-android = { group = "io.mockk", name = "mockk-android", version.ref = "mockk-android" }
 msg-simple = { group = "com.github.java-json-tools", name = "msg-simple", version.ref = "msg-simple" }

diff --git a/android/quest/build.gradle.kts b/android/quest/build.gradle.kts
@@ -77,6 +77,12 @@ android {
     buildConfigField("String", "CONFIGURATION_SYNC_PAGE_SIZE", """"100"""")
     buildConfigField("String", "SENTRY_DSN", """"${project.extra["SENTRY_DSN"]}"""")
     buildConfigField("String", "BUILD_DATE", "\"$buildDate\"")
+    buildConfigField("String", "GEMINI_API_KEY", """"${project.extra["GEMINI_API_KEY"]}"""")
+    buildConfigField(
+      "String",
+      "SPEECH_TO_TEXT_API_KEY",
+      """"${project.extra["SPEECH_TO_TEXT_API_KEY"]}"""",
+    )
 
     testInstrumentationRunner = "org.smartregister.fhircore.quest.QuestTestRunner"
     testInstrumentationRunnerArguments["additionalTestOutputDir"] = "/sdcard/Download"
@@ -489,6 +495,19 @@ dependencies {
   implementation(libs.bundles.cameraX)
   implementation(libs.log4j)
 
+  // AI dependencies
+  implementation(libs.google.cloud.speech) {
+    exclude("com.google.guava", "guava")
+    exclude("org.threeten", "threetenbp")
+  }
+  implementation(libs.generativeai)
+  implementation(libs.grpc.okhttp) { exclude("com.google.guava", "guava") }
+  implementation(libs.tasks.genai) {
+    // Exclude the lite runtime; com.google.cloud:google-cloud-speech needs the full protobuf one
+    // https://github.com/protocolbuffers/protobuf/blob/main/java/lite.md
+    exclude("com.google.protobuf", "protobuf-javalite")
+  }
+
   // Annotation processors
   kapt(libs.hilt.compiler)
   kapt(libs.dagger.hilt.compiler)
@@ -500,6 +519,7 @@ dependencies {
   testImplementation(libs.robolectric)
   testImplementation(libs.bundles.junit.test)
   testImplementation(libs.core.testing)
+  testImplementation(libs.mockito.inline)
   testImplementation(libs.mockk)
   testImplementation(libs.kotlinx.coroutines.test)
   testImplementation(libs.dagger.hilt.android.testing)

diff --git a/android/quest/src/main/AndroidManifest.xml b/android/quest/src/main/AndroidManifest.xml
@@ -5,6 +5,7 @@
     <uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
     <uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
     <uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
+    <uses-permission android:name="android.permission.RECORD_AUDIO" />
     <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
     <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" android:maxSdkVersion="32" />
     <uses-permission android:name="android.permission.CAMERA" />

diff --git a/...uest/src/main/java/org/smartregister/fhircore/quest/medintel/speech/models/GeminiModel.kt b/...uest/src/main/java/org/smartregister/fhircore/quest/medintel/speech/models/GeminiModel.kt
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2021-2024 Ona Systems, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.smartregister.fhircore.quest.medintel.speech.models
+
+import com.google.ai.client.generativeai.GenerativeModel
+import com.google.ai.client.generativeai.type.BlockThreshold
+import com.google.ai.client.generativeai.type.HarmCategory
+import com.google.ai.client.generativeai.type.SafetySetting
+import com.google.ai.client.generativeai.type.generationConfig
+import org.smartregister.fhircore.quest.BuildConfig
+
+/**
+ * [LlmModel] implementation that forwards prompts to a Gemini [GenerativeModel].
+ *
+ * The client is created with the API key exposed as [BuildConfig.GEMINI_API_KEY].
+ */
+class GeminiModel : LlmModel<GenerativeModel> {
+  // SDK usage guide: https://developer.android.com/ai/google-ai-client-sdk
+  override var model =
+    GenerativeModel(
+      modelName = "gemini-1.5-flash-001",
+      apiKey = BuildConfig.GEMINI_API_KEY,
+      generationConfig =
+        generationConfig {
+          temperature = 0.15f
+          topK = 32
+          topP = 1f
+          maxOutputTokens = 4096
+        },
+      safetySettings =
+        listOf(
+            HarmCategory.HARASSMENT,
+            HarmCategory.HATE_SPEECH,
+            HarmCategory.SEXUALLY_EXPLICIT,
+            HarmCategory.DANGEROUS_CONTENT,
+          )
+          .map { category -> SafetySetting(category, BlockThreshold.MEDIUM_AND_ABOVE) },
+    )
+
+  /**
+   * Sends [prompt] to the Gemini model and returns the text of its response.
+   *
+   * @param prompt The text prompt forwarded to the model.
+   * @return The response text, which may be null.
+   */
+  override suspend fun generateContent(prompt: String): String? =
+    model.generateContent(prompt).text
+}
diff --git a/...quest/src/main/java/org/smartregister/fhircore/quest/medintel/speech/models/GemmaModel.kt b/...quest/src/main/java/org/smartregister/fhircore/quest/medintel/speech/models/GemmaModel.kt
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021-2024 Ona Systems, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.smartregister.fhircore.quest.medintel.speech.models
+
+import android.content.Context
+import com.google.mediapipe.tasks.genai.llminference.LlmInference
+
+/**
+ * [LlmModel] implementation backed by a MediaPipe [LlmInference] engine.
+ *
+ * The engine is created when the class is constructed, from the model file at `modelPath` and the
+ * default sampling parameters declared in the companion object.
+ */
+class GemmaModel(context: Context, modelPath: String) : LlmModel<LlmInference> {
+  override var model: LlmInference =
+    LlmInference.createFromOptions(context, defaultOptions(modelPath))
+
+  /** Runs [prompt] through the inference engine and returns the generated response. */
+  override suspend fun generateContent(prompt: String): String? = model.generateResponse(prompt)
+
+  companion object {
+    const val DEFAULT_MAX_TOKENS = 1000
+    const val DEFAULT_TOP_K = 40
+    const val DEFAULT_TEMPERATURE = 0.8F
+    const val DEFAULT_RANDOM_SEED = 101
+
+    /** Builds the inference options for the model stored at [modelPath]. */
+    private fun defaultOptions(modelPath: String) =
+      LlmInference.LlmInferenceOptions.builder()
+        .setModelPath(modelPath)
+        .setMaxTokens(DEFAULT_MAX_TOKENS)
+        .setTopK(DEFAULT_TOP_K)
+        .setTemperature(DEFAULT_TEMPERATURE)
+        .setRandomSeed(DEFAULT_RANDOM_SEED)
+        .build()
+  }
+}
diff --git a/...d/quest/src/main/java/org/smartregister/fhircore/quest/medintel/speech/models/LlmModel.kt b/...d/quest/src/main/java/org/smartregister/fhircore/quest/medintel/speech/models/LlmModel.kt
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2021-2024 Ona Systems, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.smartregister.fhircore.quest.medintel.speech.models
+
+/**
+ * Abstraction over a language model that turns a text prompt into generated text.
+ *
+ * @param T Type of the wrapped model object exposed through [model].
+ */
+interface LlmModel<T> {
+  /** The wrapped model object. */
+  val model: T
+
+  /**
+   * Generates text for the given [prompt].
+   *
+   * @return The generated text; may be null.
+   */
+  suspend fun generateContent(prompt: String): String?
+}
diff --git a/...in/java/org/smartregister/fhircore/quest/medintel/speech/speechtoform/LiveSpeechToText.kt b/...in/java/org/smartregister/fhircore/quest/medintel/speech/speechtoform/LiveSpeechToText.kt
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2021-2024 Ona Systems, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.smartregister.fhircore.quest.medintel.speech.speechtoform
+
+import android.media.AudioFormat
+import android.media.AudioRecord
+import com.google.api.gax.rpc.ClientStream
+import com.google.api.gax.rpc.ResponseObserver
+import com.google.api.gax.rpc.StreamController
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig
+import com.google.cloud.speech.v1p1beta1.SpeechClient
+import com.google.cloud.speech.v1p1beta1.SpeechSettings
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse
+import com.google.protobuf.ByteString
+import javax.inject.Inject
+import kotlin.coroutines.cancellation.CancellationException
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.channels.awaitClose
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.callbackFlow
+import kotlinx.coroutines.flow.flowOn
+import kotlinx.coroutines.isActive
+import kotlinx.coroutines.launch
+import org.smartregister.fhircore.engine.util.DispatcherProvider
+import org.smartregister.fhircore.quest.BuildConfig
+import timber.log.Timber
+
+/**
+ * Streams audio captured by an [AudioRecord] to the Cloud Speech streaming recognition API and
+ * exposes the recognised text as a [Flow].
+ */
+class LiveSpeechToText @Inject constructor(val dispatcherProvider: DispatcherProvider) {
+
+  private val sampleRate = 16000
+  private val channelConfig = AudioFormat.CHANNEL_IN_MONO
+  private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
+  private val bufferSize = AudioRecord.getMinBufferSize(sampleRate, channelConfig, audioFormat)
+
+  private val speechSettings: SpeechSettings by lazy {
+    SpeechSettings.newBuilder().setApiKey(BuildConfig.SPEECH_TO_TEXT_API_KEY).build()
+  }
+
+  /**
+   * Starts recording on [audioRecord] and streams the captured audio for recognition.
+   *
+   * Each transcript alternative received from the service is emitted on the returned flow. Audio
+   * is read on [DispatcherProvider.io] by a child coroutine of the flow, so reading stops when the
+   * collector is cancelled, when the service completes or fails the stream, or when [audioRecord]
+   * returns no data. When that coroutine ends, the request stream and speech client are closed
+   * and [audioRecord] is stopped and released, even if sending audio failed.
+   *
+   * A service error cancels the flow with a [CancellationException] whose cause is the error.
+   *
+   * @param audioRecord Recorder to capture from; its audio is declared to the service as LINEAR16
+   *   at 16 kHz. It is released once streaming ends.
+   * @param languageCode Language code passed to the recognition config; defaults to
+   *   [DEFAULT_LANGUAGE_CODE].
+   * @return A flow of transcripts; recording starts when the flow is collected.
+   */
+  fun startTranscription(
+    audioRecord: AudioRecord,
+    languageCode: String = DEFAULT_LANGUAGE_CODE,
+  ): Flow<String> {
+    return callbackFlow<String> {
+        val speechClient = SpeechClient.create(speechSettings)
+        val responseObserver =
+          object : ResponseObserver<StreamingRecognizeResponse> {
+            override fun onStart(controller: StreamController) {
+              // No-op
+            }
+
+            override fun onResponse(response: StreamingRecognizeResponse) {
+              response.resultsList
+                .flatMap { it.alternativesList }
+                .forEach {
+                  val transcript = it.transcript
+                  trySend(transcript)
+                  Timber.i("Transcription: $transcript")
+                }
+            }
+
+            override fun onError(t: Throwable) {
+              // Pass the throwable to Timber so the stack trace reaches the configured log trees.
+              Timber.e(t, "Error during streaming: ${t.message}")
+              cancel(CancellationException("Streaming Error", t))
+            }
+
+            override fun onComplete() {
+              Timber.i("Streaming completed.")
+              channel.close()
+            }
+          }
+
+        val requestObserver: ClientStream<StreamingRecognizeRequest> =
+          try {
+            val stream = speechClient.streamingRecognizeCallable().splitCall(responseObserver)
+            stream.send(streamingConfigRequest(languageCode))
+            audioRecord.startRecording()
+            stream
+          } catch (e: Exception) {
+            // The audio pump has not started yet, so nothing else would close the client.
+            speechClient.close()
+            throw e
+          }
+
+        val audioPump = launchAudioPump(audioRecord, speechClient, requestObserver)
+
+        // Runs on cancellation and on channel close; stops the read loop in both cases.
+        awaitClose { audioPump.cancel() }
+      }
+      .flowOn(dispatcherProvider.io())
+  }
+
+  /** Builds the first request of a stream, which carries the recognition configuration. */
+  private fun streamingConfigRequest(languageCode: String): StreamingRecognizeRequest {
+    val recognitionConfig =
+      RecognitionConfig.newBuilder()
+        .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+        .setSampleRateHertz(sampleRate)
+        .setLanguageCode(languageCode)
+        .build()
+    val streamingRecognitionConfig =
+      StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build()
+    return StreamingRecognizeRequest.newBuilder()
+      .setStreamingConfig(streamingRecognitionConfig)
+      .build()
+  }
+
+  /**
+   * Launches the coroutine that reads audio from [audioRecord] and sends it on [requestObserver]
+   * until it is cancelled or the recorder returns no data, then releases all three resources.
+   */
+  private fun CoroutineScope.launchAudioPump(
+    audioRecord: AudioRecord,
+    speechClient: SpeechClient,
+    requestObserver: ClientStream<StreamingRecognizeRequest>,
+  ) =
+    launch(dispatcherProvider.io()) {
+      try {
+        val audioData = ByteArray(bufferSize)
+        // TODO we want the caller to control stopping this loop, not on no data
+        while (isActive) {
+          val read = audioRecord.read(audioData, 0, bufferSize)
+          if (read <= 0) break
+          val audioBytes = ByteString.copyFrom(audioData, 0, read)
+          requestObserver.send(
+            StreamingRecognizeRequest.newBuilder().setAudioContent(audioBytes).build(),
+          )
+        }
+      } finally {
+        // Runs on normal exit, cancellation and send failures alike.
+        releaseStreamingResources(audioRecord, speechClient, requestObserver)
+      }
+    }
+
+  /** Releases every streaming resource, attempting each step even if an earlier one throws. */
+  private fun releaseStreamingResources(
+    audioRecord: AudioRecord,
+    speechClient: SpeechClient,
+    requestObserver: ClientStream<StreamingRecognizeRequest>,
+  ) {
+    listOf<Pair<String, () -> Unit>>(
+        "close the request stream" to { requestObserver.closeSend() },
+        "close the speech client" to { speechClient.close() },
+        "stop the audio recorder" to { audioRecord.stop() },
+        "release the audio recorder" to { audioRecord.release() },
+      )
+      .forEach { (step, action) ->
+        runCatching(action).onFailure { Timber.w(it, "Failed to $step") }
+      }
+  }
+
+  companion object {
+    /** Language code used for recognition when the caller does not supply one. */
+    const val DEFAULT_LANGUAGE_CODE = "en-US"
+  }
+}