diff --git a/Cargo.toml b/Cargo.toml
index 8940212b..74a7c067 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,4 +17,4 @@ candle = { package = "candle-core", version = "0.6.0" }
 candle-nn = { package = "candle-nn", version = "0.6.0" }
 candle-transformers = { package = "candle-transformers", version = "0.6.0" }
 tokenizers = "0.19.1"
-
+tracing = "0.1.37"
diff --git a/screenpipe-audio/Cargo.toml b/screenpipe-audio/Cargo.toml
index 105c2886..ea7b1c14 100644
--- a/screenpipe-audio/Cargo.toml
+++ b/screenpipe-audio/Cargo.toml
@@ -50,6 +50,12 @@ env_logger = "0.10"
 
 # File
 tempfile = "3"
 
+# Tracing
+tracing = { workspace = true }
+
+# Concurrency
+crossbeam = "0.8"
+
 [dev-dependencies]
 tempfile = "3.3.0"
diff --git a/screenpipe-audio/src/bin/screenpipe-audio.rs b/screenpipe-audio/src/bin/screenpipe-audio.rs
index c0186d33..7465e263 100644
--- a/screenpipe-audio/src/bin/screenpipe-audio.rs
+++ b/screenpipe-audio/src/bin/screenpipe-audio.rs
@@ -1,6 +1,9 @@
 use anyhow::{anyhow, Result};
 use clap::Parser;
 use log::info;
+use screenpipe_audio::create_whisper_channel;
+use screenpipe_audio::default_input_device;
+use screenpipe_audio::default_output_device;
 use screenpipe_audio::list_audio_devices;
 use screenpipe_audio::parse_device_spec;
 use screenpipe_audio::record_and_transcribe;
@@ -12,8 +15,12 @@ use std::time::Duration;
 #[derive(Parser, Debug)]
 #[clap(author, version, about, long_about = None)]
 struct Args {
-    #[clap(short, long, help = "Audio device name")]
-    audio_device: Option<String>,
+    #[clap(
+        short,
+        long,
+        help = "Audio device name (can be specified multiple times)"
+    )]
+    audio_device: Vec<String>,
 
     #[clap(long, help = "List available audio devices")]
     list_audio_devices: bool,
@@ -45,51 +52,58 @@ fn main() -> Result<()> {
         return Ok(());
     }
 
-    let device = match args.audio_device {
-        Some(d) => parse_device_spec(&d).unwrap(),
-        None => {
-            if devices.is_empty() {
-                return Err(anyhow!("No audio input devices found"));
-            }
-            eprintln!("No audio device specified. Available devices are:");
-            print_devices(&devices);
-            eprintln!("\nPlease specify one or more devices with:");
-            eprintln!(
-                "  {} --audio-device \"Device Name (input)\" [--audio-device \"Another Device (output)\"]",
-                std::env::args().next().unwrap()
-            );
-            return Err(anyhow!("No device specified"));
-        }
+    let devices = if args.audio_device.is_empty() {
+        vec![default_input_device()?, default_output_device()?]
+    } else {
+        args.audio_device
+            .iter()
+            .map(|d| parse_device_spec(d))
+            .collect::<Result<Vec<_>>>()?
     };
 
-    let (result_tx, result_rx) = mpsc::channel();
+    if devices.is_empty() {
+        return Err(anyhow!("No audio input devices found"));
+    }
+
     let chunk_duration = Duration::from_secs(30);
     let output_path = PathBuf::from("output.wav");
 
-    // Spawn a thread to handle the recording and transcription
-    let recording_thread = thread::spawn(move || {
-        record_and_transcribe(&device, chunk_duration, result_tx, output_path)
-    });
+    let (whisper_sender, whisper_receiver) = create_whisper_channel()?;
+
+    // Spawn threads for each device
+    let recording_threads: Vec<_> = devices
+        .into_iter()
+        .enumerate()
+        .map(|(i, device)| {
+            let whisper_sender = whisper_sender.clone();
+            let output_path = output_path.with_file_name(format!("output_{}.wav", i));
+            thread::spawn(move || {
+                record_and_transcribe(&device, chunk_duration, output_path, whisper_sender)
+            })
+        })
+        .collect();
 
     // Main loop to receive and print transcriptions
     loop {
-        match result_rx.recv_timeout(Duration::from_secs(5)) {
+        match whisper_receiver.recv_timeout(Duration::from_secs(5)) {
             Ok(result) => {
-                info!("Transcription: {}", result.text);
+                info!("Transcription: {:?}", result);
             }
-            Err(mpsc::RecvTimeoutError::Timeout) => {
+            Err(crossbeam::channel::RecvTimeoutError::Timeout) => {
                 // No transcription received in 5 seconds, continue waiting
                 continue;
             }
-            Err(mpsc::RecvTimeoutError::Disconnected) => {
-                // Sender has been dropped, recording is complete
+            Err(crossbeam::channel::RecvTimeoutError::Disconnected) => {
+                // All senders have been dropped, recording is complete
                 break;
            }
         }
     }
 
-    // Wait for the recording thread to finish
-    let file_path = recording_thread.join().unwrap()?;
-    println!("Recording complete: {:?}", file_path);
+    // Wait for all recording threads to finish
+    for (i, thread) in recording_threads.into_iter().enumerate() {
+        let file_path = thread.join().unwrap()?;
+        println!("Recording {} complete: {:?}", i, file_path);
+    }
 
     Ok(())
 }
diff --git a/screenpipe-audio/src/core.rs b/screenpipe-audio/src/core.rs
index 464c7355..8431abd3 100644
--- a/screenpipe-audio/src/core.rs
+++ b/screenpipe-audio/src/core.rs
@@ -1,18 +1,18 @@
 use anyhow::{anyhow, Result};
 use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
 use cpal::{FromSample, Sample};
+use crossbeam::channel::Sender;
 use hound::WavSpec;
 use log::{error, info};
 use serde::Serialize;
 use std::fmt;
 use std::fs::File;
 use std::path::PathBuf;
-use std::sync::mpsc::Sender;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 use std::{io::BufWriter, thread};
 
-use crate::stt::stt;
+use crate::AudioInput;
 
 pub struct AudioCaptureResult {
     pub text: String,
@@ -83,8 +83,8 @@ pub fn parse_device_spec(name: &str) -> Result<DeviceSpec> {
 pub fn record_and_transcribe(
     device_spec: &DeviceSpec,
     duration: Duration,
-    result_tx: Sender<AudioCaptureResult>,
     output_path: PathBuf,
+    whisper_sender: Sender<AudioInput>,
 ) -> Result<PathBuf> {
     let host = match device_spec {
         #[cfg(target_os = "macos")]
@@ -94,7 +94,7 @@ pub fn record_and_transcribe(
 
     info!("device: {:?}", device_spec.to_string());
 
-    let device = if device_spec.to_string() == "default" {
+    let audio_device = if device_spec.to_string() == "default" {
         host.default_input_device()
     } else {
         host.input_devices()?.find(|x| {
@@ -108,10 +108,14 @@ pub fn record_and_transcribe(
             .unwrap_or(false)
         })
     }
-    .ok_or_else(|| anyhow!("Device not found"))?;
+    .ok_or_else(|| anyhow!("Audio device not found"))?;
 
-    let config = device.default_input_config()?;
-    info!("Recording device: {}, Config: {:?}", device.name()?, config);
+    let config = audio_device.default_input_config()?;
info!( + "Recording audio device: {}, Config: {:?}", + audio_device.name()?, + config + ); let spec = wav_spec_from_config(&config); let writer = hound::WavWriter::create(&output_path, spec)?; @@ -120,25 +124,25 @@ pub fn record_and_transcribe( let err_fn = |err| error!("An error occurred on the audio stream: {}", err); let stream = match config.sample_format() { - cpal::SampleFormat::I8 => device.build_input_stream( + cpal::SampleFormat::I8 => audio_device.build_input_stream( &config.into(), move |data, _: &_| write_input_data::(data, &writer_2), err_fn, None, )?, - cpal::SampleFormat::I16 => device.build_input_stream( + cpal::SampleFormat::I16 => audio_device.build_input_stream( &config.into(), move |data, _: &_| write_input_data::(data, &writer_2), err_fn, None, )?, - cpal::SampleFormat::I32 => device.build_input_stream( + cpal::SampleFormat::I32 => audio_device.build_input_stream( &config.into(), move |data, _: &_| write_input_data::(data, &writer_2), err_fn, None, )?, - cpal::SampleFormat::F32 => device.build_input_stream( + cpal::SampleFormat::F32 => audio_device.build_input_stream( &config.into(), move |data, _: &_| write_input_data::(data, &writer_2), err_fn, @@ -169,15 +173,11 @@ pub fn record_and_transcribe( if let Some(writer) = writer_guard.as_mut() { writer.flush()?; - // Transcribe the current audio chunk - match stt(output_path.to_str().unwrap()) { - Ok(transcription) => { - result_tx.send(AudioCaptureResult { - text: transcription, - })?; - } - Err(e) => error!("Transcription failed: {}", e), - } + // Send the file path to the whisper channel + whisper_sender.send(AudioInput { + path: output_path.to_str().unwrap().to_string(), + device: device_spec.to_string(), + })?; } } @@ -195,14 +195,11 @@ pub fn record_and_transcribe( if let Some(writer) = writer_guard.as_mut() { writer.flush()?; - match stt(output_path.to_str().unwrap()) { - Ok(transcription) => { - result_tx.send(AudioCaptureResult { - text: transcription, - })?; - } - Err(e) => error!("Final transcription failed: {}", e), - } + // Send the file path to the whisper channel + whisper_sender.send(AudioInput { + path: output_path.to_str().unwrap().to_string(), + device: device_spec.to_string(), + })?; } } diff --git a/screenpipe-audio/src/lib.rs b/screenpipe-audio/src/lib.rs index 04729f80..d5f6907c 100644 --- a/screenpipe-audio/src/lib.rs +++ b/screenpipe-audio/src/lib.rs @@ -2,8 +2,8 @@ mod core; mod multilingual; mod pcm_decode; mod stt; - pub use core::{ default_input_device, default_output_device, list_audio_devices, parse_device_spec, record_and_transcribe, AudioCaptureResult, AudioDevice, DeviceSpec, }; +pub use stt::{create_whisper_channel, AudioInput, TranscriptionResult}; diff --git a/screenpipe-audio/src/multilingual.rs b/screenpipe-audio/src/multilingual.rs index 78f3c517..cdd73b21 100644 --- a/screenpipe-audio/src/multilingual.rs +++ b/screenpipe-audio/src/multilingual.rs @@ -3,6 +3,8 @@ use candle_transformers::models::whisper::SOT_TOKEN; use log::info; use tokenizers::Tokenizer; +use crate::stt::Model; + const LANGUAGES: [(&str, &str); 99] = [ ("en", "english"), ("zh", "chinese"), @@ -107,7 +109,7 @@ const LANGUAGES: [(&str, &str); 99] = [ /// Returns the token id for the selected language. 
 pub fn detect_language(
-    model: &mut super::stt::Model,
+    model: &mut Model,
     tokenizer: &Tokenizer,
     mel: &Tensor,
 ) -> Result<u32> {
diff --git a/screenpipe-audio/src/stt.rs b/screenpipe-audio/src/stt.rs
index 4ee7988b..35b7b9e8 100644
--- a/screenpipe-audio/src/stt.rs
+++ b/screenpipe-audio/src/stt.rs
@@ -1,18 +1,67 @@
+use std::{
+    thread,
+    time::{SystemTime, UNIX_EPOCH},
+};
+
 use anyhow::{Error as E, Result};
 use candle::{Device, IndexOp, Tensor};
 use candle_nn::ops::softmax;
+use crossbeam::channel::{self, Receiver, Sender};
 use hf_hub::{api::sync::Api, Repo, RepoType};
 use log::{error, info};
 use rand::{distributions::Distribution, SeedableRng};
 use tokenizers::Tokenizer;
 
-use candle_transformers::models::whisper::{self as m, audio, Config};
+use candle_transformers::{
+    models::whisper::{self as m, audio, Config},
+    quantized_var_builder::VarBuilder,
+};
 use rubato::{
     Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction,
 };
 
 use crate::{multilingual, pcm_decode::pcm_decode};
 
+#[derive(Clone)]
+pub struct WhisperModel {
+    pub model: Model,
+    pub tokenizer: Tokenizer,
+    pub device: Device,
+}
+
+impl WhisperModel {
+    pub fn new() -> Result<Self> {
+        let device = Device::new_metal(0).unwrap_or(Device::new_cuda(0).unwrap_or(Device::Cpu));
+        info!("device = {:?}", device);
+
+        let (config_filename, tokenizer_filename, weights_filename) = {
+            let api = Api::new()?;
+            let repo = api.repo(Repo::with_revision(
+                "lmz/candle-whisper".to_string(),
+                RepoType::Model,
+                "main".to_string(),
+            ));
+            let config = repo.get("config-tiny.json")?;
+            let tokenizer = repo.get("tokenizer-tiny.json")?;
+            let model = repo.get("model-tiny-q80.gguf")?;
+            (config, tokenizer, model)
+        };
+
+        let config: Config = serde_json::from_str(&std::fs::read_to_string(config_filename)?)?;
+        let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;
+
+        let vb = VarBuilder::from_gguf(weights_filename, &device)?;
+        let model = Model::Quantized(m::quantized_model::Whisper::load(&vb, config)?);
+
+        Ok(Self {
+            model,
+            tokenizer,
+            device,
+        })
+    }
+}
+
+#[derive(Debug, Clone)]
 pub enum Model {
     Normal(m::model::Whisper),
     Quantized(m::quantized_model::Whisper),
@@ -70,13 +119,13 @@ struct Segment {
     dr: DecodingResult,
 }
 
-struct Decoder {
-    model: Model,
+struct Decoder<'a> {
+    model: &'a mut Model,
     rng: rand::rngs::StdRng,
     task: Option<Task>,
     timestamps: bool,
     verbose: bool,
-    tokenizer: Tokenizer,
+    tokenizer: &'a Tokenizer,
     suppress_tokens: Tensor,
     sot_token: u32,
     transcribe_token: u32,
@@ -87,11 +136,11 @@
     language_token: Option<u32>,
 }
 
-impl Decoder {
+impl<'a> Decoder<'a> {
     #[allow(clippy::too_many_arguments)]
     fn new(
-        model: Model,
-        tokenizer: Tokenizer,
+        model: &'a mut Model,
+        tokenizer: &'a Tokenizer,
         seed: u64,
         device: &Device,
         language_token: Option<u32>,
@@ -142,12 +191,11 @@
     }
 
     fn decode(&mut self, mel: &Tensor, t: f64) -> Result<DecodingResult> {
-        let model = &mut self.model;
-        let audio_features = model.encoder_forward(mel, true)?;
+        let audio_features = self.model.encoder_forward(mel, true)?;
         if self.verbose {
             info!("audio features: {:?}", audio_features.dims());
         }
-        let sample_len = model.config().max_target_positions / 2;
+        let sample_len = self.model.config().max_target_positions / 2;
         let mut no_speech_prob = f64::NAN;
         let mut tokens = vec![self.sot_token];
         if let Some(language_token) = self.language_token {
@@ -167,17 +215,20 @@
         for i in 0..sample_len {
             let tokens_t = Tensor::new(tokens.as_slice(), mel.device())?;
             let tokens_t = tokens_t.unsqueeze(0)?;
-            let ys = model.decoder_forward(&tokens_t, &audio_features, i == 0)?;
+            let ys = self
+                .model
+                .decoder_forward(&tokens_t, &audio_features, i == 0)?;
             if i == 0 {
-                let logits = model.decoder_final_linear(&ys.i(..1)?)?.i(0)?.i(0)?;
+                let logits = self.model.decoder_final_linear(&ys.i(..1)?)?.i(0)?.i(0)?;
                 no_speech_prob = softmax(&logits, 0)?
                     .i(self.no_speech_token as usize)?
                     .to_scalar::<f32>()? as f64;
             }
             let (_, seq_len, _) = ys.dims3()?;
-            let logits = model
+            let logits = self
+                .model
                 .decoder_final_linear(&ys.i((..1, seq_len - 1..))?)?
                 .i(0)?
                 .i(0)?;
@@ -213,7 +264,9 @@
 
             sum_logprob += prob.ln();
 
-            if next_token == self.eot_token || tokens.len() > model.config().max_target_positions {
+            if next_token == self.eot_token
+                || tokens.len() > self.model.config().max_target_positions
+            {
                 break;
             }
 
@@ -343,31 +396,13 @@ enum Task {
     Translate,
 }
 
-pub fn stt(input: &str) -> Result<String> {
+pub fn stt(file_path: &str, whisper_model: &WhisperModel) -> Result<String> {
     info!("Starting speech to text");
-    let device = Device::new_metal(0).unwrap_or(Device::new_cuda(0).unwrap_or(Device::Cpu));
-    info!("device = {:?}", device);
-    let (default_model, default_revision) = ("lmz/candle-whisper", "main");
-    let default_model = default_model.to_string();
-    let default_revision = default_revision.to_string();
-
-    let (config_filename, tokenizer_filename, weights_filename, input) = {
-        let api = Api::new()?;
-        let repo = api.repo(Repo::with_revision(
-            default_model,
-            RepoType::Model,
-            default_revision,
-        ));
-        let sample = std::path::PathBuf::from(input);
-        let config = repo.get("config-tiny.json")?;
-        let tokenizer = repo.get("tokenizer-tiny.json")?;
-        let model = repo.get("model-tiny-q80.gguf")?;
-        (config, tokenizer, model, sample)
-    };
-    let config: Config = serde_json::from_str(&std::fs::read_to_string(config_filename)?)?;
-    let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;
+    let mut model = &whisper_model.model;
+    let tokenizer = &whisper_model.tokenizer;
+    let device = &whisper_model.device;
 
-    let mel_bytes = match config.num_mel_bins {
+    let mel_bytes = match model.config().num_mel_bins {
         80 => include_bytes!("../models/whisper/melfilters.bytes").as_slice(),
         128 => include_bytes!("../models/whisper/melfilters128.bytes").as_slice(),
         nmel => anyhow::bail!("unexpected num_mel_bins {nmel}"),
@@ -375,7 +410,7 @@
     let mut mel_filters = vec![0f32; mel_bytes.len() / 4];
     <byteorder::LittleEndian as byteorder::ByteOrder>::read_f32_into(mel_bytes, &mut mel_filters);
 
-    let (mut pcm_data, sample_rate) = pcm_decode(input)?;
+    let (mut pcm_data, sample_rate) = pcm_decode(file_path)?;
     if sample_rate != m::SAMPLE_RATE as u32 {
         info!(
             "Resampling from {} Hz to {} Hz",
@@ -386,24 +421,26 @@
     }
 
     // info!("pcm data loaded {}", pcm_data.len());
-    let mel = audio::pcm_to_mel(&config, &pcm_data, &mel_filters);
+    let mel = audio::pcm_to_mel(&model.config(), &pcm_data, &mel_filters);
     let mel_len = mel.len();
     let mel = Tensor::from_vec(
         mel,
-        (1, config.num_mel_bins, mel_len / config.num_mel_bins),
+        (
+            1,
+            model.config().num_mel_bins,
+            mel_len / model.config().num_mel_bins,
+        ),
         &device,
     )?;
-    // info!("loaded mel: {:?}", mel.dims());
-    let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf(
-        weights_filename,
-        &device,
-    )?;
-    let mut model = Model::Quantized(m::quantized_model::Whisper::load(&vb, config)?);
-    let language_token = Some(multilingual::detect_language(&mut model, &tokenizer, &mel)?);
-    // info!("Creating decoder");
+    let language_token = Some(multilingual::detect_language(
+        &mut model.clone(),
+        &tokenizer,
+        &mel,
+    )?);
+    let mut model = model.clone();
     let mut dc = Decoder::new(
-        model,
+        &mut model,
         tokenizer,
         42,
         &device,
@@ -445,6 +482,61 @@ fn resample(input: Vec<f32>, from_sample_rate: u32, to_sample_rate: u32) -> Result<Vec<f32>> {
     Ok(waves_out.into_iter().next().unwrap())
 }
 
+#[derive(Debug, Clone)]
+pub struct AudioInput {
+    pub path: String,
+    pub device: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct TranscriptionResult {
+    pub input: AudioInput,
+    pub transcription: Option<String>,
+    pub timestamp: u64,
+    pub error: Option<String>,
+}
+pub fn create_whisper_channel() -> Result<(Sender<AudioInput>, Receiver<TranscriptionResult>)> {
+    let whisper_model = WhisperModel::new()?;
+    let (input_sender, input_receiver): (Sender<AudioInput>, Receiver<AudioInput>) =
+        channel::unbounded();
+    let (output_sender, output_receiver): (
+        Sender<TranscriptionResult>,
+        Receiver<TranscriptionResult>,
+    ) = channel::unbounded();
+
+    thread::spawn(move || {
+        while let Ok(input) = input_receiver.recv() {
+            let timestamp = SystemTime::now()
+                .duration_since(UNIX_EPOCH)
+                .expect("Time went backwards")
+                .as_secs();
+
+            let result = stt(&input.path, &whisper_model);
+
+            let transcription_result = match result {
+                Ok(transcription) => TranscriptionResult {
+                    input: input.clone(),
+                    transcription: Some(transcription),
+                    timestamp,
+                    error: None,
+                },
+                Err(e) => TranscriptionResult {
+                    input: input.clone(),
+                    transcription: None,
+                    timestamp,
+                    error: Some(e.to_string()),
+                },
+            };
+
+            if output_sender.send(transcription_result).is_err() {
+                break;
+            }
+        }
+    });
+
+    Ok((input_sender, output_receiver))
+}
+
 #[test]
 #[ignore]
 fn test_speech_to_text() {
@@ -452,7 +544,9 @@ fn test_speech_to_text() {
     println!("Loading audio file");
     let start = std::time::Instant::now();
-    let text = stt("./test_data/poetic_kapil_gupta.wav").unwrap();
+    let whisper_model = WhisperModel::new().unwrap();
+
+    let text = stt("./test_data/poetic_kapil_gupta.wav", &whisper_model).unwrap();
     let duration = start.elapsed();
     println!("Speech to text completed in {:?}", duration);
diff --git a/screenpipe-server/Cargo.toml b/screenpipe-server/Cargo.toml
index edcc9829..e197ea88 100644
--- a/screenpipe-server/Cargo.toml
+++ b/screenpipe-server/Cargo.toml
@@ -68,6 +68,12 @@ sysinfo = "0.29.0"
 
 # Color
 colored = "2.0"
 
+# Tracing
+tracing = { workspace = true }
+
+# Concurrency
+crossbeam = "0.8"
+
 [dev-dependencies]
 tempfile = "3.3.0"
diff --git a/screenpipe-server/src/core.rs b/screenpipe-server/src/core.rs
index f7a50262..d3bfcd7a 100644
--- a/screenpipe-server/src/core.rs
+++ b/screenpipe-server/src/core.rs
@@ -1,14 +1,16 @@
 use crate::{DatabaseManager, VideoCapture};
 use anyhow::Result;
 use chrono::Utc;
+use crossbeam::channel::{Receiver, Sender};
 use log::{debug, error, info};
-use screenpipe_audio::{record_and_transcribe, AudioCaptureResult, DeviceSpec};
-use screenpipe_vision::CaptureResult;
+use screenpipe_audio::{
+    create_whisper_channel, record_and_transcribe, AudioCaptureResult, AudioInput, DeviceSpec,
+    TranscriptionResult,
+};
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::mpsc::{self, Receiver};
 use std::sync::Arc;
+use std::thread;
 use std::time::Duration;
-use std::{fs, thread};
 use tokio::runtime::Runtime;
 pub enum RecorderControl {
     Pause,
@@ -21,7 +23,7 @@ pub async fn start_continuous_recording(
     output_path: Arc<String>,
     fps: f64,
     audio_chunk_duration: Duration,
-    control_rx: Receiver<RecorderControl>,
+    control_rx: std::sync::mpsc::Receiver<RecorderControl>,
     enable_audio: bool,
     audio_devices: Vec<Arc<DeviceSpec>>,
 ) -> Result<()> {
@@ -30,6 +32,8 @@
info!("Audio recording disabled"); } + let (whisper_sender, whisper_receiver) = create_whisper_channel()?; + let db_manager_video = Arc::clone(&db); let db_manager_audio = Arc::clone(&db); @@ -52,6 +56,8 @@ pub async fn start_continuous_recording( audio_chunk_duration, is_running_audio, audio_devices, + whisper_sender, + whisper_receiver, ) .await }); @@ -95,7 +101,6 @@ async fn record_video( error!("Failed to insert new video chunk: {}", e); } }; - let video_capture = VideoCapture::new(&output_path, fps, new_chunk_callback); while is_running.load(Ordering::SeqCst) { @@ -125,6 +130,8 @@ async fn record_audio( chunk_duration: Duration, is_running: Arc, devices: Vec>, + whisper_sender: Sender, + whisper_receiver: Receiver, ) -> Result<()> { let mut handles = vec![]; @@ -133,10 +140,10 @@ async fn record_audio( let output_path_clone = Arc::clone(&output_path); let is_running_clone = Arc::clone(&is_running); let device_spec_clone = Arc::clone(&device_spec); + let whisper_sender_clone = whisper_sender.clone(); + let whisper_receiver_clone = whisper_receiver.clone(); let handle = tokio::spawn(async move { - let (result_tx, result_rx) = mpsc::channel(); - info!( "Starting audio capture thread for device: {}", &device_spec_clone @@ -145,8 +152,9 @@ async fn record_audio( while is_running_clone.load(Ordering::SeqCst) { let recording_thread = thread::spawn({ let device_spec_clone = Arc::clone(&device_spec_clone); - let result_tx = result_tx.clone(); let output_path_clone = Arc::clone(&output_path_clone); + let whisper_sender = whisper_sender_clone.clone(); + move || { let new_file_name = Utc::now().format("%Y-%m-%d_%H-%M-%S").to_string(); let file_path = format!( @@ -156,27 +164,24 @@ async fn record_audio( record_and_transcribe( device_spec_clone.as_ref(), chunk_duration, - result_tx, file_path.into(), + whisper_sender, ) } }); + // Handle the recording thread result match recording_thread.join() { Ok(Ok(file_path)) => { info!( "Recording complete for device {}: {:?}", device_spec_clone, file_path ); + let whisper_receiver = whisper_receiver_clone.clone(); + // Process the recorded chunk - while let Ok(result) = result_rx.try_recv() { - process_audio_result( - &db_clone, - &file_path.to_str().unwrap(), - &device_spec_clone, - result, - ) - .await; + if let Ok(transcription) = whisper_receiver.recv() { + process_audio_result(&db_clone, transcription).await; } } Ok(Err(e)) => error!("Error in record_and_transcribe: {}", e), @@ -196,34 +201,36 @@ async fn record_audio( Ok(()) } - -async fn process_audio_result( - db: &DatabaseManager, - output_path: &str, - device_spec: &DeviceSpec, - result: AudioCaptureResult, -) { - info!("Inserting audio chunk: {:?}", result.text); - match db.insert_audio_chunk(&output_path).await { +async fn process_audio_result(db: &DatabaseManager, result: TranscriptionResult) { + info!("Inserting audio chunk: {:?}", result.transcription); + if result.error.is_some() || result.transcription.is_none() { + error!( + "Error in audio recording: {}", + result.error.unwrap_or_default() + ); + return; + } + let transcription = result.transcription.unwrap(); + match db.insert_audio_chunk(&result.input.path).await { Ok(audio_chunk_id) => { if let Err(e) = db - .insert_audio_transcription(audio_chunk_id, &result.text, 0) // TODO index is in the text atm + .insert_audio_transcription(audio_chunk_id, &transcription, 0) // TODO index is in the text atm .await { error!( "Failed to insert audio transcription for device {}: {}", - device_spec, e + result.input.device, e ); } else { debug!( 
"Inserted audio transcription for chunk {} from device {}", - audio_chunk_id, device_spec + audio_chunk_id, result.input.device ); } } Err(e) => error!( "Failed to insert audio chunk for device {}: {}", - device_spec, e + result.input.device, e ), } }