diff --git a/samples/fast-transcription/README.md b/samples/fast-transcription/README.md
new file mode 100644
index 000000000..45d0f80e8
--- /dev/null
+++ b/samples/fast-transcription/README.md
@@ -0,0 +1,14 @@
+# Examples for using Fast Transcription
+
+The Azure Speech Fast Transcription API transcribes audio files synchronously and returns the results much faster than real time.
+This functionality is exposed through a REST API and is easy to access from many programming languages. The samples here do **NOT** require the installation of the Cognitive Services Speech SDK, but use the REST API directly instead.
+
+For a detailed explanation see the [fast transcription documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/fast-transcription-create) and the `README.md` in the language-specific subdirectories.
+
+Available samples:
+
+| Language | Directory | Description |
+| ---------- | -------- | ----------- |
+| C# | [csharp](csharp_client) | C# calling fast transcription REST API through System.Net.Http |
+| Python | [python](python_client) | Python client calling fast transcription REST API |
+| Node.js | [js/node](js_client) | Node.js client calling fast transcription REST API |
diff --git a/samples/fast-transcription/csharp_client/Program.cs b/samples/fast-transcription/csharp_client/Program.cs
new file mode 100644
index 000000000..91041aacf
--- /dev/null
+++ b/samples/fast-transcription/csharp_client/Program.cs
@@ -0,0 +1,99 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Net.Http;
+using System.Net.Http.Json;
+using System.Runtime.InteropServices;
+using System.Text.Json.Serialization;
+using Newtonsoft.Json;
+
+namespace fastTranscriptionClient
+{
+    /// <summary>
+    /// Options sent as the "definition" part of a fast transcription request.
+    /// </summary>
+    public class FastTranscriptionOptions
+    {
+        public string[] Locales { get; set; } = { "en-US" };
+        public string ProfanityFilterMode { get; set; } = "masked";
+        public int[] Channels { get; set; } = { 0 };
+        public Dictionary<string, int> DiarizationSettings { get; set; } = new Dictionary<string, int>
+        {
+            { "minSpeakers", 1 },
+            { "maxSpeakers", 4 }
+        };
+    }
+
+    /// <summary>
+    /// Console sample that posts one audio file to the fast transcription REST API.
+    /// </summary>
+    public class Program
+    {
+        // Replace with your subscription key
+        public const string SubscriptionKey = "YourSubscriptionKey";
+
+        // Replace with your service region
+        public const string Region = "YourServiceRegion";
+
+        // Not referenced anywhere in this file
+        private const string Locale = "en-US";
+        private const string DisplayName = "Fast transcription";
+
+        private static async Task Main(string[] args)
+        {
+            var audio_path = @"YourAudio_FilePath"; // update to your audio file location
+            var transcription_path = @"transcription_response.json"; // path to store your transcription output
+            try
+            {
+                await TranscribeAsync(audio_path, transcription_path).ConfigureAwait(false);
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine($"An error occurred: {ex.Message}");
+                Environment.ExitCode = 1; // report the failure to the calling shell
+            }
+        }
+
+        /// <summary>
+        /// Sends the audio file to the fast transcription endpoint and writes the
+        /// indented JSON response to a file.
+        /// </summary>
+        /// <param name="audio_filepath">Path of the audio file to upload.</param>
+        /// <param name="transcriptions_filepath">Path of the file that receives the JSON response.</param>
+        public static async Task TranscribeAsync(string audio_filepath, string transcriptions_filepath)
+        {
+            var fastTranscriptionEndpointPath = $"https://{Region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-05-15-preview";
+
+            using var client = new HttpClient();
+            using var request = new HttpRequestMessage(HttpMethod.Post, fastTranscriptionEndpointPath);
+
+            request.Headers.Add("Ocp-apim-subscription-key", SubscriptionKey);
+            request.Headers.Add("Accept", "application/json");
+
+            // Attach the content first: disposing the request then also disposes the parts and the audio stream
+            var content = new MultipartFormDataContent();
+            request.Content = content;
+
+            // Add the audio file under its own file name
+            content.Add(new StreamContent(File.OpenRead(audio_filepath)), "audio", Path.GetFileName(audio_filepath));
+
+            // Add the definition; JsonContent.Create writes the property names in camelCase
+            var fastTranscriptionOptions = new FastTranscriptionOptions();
+            var transcriptionDefinition = JsonContent.Create(fastTranscriptionOptions);
+            content.Add(transcriptionDefinition, "definition");
+
+            // Make the API request
+            using var response = await client.SendAsync(request).ConfigureAwait(false);
+            response.EnsureSuccessStatusCode();
+
+            var response_content = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
+            Console.WriteLine(response_content);
+
+            var response_content_json = JsonConvert.SerializeObject(JsonConvert.DeserializeObject(response_content), Formatting.Indented);
+
+            // Save the transcription to the output file
+            await File.WriteAllTextAsync(transcriptions_filepath, response_content_json).ConfigureAwait(false);
+            Console.WriteLine($"Response saved to {transcriptions_filepath}");
+        }
+    }
+}
diff --git a/samples/fast-transcription/csharp_client/csharp_client.csproj b/samples/fast-transcription/csharp_client/csharp_client.csproj
new file mode 100644
index 000000000..af748c004
--- /dev/null
+++ b/samples/fast-transcription/csharp_client/csharp_client.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
+  </ItemGroup>
+
+</Project>
diff --git a/samples/fast-transcription/csharp_client/csharp_client.sln b/samples/fast-transcription/csharp_client/csharp_client.sln
new file mode 100644
index 000000000..73732aad0
--- /dev/null
+++ b/samples/fast-transcription/csharp_client/csharp_client.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.11.35327.3
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "csharp_client", "csharp_client.csproj", "{7965ED0C-5A0E-449A-A11E-8ADFB20F5474}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{7965ED0C-5A0E-449A-A11E-8ADFB20F5474}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		
{7965ED0C-5A0E-449A-A11E-8ADFB20F5474}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7965ED0C-5A0E-449A-A11E-8ADFB20F5474}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7965ED0C-5A0E-449A-A11E-8ADFB20F5474}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {CCC58C19-0005-4B05-A56B-0016ECEEC7C5} + EndGlobalSection +EndGlobal diff --git a/samples/fast-transcription/csharp_client/readme.md b/samples/fast-transcription/csharp_client/readme.md new file mode 100644 index 000000000..5b7cbc265 --- /dev/null +++ b/samples/fast-transcription/csharp_client/readme.md @@ -0,0 +1,40 @@ +# C# Console app for .NET Core + +This sample demonstrates audio transcription using the Fast Transcription API for C#. It runs under .NET 8.0 or later. + +## Prerequisites + +1. An Azure AI Speech resource in one of the regions where the fast transcription API is available. The supported regions are: Central India, East US, Southeast Asia, and West Europe. For more information and region identifier, see [Speech service regions](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/regions) +1. An audio file (less than 2 hours long and less than 200 MB in size) in one of the formats and codecs supported by the batch transcription API. For more information about supported audio formats, see [Supported audio formats](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-audio-data?tabs=portal#supported-audio-formats-and-codecs) +1. 
For additional explanation, see [fast transcription documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/fast-transcription-create) + + +## Build the sample + +* To tailor the sample to your configuration, use search and replace across the whole solution (for example, in Visual Studio, via **Edit** \> **Find and Replace** \> **Quick Replace**) to update the following strings: + + * `YourSubscriptionKey`: replace with your subscription key. + * `YourServiceRegion`: replace with the [region](https://aka.ms/csspeech/region) your subscription is associated with. + For example, `eastus` or `westeurope`. + + +## Run the sample +You can run the sample using Microsoft Visual Studio 2022 on Windows or using the .NET CLI. + +### Using Visual Studio 2022 + +Open `csharp_client.sln` in Visual Studio 2022, then press **F5** to build and run the sample. + + +### Using the .NET Core CLI + +Navigate to the directory (samples/fast-transcription/csharp_client) that contains the sample and run the following commands: + +```bash +cd samples/fast-transcription/csharp_client +dotnet run +``` + +## References + +* [fast transcription documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/fast-transcription-create) diff --git a/samples/fast-transcription/csharp_client/test_call_audio.wav b/samples/fast-transcription/csharp_client/test_call_audio.wav new file mode 100644 index 000000000..24b513342 Binary files /dev/null and b/samples/fast-transcription/csharp_client/test_call_audio.wav differ diff --git a/samples/fast-transcription/csharp_client/transcription_response.json b/samples/fast-transcription/csharp_client/transcription_response.json new file mode 100644 index 000000000..6956ca91a --- /dev/null +++ b/samples/fast-transcription/csharp_client/transcription_response.json @@ -0,0 +1,2135 @@ +{ + "duration": 120000, + "combinedPhrases": [ + { + "channel": 0, + "text": "I think we got disconnected. Good. How are you? I have to say my little spill again. 
Just a heads up, Dimitri, to help improve the quality of our products, services and training, this call may be recorded and monitored. Information may be accessed internationally by Microsoft, our service provider, subsidiaries and affiliates. Is it OK if we continue? OK, fantastic. OK. So I I was talking. I think I, I, I, I kept rambling. And then I didn't hear you. So I guess we got disconnected there. And so just wanted to kind of follow up. So you're kind of comparing Shopify versus Azure. It looks like you're currently using an Azure trial. I guess as far as like questions, were you kind of. I know you kind of talked about price. I can definitely go over like pricing with you. If you're comparing apples to apples, I I saw that Spotify, you know obviously they're you know it's a SaaS platform. So if you want to run on that looks like their plans are as low as 29 bucks a month. What I was just saying is Microsoft has a little more flexibility. You kind of decide as far as you know our different options and then if your company scales and your e-commerce site goes goes big. You know, it's it's the fortunate thing about Microsoft is it's Microsoft, it's Azure. And then we have our Office 365 suite for you too for users, one share of company scale. So I didn't mean to kind of put the car before the horse, but that's kind of what I was saying. I'm talking to myself when we got cut off. But how can I kind of kind of help you out at this point, Dimitri? Well, I was wondering if there is a service that Microsoft offers where they can just look at my application and just, you know, tune it in or optimize it for the best performance. Sure. We do have a support team. Yeah. That's our engineering team on the back end. It's an option." 
+ } + ], + "phrases": [ + { + "channel": 0, + "speaker": 0, + "offset": 240, + "duration": 1200, + "text": "I think we got disconnected.", + "words": [ + { + "text": "I", + "offset": 240, + "duration": 40 + }, + { + "text": "think", + "offset": 280, + "duration": 160 + }, + { + "text": "we", + "offset": 440, + "duration": 80 + }, + { + "text": "got", + "offset": 520, + "duration": 200 + }, + { + "text": "disconnected.", + "offset": 720, + "duration": 720 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 2000, + "duration": 200, + "text": "Good.", + "words": [ + { + "text": "Good.", + "offset": 2000, + "duration": 200 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 2200, + "duration": 440, + "text": "How are you?", + "words": [ + { + "text": "How", + "offset": 2200, + "duration": 80 + }, + { + "text": "are", + "offset": 2280, + "duration": 200 + }, + { + "text": "you?", + "offset": 2480, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 7760, + "duration": 1840, + "text": "I have to say my little spill again.", + "words": [ + { + "text": "I", + "offset": 7760, + "duration": 120 + }, + { + "text": "have", + "offset": 7880, + "duration": 200 + }, + { + "text": "to", + "offset": 8080, + "duration": 240 + }, + { + "text": "say", + "offset": 8480, + "duration": 160 + }, + { + "text": "my", + "offset": 8640, + "duration": 120 + }, + { + "text": "little", + "offset": 8760, + "duration": 200 + }, + { + "text": "spill", + "offset": 8960, + "duration": 320 + }, + { + "text": "again.", + "offset": 9280, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 9680, + "duration": 5640, + "text": "Just a heads up, Dimitri, to help improve the quality of our products, services and training, this call may be 
recorded and monitored.", + "words": [ + { + "text": "Just", + "offset": 9680, + "duration": 240 + }, + { + "text": "a", + "offset": 9920, + "duration": 40 + }, + { + "text": "heads", + "offset": 9960, + "duration": 280 + }, + { + "text": "up,", + "offset": 10240, + "duration": 120 + }, + { + "text": "Dimitri,", + "offset": 10360, + "duration": 320 + }, + { + "text": "to", + "offset": 10680, + "duration": 80 + }, + { + "text": "help", + "offset": 10760, + "duration": 160 + }, + { + "text": "improve", + "offset": 10920, + "duration": 240 + }, + { + "text": "the", + "offset": 11160, + "duration": 80 + }, + { + "text": "quality", + "offset": 11240, + "duration": 440 + }, + { + "text": "of", + "offset": 11680, + "duration": 80 + }, + { + "text": "our", + "offset": 11760, + "duration": 80 + }, + { + "text": "products,", + "offset": 11840, + "duration": 360 + }, + { + "text": "services", + "offset": 12200, + "duration": 400 + }, + { + "text": "and", + "offset": 12600, + "duration": 160 + }, + { + "text": "training,", + "offset": 12760, + "duration": 280 + }, + { + "text": "this", + "offset": 13040, + "duration": 120 + }, + { + "text": "call", + "offset": 13160, + "duration": 360 + }, + { + "text": "may", + "offset": 13600, + "duration": 160 + }, + { + "text": "be", + "offset": 13760, + "duration": 80 + }, + { + "text": "recorded", + "offset": 13840, + "duration": 560 + }, + { + "text": "and", + "offset": 14760, + "duration": 120 + }, + { + "text": "monitored.", + "offset": 14880, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 15320, + "duration": 4960, + "text": "Information may be accessed internationally by Microsoft, our service provider, subsidiaries and affiliates.", + "words": [ + { + "text": "Information", + "offset": 15320, + "duration": 520 + }, + { + "text": "may", + "offset": 15840, + "duration": 80 + }, + { + "text": "be", + "offset": 15920, + "duration": 160 + }, + { + "text": 
"accessed", + "offset": 16080, + "duration": 280 + }, + { + "text": "internationally", + "offset": 16360, + "duration": 640 + }, + { + "text": "by", + "offset": 17000, + "duration": 80 + }, + { + "text": "Microsoft,", + "offset": 17080, + "duration": 680 + }, + { + "text": "our", + "offset": 18240, + "duration": 240 + }, + { + "text": "service", + "offset": 18480, + "duration": 360 + }, + { + "text": "provider,", + "offset": 18840, + "duration": 440 + }, + { + "text": "subsidiaries", + "offset": 19280, + "duration": 440 + }, + { + "text": "and", + "offset": 19720, + "duration": 120 + }, + { + "text": "affiliates.", + "offset": 19840, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 20320, + "duration": 1120, + "text": "Is it OK if we continue?", + "words": [ + { + "text": "Is", + "offset": 20320, + "duration": 120 + }, + { + "text": "it", + "offset": 20440, + "duration": 120 + }, + { + "text": "OK", + "offset": 20560, + "duration": 240 + }, + { + "text": "if", + "offset": 20800, + "duration": 80 + }, + { + "text": "we", + "offset": 20880, + "duration": 80 + }, + { + "text": "continue?", + "offset": 20960, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 23440, + "duration": 1080, + "text": "OK, fantastic.", + "words": [ + { + "text": "OK,", + "offset": 23440, + "duration": 200 + }, + { + "text": "fantastic.", + "offset": 23920, + "duration": 600 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 24520, + "duration": 160, + "text": "OK.", + "words": [ + { + "text": "OK.", + "offset": 24520, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 25120, + "duration": 2080, + "text": "So I I was talking.", + "words": [ + { + "text": "So", + "offset": 25120, + "duration": 480 + }, + { 
+ "text": "I", + "offset": 25920, + "duration": 400 + }, + { + "text": "I", + "offset": 26480, + "duration": 80 + }, + { + "text": "was", + "offset": 26560, + "duration": 160 + }, + { + "text": "talking.", + "offset": 26720, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 27200, + "duration": 2400, + "text": "I think I, I, I, I kept rambling.", + "words": [ + { + "text": "I", + "offset": 27200, + "duration": 80 + }, + { + "text": "think", + "offset": 27280, + "duration": 200 + }, + { + "text": "I,", + "offset": 27600, + "duration": 80 + }, + { + "text": "I,", + "offset": 27760, + "duration": 400 + }, + { + "text": "I,", + "offset": 28400, + "duration": 80 + }, + { + "text": "I", + "offset": 28560, + "duration": 160 + }, + { + "text": "kept", + "offset": 28720, + "duration": 320 + }, + { + "text": "rambling.", + "offset": 29040, + "duration": 560 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "channel": 0, + "speaker": 0, + "offset": 30000, + "duration": 880, + "text": "And then I didn't hear you.", + "words": [ + { + "text": "And", + "offset": 30000, + "duration": 120 + }, + { + "text": "then", + "offset": 30120, + "duration": 120 + }, + { + "text": "I", + "offset": 30240, + "duration": 40 + }, + { + "text": "didn't", + "offset": 30280, + "duration": 240 + }, + { + "text": "hear", + "offset": 30520, + "duration": 200 + }, + { + "text": "you.", + "offset": 30720, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "channel": 0, + "speaker": 0, + "offset": 30880, + "duration": 2480, + "text": "So I guess we got disconnected there.", + "words": [ + { + "text": "So", + "offset": 30880, + "duration": 400 + }, + { + "text": "I", + "offset": 31840, + "duration": 80 + }, + { + "text": "guess", + "offset": 31920, + "duration": 240 + }, + { + "text": "we", + "offset": 32160, + "duration": 80 + }, + { + "text": "got", + "offset": 32240, + 
"duration": 200 + }, + { + "text": "disconnected", + "offset": 32440, + "duration": 600 + }, + { + "text": "there.", + "offset": 33040, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "channel": 0, + "speaker": 0, + "offset": 33920, + "duration": 2400, + "text": "And so just wanted to kind of follow up.", + "words": [ + { + "text": "And", + "offset": 33920, + "duration": 240 + }, + { + "text": "so", + "offset": 34160, + "duration": 400 + }, + { + "text": "just", + "offset": 34880, + "duration": 240 + }, + { + "text": "wanted", + "offset": 35120, + "duration": 280 + }, + { + "text": "to", + "offset": 35400, + "duration": 80 + }, + { + "text": "kind", + "offset": 35480, + "duration": 200 + }, + { + "text": "of", + "offset": 35680, + "duration": 80 + }, + { + "text": "follow", + "offset": 35760, + "duration": 400 + }, + { + "text": "up.", + "offset": 36160, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "channel": 0, + "speaker": 0, + "offset": 36720, + "duration": 3960, + "text": "So you're kind of comparing Shopify versus Azure.", + "words": [ + { + "text": "So", + "offset": 36720, + "duration": 400 + }, + { + "text": "you're", + "offset": 37120, + "duration": 160 + }, + { + "text": "kind", + "offset": 37280, + "duration": 240 + }, + { + "text": "of", + "offset": 37520, + "duration": 80 + }, + { + "text": "comparing", + "offset": 37600, + "duration": 560 + }, + { + "text": "Shopify", + "offset": 39040, + "duration": 720 + }, + { + "text": "versus", + "offset": 39760, + "duration": 480 + }, + { + "text": "Azure.", + "offset": 40240, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "channel": 0, + "speaker": 0, + "offset": 40680, + "duration": 3000, + "text": "It looks like you're currently using an Azure trial.", + "words": [ + { + "text": "It", + "offset": 40680, + "duration": 80 + }, + { + "text": "looks", + "offset": 40760, + "duration": 200 + }, + { + 
"text": "like", + "offset": 40960, + "duration": 160 + }, + { + "text": "you're", + "offset": 41120, + "duration": 320 + }, + { + "text": "currently", + "offset": 41440, + "duration": 560 + }, + { + "text": "using", + "offset": 42320, + "duration": 280 + }, + { + "text": "an", + "offset": 42600, + "duration": 120 + }, + { + "text": "Azure", + "offset": 42720, + "duration": 320 + }, + { + "text": "trial.", + "offset": 43040, + "duration": 640 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "channel": 0, + "speaker": 0, + "offset": 45360, + "duration": 2720, + "text": "I guess as far as like questions, were you kind of.", + "words": [ + { + "text": "I", + "offset": 45360, + "duration": 480 + }, + { + "text": "guess", + "offset": 45840, + "duration": 240 + }, + { + "text": "as", + "offset": 46240, + "duration": 80 + }, + { + "text": "far", + "offset": 46320, + "duration": 280 + }, + { + "text": "as", + "offset": 46600, + "duration": 80 + }, + { + "text": "like", + "offset": 46680, + "duration": 160 + }, + { + "text": "questions,", + "offset": 46840, + "duration": 640 + }, + { + "text": "were", + "offset": 47480, + "duration": 200 + }, + { + "text": "you", + "offset": 47680, + "duration": 80 + }, + { + "text": "kind", + "offset": 47760, + "duration": 200 + }, + { + "text": "of.", + "offset": 47960, + "duration": 120 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "channel": 0, + "speaker": 0, + "offset": 49800, + "duration": 1680, + "text": "I know you kind of talked about price.", + "words": [ + { + "text": "I", + "offset": 49800, + "duration": 40 + }, + { + "text": "know", + "offset": 50080, + "duration": 80 + }, + { + "text": "you", + "offset": 50160, + "duration": 120 + }, + { + "text": "kind", + "offset": 50280, + "duration": 200 + }, + { + "text": "of", + "offset": 50480, + "duration": 80 + }, + { + "text": "talked", + "offset": 50560, + "duration": 440 + }, + { + "text": "about", + "offset": 51000, + "duration": 280 + }, + { 
+ "text": "price.", + "offset": 51280, + "duration": 200 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "channel": 0, + "speaker": 0, + "offset": 51480, + "duration": 1760, + "text": "I can definitely go over like pricing with you.", + "words": [ + { + "text": "I", + "offset": 51480, + "duration": 40 + }, + { + "text": "can", + "offset": 51520, + "duration": 120 + }, + { + "text": "definitely", + "offset": 51640, + "duration": 360 + }, + { + "text": "go", + "offset": 52000, + "duration": 120 + }, + { + "text": "over", + "offset": 52120, + "duration": 200 + }, + { + "text": "like", + "offset": 52320, + "duration": 160 + }, + { + "text": "pricing", + "offset": 52480, + "duration": 480 + }, + { + "text": "with", + "offset": 52960, + "duration": 160 + }, + { + "text": "you.", + "offset": 53120, + "duration": 120 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "channel": 0, + "speaker": 0, + "offset": 53800, + "duration": 6840, + "text": "If you're comparing apples to apples, I I saw that Spotify, you know obviously they're you know it's a SaaS platform.", + "words": [ + { + "text": "If", + "offset": 53800, + "duration": 80 + }, + { + "text": "you're", + "offset": 53880, + "duration": 120 + }, + { + "text": "comparing", + "offset": 54000, + "duration": 560 + }, + { + "text": "apples", + "offset": 54560, + "duration": 440 + }, + { + "text": "to", + "offset": 55000, + "duration": 80 + }, + { + "text": "apples,", + "offset": 55080, + "duration": 520 + }, + { + "text": "I", + "offset": 55600, + "duration": 40 + }, + { + "text": "I", + "offset": 55640, + "duration": 280 + }, + { + "text": "saw", + "offset": 55920, + "duration": 240 + }, + { + "text": "that", + "offset": 56160, + "duration": 160 + }, + { + "text": "Spotify,", + "offset": 56320, + "duration": 560 + }, + { + "text": "you", + "offset": 57760, + "duration": 80 + }, + { + "text": "know", + "offset": 57840, + "duration": 160 + }, + { + "text": "obviously", + "offset": 58080, + 
"duration": 360 + }, + { + "text": "they're", + "offset": 58440, + "duration": 280 + }, + { + "text": "you", + "offset": 59040, + "duration": 80 + }, + { + "text": "know", + "offset": 59120, + "duration": 160 + }, + { + "text": "it's", + "offset": 59360, + "duration": 240 + }, + { + "text": "a", + "offset": 59600, + "duration": 40 + }, + { + "text": "SaaS", + "offset": 59640, + "duration": 360 + }, + { + "text": "platform.", + "offset": 60000, + "duration": 640 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "channel": 0, + "speaker": 0, + "offset": 60640, + "duration": 6600, + "text": "So if you want to run on that looks like their plans are as low as 29 bucks a month.", + "words": [ + { + "text": "So", + "offset": 60640, + "duration": 320 + }, + { + "text": "if", + "offset": 61680, + "duration": 320 + }, + { + "text": "you", + "offset": 62000, + "duration": 160 + }, + { + "text": "want", + "offset": 62800, + "duration": 400 + }, + { + "text": "to", + "offset": 63280, + "duration": 80 + }, + { + "text": "run", + "offset": 64240, + "duration": 320 + }, + { + "text": "on", + "offset": 64560, + "duration": 80 + }, + { + "text": "that", + "offset": 64640, + "duration": 320 + }, + { + "text": "looks", + "offset": 64960, + "duration": 160 + }, + { + "text": "like", + "offset": 65120, + "duration": 160 + }, + { + "text": "their", + "offset": 65280, + "duration": 240 + }, + { + "text": "plans", + "offset": 65520, + "duration": 360 + }, + { + "text": "are", + "offset": 65880, + "duration": 80 + }, + { + "text": "as", + "offset": 65960, + "duration": 120 + }, + { + "text": "low", + "offset": 66080, + "duration": 80 + }, + { + "text": "as", + "offset": 66160, + "duration": 80 + }, + { + "text": "29", + "offset": 66240, + "duration": 400 + }, + { + "text": "bucks", + "offset": 66640, + "duration": 280 + }, + { + "text": "a", + "offset": 66920, + "duration": 40 + }, + { + "text": "month.", + "offset": 66960, + "duration": 280 + } + ], + "locale": "en-US", + 
"confidence": 0.9328801 + }, + { + "channel": 0, + "speaker": 0, + "offset": 67800, + "duration": 2840, + "text": "What I was just saying is Microsoft has a little more flexibility.", + "words": [ + { + "text": "What", + "offset": 67800, + "duration": 200 + }, + { + "text": "I", + "offset": 68000, + "duration": 40 + }, + { + "text": "was", + "offset": 68040, + "duration": 120 + }, + { + "text": "just", + "offset": 68160, + "duration": 200 + }, + { + "text": "saying", + "offset": 68360, + "duration": 200 + }, + { + "text": "is", + "offset": 68560, + "duration": 160 + }, + { + "text": "Microsoft", + "offset": 68720, + "duration": 440 + }, + { + "text": "has", + "offset": 69160, + "duration": 120 + }, + { + "text": "a", + "offset": 69280, + "duration": 40 + }, + { + "text": "little", + "offset": 69320, + "duration": 280 + }, + { + "text": "more", + "offset": 69600, + "duration": 160 + }, + { + "text": "flexibility.", + "offset": 69760, + "duration": 880 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "channel": 0, + "speaker": 0, + "offset": 71120, + "duration": 7120, + "text": "You kind of decide as far as you know our different options and then if your company scales and your e-commerce site goes goes big.", + "words": [ + { + "text": "You", + "offset": 71120, + "duration": 80 + }, + { + "text": "kind", + "offset": 71200, + "duration": 360 + }, + { + "text": "of", + "offset": 71560, + "duration": 80 + }, + { + "text": "decide", + "offset": 71640, + "duration": 520 + }, + { + "text": "as", + "offset": 72160, + "duration": 80 + }, + { + "text": "far", + "offset": 72240, + "duration": 360 + }, + { + "text": "as", + "offset": 72600, + "duration": 120 + }, + { + "text": "you", + "offset": 73040, + "duration": 80 + }, + { + "text": "know", + "offset": 73120, + "duration": 120 + }, + { + "text": "our", + "offset": 73280, + "duration": 160 + }, + { + "text": "different", + "offset": 73440, + "duration": 320 + }, + { + "text": "options", + "offset": 73760, 
+ "duration": 360 + }, + { + "text": "and", + "offset": 74120, + "duration": 120 + }, + { + "text": "then", + "offset": 74520, + "duration": 200 + }, + { + "text": "if", + "offset": 74720, + "duration": 240 + }, + { + "text": "your", + "offset": 74960, + "duration": 160 + }, + { + "text": "company", + "offset": 75120, + "duration": 400 + }, + { + "text": "scales", + "offset": 75520, + "duration": 560 + }, + { + "text": "and", + "offset": 76080, + "duration": 120 + }, + { + "text": "your", + "offset": 76200, + "duration": 120 + }, + { + "text": "e-commerce", + "offset": 76320, + "duration": 520 + }, + { + "text": "site", + "offset": 76840, + "duration": 240 + }, + { + "text": "goes", + "offset": 77080, + "duration": 440 + }, + { + "text": "goes", + "offset": 77680, + "duration": 240 + }, + { + "text": "big.", + "offset": 77920, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "channel": 0, + "speaker": 0, + "offset": 78640, + "duration": 4720, + "text": "You know, it's it's the fortunate thing about Microsoft is it's Microsoft, it's Azure.", + "words": [ + { + "text": "You", + "offset": 78640, + "duration": 80 + }, + { + "text": "know,", + "offset": 78720, + "duration": 240 + }, + { + "text": "it's", + "offset": 78960, + "duration": 280 + }, + { + "text": "it's", + "offset": 79280, + "duration": 320 + }, + { + "text": "the", + "offset": 79840, + "duration": 160 + }, + { + "text": "fortunate", + "offset": 80000, + "duration": 480 + }, + { + "text": "thing", + "offset": 80480, + "duration": 120 + }, + { + "text": "about", + "offset": 80600, + "duration": 200 + }, + { + "text": "Microsoft", + "offset": 80800, + "duration": 600 + }, + { + "text": "is", + "offset": 81400, + "duration": 280 + }, + { + "text": "it's", + "offset": 81920, + "duration": 200 + }, + { + "text": "Microsoft,", + "offset": 82120, + "duration": 640 + }, + { + "text": "it's", + "offset": 82760, + "duration": 120 + }, + { + "text": "Azure.", + "offset": 82880, + 
"duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "channel": 0, + "speaker": 0, + "offset": 83760, + "duration": 4240, + "text": "And then we have our Office 365 suite for you too for users, one share of company scale.", + "words": [ + { + "text": "And", + "offset": 83760, + "duration": 160 + }, + { + "text": "then", + "offset": 83920, + "duration": 120 + }, + { + "text": "we", + "offset": 84040, + "duration": 120 + }, + { + "text": "have", + "offset": 84160, + "duration": 200 + }, + { + "text": "our", + "offset": 84360, + "duration": 120 + }, + { + "text": "Office", + "offset": 84480, + "duration": 320 + }, + { + "text": "365", + "offset": 84800, + "duration": 720 + }, + { + "text": "suite", + "offset": 85520, + "duration": 280 + }, + { + "text": "for", + "offset": 85800, + "duration": 200 + }, + { + "text": "you", + "offset": 86000, + "duration": 80 + }, + { + "text": "too", + "offset": 86080, + "duration": 160 + }, + { + "text": "for", + "offset": 86240, + "duration": 160 + }, + { + "text": "users,", + "offset": 86400, + "duration": 400 + }, + { + "text": "one", + "offset": 86800, + "duration": 200 + }, + { + "text": "share", + "offset": 87000, + "duration": 200 + }, + { + "text": "of", + "offset": 87200, + "duration": 80 + }, + { + "text": "company", + "offset": 87280, + "duration": 320 + }, + { + "text": "scale.", + "offset": 87600, + "duration": 400 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "channel": 0, + "speaker": 0, + "offset": 88000, + "duration": 3520, + "text": "So I didn't mean to kind of put the car before the horse, but that's kind of what I was saying.", + "words": [ + { + "text": "So", + "offset": 88000, + "duration": 240 + }, + { + "text": "I", + "offset": 88560, + "duration": 40 + }, + { + "text": "didn't", + "offset": 88600, + "duration": 320 + }, + { + "text": "mean", + "offset": 88920, + "duration": 120 + }, + { + "text": "to", + "offset": 89040, + "duration": 80 + }, + { + "text": 
"kind", + "offset": 89120, + "duration": 160 + }, + { + "text": "of", + "offset": 89280, + "duration": 80 + }, + { + "text": "put", + "offset": 89360, + "duration": 120 + }, + { + "text": "the", + "offset": 89480, + "duration": 80 + }, + { + "text": "car", + "offset": 89560, + "duration": 120 + }, + { + "text": "before", + "offset": 89680, + "duration": 280 + }, + { + "text": "the", + "offset": 89960, + "duration": 80 + }, + { + "text": "horse,", + "offset": 90040, + "duration": 240 + }, + { + "text": "but", + "offset": 90280, + "duration": 160 + }, + { + "text": "that's", + "offset": 90440, + "duration": 200 + }, + { + "text": "kind", + "offset": 90640, + "duration": 200 + }, + { + "text": "of", + "offset": 90840, + "duration": 80 + }, + { + "text": "what", + "offset": 90920, + "duration": 160 + }, + { + "text": "I", + "offset": 91080, + "duration": 40 + }, + { + "text": "was", + "offset": 91120, + "duration": 120 + }, + { + "text": "saying.", + "offset": 91240, + "duration": 280 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "channel": 0, + "speaker": 0, + "offset": 91520, + "duration": 2800, + "text": "I'm talking to myself when we got cut off.", + "words": [ + { + "text": "I'm", + "offset": 91520, + "duration": 80 + }, + { + "text": "talking", + "offset": 92400, + "duration": 240 + }, + { + "text": "to", + "offset": 92640, + "duration": 160 + }, + { + "text": "myself", + "offset": 92800, + "duration": 520 + }, + { + "text": "when", + "offset": 93320, + "duration": 160 + }, + { + "text": "we", + "offset": 93480, + "duration": 120 + }, + { + "text": "got", + "offset": 93600, + "duration": 160 + }, + { + "text": "cut", + "offset": 93760, + "duration": 320 + }, + { + "text": "off.", + "offset": 94080, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "channel": 0, + "speaker": 0, + "offset": 94320, + "duration": 4880, + "text": "But how can I kind of kind of help you out at this point, Dimitri?", + "words": [ + 
{ + "text": "But", + "offset": 94320, + "duration": 320 + }, + { + "text": "how", + "offset": 95600, + "duration": 200 + }, + { + "text": "can", + "offset": 95800, + "duration": 200 + }, + { + "text": "I", + "offset": 96000, + "duration": 160 + }, + { + "text": "kind", + "offset": 96160, + "duration": 360 + }, + { + "text": "of", + "offset": 96520, + "duration": 200 + }, + { + "text": "kind", + "offset": 97280, + "duration": 160 + }, + { + "text": "of", + "offset": 97440, + "duration": 80 + }, + { + "text": "help", + "offset": 97520, + "duration": 240 + }, + { + "text": "you", + "offset": 97760, + "duration": 80 + }, + { + "text": "out", + "offset": 97920, + "duration": 120 + }, + { + "text": "at", + "offset": 98040, + "duration": 80 + }, + { + "text": "this", + "offset": 98120, + "duration": 200 + }, + { + "text": "point,", + "offset": 98320, + "duration": 320 + }, + { + "text": "Dimitri?", + "offset": 98640, + "duration": 560 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "channel": 0, + "speaker": 1, + "offset": 100320, + "duration": 12560, + "text": "Well, I was wondering if there is a service that Microsoft offers where they can just look at my application and just, you know, tune it in or optimize it for the best performance.", + "words": [ + { + "text": "Well,", + "offset": 100320, + "duration": 240 + }, + { + "text": "I", + "offset": 100640, + "duration": 80 + }, + { + "text": "was", + "offset": 100720, + "duration": 120 + }, + { + "text": "wondering", + "offset": 100840, + "duration": 840 + }, + { + "text": "if", + "offset": 101680, + "duration": 320 + }, + { + "text": "there", + "offset": 102000, + "duration": 200 + }, + { + "text": "is", + "offset": 102200, + "duration": 200 + }, + { + "text": "a", + "offset": 102520, + "duration": 40 + }, + { + "text": "service", + "offset": 102560, + "duration": 480 + }, + { + "text": "that", + "offset": 103040, + "duration": 240 + }, + { + "text": "Microsoft", + "offset": 103440, + "duration": 640 
+ }, + { + "text": "offers", + "offset": 104080, + "duration": 440 + }, + { + "text": "where", + "offset": 104520, + "duration": 440 + }, + { + "text": "they", + "offset": 104960, + "duration": 240 + }, + { + "text": "can", + "offset": 105360, + "duration": 160 + }, + { + "text": "just", + "offset": 105520, + "duration": 280 + }, + { + "text": "look", + "offset": 105800, + "duration": 200 + }, + { + "text": "at", + "offset": 106000, + "duration": 80 + }, + { + "text": "my", + "offset": 106080, + "duration": 160 + }, + { + "text": "application", + "offset": 106240, + "duration": 800 + }, + { + "text": "and", + "offset": 107040, + "duration": 200 + }, + { + "text": "just,", + "offset": 107240, + "duration": 640 + }, + { + "text": "you", + "offset": 108560, + "duration": 120 + }, + { + "text": "know,", + "offset": 108680, + "duration": 200 + }, + { + "text": "tune", + "offset": 109360, + "duration": 320 + }, + { + "text": "it", + "offset": 109680, + "duration": 160 + }, + { + "text": "in", + "offset": 109840, + "duration": 160 + }, + { + "text": "or", + "offset": 110000, + "duration": 240 + }, + { + "text": "optimize", + "offset": 110240, + "duration": 560 + }, + { + "text": "it", + "offset": 110800, + "duration": 120 + }, + { + "text": "for", + "offset": 111400, + "duration": 120 + }, + { + "text": "the", + "offset": 111520, + "duration": 120 + }, + { + "text": "best", + "offset": 111640, + "duration": 320 + }, + { + "text": "performance.", + "offset": 112200, + "duration": 680 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "channel": 0, + "speaker": 0, + "offset": 113200, + "duration": 480, + "text": "Sure.", + "words": [ + { + "text": "Sure.", + "offset": 113200, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "channel": 0, + "speaker": 0, + "offset": 114440, + "duration": 1400, + "text": "We do have a support team.", + "words": [ + { + "text": "We", + "offset": 114440, + "duration": 80 + }, + { + "text": 
"do", + "offset": 114520, + "duration": 120 + }, + { + "text": "have", + "offset": 114640, + "duration": 240 + }, + { + "text": "a", + "offset": 114880, + "duration": 80 + }, + { + "text": "support", + "offset": 114960, + "duration": 440 + }, + { + "text": "team.", + "offset": 115400, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "channel": 0, + "speaker": 1, + "offset": 116080, + "duration": 240, + "text": "Yeah.", + "words": [ + { + "text": "Yeah.", + "offset": 116080, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "channel": 0, + "speaker": 0, + "offset": 116960, + "duration": 2000, + "text": "That's our engineering team on the back end.", + "words": [ + { + "text": "That's", + "offset": 116960, + "duration": 200 + }, + { + "text": "our", + "offset": 117160, + "duration": 120 + }, + { + "text": "engineering", + "offset": 117280, + "duration": 640 + }, + { + "text": "team", + "offset": 117920, + "duration": 200 + }, + { + "text": "on", + "offset": 118120, + "duration": 120 + }, + { + "text": "the", + "offset": 118240, + "duration": 80 + }, + { + "text": "back", + "offset": 118320, + "duration": 400 + }, + { + "text": "end.", + "offset": 118720, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "channel": 0, + "speaker": 0, + "offset": 119120, + "duration": 800, + "text": "It's an option.", + "words": [ + { + "text": "It's", + "offset": 119120, + "duration": 200 + }, + { + "text": "an", + "offset": 119320, + "duration": 120 + }, + { + "text": "option.", + "offset": 119440, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + } + ] +} \ No newline at end of file diff --git a/samples/fast-transcription/js_client/main.js b/samples/fast-transcription/js_client/main.js new file mode 100644 index 000000000..6ef1dea1e --- /dev/null +++ b/samples/fast-transcription/js_client/main.js @@ -0,0 +1,94 @@ +const https = require('https'); +const 
fs = require('fs'); +const FormData = require('form-data'); +const path = require('path'); + +// Add Credentials +const SUBSCRIPTION_KEY= "YourSubscriptionKey"; +const SERVICE_REGION= "YourServiceRegion"; + + +// fast transcription settings +const LOCALE = ["en-US"]; +const PROFANITYFILTER = "Masked"; +const DIARIZATIONSETTINGS = { minSpeakers: 1, maxSpeakers: 4 }; + +// Get transcription properties payload +function getTranscriptionProperties() { + return { + locales: LOCALE, + properties: { + profanityFilterMode: PROFANITYFILTER, + channels: [0], // Mono-speaker diarization + diarizationSettings: DIARIZATIONSETTINGS + } + }; +} + +// transcribe the code from a local file +function transcribeFromLocal(audioFilePath, transcriptionsFilePath) { + // Check if the file exists + if (!fs.existsSync(audioFilePath)) { + console.error("File does not exist at the specified path:", audioFilePath); + return; + } + + // create the form data for Multipart API call + const formData = new FormData(); + formData.append('audio', fs.createReadStream(audioFilePath), path.basename(audioFilePath)); + formData.append('definition', JSON.stringify(getTranscriptionProperties()), { contentType: 'application/json' }); + + // set up the request options + const options = { + hostname: `${SERVICE_REGION}.api.cognitive.microsoft.com`, + path: '/speechtotext/transcriptions:transcribe?api-version=2024-05-15-preview', + method: 'POST', + headers: { + 'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY, + ...formData.getHeaders() + } + }; + + // Send the requests + const req = https.request(options, (res) => { + let data = ''; + + res.on('data', (chunk) => { + data += chunk; + }); + + res.on('end', () => { + if (res.statusCode === 200) { + console.log("Transcription successful!"); + + // Save the transcription response to a file + fs.writeFileSync(transcriptionsFilePath, JSON.stringify(JSON.parse(data), null, 4)); + console.log(`Json response saved to ${transcriptionsFilePath}`); + } else { + 
console.error(`Error: ${res.statusCode} - ${res.statusMessage}`); + console.error(data); + } + }); + }); + + req.on('error', (e) => { + console.error(`Problem with request: ${e.message}`); + }); + + // Pipe the form data to the request + formData.pipe(req); +} + +// main function +function main() { + const args = process.argv.slice(2); + const inputDir = args[0] || 'test_call_audio.wav'; // test audio file - change to YourAudio_FilePath + const outputDir = args[1] || 'transcription_response.json'; // test output JSON file + + transcribeFromLocal(inputDir, outputDir); +} + + +main(); + + diff --git a/samples/fast-transcription/js_client/package-lock.json b/samples/fast-transcription/js_client/package-lock.json new file mode 100644 index 000000000..62373f943 --- /dev/null +++ b/samples/fast-transcription/js_client/package-lock.json @@ -0,0 +1,74 @@ +{ + "name": "js_client", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "form-data": "^4.0.1" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/form-data": { + 
"version": "4.0.1", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz", + "integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + } + } +} diff --git a/samples/fast-transcription/js_client/package.json b/samples/fast-transcription/js_client/package.json new file mode 100644 index 000000000..15dc280c1 --- /dev/null +++ b/samples/fast-transcription/js_client/package.json @@ -0,0 +1,5 @@ +{ + "dependencies": { + "form-data": "^4.0.1" + } +} diff --git a/samples/fast-transcription/js_client/readme.md b/samples/fast-transcription/js_client/readme.md new file mode 100644 index 000000000..08eba59ce --- /dev/null +++ b/samples/fast-transcription/js_client/readme.md @@ -0,0 +1,29 @@ +# JavaScript Speech Recognition, Synthesis and Translation Sample for Node.js + +This sample demonstrates audio transcription using the Fast Transcription API for JavaScript on Node.js. + +## Prerequisites +1. A [Node.js](https://nodejs.org) compatible device. +1. An Azure AI Speech resource in one of the regions where the fast transcription API is available. 
The supported regions are: Central India, East US, Southeast Asia, and West Europe. For more information and the region identifiers, see [Speech service regions](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/regions). +1. An audio file (less than 2 hours long and less than 200 MB in size) in one of the formats and codecs supported by the batch transcription API. For more information about supported audio formats, see [Supported audio formats](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-audio-data?tabs=portal#supported-audio-formats-and-codecs). +1. For additional explanation, see the [fast transcription documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/fast-transcription-create). + + +## Prepare the sample + +* Update the following strings in the `main.js` file with your configuration: + * `YourSubscriptionKey`: replace with your subscription key. + * `YourServiceRegion`: replace with the [region](https://aka.ms/csspeech/region) your subscription is associated with. It must be one of the supported regions listed above, for example `eastus` or `westeurope`. + * `YourAudio_FilePath`: replace with the path of the audio file to be transcribed, or pass the path as the first command-line argument. By default the sample uses the file named `test_call_audio.wav` located in the same folder. + +* Run `npm install` to install the required dependencies on your computer. 
+ +## Run the sample + +```shell +node main.js +``` + +## References + +* [fast transcription documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/fast-transcription-create) diff --git a/samples/fast-transcription/js_client/test_call_audio.wav b/samples/fast-transcription/js_client/test_call_audio.wav new file mode 100644 index 000000000..24b513342 Binary files /dev/null and b/samples/fast-transcription/js_client/test_call_audio.wav differ diff --git a/samples/fast-transcription/js_client/transcription_response.json b/samples/fast-transcription/js_client/transcription_response.json new file mode 100644 index 000000000..817c81edf --- /dev/null +++ b/samples/fast-transcription/js_client/transcription_response.json @@ -0,0 +1,2057 @@ +{ + "duration": 120000, + "combinedPhrases": [ + { + "text": "I think we got disconnected. Good. How are you? I have to say my little spill again. Just a heads up, Dimitri, to help improve the quality of our products, services and training, this call may be recorded and monitored. Information may be accessed internationally by Microsoft, our service provider, subsidiaries and affiliates. Is it OK if we continue? OK, fantastic. OK. So I I was talking. I think I, I, I, I kept rambling. And then I didn't hear you. So I guess we got disconnected there. And so just wanted to kind of follow up. So you're kind of comparing Shopify versus Azure. It looks like you're currently using an Azure trial. I guess as far as like questions, were you kind of. I know you kind of talked about price. I can definitely go over like pricing with you. If you're comparing apples to apples, I I saw that Spotify, you know obviously they're you know it's a SaaS platform. So if you want to run on that looks like their plans are as low as 29 bucks a month. What I was just saying is Microsoft has a little more flexibility. 
You kind of decide as far as you know our different options and then if your company scales and your e-commerce site goes goes big. You know, it's it's the fortunate thing about Microsoft is it's Microsoft, it's Azure. And then we have our Office 365 suite for you too for users, one share of company scale. So I didn't mean to kind of put the car before the horse, but that's kind of what I was saying. I'm talking to myself when we got cut off. But how can I kind of kind of help you out at this point, Dimitri? Well, I was wondering if there is a service that Microsoft offers where they can just look at my application and just, you know, tune it in or optimize it for the best performance. Sure. We do have a support team. Yeah, that's our engineering team on the back end. It's an option." + } + ], + "phrases": [ + { + "offset": 240, + "duration": 1200, + "text": "I think we got disconnected.", + "words": [ + { + "text": "I", + "offset": 240, + "duration": 40 + }, + { + "text": "think", + "offset": 280, + "duration": 160 + }, + { + "text": "we", + "offset": 440, + "duration": 80 + }, + { + "text": "got", + "offset": 520, + "duration": 200 + }, + { + "text": "disconnected.", + "offset": 720, + "duration": 720 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 2000, + "duration": 200, + "text": "Good.", + "words": [ + { + "text": "Good.", + "offset": 2000, + "duration": 200 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 2200, + "duration": 440, + "text": "How are you?", + "words": [ + { + "text": "How", + "offset": 2200, + "duration": 80 + }, + { + "text": "are", + "offset": 2280, + "duration": 200 + }, + { + "text": "you?", + "offset": 2480, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 7760, + "duration": 1840, + "text": "I have to say my little spill again.", + "words": [ + { + "text": "I", + "offset": 7760, + "duration": 120 + }, + { + "text": "have", + 
"offset": 7880, + "duration": 200 + }, + { + "text": "to", + "offset": 8080, + "duration": 240 + }, + { + "text": "say", + "offset": 8480, + "duration": 160 + }, + { + "text": "my", + "offset": 8640, + "duration": 120 + }, + { + "text": "little", + "offset": 8760, + "duration": 200 + }, + { + "text": "spill", + "offset": 8960, + "duration": 320 + }, + { + "text": "again.", + "offset": 9280, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 9680, + "duration": 5640, + "text": "Just a heads up, Dimitri, to help improve the quality of our products, services and training, this call may be recorded and monitored.", + "words": [ + { + "text": "Just", + "offset": 9680, + "duration": 240 + }, + { + "text": "a", + "offset": 9920, + "duration": 40 + }, + { + "text": "heads", + "offset": 9960, + "duration": 280 + }, + { + "text": "up,", + "offset": 10240, + "duration": 120 + }, + { + "text": "Dimitri,", + "offset": 10360, + "duration": 320 + }, + { + "text": "to", + "offset": 10680, + "duration": 80 + }, + { + "text": "help", + "offset": 10760, + "duration": 160 + }, + { + "text": "improve", + "offset": 10920, + "duration": 240 + }, + { + "text": "the", + "offset": 11160, + "duration": 80 + }, + { + "text": "quality", + "offset": 11240, + "duration": 440 + }, + { + "text": "of", + "offset": 11680, + "duration": 80 + }, + { + "text": "our", + "offset": 11760, + "duration": 80 + }, + { + "text": "products,", + "offset": 11840, + "duration": 360 + }, + { + "text": "services", + "offset": 12200, + "duration": 400 + }, + { + "text": "and", + "offset": 12600, + "duration": 160 + }, + { + "text": "training,", + "offset": 12760, + "duration": 280 + }, + { + "text": "this", + "offset": 13040, + "duration": 120 + }, + { + "text": "call", + "offset": 13160, + "duration": 360 + }, + { + "text": "may", + "offset": 13600, + "duration": 160 + }, + { + "text": "be", + "offset": 13760, + "duration": 80 + }, + { + "text": "recorded", + "offset": 
13840, + "duration": 560 + }, + { + "text": "and", + "offset": 14760, + "duration": 120 + }, + { + "text": "monitored.", + "offset": 14880, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 15320, + "duration": 4960, + "text": "Information may be accessed internationally by Microsoft, our service provider, subsidiaries and affiliates.", + "words": [ + { + "text": "Information", + "offset": 15320, + "duration": 520 + }, + { + "text": "may", + "offset": 15840, + "duration": 80 + }, + { + "text": "be", + "offset": 15920, + "duration": 160 + }, + { + "text": "accessed", + "offset": 16080, + "duration": 280 + }, + { + "text": "internationally", + "offset": 16360, + "duration": 640 + }, + { + "text": "by", + "offset": 17000, + "duration": 80 + }, + { + "text": "Microsoft,", + "offset": 17080, + "duration": 680 + }, + { + "text": "our", + "offset": 18240, + "duration": 240 + }, + { + "text": "service", + "offset": 18480, + "duration": 360 + }, + { + "text": "provider,", + "offset": 18840, + "duration": 440 + }, + { + "text": "subsidiaries", + "offset": 19280, + "duration": 440 + }, + { + "text": "and", + "offset": 19720, + "duration": 120 + }, + { + "text": "affiliates.", + "offset": 19840, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 20320, + "duration": 1120, + "text": "Is it OK if we continue?", + "words": [ + { + "text": "Is", + "offset": 20320, + "duration": 120 + }, + { + "text": "it", + "offset": 20440, + "duration": 120 + }, + { + "text": "OK", + "offset": 20560, + "duration": 240 + }, + { + "text": "if", + "offset": 20800, + "duration": 80 + }, + { + "text": "we", + "offset": 20880, + "duration": 80 + }, + { + "text": "continue?", + "offset": 20960, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 23440, + "duration": 1080, + "text": "OK, fantastic.", + "words": [ + { + "text": "OK,", + "offset": 23440, + "duration": 200 
+ }, + { + "text": "fantastic.", + "offset": 23920, + "duration": 600 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 24520, + "duration": 160, + "text": "OK.", + "words": [ + { + "text": "OK.", + "offset": 24520, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 25120, + "duration": 2080, + "text": "So I I was talking.", + "words": [ + { + "text": "So", + "offset": 25120, + "duration": 480 + }, + { + "text": "I", + "offset": 25920, + "duration": 400 + }, + { + "text": "I", + "offset": 26480, + "duration": 80 + }, + { + "text": "was", + "offset": 26560, + "duration": 160 + }, + { + "text": "talking.", + "offset": 26720, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 27200, + "duration": 2400, + "text": "I think I, I, I, I kept rambling.", + "words": [ + { + "text": "I", + "offset": 27200, + "duration": 80 + }, + { + "text": "think", + "offset": 27280, + "duration": 200 + }, + { + "text": "I,", + "offset": 27600, + "duration": 80 + }, + { + "text": "I,", + "offset": 27760, + "duration": 400 + }, + { + "text": "I,", + "offset": 28400, + "duration": 80 + }, + { + "text": "I", + "offset": 28560, + "duration": 160 + }, + { + "text": "kept", + "offset": 28720, + "duration": 320 + }, + { + "text": "rambling.", + "offset": 29040, + "duration": 560 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 30000, + "duration": 880, + "text": "And then I didn't hear you.", + "words": [ + { + "text": "And", + "offset": 30000, + "duration": 120 + }, + { + "text": "then", + "offset": 30120, + "duration": 120 + }, + { + "text": "I", + "offset": 30240, + "duration": 40 + }, + { + "text": "didn't", + "offset": 30280, + "duration": 240 + }, + { + "text": "hear", + "offset": 30520, + "duration": 200 + }, + { + "text": "you.", + "offset": 30720, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 
30880, + "duration": 2480, + "text": "So I guess we got disconnected there.", + "words": [ + { + "text": "So", + "offset": 30880, + "duration": 400 + }, + { + "text": "I", + "offset": 31840, + "duration": 80 + }, + { + "text": "guess", + "offset": 31920, + "duration": 240 + }, + { + "text": "we", + "offset": 32160, + "duration": 80 + }, + { + "text": "got", + "offset": 32240, + "duration": 200 + }, + { + "text": "disconnected", + "offset": 32440, + "duration": 600 + }, + { + "text": "there.", + "offset": 33040, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 33920, + "duration": 2400, + "text": "And so just wanted to kind of follow up.", + "words": [ + { + "text": "And", + "offset": 33920, + "duration": 240 + }, + { + "text": "so", + "offset": 34160, + "duration": 400 + }, + { + "text": "just", + "offset": 34880, + "duration": 240 + }, + { + "text": "wanted", + "offset": 35120, + "duration": 280 + }, + { + "text": "to", + "offset": 35400, + "duration": 80 + }, + { + "text": "kind", + "offset": 35480, + "duration": 200 + }, + { + "text": "of", + "offset": 35680, + "duration": 80 + }, + { + "text": "follow", + "offset": 35760, + "duration": 400 + }, + { + "text": "up.", + "offset": 36160, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 36720, + "duration": 3960, + "text": "So you're kind of comparing Shopify versus Azure.", + "words": [ + { + "text": "So", + "offset": 36720, + "duration": 400 + }, + { + "text": "you're", + "offset": 37120, + "duration": 160 + }, + { + "text": "kind", + "offset": 37280, + "duration": 240 + }, + { + "text": "of", + "offset": 37520, + "duration": 80 + }, + { + "text": "comparing", + "offset": 37600, + "duration": 560 + }, + { + "text": "Shopify", + "offset": 39040, + "duration": 720 + }, + { + "text": "versus", + "offset": 39760, + "duration": 480 + }, + { + "text": "Azure.", + "offset": 40240, + "duration": 440 + } + ], + "locale": "en-US", + 
"confidence": 0.9149788 + }, + { + "offset": 40680, + "duration": 3000, + "text": "It looks like you're currently using an Azure trial.", + "words": [ + { + "text": "It", + "offset": 40680, + "duration": 80 + }, + { + "text": "looks", + "offset": 40760, + "duration": 200 + }, + { + "text": "like", + "offset": 40960, + "duration": 160 + }, + { + "text": "you're", + "offset": 41120, + "duration": 320 + }, + { + "text": "currently", + "offset": 41440, + "duration": 560 + }, + { + "text": "using", + "offset": 42320, + "duration": 280 + }, + { + "text": "an", + "offset": 42600, + "duration": 120 + }, + { + "text": "Azure", + "offset": 42720, + "duration": 320 + }, + { + "text": "trial.", + "offset": 43040, + "duration": 640 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 45360, + "duration": 2720, + "text": "I guess as far as like questions, were you kind of.", + "words": [ + { + "text": "I", + "offset": 45360, + "duration": 480 + }, + { + "text": "guess", + "offset": 45840, + "duration": 240 + }, + { + "text": "as", + "offset": 46240, + "duration": 80 + }, + { + "text": "far", + "offset": 46320, + "duration": 280 + }, + { + "text": "as", + "offset": 46600, + "duration": 80 + }, + { + "text": "like", + "offset": 46680, + "duration": 160 + }, + { + "text": "questions,", + "offset": 46840, + "duration": 640 + }, + { + "text": "were", + "offset": 47480, + "duration": 200 + }, + { + "text": "you", + "offset": 47680, + "duration": 80 + }, + { + "text": "kind", + "offset": 47760, + "duration": 200 + }, + { + "text": "of.", + "offset": 47960, + "duration": 120 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 49800, + "duration": 1680, + "text": "I know you kind of talked about price.", + "words": [ + { + "text": "I", + "offset": 49800, + "duration": 40 + }, + { + "text": "know", + "offset": 50080, + "duration": 80 + }, + { + "text": "you", + "offset": 50160, + "duration": 120 + }, + { + "text": "kind", + "offset": 50280, 
+ "duration": 200 + }, + { + "text": "of", + "offset": 50480, + "duration": 80 + }, + { + "text": "talked", + "offset": 50560, + "duration": 440 + }, + { + "text": "about", + "offset": 51000, + "duration": 280 + }, + { + "text": "price.", + "offset": 51280, + "duration": 200 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 51480, + "duration": 1760, + "text": "I can definitely go over like pricing with you.", + "words": [ + { + "text": "I", + "offset": 51480, + "duration": 40 + }, + { + "text": "can", + "offset": 51520, + "duration": 120 + }, + { + "text": "definitely", + "offset": 51640, + "duration": 360 + }, + { + "text": "go", + "offset": 52000, + "duration": 120 + }, + { + "text": "over", + "offset": 52120, + "duration": 200 + }, + { + "text": "like", + "offset": 52320, + "duration": 160 + }, + { + "text": "pricing", + "offset": 52480, + "duration": 480 + }, + { + "text": "with", + "offset": 52960, + "duration": 160 + }, + { + "text": "you.", + "offset": 53120, + "duration": 120 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 53800, + "duration": 6840, + "text": "If you're comparing apples to apples, I I saw that Spotify, you know obviously they're you know it's a SaaS platform.", + "words": [ + { + "text": "If", + "offset": 53800, + "duration": 80 + }, + { + "text": "you're", + "offset": 53880, + "duration": 120 + }, + { + "text": "comparing", + "offset": 54000, + "duration": 560 + }, + { + "text": "apples", + "offset": 54560, + "duration": 440 + }, + { + "text": "to", + "offset": 55000, + "duration": 80 + }, + { + "text": "apples,", + "offset": 55080, + "duration": 520 + }, + { + "text": "I", + "offset": 55600, + "duration": 40 + }, + { + "text": "I", + "offset": 55640, + "duration": 280 + }, + { + "text": "saw", + "offset": 55920, + "duration": 240 + }, + { + "text": "that", + "offset": 56160, + "duration": 160 + }, + { + "text": "Spotify,", + "offset": 56320, + "duration": 560 + }, + { + "text": 
"you", + "offset": 57760, + "duration": 80 + }, + { + "text": "know", + "offset": 57840, + "duration": 160 + }, + { + "text": "obviously", + "offset": 58080, + "duration": 360 + }, + { + "text": "they're", + "offset": 58440, + "duration": 280 + }, + { + "text": "you", + "offset": 59040, + "duration": 80 + }, + { + "text": "know", + "offset": 59120, + "duration": 160 + }, + { + "text": "it's", + "offset": 59360, + "duration": 240 + }, + { + "text": "a", + "offset": 59600, + "duration": 40 + }, + { + "text": "SaaS", + "offset": 59640, + "duration": 360 + }, + { + "text": "platform.", + "offset": 60000, + "duration": 640 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 60640, + "duration": 6600, + "text": "So if you want to run on that looks like their plans are as low as 29 bucks a month.", + "words": [ + { + "text": "So", + "offset": 60640, + "duration": 320 + }, + { + "text": "if", + "offset": 61680, + "duration": 320 + }, + { + "text": "you", + "offset": 62000, + "duration": 160 + }, + { + "text": "want", + "offset": 62800, + "duration": 400 + }, + { + "text": "to", + "offset": 63280, + "duration": 80 + }, + { + "text": "run", + "offset": 64240, + "duration": 320 + }, + { + "text": "on", + "offset": 64560, + "duration": 80 + }, + { + "text": "that", + "offset": 64640, + "duration": 320 + }, + { + "text": "looks", + "offset": 64960, + "duration": 160 + }, + { + "text": "like", + "offset": 65120, + "duration": 160 + }, + { + "text": "their", + "offset": 65280, + "duration": 240 + }, + { + "text": "plans", + "offset": 65520, + "duration": 360 + }, + { + "text": "are", + "offset": 65880, + "duration": 80 + }, + { + "text": "as", + "offset": 65960, + "duration": 120 + }, + { + "text": "low", + "offset": 66080, + "duration": 80 + }, + { + "text": "as", + "offset": 66160, + "duration": 80 + }, + { + "text": "29", + "offset": 66240, + "duration": 400 + }, + { + "text": "bucks", + "offset": 66640, + "duration": 280 + }, + { + "text": "a", + 
"offset": 66920, + "duration": 40 + }, + { + "text": "month.", + "offset": 66960, + "duration": 280 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 67800, + "duration": 2840, + "text": "What I was just saying is Microsoft has a little more flexibility.", + "words": [ + { + "text": "What", + "offset": 67800, + "duration": 200 + }, + { + "text": "I", + "offset": 68000, + "duration": 40 + }, + { + "text": "was", + "offset": 68040, + "duration": 120 + }, + { + "text": "just", + "offset": 68160, + "duration": 200 + }, + { + "text": "saying", + "offset": 68360, + "duration": 200 + }, + { + "text": "is", + "offset": 68560, + "duration": 160 + }, + { + "text": "Microsoft", + "offset": 68720, + "duration": 440 + }, + { + "text": "has", + "offset": 69160, + "duration": 120 + }, + { + "text": "a", + "offset": 69280, + "duration": 40 + }, + { + "text": "little", + "offset": 69320, + "duration": 280 + }, + { + "text": "more", + "offset": 69600, + "duration": 160 + }, + { + "text": "flexibility.", + "offset": 69760, + "duration": 880 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 71120, + "duration": 7120, + "text": "You kind of decide as far as you know our different options and then if your company scales and your e-commerce site goes goes big.", + "words": [ + { + "text": "You", + "offset": 71120, + "duration": 80 + }, + { + "text": "kind", + "offset": 71200, + "duration": 360 + }, + { + "text": "of", + "offset": 71560, + "duration": 80 + }, + { + "text": "decide", + "offset": 71640, + "duration": 520 + }, + { + "text": "as", + "offset": 72160, + "duration": 80 + }, + { + "text": "far", + "offset": 72240, + "duration": 360 + }, + { + "text": "as", + "offset": 72600, + "duration": 120 + }, + { + "text": "you", + "offset": 73040, + "duration": 80 + }, + { + "text": "know", + "offset": 73120, + "duration": 120 + }, + { + "text": "our", + "offset": 73280, + "duration": 160 + }, + { + "text": "different", + "offset": 
73440, + "duration": 320 + }, + { + "text": "options", + "offset": 73760, + "duration": 360 + }, + { + "text": "and", + "offset": 74120, + "duration": 120 + }, + { + "text": "then", + "offset": 74520, + "duration": 200 + }, + { + "text": "if", + "offset": 74720, + "duration": 240 + }, + { + "text": "your", + "offset": 74960, + "duration": 160 + }, + { + "text": "company", + "offset": 75120, + "duration": 400 + }, + { + "text": "scales", + "offset": 75520, + "duration": 560 + }, + { + "text": "and", + "offset": 76080, + "duration": 120 + }, + { + "text": "your", + "offset": 76200, + "duration": 120 + }, + { + "text": "e-commerce", + "offset": 76320, + "duration": 520 + }, + { + "text": "site", + "offset": 76840, + "duration": 240 + }, + { + "text": "goes", + "offset": 77080, + "duration": 440 + }, + { + "text": "goes", + "offset": 77680, + "duration": 240 + }, + { + "text": "big.", + "offset": 77920, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 78640, + "duration": 4720, + "text": "You know, it's it's the fortunate thing about Microsoft is it's Microsoft, it's Azure.", + "words": [ + { + "text": "You", + "offset": 78640, + "duration": 80 + }, + { + "text": "know,", + "offset": 78720, + "duration": 240 + }, + { + "text": "it's", + "offset": 78960, + "duration": 280 + }, + { + "text": "it's", + "offset": 79280, + "duration": 320 + }, + { + "text": "the", + "offset": 79840, + "duration": 160 + }, + { + "text": "fortunate", + "offset": 80000, + "duration": 480 + }, + { + "text": "thing", + "offset": 80480, + "duration": 120 + }, + { + "text": "about", + "offset": 80600, + "duration": 200 + }, + { + "text": "Microsoft", + "offset": 80800, + "duration": 600 + }, + { + "text": "is", + "offset": 81400, + "duration": 280 + }, + { + "text": "it's", + "offset": 81920, + "duration": 200 + }, + { + "text": "Microsoft,", + "offset": 82120, + "duration": 640 + }, + { + "text": "it's", + "offset": 82760, + "duration": 120 + }, + { + 
"text": "Azure.", + "offset": 82880, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 83760, + "duration": 4240, + "text": "And then we have our Office 365 suite for you too for users, one share of company scale.", + "words": [ + { + "text": "And", + "offset": 83760, + "duration": 160 + }, + { + "text": "then", + "offset": 83920, + "duration": 120 + }, + { + "text": "we", + "offset": 84040, + "duration": 120 + }, + { + "text": "have", + "offset": 84160, + "duration": 200 + }, + { + "text": "our", + "offset": 84360, + "duration": 120 + }, + { + "text": "Office", + "offset": 84480, + "duration": 320 + }, + { + "text": "365", + "offset": 84800, + "duration": 720 + }, + { + "text": "suite", + "offset": 85520, + "duration": 280 + }, + { + "text": "for", + "offset": 85800, + "duration": 200 + }, + { + "text": "you", + "offset": 86000, + "duration": 80 + }, + { + "text": "too", + "offset": 86080, + "duration": 160 + }, + { + "text": "for", + "offset": 86240, + "duration": 160 + }, + { + "text": "users,", + "offset": 86400, + "duration": 400 + }, + { + "text": "one", + "offset": 86800, + "duration": 200 + }, + { + "text": "share", + "offset": 87000, + "duration": 200 + }, + { + "text": "of", + "offset": 87200, + "duration": 80 + }, + { + "text": "company", + "offset": 87280, + "duration": 320 + }, + { + "text": "scale.", + "offset": 87600, + "duration": 400 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 88000, + "duration": 3520, + "text": "So I didn't mean to kind of put the car before the horse, but that's kind of what I was saying.", + "words": [ + { + "text": "So", + "offset": 88000, + "duration": 240 + }, + { + "text": "I", + "offset": 88560, + "duration": 40 + }, + { + "text": "didn't", + "offset": 88600, + "duration": 320 + }, + { + "text": "mean", + "offset": 88920, + "duration": 120 + }, + { + "text": "to", + "offset": 89040, + "duration": 80 + }, + { + "text": "kind", + "offset": 89120, + 
"duration": 160 + }, + { + "text": "of", + "offset": 89280, + "duration": 80 + }, + { + "text": "put", + "offset": 89360, + "duration": 120 + }, + { + "text": "the", + "offset": 89480, + "duration": 80 + }, + { + "text": "car", + "offset": 89560, + "duration": 120 + }, + { + "text": "before", + "offset": 89680, + "duration": 280 + }, + { + "text": "the", + "offset": 89960, + "duration": 80 + }, + { + "text": "horse,", + "offset": 90040, + "duration": 240 + }, + { + "text": "but", + "offset": 90280, + "duration": 160 + }, + { + "text": "that's", + "offset": 90440, + "duration": 200 + }, + { + "text": "kind", + "offset": 90640, + "duration": 200 + }, + { + "text": "of", + "offset": 90840, + "duration": 80 + }, + { + "text": "what", + "offset": 90920, + "duration": 160 + }, + { + "text": "I", + "offset": 91080, + "duration": 40 + }, + { + "text": "was", + "offset": 91120, + "duration": 120 + }, + { + "text": "saying.", + "offset": 91240, + "duration": 280 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 91520, + "duration": 2800, + "text": "I'm talking to myself when we got cut off.", + "words": [ + { + "text": "I'm", + "offset": 91520, + "duration": 80 + }, + { + "text": "talking", + "offset": 92400, + "duration": 240 + }, + { + "text": "to", + "offset": 92640, + "duration": 160 + }, + { + "text": "myself", + "offset": 92800, + "duration": 520 + }, + { + "text": "when", + "offset": 93320, + "duration": 160 + }, + { + "text": "we", + "offset": 93480, + "duration": 120 + }, + { + "text": "got", + "offset": 93600, + "duration": 160 + }, + { + "text": "cut", + "offset": 93760, + "duration": 320 + }, + { + "text": "off.", + "offset": 94080, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 94320, + "duration": 4880, + "text": "But how can I kind of kind of help you out at this point, Dimitri?", + "words": [ + { + "text": "But", + "offset": 94320, + "duration": 320 + }, + { + "text": "how", + 
"offset": 95600, + "duration": 200 + }, + { + "text": "can", + "offset": 95800, + "duration": 200 + }, + { + "text": "I", + "offset": 96000, + "duration": 160 + }, + { + "text": "kind", + "offset": 96160, + "duration": 360 + }, + { + "text": "of", + "offset": 96520, + "duration": 200 + }, + { + "text": "kind", + "offset": 97280, + "duration": 160 + }, + { + "text": "of", + "offset": 97440, + "duration": 80 + }, + { + "text": "help", + "offset": 97520, + "duration": 240 + }, + { + "text": "you", + "offset": 97760, + "duration": 80 + }, + { + "text": "out", + "offset": 97920, + "duration": 120 + }, + { + "text": "at", + "offset": 98040, + "duration": 80 + }, + { + "text": "this", + "offset": 98120, + "duration": 200 + }, + { + "text": "point,", + "offset": 98320, + "duration": 320 + }, + { + "text": "Dimitri?", + "offset": 98640, + "duration": 560 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 100320, + "duration": 12560, + "text": "Well, I was wondering if there is a service that Microsoft offers where they can just look at my application and just, you know, tune it in or optimize it for the best performance.", + "words": [ + { + "text": "Well,", + "offset": 100320, + "duration": 240 + }, + { + "text": "I", + "offset": 100640, + "duration": 80 + }, + { + "text": "was", + "offset": 100720, + "duration": 120 + }, + { + "text": "wondering", + "offset": 100840, + "duration": 840 + }, + { + "text": "if", + "offset": 101680, + "duration": 320 + }, + { + "text": "there", + "offset": 102000, + "duration": 200 + }, + { + "text": "is", + "offset": 102200, + "duration": 200 + }, + { + "text": "a", + "offset": 102520, + "duration": 40 + }, + { + "text": "service", + "offset": 102560, + "duration": 480 + }, + { + "text": "that", + "offset": 103040, + "duration": 240 + }, + { + "text": "Microsoft", + "offset": 103440, + "duration": 640 + }, + { + "text": "offers", + "offset": 104080, + "duration": 440 + }, + { + "text": "where", + "offset": 104520, + 
"duration": 440 + }, + { + "text": "they", + "offset": 104960, + "duration": 240 + }, + { + "text": "can", + "offset": 105360, + "duration": 160 + }, + { + "text": "just", + "offset": 105520, + "duration": 280 + }, + { + "text": "look", + "offset": 105800, + "duration": 200 + }, + { + "text": "at", + "offset": 106000, + "duration": 80 + }, + { + "text": "my", + "offset": 106080, + "duration": 160 + }, + { + "text": "application", + "offset": 106240, + "duration": 800 + }, + { + "text": "and", + "offset": 107040, + "duration": 200 + }, + { + "text": "just,", + "offset": 107240, + "duration": 640 + }, + { + "text": "you", + "offset": 108560, + "duration": 120 + }, + { + "text": "know,", + "offset": 108680, + "duration": 200 + }, + { + "text": "tune", + "offset": 109360, + "duration": 320 + }, + { + "text": "it", + "offset": 109680, + "duration": 160 + }, + { + "text": "in", + "offset": 109840, + "duration": 160 + }, + { + "text": "or", + "offset": 110000, + "duration": 240 + }, + { + "text": "optimize", + "offset": 110240, + "duration": 560 + }, + { + "text": "it", + "offset": 110800, + "duration": 120 + }, + { + "text": "for", + "offset": 111400, + "duration": 120 + }, + { + "text": "the", + "offset": 111520, + "duration": 120 + }, + { + "text": "best", + "offset": 111640, + "duration": 320 + }, + { + "text": "performance.", + "offset": 112200, + "duration": 680 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 113200, + "duration": 480, + "text": "Sure.", + "words": [ + { + "text": "Sure.", + "offset": 113200, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 114440, + "duration": 1400, + "text": "We do have a support team.", + "words": [ + { + "text": "We", + "offset": 114440, + "duration": 80 + }, + { + "text": "do", + "offset": 114520, + "duration": 120 + }, + { + "text": "have", + "offset": 114640, + "duration": 240 + }, + { + "text": "a", + "offset": 114880, + "duration": 80 + }, + { + 
"text": "support", + "offset": 114960, + "duration": 440 + }, + { + "text": "team.", + "offset": 115400, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 116080, + "duration": 2880, + "text": "Yeah, that's our engineering team on the back end.", + "words": [ + { + "text": "Yeah,", + "offset": 116080, + "duration": 240 + }, + { + "text": "that's", + "offset": 116960, + "duration": 200 + }, + { + "text": "our", + "offset": 117160, + "duration": 120 + }, + { + "text": "engineering", + "offset": 117280, + "duration": 640 + }, + { + "text": "team", + "offset": 117920, + "duration": 200 + }, + { + "text": "on", + "offset": 118120, + "duration": 120 + }, + { + "text": "the", + "offset": 118240, + "duration": 80 + }, + { + "text": "back", + "offset": 118320, + "duration": 400 + }, + { + "text": "end.", + "offset": 118720, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 119120, + "duration": 800, + "text": "It's an option.", + "words": [ + { + "text": "It's", + "offset": 119120, + "duration": 200 + }, + { + "text": "an", + "offset": 119320, + "duration": 120 + }, + { + "text": "option.", + "offset": 119440, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + } + ] +} \ No newline at end of file diff --git a/samples/fast-transcription/python_client/main.py b/samples/fast-transcription/python_client/main.py new file mode 100644 index 000000000..8e3b0f487 --- /dev/null +++ b/samples/fast-transcription/python_client/main.py @@ -0,0 +1,119 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
+ +import requests +import json +import os +import logging +import sys + + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, + format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z") + +# Your credentials and secrets - You should create a .env file and store your private credentials +SUBSCRIPTION_KEY = "YourSubscriptionKey" +SERVICE_REGION = "YourServiceRegion" + +NAME = "Fast transcription" +DESCRIPTION = "Fast transcription description" + +ENPOINT_URL = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-05-15-preview" + +# Fast transcription description properties +LOCALE = ["en-US"] +PROFANITYFILTER = "Masked" +DIARIZATIONSETTINGS = {"minSpeakers": 1, "maxSpeakers": 4} + +#Fast transcription properties payload +def get_transcription_properties(): + + #transcription "definition" payload + transcription_definition = { + "profanityFilterMode": PROFANITYFILTER, + "channels": [0], #Change to one channel for Mono-speaker diarization + "diarizationSettings": DIARIZATIONSETTINGS, # Optional + } + + #transcription properties + transcription_properties = { + "locales": LOCALE, + "properties": transcription_definition + } + return transcription_properties + +def transcribe_from_local(audio_file_path, transcriptions_file_path, api_key): + """ + Transcribe a local file using the settings returned by `get_transcription_properties` + using the base model for the specified locale. + + audio_file_path (str): Path to the file to be uploaded. + transcriptions_file_path (str): Path to the output transcriptions JSON. + api_key (str): Subscription key sent in the `Ocp-Apim-Subscription-Key` request header.
+ """ + # audio_file_path = r"samples\fast-transcription\python_client\test_call_audio.wav" + #Note: Content-Type should not be set manually as requests will handle it + headers = { + "Ocp-Apim-Subscription-Key": api_key + } + + # Check if the file exists in the specified file path + if not os.path.exists(audio_file_path): + print(audio_file_path) + print("File does not exist at the specified path") + return + else: + #transcription properties payload + transcription_properties = get_transcription_properties() + + #Multipart transcription files payload + files = { + 'audio': ('test_call_audio.wav', open(audio_file_path, 'rb'), 'audio/wav'), + # 'audio': ('test_call_audio2.wav', open(audio_file_path2, 'rb'), 'audio/wav'), #add multiple audio files + 'definition': (None, json.dumps(transcription_properties), 'application/json') + } + + endpoint_url = ENPOINT_URL + + logging.info("Starting transcription client...") + + response = requests.post(endpoint_url, headers=headers, files=files) + print(response.status_code) + response_json = json.dumps(response.json(), indent=4) + # print(response.json()) + + if response.status_code == 200: + response_json = response.json() + # Write response to a JSON file + with open(transcriptions_file_path, "w") as json_file: + json.dump(response_json, json_file, indent=4) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description=DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument( + "--input_dir", + required=False, + type=str, + default= "test_call_audio.wav", #update to your local audio file directory + help="Input directory", + ) + parser.add_argument( + "--output_dir", + required=False, + type=str, + default="transcription_response.json", #update to your local transcriptions directory + help="Output directory", + ) + parser.add_argument( + "--loglevel", + default="info", + help="Provide logging level. 
Example --loglevel debug, default=info", + ) + + opts, args = parser.parse_known_args() + logging.basicConfig(level=opts.loglevel.upper()) + + #transcribe audio from a local file + transcribe_from_local(opts.input_dir, opts.output_dir, SUBSCRIPTION_KEY) diff --git a/samples/fast-transcription/python_client/readme.md b/samples/fast-transcription/python_client/readme.md new file mode 100644 index 000000000..c536f7657 --- /dev/null +++ b/samples/fast-transcription/python_client/readme.md @@ -0,0 +1,37 @@ +# How to use the Speech Services Fast Transcription API from Python + +## Prerequisites + +1. An Azure AI Speech resource in one of the regions where the fast transcription API is available. The supported regions are: Central India, East US, Southeast Asia, and West Europe. For more information and region identifiers, see [Speech service regions](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/regions). +1. An audio file (less than 2 hours long and less than 200 MB in size) in one of the formats and codecs supported by the batch transcription API. For more information about supported audio formats, see [Supported audio formats](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-audio-data?tabs=portal#supported-audio-formats-and-codecs). +1. For additional explanation, see the [fast transcription documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/fast-transcription-create). + +## Install other dependencies + +The sample uses the `requests` library. You can install it with the command: + +```bash +pip install requests +``` + +## Run the sample code + +The sample code itself is [main.py](main.py) and can be run using Python 3.8 or higher. + +```shell +python3 main.py +``` +You will need to adapt the following information to run the sample: + +1. Your AI Services subscription key and region. + + * `YourSubscriptionKey`: replace with your subscription key.
+ * `YourServiceRegion`: replace with the [region](https://aka.ms/csspeech/region) your subscription is associated with. + For example, `eastus` or `westeurope`. + * Audio file: pass the path of the audio file with speech to be transcribed using `--input_dir`. It defaults to `test_call_audio.wav`, a sample file located in the same folder as `main.py`. + + Some notes: + - You can get the subscription key from the "Keys and Endpoint" tab on your Speech resource in the Azure Portal. + +You can use a development environment like Visual Studio Code to edit, debug, and execute the sample. + diff --git a/samples/fast-transcription/python_client/test_call_audio.wav b/samples/fast-transcription/python_client/test_call_audio.wav new file mode 100644 index 000000000..24b513342 Binary files /dev/null and b/samples/fast-transcription/python_client/test_call_audio.wav differ diff --git a/samples/fast-transcription/python_client/transcription_response.json b/samples/fast-transcription/python_client/transcription_response.json new file mode 100644 index 000000000..817c81edf --- /dev/null +++ b/samples/fast-transcription/python_client/transcription_response.json @@ -0,0 +1,2057 @@ +{ + "duration": 120000, + "combinedPhrases": [ + { + "text": "I think we got disconnected. Good. How are you? I have to say my little spill again. Just a heads up, Dimitri, to help improve the quality of our products, services and training, this call may be recorded and monitored. Information may be accessed internationally by Microsoft, our service provider, subsidiaries and affiliates. Is it OK if we continue? OK, fantastic. OK. So I I was talking. I think I, I, I, I kept rambling. And then I didn't hear you. So I guess we got disconnected there. And so just wanted to kind of follow up. So you're kind of comparing Shopify versus Azure. It looks like you're currently using an Azure trial. I guess as far as like questions, were you kind of. I know you kind of talked about price. I can definitely go over like pricing with you.
If you're comparing apples to apples, I I saw that Spotify, you know obviously they're you know it's a SaaS platform. So if you want to run on that looks like their plans are as low as 29 bucks a month. What I was just saying is Microsoft has a little more flexibility. You kind of decide as far as you know our different options and then if your company scales and your e-commerce site goes goes big. You know, it's it's the fortunate thing about Microsoft is it's Microsoft, it's Azure. And then we have our Office 365 suite for you too for users, one share of company scale. So I didn't mean to kind of put the car before the horse, but that's kind of what I was saying. I'm talking to myself when we got cut off. But how can I kind of kind of help you out at this point, Dimitri? Well, I was wondering if there is a service that Microsoft offers where they can just look at my application and just, you know, tune it in or optimize it for the best performance. Sure. We do have a support team. Yeah, that's our engineering team on the back end. It's an option." 
+ } + ], + "phrases": [ + { + "offset": 240, + "duration": 1200, + "text": "I think we got disconnected.", + "words": [ + { + "text": "I", + "offset": 240, + "duration": 40 + }, + { + "text": "think", + "offset": 280, + "duration": 160 + }, + { + "text": "we", + "offset": 440, + "duration": 80 + }, + { + "text": "got", + "offset": 520, + "duration": 200 + }, + { + "text": "disconnected.", + "offset": 720, + "duration": 720 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 2000, + "duration": 200, + "text": "Good.", + "words": [ + { + "text": "Good.", + "offset": 2000, + "duration": 200 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 2200, + "duration": 440, + "text": "How are you?", + "words": [ + { + "text": "How", + "offset": 2200, + "duration": 80 + }, + { + "text": "are", + "offset": 2280, + "duration": 200 + }, + { + "text": "you?", + "offset": 2480, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 7760, + "duration": 1840, + "text": "I have to say my little spill again.", + "words": [ + { + "text": "I", + "offset": 7760, + "duration": 120 + }, + { + "text": "have", + "offset": 7880, + "duration": 200 + }, + { + "text": "to", + "offset": 8080, + "duration": 240 + }, + { + "text": "say", + "offset": 8480, + "duration": 160 + }, + { + "text": "my", + "offset": 8640, + "duration": 120 + }, + { + "text": "little", + "offset": 8760, + "duration": 200 + }, + { + "text": "spill", + "offset": 8960, + "duration": 320 + }, + { + "text": "again.", + "offset": 9280, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 9680, + "duration": 5640, + "text": "Just a heads up, Dimitri, to help improve the quality of our products, services and training, this call may be recorded and monitored.", + "words": [ + { + "text": "Just", + "offset": 9680, + "duration": 240 + }, + { + "text": "a", + "offset": 9920, + "duration": 40 + }, + { + 
"text": "heads", + "offset": 9960, + "duration": 280 + }, + { + "text": "up,", + "offset": 10240, + "duration": 120 + }, + { + "text": "Dimitri,", + "offset": 10360, + "duration": 320 + }, + { + "text": "to", + "offset": 10680, + "duration": 80 + }, + { + "text": "help", + "offset": 10760, + "duration": 160 + }, + { + "text": "improve", + "offset": 10920, + "duration": 240 + }, + { + "text": "the", + "offset": 11160, + "duration": 80 + }, + { + "text": "quality", + "offset": 11240, + "duration": 440 + }, + { + "text": "of", + "offset": 11680, + "duration": 80 + }, + { + "text": "our", + "offset": 11760, + "duration": 80 + }, + { + "text": "products,", + "offset": 11840, + "duration": 360 + }, + { + "text": "services", + "offset": 12200, + "duration": 400 + }, + { + "text": "and", + "offset": 12600, + "duration": 160 + }, + { + "text": "training,", + "offset": 12760, + "duration": 280 + }, + { + "text": "this", + "offset": 13040, + "duration": 120 + }, + { + "text": "call", + "offset": 13160, + "duration": 360 + }, + { + "text": "may", + "offset": 13600, + "duration": 160 + }, + { + "text": "be", + "offset": 13760, + "duration": 80 + }, + { + "text": "recorded", + "offset": 13840, + "duration": 560 + }, + { + "text": "and", + "offset": 14760, + "duration": 120 + }, + { + "text": "monitored.", + "offset": 14880, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 15320, + "duration": 4960, + "text": "Information may be accessed internationally by Microsoft, our service provider, subsidiaries and affiliates.", + "words": [ + { + "text": "Information", + "offset": 15320, + "duration": 520 + }, + { + "text": "may", + "offset": 15840, + "duration": 80 + }, + { + "text": "be", + "offset": 15920, + "duration": 160 + }, + { + "text": "accessed", + "offset": 16080, + "duration": 280 + }, + { + "text": "internationally", + "offset": 16360, + "duration": 640 + }, + { + "text": "by", + "offset": 17000, + "duration": 80 + }, + { + 
"text": "Microsoft,", + "offset": 17080, + "duration": 680 + }, + { + "text": "our", + "offset": 18240, + "duration": 240 + }, + { + "text": "service", + "offset": 18480, + "duration": 360 + }, + { + "text": "provider,", + "offset": 18840, + "duration": 440 + }, + { + "text": "subsidiaries", + "offset": 19280, + "duration": 440 + }, + { + "text": "and", + "offset": 19720, + "duration": 120 + }, + { + "text": "affiliates.", + "offset": 19840, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 20320, + "duration": 1120, + "text": "Is it OK if we continue?", + "words": [ + { + "text": "Is", + "offset": 20320, + "duration": 120 + }, + { + "text": "it", + "offset": 20440, + "duration": 120 + }, + { + "text": "OK", + "offset": 20560, + "duration": 240 + }, + { + "text": "if", + "offset": 20800, + "duration": 80 + }, + { + "text": "we", + "offset": 20880, + "duration": 80 + }, + { + "text": "continue?", + "offset": 20960, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 23440, + "duration": 1080, + "text": "OK, fantastic.", + "words": [ + { + "text": "OK,", + "offset": 23440, + "duration": 200 + }, + { + "text": "fantastic.", + "offset": 23920, + "duration": 600 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 24520, + "duration": 160, + "text": "OK.", + "words": [ + { + "text": "OK.", + "offset": 24520, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 25120, + "duration": 2080, + "text": "So I I was talking.", + "words": [ + { + "text": "So", + "offset": 25120, + "duration": 480 + }, + { + "text": "I", + "offset": 25920, + "duration": 400 + }, + { + "text": "I", + "offset": 26480, + "duration": 80 + }, + { + "text": "was", + "offset": 26560, + "duration": 160 + }, + { + "text": "talking.", + "offset": 26720, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 
27200, + "duration": 2400, + "text": "I think I, I, I, I kept rambling.", + "words": [ + { + "text": "I", + "offset": 27200, + "duration": 80 + }, + { + "text": "think", + "offset": 27280, + "duration": 200 + }, + { + "text": "I,", + "offset": 27600, + "duration": 80 + }, + { + "text": "I,", + "offset": 27760, + "duration": 400 + }, + { + "text": "I,", + "offset": 28400, + "duration": 80 + }, + { + "text": "I", + "offset": 28560, + "duration": 160 + }, + { + "text": "kept", + "offset": 28720, + "duration": 320 + }, + { + "text": "rambling.", + "offset": 29040, + "duration": 560 + } + ], + "locale": "en-US", + "confidence": 0.92867047 + }, + { + "offset": 30000, + "duration": 880, + "text": "And then I didn't hear you.", + "words": [ + { + "text": "And", + "offset": 30000, + "duration": 120 + }, + { + "text": "then", + "offset": 30120, + "duration": 120 + }, + { + "text": "I", + "offset": 30240, + "duration": 40 + }, + { + "text": "didn't", + "offset": 30280, + "duration": 240 + }, + { + "text": "hear", + "offset": 30520, + "duration": 200 + }, + { + "text": "you.", + "offset": 30720, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 30880, + "duration": 2480, + "text": "So I guess we got disconnected there.", + "words": [ + { + "text": "So", + "offset": 30880, + "duration": 400 + }, + { + "text": "I", + "offset": 31840, + "duration": 80 + }, + { + "text": "guess", + "offset": 31920, + "duration": 240 + }, + { + "text": "we", + "offset": 32160, + "duration": 80 + }, + { + "text": "got", + "offset": 32240, + "duration": 200 + }, + { + "text": "disconnected", + "offset": 32440, + "duration": 600 + }, + { + "text": "there.", + "offset": 33040, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 33920, + "duration": 2400, + "text": "And so just wanted to kind of follow up.", + "words": [ + { + "text": "And", + "offset": 33920, + "duration": 240 + }, + { + "text": "so", + "offset": 
34160, + "duration": 400 + }, + { + "text": "just", + "offset": 34880, + "duration": 240 + }, + { + "text": "wanted", + "offset": 35120, + "duration": 280 + }, + { + "text": "to", + "offset": 35400, + "duration": 80 + }, + { + "text": "kind", + "offset": 35480, + "duration": 200 + }, + { + "text": "of", + "offset": 35680, + "duration": 80 + }, + { + "text": "follow", + "offset": 35760, + "duration": 400 + }, + { + "text": "up.", + "offset": 36160, + "duration": 160 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 36720, + "duration": 3960, + "text": "So you're kind of comparing Shopify versus Azure.", + "words": [ + { + "text": "So", + "offset": 36720, + "duration": 400 + }, + { + "text": "you're", + "offset": 37120, + "duration": 160 + }, + { + "text": "kind", + "offset": 37280, + "duration": 240 + }, + { + "text": "of", + "offset": 37520, + "duration": 80 + }, + { + "text": "comparing", + "offset": 37600, + "duration": 560 + }, + { + "text": "Shopify", + "offset": 39040, + "duration": 720 + }, + { + "text": "versus", + "offset": 39760, + "duration": 480 + }, + { + "text": "Azure.", + "offset": 40240, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 40680, + "duration": 3000, + "text": "It looks like you're currently using an Azure trial.", + "words": [ + { + "text": "It", + "offset": 40680, + "duration": 80 + }, + { + "text": "looks", + "offset": 40760, + "duration": 200 + }, + { + "text": "like", + "offset": 40960, + "duration": 160 + }, + { + "text": "you're", + "offset": 41120, + "duration": 320 + }, + { + "text": "currently", + "offset": 41440, + "duration": 560 + }, + { + "text": "using", + "offset": 42320, + "duration": 280 + }, + { + "text": "an", + "offset": 42600, + "duration": 120 + }, + { + "text": "Azure", + "offset": 42720, + "duration": 320 + }, + { + "text": "trial.", + "offset": 43040, + "duration": 640 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + 
"offset": 45360, + "duration": 2720, + "text": "I guess as far as like questions, were you kind of.", + "words": [ + { + "text": "I", + "offset": 45360, + "duration": 480 + }, + { + "text": "guess", + "offset": 45840, + "duration": 240 + }, + { + "text": "as", + "offset": 46240, + "duration": 80 + }, + { + "text": "far", + "offset": 46320, + "duration": 280 + }, + { + "text": "as", + "offset": 46600, + "duration": 80 + }, + { + "text": "like", + "offset": 46680, + "duration": 160 + }, + { + "text": "questions,", + "offset": 46840, + "duration": 640 + }, + { + "text": "were", + "offset": 47480, + "duration": 200 + }, + { + "text": "you", + "offset": 47680, + "duration": 80 + }, + { + "text": "kind", + "offset": 47760, + "duration": 200 + }, + { + "text": "of.", + "offset": 47960, + "duration": 120 + } + ], + "locale": "en-US", + "confidence": 0.9149788 + }, + { + "offset": 49800, + "duration": 1680, + "text": "I know you kind of talked about price.", + "words": [ + { + "text": "I", + "offset": 49800, + "duration": 40 + }, + { + "text": "know", + "offset": 50080, + "duration": 80 + }, + { + "text": "you", + "offset": 50160, + "duration": 120 + }, + { + "text": "kind", + "offset": 50280, + "duration": 200 + }, + { + "text": "of", + "offset": 50480, + "duration": 80 + }, + { + "text": "talked", + "offset": 50560, + "duration": 440 + }, + { + "text": "about", + "offset": 51000, + "duration": 280 + }, + { + "text": "price.", + "offset": 51280, + "duration": 200 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 51480, + "duration": 1760, + "text": "I can definitely go over like pricing with you.", + "words": [ + { + "text": "I", + "offset": 51480, + "duration": 40 + }, + { + "text": "can", + "offset": 51520, + "duration": 120 + }, + { + "text": "definitely", + "offset": 51640, + "duration": 360 + }, + { + "text": "go", + "offset": 52000, + "duration": 120 + }, + { + "text": "over", + "offset": 52120, + "duration": 200 + }, + { + "text": "like", 
+ "offset": 52320, + "duration": 160 + }, + { + "text": "pricing", + "offset": 52480, + "duration": 480 + }, + { + "text": "with", + "offset": 52960, + "duration": 160 + }, + { + "text": "you.", + "offset": 53120, + "duration": 120 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 53800, + "duration": 6840, + "text": "If you're comparing apples to apples, I I saw that Spotify, you know obviously they're you know it's a SaaS platform.", + "words": [ + { + "text": "If", + "offset": 53800, + "duration": 80 + }, + { + "text": "you're", + "offset": 53880, + "duration": 120 + }, + { + "text": "comparing", + "offset": 54000, + "duration": 560 + }, + { + "text": "apples", + "offset": 54560, + "duration": 440 + }, + { + "text": "to", + "offset": 55000, + "duration": 80 + }, + { + "text": "apples,", + "offset": 55080, + "duration": 520 + }, + { + "text": "I", + "offset": 55600, + "duration": 40 + }, + { + "text": "I", + "offset": 55640, + "duration": 280 + }, + { + "text": "saw", + "offset": 55920, + "duration": 240 + }, + { + "text": "that", + "offset": 56160, + "duration": 160 + }, + { + "text": "Spotify,", + "offset": 56320, + "duration": 560 + }, + { + "text": "you", + "offset": 57760, + "duration": 80 + }, + { + "text": "know", + "offset": 57840, + "duration": 160 + }, + { + "text": "obviously", + "offset": 58080, + "duration": 360 + }, + { + "text": "they're", + "offset": 58440, + "duration": 280 + }, + { + "text": "you", + "offset": 59040, + "duration": 80 + }, + { + "text": "know", + "offset": 59120, + "duration": 160 + }, + { + "text": "it's", + "offset": 59360, + "duration": 240 + }, + { + "text": "a", + "offset": 59600, + "duration": 40 + }, + { + "text": "SaaS", + "offset": 59640, + "duration": 360 + }, + { + "text": "platform.", + "offset": 60000, + "duration": 640 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 60640, + "duration": 6600, + "text": "So if you want to run on that looks like their plans are 
as low as 29 bucks a month.", + "words": [ + { + "text": "So", + "offset": 60640, + "duration": 320 + }, + { + "text": "if", + "offset": 61680, + "duration": 320 + }, + { + "text": "you", + "offset": 62000, + "duration": 160 + }, + { + "text": "want", + "offset": 62800, + "duration": 400 + }, + { + "text": "to", + "offset": 63280, + "duration": 80 + }, + { + "text": "run", + "offset": 64240, + "duration": 320 + }, + { + "text": "on", + "offset": 64560, + "duration": 80 + }, + { + "text": "that", + "offset": 64640, + "duration": 320 + }, + { + "text": "looks", + "offset": 64960, + "duration": 160 + }, + { + "text": "like", + "offset": 65120, + "duration": 160 + }, + { + "text": "their", + "offset": 65280, + "duration": 240 + }, + { + "text": "plans", + "offset": 65520, + "duration": 360 + }, + { + "text": "are", + "offset": 65880, + "duration": 80 + }, + { + "text": "as", + "offset": 65960, + "duration": 120 + }, + { + "text": "low", + "offset": 66080, + "duration": 80 + }, + { + "text": "as", + "offset": 66160, + "duration": 80 + }, + { + "text": "29", + "offset": 66240, + "duration": 400 + }, + { + "text": "bucks", + "offset": 66640, + "duration": 280 + }, + { + "text": "a", + "offset": 66920, + "duration": 40 + }, + { + "text": "month.", + "offset": 66960, + "duration": 280 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 67800, + "duration": 2840, + "text": "What I was just saying is Microsoft has a little more flexibility.", + "words": [ + { + "text": "What", + "offset": 67800, + "duration": 200 + }, + { + "text": "I", + "offset": 68000, + "duration": 40 + }, + { + "text": "was", + "offset": 68040, + "duration": 120 + }, + { + "text": "just", + "offset": 68160, + "duration": 200 + }, + { + "text": "saying", + "offset": 68360, + "duration": 200 + }, + { + "text": "is", + "offset": 68560, + "duration": 160 + }, + { + "text": "Microsoft", + "offset": 68720, + "duration": 440 + }, + { + "text": "has", + "offset": 69160, + "duration": 120 
+ }, + { + "text": "a", + "offset": 69280, + "duration": 40 + }, + { + "text": "little", + "offset": 69320, + "duration": 280 + }, + { + "text": "more", + "offset": 69600, + "duration": 160 + }, + { + "text": "flexibility.", + "offset": 69760, + "duration": 880 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 71120, + "duration": 7120, + "text": "You kind of decide as far as you know our different options and then if your company scales and your e-commerce site goes goes big.", + "words": [ + { + "text": "You", + "offset": 71120, + "duration": 80 + }, + { + "text": "kind", + "offset": 71200, + "duration": 360 + }, + { + "text": "of", + "offset": 71560, + "duration": 80 + }, + { + "text": "decide", + "offset": 71640, + "duration": 520 + }, + { + "text": "as", + "offset": 72160, + "duration": 80 + }, + { + "text": "far", + "offset": 72240, + "duration": 360 + }, + { + "text": "as", + "offset": 72600, + "duration": 120 + }, + { + "text": "you", + "offset": 73040, + "duration": 80 + }, + { + "text": "know", + "offset": 73120, + "duration": 120 + }, + { + "text": "our", + "offset": 73280, + "duration": 160 + }, + { + "text": "different", + "offset": 73440, + "duration": 320 + }, + { + "text": "options", + "offset": 73760, + "duration": 360 + }, + { + "text": "and", + "offset": 74120, + "duration": 120 + }, + { + "text": "then", + "offset": 74520, + "duration": 200 + }, + { + "text": "if", + "offset": 74720, + "duration": 240 + }, + { + "text": "your", + "offset": 74960, + "duration": 160 + }, + { + "text": "company", + "offset": 75120, + "duration": 400 + }, + { + "text": "scales", + "offset": 75520, + "duration": 560 + }, + { + "text": "and", + "offset": 76080, + "duration": 120 + }, + { + "text": "your", + "offset": 76200, + "duration": 120 + }, + { + "text": "e-commerce", + "offset": 76320, + "duration": 520 + }, + { + "text": "site", + "offset": 76840, + "duration": 240 + }, + { + "text": "goes", + "offset": 77080, + "duration": 440 + }, 
+ { + "text": "goes", + "offset": 77680, + "duration": 240 + }, + { + "text": "big.", + "offset": 77920, + "duration": 320 + } + ], + "locale": "en-US", + "confidence": 0.9328801 + }, + { + "offset": 78640, + "duration": 4720, + "text": "You know, it's it's the fortunate thing about Microsoft is it's Microsoft, it's Azure.", + "words": [ + { + "text": "You", + "offset": 78640, + "duration": 80 + }, + { + "text": "know,", + "offset": 78720, + "duration": 240 + }, + { + "text": "it's", + "offset": 78960, + "duration": 280 + }, + { + "text": "it's", + "offset": 79280, + "duration": 320 + }, + { + "text": "the", + "offset": 79840, + "duration": 160 + }, + { + "text": "fortunate", + "offset": 80000, + "duration": 480 + }, + { + "text": "thing", + "offset": 80480, + "duration": 120 + }, + { + "text": "about", + "offset": 80600, + "duration": 200 + }, + { + "text": "Microsoft", + "offset": 80800, + "duration": 600 + }, + { + "text": "is", + "offset": 81400, + "duration": 280 + }, + { + "text": "it's", + "offset": 81920, + "duration": 200 + }, + { + "text": "Microsoft,", + "offset": 82120, + "duration": 640 + }, + { + "text": "it's", + "offset": 82760, + "duration": 120 + }, + { + "text": "Azure.", + "offset": 82880, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 83760, + "duration": 4240, + "text": "And then we have our Office 365 suite for you too for users, one share of company scale.", + "words": [ + { + "text": "And", + "offset": 83760, + "duration": 160 + }, + { + "text": "then", + "offset": 83920, + "duration": 120 + }, + { + "text": "we", + "offset": 84040, + "duration": 120 + }, + { + "text": "have", + "offset": 84160, + "duration": 200 + }, + { + "text": "our", + "offset": 84360, + "duration": 120 + }, + { + "text": "Office", + "offset": 84480, + "duration": 320 + }, + { + "text": "365", + "offset": 84800, + "duration": 720 + }, + { + "text": "suite", + "offset": 85520, + "duration": 280 + }, + { + "text": "for", + 
"offset": 85800, + "duration": 200 + }, + { + "text": "you", + "offset": 86000, + "duration": 80 + }, + { + "text": "too", + "offset": 86080, + "duration": 160 + }, + { + "text": "for", + "offset": 86240, + "duration": 160 + }, + { + "text": "users,", + "offset": 86400, + "duration": 400 + }, + { + "text": "one", + "offset": 86800, + "duration": 200 + }, + { + "text": "share", + "offset": 87000, + "duration": 200 + }, + { + "text": "of", + "offset": 87200, + "duration": 80 + }, + { + "text": "company", + "offset": 87280, + "duration": 320 + }, + { + "text": "scale.", + "offset": 87600, + "duration": 400 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 88000, + "duration": 3520, + "text": "So I didn't mean to kind of put the car before the horse, but that's kind of what I was saying.", + "words": [ + { + "text": "So", + "offset": 88000, + "duration": 240 + }, + { + "text": "I", + "offset": 88560, + "duration": 40 + }, + { + "text": "didn't", + "offset": 88600, + "duration": 320 + }, + { + "text": "mean", + "offset": 88920, + "duration": 120 + }, + { + "text": "to", + "offset": 89040, + "duration": 80 + }, + { + "text": "kind", + "offset": 89120, + "duration": 160 + }, + { + "text": "of", + "offset": 89280, + "duration": 80 + }, + { + "text": "put", + "offset": 89360, + "duration": 120 + }, + { + "text": "the", + "offset": 89480, + "duration": 80 + }, + { + "text": "car", + "offset": 89560, + "duration": 120 + }, + { + "text": "before", + "offset": 89680, + "duration": 280 + }, + { + "text": "the", + "offset": 89960, + "duration": 80 + }, + { + "text": "horse,", + "offset": 90040, + "duration": 240 + }, + { + "text": "but", + "offset": 90280, + "duration": 160 + }, + { + "text": "that's", + "offset": 90440, + "duration": 200 + }, + { + "text": "kind", + "offset": 90640, + "duration": 200 + }, + { + "text": "of", + "offset": 90840, + "duration": 80 + }, + { + "text": "what", + "offset": 90920, + "duration": 160 + }, + { + "text": "I", + 
"offset": 91080, + "duration": 40 + }, + { + "text": "was", + "offset": 91120, + "duration": 120 + }, + { + "text": "saying.", + "offset": 91240, + "duration": 280 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 91520, + "duration": 2800, + "text": "I'm talking to myself when we got cut off.", + "words": [ + { + "text": "I'm", + "offset": 91520, + "duration": 80 + }, + { + "text": "talking", + "offset": 92400, + "duration": 240 + }, + { + "text": "to", + "offset": 92640, + "duration": 160 + }, + { + "text": "myself", + "offset": 92800, + "duration": 520 + }, + { + "text": "when", + "offset": 93320, + "duration": 160 + }, + { + "text": "we", + "offset": 93480, + "duration": 120 + }, + { + "text": "got", + "offset": 93600, + "duration": 160 + }, + { + "text": "cut", + "offset": 93760, + "duration": 320 + }, + { + "text": "off.", + "offset": 94080, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 94320, + "duration": 4880, + "text": "But how can I kind of kind of help you out at this point, Dimitri?", + "words": [ + { + "text": "But", + "offset": 94320, + "duration": 320 + }, + { + "text": "how", + "offset": 95600, + "duration": 200 + }, + { + "text": "can", + "offset": 95800, + "duration": 200 + }, + { + "text": "I", + "offset": 96000, + "duration": 160 + }, + { + "text": "kind", + "offset": 96160, + "duration": 360 + }, + { + "text": "of", + "offset": 96520, + "duration": 200 + }, + { + "text": "kind", + "offset": 97280, + "duration": 160 + }, + { + "text": "of", + "offset": 97440, + "duration": 80 + }, + { + "text": "help", + "offset": 97520, + "duration": 240 + }, + { + "text": "you", + "offset": 97760, + "duration": 80 + }, + { + "text": "out", + "offset": 97920, + "duration": 120 + }, + { + "text": "at", + "offset": 98040, + "duration": 80 + }, + { + "text": "this", + "offset": 98120, + "duration": 200 + }, + { + "text": "point,", + "offset": 98320, + "duration": 320 + }, + { + "text": 
"Dimitri?", + "offset": 98640, + "duration": 560 + } + ], + "locale": "en-US", + "confidence": 0.9304696 + }, + { + "offset": 100320, + "duration": 12560, + "text": "Well, I was wondering if there is a service that Microsoft offers where they can just look at my application and just, you know, tune it in or optimize it for the best performance.", + "words": [ + { + "text": "Well,", + "offset": 100320, + "duration": 240 + }, + { + "text": "I", + "offset": 100640, + "duration": 80 + }, + { + "text": "was", + "offset": 100720, + "duration": 120 + }, + { + "text": "wondering", + "offset": 100840, + "duration": 840 + }, + { + "text": "if", + "offset": 101680, + "duration": 320 + }, + { + "text": "there", + "offset": 102000, + "duration": 200 + }, + { + "text": "is", + "offset": 102200, + "duration": 200 + }, + { + "text": "a", + "offset": 102520, + "duration": 40 + }, + { + "text": "service", + "offset": 102560, + "duration": 480 + }, + { + "text": "that", + "offset": 103040, + "duration": 240 + }, + { + "text": "Microsoft", + "offset": 103440, + "duration": 640 + }, + { + "text": "offers", + "offset": 104080, + "duration": 440 + }, + { + "text": "where", + "offset": 104520, + "duration": 440 + }, + { + "text": "they", + "offset": 104960, + "duration": 240 + }, + { + "text": "can", + "offset": 105360, + "duration": 160 + }, + { + "text": "just", + "offset": 105520, + "duration": 280 + }, + { + "text": "look", + "offset": 105800, + "duration": 200 + }, + { + "text": "at", + "offset": 106000, + "duration": 80 + }, + { + "text": "my", + "offset": 106080, + "duration": 160 + }, + { + "text": "application", + "offset": 106240, + "duration": 800 + }, + { + "text": "and", + "offset": 107040, + "duration": 200 + }, + { + "text": "just,", + "offset": 107240, + "duration": 640 + }, + { + "text": "you", + "offset": 108560, + "duration": 120 + }, + { + "text": "know,", + "offset": 108680, + "duration": 200 + }, + { + "text": "tune", + "offset": 109360, + "duration": 320 + }, + { + 
"text": "it", + "offset": 109680, + "duration": 160 + }, + { + "text": "in", + "offset": 109840, + "duration": 160 + }, + { + "text": "or", + "offset": 110000, + "duration": 240 + }, + { + "text": "optimize", + "offset": 110240, + "duration": 560 + }, + { + "text": "it", + "offset": 110800, + "duration": 120 + }, + { + "text": "for", + "offset": 111400, + "duration": 120 + }, + { + "text": "the", + "offset": 111520, + "duration": 120 + }, + { + "text": "best", + "offset": 111640, + "duration": 320 + }, + { + "text": "performance.", + "offset": 112200, + "duration": 680 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 113200, + "duration": 480, + "text": "Sure.", + "words": [ + { + "text": "Sure.", + "offset": 113200, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 114440, + "duration": 1400, + "text": "We do have a support team.", + "words": [ + { + "text": "We", + "offset": 114440, + "duration": 80 + }, + { + "text": "do", + "offset": 114520, + "duration": 120 + }, + { + "text": "have", + "offset": 114640, + "duration": 240 + }, + { + "text": "a", + "offset": 114880, + "duration": 80 + }, + { + "text": "support", + "offset": 114960, + "duration": 440 + }, + { + "text": "team.", + "offset": 115400, + "duration": 440 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 116080, + "duration": 2880, + "text": "Yeah, that's our engineering team on the back end.", + "words": [ + { + "text": "Yeah,", + "offset": 116080, + "duration": 240 + }, + { + "text": "that's", + "offset": 116960, + "duration": 200 + }, + { + "text": "our", + "offset": 117160, + "duration": 120 + }, + { + "text": "engineering", + "offset": 117280, + "duration": 640 + }, + { + "text": "team", + "offset": 117920, + "duration": 200 + }, + { + "text": "on", + "offset": 118120, + "duration": 120 + }, + { + "text": "the", + "offset": 118240, + "duration": 80 + }, + { + "text": "back", + "offset": 118320, + 
"duration": 400 + }, + { + "text": "end.", + "offset": 118720, + "duration": 240 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + }, + { + "offset": 119120, + "duration": 800, + "text": "It's an option.", + "words": [ + { + "text": "It's", + "offset": 119120, + "duration": 200 + }, + { + "text": "an", + "offset": 119320, + "duration": 120 + }, + { + "text": "option.", + "offset": 119440, + "duration": 480 + } + ], + "locale": "en-US", + "confidence": 0.9264704 + } + ] +} \ No newline at end of file