Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
Restructured code, replaced JSON definitions with DEFINEs, and improved flow.
  • Loading branch information
tomik395 authored Sep 29, 2023
1 parent c8b736c commit 9042565
Show file tree
Hide file tree
Showing 15 changed files with 585 additions and 631 deletions.
24 changes: 24 additions & 0 deletions ESP32-AI.ino
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "setup.h"

void setup() {
Serial.begin(115200);
delay(1000);
Serial.println("---------- Starting ------------ ");
if(!start()){
return;
}
i2s_mic_install(); //Install I2S microphone driver
createWavAudio(samplesRecordedFilePath, SAMPLE_RATE*SECONDS);
WavToBase64inSDcard(samplesRecordedFilePath, samplesRecordedFilePath_base64);
String transcription = speechToText(); //transcribe audio to text
String answer = getAnswer(transcription);
i2s_driver_uninstall(I2S_NUM_0);
i2s_speaker_install(); //Install I2S Speaker Driver
textToSpeech(answer); //play the answer
Base64ToWavinSDcard(samplesGoogleFilePath_base64, samplesGoogleFilePath);
playAudioSamples(samplesGoogleFilePath);
}

void loop() {
  // Intentionally empty: all work is done once in setup().
}
186 changes: 186 additions & 0 deletions Google_Wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#include "Google_Wrapper.h"
#include "global.h"
#include "config.h"

// Synthesizes `text` with the Google Cloud Text-to-Speech REST API.
//
// The raw JSON response is saved to jsonResponseFilePath on the SD card, and the
// Base64 payload of its "audioContent" field (LINEAR16 audio at SAMPLE_RATE) is
// then extracted into samplesGoogleFilePath_base64.
//
// On any failure (HTTP error, SD error, missing field) a message is printed and
// the function returns without writing samplesGoogleFilePath_base64.
//
// text: the sentence(s) to synthesize.
void textToSpeech(String text) {
  Serial.print("Getting Text to Speech........ ");

  WiFiClientSecure client;
  client.setInsecure();  // NOTE: the server certificate is not verified
  HTTPClient httpClient;
  httpClient.begin(client, "https://texttospeech.googleapis.com/v1/text:synthesize?key=" + String(APIKEYGOOGLE));

  // Size the document from the text length: the String value is copied into the
  // document, so a fixed 1024-byte pool silently drops long answers.
  DynamicJsonDocument doc(512 + text.length() + 1);
  doc["input"]["text"] = text;
  doc["voice"]["languageCode"] = "en-US";
  doc["voice"]["name"] = "en-US-Wavenet-A";
  doc["audioConfig"]["audioEncoding"] = "LINEAR16";
  doc["audioConfig"]["sampleRateHertz"] = SAMPLE_RATE;
  String requestBody;
  serializeJson(doc, requestBody);

  httpClient.addHeader("Content-Type", "application/json");
  int httpCode = httpClient.POST(requestBody);

  Serial.print("Status Code ");
  Serial.println(httpCode);
  if (httpCode != 200) {
    Serial.println("Text to Speech request failed");
    httpClient.end();
    return;
  }

  // Save the JSON response to a file on the SD card. writeToStream() streams the
  // body in chunks and takes care of Content-Length / chunked transfer encoding,
  // which a raw read of the underlying socket does not.
  File jsonResponseFile = SD.open(jsonResponseFilePath, FILE_WRITE);
  if (!jsonResponseFile) {
    Serial.println("Failed to open JSON response file for writing");
    httpClient.end();
    return;
  }
  int bytesWritten = httpClient.writeToStream(&jsonResponseFile);
  jsonResponseFile.close();
  httpClient.end();
  if (bytesWritten <= 0) {
    Serial.println("Failed to save the JSON response");
    return;
  }

  // Reopen for reading and advance to the first character of the Base64 payload.
  // Stream::find() returns a bool (not a position) and consumes the stream up to
  // and including the match, so two calls leave us right after the opening quote
  // of the value.
  File jsonResponseFileRead = SD.open(jsonResponseFilePath, FILE_READ);
  if (!jsonResponseFileRead) {
    Serial.println("Failed to open JSON response file for reading");
    return;
  }
  if (!jsonResponseFileRead.find("\"audioContent\"") || !jsonResponseFileRead.find("\"")) {
    Serial.println("Failed to find audioContent field in the JSON response");
    jsonResponseFileRead.close();
    return;
  }

  // Write Base64 onto the SD card, stopping at the closing quote so that the
  // trailing JSON ('"' and '}') does not end up in the Base64 file.
  File audioSamplesFile_base64 = SD.open(samplesGoogleFilePath_base64, FILE_WRITE);
  if (!audioSamplesFile_base64) {
    Serial.println("Failed to open Base64 audio file for writing");
    jsonResponseFileRead.close();
    return;
  }
  uint8_t chunk[512];
  bool endOfValue = false;
  while (!endOfValue && jsonResponseFileRead.available()) {
    int got = jsonResponseFileRead.read(chunk, sizeof(chunk));
    if (got <= 0) {
      break;
    }
    int keep = 0;
    while (keep < got && chunk[keep] != '"') {
      keep++;
    }
    endOfValue = (keep < got);
    audioSamplesFile_base64.write(chunk, keep);
  }
  jsonResponseFileRead.close();
  audioSamplesFile_base64.close();

  // The JSON response file is intentionally left on the SD card.
}

// Transcribes the recorded audio with the Google Cloud Speech-to-Text REST API.
//
// Reads the Base64-encoded recording from samplesRecordedFilePath_base64 on the
// SD card, embeds it in a speech:recognize request and returns the transcript of
// the first alternative of the first result.
//
// USAGE: only inline audio content (held in memory) is supported. Sending from a
// Google Storage URI is not implemented.
//
// Returns the transcript, or an empty String when the file cannot be read, the
// payload does not fit in memory, or the response contains no transcript.
String speechToText(){
  Serial.print("Getting Speech to Text........ ");

  File contentBase64 = SD.open(samplesRecordedFilePath_base64, FILE_READ);
  if (!contentBase64) {
    Serial.println("Failed to open Base64 audio file");
    return String();
  }

  const size_t payloadSize = contentBase64.size();
  Serial.println("PAYLOAD STATISTICS:");
  Serial.printf("File Size: %u Bytes\n", (unsigned int)payloadSize);

  const size_t docSize = payloadSize + 2048;
  const size_t availSpace = esp_get_free_heap_size();
  Serial.print("Free heap size at start: ");
  Serial.println(availSpace);

  // The whole request body is held in RAM, so give up early instead of running
  // out of memory halfway through building it.
  if (docSize > availSpace){
    Serial.println("Payload requires more than available memory. Handle this.");
    contentBase64.close();
    return String();
  }

  WiFiClientSecure client;
  client.setInsecure();  // NOTE: the server certificate is not verified
  HTTPClient httpClient;
  httpClient.begin(client, "https://speech.googleapis.com/v1p1beta1/speech:recognize?key=" + String(APIKEYGOOGLE));
  httpClient.addHeader("Content-Type", "application/json");

  int httpCode;
  {
    // Scoped so the (large) request body is released before the response is read.
    String requestBody;
    if (!requestBody.reserve(payloadSize + 256)) {
      Serial.println("Payload requires more than available memory. Handle this.");
      contentBase64.close();
      httpClient.end();
      return String();
    }

    // Construct the JSON request; the sample rate follows SAMPLE_RATE so it
    // always matches the rate used for recording and synthesis.
    requestBody = "{\"config\":{\"encoding\":\"LINEAR16\",\"sampleRateHertz\":";
    requestBody += String(SAMPLE_RATE);
    requestBody += ",\"languageCode\":\"en-US\"},\"audio\":{\"content\":\"";

    // Append the Base64 audio straight into the request body, in chunks. The
    // extra byte keeps the buffer NUL-terminated for the String append.
    const size_t BUFFER_SIZE = 512;
    char buffer[BUFFER_SIZE + 1];
    while (contentBase64.available()) {
      size_t bytesRead = contentBase64.readBytes(buffer, BUFFER_SIZE);
      if (bytesRead == 0) {
        break;
      }
      buffer[bytesRead] = '\0';
      requestBody += buffer;
    }
    contentBase64.close();
    requestBody += "\"}}";

    Serial.printf("Request Size: %u Bytes\n", (unsigned int)requestBody.length());
    // HTTPClient sets Content-Length itself from the payload size.
    httpCode = httpClient.POST(requestBody);
  }

  String responseString = httpClient.getString(); // get the whole response at once
  httpClient.end();

  Serial.print("Status Code ");
  Serial.println(httpCode);
  Serial.println(responseString);

  DynamicJsonDocument respDoc(4096);
  DeserializationError parseError = deserializeJson(respDoc, responseString);
  if (parseError) {
    Serial.print("Failed to parse response: ");
    Serial.println(parseError.c_str());
    return String();
  }

  // "results" and "alternatives" are arrays in the recognize response; take the
  // first (best) alternative. Missing fields fall back to "" / 0.
  String content = respDoc["results"][0]["alternatives"][0]["transcript"] | "";
  float confidence = respDoc["results"][0]["alternatives"][0]["confidence"] | 0.0f;

  Serial.print("Transcripted Audio: ");
  Serial.println(content);
  Serial.print("Transcription Confidence: ");
  Serial.println(confidence);

  return content;
}

// Exchanges the Google refresh token (REFRESHTOKENGOOGLE, together with
// CLIENTIDGOOGLE / CLIENTSECRETGOOGLE) for a short-lived OAuth 2.0 access token.
//
// Returns the access token, or an empty String when the request fails or the
// response carries no "access_token" field.
String getOAuthToken(){
  Serial.print("Requesting Auth 2.0 Token.....");

  WiFiClientSecure client;
  client.setInsecure();  // NOTE: the server certificate is not verified
  HTTPClient httpClient;

  // Pass the configured TLS client explicitly, like the other requests in this
  // file; otherwise `client` (and its setInsecure() setting) is never used.
  httpClient.begin(client, "https://oauth2.googleapis.com/token");
  httpClient.addHeader("Content-Type", "application/x-www-form-urlencoded");
  String requestBody = "grant_type=refresh_token&response_type=token&refresh_token=" + String(REFRESHTOKENGOOGLE) + "&client_secret=" + String(CLIENTSECRETGOOGLE) + "&client_id="+ String(CLIENTIDGOOGLE);
  int httpCode = httpClient.POST(requestBody);

  String responseString = httpClient.getString(); // get the whole response at once
  httpClient.end();

  DynamicJsonDocument respDoc(1024);
  deserializeJson(respDoc, responseString); //deserialize into JSON

  // Default to "" so a missing field is not returned as the string "null".
  String accessToken = respDoc["access_token"] | "";
  if (httpCode == 200 && accessToken.length() > 30) {
    Serial.println(" Success!");
  } else {
    Serial.print(" Failed, status code ");
    Serial.println(httpCode);
  }

  return accessToken; //return token
}

// Work in progress. Only needed once the audio sent to speech-to-text exceeds
// 10 MB or 1 minute. Currently POSTs a small JSON document to the bucket upload
// endpoint and prints the status code and response.
//
// token:    OAuth 2.0 access token, sent as a Bearer Authorization header.
// fileName: value stored in the "title" field of the JSON body.
void uploadToStorage(String token, String fileName){
  Serial.print("Storing on Cloud.............. ");

  // Build the JSON request body first.
  DynamicJsonDocument metadata(1024);
  metadata["documentId"] = "LINEAR16";
  metadata["title"] = fileName;
  String payload;
  serializeJson(metadata, payload);

  // TLS connection without certificate verification.
  WiFiClientSecure tlsClient;
  tlsClient.setInsecure();
  HTTPClient http;
  http.begin(tlsClient, "https://storage.googleapis.com/upload/storage/v1/b/hara-esp32/o");
  String authorization = "Bearer " + token;
  http.addHeader("Authorization", authorization);

  int statusCode = http.POST(payload);

  // Fetch the whole response at once and parse it (the parsed document is not
  // used yet).
  String responseBody = http.getString();
  DynamicJsonDocument parsedResponse(1024);
  deserializeJson(parsedResponse, responseBody);

  Serial.print("Status Code ");
  Serial.println(statusCode);
  Serial.print("Response: ");
  Serial.println(responseBody);

  http.end();
}

// ... other function implementations
Loading

0 comments on commit 9042565

Please sign in to comment.