-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Restructured code, replaced JSON definitions with DEFINEs, and improved flow.
- Loading branch information
Showing
15 changed files
with
585 additions
and
631 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#include "setup.h" | ||
|
||
void setup() { | ||
Serial.begin(115200); | ||
delay(1000); | ||
Serial.println("---------- Starting ------------ "); | ||
if(!start()){ | ||
return; | ||
} | ||
i2s_mic_install(); //Install I2S microphone driver | ||
createWavAudio(samplesRecordedFilePath, SAMPLE_RATE*SECONDS); | ||
WavToBase64inSDcard(samplesRecordedFilePath, samplesRecordedFilePath_base64); | ||
String transcription = speechToText(); //transcribe audio to text | ||
String answer = getAnswer(transcription); | ||
i2s_driver_uninstall(I2S_NUM_0); | ||
i2s_speaker_install(); //Install I2S Speaker Driver | ||
textToSpeech(answer); //play the answer | ||
Base64ToWavinSDcard(samplesGoogleFilePath_base64, samplesGoogleFilePath); | ||
playAudioSamples(samplesGoogleFilePath); | ||
} | ||
|
||
// Intentionally empty: the sketch does all of its work once, inside setup().
void loop() {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
#include "Google_Wrapper.h" | ||
#include "global.h" | ||
#include "config.h" | ||
|
||
// Synthesizes `text` through the Google Cloud Text-to-Speech REST API and
// stores the Base64-encoded audio it returns on the SD card.
//
// Steps:
//   1. POST a JSON request (en-US Wavenet voice, LINEAR16 at SAMPLE_RATE).
//   2. Stream the JSON response body to jsonResponseFilePath.
//   3. Copy only the value of the "audioContent" field (without the
//      surrounding quotes) to samplesGoogleFilePath_base64.
//
// Parameters:
//   text - the sentence to synthesize.
//
// On any failure (HTTP status other than 200, SD card error, missing
// "audioContent" field) a message is printed to Serial and the function
// returns without touching samplesGoogleFilePath_base64.
void textToSpeech(String text) {
  Serial.print("Getting Text to Speech........ ");

  WiFiClientSecure client;
  client.setInsecure();  // NOTE: the server certificate is not verified
  HTTPClient httpClient;
  httpClient.begin(client, "https://texttospeech.googleapis.com/v1/text:synthesize?key=" + String(APIKEYGOOGLE));

  // The document keeps its own copy of `text`, so its capacity has to grow
  // with the input; a fixed 1024 bytes silently drops longer sentences.
  DynamicJsonDocument doc(1024 + text.length());
  doc["input"]["text"] = text;
  doc["voice"]["languageCode"] = "en-US";
  doc["voice"]["name"] = "en-US-Wavenet-A";
  doc["audioConfig"]["audioEncoding"] = "LINEAR16";
  doc["audioConfig"]["sampleRateHertz"] = SAMPLE_RATE;
  String requestBody;
  serializeJson(doc, requestBody);

  httpClient.addHeader("Content-Type", "application/json");
  int httpCode = httpClient.POST(requestBody);

  Serial.print("Status Code ");
  Serial.println(httpCode);

  if (httpCode != HTTP_CODE_OK) {
    // Show whatever error body the server sent to make debugging possible.
    Serial.println("Text to Speech request failed");
    Serial.println(httpClient.getString());
    httpClient.end();
    return;
  }

  // Save the JSON response to a file on the SD card. writeToStream() copies
  // the body in chunks and also decodes chunked transfer encoding, which a
  // raw read from the socket stream would leave embedded in the data.
  File jsonResponseFile = SD.open(jsonResponseFilePath, FILE_WRITE);
  if (!jsonResponseFile) {
    Serial.println("Failed to open the JSON response file for writing");
    httpClient.end();
    return;
  }
  int bytesWritten = httpClient.writeToStream(&jsonResponseFile);
  jsonResponseFile.close();
  httpClient.end();

  if (bytesWritten <= 0) {
    Serial.println("Failed to save the JSON response to the SD card");
    return;
  }

  // Reopen for reading and advance to the first character of the Base64
  // payload. Stream::find() returns true/false (not an offset) and leaves the
  // read position right after the match, so the first call skips past the
  // field name and the second one skips past the value's opening quote.
  File jsonResponseFileRead = SD.open(jsonResponseFilePath, FILE_READ);
  if (!jsonResponseFileRead ||
      !jsonResponseFileRead.find("\"audioContent\"") ||
      !jsonResponseFileRead.find("\"")) {
    Serial.println("Failed to find audioContent field in the JSON response");
    jsonResponseFileRead.close();
    return;
  }

  //write base64 into sdcard
  File audioSamplesFile_base64 = SD.open(samplesGoogleFilePath_base64, FILE_WRITE);
  if (!audioSamplesFile_base64) {
    Serial.println("Failed to open the Base64 audio file for writing");
    jsonResponseFileRead.close();
    return;
  }

  // Copy block-wise up to (not including) the closing quote of the value so
  // that no JSON syntax ends up in the Base64 file.
  uint8_t chunk[512];
  bool reachedClosingQuote = false;
  while (!reachedClosingQuote && jsonResponseFileRead.available()) {
    size_t bytesRead = jsonResponseFileRead.read(chunk, sizeof(chunk));
    if (bytesRead == 0) {
      break;
    }
    size_t payloadLength = 0;
    while (payloadLength < bytesRead && chunk[payloadLength] != '"') {
      payloadLength++;
    }
    reachedClosingQuote = (payloadLength < bytesRead);
    audioSamplesFile_base64.write(chunk, payloadLength);
  }
  jsonResponseFileRead.close();
  audioSamplesFile_base64.close();

  //Delete the JSON response file
  //SD.remove(jsonResponseFilePath);
}
|
||
// Sends the Base64-encoded recording stored at samplesRecordedFilePath_base64
// to the Google Cloud Speech-to-Text API and returns the transcript.
//
// USAGE: This function only has support for sending audio content from memory only.
// In the future, an additional function with support for sending from a Google Storage URI will be added.
//
// Returns:
//   The transcript of the first alternative of the first result, or an empty
//   String when the recording cannot be read, does not fit in free heap, the
//   request fails, or the response contains no transcript.
String speechToText(){
  Serial.print("Getting Speech to Text........ ");

  File contentBase64 = SD.open(samplesRecordedFilePath_base64, FILE_READ);
  if (!contentBase64) {
    Serial.println("Failed to open the Base64 recording on the SD card");
    return "";
  }
  Serial.println("PAYLOAD STATISTICS:");
  Serial.printf("File Size: %d Bytes\n", contentBase64.size());

  int docSize = contentBase64.size() + 2048;
  int availSpace = esp_get_free_heap_size();
  Serial.print("Free heap size at start: ");
  Serial.println(availSpace);

  //construct parameter string for API call
  // The sample rate comes from SAMPLE_RATE so the request always matches the
  // rate used for recording and for text-to-speech.
  String requestBody = "{\"config\":{\"encoding\":\"LINEAR16\",\"sampleRateHertz\":";
  requestBody += String(SAMPLE_RATE);
  requestBody += ",\"languageCode\":\"en-US\"},\"audio\":{\"content\":\"";

  // Reserve the final size once: this avoids repeated reallocation while
  // appending and detects an impossible allocation before any network work.
  const size_t closingLength = 3;  // length of "\"}}"
  if (docSize > availSpace ||
      !requestBody.reserve(requestBody.length() + contentBase64.size() + closingLength)) {
    Serial.println("Payload requires more than available memory. Aborting request.");
    contentBase64.close();
    return "";
  }

  //read bytes of Base64 encoded recorded audio content from SD card
  // One extra byte is kept for the terminator: appending a char buffer to a
  // String requires a NUL-terminated C string.
  char chunk[513];
  while (contentBase64.available()) {
    size_t bytesRead = contentBase64.readBytes(chunk, sizeof(chunk) - 1);
    if (bytesRead == 0) {
      break;
    }
    chunk[bytesRead] = '\0';
    requestBody += chunk;
  }
  contentBase64.close();
  requestBody += "\"}}";

  WiFiClientSecure client;
  client.setInsecure();  // NOTE: the server certificate is not verified
  HTTPClient httpClient;
  httpClient.begin(client, "https://speech.googleapis.com/v1p1beta1/speech:recognize?key=" + String(APIKEYGOOGLE));

  // Content-Length is added by HTTPClient itself for a POST with a body, so
  // it is not set manually (doing so would send the header twice).
  httpClient.addHeader("Content-Type", "application/json");
  int httpCode = httpClient.POST(requestBody);
  Serial.println(requestBody);
  String responseString = httpClient.getString(); // get the whole response at once
  httpClient.end();
  Serial.println(responseString);

  Serial.print("Status Code ");
  Serial.println(httpCode);

  DynamicJsonDocument respDoc(4096);
  DeserializationError parseError = deserializeJson(respDoc, responseString);
  if (httpCode != HTTP_CODE_OK || parseError) {
    Serial.println("Speech to Text request failed");
    return "";
  }

  // Extract the content field
  // "results" and "alternatives" are arrays in the recognize response; only
  // the first alternative of the first result is used. `| default` yields the
  // default instead of the text "null" when a field is missing.
  String content = respDoc["results"][0]["alternatives"][0]["transcript"] | "";
  // Confidence is a fraction between 0 and 1, so it must not be stored as int.
  float confidence = respDoc["results"][0]["alternatives"][0]["confidence"] | 0.0f;

  Serial.print("Transcripted Audio: ");
  Serial.println(content);
  Serial.print("Transcription Confidence: ");
  Serial.println(confidence);

  return content;
}
|
||
//Use Google refresh token to obtain your Oauth 2.0 token
//
// Posts REFRESHTOKENGOOGLE, CLIENTSECRETGOOGLE and CLIENTIDGOOGLE as a
// form-encoded refresh_token grant to https://oauth2.googleapis.com/token.
//
// Returns:
//   The "access_token" value of the JSON response, or an empty String when
//   the response carries no such field. Progress and the HTTP status of a
//   failed request are reported on Serial.
String getOAuthToken(){
  Serial.print("Requesting Auth 2.0 Token.....");

  WiFiClientSecure client;
  client.setInsecure();  // NOTE: the server certificate is not verified
  HTTPClient httpClient;

  // Hand the configured TLS client to HTTPClient, as the other Google calls
  // in this file do; previously it was created but never used.
  httpClient.begin(client, "https://oauth2.googleapis.com/token");
  httpClient.addHeader("Content-Type", "application/x-www-form-urlencoded");
  String requestBody = "grant_type=refresh_token&response_type=token&refresh_token=" + String(REFRESHTOKENGOOGLE) + "&client_secret=" + String(CLIENTSECRETGOOGLE) + "&client_id="+ String(CLIENTIDGOOGLE);
  int httpCode = httpClient.POST(requestBody);

  String responseString = httpClient.getString(); // get the whole response at once
  httpClient.end();

  DynamicJsonDocument respDoc(1024);
  DeserializationError parseError = deserializeJson(respDoc, responseString); //deserialize into JSON

  // `| ""` keeps a missing field from being converted to the text "null",
  // which a caller would otherwise send as a bearer token.
  String accessToken = respDoc["access_token"] | ""; //obtain oauth2.0 token
  if (httpCode == 200 && !parseError && accessToken.length() > 30) {
    Serial.println(" Success!");
  } else {
    Serial.print(" Failed! Status Code ");
    Serial.println(httpCode);
  }

  return accessToken; //return token
}
|
||
//not finished, only required if audio we are sending to speech-to-text is over 10mb or 1 minute
//
// Posts a small JSON document to the "hara-esp32" Cloud Storage bucket
// endpoint and prints the HTTP status code and raw response to Serial.
//
// Parameters:
//   token    - OAuth 2.0 access token, sent as "Authorization: Bearer <token>".
//   fileName - stored in the request document under "title".
void uploadToStorage(String token, String fileName){
  Serial.print("Storing on Cloud.............. ");

  // Request payload.
  DynamicJsonDocument payload(1024);
  payload["documentId"] = "LINEAR16";
  payload["title"] = fileName;
  String body;
  serializeJson(payload, body);

  // Your OAuth 2.0 token
  String bearer = "Bearer ";
  bearer += token;

  WiFiClientSecure tlsClient;
  tlsClient.setInsecure();
  HTTPClient http;
  http.begin(tlsClient, "https://storage.googleapis.com/upload/storage/v1/b/hara-esp32/o");
  http.addHeader("Authorization", bearer);

  int status = http.POST(body);

  // Get the whole response at once; it is parsed here but, for now, only the
  // raw text is reported.
  String reply = http.getString();
  DynamicJsonDocument replyDoc(1024);
  deserializeJson(replyDoc, reply);

  Serial.print("Status Code ");
  Serial.println(status);
  Serial.print("Response: ");
  Serial.println(reply);

  http.end();
}
|
||
// ... other function implementations |
Oops, something went wrong.