-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
97 lines (74 loc) · 3.21 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from google.cloud import speech, texttospeech
from werkzeug.middleware.proxy_fix import ProxyFix
import base64
import openai
import os
# The WSGI application object that the route below is registered on.
app = Flask(__name__)

# The app is served from behind a reverse proxy, so trust one hop of each
# X-Forwarded-* header (For, Proto, Host, Prefix) when rebuilding the request.
# (https://flask.palletsprojects.com/en/2.3.x/deploying/proxy_fix/)
app.wsgi_app = ProxyFix(
    app.wsgi_app,
    x_for=1,
    x_proto=1,
    x_host=1,
    x_prefix=1,
)
# Read variables from a .env file into the environment, then hand the OpenAI
# API key found there to the openai client library.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Placeholder for instructions to give the LLM (not currently used)
# system_msgs = []
# Google Cloud Text-to-Speech client, shared by every request, together with
# the voice and output format used for all synthesized replies.
tts_client = texttospeech.TextToSpeechClient()

# The language code is a BCP-47 tag, so it is hyphenated ("en-US") just like
# the one passed to the speech recognition config in this file.
tts_voice = texttospeech.VoiceSelectionParams(
    language_code="en-US",
    ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
)

# Replies are synthesized as MP3 audio.
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3,
)
# Google Cloud Speech-to-Text client, shared by every request.
speech_client = speech.SpeechClient()

# Recognition settings: US English, WebM/Opus encoded audio at 48 kHz.
speech_config = speech.RecognitionConfig(
    language_code="en-US",
    sample_rate_hertz=48000,
    encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
)
def _answer_prompt():
    """Answer the submitted prompt and render it together with spoken audio.

    Reads ``user_prompt`` from the form data, sends it to the OpenAI
    ChatCompletion API as a single-turn conversation, synthesizes the reply
    with Google Cloud Text-to-Speech, and renders ``response.html`` with the
    reply text (``response``) and the base64-encoded audio (``tts_audio``).

    Returns:
        The rendered template, or a ``(message, 400)`` tuple when the prompt
        is missing or blank.
    """
    user_prompt = request.form.get("user_prompt")
    if not user_prompt or not user_prompt.strip():
        # Without this guard a None/blank prompt is forwarded to the OpenAI
        # API and surfaces as an unhandled server error.
        return "Missing or empty 'user_prompt' form field.", 400

    # Messages are rebuilt from scratch on every request; no history is kept.
    # Seeding with configured system messages is not used currently:
    # for msg in system_msgs:
    #     messages.append({"role": "system", "content": msg})
    messages = [{"role": "user", "content": user_prompt}]

    # Ask the OpenAI ChatCompletion API and keep the first choice's text
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=256,
    )
    response = completion.choices[0].message.content

    # Use the reply to make a request to Google Cloud's Text-to-Speech API
    synthesis_input = texttospeech.SynthesisInput(text=response)
    tts_response = tts_client.synthesize_speech(
        input=synthesis_input, voice=tts_voice, audio_config=audio_config
    )

    # Base64-encode the audio bytes and decode to str so the audio can be
    # passed to the template as text.
    tts_audio = base64.b64encode(tts_response.audio_content).decode('utf-8')

    return render_template(
        "response.html", response=response, tts_audio=tts_audio
    )


def _transcribe_audio():
    """Transcribe the raw request body with Google Cloud Speech-to-Text.

    Returns:
        A JSON response ``{"message": <transcript>}`` where the transcript is
        the first alternative of every recognition result, concatenated in
        order.
    """
    audio = speech.RecognitionAudio(content=request.data)
    recognition = speech_client.recognize(config=speech_config, audio=audio)

    transcripts = [
        result.alternatives[0].transcript for result in recognition.results
    ]
    # Logged at debug level instead of printed to stdout.
    app.logger.debug("Transcribed segments: %s", transcripts)

    return jsonify({'message': "".join(transcripts)})


@app.route("/cs50x", methods=['GET', 'POST', 'PUT'])
def chat():
    """Serve the chat page and its two API-style actions.

    * ``POST`` - answer the prompt in the ``user_prompt`` form field and
      render ``response.html`` with the reply text and synthesized audio.
    * ``PUT`` - transcribe the audio in the request body and return the
      transcript as JSON.
    * anything else - render the prompt form (``prompt.html``).
    """
    if request.method == "POST":
        return _answer_prompt()
    if request.method == "PUT":
        return _transcribe_audio()

    # Default branch rather than an explicit GET check: Flask also routes
    # HEAD requests here because GET is allowed, and a view that returns
    # None for them fails with a server error.
    return render_template("prompt.html")