Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#3 Use AWS Transcribe service in order to evaluate the speaking/pronunciation #18

Merged
merged 6 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions backend/backend/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_DEFAULT_REGION=
AWS_BUCKET_NAME=
7 changes: 7 additions & 0 deletions backend/backend/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"""

from pathlib import Path
import environ

# Read the .env file first, then create the accessor used to look settings up.
environ.Env.read_env()
env = environ.Env()

# Project root, two directory levels above this settings module.
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parents[1]
Expand All @@ -37,6 +41,9 @@
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'rest_framework',
'backend.transcribe',
'backend.upload',
]

MIDDLEWARE = [
Expand Down
Empty file.
3 changes: 3 additions & 0 deletions backend/backend/transcribe/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions backend/backend/transcribe/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class TranscribeConfig(AppConfig):
    """Django application configuration for the transcribe app."""

    # Dotted path of the app package, matching its INSTALLED_APPS entry.
    name = "backend.transcribe"
    # Auto-created primary keys use BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
Empty file.
3 changes: 3 additions & 0 deletions backend/backend/transcribe/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
3 changes: 3 additions & 0 deletions backend/backend/transcribe/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.
10 changes: 10 additions & 0 deletions backend/backend/transcribe/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from rest_framework import routers
from django.urls import path, include
from . import views

router = routers.DefaultRouter()

# Routes of the transcribe app; both are served by TranscribeViewSet.
urlpatterns = [
    path('', views.TranscribeViewSet.as_view(), name='transcribe'),
    # No leading slash: this module is included under 'transcribe/', so a
    # route starting with '/' would only match 'transcribe//<id>' and makes
    # Django emit the urls.W002 system-check warning.
    path('<int:id>', views.TranscribeViewSet.as_view(), name='transcribe'),
]
109 changes: 109 additions & 0 deletions backend/backend/transcribe/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from django.shortcuts import render
from rest_framework.views import APIView
from rest_framework.response import Response
import environ
import boto3
import urllib
import json

# Environment accessor; AWS settings are looked up through it.
env = environ.Env()
environ.Env.read_env()

# Both AWS clients authenticate with the same key pair from the environment.
_aws_credentials = {
    'aws_access_key_id': env('AWS_ACCESS_KEY_ID'),
    'aws_secret_access_key': env('AWS_SECRET_ACCESS_KEY'),
}

# Module-level clients shared by every request handled in this module.
transcribe_client = boto3.client('transcribe', **_aws_credentials)
s3_client = boto3.client('s3', **_aws_credentials)


class TranscribeViewSet(APIView):
    """Start AWS Transcribe jobs and report per-word confidence for them.

    ``post`` starts a transcription job for an audio file stored in the
    bucket named by ``AWS_BUCKET_NAME``. ``get`` looks a job up by name and,
    once it has completed, returns every transcribed item together with its
    confidence and a colour bucket.
    """

    @staticmethod
    def _confidence_color(confidence):
        """Map a confidence value to the colour reported to the client.

        A confidence of exactly 0 is reported as green, as is anything above
        0.7. Values in (0.4, 0.7] are yellow and everything else is red.
        """
        if confidence > 0.7 or confidence == 0:
            return 'green'
        if confidence > 0.4:
            return 'yellow'
        return 'red'

    def post(self, request, *args, **kwargs):
        """Start a transcription job for a file in the S3 bucket.

        The request body should be JSON such as::

            {
                "s3_url": "https://s3.amazonaws.com/bucket/file.mp3",
                "language_code": "en-US"
            }

        The response is a JSON object with the fields::

            {
                "message": "Successfully started transcription job"
                           || "Failed to start transcription job",
                "transcription_job_name": "transcription_job_name",
                "aws_response": "aws_response"
            }

        The transcription job name has to be stored by the caller; it is the
        value the ``get`` method needs to fetch the results. Extra URL
        arguments captured by the route are accepted and ignored.
        """
        s3_url = request.data.get('s3_url')
        language_code = request.data.get('language_code')

        # Validate before deriving anything from s3_url: splitting a missing
        # value would raise instead of returning the validation message.
        if not s3_url or not language_code:
            return Response({"message": "Please provide all the required fields."})

        # Only the last path segment is used; the bucket always comes from
        # the environment, not from the URL.
        s3_filename = s3_url.split('/')[-1]
        s3_uri = "s3://{}/{}".format(env('AWS_BUCKET_NAME'), s3_filename)

        response = transcribe_client.start_transcription_job(
            TranscriptionJobName=s3_filename + '_transcription',
            Media={'MediaFileUri': s3_uri},
            # The media format is taken from the file extension.
            MediaFormat=s3_url.split('.')[-1],
            LanguageCode=language_code,
            OutputBucketName=env('AWS_BUCKET_NAME'),
            OutputKey='transcriptions/',
        )

        if response:
            return Response({
                "message": "Successfully started transcription job",
                "transcription_job_name": response['TranscriptionJob']['TranscriptionJobName'],
                "aws_response": response,
            })
        return Response({"message": "Failed to start transcription job", "aws_response": response})

    def get(self, request, *args, **kwargs):
        """Return the state of a transcription job and, when done, its words.

        Expects the query parameter ``transcription-job-name``, for example
        ``api.url/transcribe/?transcription-job-name=test_transcription``.

        The response is a JSON object with the fields::

            {
                "message": "Successfully fetched transcription"
                           || "Failed to get transcription job"
                           || "Transcription job not completed yet",
                "transcription": [{"word": ..., "confidence": ..., "color": ...}],
                "aws_response": "Response from AWS",
                "status": "success" || "failed" || "pending"
            }

        ``transcription`` is only present when ``status`` is ``"success"``.
        Extra URL arguments captured by the route are accepted and ignored.
        """
        # Imported here so the submodule is guaranteed to be loaded; a bare
        # ``import urllib`` does not by itself provide ``urllib.request``.
        import urllib.request

        transcription_job_name = request.query_params.get(
            'transcription-job-name')

        if not transcription_job_name:
            return Response({"message": "Please provide the transcription job name."})

        response = transcribe_client.get_transcription_job(
            TranscriptionJobName=transcription_job_name)

        if not response:
            return Response({"message": "Failed to get transcription job", "aws_response": response, "status": "failed"})

        job = response['TranscriptionJob']
        job_status = job['TranscriptionJobStatus']

        # A failed job will never complete, so report it as failed rather
        # than as still pending.
        if job_status == 'FAILED':
            return Response({"message": "Failed to get transcription job", "aws_response": response, "status": "failed"})

        if job_status != 'COMPLETED':
            return Response({"message": "Transcription job not completed yet", "aws_response": response, "status": "pending"})

        # The transcript location is only read once the job has completed.
        s3_url = job['Transcript']['TranscriptFileUri']

        # The transcript is downloaded over plain HTTPS below, so the object
        # is made publicly readable first.
        s3_client.put_object_acl(
            ACL='public-read',
            Bucket=env('AWS_BUCKET_NAME'),
            Key="transcriptions/" + transcription_job_name + '.json',
        )
        with urllib.request.urlopen(s3_url) as transcript_file:
            data = json.loads(transcript_file.read())

        words = []
        for item in data['results']['items']:
            # Only the first alternative of each item is reported.
            best = item['alternatives'][0]
            confidence = float(best['confidence'])
            words.append({
                'word': best['content'],
                'confidence': confidence,
                'color': self._confidence_color(confidence),
            })

        return Response({"message": "Successfully fetched transcription", "transcription": words, "aws_response": response, "status": "success"})
Empty file.
3 changes: 3 additions & 0 deletions backend/backend/upload/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions backend/backend/upload/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class UploadConfig(AppConfig):
    """Django application configuration for the upload app."""

    # Dotted path of the app package, matching its INSTALLED_APPS entry.
    name = "backend.upload"
    # Auto-created primary keys use BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
Empty file.
3 changes: 3 additions & 0 deletions backend/backend/upload/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
3 changes: 3 additions & 0 deletions backend/backend/upload/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.
9 changes: 9 additions & 0 deletions backend/backend/upload/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from rest_framework import routers
from django.urls import path, include
from . import views

router = routers.DefaultRouter()

# The upload app exposes a single endpoint at the root of its URL prefix.
upload_view = views.UploadViewSet.as_view()
urlpatterns = [path('', upload_view, name='upload')]
31 changes: 31 additions & 0 deletions backend/backend/upload/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from django.shortcuts import render
from rest_framework.views import APIView
from rest_framework.response import Response
import environ
import boto3

# Environment accessor; AWS settings are looked up through it.
env = environ.Env()
environ.Env.read_env()

# Module-level S3 client shared by every upload request.
s3_client = boto3.client(
    's3',
    aws_access_key_id=env('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=env('AWS_SECRET_ACCESS_KEY'),
)


class UploadViewSet(APIView):
    """Upload an audio file to the S3 bucket named by ``AWS_BUCKET_NAME``."""

    def put(self, request):
        """Store the uploaded audio file in S3 and return its URL.

        Expects a form-data request with the file in the ``audio_file``
        field. Only files whose content type is ``audio/mpeg`` or
        ``audio/mp3`` are accepted.

        The response is a JSON object with a ``message`` and, when the file
        was uploaded, the ``s3_url`` of the object and the raw
        ``aws_response``. The object is stored under the uploaded file's
        name with the ``public-read`` ACL.
        """
        # Use .get() so a request without the field yields a readable message
        # instead of an unhandled lookup error. The response shape follows
        # the other validation replies of this API.
        audio_file = request.FILES.get('audio_file')
        if audio_file is None:
            return Response({"message": "Please provide the audio file."})

        if audio_file.content_type not in ['audio/mpeg', 'audio/mp3']:
            return Response({"message": "The file is not a valid audio file."})

        response = s3_client.put_object(
            Body=audio_file,
            Bucket=env('AWS_BUCKET_NAME'),
            Key=audio_file.name,
            ACL='public-read',
        )

        if response:
            URL = "https://{}.s3.amazonaws.com/{}".format(
                env('AWS_BUCKET_NAME'), audio_file.name)
            return Response({"message": "Successfully uploaded file", "s3_url": URL, "aws_response": response})
        return Response({"message": "Failed to upload file", "aws_response": response})
4 changes: 3 additions & 1 deletion backend/backend/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path
from django.urls import path, include

# Project-level route table: the Django admin first ...
urlpatterns = [
    path('admin/', admin.site.urls),
]

# ... followed by one URL prefix per feature app, each delegating to that
# app's own urls module.
urlpatterns += [
    path('transcribe/', include('backend.transcribe.urls')),
    path('upload/', include('backend.upload.urls')),
]
9 changes: 8 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
asgiref==3.5.0
autopep8==1.6.0
awscli==1.22.47
boto3==1.20.46
botocore==1.23.46
botocore==1.23.47
colorama==0.4.3
Django==4.0.2
django-environ==0.8.1
djangorestframework==3.13.1
docutils==0.15.2
jmespath==0.10.0
pyasn1==0.4.8
pycodestyle==2.8.0
python-dateutil==2.8.2
pytz==2021.3
PyYAML==5.4.1
rsa==4.7.2
s3transfer==0.5.0
six==1.16.0
sqlparse==0.4.2
Expand Down