-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtool.py
57 lines (44 loc) · 1.86 KB
/
tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import argparse
import speech_recognition as sr
import pydub
import itertools
from pydub import AudioSegment
import wave
import editdistance
import uuid
from longest_subsequence import LIS
import os
## `chunks` is how many chunks are processed in a single call.
## `chunk_duration` is the length (in seconds) of each audio slice sent to Google Cloud.
def googleAutoTranscript(mp3_file, origin_script_text, offset=0, chunks=5, chunk_duration=5):
    """Transcribe a window of fixed-length chunks of an MP3 via Google Speech.

    The MP3 is converted to a temporary WAV file under ``./temp``. Chunks
    ``offset`` .. ``offset + chunks - 1`` (each ``chunk_duration`` seconds
    long, the last one possibly shorter) are then sent one at a time to
    ``Recognizer.recognize_google``. Chunks that cannot be transcribed are
    skipped, so the returned lists may be shorter than ``chunks``.

    Args:
        mp3_file: Input accepted by ``AudioSegment.from_mp3``.
        origin_script_text: Reference script text. Not used by this function;
            the parameter is kept so existing callers keep working.
        offset: Index of the first chunk to process.
        chunks: Maximum number of chunks to process in this call.
        chunk_duration: Length of one chunk, in seconds.

    Returns:
        A tuple ``(text_array, time_stamp, done, max_chunks)`` where
        ``text_array`` holds the recognised text of each successful chunk,
        ``time_stamp`` holds the matching chunk start times in seconds,
        ``done`` is True when this call reached the end of the audio, and
        ``max_chunks`` is the total number of chunks in the file.
    """
    import math  # function-scope: not among this module's top-level imports

    recognizer = sr.Recognizer()
    song = AudioSegment.from_mp3(mp3_file)
    file_duration = len(song) / 1000  # pydub reports length in milliseconds

    # Round up so a trailing partial chunk (or a clip shorter than half a
    # chunk) is still transcribed; round() silently dropped that audio.
    max_chunks = int(math.ceil(file_duration / chunk_duration))

    temp_file_path = './temp/{}.wav'.format(str(uuid.uuid1()))
    temp_dir = os.path.dirname(temp_file_path)
    if not os.path.isdir(temp_dir):
        os.makedirs(temp_dir)

    text_array = []
    time_stamp = []
    try:
        # export() returns the open output handle; close it right away so the
        # file can be reopened below and removed afterwards on every platform.
        song.export(temp_file_path, format='wav').close()

        for i in range(offset, min(offset + chunks, max_chunks)):
            start = chunk_duration * i
            # The file is reopened for every chunk because record()'s offset
            # is measured from the source's current position.
            with sr.AudioFile(temp_file_path) as source:
                try:
                    audio = recognizer.record(
                        source,
                        duration=min(chunk_duration, file_duration - start),
                        offset=start,
                    )
                    text = recognizer.recognize_google(audio)
                except sr.UnknownValueError:
                    # No intelligible speech in this chunk.
                    print('nothing')
                    continue
                except Exception as exc:
                    # Stay best-effort per chunk, but surface the cause and
                    # let KeyboardInterrupt/SystemExit propagate.
                    print('nothing ({}: {})'.format(type(exc).__name__, exc))
                    continue
            print(text)
            text_array.append(text)
            time_stamp.append(start)
    finally:
        # Always clean up the temporary WAV, even when a step above failed.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    done = offset + chunks >= max_chunks
    return text_array, time_stamp, done, max_chunks