/*! (C) The Hyperaudio Project. MIT @license: en.wikipedia.org/wiki/MIT_License. */
- /*! Version 0.0.4 */
+ /*! Version 0.0.5 */
+

class WhisperService extends HTMLElement {

@@ -50,33 +51,6 @@ function loadWhisperClient(modal) {

  const whisperWorkerPath = "./js/whisper.worker.js";

- // leave the following three consts as is as they are shared by
- // web.worker.js
-
- const MessageTypes = {
-   DOWNLOADING: "DOWNLOADING",
-   LOADING: "LOADING",
-   RESULT: "RESULT",
-   RESULT_PARTIAL: "RESULT_PARTIAL",
-   INFERENCE_REQUEST: "INFERENCE_REQUEST",
-   INFERENCE_DONE: "INFERENCE_DONE"
- };
-
- const LoadingStatus = {
-   SUCCESS: "success",
-   ERROR: "error",
-   LOADING: "loading"
- };
-
- const ModelNames = {
-   WHISPER_TINY_EN: "openai/whisper-tiny.en",
-   WHISPER_TINY: "openai/whisper-tiny",
-   WHISPER_BASE: "openai/whisper-base",
-   WHISPER_BASE_EN: "openai/whisper-base.en",
-   WHISPER_SMALL: "openai/whisper-small",
-   WHISPER_SMALL_EN: "openai/whisper-small.en"
- };
-
  let webWorker = createWorker();

  formSubmitBtn.disabled = true;
@@ -85,77 +59,54 @@ function loadWhisperClient(modal) {
  });

  function createWorker() {
-   const worker = new Worker(whisperWorkerPath);
+   const worker = new Worker(whisperWorkerPath, { type: "module" });
+
    let results = [];
-   worker.onmessage = (event2) => {
-     const { type } = event2.data;
-     if (type === MessageTypes.LOADING) {
-       handleLoadingMessage(event2.data);
-     }
-     if (type === MessageTypes.DOWNLOADING) {
-       loadingMessageContainer.innerHTML = '<div class="vertically-centre"><center>Downloading model...</center><br/><img src="' + transcribingSvg + '" width="50" alt="transcribing" style="margin: auto; display: block;"></div>';
-     }
-     if (type === MessageTypes.RESULT) {
-       handleResultMessage(event2.data);
-       results = event2.data.results;
-     }
-     if (type === MessageTypes.RESULT_PARTIAL) {
-
-     }
-     if (type === MessageTypes.INFERENCE_DONE) {
-       handleInferenceDone(results);
-     }
+   worker.onmessage = (event) => {
+     handleInferenceDone(event.data);
    };

    return worker;
  }

- function handleLoadingMessage(data) {
-   const { status } = data;
-
-   if (status === LoadingStatus.SUCCESS) {
-     loadingMessageContainer.innerHTML = '<div class="vertically-centre"><center>Transcribing.... <span id="transcription-progress">0</span>%</center><br/><img src="' + transcribingSvg + '" width="50" alt="transcribing" style="margin: auto; display: block;"></div>';
-   }
-   if (status === LoadingStatus.ERROR) {
-     loadingMessageContainer.innerHTML = '<div class="vertically-centre"><center>Oops! Something went wrong. Please refresh the page and try again.</center><br/><img src="' + errorSvg + '" width="50" alt="error" style="margin: auto; display: block;"></div>';
-   }
-   if (status === LoadingStatus.LOADING) {
-     loadingMessageContainer.innerHTML = '<div class="vertically-centre"><center>Loading model into memory...</center><br/><img src="' + transcribingSvg + '" width="50" alt="transcribing" style="margin: auto; display: block;"></div>';
-   }
- }
-
- function handleResultMessage(data) {
-   const { results, completedUntilTimestamp } = data;
-   const totalDuration = videoPlayer.duration;
-   const progress = completedUntilTimestamp / totalDuration * 100;
-   document.querySelector("#transcription-progress").innerHTML = Math.round(progress);
- }
-
  function handleInferenceDone(results) {

    console.log(results);

    videoPlayer.currentTime = 0;

    let hypertranscript = "";
-   results.forEach((result) => {
-     let words = result.text.split(' ');
-     let interval = (result.end - result.start) / words.length;
-     let timecode = result.start * 1000;
-     let duration = Math.floor((interval * 1000) - 1);
-     words.forEach((word) => {
-       let start = Math.floor(timecode);
-       hypertranscript += `<span data-m='${start}' data-d='${duration}'>${word} </span>\n`;
-       timecode += interval * 1000;
-     });
-
-     // new para every 5 sentences
-     if (result.index % 5 === 0 && result.index !== 0) {
-       hypertranscript += "\n </p>\n <p>\n";
-     }
+   let sentences = 0;
+   let lastWord = "";

-     console.log(hypertranscript);
+   results.output.chunks.forEach((word) => {
+
+     // ignore text with square brackets - usually contains things like [BLANK_AUDIO]
+     if (word.text.indexOf("[") < 0 && word.text.indexOf("]") < 0) {
+       let start = Math.floor(word.timestamp[0] * 1000);
+       let duration = Math.floor((word.timestamp[1] * 1000) - 1) - start;
+       let wordCapitalised = false;
+
+       if (Array.from(word.text)[0].toUpperCase() === Array.from(word.text)[0]) {
+         wordCapitalised = true;
+       }
+
+       if (wordCapitalised === true && lastWord.endsWith(".")) {
+         sentences += 1;
+       }
+
+       lastWord = word.text;
+
+       // new para every 5 sentences
+       if (sentences % 5 === 0 && sentences !== 0) {
+         hypertranscript += "\n </p>\n <p>\n";
+         sentences = 0;
+       }
+
+       hypertranscript += `<span data-m='${start}' data-d='${duration}'>${word.text} </span>\n`;
+     }
    });
+
    resultsContainer.innerHTML = "<article>\n <section>\n <p>\n" + hypertranscript + " </p>\n </section>\n</article>\n";

    const initEvent = new CustomEvent('hyperaudioInit');
@@ -166,20 +117,21 @@ function loadWhisperClient(modal) {

  async function handleFormSubmission() {

-   if (!isFileUploaded() || !isModelNameSelected()) {
-     return;
-   }
-
-   const model_name = `openai/${modelNameSelectionInput.value}`;
+   const model_name = modelNameSelectionInput.value;
    const file = fileUploadBtn.files[0];
    const audio = await readAudioFrom(file);

    webWorker.postMessage({
-     type: MessageTypes.INFERENCE_REQUEST,
+     type: "INFERENCE_REQUEST",
      audio,
      model_name
    });
+
+   console.log("web worker");
+   console.log(webWorker);
    videoPlayer.src = URL.createObjectURL(file);
+
+   loadingMessageContainer.innerHTML = '<div class="vertically-centre"><center>Transcribing.... </center><br/><img src="' + transcribingSvg + '" width="50" alt="transcribing" style="margin: auto; display: block;"></div>';
  }

  async function readAudioFrom(file) {
@@ -190,20 +142,4 @@ function loadWhisperClient(modal) {
    const audio = decoded.getChannelData(0);
    return audio;
  }
-
- function isFileUploaded() {
-   if (fileUploadBtn.files.length === 0) {
-     return false;
-   }
-   return true;
- }
-
- function isModelNameSelected() {
-   const selectedValue = modelNameSelectionInput.value;
-   if (modelNameSelectionInput.value === "") {
-     return false;
-   }
-   const modelName = `openai/${selectedValue}`;
-   return Object.values(ModelNames).indexOf(modelName) !== -1;
- }
}
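
The worker side of this contract is not shown in the diff: the main thread now posts a single "INFERENCE_REQUEST" message and expects one reply whose data carries output.chunks. A minimal sketch of a compatible whisper.worker.js, assuming transformers.js with word-level timestamps (the pipeline options, the lazy-load pattern, and the single postMessage below are assumptions, not code from this commit):

// whisper.worker.js — minimal sketch, assuming transformers.js.
// Loaded as a module worker, matching new Worker(path, { type: "module" }).
import { pipeline } from "@xenova/transformers";

let transcriber = null;

self.onmessage = async (event) => {
  const { type, audio, model_name } = event.data;
  if (type !== "INFERENCE_REQUEST") return;

  // Lazily create the ASR pipeline for the selected model
  // (assumes model_name is an id transformers.js can load).
  transcriber ??= await pipeline("automatic-speech-recognition", model_name);

  // Word-level timestamps yield output.chunks = [{ text, timestamp: [start, end] }, ...],
  // the shape handleInferenceDone() reads on the main thread.
  const output = await transcriber(audio, {
    return_timestamps: "word",
    chunk_length_s: 30,
  });

  self.postMessage({ output });
};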
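
For reference, handleInferenceDone() turns each chunk into a Hyperaudio word span, with data-m as the start time and data-d as the duration, both in milliseconds. An illustrative chunk { text: " Hello", timestamp: [1.0, 1.35] } renders as:

<span data-m='1000' data-d='349'> Hello </span>

A paragraph break ("</p><p>") is emitted after every fifth sentence boundary, where a sentence boundary is detected as a capitalised word following a word that ends in a full stop.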