-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_strong_1.json
1110 lines (1110 loc) · 389 KB
/
test_strong_1.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{"captions": "People cry and talk in a noisy crowd.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.898", "5.961", "7.213", "9.031"], ["5.425", "6.89", "8.197", "10.0"], ["talk", "talk", "talk", "talk"]]}
{"captions": "Alarms and buzzers sound with speech.", "data": [["Alarm", "Alarm", "Male speech, man speaking"], ["0.142", "3.401", "5.236"], ["2.535", "10.0", "10.0"], ["Alarm", "Alarm", "speech"]]}
{"captions": "Cats and other pets are making sounds.", "data": [["Cat"], ["0.0"], ["10.0"], ["Cats"]]}
{"captions": "Women are speaking and cutlery is heard in a small room.", "data": [["Female speech, woman speaking", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Female speech, woman speaking", "Cutlery, silverware", "Female speech, woman speaking", "Cutlery, silverware"], ["1.386", "4.055", "4.78", "6.008", "4.079", "6.858", "7.472", "7.709"], ["3.614", "4.591", "5.157", "6.37", "4.535", "7.173", "9.205", "8.307"], ["speaking", "cutlery", "cutlery", "cutlery", "speaking", "cutlery", "speaking", "cutlery"]]}
{"captions": "Noise and the sounds of dishes, pots, and pans being moved, footsteps, and a drawer opening and closing are present.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans"], ["0.601", "1.213", "2.228", "3.22", "3.647", "4.532", "6.445", "7.087", "9.566"], ["0.925", "1.488", "2.528", "3.439", "4.017", "5.301", "6.861", "7.948", "10.0"], ["Noise", "Noise", "Noise", "Noise", "Noise", "Noise", "Noise", "Noise", "Noise"]]}
{"captions": "People speak amidst the sound of mechanisms, beeping, tearing, and ticking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.992", "5.331", "7.094", "8.094", "9.575"], ["3.748", "5.047", "6.087", "7.921", "9.15", "10.0"], ["amidst", "amidst", "amidst", "amidst", "amidst", "amidst"]]}
{"captions": "Mechanisms and women speaking are heard, followed by music.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.008", "0.813", "2.339", "2.791", "7.728"], ["0.402", "1.802", "2.64", "3.202", "8.776"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Fire alarms blare intermittently.", "data": [["Fire alarm", "Alarm", "Alarm"], ["0.59", "4.64", "8.655"], ["3.253", "7.316", "10.0"], ["blare", "blare", "blare"]]}
{"captions": "Telephone ringing, dialing, and speech occur in a small room amidst laughter and hubbub.", "data": [["Telephone bell ringing", "Speech", "Telephone bell ringing", "Male speech, man speaking"], ["0.52", "5.047", "4.142", "7.591"], ["1.433", "6.535", "5.055", "10.0"], ["ringing", "speech", "ringing", "speech"]]}
{"captions": "Insects and animals are heard, men are speaking and having conversations.", "data": [["Male speech, man speaking", "Conversation", "Male speech, man speaking", "Conversation"], ["2.646", "3.717", "8.307", "9.394"], ["3.307", "4.063", "8.969", "10.0"], ["conversations", "speaking", "conversations", "speaking"]]}
{"captions": "A man is speaking on the radio and beeping is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.512", "5.811", "6.63", "7.063", "7.339"], ["1.346", "5.315", "6.047", "6.937", "7.26", "7.929"], ["radio", "radio", "radio", "radio", "radio", "radio"]]}
{"captions": "Mechanisms and alarms sound, with meowing and ticking in the background.", "data": [["Alarm", "Alarm"], ["0.299", "1.307"], ["1.047", "2.197"], ["alarms", "alarms"]]}
{"captions": "An alarm and siren blare.", "data": [["Alarm"], ["0.0"], ["10.0"], ["siren"]]}
{"captions": "Various mechanisms make sounds in the background while a vacuum cleaner is used and there is female speech.", "data": [["Female speech, woman speaking", "Speech", "Female speech, woman speaking", "Vacuum cleaner", "Speech", "Vacuum cleaner"], ["0.0", "0.888", "3.851", "0.0", "6.221", "8.642"], ["0.346", "2.239", "4.086", "8.499", "7.259", "10.0"], ["speech", "female", "speech", "vacuum", "female", "vacuum"]]}
{"captions": "Sawing and speaking sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.094", "7.551"], ["0.724", "3.74", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "Stirring and conversation can be heard in a small room.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.598", "4.614", "4.85", "6.472", "8.85"], ["3.976", "4.827", "5.362", "7.74", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Tapping and ticking sounds can be heard with speech and unknown sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.685", "4.984", "6.008"], ["3.047", "5.898", "7.094"], ["speech", "speech", "speech"]]}
{"captions": "Men speaking and filing sounds are heard.", "data": [["Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.079", "2.48", "4.732"], ["0.961", "2.236", "3.717", "9.22"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Female speech and laughter mix with the sound of water from a faucet, breathing, and toothbrushing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Water tap, faucet"], ["0.0", "3.157", "4.468", "4.935"], ["1.05", "4.358", "4.715", "6.006"], ["speech", "speech", "speech", "faucet"]]}
{"captions": "A blender, man speaking, and shouting can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Blender", "Male speech, man speaking"], ["1.909", "4.014", "5.13", "0.0", "8.583"], ["3.829", "4.806", "6.848", "10.0", "10.0"], ["speaking", "speaking", "speaking", "blender", "speaking"]]}
{"captions": "A woman is speaking and taking pictures with crickets chirping.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.622", "2.614", "3.039", "3.496", "6.945"], ["0.417", "1.677", "2.803", "3.346", "5.921", "8.016"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking, artillery fire is heard, and a woman speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.006", "0.873", "1.254", "4.751", "7.441", "9.457"], ["0.41", "1.11", "4.509", "7.075", "7.732", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "woman"]]}
{"captions": "Speech and breathing can be heard along with computer keyboard sounds in a small room.", "data": [["Speech", "Speech", "Female speech, woman speaking", "Speech"], ["0.007", "6.993", "7.556", "8.398"], ["1.704", "7.332", "8.106", "8.968"], ["Speech", "Speech", "Speech", "Speech"]]}
{"captions": "A group of people are talking and splashing in water.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.326", "3.116", "4.992"], ["2.481", "4.496", "10.0"], ["talking", "talking", "talking"]]}
{"captions": "Tapping sounds are heard while a man speaks amidst mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.719", "4.397", "5.135", "8.731"], ["4.211", "4.965", "8.072", "9.326"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Tapping and ticking sounds accompany speeches from a woman while dishes and pans sizzle in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Dishes, pots, and pans", "Sizzle"], ["0.646", "6.244", "7.827", "5.142", "5.717"], ["1.984", "7.425", "9.157", "5.339", "10.0"], ["speeches", "speeches", "speeches", "sizzle", "pans"]]}
{"captions": "Sounds of dishes, crinkling, scraping, and pots and pans can be heard.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans"], ["0.008", "8.593"], ["1.142", "9.273"], ["crinkling", "crinkling"]]}
{"captions": "People are speaking and laughing with alarms and smoke detectors going off.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Speech", "Smoke detector, smoke alarm", "Male speech, man speaking", "Alarm", "Speech", "Alarm", "Speech", "Smoke detector, smoke alarm", "Male speech, man speaking", "Smoke detector, smoke alarm"], ["0.008", "1.165", "2.126", "2.323", "3.165", "3.827", "4.906", "5.606", "6.669", "7.142", "7.638", "9.598"], ["0.583", "1.614", "2.528", "3.236", "4.37", "4.858", "5.386", "6.661", "7.472", "8.15", "9.969", "10.0"], ["speaking", "speaking", "speaking", "alarms", "speaking", "alarms", "speaking", "alarms", "speaking", "alarms", "speaking", "alarms"]]}
{"captions": "Frying and sizzling sounds mix with female speech and stirring.", "data": [["Female speech, woman speaking", "Sizzle", "Female speech, woman speaking"], ["0.858", "0.0", "4.299"], ["1.677", "10.0", "6.354"], ["speech", "sizzling", "speech"]]}
{"captions": "A woman is speaking while music is playing and another woman is breathing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "3.74", "5.474"], ["3.315", "4.723", "6.786"], ["speaking", "speaking", "speaking"]]}
{"captions": "People speak, breathe, and various alarms go off.", "data": [["Speech", "Alarm", "Smoke detector, smoke alarm", "Smoke detector, smoke alarm", "Male speech, man speaking", "Smoke detector, smoke alarm"], ["0.069", "6.458", "7.408", "8.428", "1.482", "9.405"], ["0.462", "7.138", "8.136", "9.15", "5.417", "10.0"], ["speak", "alarms", "alarms", "alarms", "speak", "alarms"]]}
{"captions": "Men are speaking and slamming sounds are heard with a child speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking"], ["0.382", "4.751", "5.803", "7.809"], ["1.324", "5.497", "7.347", "9.283"], ["speaking", "speaking", "speaking", "child"]]}
{"captions": "A man speaks, music plays, an air horn sounds, and several men sing.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.539"], ["0.306", "0.859"], ["speaks", "speaks"]]}
{"captions": "Women are speaking and using cutlery while cooking food.", "data": [["Female speech, woman speaking", "Cutlery, silverware", "Female speech, woman speaking", "Cutlery, silverware", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)", "Cutlery, silverware"], ["0.0", "0.969", "1.921", "3.764", "3.78", "5.898", "7.772", "0.0", "4.827"], ["0.866", "1.094", "3.354", "3.882", "5.276", "7.008", "10.0", "10.0", "4.984"], ["speaking", "cutlery", "speaking", "cutlery", "speaking", "speaking", "speaking", "cooking", "cutlery"]]}
{"captions": "A woman is speaking and cutting with scissors in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.37", "4.661", "5.331", "7.016"], ["1.724", "4.433", "5.071", "5.913", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Animals are growling, barking, and making noise.", "data": [["Dog", "Dog", "Dog", "Dog"], ["0.007", "5.533", "7.902", "9.131"], ["4.983", "7.556", "8.629", "10.0"], ["barking", "barking", "barking", "barking"]]}
{"captions": "Women speak and cupboards and doors open and close.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["2.044", "3.87"], ["2.437", "4.474"], ["speak", "speak"]]}
{"captions": "Noise and music accompany male speeches.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["5.354", "5.813", "8.415"], ["5.607", "7.776", "10.0"], ["speech", "speech", "speech"]]}
{"captions": "Various sounds of clinking glasses and dishes, as well as coins dropping, are heard with music in the background.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans"], ["2.039", "3.189"], ["2.488", "3.646"], ["clinking", "clinking"]]}
{"captions": "Cooking sounds like sizzling and dishes clanging can be heard in a small room.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Sizzle"], ["0.0", "5.323", "5.898", "0.0", "7.441", "0.0"], ["1.315", "5.535", "6.654", "1.252", "9.685", "10.0"], ["clanging", "clanging", "clanging", "sizzling", "clanging", "sizzling"]]}
{"captions": "Fireworks are exploding, with men speaking and people making various sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.349", "6.977"], ["0.93", "7.625"], ["speaking", "speaking"]]}
{"captions": "Breathing and female speech can be heard with mechanisms and a sliding door.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.732", "3.984", "7.756"], ["1.126", "5.669", "10.0"], ["speech", "speech", "speech"]]}
{"captions": "Clicking sounds dominate, along with a ringing telephone and some speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Telephone bell ringing", "Female speech, woman speaking"], ["3.276", "4.181", "7.197", "8.205", "1.693", "9.748"], ["3.748", "5.094", "7.756", "9.425", "3.213", "10.0"], ["speech", "speech", "speech", "speech", "ringing", "speech"]]}
{"captions": "Conversations and female speech are heard inside a small room, with laughter and hums.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.921", "4.252", "6.071", "7.614"], ["1.606", "3.921", "4.52", "6.898", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Glass shatters among mechanisms and speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Male speech, man speaking"], ["0.289", "2.614", "3.736", "4.546", "8.89", "9.312", "9.855"], ["2.14", "3.389", "4.078", "5.234", "9.219", "9.52", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Truck sounds, human speech, whistling heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.11", "2.504", "4.134", "7.763"], ["0.378", "3.906", "5.055", "8.957"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Male speech alternates with clicking, breathing, tapping, and human sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.962", "3.91", "5.248", "6.945", "7.217", "8.167"], ["0.842", "2.451", "4.46", "6.09", "7.149", "7.414", "9.009"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A woman is speaking, birds are chirping and people are walking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.157", "4.85", "6.362", "8.016", "9.504"], ["1.457", "2.78", "5.803", "7.126", "8.772", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A woman speaking, rapping, singing, and whooping are heard.", "data": [["Female speech, woman speaking"], ["0.0"], ["0.63"], ["speaking"]]}
{"captions": "A man is speaking and wind noise is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.243"], ["0.652", "4.602"], ["speaking", "speaking"]]}
{"captions": "Women are speaking, dogs are whimpering, animals are making sounds, and speech and wind noise are heard.", "data": [["Dog", "Female speech, woman speaking", "Female speech, woman speaking", "Dog", "Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Dog"], ["0.0", "0.0", "1.976", "0.858", "3.22", "5.567", "9.622", "4.882"], ["0.535", "0.543", "2.685", "1.283", "4.677", "6.646", "9.843", "5.252"], ["whimpering", "speaking", "speaking", "whimpering", "speaking", "speaking", "speaking", "whimpering"]]}
{"captions": "People talk and run in a busy road environment.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.094", "4.134", "5.307", "5.921", "7.071", "7.567", "8.764"], ["4.087", "4.646", "5.551", "6.827", "7.465", "7.992", "9.071"], ["talk", "talk", "talk", "talk", "talk", "talk", "talk"]]}
{"captions": "Conversations and busy signals fill the air.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.541", "3.266", "3.795", "7.271", "8.656", "9.912"], ["3.028", "3.741", "7.149", "8.547", "9.742", "10.0"], ["Conversations", "Conversations", "Conversations", "Conversations", "Conversations", "Conversations"]]}
{"captions": "Men and women are speaking, having phone conversations, tapping, and talking.", "data": [["Male speech, man speaking", "Conversation", "Female speech, woman speaking", "Conversation", "Telephone bell ringing"], ["0.0", "5.544", "6.603", "8.723", "4.103"], ["3.342", "5.861", "8.188", "10.0", "6.395"], ["speaking", "talking", "talking", "talking", "tapping"]]}
{"captions": "An alarm and beeping sounds alternate with music and ratcheting noises.", "data": [["Alarm"], ["0.0"], ["0.906"], ["beeping"]]}
{"captions": "A motor vehicle honks, children and a man speak, and a revving engine is heard intermittently.", "data": [["Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["3.661", "4.52", "6.409", "6.969", "8.291", "9.504"], ["4.488", "5.307", "6.843", "7.583", "9.024", "10.0"], ["speak", "children", "children", "speak", "children", "children"]]}
{"captions": "A blender and mechanisms can be heard in a small room.", "data": [["Blender"], ["0.0"], ["10.0"], ["blender"]]}
{"captions": "Men are speaking, with buzzing and bird songs also heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.219", "7.014"], ["1.915", "5.635", "9.698"], ["speaking", "speaking", "speaking"]]}
{"captions": "A woman and a man are having a conversation with tapping sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.685", "3.173", "4.709", "5.386", "6.898", "7.764", "9.449"], ["1.323", "2.945", "4.512", "5.228", "5.827", "7.417", "9.165", "10.0"], ["woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman"]]}
{"captions": "Electric shaver and man speaking in a small room, with speech and man speaking can be heard.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Electric shaver, electric razor", "Male speech, man speaking"], ["2.882", "5.37", "7.079", "8.409", "9.307", "1.323", "9.803"], ["3.268", "6.039", "7.835", "8.961", "9.709", "10.0", "10.0"], ["speaking", "speech", "speech", "speech", "speaking", "shaver", "speech"]]}
{"captions": "Men are speaking and cooking food in a small room with sizzling sounds.", "data": [["Speech", "Male speech, man speaking", "Speech", "Speech", "Speech", "Speech", "Male speech, man speaking", "Sizzle"], ["0.0", "0.728", "4.796", "5.507", "6.713", "7.515", "9.777", "0.005"], ["0.382", "4.461", "5.199", "6.495", "7.371", "8.651", "10.0", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "cooking"]]}
{"captions": "A man and woman are speaking with hubbub and more men speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.188", "4.185", "6.382", "8.187"], ["1.937", "3.938", "5.546", "7.972", "9.516"], ["hubbub", "hubbub", "hubbub", "hubbub", "hubbub"]]}
{"captions": "Waves are crashing and people are speaking and making noise with wind noise in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.079", "6.047", "6.85", "8.244"], ["0.937", "6.236", "8.165", "8.575"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Mechanisms and cupboards make noise while a man speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["4.257", "8.683", "9.342"], ["5.119", "8.955", "9.837"], ["speaks", "speaks", "speaks"]]}
{"captions": "People laugh, talk, and make specific impact sounds while a television plays.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.179", "2.439", "8.653", "9.156"], ["2.046", "3.185", "8.994", "10.0"], ["talk", "talk", "talk", "talk"]]}
{"captions": "Men shout and speak several times.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.111", "3.338", "4.134"], ["0.582", "3.072", "3.793", "4.564"], ["shout", "shout", "shout", "shout"]]}
{"captions": "Keys jangle, doors open and close, people speak, and a fire alarm goes off.", "data": [["Speech", "Female speech, woman speaking", "Fire alarm"], ["4.0", "9.701", "8.26"], ["6.039", "10.0", "9.173"], ["speak", "speak", "jangle"]]}
{"captions": "Women speak amidst background noise and music.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["3.672", "7.039", "8.858"], ["6.142", "7.835", "9.449"], ["speak", "speak", "speak"]]}
{"captions": "An electric shaver is heard, with a car passing by.", "data": [["Electric shaver, electric razor"], ["0.0"], ["10.0"], ["shaver"]]}
{"captions": "A bathtub is being filled or washed.", "data": [["Bathtub (filling or washing)", "Bathtub (filling or washing)"], ["0.0", "6.748"], ["6.598", "10.0"], ["filled", "filled"]]}
{"captions": "Men are speaking and laughing with crickets chirping and a firecracker going off.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.654", "3.354", "5.654", "6.315", "9.293"], ["2.52", "3.268", "5.394", "6.087", "8.622", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A large group of people are laughing, clapping, shouting, and having a conversation in a large room or hall.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.503", "7.638", "9.606"], ["6.877", "7.834", "10.0"], ["conversation", "conversation", "conversation"]]}
{"captions": "People are speaking, laughing, and kids are speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech"], ["1.15", "3.654", "5.591", "7.559", "8.362"], ["2.299", "5.496", "7.197", "8.165", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking and making various sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "2.258", "4.475", "5.882", "9.794", "6.898"], ["1.606", "2.78", "4.99", "7.049", "10.0", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "sounds"]]}
{"captions": "Wind noise and tick sounds mix with cat meows, mechanisms, and sound effects.", "data": [["Cat", "Cat"], ["0.945", "2.528"], ["1.283", "3.173"], ["meows", "meows"]]}
{"captions": "People whistle, breathe, and speak, with various whistling sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.066", "4.987", "6.601", "7.334", "7.961", "8.922"], ["4.801", "6.463", "7.047", "7.791", "8.768", "9.745"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A doorbell rings multiple times while a dog howls in the background.", "data": [["Dog", "Dog"], ["0.0", "5.583"], ["1.591", "7.795"], ["howls", "howls"]]}
{"captions": "Engine and wind sounds, speech from children and adults, and aircraft engine sounds are heard.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Speech", "Child speech, kid speaking", "Speech", "Child speech, kid speaking", "Speech", "Child speech, kid speaking", "Speech"], ["2.023", "2.512", "3.038", "3.914", "4.397", "5.358", "6.267", "7.074", "9.246"], ["2.331", "2.841", "3.383", "4.19", "4.849", "5.943", "6.718", "7.355", "9.687"], ["children", "children", "children", "children", "children", "children", "children", "children", "children"]]}
{"captions": "A man is speaking and using an electric shaver.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "5.843", "1.717"], ["1.039", "10.0", "5.819"], ["speaking", "speaking", "shaver"]]}
{"captions": "Mechanisms produce beeping sounds and fire alarms go off.", "data": [["Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm"], ["2.397", "3.347", "4.379", "6.334", "7.346", "8.357"], ["2.994", "4.026", "5.024", "7.006", "8.004", "9.002"], ["beeping", "beeping", "beeping", "beeping", "beeping", "beeping"]]}
{"captions": "Food is being fried and dishes are being handled, with a woman speaking and pouring sounds.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)", "Dishes, pots, and pans", "Female speech, woman speaking"], ["1.354", "3.457", "4.146", "4.853", "2.15", "6.701", "0.0", "5.346", "9.717"], ["1.78", "3.701", "4.372", "4.983", "2.827", "9.008", "6.551", "6.504", "10.0"], ["handled", "handled", "handled", "handled", "speaking", "speaking", "fried", "handled", "speaking"]]}
{"captions": "Conversations and speeches with laughter.", "data": [["Conversation", "Speech", "Speech", "Male speech, man speaking"], ["0.0", "2.945", "4.63", "5.772"], ["1.646", "4.378", "5.528", "8.244"], ["laughter", "laughter", "laughter", "speech"]]}
{"captions": "Music plays with intermittent alarms and female speech.", "data": [["Female speech, woman speaking", "Alarm", "Female speech, woman speaking", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Female speech, woman speaking", "Alarm", "Female speech, woman speaking", "Alarm", "Alarm", "Alarm", "Female speech, woman speaking"], ["0.197", "0.005", "0.827", "0.854", "1.8", "2.705", "3.664", "4.623", "5.532", "1.243", "6.519", "7.054", "7.419", "8.337", "9.301", "7.862"], ["0.489", "0.402", "1.005", "1.54", "2.335", "3.184", "4.555", "5.464", "6.364", "1.444", "7.314", "7.337", "8.173", "9.027", "9.918", "10.0"], ["speech", "alarms", "speech", "alarms", "alarms", "alarms", "alarms", "alarms", "alarms", "speech", "alarms", "speech", "alarms", "alarms", "alarms", "speech"]]}
{"captions": "People are talking and a woman is speaking, crying and shouting.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "5.182", "6.427", "7.778", "9.138"], ["4.64", "6.267", "7.28", "8.107", "10.0"], ["talking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking and creaking is heard in the background.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.842", "2.787", "3.799", "4.243", "5.548", "5.933", "8.127", "9.145"], ["0.359", "2.324", "3.303", "4.158", "4.778", "5.816", "7.709", "8.577", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are speaking, pets meowing and caterwauling, in a small room.", "data": [["Male speech, man speaking", "Cat", "Speech", "Cat", "Male speech, man speaking", "Cat"], ["0.008", "0.906", "2.764", "5.378", "6.039", "7.559"], ["2.425", "3.276", "4.394", "6.346", "7.331", "10.0"], ["speaking", "caterwauling", "speaking", "caterwauling", "speaking", "caterwauling"]]}
{"captions": "Music plays as a man speaks, knocks are heard, and more men speak.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.816", "3.193", "5.315", "7.953"], ["2.313", "3.742", "6.923", "9.346"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A woman is speaking, clicking and typing on a computer keyboard with music in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.005", "0.643", "1.885", "4.227", "5.486", "5.751"], ["0.335", "1.529", "2.825", "4.615", "5.666", "6.41"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Children are speaking with intermittent ticking and surface contact sounds.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.0", "1.538", "2.555", "3.636", "4.913", "6.399", "7.509", "8.786"], ["0.809", "2.075", "3.416", "4.538", "6.023", "7.312", "8.353", "9.746"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Radio and conversation noise mix with a cacophony while people talk and listen to radio stations.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "3.299", "5.031", "5.677", "7.598", "9.921"], ["2.236", "4.378", "5.449", "6.906", "9.323", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Children are speaking, music is playing, and a woman is laughing.", "data": [["Conversation", "Speech", "Child speech, kid speaking", "Child speech, kid speaking", "Female speech, woman speaking"], ["0.0", "2.283", "3.89", "6.953", "8.929"], ["1.929", "3.378", "5.378", "8.496", "10.0"], ["speaking", "speaking", "speaking", "speaking", "laughing"]]}
{"captions": "People are talking and laughing, with an ambulance siren in the background.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "3.724", "5.378", "9.071"], ["3.63", "4.472", "8.709", "10.0"], ["talking", "talking", "talking", "talking"]]}
{"captions": "Unknown sounds and clicking can be heard with a woman speaking and a smoke alarm going off.", "data": [["Female speech, woman speaking", "Smoke detector, smoke alarm"], ["2.307", "5.551"], ["4.094", "10.0"], ["speaking", "smoke alarm"]]}
{"captions": "A man is speaking, and coins and mechanisms sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.961", "3.409", "5.685", "7.693"], ["0.654", "2.622", "5.11", "6.969", "8.787"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking and mechanisms and a vacuum cleaner are heard.", "data": [["Male speech, man speaking", "Vacuum cleaner"], ["3.564", "5.472"], ["4.189", "10.0"], ["speaking", "vacuum"]]}
{"captions": "Music accompanies male speech, tapping, tool noises, and more male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.128", "0.448", "1.636", "1.951", "2.444", "5.19", "5.478", "6.46", "7.172", "7.684", "8.094"], ["0.32", "1.512", "1.859", "2.348", "3.118", "5.391", "6.286", "7.016", "7.615", "7.844", "8.843"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Background noise with a woman speaking and human sounds heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.197", "4.039", "5.614", "7.213", "9.055"], ["2.606", "4.882", "6.094", "7.858", "9.386"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Sounds of music and speech with male speeches.", "data": [["Speech", "Male speech, man speaking", "Speech"], ["0.17", "3.938", "8.574"], ["1.392", "4.474", "9.192"], ["speeches", "speeches", "speeches"]]}
{"captions": "A chime and ringtone are heard.", "data": [["Ringtone"], ["2.543"], ["10.0"], ["chime"]]}
{"captions": "People are talking, laughing, and clapping in a crowded room.", "data": [["Conversation", "Female speech, woman speaking", "Male speech, man speaking"], ["4.913", "6.354", "7.929"], ["5.913", "6.701", "10.0"], ["talking", "talking", "talking"]]}
{"captions": "A man speaks repeatedly interrupted by ticking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.52", "1.559", "2.811", "3.189", "4.811", "5.11", "5.449", "5.795", "6.071", "6.441", "6.701", "7.614", "8.354"], ["0.425", "0.654", "2.15", "3.024", "3.591", "5.016", "5.315", "5.622", "5.937", "6.291", "6.598", "7.52", "7.764", "9.26"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Alarm clocks and wails are ringing with music and alarms.", "data": [["Alarm clock", "Alarm", "Alarm", "Alarm clock", "Alarm", "Alarm", "Alarm clock", "Alarm clock", "Alarm clock", "Alarm clock", "Alarm", "Alarm clock", "Alarm clock", "Alarm clock", "Alarm clock", "Alarm", "Alarm clock", "Alarm clock", "Alarm clock", "Alarm clock"], ["0.866", "1.37", "1.85", "2.323", "2.787", "3.26", "3.764", "4.094", "4.709", "5.15", "5.669", "6.11", "6.583", "7.031", "7.48", "8.0", "8.488", "9.031", "9.465", "9.898"], ["1.26", "1.717", "2.189", "2.661", "3.134", "3.622", "4.031", "4.543", "5.079", "5.535", "5.976", "6.504", "6.976", "7.441", "7.937", "8.362", "8.976", "9.402", "9.835", "10.0"], ["alarms", "wails", "wails", "alarms", "wails", "wails", "alarms", "alarms", "alarms", "alarms", "wails", "alarms", "alarms", "alarms", "alarms", "wails", "alarms", "alarms", "alarms", "alarms"]]}
{"captions": "A man is sanding and speaking, with mechanisms in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.441", "5.78", "7.835", "8.85"], ["1.244", "2.22", "7.307", "8.756", "9.78"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A mechanical fan is heard followed by several instances of male speech and spray.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "1.189", "2.827", "5.079", "7.701", "8.197", "9.228"], ["1.0", "1.496", "4.646", "6.528", "8.063", "8.441", "9.858"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A man is speaking in a car and the power windows make a ticking sound.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "8.228"], ["4.0", "10.0"], ["speaking", "speaking"]]}
{"captions": "Men speak and tap in a noisy environment.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.874", "7.961", "8.417"], ["0.614", "6.283", "8.15", "8.646"], ["speak", "speak", "speak", "speak"]]}
{"captions": "A man speaks with the sound of barking dogs in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.365", "4.681", "8.02"], ["0.124", "2.825", "6.139", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Music and sizzling sounds accompany a man singing and speaking, and slapping is heard.", "data": [["Sizzle", "Male speech, man speaking"], ["0.024", "5.693"], ["9.969", "5.913"], ["sizzling", "singing"]]}
{"captions": "A man is speaking with background noise and a train whistle.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.063", "1.488", "5.661", "7.394", "9.118"], ["1.11", "2.575", "7.016", "7.921", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Alarms, beeps, and smoke detectors are making noises.", "data": [["Smoke detector, smoke alarm", "Smoke detector, smoke alarm", "Alarm", "Smoke detector, smoke alarm", "Alarm"], ["3.772", "4.748", "5.724", "7.685", "8.701"], ["4.268", "5.26", "6.244", "8.236", "9.205"], ["Alarms", "Alarms", "Alarms", "Alarms", "Alarms"]]}
{"captions": "A woman is speaking and a caterwaul is heard in the background with mechanisms.", "data": [["Female speech, woman speaking"], ["1.005"], ["4.266"], ["speaking"]]}
{"captions": "People are speaking and laughing in a public space with a fart.", "data": [["Female speech, woman speaking", "Male speech, man speaking"], ["1.093", "7.733"], ["2.234", "8.975"], ["speaking", "speaking"]]}
{"captions": "People are speaking, narrating, and giving speeches.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "1.913", "5.087", "8.031", "9.724"], ["1.071", "4.48", "6.661", "8.85", "10.0"], ["speaking", "speaking", "speeches", "speaking", "speaking"]]}
{"captions": "Fan noise and women's voices and giggles can be heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["3.326", "4.853", "5.298", "6.362", "6.998", "9.179"], ["4.685", "5.061", "6.171", "6.935", "7.582", "9.711"], ["voices", "voices", "voices", "voices", "voices", "voices"]]}
{"captions": "A man speaks as a snake hisses and rattles.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.822", "2.437", "6.952", "7.441", "7.889", "8.771", "9.742"], ["0.306", "1.806", "2.98", "7.156", "7.746", "8.35", "9.375", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "People are having a conversation, music is playing and rodents are heard in the background.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.197", "5.283"], ["3.087", "4.323", "10.0"], ["conversation", "conversation", "conversation"]]}
{"captions": "Speech and conversation occur with mechanisms.", "data": [["Female speech, woman speaking", "Speech", "Conversation"], ["0.0", "4.318", "8.092"], ["1.134", "4.895", "9.328"], ["mechanisms", "Speech", "mechanisms"]]}
{"captions": "Male speech accompanies pulleys and mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.006", "2.289", "4.197", "6.936", "9.543"], ["0.549", "2.168", "3.844", "6.567", "9.236", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "An alarm clock is heard, alternating between ringing and silence.", "data": [["Alarm", "Alarm", "Alarm", "Alarm clock", "Alarm clock", "Alarm", "Alarm clock", "Alarm", "Alarm clock", "Alarm"], ["0.0", "0.632", "1.627", "2.622", "3.638", "4.633", "5.649", "6.637", "7.632", "8.648"], ["0.089", "1.16", "2.19", "3.171", "4.214", "5.196", "6.198", "7.172", "8.168", "9.211"], ["ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing"]]}
{"captions": "Stirring and tapping sounds are heard, along with female voices.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Sizzle", "Female speech, woman speaking"], ["5.09", "6.773", "8.832", "0.0", "9.433"], ["6.356", "7.987", "9.277", "10.0", "9.884"], ["voices", "voices", "voices", "Stirring", "voices"]]}
{"captions": "Man speaking, air horn, truck horn, tapping, and man speaking can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.614", "3.134", "9.378"], ["0.449", "2.984", "7.433", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Machine guns, beeps, human voices, and tapping can be heard amid sound effects and music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.716", "4.617", "9.387"], ["4.034", "8.984", "9.98"], ["voices", "voices", "voices"]]}
{"captions": "A man is speaking in conversation with music, a blender, and more male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Blender"], ["0.0", "3.827", "5.787", "7.669", "9.039", "0.819"], ["3.701", "5.228", "7.472", "8.803", "10.0", "3.26"], ["conversation", "conversation", "conversation", "conversation", "conversation", "blender"]]}
{"captions": "Animals and dogs are barking in the background.", "data": [["Dog", "Dog", "Dog"], ["0.007", "1.358", "4.881"], ["0.951", "4.039", "10.0"], ["barking", "barking", "barking"]]}
{"captions": "Music, conversation, and speech by men and women are heard.", "data": [["Conversation", "Conversation"], ["1.188", "4.426"], ["3.117", "5.236"], ["speech", "speech"]]}
{"captions": "A conversation is taking place with background noise, a man and woman are speaking, a child is crying, and people are laughing.", "data": [["Conversation", "Female speech, woman speaking", "Male speech, man speaking"], ["0.0", "8.787", "9.417"], ["3.843", "9.213", "10.0"], ["speaking", "woman", "man"]]}
{"captions": "A man is speaking and mechanisms are clicking and drilling.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.953", "2.701"], ["1.567", "2.551", "5.685"], ["speaking", "speaking", "speaking"]]}
{"captions": "A mains hum is heard along with a smoke alarm and male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Smoke detector, smoke alarm", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Smoke detector, smoke alarm"], ["0.082", "2.732", "3.717", "4.688", "5.433", "6.354", "7.772", "9.031", "4.858"], ["2.093", "3.22", "4.346", "4.887", "6.087", "7.283", "8.614", "10.0", "5.472"], ["speech", "speech", "alarm", "speech", "speech", "speech", "speech", "speech", "alarm"]]}
{"captions": "A man is speaking and insects are chirping.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.898", "5.778", "7.056"], ["1.122", "4.257", "6.645", "7.345"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking, washing glasses, and using cutlery while water runs and taps are turned on and off.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Sink (filling or washing)", "Male speech, man speaking", "Cutlery, silverware"], ["0.008", "2.197", "3.961", "4.984", "6.425", "0.0", "7.268", "9.709"], ["1.827", "3.039", "4.488", "5.693", "7.126", "10.0", "8.906", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "washing", "speaking", "cutlery"]]}
{"captions": "A woman is speaking, typing, and breathing.", "data": [["Female speech, woman speaking", "Speech", "Female speech, woman speaking", "Speech", "Female speech, woman speaking"], ["0.0", "2.213", "4.976", "5.386", "6.913"], ["2.063", "3.22", "5.268", "6.236", "9.22"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music, female speech, frying sounds, and dishes clanging are heard in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Sizzle", "Dishes, pots, and pans"], ["1.229", "1.786", "5.459", "6.515", "7.917", "5.299", "8.913"], ["1.471", "1.983", "6.108", "7.711", "8.246", "10.0", "10.0"], ["speech", "speech", "speech", "speech", "speech", "frying", "clanging"]]}
{"captions": "Unmodified field recording with pig sounds, man speaking, and breathing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech"], ["0.839", "1.161", "1.524", "4.154"], ["1.007", "1.329", "1.734", "4.755"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking with background noise, child speech, chuckling, shouting, and more male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.553", "5.458", "6.456", "8.507", "9.912"], ["1.473", "4.813", "6.137", "8.185", "9.043", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Liquid is being filled and pumped, making gushing and flowing water sounds.", "data": [["Water tap, faucet"], ["0.0"], ["10.0"], ["gushing"]]}
{"captions": "Distorted speech and sounds of a motorboat engine can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.866", "5.22", "6.717", "8.496"], ["0.441", "3.205", "5.606", "7.874", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "An alarm, fire alarm, beeping sounds, and distortion are heard.", "data": [["Alarm"], ["0.0"], ["10.0"], ["beeping"]]}
{"captions": "Hubbub, thumping, clicking, ticking, and people speaking and laughing are heard.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.961", "3.363", "4.283", "6.622", "7.449"], ["2.142", "4.187", "6.071", "7.118", "9.06"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks while a mechanical fan and wind noise sound.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "6.128", "8.858"], ["0.529", "7.697", "10.0"], ["man", "man", "man"]]}
{"captions": "A foghorn, ship sounds, and men speaking with mechanisms are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.345", "3.526", "4.036", "5.863", "7.419", "8.481", "8.959"], ["0.722", "3.882", "4.249", "6.612", "7.918", "8.662", "9.241"], ["mechanisms", "mechanisms", "mechanisms", "mechanisms", "mechanisms", "mechanisms", "mechanisms"]]}
{"captions": "Women are speaking and kitchen appliances are running in a room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Blender"], ["0.0", "5.157", "9.272", "2.366"], ["3.8", "5.692", "10.0", "10.0"], ["speaking", "speaking", "speaking", "appliances"]]}
{"captions": "Water is boiling and a faucet is running.", "data": [["Water tap, faucet"], ["0.0"], ["10.0"], ["faucet"]]}
{"captions": "Scissors and speech are heard with sound effects and music.", "data": [["Speech", "Speech", "Speech", "Speech", "Speech", "Speech", "Speech"], ["7.071", "8.165", "8.472", "8.724", "9.157", "9.465", "9.756"], ["7.646", "8.276", "8.591", "8.976", "9.291", "9.591", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Music is playing and women and children are speaking, while water is gushing and bells are ringing.", "data": [["Female speech, woman speaking", "Child speech, kid speaking"], ["0.008", "1.969"], ["1.701", "2.276"], ["speaking", "speaking"]]}
{"captions": "A man speaks in a small room with clicking and breathing sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.087", "4.196", "7.191"], ["0.671", "2.315", "5.822", "8.357"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Cars are accelerating and a man is speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.591", "6.735"], ["0.93", "7.101"], ["speaking", "speaking"]]}
{"captions": "Dogs and other domestic animals growl and howl.", "data": [["Dog", "Dog", "Dog", "Dog"], ["0.709", "4.0", "6.764", "8.52"], ["2.811", "6.346", "8.386", "9.441"], ["growl", "growl", "growl", "growl"]]}
{"captions": "Wind noise is heard, a man is speaking, ticking and gurgling sounds are heard, footsteps are walking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.016", "3.117", "6.017", "8.646"], ["2.554", "4.785", "6.845", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Gunshots and footsteps are heard with male speech and speech in the background.", "data": [["Male speech, man speaking", "Speech"], ["3.622", "8.425"], ["5.173", "10.0"], ["speech", "speech"]]}
{"captions": "A man is speaking, breathing, making beeps and honking sounds, and laughing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.406", "4.228", "5.495", "7.837", "8.485", "9.196"], ["2.053", "4.106", "5.119", "6.57", "7.987", "8.988", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men talk and write while music plays.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.007", "7.807"], ["5.574", "10.0"], ["talk", "talk"]]}
{"captions": "Men and women are speaking, clicking, and tapping with a mains hum in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "1.263", "1.888", "3.452", "4.091", "5.896", "7.014", "8.257"], ["1.064", "1.716", "3.267", "3.871", "5.697", "6.706", "7.769", "8.71"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Computer keyboards click while males speak, with the occasional camera click and human sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.016", "0.36", "1.341", "1.659", "2.799", "4.028", "4.638"], ["0.196", "1.145", "1.42", "2.332", "3.859", "4.304", "5.767"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Sizzling and tapping sounds are heard alongside a man's speech, and dishes clatter in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Dishes, pots, and pans", "Male speech, man speaking", "Male speech, man speaking", "Sizzle", "Male speech, man speaking", "Dishes, pots, and pans"], ["0.512", "1.827", "3.134", "4.89", "5.528", "6.299", "7.937", "0.0", "9.614", "6.614"], ["1.031", "2.331", "4.047", "5.118", "5.819", "6.811", "8.646", "10.0", "10.0", "7.11"], ["speech", "speech", "speech", "speech", "clatter", "speech", "speech", "Sizzling", "speech", "clatter"]]}
{"captions": "Dogs and other domestic animals are barking and making noise.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.362", "2.472", "3.606", "5.079", "6.197", "8.858"], ["0.669", "3.157", "4.268", "5.307", "6.543", "9.622"], ["barking", "barking", "barking", "barking", "barking", "barking"]]}
{"captions": "A man is cooking food and speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Sizzle", "Male speech, man speaking"], ["0.318", "2.96", "4.619", "5.642", "0.0", "7.925"], ["1.803", "4.312", "5.312", "6.803", "10.0", "10.0"], ["speaking", "speaking", "speaking", "speaking", "cooking", "speaking"]]}
{"captions": "People are clapping, speaking and laughing in a large room or hall.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Speech", "Speech", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.497", "1.798", "2.919", "4.081", "6.942", "8.093", "9.318"], ["1.295", "2.798", "3.838", "6.168", "7.497", "8.983", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are speaking, whispering, and calling birds in the windy, rural area.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.179", "1.994", "6.243"], ["1.89", "2.549", "7.382"], ["speaking", "speaking", "speaking"]]}
{"captions": "A cash register, mechanisms, tapping, tearing, clicking and a man speaking are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.039", "1.646", "4.15", "6.646", "7.181", "8.276"], ["0.512", "1.535", "3.142", "4.63", "7.087", "7.795", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking and using a telephone while breathing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Telephone bell ringing"], ["0.0", "2.274", "3.614"], ["1.803", "4.085", "5.435"], ["speaking", "speaking", "speaking"]]}
{"captions": "A man speaks, followed by bird sounds and clicking, then speech and more bird sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech"], ["0.0", "1.49", "3.641", "4.301", "8.63"], ["1.171", "3.371", "4.053", "5.082", "10.0"], ["man", "man", "man", "man", "man"]]}
{"captions": "Crickets chirp as a human voice and whack sound is heard.", "data": [["Female speech, woman speaking"], ["5.107"], ["6.405"], ["voice"]]}
{"captions": "Female speech, conversation, surface contact, thumping, creaking, tapping, breathing, giggling sounds are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.055", "2.024", "2.378", "4.433", "8.134"], ["1.953", "2.26", "2.913", "5.126", "9.286"], ["conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "A child speaks and flapping and barking sounds are heard.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Dog"], ["0.0", "6.961", "8.559", "0.299"], ["0.772", "7.961", "8.85", "0.528"], ["speaks", "speaks", "speaks", "barking"]]}
{"captions": "A room is humming as a woman speaks and makes squishing sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.094", "0.921", "2.134", "3.252", "4.567", "7.748", "8.205", "9.252", "9.724"], ["0.685", "1.315", "3.134", "4.346", "5.858", "7.961", "8.874", "9.535", "9.913"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Children speak and shout while cutlery clinks.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Male speech, man speaking", "Cutlery, silverware", "Child speech, kid speaking", "Cutlery, silverware", "Female speech, woman speaking", "Child speech, kid speaking", "Cutlery, silverware"], ["0.0", "3.389", "5.362", "6.807", "7.207", "7.577", "8.091", "9.115", "8.334"], ["3.008", "5.257", "7.149", "7.033", "7.958", "7.831", "8.612", "9.844", "9.167"], ["speak", "speak", "speak", "clinks", "speak", "clinks", "speak", "speak", "clinks"]]}
{"captions": "A fire alarm sounds and children shout while footsteps and female speech can be heard.", "data": [["Fire alarm", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Fire alarm", "Fire alarm", "Female speech, woman speaking"], ["0.0", "0.795", "3.058", "4.017", "3.001", "7.346", "6.559"], ["1.731", "2.457", "3.902", "6.386", "6.803", "10.0", "10.0"], ["sounds", "children", "children", "children", "sounds", "sounds", "speech"]]}
{"captions": "Music is playing followed by a telephone ringing and people speaking.", "data": [["Telephone bell ringing", "Female speech, woman speaking"], ["6.921", "9.575"], ["9.047", "10.0"], ["telephone", "speaking"]]}
{"captions": "A man is speaking, using mechanisms, and making unknown sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "3.425", "9.235", "2.576"], ["2.448", "5.56", "9.809", "10.0"], ["speaking", "speaking", "speaking", "mechanisms"]]}
{"captions": "Music and speech alternate as a man speaks.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Male speech, man speaking"], ["0.0", "1.307", "3.213", "4.031", "7.496"], ["1.283", "3.094", "4.008", "7.457", "10.0"], ["speech", "man", "man", "speech", "man"]]}
{"captions": "Bells, church bells, whispering, and speech can be heard.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.654", "3.118", "5.465"], ["2.654", "4.677", "6.409"], ["speech", "speech", "speech"]]}
{"captions": "A man speaks, a skateboard is heard, people converse, and a car and footsteps are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.646", "2.646", "4.606"], ["0.315", "2.551", "4.252", "7.094"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "People are speaking, music plays, a man sings, dogs howl, and wild dogs bark.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Dog"], ["0.0", "4.567", "7.252", "8.969"], ["0.457", "5.543", "7.929", "10.0"], ["speaking", "speaking", "speaking", "howl"]]}
{"captions": "People are speaking in a small room with shuffling sounds.", "data": [["Speech", "Speech", "Speech", "Speech", "Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.93", "1.799", "3.578", "6.748", "7.366", "7.977", "8.52", "9.131"], ["0.815", "1.602", "3.164", "5.581", "7.257", "7.848", "8.357", "9.002", "9.633"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Mechanisms and a man speaking are heard, along with shuffling cards sounds.", "data": [["Male speech, man speaking"], ["8.976"], ["9.659"], ["speaking"]]}
{"captions": "Sheep bleat and a man speaks while dogs bark and whine.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Speech", "Dog"], ["1.29", "2.804", "4.08", "8.914"], ["2.308", "3.49", "9.009", "9.681"], ["speaks", "speaks", "speaks", "bark"]]}
{"captions": "A man speaks and artillery fire and explosions are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.407", "2.458", "4.684", "8.126", "9.036"], ["2.193", "3.666", "6.063", "8.805", "9.837"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Music, sound effects, breaking, and speech are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.787", "2.528", "6.606", "9.291"], ["2.236", "3.638", "9.024", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A man is speaking, breathing, writing, and making noises in a quiet room with a sine wave sound.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.249", "3.401", "6.056", "9.056"], ["0.971", "3.126", "5.356", "7.834", "9.905"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Male speeches and music are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.63", "4.882", "6.402", "9.126"], ["3.575", "5.898", "8.654", "10.0"], ["speeches", "speeches", "speeches", "speeches"]]}
{"captions": "Mechanisms tick while a woman whispers and breathes.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.18", "1.816", "3.279", "3.703", "4.193", "6.455", "7.75", "9.306", "9.699"], ["1.677", "2.123", "3.615", "3.898", "4.667", "7.49", "8.305", "9.549", "10.0"], ["whispers", "whispers", "whispers", "whispers", "whispers", "whispers", "whispers", "whispers", "whispers"]]}
{"captions": "A vacuum cleaner is running.", "data": [["Vacuum cleaner"], ["0.03"], ["10.0"], ["running"]]}
{"captions": "People speak, cry, laugh, converse, and make various sounds.", "data": [["Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Conversation"], ["0.0", "0.724", "2.087", "8.78"], ["0.543", "1.669", "6.433", "10.0"], ["converse", "speak", "speak", "converse"]]}
{"captions": "People are singing, speaking and splashing in a bathtub.", "data": [["Female speech, woman speaking", "Bathtub (filling or washing)"], ["0.0", "1.606"], ["0.748", "10.0"], ["speaking", "splashing"]]}
{"captions": "People are walking, speaking, whistling, and laughing with wind noise in the background.", "data": [["Female speech, woman speaking", "Speech", "Male speech, man speaking"], ["0.0", "2.008", "9.449"], ["0.787", "2.85", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "A woman is speaking while mechanisms and a boing sound are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.169", "2.705", "3.157", "7.831"], ["1.937", "3.002", "6.501", "8.16"], ["woman", "woman", "woman", "woman"]]}
{"captions": "A woman is speaking and eating in a small room with background monologues.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.618", "2.322", "7.868"], ["0.469", "1.093", "3.021", "9.029"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Food fries, dishes clank, and a woman speaks in a small room.", "data": [["Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Sizzle", "Female speech, woman speaking", "Dishes, pots, and pans"], ["1.472", "1.024", "3.079", "6.362", "8.685", "0.0", "4.205", "9.606"], ["1.874", "2.787", "3.724", "6.606", "8.858", "10.0", "5.055", "10.0"], ["clank", "speaks", "clank", "clank", "clank", "fries", "speaks", "clank"]]}
{"captions": "Crows caw and a man speaks with wind in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["4.354", "6.606"], ["6.307", "9.709"], ["speaks", "speaks"]]}
{"captions": "Scraping tools, tapping, fire, and hammering are heard, as well as a man speaking.", "data": [["Male speech, man speaking"], ["9.189"], ["10.0"], ["speaking"]]}
{"captions": "A group of people is speaking, knocking, and chirping, with music playing in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.583", "5.858", "9.055"], ["3.992", "8.606", "9.606"], ["speaking", "speaking", "speaking"]]}
{"captions": "Liquid is filling a container as water flows from a tap.", "data": [["Water tap, faucet"], ["0.005"], ["10.0"], ["tap"]]}
{"captions": "A woman is speaking, with panting and animal sounds in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.306", "1.154", "1.799", "3.958", "5.452", "7.8", "9.233", "9.498"], ["0.971", "1.331", "2.288", "4.175", "5.581", "7.889", "9.375", "9.674"], ["woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman"]]}
{"captions": "Women speak and laugh, a dog barks and growls.", "data": [["Female speech, woman speaking", "Dog", "Female speech, woman speaking", "Dog"], ["1.661", "4.535", "3.268", "9.354"], ["3.0", "7.685", "3.787", "10.0"], ["speak", "barks", "speak", "barks"]]}
{"captions": "A male speaker converses and dials a phone, followed by female speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.226", "3.013", "4.656", "8.658"], ["2.776", "4.072", "5.003", "10.0"], ["speaker", "speaker", "speaker", "female"]]}
{"captions": "A man speaking, mechanisms whirring, synthesized speech and laughter, breathing and clicking sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "5.898", "7.386", "9.276", "9.913"], ["1.26", "7.031", "8.803", "9.701", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks while a bus with a heavy engine drives and air brakes release.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.549", "2.08", "2.807", "3.48", "6.658", "9.671"], ["0.309", "1.4", "2.642", "3.02", "5.896", "8.936", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man speaks, followed by the sounds of sanding.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.882", "4.543"], ["2.11", "3.795", "9.63"], ["speaks", "speaks", "speaks"]]}
{"captions": "A man is speaking and typing on a computer keyboard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "0.236", "0.535", "0.929", "1.992", "3.252", "4.15", "4.756", "6.094"], ["0.142", "0.48", "0.835", "1.74", "3.134", "3.913", "4.591", "5.882", "7.638"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "An electric toothbrush whirs with narration and female speech and breathing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["4.737", "5.847", "7.966", "9.533"], ["5.141", "7.44", "9.156", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Female singing alternates with clapping and female speech.", "data": [["Female speech, woman speaking"], ["0.0"], ["0.787"], ["speech"]]}
{"captions": "Engines and power tools are running.", "data": [["Vacuum cleaner"], ["0.008"], ["10.0"], ["running"]]}
{"captions": "A man is speaking, with clicking and tapping sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.26", "5.37", "8.769"], ["0.671", "4.954", "7.942", "9.948"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A woman is speaking in an unmodified field recording.", "data": [["Female speech, woman speaking"], ["3.213"], ["3.953"], ["woman"]]}
{"captions": "Kids are speaking, mechanisms are heard, and there is laughter, tapping, and gasping sounds.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Female speech, woman speaking"], ["0.0", "1.661", "3.727", "7.152"], ["0.206", "3.37", "4.818", "7.433"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking, using cutlery and chopping food, and there is mains hum.", "data": [["Cutlery, silverware", "Cutlery, silverware", "Male speech, man speaking", "Male speech, man speaking", "Cutlery, silverware", "Cutlery, silverware", "Male speech, man speaking", "Male speech, man speaking", "Cutlery, silverware"], ["1.087", "2.811", "0.0", "3.126", "3.094", "7.15", "5.685", "7.685", "7.354"], ["1.252", "2.929", "2.323", "4.291", "3.22", "7.283", "6.811", "8.307", "7.551"], ["cutlery", "cutlery", "speaking", "speaking", "cutlery", "cutlery", "speaking", "speaking", "cutlery"]]}
{"captions": "Water runs from a tap with breathing and speech sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Water tap, faucet"], ["0.0", "7.43", "0.0"], ["0.122", "10.0", "4.849"], ["speech", "speech", "tap"]]}
{"captions": "Fire alarms and speeches are heard.", "data": [["Fire alarm", "Alarm", "Speech", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Alarm", "Fire alarm", "Female speech, woman speaking"], ["0.024", "1.039", "0.74", "2.094", "4.165", "5.181", "6.252", "8.283", "9.354", "3.488"], ["0.819", "1.803", "1.402", "2.969", "5.102", "6.118", "7.197", "9.197", "10.0", "3.843"], ["alarms", "alarms", "speeches", "alarms", "alarms", "alarms", "alarms", "alarms", "alarms", "speeches"]]}
{"captions": "Cats caterwaul and make other animal sounds in a large room or hall.", "data": [["Cat", "Cat", "Cat", "Cat", "Cat"], ["1.756", "6.614", "8.126", "8.969", "9.606"], ["6.488", "7.937", "8.386", "9.346", "10.0"], ["caterwaul", "caterwaul", "caterwaul", "caterwaul", "caterwaul"]]}
{"captions": "A man speaking, toilet flushing, coughing and human voices.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Toilet flush"], ["0.0", "5.317", "6.274", "0.0"], ["1.057", "6.112", "8.969", "10.0"], ["speaking", "speaking", "speaking", "flushing"]]}
{"captions": "A man speaks while a motorboat revs and accelerates, and a shout is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.46", "2.13", "3.335", "4.827", "5.975"], ["1.301", "2.013", "3.16", "3.776", "5.858", "7.791"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man speaks, pants, taps, and breathes in a room with a mains hum.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "7.173", "8.843"], ["0.709", "7.953", "9.835"], ["speaks", "speaks", "speaks"]]}
{"captions": "People are speaking and making sounds with cutlery and glass.", "data": [["Cutlery, silverware", "Male speech, man speaking", "Cutlery, silverware", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Cutlery, silverware"], ["1.26", "0.197", "2.661", "5.276", "8.614", "9.803", "6.89"], ["1.693", "3.228", "3.331", "6.331", "9.315", "10.0", "7.457"], ["cutlery", "speaking", "cutlery", "speaking", "speaking", "speaking", "cutlery"]]}
{"captions": "Speech and tearing sounds can be heard in a small room.", "data": [["Speech", "Conversation", "Male speech, man speaking"], ["0.402", "4.268", "5.795"], ["1.402", "5.236", "6.843"], ["Speech", "Speech", "Speech"]]}
{"captions": "Men speak, laugh, clap, and sing with music, a bell, and bird chirping.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.724", "1.701", "5.89", "6.843", "7.551", "8.52", "9.661"], ["1.315", "2.638", "6.693", "7.331", "8.016", "9.52", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Alarms and fire alarms sound with a man speaking and tapping.", "data": [["Alarm", "Smoke detector, smoke alarm", "Male speech, man speaking"], ["0.0", "5.645", "7.589"], ["3.633", "6.697", "8.672"], ["tapping", "alarms", "speaking"]]}
{"captions": "Radio is playing, men are speaking and noise is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.015", "3.252", "4.638", "7.292", "8.949"], ["0.827", "2.85", "4.502", "7.118", "8.567", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music plays as people make noise and multiple men speak, beeps can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.74", "1.901", "4.701", "6.315", "7.516", "7.996"], ["1.517", "3.906", "5.213", "6.946", "7.804", "9.588"], ["noise", "noise", "noise", "noise", "noise", "noise"]]}
{"captions": "Men are speaking and playing a video game with machine gun and fusillade sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech"], ["0.0", "1.457", "2.046", "2.971", "5.59", "6.902", "9.474"], ["1.225", "1.798", "2.347", "3.301", "5.931", "7.775", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Various sounds are heard, including tapping, speech, and crying.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.844", "2.257", "3.016", "6.622", "9.007", "9.936"], ["1.928", "2.65", "5.422", "7.854", "9.756", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "There are sounds of water and wind, with occasional speech and music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.627", "2.607", "6.449", "7.646"], ["2.188", "2.851", "7.181", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Reversing beeps, engine noise, radio, and male speech can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.079", "2.748", "5.701", "6.291"], ["1.449", "2.693", "4.811", "6.22", "8.276"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Domestic animals and dogs bark and howl.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.0", "1.079", "2.402", "3.449", "4.15", "4.543", "5.362", "5.874", "6.543", "7.157", "7.772", "8.701", "9.543"], ["0.921", "1.866", "2.882", "3.843", "4.449", "5.236", "5.693", "6.346", "7.008", "7.598", "8.457", "9.378", "10.0"], ["bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark"]]}
{"captions": "Children shout and laugh while a woman speaks and birds sing.", "data": [["Female speech, woman speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["1.299", "5.11", "8.74"], ["1.772", "8.512", "10.0"], ["speaks", "shout", "shout"]]}
{"captions": "People are speaking, chewing, breathing, and laughing in a busy environment.", "data": [["Male speech, man speaking"], ["3.976"], ["5.213"], ["speaking"]]}
{"captions": "Hissing, television noise, conversation, and tapping sounds are heard.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.388", "5.697", "7.056", "8.687"], ["4.662", "6.599", "8.583", "10.0"], ["conversation", "conversation", "conversation", "conversation"]]}
{"captions": "People speak, cheer, clap, and bounce a basketball while vehicles pass by.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.307", "3.575", "4.976", "6.567", "7.693"], ["1.102", "3.323", "4.118", "6.386", "7.126", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A man speaks and a gunshot is heard outside with wind noise.", "data": [["Male speech, man speaking"], ["0.516"], ["1.317"], ["speaks"]]}
{"captions": "A man speaks and an arrow is shot as more speech is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.256", "8.624"], ["1.903", "6.397", "10.0"], ["speaks", "speaks", "speaks"]]}
{"captions": "Men are speaking and playing music.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "9.394"], ["0.717", "10.0"], ["speaking", "speaking"]]}
{"captions": "Women whisper and speak while crumpling and water sounds are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.764", "5.205", "8.031"], ["3.724", "6.528", "8.756"], ["speak", "speak", "speak"]]}
{"captions": "A drill and various mechanical sounds are heard between multiple instances of male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.058", "5.417", "6.139", "7.961"], ["1.057", "6.028", "7.626", "9.501"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A man is speaking and filling something with liquid.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "7.055"], ["1.26", "8.572"], ["speaking", "speaking"]]}
{"captions": "Men are speaking and the sound of power tools can be heard.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.276", "5.575", "9.591"], ["1.717", "4.898", "7.795", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men speak and machinery operates.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.643", "1.354", "4.424", "5.369", "5.72", "9.304"], ["0.409", "1.216", "3.935", "4.891", "5.502", "8.938", "9.942"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Children laugh, giggle, shout, and speak while a man laughs and speaks in a large room.", "data": [["Child speech, kid speaking", "Speech", "Male speech, man speaking"], ["5.262", "6.397", "9.135"], ["6.357", "7.349", "10.0"], ["speak", "speaks", "speaks"]]}
{"captions": "Cats caterwaul and other domestic animals make noises, with occasional laughter and speech.", "data": [["Cat", "Cat", "Female speech, woman speaking", "Cat", "Speech"], ["0.37", "3.622", "3.835", "5.063", "8.031"], ["3.299", "3.898", "5.016", "10.0", "8.795"], ["caterwaul", "caterwaul", "speech", "caterwaul", "speech"]]}
{"captions": "A woman and men are speaking and shouting, with a telephone dialing.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.041", "1.201", "2.43", "7.275"], ["0.769", "1.524", "3.905", "7.701"], ["woman", "men", "men", "men"]]}
{"captions": "A man is speaking, typing, and using a cash register in a room with occasional tapping.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "6.291", "6.992", "8.433"], ["3.567", "6.654", "8.126", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Background noise is mixed with speech and breathing sounds.", "data": [["Child speech, kid speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.061", "4.488", "5.051", "5.981", "6.782"], ["4.284", "4.963", "5.655", "6.497", "8.853"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A man is cooking and talking in the kitchen.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Sizzle", "Dishes, pots, and pans", "Male speech, man speaking"], ["1.157", "2.157", "3.48", "4.787", "6.339", "0.0", "0.315", "8.591"], ["1.85", "2.661", "4.063", "5.213", "7.984", "10.0", "0.496", "9.677"], ["talking", "talking", "talking", "talking", "talking", "cooking", "cooking", "talking"]]}
{"captions": "A man is using an electric razor and speaking.", "data": [["Electric shaver, electric razor", "Speech"], ["0.0", "9.328"], ["10.0", "10.0"], ["razor", "speaking"]]}
{"captions": "A child is breathing, walking and speaking intermittently.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.079", "1.157", "2.52", "4.52"], ["0.693", "2.236", "3.362", "4.787"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Multiple conversations and laughter accompany a woman's speech and the sound of an electric shaver.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Electric shaver, electric razor"], ["0.0", "6.63", "7.795", "0.0"], ["6.433", "7.079", "10.0", "10.0"], ["conversations", "conversations", "conversations", "shaver"]]}
{"captions": "Alarms, mechanisms, and camera sounds are heard.", "data": [["Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm"], ["0.0", "0.366", "1.084", "1.815", "2.572", "3.264", "4.047"], ["0.261", "0.966", "1.736", "2.402", "3.133", "3.812", "4.595"], ["Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms"]]}
{"captions": "Silence, ticking clocks, mechanisms, and an alarm clock are heard.", "data": [["Alarm"], ["3.486"], ["8.71"], ["alarm"]]}
{"captions": "Music with rapping and telephone bells ringing.", "data": [["Telephone bell ringing", "Telephone bell ringing"], ["6.593", "9.323"], ["8.687", "10.0"], ["bells", "bells"]]}
{"captions": "Boiling water, mechanisms, and people speaking make noise in a small room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.988", "5.919", "6.699", "8.434", "9.335", "9.856"], ["2.613", "6.358", "7.942", "9.11", "9.543", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Silence is interrupted by a ringing phone and male speech, followed by more speech and ringing.", "data": [["Ringtone", "Male speech, man speaking", "Male speech, man speaking", "Ringtone", "Speech"], ["0.134", "6.953", "8.276", "3.134", "9.685"], ["1.402", "7.591", "9.22", "4.307", "10.0"], ["ringing", "speech", "speech", "ringing", "speech"]]}
{"captions": "People are speaking outside and dogs barking.", "data": [["Dog", "Speech", "Male speech, man speaking", "Male speech, man speaking", "Dog"], ["2.357", "0.845", "5.245", "7.82", "3.147"], ["2.738", "1.499", "6.049", "8.883", "4.864"], ["barking", "speaking", "speaking", "speaking", "barking"]]}
{"captions": "A man speaks and water sounds are heard in a small room.", "data": [["Male speech, man speaking", "Water tap, faucet", "Male speech, man speaking", "Male speech, man speaking", "Sink (filling or washing)"], ["0.0", "1.567", "3.323", "7.953", "6.756"], ["0.819", "5.197", "5.22", "8.898", "9.496"], ["speaks", "water", "speaks", "speaks", "water"]]}
{"captions": "Mechanisms, clicking and women speaking are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["1.009", "8.315"], ["1.386", "9.315"], ["women", "women"]]}
{"captions": "A woman speaks, followed by background noise, more female speech, and smoke detector beeps.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Smoke detector, smoke alarm", "Smoke detector, smoke alarm", "Female speech, woman speaking", "Female speech, woman speaking", "Smoke detector, smoke alarm"], ["0.0", "0.898", "4.598", "5.622", "3.008", "9.654", "6.622"], ["0.283", "2.346", "5.409", "6.331", "4.063", "10.0", "7.307"], ["speech", "speech", "beeps", "beeps", "speech", "speech", "beeps"]]}
{"captions": "Dogs bark and make squeaking sounds in a small room.", "data": [["Dog", "Dog", "Dog", "Dog"], ["0.661", "2.213", "3.268", "6.189"], ["1.803", "3.047", "4.992", "6.693"], ["bark", "bark", "bark", "bark"]]}
{"captions": "A man is speaking while others are writing and speaking.", "data": [["Speech", "Male speech, man speaking"], ["3.921", "8.882"], ["5.189", "9.717"], ["speaking", "speaking"]]}
{"captions": "Women are laughing, shouting, and speaking over background noise and splashing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.88", "4.493", "7.083", "8.732"], ["4.13", "4.755", "8.342", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Food sizzles and dishes clatter, men speak in the background.", "data": [["Male speech, man speaking", "Dishes, pots, and pans", "Male speech, man speaking", "Sizzle", "Dishes, pots, and pans", "Male speech, man speaking"], ["2.031", "1.11", "5.717", "0.0", "6.724", "8.181"], ["5.504", "1.299", "6.638", "10.0", "8.157", "10.0"], ["speak", "clatter", "speak", "sizzles", "clatter", "speak"]]}
{"captions": "Laughter and phone sounds interrupt conversations.", "data": [["Male speech, man speaking", "Speech", "Telephone bell ringing", "Male speech, man speaking"], ["7.308", "7.829", "5.087", "9.28"], ["7.717", "8.437", "7.072", "10.0"], ["Laughter", "Laughter", "phone", "Laughter"]]}
{"captions": "A sizzling sound can be heard in a small room.", "data": [["Sizzle"], ["0.449"], ["10.0"], ["sizzling"]]}
{"captions": "Music is playing, a woman is speaking.", "data": [["Female speech, woman speaking"], ["9.89"], ["10.0"], ["speaking"]]}
{"captions": "A smoke detector or smoke alarm is beeping.", "data": [["Smoke detector, smoke alarm"], ["0.0"], ["10.0"], ["beeping"]]}
{"captions": "A man is speaking while a motorcycle engine starts and a vehicle drives.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "7.8", "8.216"], ["3.959", "7.961", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "Women are speaking, and an electric shaver is buzzing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Electric shaver, electric razor", "Female speech, woman speaking"], ["0.515", "2.759", "3.233", "3.562", "4.784", "5.807", "8.524", "0.0", "9.3"], ["1.606", "3.123", "3.343", "3.96", "5.704", "7.618", "9.173", "10.0", "9.623"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "buzzing", "speaking"]]}
{"captions": "A man is speaking with environmental noise, birds are chirping, and a gunshot is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.26", "2.772", "4.173", "6.268", "8.11", "9.575"], ["0.409", "2.047", "4.031", "4.567", "7.78", "9.307", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "There is male speech and mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.709", "1.732", "3.331", "4.496", "4.992", "8.559", "9.693"], ["0.339", "1.528", "2.858", "3.937", "4.772", "8.173", "9.567", "10.0"], ["male", "male", "male", "male", "male", "male", "male", "male"]]}
{"captions": "Cats are meowing, and a woman is speaking.", "data": [["Speech", "Cat", "Cat", "Speech"], ["0.945", "0.433", "6.748", "1.417"], ["1.283", "5.339", "9.024", "2.465"], ["speaking", "meowing", "meowing", "speaking"]]}
{"captions": "People are speaking, and wind is blowing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.315", "1.465", "6.252", "8.654"], ["1.11", "2.984", "8.346", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are speaking and making sounds with appliances.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Speech", "Speech", "Male speech, man speaking", "Male speech, man speaking", "Blender"], ["0.0", "1.976", "3.627", "6.909", "7.414", "7.876", "8.725", "2.554"], ["0.696", "2.215", "5.125", "7.313", "7.727", "8.396", "10.0", "5.805"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "appliances"]]}
{"captions": "Fire alarms repeatedly sound.", "data": [["Fire alarm", "Fire alarm", "Fire alarm"], ["0.535", "4.52", "8.528"], ["3.994", "8.06", "10.0"], ["sound", "sound", "sound"]]}
{"captions": "People are speaking and tapping with background noise.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.229", "3.157", "4.098", "4.955", "8.586"], ["0.179", "1.997", "3.658", "4.468", "6.218", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music, conversation, and alarms, interspersed with water sounds and clapping.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Alarm"], ["0.0", "1.685", "1.848", "2.337", "2.835", "3.342", "3.813", "4.339", "2.319", "7.708", "8.777", "4.819"], ["1.585", "1.969", "2.129", "2.754", "3.125", "3.696", "4.149", "4.647", "4.674", "8.062", "10.0", "5.091"], ["conversation", "conversation", "alarms", "alarms", "alarms", "alarms", "alarms", "alarms", "conversation", "conversation", "conversation", "alarms"]]}
{"captions": "A variety of sounds can be heard in a small room, including a cat meowing, a television, water, speech, and more.", "data": [["Cat", "Female speech, woman speaking", "Cat", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Cat", "Female speech, woman speaking", "Cat", "Cat", "Male speech, man speaking", "Bathtub (filling or washing)"], ["0.158", "1.133", "2.093", "3.741", "5.01", "5.456", "6.321", "4.29", "7.035", "7.516", "9.067", "8.037", "1.901"], ["1.043", "1.901", "2.807", "4.139", "5.381", "6.26", "6.863", "5.017", "7.447", "8.092", "9.657", "8.737", "4.976"], ["meowing", "speech", "meowing", "speech", "speech", "speech", "speech", "meowing", "speech", "meowing", "meowing", "speech", "water"]]}
{"captions": "An ice cream truck is playing music and adults are laughing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["5.1", "6.404"], ["6.205", "7.255"], ["laughing", "laughing"]]}
{"captions": "People converse, gargle, and speak in a muffled environment.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.803", "4.709", "8.654"], ["3.181", "4.559", "6.165", "10.0"], ["speak", "converse", "converse", "converse"]]}
{"captions": "An alarm clock rings, men speak, and ticks and tocks are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Alarm clock", "Male speech, man speaking"], ["1.52", "2.717", "3.866", "6.858", "9.189", "0.0", "9.598"], ["2.567", "3.039", "4.701", "9.063", "9.496", "4.268", "10.0"], ["speak", "speak", "speak", "speak", "speak", "rings", "speak"]]}
{"captions": "Women speak and laugh with hiccups and humming sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "6.504", "9.118"], ["5.071", "8.496", "9.417"], ["laugh", "laugh", "laugh"]]}
{"captions": "A fire alarm is heard repeatedly.", "data": [["Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm"], ["0.942", "1.909", "2.86", "4.793", "5.785", "6.777", "8.694", "9.669"], ["1.736", "2.545", "3.661", "5.595", "6.612", "7.645", "9.545", "10.0"], ["heard", "heard", "heard", "heard", "heard", "heard", "heard", "heard"]]}
{"captions": "Liquid splashes and drips.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Water tap, faucet"], ["0.102", "1.299", "2.094"], ["0.205", "1.638", "5.772"], ["splashes", "splashes", "drips"]]}
{"captions": "A woman speaks, followed by mechanical sounds, tapping, and more speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.48", "5.843", "7.835"], ["1.173", "5.535", "7.236", "9.984"], ["woman", "woman", "woman", "woman"]]}
{"captions": "Echoing speech, effects units, hubbub, and music play in a public space.", "data": [["Speech", "Male speech, man speaking"], ["2.102", "3.559"], ["3.22", "4.331"], ["Echoing", "hubbub"]]}
{"captions": "Cooking sounds, dishes clanging, and men talking are heard.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Male speech, man speaking", "Dishes, pots, and pans", "Dishes, pots, and pans", "Frying (food)", "Male speech, man speaking"], ["0.0", "1.537", "2.896", "4.509", "6.04", "7.646", "7.715", "9.376", "0.0", "8.268"], ["1.311", "1.785", "3.926", "4.969", "6.493", "8.157", "8.133", "9.623", "10.0", "8.906"], ["clanging", "clanging", "clanging", "clanging", "clanging", "talking", "clanging", "clanging", "Cooking", "talking"]]}
{"captions": "A power tool is being used, and a man is speaking in a small room.", "data": [["Male speech, man speaking"], ["9.44"], ["10.0"], ["speaking"]]}
{"captions": "Mechanisms and male speech with tapping and ticking sounds fill the soundscape.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.528", "2.339", "5.591", "6.717", "9.543"], ["0.425", "1.661", "5.079", "6.433", "9.213", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A sink is filled with water and music plays.", "data": [["Sink (filling or washing)"], ["0.0"], ["10.0"], ["water"]]}
{"captions": "Sound effects punctuate silence and music while a man speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["8.856", "9.884"], ["9.711", "10.0"], ["speaks", "speaks"]]}
{"captions": "Men speak and click keyboards intermittently.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "5.504", "6.583", "7.433"], ["0.575", "6.181", "7.331", "8.016"], ["speak", "speak", "speak", "speak"]]}
{"captions": "Breathing, speaking, and beeping sounds are heard over a rumble and a sine wave.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.611", "1.37", "3.202", "6.624", "9.202"], ["0.357", "1.16", "1.812", "5.157", "8.723", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Speeches and music are heard, with occasional breathing and human voices.", "data": [["Speech", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Speech", "Speech", "Male speech, man speaking"], ["0.0", "0.909", "1.393", "2.444", "2.951", "5.135", "7.533", "8.712"], ["0.498", "1.188", "2.257", "2.805", "3.239", "6.053", "8.557", "10.0"], ["Speeches", "Speeches", "Speeches", "Speeches", "Speeches", "Speeches", "Speeches", "Speeches"]]}
{"captions": "Screaming, static, and water sounds mix with toilet flushing and splattering.", "data": [["Toilet flush", "Male speech, man speaking"], ["3.969", "4.74"], ["7.11", "5.488"], ["flushing", "static"]]}
{"captions": "Music plays as people converse and laugh.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["1.399", "4.197", "6.208", "7.942", "9.509"], ["1.821", "4.491", "7.277", "8.879", "9.734"], ["converse", "converse", "converse", "converse", "converse"]]}
{"captions": "Men speak, with background noise, thuds, and alarm sounds with ticking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Alarm", "Alarm", "Alarm", "Alarm", "Male speech, man speaking", "Alarm"], ["0.0", "0.536", "4.689", "5.799", "6.899", "7.801", "3.802", "8.864"], ["0.398", "3.702", "5.502", "6.543", "7.647", "8.587", "5.029", "9.671"], ["speak", "speak", "ticking", "ticking", "ticking", "ticking", "speak", "ticking"]]}
{"captions": "Music and a man singing, speaking and having a conversation are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.129", "3.218", "6.755", "8.425"], ["2.41", "5.526", "7.515", "10.0"], ["conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Animals, dogs, and people are growling, shouting, and speaking.", "data": [["Dog", "Speech", "Dog", "Speech", "Dog", "Male speech, man speaking", "Dog", "Speech", "Dog", "Male speech, man speaking", "Dog", "Speech", "Dog", "Speech", "Dog", "Male speech, man speaking"], ["0.005", "0.149", "0.717", "1.609", "1.981", "3.202", "3.569", "4.583", "4.87", "5.975", "6.389", "7.631", "8.067", "9.007", "9.299", "9.878"], ["0.165", "0.738", "1.529", "1.954", "3.139", "3.532", "4.562", "4.886", "5.964", "6.378", "7.621", "8.024", "8.98", "9.288", "9.878", "10.0"], ["growling", "speaking", "growling", "speaking", "growling", "speaking", "growling", "speaking", "growling", "speaking", "growling", "speaking", "growling", "speaking", "growling", "speaking"]]}
{"captions": "Women are speaking and giggling with mechanisms, animal sounds, and surface contact sounds in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.283", "1.394", "3.36", "8.959", "9.555"], ["0.156", "0.92", "2.25", "4.853", "9.306", "9.942"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A variety of animals and people are making sounds, including talking, birds singing, and crowd noise.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.251", "3.126", "4.494", "5.526"], ["2.72", "4.142", "4.83", "8.089"], ["talking", "talking", "talking", "talking"]]}
{"captions": "People speak and there is laughter amidst the sound of an electric shaver.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Electric shaver, electric razor", "Female speech, woman speaking"], ["0.409", "1.244", "2.323", "5.008", "8.016", "0.0", "8.835"], ["1.197", "2.016", "4.764", "6.094", "8.472", "10.0", "9.606"], ["People", "speak", "People", "People", "speak", "shaver", "People"]]}
{"captions": "Wind, pulleys, footsteps and man speaking sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.354", "5.52", "5.913", "8.22"], ["3.024", "5.748", "6.016", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Sounds of beeping, man speaking, and a single-lens reflex camera in a small room, followed by silence, are present.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.882", "1.118", "2.339", "5.433", "9.803"], ["1.016", "1.551", "4.811", "7.173", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People converse on a bus.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.005", "1.322", "2.661", "4.509", "4.923", "7.047", "8.311", "9.395"], ["1.004", "2.018", "3.144", "4.711", "5.645", "7.477", "9.012", "10.0"], ["converse", "converse", "converse", "converse", "converse", "converse", "converse", "converse"]]}
{"captions": "People are speaking and dogs are barking in a small room.", "data": [["Speech", "Speech", "Dog", "Dog", "Male speech, man speaking", "Dog"], ["0.419", "2.915", "6.682", "7.713", "5.262", "9.492"], ["0.762", "3.333", "7.025", "8.61", "5.83", "10.0"], ["speaking", "speaking", "barking", "barking", "speaking", "barking"]]}
{"captions": "A basketball is bouncing as men talk.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.008", "2.772", "3.772", "5.638", "7.354", "9.22"], ["2.213", "3.433", "5.22", "7.047", "9.0", "10.0"], ["talk", "talk", "talk", "talk", "talk", "talk"]]}
{"captions": "Insects, birds, and people are heard in a rural setting.", "data": [["Child speech, kid speaking", "Child speech, kid speaking"], ["3.268", "4.709"], ["3.906", "9.52"], ["people", "people"]]}
{"captions": "Mechanical sounds and a sewing machine are heard with intermittent speech.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["6.323", "9.252"], ["7.213", "10.0"], ["speech", "speech"]]}
{"captions": "A man is speaking in intervals of silence.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.118", "6.287", "6.874", "9.355"], ["3.847", "5.874", "6.664", "9.063", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music, speech, and conversation are heard.", "data": [["Male speech, man speaking", "Conversation", "Conversation", "Male speech, man speaking"], ["0.709", "3.976", "5.496", "8.638"], ["3.559", "4.961", "8.299", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "People are laughing, speaking, breathing, and chuckling.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.544", "5.21", "8.29"], ["1.885", "4.137", "7.244", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Children and adults are shouting, crying, and speaking, with splashing water and a man's voice heard in the background.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Speech", "Male speech, man speaking", "Speech"], ["0.0", "0.614", "4.299", "5.339", "7.677"], ["0.567", "1.976", "5.236", "7.008", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Children speak as water flows and adults converse.", "data": [["Conversation", "Child speech, kid speaking", "Conversation", "Conversation", "Child speech, kid speaking", "Conversation"], ["0.008", "1.528", "2.055", "3.929", "7.543", "8.559"], ["0.685", "1.835", "3.228", "5.827", "8.087", "10.0"], ["converse", "flows", "converse", "converse", "flows", "converse"]]}
{"captions": "People are speaking and laughing while others are babbling in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.583", "4.244", "6.819"], ["2.756", "5.465", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "Dogs bark and tap their paws on the ground.", "data": [["Dog", "Dog", "Dog"], ["3.551", "7.744", "9.584"], ["4.303", "8.456", "10.0"], ["bark", "bark", "bark"]]}
{"captions": "Music plays while a man clicks and speaks over the fire's crackling.", "data": [["Male speech, man speaking"], ["9.457"], ["10.0"], ["speaks"]]}
{"captions": "A man speaks and tools are heard operating.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.949", "2.556", "3.505", "4.691", "5.813", "7.606", "8.728"], ["0.561", "2.094", "3.36", "4.344", "5.113", "6.212", "8.271", "8.947"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A woman is brushing her teeth and speaking while water flows.", "data": [["Water tap, faucet", "Female speech, woman speaking"], ["5.024", "7.402"], ["7.323", "10.0"], ["flows", "speaking"]]}
{"captions": "Females speak and laugh while breathing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "3.087", "3.874", "5.921", "7.961"], ["2.685", "3.693", "4.52", "7.756", "10.0"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Music plays while a man speaks and shuffles.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.346", "4.197", "5.26", "6.606", "9.465"], ["2.441", "4.654", "6.134", "7.291", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Men speak and type while chirping birds and ambient sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.827", "3.008", "4.827", "8.512"], ["0.677", "1.646", "3.787", "5.63", "9.724"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Sizzling dishes, pots and pans, stirring, and female speech.", "data": [["Dishes, pots, and pans", "Sizzle", "Female speech, woman speaking"], ["0.01", "0.654", "7.532"], ["10.0", "10.0", "8.38"], ["Sizzling", "Sizzling", "speech"]]}
{"captions": "A woman is speaking, music is playing, and a television is on while birds are chirping and breathing sounds are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.558", "3.038", "4.339", "4.562", "4.923", "6.946", "8.263"], ["0.425", "2.31", "4.254", "4.44", "4.838", "6.283", "8.131", "9.937"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A fire alarm goes off while music and a female singer is playing.", "data": [["Fire alarm", "Fire alarm"], ["3.512", "7.913"], ["5.378", "9.732"], ["alarm", "alarm"]]}
{"captions": "Mechanisms sound, a doorbell rings, and women are speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["7.377", "8.417"], ["8.115", "9.426"], ["speaking", "speaking"]]}
{"captions": "Women are speaking and music is playing in the background, with water sounds from a bathtub and speech mixed in.", "data": [["Speech", "Speech", "Speech", "Speech", "Speech", "Female speech, woman speaking", "Bathtub (filling or washing)"], ["0.701", "1.205", "2.047", "2.969", "6.992", "8.882", "5.48"], ["0.945", "1.827", "2.669", "4.346", "8.724", "10.0", "8.921"], ["speaking", "speaking", "speaking", "speaking", "speaking", "Women", "water"]]}
{"captions": "A female is speaking, mechanisms are being used, and laughter and whispering are heard in a conversation.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "3.483", "3.89", "4.345", "4.793", "7.0"], ["1.844", "3.775", "4.216", "4.623", "5.065", "8.249"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Women converse near a natural waterfall.", "data": [["Conversation", "Conversation", "Conversation", "Conversation", "Conversation"], ["0.835", "5.2", "6.959", "8.153", "9.05"], ["4.718", "6.578", "7.332", "8.805", "9.45"], ["converse", "converse", "converse", "converse", "converse"]]}
{"captions": "Men are talking and walking while someone snores and footsteps are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.685", "5.52", "6.173", "8.299", "9.024"], ["2.205", "4.866", "5.748", "6.409", "8.669", "10.0"], ["talking", "talking", "talking", "talking", "talking", "talking"]]}
{"captions": "A man speaks, narrates, and synthesizes speech alongside explosions.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.433", "2.803", "4.008", "7.283", "9.26"], ["0.252", "2.378", "3.457", "5.197", "8.622", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Vehicles honk while men speak.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.78", "3.378", "4.11", "7.827"], ["2.039", "3.945", "7.213", "10.0"], ["speak", "speak", "speak", "speak"]]}
{"captions": "Men speak, sizzle, and fry food in a small room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Sizzle"], ["0.008", "4.063", "6.276", "3.276"], ["2.346", "5.929", "10.0", "10.0"], ["speak", "speak", "speak", "fry"]]}
{"captions": "Mechanisms click and beep while a man speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.323", "0.882", "3.441", "7.811"], ["0.654", "2.819", "6.102", "9.528"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Men speak and brush their teeth with electric toothbrushes in the background.", "data": [["Male speech, man speaking", "Speech", "Speech", "Speech", "Male speech, man speaking"], ["0.0", "3.548", "5.209", "7.289", "9.883"], ["0.872", "4.372", "6.129", "8.572", "10.0"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Women are speaking, whispering, and making noises.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.118", "1.378", "3.252", "4.772", "7.402", "8.724"], ["0.756", "2.071", "4.008", "5.307", "7.921", "9.504"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "An alarm is repeatedly heard inside a room.", "data": [["Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm"], ["3.228", "4.543", "5.764", "6.992", "8.087", "9.37"], ["4.323", "5.535", "6.622", "7.906", "9.008", "10.0"], ["repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly"]]}
{"captions": "Music plays with a sound effect and a ringtone.", "data": [["Ringtone"], ["8.535"], ["10.0"], ["ringtone"]]}
{"captions": "Men are speaking, tapping, spraying, and making mechanical sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.181", "2.458", "4.698", "6.793", "8.671"], ["1.753", "2.759", "6.017", "8.054", "9.859"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks while music is played in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.433", "5.157", "6.394", "7.843", "9.835"], ["1.157", "4.055", "6.157", "7.653", "9.094", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "People are snapping their fingers, clapping, chuckling, and speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["3.468", "4.948", "6.277", "7.607"], ["4.769", "5.503", "7.064", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A family is having a conversation, a child is speaking and there are other unknown sounds.", "data": [["Male speech, man speaking", "Child speech, kid speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["1.695", "3.013", "3.747", "4.681", "5.395", "7.111", "7.55", "8.181", "9.348", "9.588"], ["2.629", "3.507", "4.297", "5.251", "6.994", "7.323", "8.119", "8.799", "9.485", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Music plays with tapping and a blender running.", "data": [["Blender"], ["0.961"], ["8.85"], ["blender"]]}
{"captions": "Telephone ringtones and conversation are heard.", "data": [["Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone"], ["1.843", "2.213", "2.567", "3.63", "3.976", "4.331", "4.669", "5.724", "6.087", "6.441", "6.787", "7.858", "8.205"], ["2.079", "2.433", "3.512", "3.843", "4.197", "4.528", "5.591", "5.953", "6.299", "6.661", "7.717", "8.055", "10.0"], ["ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones", "ringtones"]]}
{"captions": "Women are speaking and splashing water sounds can be heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.622", "3.693", "4.354", "6.567", "7.969"], ["0.354", "3.268", "3.969", "5.008", "7.543", "8.984"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Hubbub and crumpling noises accompany speech from both men and women, along with laughter and a train horn.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.591", "3.874", "6.583", "7.709"], ["2.488", "6.15", "7.362", "8.299"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A thud is heard in a small room, with a man speaking and an electric shaver sound.", "data": [["Electric shaver, electric razor", "Male speech, man speaking"], ["0.138", "9.47"], ["9.47", "10.0"], ["shaver", "speaking"]]}
{"captions": "A police car with a siren is driving, and people are talking on the radio.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["3.78", "5.965", "6.393", "7.116", "7.549", "7.798", "8.26", "8.925", "9.486"], ["4.532", "6.081", "6.913", "7.335", "7.642", "7.96", "8.578", "9.069", "9.653"], ["radio", "radio", "radio", "radio", "radio", "radio", "radio", "radio", "radio"]]}
{"captions": "Men are speaking and silences are punctuated by a tuning fork.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.129", "3.34", "5.231", "6.231", "7.076"], ["0.79", "2.298", "4.875", "5.889", "6.967", "7.753"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Male singing, shouting, and whistling accompanies female speech and clapping.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["4.053", "8.679"], ["4.878", "9.938"], ["speech", "speech"]]}
{"captions": "Water flows from a tap into a sink in a small room.", "data": [["Water tap, faucet"], ["1.425"], ["4.937"], ["flows"]]}
{"captions": "Dogs and wolves are barking and growling, with domestic animals making noise in an interior small room.", "data": [["Dog", "Dog", "Dog", "Dog"], ["0.0", "1.486", "5.301", "6.931"], ["0.867", "4.376", "6.457", "10.0"], ["barking", "barking", "barking", "barking"]]}
{"captions": "Cutlery sizzles and clinks in the background.", "data": [["Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Sizzle"], ["0.0", "2.286", "3.892", "5.113", "5.848", "6.459", "7.008", "7.426", "0.0"], ["1.229", "3.267", "4.386", "5.594", "6.184", "6.726", "7.282", "9.65", "10.0"], ["clinks", "clinks", "clinks", "clinks", "clinks", "clinks", "clinks", "clinks", "Cutlery"]]}
{"captions": "A woman and a child are speaking, singing and snapping their fingers with birds chirping in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.494", "1.627", "4.11", "5.382", "7.746", "7.983"], ["1.48", "3.393", "4.705", "5.59", "7.873", "9.393"], ["woman", "woman", "woman", "woman", "woman", "woman"]]}
{"captions": "Birds chirp and growl while a male speaks intermittently.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["8.677", "9.244"], ["9.039", "9.78"], ["speaks", "speaks"]]}
{"captions": "Man speaking and using an electric toothbrush with tapping sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.983", "5.252", "6.168"], ["0.85", "5.079", "5.921", "9.669"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Mechanisms sand while a man talks and liquids flow.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.671", "1.978", "5.419", "7.259"], ["1.538", "3.875", "6.807", "8.138"], ["talks", "talks", "talks", "talks"]]}
{"captions": "A woman is speaking, rodents are scurrying, and tapping can be heard inside a small room.", "data": [["Female speech, woman speaking"], ["0.0"], ["1.102"], ["speaking"]]}
{"captions": "Engine noise, music, man speaking, bells ringing, and more speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["4.685", "9.677"], ["9.165", "10.0"], ["speaking", "speaking"]]}
{"captions": "A man speaks while liquid pumps and mechanisms sound.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.415", "5.698", "7.129"], ["3.134", "4.629", "6.685", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Radio speech from both a man and woman are heard.", "data": [["Speech", "Female speech, woman speaking", "Speech", "Speech"], ["1.249", "4.392", "8.126", "9.722"], ["4.026", "6.185", "8.914", "10.0"], ["Radio", "woman", "Radio", "Radio"]]}
{"captions": "A doorbell rings, people are speaking, children are talking, and laughter is heard.", "data": [["Female speech, woman speaking", "Speech", "Speech", "Speech"], ["0.625", "2.2", "4.922", "6.694"], ["1.67", "4.223", "5.635", "7.875"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks in a small room with power tools, drills, and mechanisms being used.", "data": [["Male speech, man speaking"], ["0.0"], ["2.724"], ["speaks"]]}
{"captions": "A man is speaking and gears are moving in a road vehicle.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.51", "4.721", "5.1", "6.522", "7.27", "8.736"], ["3.675", "4.955", "6.113", "7.138", "7.796", "9.618"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Wind blows as music and croaking sounds can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["3.46", "4.275"], ["4.094", "6.748"], ["croaking", "croaking"]]}
{"captions": "A sink is being filled, water is running, and women are speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Sink (filling or washing)", "Female speech, woman speaking"], ["0.066", "0.325", "0.76", "1.98", "2.311", "2.725", "3.508", "3.944", "4.549", "5.384", "7.115", "7.54", "7.827", "8.191", "0.0", "8.698"], ["0.193", "0.579", "1.445", "2.239", "2.587", "3.398", "3.8", "4.418", "5.24", "6.619", "7.336", "7.744", "8.114", "8.389", "10.0", "9.068"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "running", "speaking"]]}
{"captions": "People are talking, clapping, singing, snapping, and making sounds.", "data": [["Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "4.646"], ["2.409", "6.268"], ["talking", "talking"]]}
{"captions": "A man speaks and taps while breathing heavily and occasionally making thumping sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.087", "3.04", "6.593"], ["0.675", "2.907", "6.109", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Panting and breathing of dogs can be heard with female speech in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.195", "4.996", "8.058"], ["1.909", "6.954", "9.776"], ["background", "background", "background"]]}
{"captions": "People are speaking, laughing, air horns are blowing and more in an urban area.", "data": [["Speech", "Speech", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "3.205", "5.496", "6.638"], ["0.386", "4.189", "6.118", "7.441"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music is playing with speech and sound effects like boing.", "data": [["Female speech, woman speaking", "Speech", "Female speech, woman speaking"], ["0.11", "2.78", "3.233"], ["0.632", "2.958", "3.905"], ["speech", "speech", "speech"]]}
{"captions": "People are speaking and laughing in a small room.", "data": [["Speech", "Speech", "Male speech, man speaking", "Speech"], ["0.613", "2.78", "5.584", "7.659"], ["1.815", "3.064", "6.757", "9.405"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Women speak with background noise and rodents.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.079", "5.543"], ["1.606", "5.126", "9.457"], ["Women", "Women", "Women"]]}
{"captions": "Cats are heard with mechanisms and tapping sounds, as a man is speaking and laughing.", "data": [["Cat", "Cat", "Male speech, man speaking", "Male speech, man speaking", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Male speech, man speaking"], ["0.0", "0.583", "1.969", "2.843", "1.008", "3.339", "4.835", "5.031", "5.323", "5.606", "6.496", "6.858", "8.646", "9.378", "9.622", "3.252"], ["0.126", "0.756", "2.606", "3.126", "1.15", "3.772", "4.961", "5.236", "5.441", "5.717", "6.669", "7.016", "8.756", "9.543", "10.0", "3.748"], ["tapping", "tapping", "speaking", "speaking", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "speaking"]]}
{"captions": "A telephone ringing, tapping, and people speaking, including a speech synthesizer, is heard.", "data": [["Conversation", "Female speech, woman speaking", "Telephone bell ringing", "Female speech, woman speaking"], ["3.039", "6.874", "0.079", "9.165"], ["6.48", "8.543", "3.087", "10.0"], ["speaking", "speaking", "telephone", "speaking"]]}
{"captions": "People are walking and speaking while alarms are going off.", "data": [["Smoke detector, smoke alarm", "Speech", "Alarm", "Speech", "Speech", "Alarm", "Speech", "Speech", "Alarm"], ["0.307", "0.299", "2.26", "2.299", "3.315", "3.307", "4.291", "8.362", "4.315"], ["1.11", "1.063", "3.094", "3.031", "4.024", "4.071", "5.024", "9.346", "5.008"], ["alarms", "speaking", "alarms", "speaking", "speaking", "alarms", "speaking", "speaking", "alarms"]]}
{"captions": "Birds sing and call amidst animal sounds, laughter, and wind.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Speech", "Speech"], ["0.0", "2.756", "5.929", "7.0"], ["1.535", "4.906", "6.701", "10.0"], ["laughter", "laughter", "laughter", "laughter"]]}
{"captions": "Buzzing, laughing, men speaking, breathing, and tapping are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.284", "5.168", "7.049", "7.989"], ["2.121", "6.486", "7.694", "8.38"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks while crumpling paper, breathing, and dribbling liquid is heard.", "data": [["Male speech, man speaking"], ["7.307"], ["10.0"], ["speaks"]]}
{"captions": "People are speaking and dogs and other animals can be heard.", "data": [["Speech", "Dog", "Female speech, woman speaking", "Dog"], ["0.0", "7.094", "3.425", "9.087"], ["0.409", "8.449", "6.354", "9.661"], ["speaking", "dogs", "speaking", "dogs"]]}
{"captions": "People run and walk while talking and children speak.", "data": [["Speech", "Conversation", "Conversation", "Speech", "Speech", "Child speech, kid speaking", "Female speech, woman speaking"], ["0.008", "1.283", "2.417", "3.52", "4.772", "8.244", "9.134"], ["0.567", "2.102", "3.433", "4.457", "5.693", "8.606", "10.0"], ["talking", "talking", "talking", "talking", "talking", "children", "talking"]]}
{"captions": "Breathing and stirring are heard while a man speaks in a kitchen.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.239", "2.225", "4.079", "5.332", "8.874"], ["2.14", "3.972", "4.519", "7.185", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Mechanisms and sounds of dishes and pans clanging together are heard in this sequence.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans"], ["0.252", "0.583", "1.165", "1.354", "2.89", "3.709", "4.528", "4.945", "5.488", "7.331", "7.52", "8.638", "9.386", "9.921"], ["0.488", "0.701", "1.252", "1.654", "3.134", "4.15", "4.709", "5.079", "5.984", "7.425", "7.772", "9.134", "9.693", "10.0"], ["clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging", "clanging"]]}
{"captions": "The sound of dishes and pans being used while frying food is heard.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Frying (food)", "Dishes, pots, and pans"], ["0.498", "0.612", "1.129", "1.581", "2.75", "3.116", "3.394", "3.559", "3.97", "4.089", "4.267", "4.61", "4.765", "5.066", "5.24", "5.418", "5.573", "5.738", "5.943", "6.09", "6.268", "6.834", "7.026", "7.2", "7.378", "7.657", "8.269", "9.219", "9.37", "9.534", "9.671", "9.817", "0.0", "9.954"], ["0.557", "0.722", "1.224", "2.348", "3.024", "3.253", "3.499", "3.682", "4.034", "4.176", "4.409", "4.678", "4.943", "5.135", "5.322", "5.5", "5.656", "5.788", "6.026", "6.172", "6.364", "6.967", "7.104", "7.282", "7.46", "8.123", "8.776", "9.301", "9.452", "9.603", "9.753", "9.891", "10.0", "10.0"], ["frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying", "frying"]]}
{"captions": "A woman is speaking over music and blender sounds.", "data": [["Female speech, woman speaking", "Blender"], ["0.0", "4.244"], ["2.685", "10.0"], ["speaking", "blender"]]}
{"captions": "Train wheels squeal, and there are sounds of trains and vehicles with a heavy engine, along with male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.68", "2.1", "8.092", "9.204"], ["1.922", "2.279", "8.641", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A man and a woman are speaking and making various sounds, including panting and rustling.", "data": [["Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "0.78"], ["0.535", "1.213"], ["man", "woman"]]}
{"captions": "Multiple people are speaking in an urban area, including a man and a woman, with background noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.913", "2.205", "3.307", "4.087", "4.323", "6.165"], ["0.339", "1.622", "3.189", "3.772", "4.291", "4.709", "10.0"], ["man", "man", "man", "woman", "woman", "man", "man"]]}
{"captions": "Telephone bells are ringing and people are speaking, with music and speech in the background.", "data": [["Male speech, man speaking", "Speech", "Male speech, man speaking", "Ringtone", "Male speech, man speaking", "Speech", "Ringtone", "Male speech, man speaking"], ["0.64", "1.581", "2.979", "0.0", "4.669", "7.848", "6.108", "9.297"], ["1.266", "2.773", "4.271", "0.58", "6.007", "8.762", "7.871", "10.0"], ["speech", "speaking", "speech", "Telephone", "speech", "speaking", "Telephone", "speech"]]}
{"captions": "People are talking, honking and making mechanisms sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.784", "5.669", "7.084", "9.667"], ["2.634", "5.492", "6.496", "9.511", "10.0"], ["talking", "talking", "talking", "talking", "talking"]]}
{"captions": "Cutlery is used as people speak and machines whir.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Cutlery, silverware", "Female speech, woman speaking", "Female speech, woman speaking", "Cutlery, silverware"], ["0.0", "3.756", "6.937", "5.15", "8.134", "7.315"], ["2.811", "4.945", "7.102", "6.039", "10.0", "7.661"], ["speak", "speak", "whir", "speak", "speak", "whir"]]}
{"captions": "Lawn mowers run while someone is speaking and tapping.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.405", "1.229", "3.638", "4.434", "5.216", "5.697", "7.529"], ["0.858", "2.89", "3.823", "4.901", "5.456", "6.287", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Dogs are barking and clicking, with breathing heard in the background.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.0", "0.583", "1.094", "3.0", "3.323", "8.368"], ["0.386", "1.0", "1.74", "3.189", "3.48", "8.603"], ["barking", "barking", "barking", "barking", "barking", "barking"]]}
{"captions": "Music plays in the background while women and men speak and snap their fingers.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.006", "2.22", "3.098", "9.723"], ["0.873", "2.653", "4.474", "10.0"], ["women", "men", "women", "women"]]}
{"captions": "Boiling water and men are speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["7.677", "9.165"], ["8.693", "10.0"], ["speaking", "speaking"]]}
{"captions": "People are biting, clicking, chewing, speaking, and crumpling papers.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.677", "4.417", "5.268", "7.087", "8.024"], ["3.811", "4.961", "5.866", "7.724", "8.646"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are talking and cooking food in a room with sizzling and clanging sounds.", "data": [["Sizzle", "Dishes, pots, and pans", "Male speech, man speaking"], ["0.0", "0.024", "7.756"], ["10.0", "0.701", "9.669"], ["cooking", "clanging", "talking"]]}
{"captions": "Pots and pans sizzle as someone stirs and speaks.", "data": [["Male speech, man speaking", "Sizzle", "Dishes, pots, and pans", "Male speech, man speaking"], ["6.796", "0.0", "0.428", "7.993"], ["7.75", "10.0", "0.573", "9.219"], ["speaks", "pans", "sizzle", "speaks"]]}
{"captions": "Scraping sounds and a man speaking are heard, followed by music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "3.116", "5.168", "0.0"], ["0.48", "4.202", "5.746", "10.0"], ["speaking", "speaking", "speaking", "Scraping"]]}
{"captions": "Background noise and wind noise can be heard with taps, ticks, an alarm and more.", "data": [["Alarm"], ["6.63"], ["10.0"], ["alarm"]]}
{"captions": "Various sounds including child speech, walking, and laughing, accompanied by whistling, thwacking, and rustling.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.11", "2.835", "5.52"], ["0.551", "3.047", "7.307"], ["speech", "speech", "speech"]]}
{"captions": "Men speak on the telephone and in speech, with dialing sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.772", "4.409", "9.465"], ["0.937", "2.528", "8.465", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A small room has sizzling and blending sounds.", "data": [["Sizzle", "Blender"], ["0.0", "4.976"], ["4.949", "10.0"], ["blending", "blending"]]}
{"captions": "A woman speaks, taps, and handles liquid.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.583", "3.244", "6.937", "9.063"], ["1.291", "3.15", "6.055", "8.244", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Pouring, speech, pump and faucet sounds are heard in a kitchen or bathroom setting.", "data": [["Speech", "Water tap, faucet", "Male speech, man speaking", "Water tap, faucet"], ["0.008", "0.008", "1.488", "6.614"], ["0.22", "4.039", "3.11", "10.0"], ["speech", "faucet", "speech", "faucet"]]}
{"captions": "Breathing and a man speaking softly and whispering are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.0", "2.934", "5.18"], ["2.066", "3.934", "6.164"], ["whispering", "whispering", "whispering"]]}
{"captions": "Laughter and conversation can be heard with background noise and tapping sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.336", "2.958", "5.668", "6.501", "9.26"], ["2.094", "4.453", "6.333", "7.559", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Male speech, laughter, and idling are heard intermittently.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.244", "7.031", "7.693", "8.969", "9.591"], ["0.496", "5.063", "7.535", "8.189", "9.268", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Various sounds, including laughter and music, punctuate female speech and water splashing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.327", "2.603", "4.005", "5.508", "6.658"], ["1.847", "3.543", "4.811", "6.423", "7.271"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Explosions, gunshots, speech, and gunfire sounds alternate.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["6.496", "9.276"], ["8.402", "10.0"], ["speech", "speech"]]}
{"captions": "A crowd cheers, claps, and speaks loudly while music plays in the background.", "data": [["Female speech, woman speaking"], ["3.354"], ["5.677"], ["speaks"]]}
{"captions": "Male speech is intermittently interrupted by cheering, laughter, smashing glass and whooshing sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.52", "5.024", "7.346", "9.157"], ["3.307", "4.937", "6.055", "7.992", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Children and adults play and shout around a motorboat in an urban environment near water.", "data": [["Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.37", "1.583", "5.764", "7.008"], ["1.315", "3.394", "6.236", "8.984"], ["shout", "shout", "shout", "shout"]]}
{"captions": "A blender is vibrating.", "data": [["Blender"], ["0.0"], ["10.0"], ["vibrating"]]}
{"captions": "Women are speaking, tapping, and using mechanisms, with music in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Electric shaver, electric razor"], ["0.0", "0.969", "6.717", "8.882", "2.465"], ["0.575", "2.961", "8.142", "10.0", "6.488"], ["speaking", "speaking", "speaking", "speaking", "tapping"]]}
{"captions": "Mechanisms accompany clapping and speech from both men and women.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.189", "3.496", "4.323", "6.299", "6.709", "7.598", "8.512", "9.551"], ["2.906", "4.008", "6.197", "6.551", "7.315", "8.307", "8.992", "10.0"], ["clapping", "clapping", "clapping", "speech", "speech", "clapping", "clapping", "clapping"]]}
{"captions": "A man speaks, music plays, and an eruption occurs.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.843", "4.433", "6.52", "7.638", "8.795", "9.929"], ["1.913", "3.976", "5.906", "7.307", "8.516", "9.78", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "There is speech, pouring liquid and more speech from a man.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "7.031", "9.764"], ["6.15", "9.071", "10.0"], ["speech", "speech", "speech"]]}
{"captions": "A woman is crumpling paper while speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.311", "0.811", "1.598", "3.59", "3.934", "5.369", "8.984"], ["0.246", "0.713", "1.213", "3.23", "3.861", "4.811", "6.926", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Keyboard typing and conversation is heard in a small room with interspersed bird chirping.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.493", "2.562", "3.684", "4.523", "6.219", "9.894"], ["2.376", "3.348", "4.028", "5.186", "8.905", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "A man and a child are speaking with laughter, breathing and more men speaking.", "data": [["Male speech, man speaking", "Child speech, kid speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.652", "6.543", "7.111", "7.477", "8.407", "8.832"], ["0.281", "3.123", "7.047", "7.414", "8.306", "8.725", "10.0"], ["men", "child", "men", "men", "men", "men", "men"]]}
{"captions": "A vehicle is heard and ticking and tocking sounds are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["0.487", "5.239"], ["1.664", "6.342"], ["ticking", "ticking"]]}
{"captions": "A man speaks while a liquid pump operates in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.441", "0.781", "1.168", "1.641", "1.869", "2.544", "4.095", "6.654", "7.207", "9.411", "9.857"], ["0.701", "1.062", "1.514", "1.742", "2.395", "2.953", "4.742", "7.1", "8.81", "9.586", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Men are speaking while water is running.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Water tap, faucet", "Male speech, man speaking"], ["0.457", "3.772", "6.496", "0.0", "9.213"], ["3.252", "5.961", "8.858", "10.0", "10.0"], ["speaking", "speaking", "speaking", "running", "speaking"]]}
{"captions": "Man speaking, speech, alarm, fire alarm, and smoke detector.", "data": [["Male speech, man speaking", "Alarm"], ["0.008", "3.213"], ["0.583", "10.0"], ["speech", "alarm"]]}
{"captions": "Male speech, music, television, telephone bell ringing, barking, and speech are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Telephone bell ringing", "Speech", "Telephone bell ringing", "Male speech, man speaking", "Speech", "Telephone bell ringing"], ["0.0", "0.74", "3.803", "4.055", "5.48", "5.78", "9.331", "9.315"], ["0.622", "3.709", "4.512", "5.205", "5.669", "9.165", "10.0", "10.0"], ["speech", "speech", "telephone", "telephone", "telephone", "speech", "telephone", "telephone"]]}
{"captions": "Men are speaking, tapping, and bouncing with mechanisms and other sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.063", "4.173", "7.701"], ["0.661", "4.449", "9.646"], ["speaking", "speaking", "speaking"]]}
{"captions": "People are speaking, snoring, and laughing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.599", "1.06", "4.683", "6.026", "7.081", "8.269"], ["0.95", "1.901", "5.875", "6.761", "7.885", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Rain and thunder are heard, and men speak.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["7.906", "9.929"], ["8.268", "10.0"], ["speak", "speak"]]}
{"captions": "Women talk, sing, and music plays.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.197", "3.142", "4.0", "5.953", "8.63"], ["1.843", "3.906", "5.591", "8.472", "10.0"], ["talk", "talk", "talk", "talk", "talk"]]}
{"captions": "Men speak over music.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["5.043", "8.936"], ["8.531", "9.919"], ["speak", "speak"]]}
{"captions": "A woman speaks, followed by whispering, then a man speaks before a door slams and footsteps are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.663", "2.512", "4.318", "6.531", "7.122", "8.941"], ["0.367", "2.356", "2.946", "5.058", "6.864", "7.583", "10.0"], ["woman", "woman", "woman", "woman", "man", "man", "man"]]}
{"captions": "Birds chirp, rustling is heard, people speak, and insects are heard.", "data": [["Male speech, man speaking", "Female speech, woman speaking"], ["3.528", "6.307"], ["5.0", "9.102"], ["speak", "speak"]]}
{"captions": "Men talk while various noises, thumps, and breathing are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.024", "1.89", "2.913", "4.598", "5.945", "7.591"], ["0.528", "1.732", "2.598", "4.299", "4.992", "7.283", "8.362"], ["talk", "talk", "talk", "talk", "talk", "talk", "talk"]]}
{"captions": "Dogs growl and bark, a TV is heard, and there is speech and laughter from both men and women.", "data": [["Dog", "Speech", "Speech", "Dog", "Female speech, woman speaking"], ["2.236", "6.197", "7.165", "5.63", "9.205"], ["3.362", "6.929", "8.882", "8.441", "10.0"], ["growl", "speech", "speech", "growl", "laughter"]]}
{"captions": "Music is playing with a power tool and vacuum cleaner.", "data": [["Vacuum cleaner"], ["0.008"], ["10.0"], ["vacuum"]]}
{"captions": "Conversations and snickering are heard, with male and female speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "4.426", "4.99", "5.771", "9.369"], ["3.34", "4.664", "5.336", "7.889", "10.0"], ["Conversations", "Conversations", "Conversations", "Conversations", "snickering"]]}
{"captions": "A man speaks, followed by a vacuum cleaner, mechanisms, and more male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Vacuum cleaner"], ["0.0", "3.079", "6.457", "8.449", "9.654", "0.0"], ["2.252", "6.228", "8.079", "9.15", "10.0", "3.016"], ["man", "man", "man", "man", "man", "vacuum"]]}
{"captions": "Animals and birds make noises, along with children speaking and cutlery clinking.", "data": [["Child speech, kid speaking", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Child speech, kid speaking"], ["4.898", "3.85", "8.26", "9.15", "7.378"], ["5.882", "5.858", "8.646", "9.622", "8.63"], ["speaking", "clinking", "clinking", "clinking", "speaking"]]}
{"captions": "People are laughing, speaking, and firing fireworks.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.567", "7.197", "8.606", "9.478"], ["0.976", "7.811", "9.433", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking and a police car is passing by with its siren on.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.009", "0.507", "1.024", "1.766", "2.337", "2.935", "3.551", "4.312", "5.308", "5.625"], ["0.39", "0.906", "1.431", "2.129", "2.799", "3.116", "4.04", "5.163", "5.498", "5.96"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Women whisper and speak in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.709", "6.394", "7.937", "8.496", "9.37"], ["5.378", "6.591", "8.197", "8.953", "10.0"], ["whisper", "whisper", "whisper", "whisper", "whisper"]]}
{"captions": "Video game and explosion sounds are heard among male speech and radio noise.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["2.615", "4.75"], ["4.283", "6.486"], ["speech", "speech"]]}
{"captions": "Women and men speak, buzzers sound and a crowd laughs and applauds.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "5.087", "7.906"], ["1.425", "6.016", "10.0"], ["speak", "speak", "speak"]]}
{"captions": "There is a mix of speech, music, and children's voices.", "data": [["Female speech, woman speaking", "Speech", "Child speech, kid speaking", "Speech", "Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Speech", "Female speech, woman speaking"], ["0.0", "1.315", "2.126", "3.291", "3.677", "5.346", "6.362", "7.331", "7.89", "9.0"], ["1.283", "1.85", "3.197", "3.598", "5.236", "6.22", "7.228", "7.827", "8.945", "9.906"], ["speech", "speech", "children's", "speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A child speaks and mechanisms, doors, and water taps can be heard with a man speaking.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Water tap, faucet"], ["0.0", "2.071", "4.52", "6.693", "4.535"], ["0.921", "3.339", "5.882", "10.0", "8.276"], ["child", "child", "child", "child", "taps"]]}
{"captions": "A man speaks, there is chatter, and a duck quacks, amidst wind noise, a stream, and a rowboat.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.488", "3.748", "4.512", "6.063", "6.402", "9.205"], ["3.402", "3.661", "4.362", "5.512", "6.346", "7.244", "9.654"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Ticking and conversation sounds, with speech and laughter, can be heard.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.237", "1.359", "1.683", "2.192", "2.886", "3.638", "4.147", "9.057"], ["1.209", "1.538", "2.036", "2.747", "3.505", "4.072", "8.508", "9.832"], ["conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Music is playing with women speaking and speech also heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Female speech, woman speaking", "Speech", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.236", "1.543", "2.717", "3.803", "5.339", "7.173", "8.409"], ["1.126", "2.055", "3.732", "4.717", "5.929", "8.252", "9.709"], ["speaking", "speaking", "speech", "speaking", "speech", "speaking", "speaking"]]}
{"captions": "Artillery fire, clicking, rumbling, and speech occur.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["6.268", "9.803"], ["7.181", "10.0"], ["speech", "speech"]]}
{"captions": "Crushing, breathing, and conversation sounds are present.", "data": [["Conversation", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.63", "3.638", "5.544", "7.504", "8.141", "8.879", "9.453"], ["2.682", "5.082", "7.031", "8.03", "8.816", "9.363", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Sound effects, music, and male speech are followed by a single plop.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["6.811", "7.701", "9.055"], ["7.394", "8.78", "9.748"], ["speech", "speech", "speech"]]}
{"captions": "Music, boing, sound effects, and speech accompany a man speaking and birds chirping.", "data": [["Speech"], ["3.465"], ["5.547"], ["speaking"]]}
{"captions": "A woman is speaking with background noise, tapping sounds, and breathing heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.008", "1.528", "2.024", "4.583", "7.622", "8.402", "9.756"], ["0.417", "1.685", "3.457", "5.756", "8.094", "9.339", "10.0"], ["woman", "woman", "woman", "woman", "woman", "woman", "woman"]]}
{"captions": "People are chopping food and speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.465", "2.457", "3.22", "7.039", "8.244", "9.102"], ["0.772", "2.945", "6.858", "7.827", "8.866", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man uses tools and speaks while water runs from a tap in a small room.", "data": [["Water tap, faucet", "Water tap, faucet", "Water tap, faucet", "Water tap, faucet", "Male speech, man speaking"], ["0.0", "1.142", "2.339", "5.654", "8.378"], ["0.543", "1.283", "2.85", "10.0", "9.071"], ["tap", "tap", "tap", "tap", "speaks"]]}
{"captions": "A man is speaking, breathing, and making noises over ticks.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.591", "4.575", "6.299"], ["2.898", "4.449", "4.945", "9.709"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are shouting, clapping, and cheering while music plays in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.488", "6.189"], ["0.535", "5.953", "9.63"], ["shouting", "shouting", "shouting"]]}
{"captions": "A television is on with conversations and speeches being heard in a small room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Conversation", "Conversation", "Female speech, woman speaking", "Speech", "Male speech, man speaking", "Speech", "Speech"], ["0.265", "1.012", "1.745", "2.268", "2.858", "3.578", "4.162", "6.225", "7.814", "9.05"], ["0.557", "1.69", "2.105", "2.722", "3.523", "4.101", "6.09", "7.366", "8.792", "9.525"], ["conversations", "conversations", "conversations", "conversations", "conversations", "conversations", "conversations", "conversations", "conversations", "conversations"]]}
{"captions": "Women are speaking, laughing, walking, running, and shouting.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.583", "3.685", "4.299", "4.709", "8.819"], ["0.85", "3.551", "4.157", "4.622", "7.992", "10.0"], ["laughing", "laughing", "laughing", "laughing", "laughing", "laughing"]]}
{"captions": "A dog barks, growls, and a bell rings.", "data": [["Dog", "Dog"], ["0.552", "3.787"], ["3.691", "7.058"], ["barks", "barks"]]}
{"captions": "Mechanisms and tapping sounds can be heard with a sizzling sound in the background.", "data": [["Sizzle"], ["0.0"], ["10.0"], ["sizzling"]]}
{"captions": "Truck sounds and male speech are heard with the wind in this recording.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.567", "2.118", "2.843", "4.598", "7.008"], ["1.402", "2.386", "3.717", "4.85", "8.622"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Conversations, laughter, coughing, and mechanisms are heard among human voices.", "data": [["Conversation", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.929", "8.008", "8.591", "8.929"], ["2.252", "8.543", "8.795", "10.0"], ["laughter", "Conversations", "Conversations", "Conversations"]]}
{"captions": "Sizzling and music accompany male speeches.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Sizzle", "Male speech, man speaking", "Male speech, man speaking", "Sizzle", "Male speech, man speaking"], ["0.228", "2.748", "0.0", "4.268", "8.197", "5.622", "9.094"], ["0.63", "3.016", "3.173", "5.394", "8.756", "10.0", "9.717"], ["speech", "speech", "Sizzling", "speech", "speech", "Sizzling", "speech"]]}
{"captions": "A man is speaking with sound effects including beeps, whooshes, and more.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.689", "8.251", "8.975"], ["1.581", "8.392", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking with thumps and thuds in the background.", "data": [["Male speech, man speaking", "Speech", "Male speech, man speaking", "Speech", "Speech"], ["2.024", "3.276", "3.866", "5.189", "6.425"], ["2.866", "3.591", "4.315", "5.701", "6.74"], ["speaking", "thumps", "speaking", "thumps", "thumps"]]}
{"captions": "Male speech is being held with background noise, tapping, breathing, beeping, and breathing sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.27", "1.814", "2.513", "3.061", "4.655", "4.993", "5.966", "6.542", "7.615", "8.214"], ["0.133", "0.964", "2.435", "2.956", "4.148", "4.87", "5.848", "6.295", "6.779", "7.972", "9.883"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Animals and dogs bark and growl.", "data": [["Dog", "Dog", "Dog", "Dog"], ["0.0", "2.0", "4.268", "6.748"], ["0.913", "3.504", "5.346", "7.984"], ["bark", "bark", "bark", "bark"]]}
{"captions": "Women are speaking and rodents can be heard, with music playing in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.11", "3.668", "4.909", "6.012", "7.709", "8.544", "9.941"], ["0.601", "2.5", "4.308", "5.94", "6.593", "8.355", "9.282", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A monologue with a woman speaking and making kitchen sounds.", "data": [["Female speech, woman speaking", "Speech", "Blender", "Speech", "Speech", "Blender"], ["0.0", "2.565", "4.434", "6.506", "9.262", "7.323"], ["2.209", "3.755", "5.592", "7.037", "10.0", "8.779"], ["speaking", "kitchen", "kitchen", "kitchen", "kitchen", "kitchen"]]}
{"captions": "A dog is sneezing, people are speaking, and a dog is sniffing.", "data": [["Dog", "Dog", "Male speech, man speaking"], ["0.354", "8.748", "7.748"], ["6.882", "9.26", "8.638"], ["sneezing", "sneezing", "speaking"]]}
{"captions": "A woman is speaking, brushing her teeth, and making ticking sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.827", "3.134", "4.858", "5.646", "6.583", "9.244"], ["0.543", "2.425", "3.677", "5.299", "6.079", "7.465", "9.874"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A woman is speaking, with a power tool, thunderstorm and man speaking in the background.", "data": [["Female speech, woman speaking", "Male speech, man speaking"], ["0.0", "8.165"], ["2.394", "10.0"], ["woman", "man"]]}
{"captions": "Male and female speech intermingle with noise, laughter, and music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "5.89", "8.449"], ["0.239", "7.61", "10.0"], ["Male", "Male", "Male"]]}
{"captions": "People talk and breathe amid background noise and hissing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.055", "4.898", "7.331"], ["2.843", "4.48", "7.181", "10.0"], ["talk", "talk", "talk", "talk"]]}
{"captions": "Tapping and chopping sounds are made with male speech and sound effects.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.11", "1.613", "3.15", "4.255", "6.349", "7.543", "9.712"], ["0.803", "2.924", "4.18", "4.818", "7.337", "9.266", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "People are talking, shuffling cards, and making noise.", "data": [["Conversation", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.48", "1.717", "2.181", "2.646", "3.457", "7.134"], ["1.205", "2.055", "2.37", "3.197", "6.551", "10.0"], ["talking", "talking", "talking", "talking", "talking", "talking"]]}
{"captions": "Music is playing, and a woman and a man are snapping their fingers and speaking.", "data": [["Conversation", "Conversation", "Female speech, woman speaking"], ["1.717", "5.283", "8.197"], ["4.465", "6.622", "10.0"], ["speaking", "speaking", "woman"]]}
{"captions": "Women and children speak amidst music and general hubbub.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "3.782", "6.616", "8.511"], ["3.61", "5.951", "8.456", "10.0"], ["Women", "hubbub", "hubbub", "Women"]]}
{"captions": "Various birds are chirping and dogs are barking and barking.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.717", "1.882", "4.661", "6.181", "8.197", "9.071"], ["1.339", "2.843", "5.882", "7.252", "8.685", "9.693"], ["barking", "barking", "barking", "barking", "barking", "barking"]]}
{"captions": "Food is frying, with tap and stir sounds also heard.", "data": [["Frying (food)"], ["0.0"], ["10.0"], ["frying"]]}
{"captions": "A woman speaks, followed by the sound of a church bell and someone screaming.", "data": [["Female speech, woman speaking"], ["0.457"], ["0.866"], ["speaks"]]}
{"captions": "Cats are meowing and birds are singing amidst background noise.", "data": [["Cat", "Cat"], ["1.256", "3.686"], ["1.682", "4.125"], ["meowing", "meowing"]]}
{"captions": "Various mechanisms are making sounds along with wind noise, tapping, and male and female speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.244", "6.622", "7.244", "8.236", "9.087"], ["1.189", "6.787", "7.598", "8.614", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "People speak, there is an explosion, and things break.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking", "Male speech, man speaking"], ["0.0", "2.724", "3.717", "6.906", "7.693", "9.496"], ["2.354", "3.016", "3.992", "7.559", "9.063", "9.583"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Music plays with female singing and speech.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["1.773", "3.439"], ["2.984", "5.148"], ["singing", "singing"]]}
{"captions": "A woman is speaking while crinkling and tearing paper.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.441", "3.551", "5.669", "6.543"], ["2.535", "5.071", "6.181", "7.953"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A woman speaks on the telephone and dials numbers in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["0.228", "4.11"], ["2.953", "4.772"], ["speaks", "speaks"]]}
{"captions": "A woman speaks repeatedly followed by beeping sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.613", "1.278", "1.938", "3.58", "4.147", "5.362", "6.096", "6.645", "7.906", "8.248", "9.231"], ["0.515", "0.989", "1.648", "3.303", "4.002", "5.026", "6.021", "6.536", "7.368", "8.12", "9.144", "9.873"], ["woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman", "woman"]]}
{"captions": "A female is singing, music is playing, people are speaking, laughing, singing, clapping and shouting.", "data": [["Female speech, woman speaking", "Male speech, man speaking"], ["0.449", "3.197"], ["0.866", "3.323"], ["speaking", "speaking"]]}
{"captions": "A woman is speaking while using a hair dryer.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["6.913", "8.701"], ["8.024", "9.746"], ["speaking", "speaking"]]}
{"captions": "People are speaking and a speech synthesizer is used. A man speaks and the speech synthesizer is used multiple times.", "data": [["Speech", "Male speech, man speaking", "Speech"], ["0.008", "4.323", "8.299"], ["3.528", "7.496", "10.0"], ["synthesizer", "man", "synthesizer"]]}
{"captions": "Men are speaking and chopping with hubbub in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.794", "1.69", "2.098", "4.895", "5.995", "7.108", "9.206"], ["1.439", "1.989", "2.926", "5.838", "7.027", "7.936", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People speak as food sizzles in the background.", "data": [["Speech", "Speech", "Speech", "Female speech, woman speaking", "Speech", "Frying (food)", "Female speech, woman speaking"], ["0.211", "1.446", "3.123", "4.97", "6.85", "0.0", "8.561"], ["1.215", "2.376", "3.978", "6.592", "8.208", "4.162", "10.0"], ["speak", "speak", "speak", "sizzles", "speak", "sizzles", "sizzles"]]}
{"captions": "Food sizzles and fries in a small room while dishes clank and women speak.", "data": [["Dishes, pots, and pans", "Female speech, woman speaking", "Sizzle", "Dishes, pots, and pans", "Female speech, woman speaking"], ["0.288", "1.592", "0.0", "4.249", "8.236"], ["0.94", "3.61", "10.0", "10.0", "9.259"], ["clank", "speak", "fries", "clank", "speak"]]}
{"captions": "Females speak amidst music and bird calls.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.772", "6.685", "8.055", "9.874"], ["2.039", "5.953", "7.858", "9.598", "10.0"], ["music", "music", "music", "music", "music"]]}
{"captions": "Food is sizzling and being cooked on a stove.", "data": [["Sizzle", "Frying (food)", "Sizzle"], ["0.0", "1.5", "5.642"], ["1.473", "5.574", "10.0"], ["sizzling", "sizzling", "sizzling"]]}
{"captions": "Wind and speech are heard in a rural setting.", "data": [["Speech", "Male speech, man speaking", "Speech", "Speech"], ["0.345", "0.844", "3.096", "7.09"], ["0.717", "1.211", "4.434", "8.603"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Men sing and speak to music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "2.421", "8.998"], ["1.853", "4.357", "10.0"], ["sing", "sing", "sing"]]}
{"captions": "People are clicking and speaking while a television plays and people breathe.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Toilet flush", "Female speech, woman speaking"], ["4.481", "5.343", "6.721", "7.488", "8.167", "8.839", "9.416", "0.285", "9.953"], ["5.071", "5.9", "7.223", "7.991", "8.507", "9.158", "9.742", "3.714", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "clicking", "speaking"]]}
{"captions": "Speech babble and clattering dishes and silverware can be heard, along with a child's voice.", "data": [["Dishes, pots, and pans", "Male speech, man speaking", "Dishes, pots, and pans", "Cutlery, silverware", "Cutlery, silverware", "Female speech, woman speaking", "Child speech, kid speaking", "Dishes, pots, and pans"], ["0.85", "0.748", "1.386", "4.693", "5.299", "1.63", "8.756", "7.717"], ["0.969", "1.173", "1.504", "4.843", "5.52", "3.409", "9.354", "7.874"], ["clattering", "babble", "clattering", "silverware", "silverware", "babble", "babble", "clattering"]]}
{"captions": "Dogs bark and birds fly and make flight sounds as a man speaks.", "data": [["Dog", "Dog", "Dog", "Dog", "Male speech, man speaking"], ["0.0", "4.898", "7.654", "8.795", "3.929"], ["0.268", "6.457", "8.016", "9.181", "4.717"], ["bark", "bark", "bark", "bark", "speaks"]]}
{"captions": "A man speaking, tapping and narrating over speech and filed wood sound.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.386", "4.699", "5.638", "5.921"], ["3.591", "4.601", "5.48", "5.819", "9.217"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Women are speaking on the phone, with music, telephone bells ringing and conversations.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Telephone bell ringing", "Female speech, woman speaking", "Telephone bell ringing"], ["0.339", "3.417", "6.732", "4.37", "8.835"], ["3.181", "4.094", "7.882", "5.26", "10.0"], ["Women", "Women", "bells", "Women", "bells"]]}
{"captions": "A crowd cheers, whoops and a basketball is bounced in a game.", "data": [["Male speech, man speaking"], ["0.0"], ["10.0"], ["whoops"]]}
{"captions": "Speech and the sounds of a motorcycle, engine, and vehicle are interspersed with more speech.", "data": [["Speech", "Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.126", "4.094", "7.441", "9.819"], ["0.85", "3.803", "7.087", "9.441", "10.0"], ["sound", "speech", "sound", "speech", "speech"]]}
{"captions": "Dogs are whimpering, howling, and other domestic animals are heard.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog"], ["0.0", "2.701", "4.222", "7.021", "8.506"], ["1.324", "3.569", "6.61", "8.148", "10.0"], ["whimpering", "whimpering", "whimpering", "whimpering", "whimpering"]]}
{"captions": "Children are playing, whistling and laughing, with clicking sounds in the background.", "data": [["Child speech, kid speaking"], ["7.677"], ["9.037"], ["laughing"]]}
{"captions": "A fire engine is passing by with a siren, and people are speaking.", "data": [["Male speech, man speaking", "Child speech, kid speaking"], ["7.228", "8.488"], ["8.276", "9.142"], ["speaking", "speaking"]]}
{"captions": "Female speech and music can be heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.133", "1.641", "4.126", "5.119", "9.012", "9.575"], ["1.429", "3.946", "4.827", "5.858", "9.458", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A woman whistles and speaks while mechanisms beep, beep, and walk sounds play.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["5.668", "8.236", "8.652"], ["6.802", "8.531", "9.115"], ["speaks", "speaks", "speaks"]]}
{"captions": "Television, mechanisms, bird songs, whistling, and speech are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.087", "2.266", "3.619"], ["0.416", "3.266", "5.15"], ["speech", "speech", "speech"]]}
{"captions": "Music plays while women speak, sing, clap, and buzz.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.339", "1.685", "4.677", "5.087", "6.134", "6.787", "7.346", "7.693"], ["0.764", "2.087", "4.953", "5.236", "6.362", "7.0", "7.528", "8.811"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Music plays as a man sings, chuckles, and speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["4.604", "6.94"], ["5.101", "8.062"], ["sings", "sings"]]}
{"captions": "A woman is speaking while music plays and food sizzles on a frying pan.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)", "Female speech, woman speaking", "Sizzle"], ["0.0", "2.898", "4.031", "5.016", "0.189", "5.457", "7.449"], ["0.197", "3.898", "4.882", "5.339", "2.37", "5.724", "10.0"], ["speaking", "speaking", "speaking", "speaking", "sizzles", "speaking", "frying"]]}
{"captions": "Mechanisms, mouse sounds, and a woman speaking are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.48", "4.394", "6.559"], ["1.606", "5.835", "6.764"], ["woman", "woman", "woman"]]}
{"captions": "Taps are heard, a sliding door is opened, a dog barks and a woman is speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Dog", "Female speech, woman speaking"], ["4.957", "5.471", "6.385", "8.045", "4.286", "8.733"], ["5.2", "6.137", "7.293", "8.473", "4.563", "9.873"], ["woman", "woman", "woman", "woman", "barks", "woman"]]}
{"captions": "A man shaves and speaks while music plays in a small room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "1.748", "8.772", "0.693"], ["1.039", "4.024", "10.0", "10.0"], ["speaks", "speaks", "speaks", "shaves"]]}
{"captions": "Coins drop and a man speaks before a dog barks.", "data": [["Male speech, man speaking", "Dog"], ["6.913", "8.803"], ["7.409", "9.417"], ["speaks", "barks"]]}
{"captions": "People are speaking, walking, laughing, and making various sounds in a room.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.008", "1.866", "2.866", "3.724", "4.866", "6.11", "6.803", "8.0", "8.709", "9.764"], ["1.654", "2.457", "3.598", "4.732", "5.354", "6.74", "7.52", "8.677", "9.386", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Dogs bark and power tools can be heard in a small room.", "data": [["Dog", "Dog", "Dog", "Dog", "Vacuum cleaner"], ["0.0", "5.0", "7.835", "9.299", "0.016"], ["0.307", "7.024", "8.701", "10.0", "10.0"], ["bark", "bark", "bark", "bark", "tools"]]}
{"captions": "Wind noise competes with a man's speech and goat sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.244", "1.307", "5.89", "7.26", "9.811"], ["0.984", "1.898", "6.299", "9.449", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A woman is speaking, footsteps are heard and a cat is meowing with road noise in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.276", "4.37", "5.354", "7.157", "8.693"], ["0.709", "4.756", "5.843", "7.78", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are speaking and cap guns are firing, with breathing in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.512", "2.339", "5.433", "6.614", "9.071"], ["0.26", "1.969", "4.031", "6.378", "8.701", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Background noise is present while a man and woman speak along with a crying child.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.412", "3.047", "4.049", "6.17"], ["1.043", "3.528", "4.626", "10.0"], ["man", "woman", "man", "man"]]}
{"captions": "People are speaking and laughing while birds chirp and animals make sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.069", "1.452", "4.135", "5.176", "5.818", "6.883", "7.513", "7.929", "9.167"], ["1.4", "2.614", "4.425", "5.674", "6.865", "7.351", "7.848", "8.363", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Humans walk and speak while various sounds are heard in the background such as wind and field recordings.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.535", "3.685", "5.181", "6.913", "7.953", "8.661"], ["3.094", "4.85", "6.661", "7.252", "8.598", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "People are laughing and speaking with dogs barking and animal sounds in the background.", "data": [["Female speech, woman speaking", "Dog", "Male speech, man speaking", "Dog", "Dog", "Dog", "Dog", "Dog", "Male speech, man speaking", "Dog"], ["0.708", "2.641", "2.6", "3.637", "4.614", "5.834", "6.816", "7.46", "3.696", "9.274"], ["2.568", "3.499", "3.463", "4.546", "5.706", "6.268", "7.273", "7.972", "4.541", "9.836"], ["speaking", "barking", "speaking", "barking", "barking", "barking", "barking", "barking", "speaking", "barking"]]}
{"captions": "A man gives a monologue while others speak, in a small room with clicking noises.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.394", "6.181", "7.417"], ["2.496", "5.567", "6.669", "8.173"], ["monologue", "monologue", "monologue", "monologue"]]}
{"captions": "Water is dripping and people are speaking.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Water tap, faucet", "Male speech, man speaking"], ["0.89", "3.882", "0.0", "8.661"], ["2.386", "7.961", "5.307", "10.0"], ["speaking", "speaking", "dripping", "speaking"]]}
{"captions": "A woman speaks amid mechanisms and sounds of crumpling, breathing, and human sounds in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "4.276", "7.394"], ["0.63", "6.008", "10.0"], ["speaks", "speaks", "speaks"]]}
{"captions": "Doors slam and an alarm sounds on a motor vehicle.", "data": [["Alarm"], ["5.622"], ["10.0"], ["sounds"]]}
{"captions": "Animals and birds are making noises in the rural surroundings.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog"], ["0.882", "4.142", "6.181", "8.291", "9.819"], ["1.701", "5.276", "7.063", "9.307", "10.0"], ["noises", "noises", "noises", "noises", "noises"]]}
{"captions": "Women and men speak, traffic noise and vehicle horns are heard, and cars are moving on a roadway.", "data": [["Speech", "Female speech, woman speaking", "Speech", "Female speech, woman speaking", "Speech", "Speech"], ["0.0", "1.835", "4.362", "5.142", "6.583", "8.228"], ["1.567", "2.646", "4.898", "6.102", "7.929", "10.0"], ["speak", "Women", "speak", "Women", "speak", "speak"]]}
{"captions": "People are speaking and having a conversation in a crowd.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.84", "3.424", "5.315", "7.374", "7.86", "9.237"], ["2.753", "3.152", "4.853", "6.42", "7.791", "9.132", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "An electric shaver operates and children speak over music.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Electric shaver, electric razor", "Child speech, kid speaking"], ["0.803", "4.228", "0.0", "9.157"], ["3.472", "8.252", "10.0", "10.0"], ["children", "children", "shaver", "children"]]}
{"captions": "People are talking and laughing, with footsteps and camera clicks heard in the background.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.925", "1.793", "2.748", "6.31", "7.253", "8.641", "9.231"], ["0.583", "1.313", "2.465", "3.165", "6.998", "8.508", "8.988", "10.0"], ["talking", "talking", "talking", "talking", "talking", "talking", "talking", "talking"]]}
{"captions": "There is wind noise, barking, and ticking amidst women speaking and an unknown sound.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["4.014", "4.98", "5.787", "6.42", "8.034"], ["4.512", "5.39", "6.047", "6.877", "8.508"], ["women", "women", "women", "women", "women"]]}
{"captions": "Breathing, speech, and child and female speech and breathing are heard intermittently.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking"], ["0.165", "0.811", "1.614", "2.882", "7.181", "8.071"], ["0.591", "1.323", "2.717", "6.942", "7.784", "10.0"], ["speech", "speech", "speech", "speech", "intermittently", "speech"]]}
{"captions": "People talk and laugh over music.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Female speech, woman speaking"], ["0.008", "3.827", "4.693", "7.535", "9.709"], ["3.591", "4.457", "7.307", "9.055", "10.0"], ["talk", "talk", "talk", "talk", "talk"]]}
{"captions": "Dishes clanging and pots sizzling, with men speaking.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Male speech, man speaking", "Dishes, pots, and pans", "Frying (food)", "Male speech, man speaking"], ["0.0", "0.913", "1.638", "1.583", "3.488", "0.0", "4.433"], ["0.78", "1.102", "1.898", "4.102", "3.827", "10.0", "5.937"], ["clanging", "clanging", "clanging", "speaking", "clanging", "sizzling", "speaking"]]}
{"captions": "Music plays with sound effects and a man speaking.", "data": [["Male speech, man speaking"], ["6.165"], ["10.0"], ["speaking"]]}
{"captions": "Birds chirp, call, and sing, insects fly, and people speak.", "data": [["Speech", "Male speech, man speaking", "Speech"], ["0.016", "0.992", "5.362"], ["0.953", "3.283", "10.0"], ["speak", "speak", "speak"]]}
{"captions": "Mechanisms accompany boiling water, speech, and tapping.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.489", "2.288", "4.481", "7.909"], ["1.738", "3.469", "5.614", "9.253"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Male speech and wind noise are heard amidst the sound of an unmodified field recording in a small room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.378", "3.496", "4.465"], ["2.724", "4.394", "5.78"], ["speech", "speech", "speech"]]}
{"captions": "Women are whispering and talking in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.087", "1.425", "2.945", "4.976", "7.717", "9.331"], ["0.26", "2.354", "4.079", "6.48", "8.614", "9.693"], ["whispering", "whispering", "whispering", "whispering", "whispering", "whispering"]]}
{"captions": "A blender is in use with music playing in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Blender"], ["0.0", "5.181", "6.37", "9.724", "0.0"], ["1.118", "6.126", "9.386", "10.0", "4.795"], ["blender", "blender", "blender", "blender", "blender"]]}
{"captions": "A woman is speaking and birds are chirping while wind blows.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "4.222", "5.106", "7.467"], ["1.372", "4.868", "7.058", "9.802"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Hooting and insect sounds are heard while a woman speaks.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["8.165", "9.85"], ["9.654", "10.0"], ["woman", "woman"]]}
{"captions": "Cats and other pets are purring and making noises in a domestic setting.", "data": [["Cat", "Cat", "Cat", "Cat", "Cat", "Cat", "Cat"], ["0.969", "5.843", "6.843", "7.102", "8.063", "8.685", "9.339"], ["5.252", "6.583", "6.984", "7.78", "8.591", "9.047", "10.0"], ["purring", "purring", "purring", "purring", "purring", "purring", "purring"]]}
{"captions": "A man is speaking, music is playing, dishes and pots are being used, and boiling sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Male speech, man speaking", "Dishes, pots, and pans"], ["0.0", "0.557", "0.472", "1.441", "2.693", "3.331", "4.047", "4.677", "5.512", "0.971", "6.331"], ["0.254", "0.822", "0.693", "2.079", "2.953", "3.654", "4.181", "5.378", "5.717", "1.815", "6.441"], ["speaking", "speaking", "boiling", "boiling", "boiling", "boiling", "boiling", "boiling", "boiling", "speaking", "boiling"]]}
{"captions": "Men and women are speaking, breaking things, and glass is shattering.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.11", "1.843", "3.937", "5.724", "7.362", "9.134"], ["0.504", "3.228", "4.764", "6.52", "8.134", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Sounds of patter, birds, and cats with chirping and tweeting.", "data": [["Cat"], ["7.742"], ["8.724"], ["chirping"]]}
{"captions": "Man speaking over background noise, with a sewing machine in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.959"], ["0.723", "4.583"], ["speaking", "speaking"]]}
{"captions": "Mechanisms, barking dogs, wind chimes, and women speaking are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Dog"], ["3.569", "4.729", "5.909", "7.639", "9.348", "6.973"], ["4.091", "5.374", "7.069", "9.067", "9.925", "8.874"], ["speaking", "speaking", "speaking", "speaking", "speaking", "barking"]]}
{"captions": "A man and woman are speaking, kids are laughing, footsteps are heard, and someone is screaming.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking"], ["0.0", "2.205", "4.449", "5.26", "6.129", "7.717", "9.252"], ["1.331", "2.858", "4.866", "5.732", "7.646", "8.181", "10.0"], ["man", "man", "woman", "woman", "woman", "laughing", "woman"]]}
{"captions": "People are listening to music and having a conversation.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.489", "1.225", "2.497", "3.277", "4.283", "4.936"], ["0.665", "2.162", "2.699", "3.728", "4.667", "5.254"], ["listening", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "A man speaks, taps, and operates a camera.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.213", "3.775", "5.182", "6.925", "7.632", "8.099"], ["3.397", "5.024", "6.012", "7.378", "7.934", "8.325"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man and woman speak inside a small room with conversation, silence, breathing, sighs, and conversation also heard.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Conversation", "Conversation", "Female speech, woman speaking", "Conversation", "Female speech, woman speaking"], ["0.0", "5.692", "6.615", "7.67", "8.561", "9.379", "9.667"], ["1.361", "6.117", "7.177", "8.273", "9.05", "9.562", "10.0"], ["conversation", "woman", "conversation", "conversation", "woman", "conversation", "woman"]]}
{"captions": "A woman whispers and speaks with human sounds and breathing in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "3.378", "4.996", "6.008", "8.271"], ["1.031", "4.047", "5.92", "7.036", "8.948"], ["whispers", "whispers", "whispers", "whispers", "whispers"]]}
{"captions": "Several instances of a man speaking with wind noise in between.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.299", "4.48", "6.205", "6.858", "8.921"], ["1.213", "4.252", "5.22", "6.717", "7.732", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music plays as a man speaks and uses effects units, then continues to speak and tap.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.527", "4.4", "4.856", "6.35"], ["3.212", "3.952", "4.733", "6.135", "6.906"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "People are chatting and laughing, with occasional taps, amidst background noise.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.622", "2.291", "5.055", "7.047"], ["1.732", "4.748", "7.039", "9.441"], ["chatting", "chatting", "chatting", "chatting"]]}
{"captions": "Animals, dogs, and people are speaking and whimpering in the background.", "data": [["Dog", "Dog", "Male speech, man speaking", "Dog", "Speech", "Dog", "Male speech, man speaking", "Dog", "Speech"], ["0.173", "2.024", "3.677", "4.945", "6.417", "7.087", "8.024", "8.882", "9.646"], ["1.535", "3.472", "4.646", "6.441", "7.228", "8.236", "8.685", "10.0", "10.0"], ["whimpering", "whimpering", "people", "whimpering", "speaking", "whimpering", "people", "whimpering", "speaking"]]}
{"captions": "Crumpling sounds accompany women speaking amid a hum.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.661", "3.117", "4.965", "6.644", "7.546", "7.945"], ["3.038", "4.737", "6.559", "7.387", "7.801", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Power tool sounds, crushing, clanging, and multiple men speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.189", "5.496", "6.071", "6.74", "8.693"], ["2.701", "5.85", "6.598", "7.394", "9.047"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Mechanisms are making noise with speech, male singing, and music.", "data": [["Speech", "Speech", "Speech", "Speech"], ["0.475", "2.851", "6.246", "9.674"], ["1.107", "4.583", "7.556", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Several men speak while an engine runs.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.025", "1.439", "3.843", "6.558", "7.04", "8.561"], ["0.584", "1.385", "2.872", "6.323", "6.925", "7.576", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A humming noise is heard with dishes, pots, and pans being moved on a surface.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans"], ["1.462", "2.821", "4.557", "5.299", "6.98", "8.284"], ["1.586", "3.02", "4.688", "5.402", "7.09", "8.38"], ["humming", "humming", "humming", "humming", "humming", "humming"]]}
{"captions": "Screaming, shouting and a conversation between men.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Speech"], ["0.197", "1.646", "3.118", "4.724", "7.071"], ["1.378", "2.732", "4.614", "6.339", "10.0"], ["shouting", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "A fire crackles while a man speaks amid mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.228", "3.181", "5.866", "7.465"], ["1.252", "3.882", "6.614", "8.252"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A truck reverses while a man speaks and there is wind noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.574", "4.19", "7.18"], ["0.271", "3.999", "6.612", "9.83"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man speaks and taps, scrapes, and washes in a sink as water sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Water tap, faucet"], ["0.0", "1.575", "2.756", "4.705", "6.287", "9.342", "0.0"], ["0.414", "2.58", "4.542", "6.124", "8.52", "10.0", "4.094"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "washes"]]}
{"captions": "A police car (siren), sirens, and alarms are heard.", "data": [["Alarm", "Alarm", "Alarm"], ["0.0", "6.488", "8.677"], ["6.039", "7.559", "10.0"], ["sirens", "sirens", "sirens"]]}
{"captions": "Water flows from a faucet and fills something.", "data": [["Water tap, faucet"], ["0.0"], ["10.0"], ["faucet"]]}
{"captions": "Dogs and women are barking and speaking with a television in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Dog", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Dog"], ["2.354", "3.157", "5.031", "3.802", "6.449", "8.173", "9.657", "5.394"], ["2.677", "3.457", "5.264", "4.685", "7.504", "8.646", "10.0", "7.394"], ["speaking", "speaking", "barking", "speaking", "speaking", "speaking", "speaking", "barking"]]}
{"captions": "Sizzling and scraping sounds are heard along with male speech and clinking glasses.", "data": [["Sizzle", "Male speech, man speaking", "Sizzle", "Sizzle", "Male speech, man speaking", "Cutlery, silverware"], ["0.0", "5.669", "5.539", "9.149", "8.531", "6.246"], ["4.873", "6.479", "6.273", "9.561", "9.06", "6.527"], ["Sizzling", "speech", "Sizzling", "Sizzling", "speech", "clinking"]]}
{"captions": "Men are speaking, laughing, tapping, and making other sounds, with a child speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.969", "4.472", "6.315", "8.575"], ["1.276", "4.165", "6.197", "8.465", "9.024"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music plays alongside clicking and alarm sounds.", "data": [["Alarm", "Alarm"], ["8.992", "9.969"], ["9.638", "10.0"], ["alarm", "alarm"]]}
{"captions": "A train moves, male speech is heard, and a train whistle blows.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.039", "3.071", "3.48", "4.551", "9.291"], ["1.89", "3.362", "4.315", "6.512", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Mechanisms hum, women speak, and cracking sounds are heard intermittently.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Cutlery, silverware"], ["0.092", "1.462", "5.559", "8.118", "9.866", "8.087"], ["1.403", "4.961", "6.875", "9.646", "10.0", "8.31"], ["speak", "speak", "speak", "speak", "speak", "cracking"]]}
{"captions": "Women are speaking and alarms and smoke detectors are going off while speech is heard.", "data": [["Speech", "Speech", "Smoke detector, smoke alarm", "Speech", "Speech", "Alarm"], ["3.913", "5.22", "4.874", "7.52", "9.173", "8.866"], ["5.079", "6.094", "8.457", "8.74", "10.0", "10.0"], ["speaking", "speaking", "alarms", "speaking", "speaking", "alarms"]]}
{"captions": "Women speak and whistle with background noise, clicking, and bird calls.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.134", "3.787", "4.236"], ["1.551", "3.339", "4.079", "5.756"], ["Women", "Women", "Women", "Women"]]}
{"captions": "An ice cream truck plays music as vehicles, wind noise, and multiple speeches are heard.", "data": [["Male speech, man speaking", "Child speech, kid speaking", "Speech", "Male speech, man speaking", "Speech", "Male speech, man speaking", "Speech"], ["1.503", "2.353", "2.778", "3.641", "5.651", "7.803", "9.493"], ["2.01", "2.558", "3.102", "4.08", "6.368", "9.178", "10.0"], ["speeches", "speech", "speeches", "speeches", "speeches", "speeches", "speeches"]]}
{"captions": "A man is speaking with clicking and infant cries in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.712", "2.708", "3.823", "4.683", "5.181"], ["1.078", "2.328", "3.551", "4.547", "5.054", "5.833"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking, bee buzzing, and using tools with birds singing and tapping sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.851", "2.713", "3.534", "6.536", "9.803"], ["0.613", "2.348", "3.25", "5.899", "7.866", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "There is laughter, speech, and coughing.", "data": [["Male speech, man speaking"], ["3.49"], ["4.102"], ["speech"]]}
{"captions": "A man is speaking and tapping sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.079", "2.15", "4.929", "5.449"], ["0.693", "1.898", "2.732", "5.157", "5.803"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People speaking, a jackhammer, mechanisms, and more speaking.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.945", "3.512", "9.323"], ["0.504", "2.945", "8.858", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People sing, music plays, and people speak in a noisy environment.", "data": [["Female speech, woman speaking"], ["8.512"], ["9.677"], ["speak"]]}
{"captions": "People speak and zip and tap noises are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "4.244", "4.712", "5.378", "7.707", "8.059", "8.808", "9.16"], ["3.716", "4.575", "5.263", "7.432", "7.927", "8.362", "9.089", "9.711"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Chopping sounds occur while a man speaks in an urban setting.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.669", "5.354", "9.118"], ["1.89", "5.921", "9.661"], ["speaks", "speaks", "speaks"]]}
{"captions": "Ringtones and music are heard repeatedly.", "data": [["Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone", "Ringtone"], ["0.063", "0.772", "1.0", "1.685", "3.772", "4.244", "4.929"], ["0.661", "0.937", "1.504", "3.37", "4.134", "4.866", "9.819"], ["Ringtones", "Ringtones", "Ringtones", "Ringtones", "Ringtones", "Ringtones", "Ringtones"]]}
{"captions": "A female voice is speaking and making plopping sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.602", "2.892", "4.303", "5.789", "7.137", "7.444", "8.207"], ["2.134", "3.997", "4.662", "7.039", "7.345", "8.091", "9.364"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is talking, a vehicle is passing by, and food is frying.", "data": [["Male speech, man speaking", "Frying (food)", "Male speech, man speaking"], ["0.159", "0.0", "1.042"], ["0.777", "10.0", "1.299"], ["talking", "frying", "talking"]]}
{"captions": "Male voices and a sound effect are heard followed by a plop, shout, and more male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.945", "4.299", "4.874", "5.78", "7.197", "9.252"], ["3.937", "4.52", "5.575", "6.213", "8.244", "10.0"], ["voices", "voices", "voices", "voices", "voices", "voices"]]}
{"captions": "A man is speaking while music and a narrator play in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["6.378", "8.031", "8.748"], ["7.654", "8.583", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "Water sounds and speech are heard in a small room.", "data": [["Speech", "Speech", "Speech", "Water tap, faucet", "Female speech, woman speaking"], ["3.677", "5.0", "8.079", "0.772", "9.717"], ["4.717", "6.819", "8.504", "4.638", "10.0"], ["speech", "speech", "speech", "Water", "speech"]]}
{"captions": "A man speaks while birds chirp and tweet in a rural or natural environment with water sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.378", "4.071", "5.701"], ["3.386", "4.858", "6.016"], ["speaks", "speaks", "speaks"]]}
{"captions": "Taps echo amidst honking and child speech.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["2.197", "4.913", "5.906"], ["2.417", "5.638", "6.874"], ["speech", "speech", "speech"]]}
{"captions": "Women are speaking and clicking sounds can be heard along with breathing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.803", "5.567", "6.606"], ["1.094", "3.811", "6.079", "8.409"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks, speech is cheered with clapping, and a cat meows with music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Speech", "Speech", "Cat"], ["0.008", "1.614", "3.008", "4.606", "9.724"], ["1.228", "2.693", "4.37", "6.409", "10.0"], ["speaks", "speaks", "speaks", "speaks", "meows"]]}
{"captions": "A man and a woman are speaking, whistling, and using electric shavers.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "4.453", "7.058", "0.0"], ["3.536", "5.56", "10.0", "10.0"], ["speaking", "speaking", "speaking", "whistling"]]}
{"captions": "A man is speaking in a small room and a turkey gobbles.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.098", "5.185", "5.954", "7.52", "9.393"], ["0.179", "4.763", "5.821", "7.093", "8.809", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Sound effects, plops, dinging music, breathing, and multiple women speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["6.969", "8.757"], ["8.473", "10.0"], ["speaking", "speaking"]]}
{"captions": "Background noise, tapping, and cat sounds are interspersed with purring.", "data": [["Cat", "Cat"], ["0.978", "9.032"], ["2.291", "10.0"], ["purring", "purring"]]}
{"captions": "A television is on, people speak, dogs and pets make noise, and a man speaks multiple times.", "data": [["Dog", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Dog", "Dog", "Dog", "Male speech, man speaking"], ["0.008", "0.181", "2.756", "3.465", "4.575", "5.362", "6.591", "0.724", "8.535", "9.504", "7.811"], ["0.189", "2.457", "3.346", "4.157", "4.961", "5.984", "7.362", "1.213", "9.173", "10.0", "9.512"], ["dogs", "people", "man", "man", "man", "man", "man", "dogs", "dogs", "dogs", "man"]]}
{"captions": "Breathing and human sounds can be heard intermittently against a backdrop of female speeches.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.495", "4.855", "7.419"], ["0.525", "2.446", "5.969", "8.08"], ["speeches", "speeches", "speeches", "speeches"]]}
{"captions": "A woman sings with music and an alarm clock sounds.", "data": [["Alarm clock"], ["6.734"], ["10.0"], ["alarm"]]}
{"captions": "A man is speaking, a car is driving by, and birds and a man are gobbling.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.408", "7.04", "8.803"], ["1.087", "6.067", "8.049", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men speak over dripping water and mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Water tap, faucet", "Male speech, man speaking"], ["0.371", "1.263", "5.518", "0.0", "7.454"], ["1.105", "3.953", "6.884", "4.75", "9.863"], ["speak", "speak", "speak", "dripping", "speak"]]}
{"captions": "Women speak, laugh, and breathe between moments of silence.", "data": [["Conversation", "Speech", "Speech", "Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Speech"], ["0.15", "1.087", "4.228", "5.26", "8.142", "8.921", "9.244"], ["0.89", "3.008", "4.906", "7.047", "8.732", "9.142", "9.465"], ["speak", "speak", "speak", "speak", "laugh", "laugh", "speak"]]}
{"captions": "A dog barks and people laugh and sneeze.", "data": [["Dog", "Dog"], ["0.0", "4.053"], ["3.707", "9.382"], ["barks", "barks"]]}
{"captions": "Women converse and laugh over mechanical fan noise.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Speech"], ["0.0", "1.811", "4.031", "4.693", "7.764", "8.181"], ["0.976", "3.945", "4.63", "7.307", "8.063", "10.0"], ["converse", "converse", "converse", "converse", "converse", "converse"]]}
{"captions": "People talk and animals make noise.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.477", "5.679", "9.112"], ["2.337", "6.341", "10.0"], ["talk", "talk", "talk"]]}
{"captions": "Various sounds are heard including a telephone bell ringing, music, and speech.", "data": [["Male speech, man speaking", "Telephone bell ringing", "Female speech, woman speaking", "Male speech, man speaking", "Ringtone", "Female speech, woman speaking"], ["3.358", "1.988", "4.879", "7.087", "5.723", "8.532"], ["4.694", "3.179", "5.214", "8.382", "6.786", "8.815"], ["speech", "telephone", "speech", "speech", "telephone", "speech"]]}
{"captions": "Cars are driving and people are speaking, with honking horns and food being fried.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)", "Female speech, woman speaking"], ["1.109", "5.281", "0.0", "8.969"], ["2.359", "6.109", "10.0", "10.0"], ["speaking", "speaking", "fried", "speaking"]]}
{"captions": "Women speak and laugh, with conversations, mechanisms, gasps, and breathing sounds.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.071", "2.598", "4.63", "7.276", "9.52"], ["1.992", "2.307", "4.315", "6.661", "8.874", "10.0"], ["conversations", "conversations", "conversations", "conversations", "conversations", "conversations"]]}
{"captions": "A man is speaking over ticking and liquid sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.83", "3.623", "4.448", "7.138"], ["3.007", "4.14", "5.453", "7.437"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Unmodified field recording is made in a small room with the sound of cutlery bouncing.", "data": [["Cutlery, silverware"], ["3.913"], ["5.205"], ["cutlery"]]}
{"captions": "A non-motorized vehicle is tapped as birds chirp, and a dog barks.", "data": [["Dog", "Dog"], ["4.187", "5.709"], ["4.575", "6.039"], ["barks", "barks"]]}
{"captions": "A man is speaking while using an electric shaver.", "data": [["Male speech, man speaking", "Electric shaver, electric razor", "Male speech, man speaking"], ["0.133", "0.0", "6.352"], ["2.767", "10.0", "8.279"], ["speaking", "shaver", "speaking"]]}
{"captions": "Music, singing, and male speech can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["4.669", "6.764", "9.039"], ["6.339", "8.638", "10.0"], ["speech", "speech", "speech"]]}
{"captions": "Music, chorus effect, and speech is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.815", "6.133", "9.751"], ["5.815", "9.162", "10.0"], ["speech", "speech", "speech"]]}
{"captions": "A woman is speaking, buttons are being clicked, and a cash register beeps.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.929", "2.157", "5.299", "8.236"], ["2.079", "3.724", "7.252", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Breathing, music, and female speech occur with occasional breaths.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.157", "2.601", "8.429", "9.645"], ["1.968", "3.497", "8.953", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Crowds shout and speak with sound effects.", "data": [["Speech", "Speech"], ["1.301", "6.93"], ["5.273", "10.0"], ["speak", "speak"]]}
{"captions": "A man speaks among the sound of machinery.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.102", "4.551", "5.677", "7.724"], ["1.724", "4.071", "5.134", "7.213", "9.559"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man is speaking and using an electric shaver.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Electric shaver, electric razor"], ["0.0", "1.323", "3.397", "5.951", "8.379", "7.346"], ["1.114", "3.205", "5.811", "8.22", "10.0", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "shaver"]]}
{"captions": "People sneeze, cough, and speak with silence in between.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["7.819", "9.118"], ["8.551", "9.591"], ["speak", "speak"]]}
{"captions": "People are speaking, singing, and making music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Male speech, man speaking"], ["2.654", "3.197", "4.425", "7.78", "9.898"], ["2.976", "3.732", "6.339", "8.449", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music is playing, with a blender and food processor in operation, and female speech and tapping sounds.", "data": [["Blender", "Blender", "Female speech, woman speaking"], ["0.315", "3.465", "6.205"], ["2.654", "5.898", "7.441"], ["blender", "blender", "speech"]]}
{"captions": "A woman dials a telephone, talks and listens, as a baby cries and someone sobs.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.189", "3.803", "4.74", "5.26", "6.772", "7.677", "8.402", "9.26"], ["3.654", "4.551", "4.976", "6.252", "7.559", "8.291", "8.811", "9.764"], ["talks", "talks", "talks", "talks", "talks", "talks", "talks", "talks"]]}
{"captions": "Dogs are barking, a man is speaking, and babies are laughing and babbling.", "data": [["Dog", "Dog", "Male speech, man speaking", "Dog"], ["1.105", "5.766", "0.295", "9.913"], ["5.13", "8.745", "0.578", "10.0"], ["barking", "barking", "man", "barking"]]}
{"captions": "Dial tones, tapping, background noise, conversation, and female speech are heard.", "data": [["Male speech, man speaking"], ["7.134"], ["10.0"], ["conversation"]]}
{"captions": "Men are speaking, laughing, and making various sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.559", "1.551"], ["1.087", "2.236"], ["speaking", "speaking"]]}
{"captions": "Aircraft engines and speech are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Speech"], ["0.0", "3.428", "7.719"], ["2.519", "7.285", "10.0"], ["speech", "speech", "speech"]]}
{"captions": "People are speaking, dogs are barking, and birds are chirping.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Female speech, woman speaking", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Female speech, woman speaking"], ["0.0", "1.181", "1.819", "3.219", "3.734", "4.269", "4.2", "4.688", "5.45", "5.861", "6.349", "6.953", "7.57", "8.126", "8.648", "9.176", "9.856", "5.36"], ["0.899", "1.407", "2.876", "3.384", "3.926", "4.427", "4.571", "5.127", "5.628", "6.115", "6.555", "7.138", "7.763", "8.277", "8.833", "9.376", "10.0", "6.321"], ["barking", "barking", "barking", "barking", "barking", "barking", "speaking", "barking", "barking", "barking", "barking", "barking", "barking", "barking", "barking", "barking", "barking", "speaking"]]}
{"captions": "A woman speaks and clicks, with speech and narration in the background.", "data": [["Speech", "Speech", "Speech"], ["0.197", "4.52", "8.425"], ["2.504", "8.244", "9.811"], ["clicks", "clicks", "clicks"]]}
{"captions": "Ringtones and music alternate.", "data": [["Ringtone", "Ringtone", "Ringtone", "Ringtone"], ["0.0", "1.992", "5.056", "8.088"], ["1.067", "4.121", "7.175", "10.0"], ["music", "music", "music", "music"]]}
{"captions": "Laughter, speech, coughing, creaking, footsteps, and whistling are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["2.322", "3.802"], ["2.539", "4.569"], ["speech", "speech"]]}
{"captions": "Men are speaking and laughing while food is frying and mechanisms are heard.", "data": [["Male speech, man speaking", "Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Frying (food)", "Conversation"], ["0.163", "2.654", "4.864", "5.679", "6.341", "0.0", "7.491"], ["0.435", "4.746", "5.507", "6.06", "6.92", "10.0", "8.823"], ["speaking", "speaking", "speaking", "speaking", "speaking", "frying", "speaking"]]}
{"captions": "Music and men are speaking while sizzling and mechanisms are in use.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Sizzle"], ["0.0", "0.898", "0.0"], ["0.685", "2.685", "10.0"], ["speaking", "speaking", "sizzling"]]}
{"captions": "A woman speaks as music plays and she speaks again multiple times while a hair dryer is heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.709", "3.409", "4.551", "5.094", "5.504", "6.079", "6.441", "6.661", "7.22"], ["1.268", "2.535", "4.378", "5.008", "5.425", "5.78", "6.315", "6.575", "6.906", "7.654"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "People are talking, clicking, and making various sounds in a room.", "data": [["Conversation", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.544", "1.818", "5.514", "6.583", "7.474"], ["1.252", "2.252", "6.245", "6.953", "9.077"], ["talking", "talking", "talking", "talking", "talking"]]}
{"captions": "A man speaks in a small, humming room.", "data": [["Male speech, man speaking"], ["0.0"], ["1.504"], ["man"]]}
{"captions": "A woman is speaking, with music and bird vocalization in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.795", "6.701", "8.063", "9.882"], ["2.039", "5.85", "7.945", "9.701", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking on the phone with a ringing sound.", "data": [["Male speech, man speaking", "Telephone bell ringing"], ["0.488", "3.386"], ["1.449", "4.299"], ["speaking", "ringing"]]}
{"captions": "Mechanisms sound, tapping, and people talking, walking, and laughing can be heard.", "data": [["Male speech, man speaking", "Female speech, woman speaking"], ["6.181", "8.583"], ["7.874", "9.496"], ["talking", "talking"]]}
{"captions": "Insects, birds, and women speaking are heard with wind and hammer sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.787", "4.268", "6.85"], ["3.724", "6.244", "8.165"], ["speaking", "speaking", "speaking"]]}
{"captions": "Women speaking, clicking and other sounds can be heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.055", "7.457", "8.709"], ["0.354", "2.488", "8.142", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Water fills and runs in a sink, and cutlery clinks while people talk.", "data": [["Sink (filling or washing)", "Cutlery, silverware", "Speech"], ["0.0", "7.89", "8.654"], ["6.0", "10.0", "9.236"], ["runs", "clinks", "talk"]]}
{"captions": "Animals are barking, water is being turned on and off, and dishes are making noises.", "data": [["Dishes, pots, and pans", "Water tap, faucet", "Dishes, pots, and pans"], ["7.693", "6.717", "9.677"], ["8.008", "10.0", "10.0"], ["dishes", "water", "dishes"]]}
{"captions": "Crying, sobbing, baby cries, and women speaking are heard in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.581", "2.106", "2.403", "3.107"], ["1.983", "2.325", "3.052", "3.477"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Various sounds including conversations, chirping, and splashing can be heard near a body of water.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Conversation", "Male speech, man speaking", "Male speech, man speaking", "Conversation"], ["0.391", "1.956", "2.903", "3.617", "7.303", "8.758", "9.739"], ["1.668", "2.121", "3.301", "4.139", "8.003", "9.348", "10.0"], ["conversations", "conversations", "conversations", "conversations", "conversations", "conversations", "conversations"]]}
{"captions": "Music and screaming mix with water and crowd noise as someone speaks.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.323", "7.417", "8.496"], ["2.866", "8.283", "10.0"], ["speaks", "speaks", "speaks"]]}
{"captions": "Conversations and speeches are exchanged between people, with bell sounds.", "data": [["Conversation", "Conversation", "Speech", "Conversation", "Conversation"], ["3.866", "5.26", "6.181", "8.031", "9.63"], ["4.913", "5.543", "7.22", "9.457", "9.992"], ["bell", "bell", "speeches", "bell", "bell"]]}
{"captions": "Women are speaking and shuffling with background noise and occasional tapping sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.228", "3.142", "6.567", "8.079", "9.024"], ["1.504", "3.685", "7.071", "8.85", "9.827"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A crowd cheers while male singing and female speech are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.803", "1.906", "5.15", "8.102"], ["1.189", "2.685", "5.992", "8.874"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A woman speaks while dishes and pots clink.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Female speech, woman speaking", "Frying (food)", "Dishes, pots, and pans", "Female speech, woman speaking"], ["2.545", "6.975", "7.374", "4.378", "0.0", "8.514", "9.132"], ["3.019", "7.207", "8.41", "5.645", "10.0", "8.728", "10.0"], ["clink", "clink", "clink", "speaks", "clink", "clink", "speaks"]]}
{"captions": "Alarms, sirens, and men speaking blend together.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Alarm", "Male speech, man speaking"], ["0.417", "0.969", "1.472", "3.551", "5.173", "7.661", "0.0", "8.543"], ["0.803", "1.213", "2.0", "4.472", "6.134", "8.394", "10.0", "9.614"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "Alarms", "speaking"]]}
{"captions": "A man whistles and speaks several times.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.685", "7.598", "8.559", "9.913"], ["6.087", "8.205", "9.425", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A motorcycle is heard with men speaking and accelerating.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.649", "5.245", "6.287", "6.755", "7.691", "9.883"], ["4.085", "5.564", "6.553", "7.309", "9.617", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Various sound effects, speech, and a man speaking with an electric shaver are heard.", "data": [["Speech", "Electric shaver, electric razor", "Speech"], ["7.74", "5.402", "9.535"], ["8.52", "10.0", "10.0"], ["speech", "shaver", "speech"]]}
{"captions": "A man is speaking while wind noise and bird calls are heard in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.189", "5.055", "6.874", "7.504"], ["4.386", "6.614", "7.157", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music, television, and female speech are heard, with laughter from multiple people, including a man speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.706", "4.118", "8.843"], ["1.093", "7.658", "10.0"], ["speech", "speech", "man"]]}
{"captions": "Music, chimes, laughter, and people speaking and singing are heard.", "data": [["Female speech, woman speaking", "Speech", "Speech"], ["7.509", "8.035", "8.792"], ["7.913", "8.642", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "A police car's siren and an alarm sound.", "data": [["Alarm"], ["3.764"], ["5.378"], ["siren"]]}
{"captions": "Men are speaking and laughing in a crowd.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.0", "3.796", "6.047", "9.575"], ["1.417", "3.496", "5.937", "6.342", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "The sound of wind, ocean, a sailing ship, conversation, and birds singing with a man and woman speaking.", "data": [["Conversation", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.591", "3.299", "7.063", "8.731"], ["3.142", "6.512", "7.953", "9.443"], ["speaking", "conversation", "conversation", "conversation"]]}
{"captions": "Conversations, animal sounds, and car noises are heard as people speak.", "data": [["Conversation", "Female speech, woman speaking", "Speech", "Speech", "Speech"], ["0.315", "2.016", "5.244", "7.394", "9.024"], ["0.921", "4.835", "6.315", "8.236", "10.0"], ["Conversations", "Conversations", "Conversations", "Conversations", "Conversations"]]}
{"captions": "Music, human sounds, alarms, footsteps, whistling, and speech intermingle.", "data": [["Speech", "Speech", "Alarm", "Speech"], ["7.665", "8.384", "2.661", "8.839"], ["7.936", "8.554", "3.693", "10.0"], ["speech", "speech", "alarms", "speech"]]}
{"captions": "A man speaks as music, TV, and bird sounds intermingle.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.898", "7.496"], ["3.457", "6.228", "8.079"], ["speaks", "speaks", "speaks"]]}
{"captions": "A man speaks and narrates in a large room with applause and cheering.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.417", "4.488"], ["2.268", "4.291", "7.583"], ["speaks", "speaks", "speaks"]]}
{"captions": "Laughter, breathing, speech, and child speech with mechanisms in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking", "Male speech, man speaking", "Child speech, kid speaking", "Male speech, man speaking", "Child speech, kid speaking", "Male speech, man speaking"], ["1.157", "1.937", "3.433", "5.63", "6.803", "7.417", "8.441", "9.378"], ["1.693", "3.307", "5.087", "6.654", "7.378", "8.331", "9.244", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Dogs bark, growl, and pant in a domestic setting.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.007", "0.91", "1.636", "2.22", "2.817", "3.415", "3.931", "5.295", "5.981", "6.728", "7.366"], ["0.754", "1.304", "1.982", "2.614", "3.252", "3.748", "4.25", "5.614", "6.388", "7.074", "7.773"], ["bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark"]]}
{"captions": "There are radio broadcasts and conversation with a sailboat sailing, heavy engine sounds, wind noise, and speech noise in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.213", "7.543"], ["2.827", "6.055", "8.85"], ["conversation", "conversation", "conversation"]]}
{"captions": "People are talking in a room with music playing in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.331", "2.669", "3.465", "6.291", "7.457"], ["0.787", "3.189", "4.803", "6.764", "9.071"], ["People", "People", "People", "People", "People"]]}
{"captions": "A woman is speaking, breathing, and beeping.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.188", "4.801", "6.629", "7.255", "7.789", "9.753"], ["1.759", "4.532", "6.327", "7.15", "7.478", "9.274", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking, clapping, and writing in a room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.102", "3.386", "6.323", "8.661"], ["0.78", "3.268", "5.654", "6.709", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A small room has mechanisms, people speaking and dogs barking, growling.", "data": [["Female speech, woman speaking", "Dog", "Dog", "Speech", "Female speech, woman speaking", "Male speech, man speaking", "Dog"], ["0.157", "2.26", "2.866", "2.622", "4.433", "8.551", "3.685"], ["0.961", "2.472", "3.102", "3.646", "6.339", "10.0", "4.402"], ["speaking", "barking", "barking", "speaking", "speaking", "speaking", "barking"]]}
{"captions": "Women cook, speak, and stir amidst sizzling and clanking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Dishes, pots, and pans", "Female speech, woman speaking", "Female speech, woman speaking", "Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Sizzle", "Female speech, woman speaking"], ["0.896", "1.255", "1.92", "2.25", "0.052", "2.632", "7.675", "4.367", "8.3", "8.305", "0.735", "9.641"], ["1.139", "1.458", "2.163", "2.551", "0.353", "3.563", "8.08", "4.592", "9.208", "8.728", "10.0", "10.0"], ["speak", "speak", "speak", "speak", "clanking", "speak", "speak", "clanking", "speak", "clanking", "sizzling", "speak"]]}
{"captions": "Men and women are speaking and mechanisms are heard inside a small room.", "data": [["Speech", "Speech"], ["0.937", "9.78"], ["4.26", "10.0"], ["speaking", "speaking"]]}
{"captions": "A woman speaks with tapping and filling sounds in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.598", "3.709", "4.898", "5.929", "9.835"], ["3.134", "4.709", "5.52", "9.512", "10.0"], ["woman", "woman", "woman", "woman", "woman"]]}
{"captions": "People scream, cry, and speak while sound effects play, and thumping sounds are heard.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking"], ["2.094", "3.346", "4.992"], ["2.898", "4.882", "5.283"], ["speak", "speak", "speak"]]}
{"captions": "Fireworks are heard while men and women speak.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.428", "1.382", "6.489", "7.918", "9.005"], ["1.255", "2.001", "7.062", "8.814", "9.306"], ["women", "women", "men", "women", "men"]]}
{"captions": "An alarm and fire alarm sound.", "data": [["Alarm"], ["0.008"], ["10.0"], ["fire"]]}
{"captions": "People are speaking, clapping, and scraping while a child is speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking"], ["0.0", "4.997", "6.368", "8.884", "9.433"], ["4.511", "5.911", "8.427", "9.427", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Kids speaking, bleating, music, and male speech are heard.", "data": [["Child speech, kid speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.95", "4.32", "5.697", "7.212", "9.046"], ["2.152", "3.904", "4.899", "6.935", "8.826", "10.0"], ["speaking", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Dogs bark before and after a musical interlude.", "data": [["Dog", "Dog", "Dog"], ["0.0", "1.795", "9.685"], ["0.937", "8.953", "10.0"], ["bark", "bark", "bark"]]}
{"captions": "Fire alarms and tapping are heard.", "data": [["Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm"], ["0.275", "1.297", "3.308", "4.297", "5.319", "7.351", "8.346", "9.341"], ["0.988", "1.997", "4.091", "4.942", "5.985", "8.01", "9.053", "9.993"], ["tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping", "tapping"]]}
{"captions": "A man is speaking, writing and speech is heard in a small room.", "data": [["Speech", "Speech", "Speech"], ["0.591", "2.743", "6.273"], ["2.071", "5.261", "9.199"], ["speaking", "speaking", "speaking"]]}
{"captions": "People chuckle, breathe, speak, hiccup, and giggle.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.92", "1.705", "2.513", "7.666", "9.484"], ["1.346", "2.401", "2.708", "8.504", "10.0"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "People are speaking, breathing, laughing, and making sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["2.092", "9.712"], ["3.353", "10.0"], ["speaking", "speaking"]]}
{"captions": "People are speaking, music is playing, and more male speech is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.014", "2.622", "3.586", "4.377", "4.984", "6.752", "7.405", "7.753", "8.255", "9.077"], ["2.244", "3.349", "4.102", "4.651", "6.377", "7.232", "7.638", "8.008", "8.712", "10.0"], ["male", "male", "male", "male", "male", "male", "male", "male", "male", "male"]]}
{"captions": "Music, frying food, dishes, and a man speaking are heard.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Male speech, man speaking", "Frying (food)", "Male speech, man speaking", "Dishes, pots, and pans"], ["6.11", "7.787", "9.047", "5.811", "5.638", "9.63", "9.606"], ["6.228", "7.882", "9.173", "6.969", "10.0", "10.0", "9.717"], ["dishes", "dishes", "dishes", "speaking", "frying", "speaking", "dishes"]]}
{"captions": "Various speech, laughter, and animal sounds are present.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Speech"], ["0.0", "1.311", "4.647", "6.006", "8.744"], ["1.002", "2.512", "5.827", "7.351", "9.794"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Telephone rings can be heard along with humming.", "data": [["Ringtone", "Ringtone", "Ringtone", "Ringtone"], ["0.732", "2.827", "4.937", "7.016"], ["2.37", "4.528", "6.63", "8.764"], ["rings", "rings", "rings", "rings"]]}
{"captions": "Skateboarders make sounds while whooping, cheering, and whistling.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.188", "5.465", "8.846"], ["4.159", "7.271", "10.0"], ["cheering", "cheering", "cheering"]]}
{"captions": "A toothbrush brushes teeth while women speak and music plays.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.451", "2.642", "4.22", "6.301", "9.705"], ["1.283", "2.462", "3.127", "5.78", "6.965", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Men speak and beeps are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["3.819", "8.391"], ["7.152", "10.0"], ["speak", "speak"]]}
{"captions": "A dog is making noise, including tapping and barking, and there is shouting.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.717", "3.102", "5.173", "6.031", "6.929", "7.74", "9.535"], ["2.157", "3.362", "5.693", "6.583", "7.472", "8.654", "10.0"], ["barking", "barking", "barking", "barking", "barking", "barking", "barking"]]}
{"captions": "Children and adults speak and have conversation.", "data": [["Speech", "Speech", "Child speech, kid speaking", "Female speech, woman speaking", "Speech"], ["0.0", "2.369", "3.476", "5.099", "7.217"], ["1.568", "3.272", "5.065", "5.845", "9.192"], ["speak", "speak", "Children", "conversation", "speak"]]}
{"captions": "There are mechanisms, gurgling and bubbling sounds, liquids, and taps with female and child speech.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking"], ["2.648", "4.752", "6.409", "8.588", "9.301"], ["3.15", "5.316", "7.502", "9.104", "9.885"], ["taps", "taps", "taps", "female", "taps"]]}
{"captions": "A man is speaking and using a blender in a small room with glass sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Blender", "Male speech, man speaking", "Blender"], ["0.008", "0.858", "2.063", "4.354", "7.764", "7.197", "8.315", "8.803"], ["0.378", "1.795", "4.087", "6.039", "8.031", "8.26", "8.795", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "blender", "speaking", "blender"]]}
{"captions": "A man speaks and a train and steam whistle sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.331", "5.205", "8.173"], ["0.921", "4.346", "5.756", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Multiple men speak, interspersed with a mid-frequency engine sound.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.276", "4.724", "6.575", "8.008"], ["0.803", "4.236", "6.307", "7.85", "8.969"], ["men", "men", "men", "men", "men"]]}
{"captions": "People are having a conversation and talking over noise.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.92"], ["2.214", "5.007"], ["conversation", "conversation"]]}
{"captions": "Crickets and croaks intermingle with male speech.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.795", "1.433", "2.764", "4.756", "6.591", "8.575"], ["1.299", "2.157", "3.457", "6.094", "8.063", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Dogs and domestic animals make noises.", "data": [["Dog", "Dog"], ["0.0", "6.253"], ["5.574", "10.0"], ["noises", "noises"]]}
{"captions": "People are singing and speaking over music.", "data": [["Male speech, man speaking"], ["9.225"], ["10.0"], ["speaking"]]}
{"captions": "Wind noise, a firecracker, and a man speaking are accompanied by ticking.", "data": [["Male speech, man speaking"], ["8.921"], ["9.197"], ["speaking"]]}
{"captions": "Mechanisms and male speech alternate in this sequence.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.583", "4.071", "7.787", "8.551"], ["3.102", "6.268", "8.134", "9.244"], ["alternate", "alternate", "alternate", "alternate"]]}
{"captions": "Human voices converse, interspersed with bird sounds and laughter.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.425", "1.331", "1.921", "3.268", "3.567"], ["0.709", "1.622", "2.205", "3.378", "3.787"], ["converse", "voices", "voices", "voices", "voices"]]}
{"captions": "An engine is running, and a man is speaking while tools are being used.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["5.552", "8.052"], ["6.857", "9.274"], ["speaking", "speaking"]]}
{"captions": "A man speaks and music plays while people talk in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.709", "7.024"], ["2.055", "6.276", "9.837"], ["speaks", "speaks", "speaks"]]}
{"captions": "A group of men are speaking while environmental noise and paper crumpling is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.102", "5.087", "7.52", "8.874", "9.583"], ["4.795", "6.205", "8.591", "9.189", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Male speech and music are heard over sounds of water and ticking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.765", "2.321", "4.015", "5.794", "6.637"], ["0.276", "2.119", "3.293", "5.125", "6.166", "9.559"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Explosion, glass clinking, crackling, shouting, and sound effects are heard with men speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.931", "4.997", "5.58", "6.57", "7.669"], ["2.258", "5.292", "6.397", "7.241", "8.56"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Honking, animal sounds, and bird sounds are heard.", "data": [["Speech"], ["6.376"], ["7.824"], ["Honking"]]}
{"captions": "A person is breathing, and a telephone is ringing while conversations occur.", "data": [["Telephone bell ringing", "Male speech, man speaking", "Male speech, man speaking", "Telephone bell ringing", "Male speech, man speaking"], ["0.89", "6.89", "8.953", "3.717", "9.866"], ["1.732", "8.685", "9.614", "4.74", "10.0"], ["ringing", "conversations", "conversations", "ringing", "conversations"]]}
{"captions": "A man sings and speaks amidst frying and engine sounds and music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Frying (food)"], ["0.0", "2.716", "4.854", "0.0"], ["1.521", "4.148", "6.219", "6.178"], ["speaks", "speaks", "speaks", "frying"]]}
{"captions": "There are various sounds including music, narration, speech, and babbling in a large room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.008", "1.937", "2.85", "7.339", "8.638"], ["0.961", "2.74", "6.591", "8.252", "9.22"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A man speaks with background noise, clicking, tapping, and whistling sounds.", "data": [["Male speech, man speaking"], ["0.0"], ["2.276"], ["speaks"]]}
{"captions": "A blender or food processor is in use.", "data": [["Blender"], ["0.031"], ["10.0"], ["blender"]]}
{"captions": "A man is singing, people are speaking, walking, and a child is speaking with background noise.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.709", "4.417", "5.787", "6.543", "7.598", "8.496", "9.417"], ["3.898", "5.496", "6.283", "7.118", "8.323", "8.882", "9.898"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People and horses are speaking and walking in a rural area.", "data": [["Speech", "Female speech, woman speaking", "Male speech, man speaking"], ["0.0", "2.99", "5.698"], ["2.65", "5.459", "10.0"], ["speaking", "People", "speaking"]]}
{"captions": "Water is being filled and a tap is running.", "data": [["Sink (filling or washing)"], ["0.0"], ["10.0"], ["running"]]}
{"captions": "Music plays as a man speaks and makes tapping, ticking, and breathing sounds.", "data": [["Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking"], ["0.213", "1.417", "2.031", "4.638", "6.394", "8.677", "9.819"], ["1.228", "1.756", "3.953", "6.244", "6.732", "9.331", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Male speech and music play in a small room.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.189", "4.433", "5.756", "9.276"], ["0.441", "3.984", "5.496", "9.0", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Cutlery clinks and a woman speaks with background noises.", "data": [["Cutlery, silverware", "Cutlery, silverware", "Female speech, woman speaking", "Female speech, woman speaking", "Cutlery, silverware", "Female speech, woman speaking", "Cutlery, silverware", "Female speech, woman speaking"], ["0.0", "1.561", "4.792", "7.146", "6.064", "9.087", "9.449", "9.856"], ["0.607", "1.757", "5.835", "8.614", "6.358", "9.746", "9.811", "10.0"], ["clinks", "clinks", "speaks", "speaks", "clinks", "speaks", "clinks", "speaks"]]}
{"captions": "In a small room, women are speaking and glasses are clinking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.23", "4.028", "6.511", "7.995"], ["3.03", "5.565", "7.686", "9.876"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Speech mixes with gargling and breathing.", "data": [["Female speech, woman speaking", "Speech", "Speech"], ["4.591", "6.323", "8.087"], ["5.732", "7.126", "9.606"], ["Speech", "gargling", "gargling"]]}
{"captions": "A man speaks and narrates while cooking food and tapping can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Frying (food)"], ["0.0", "0.553", "2.147", "3.071", "4.094", "6.712", "7.944", "0.0"], ["0.281", "1.44", "2.609", "3.977", "5.879", "7.092", "8.795", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "tapping"]]}
{"captions": "A man speaks amidst power tool sounds, clicking, tapping, and scraping noises.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.805", "7.65", "8.191"], ["0.436", "1.346", "8.047", "9.603"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A crowd, unmodified field recording, footsteps, people speaking and laughing, and music are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.448", "0.984", "7.827"], ["0.889", "1.881", "8.465"], ["speaking", "speaking", "speaking"]]}
{"captions": "Water flows while several men speak.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.012", "1.295", "2.954", "3.619", "5.867", "7.277", "8.012"], ["0.312", "2.306", "3.266", "5.197", "7.139", "7.896", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A woman is speaking and background noise and music are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.982", "2.027", "2.486", "3.609", "5.264", "8.621"], ["0.801", "1.915", "2.337", "2.866", "4.932", "6.488", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People make surface contact, snap fingers, clap, and growl while speaking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking"], ["0.822", "1.175", "2.709", "4.779", "7.278", "8.826"], ["0.991", "1.806", "3.951", "6.694", "8.507", "9.118"], ["growl", "growl", "growl", "growl", "growl", "growl"]]}
{"captions": "Mechanisms are heard, a man is speaking, and a water tap is turned on.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Water tap, faucet"], ["2.087", "9.339", "4.638"], ["3.15", "10.0", "8.039"], ["speaking", "speaking", "water"]]}
{"captions": "A man speaks, a television plays, and sounds of infants crying, running children, thumps, and more are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.638", "4.102", "5.118", "6.142", "6.953"], ["0.331", "3.638", "4.969", "5.984", "6.701", "8.772"], ["speaks", "speaks", "running", "running", "speaks", "speaks"]]}
{"captions": "A man speaks, taps, sprays, and operates mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.976", "4.275", "5.497", "8.832", "9.745"], ["0.196", "2.533", "5.39", "8.63", "9.113", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man speaks while motor vehicles pass by.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "6.945", "8.874"], ["6.15", "7.512", "9.181"], ["speaks", "speaks", "speaks"]]}
{"captions": "Music plays, followed by mechanisms, typing, beeps, female singing, and an alarm.", "data": [["Alarm"], ["7.85"], ["8.496"], ["alarm"]]}
{"captions": "A man is speaking and an engine is running.", "data": [["Male speech, man speaking"], ["0.0"], ["1.401"], ["speaking"]]}
{"captions": "A man is speaking, breathing, and making crinkling sounds while chewing.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.583"], ["0.165", "3.457"], ["speaking", "speaking"]]}
{"captions": "Men are speaking, with mechanical sounds and an explosion.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "2.354", "3.008", "3.638", "4.614", "7.969", "9.433", "9.969"], ["0.362", "2.858", "3.496", "4.339", "4.866", "8.465", "9.787", "10.0"], ["Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men"]]}
{"captions": "A dog is howling, with coughing and speech in a small room.", "data": [["Dog"], ["0.0"], ["10.0"], ["howling"]]}
{"captions": "Animal sounds and speeches are heard.", "data": [["Speech", "Speech", "Speech", "Male speech, man speaking"], ["0.835", "2.378", "6.709", "9.898"], ["1.874", "3.724", "8.213", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "A man speaks repeatedly while a chopping sound is heard in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.009", "0.539", "1.252", "2.339", "3.394", "4.477", "5.925", "6.706", "8.228", "8.808"], ["0.43", "1.16", "2.047", "3.298", "4.308", "5.601", "6.61", "8.173", "8.684", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "People are walking and talking inside with footsteps and laughter heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.0", "6.22", "9.031"], ["3.945", "7.717", "10.0"], ["talking", "talking", "talking"]]}
{"captions": "Fire alarms and a man speaking can be heard.", "data": [["Alarm", "Fire alarm", "Alarm", "Alarm", "Alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Speech"], ["0.0", "0.74", "2.732", "3.709", "4.74", "6.732", "7.748", "8.748", "9.827"], ["0.504", "1.496", "3.425", "4.504", "5.488", "7.449", "8.394", "9.37", "10.0"], ["alarms", "alarms", "alarms", "alarms", "alarms", "alarms", "alarms", "alarms", "speaking"]]}
{"captions": "An alarm clock goes off and various music is playing.", "data": [["Alarm clock", "Alarm clock"], ["0.157", "1.354"], ["0.898", "2.008"], ["goes", "goes"]]}
{"captions": "Men are speaking and making sounds in a small room.", "data": [["Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "7.428", "8.133", "9.457"], ["1.162", "7.902", "9.191", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People chuckle and speak in a large room, with narration and monologues, and a crowd.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.105", "3.829", "7.427", "8.853"], ["1.039", "2.96", "7.006", "8.181", "9.267"], ["chuckle", "chuckle", "chuckle", "chuckle", "chuckle"]]}
{"captions": "Mechanisms, conversation, and kitchen sounds are heard.", "data": [["Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Blender"], ["2.364", "0.561", "4.925", "9.312"], ["2.844", "7.89", "5.121", "10.0"], ["kitchen", "conversation", "kitchen", "Mechanisms"]]}
{"captions": "Men speak and shuffle cards in a small room with background music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.189", "2.063", "3.268", "4.772", "5.78", "7.882", "9.189"], ["1.827", "2.417", "4.346", "5.071", "6.732", "8.945", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A woman is speaking and narrating with intervals of silence.", "data": [["Female speech, woman speaking", "Speech", "Speech", "Speech"], ["3.185", "5.724", "7.509", "9.218"], ["4.276", "6.774", "8.195", "10.0"], ["woman", "speaking", "speaking", "speaking"]]}
{"captions": "Alarm sounds, splashes, ticks, and a man speaking are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Alarm", "Male speech, man speaking"], ["3.874", "9.126", "0.0", "9.622"], ["4.787", "9.52", "10.0", "10.0"], ["speaking", "speaking", "sounds", "speaking"]]}
{"captions": "People giggle, hiccup, and speak while mechanisms and breathing can be heard in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["2.134", "3.968", "6.304", "7.085", "8.207", "9.803"], ["2.759", "4.708", "6.715", "7.744", "9.699", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A child speaks, walks, and hears birds and glass while a man speaks intermittently.", "data": [["Child speech, kid speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.874", "5.717", "8.291"], ["0.772", "4.276", "6.984", "10.0"], ["speaks", "man", "man", "man"]]}
{"captions": "Women are speaking and frying food is heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)"], ["0.0", "3.417", "5.567", "6.772", "8.992", "0.008"], ["0.614", "4.764", "6.252", "7.827", "10.0", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "frying"]]}
{"captions": "Babies are laughing, a man is speaking, and someone is coughing with mechanisms in the background.", "data": [["Male speech, man speaking"], ["3.528"], ["4.118"], ["speaking"]]}
{"captions": "Pouring, tapping, and cutlery sounds are heard.", "data": [["Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware", "Cutlery, silverware"], ["7.126", "7.764", "7.976", "8.795", "9.709"], ["7.173", "7.811", "8.008", "8.984", "9.78"], ["tapping", "tapping", "tapping", "tapping", "tapping"]]}
{"captions": "A man speaks while breathing and typing on a computer keyboard is heard, with ticking in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.835", "3.331", "6.661"], ["0.819", "3.142", "5.85", "9.937"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Sizzling, whistling and multiple people speaking with a man and woman's voice.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Sizzle", "Female speech, woman speaking"], ["1.299", "2.142", "2.543", "3.89", "4.709", "6.654", "0.063", "9.417"], ["2.071", "2.472", "3.717", "4.614", "6.52", "8.732", "9.055", "10.0"], ["woman", "man", "man", "woman", "man", "woman", "Sizzling", "woman"]]}
{"captions": "Sounds of cutlery and liquid are heard in a room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Cutlery, silverware", "Cutlery, silverware", "Female speech, woman speaking", "Cutlery, silverware"], ["0.409", "0.89", "0.685", "7.276", "2.559", "8.252"], ["0.787", "1.614", "1.094", "8.079", "4.937", "8.921"], ["cutlery", "cutlery", "cutlery", "cutlery", "cutlery", "cutlery"]]}
{"captions": "Dogs bark, animals make noise, and a man speaks as fireworks go off.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Speech"], ["0.231", "1.399", "2.22", "5.085", "6.185", "8.893", "7.264"], ["1.107", "1.942", "2.96", "5.723", "8.031", "9.647", "9.79"], ["bark", "bark", "bark", "bark", "bark", "bark", "speaks"]]}
{"captions": "A cat hisses, meows, and there is speech and tapping sounds.", "data": [["Speech", "Cat", "Cat", "Speech"], ["3.878", "3.61", "9.352", "8.037"], ["5.031", "3.85", "10.0", "8.689"], ["meows", "meows", "meows", "meows"]]}
{"captions": "People are shouting and playing basketball with background noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.652", "1.421", "4.303", "6.747"], ["1.263", "2.292", "5.305", "7.248"], ["shouting", "shouting", "shouting", "shouting"]]}
{"captions": "Water runs, men speak.", "data": [["Male speech, man speaking", "Water tap, faucet", "Male speech, man speaking"], ["0.319", "0.0", "9.498"], ["0.706", "10.0", "10.0"], ["speak", "runs", "speak"]]}
{"captions": "Wild dogs and domestic animals bark and howl, with laughter in the background.", "data": [["Dog"], ["0.016"], ["10.0"], ["bark"]]}
{"captions": "Cars run and people speak and beep.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.115", "1.546"], ["0.645", "3.737"], ["speak", "speak"]]}
{"captions": "Music plays in the background while a man speaks, mechanisms clank, and human sounds occur.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.468", "5.029", "7.324", "8.699"], ["4.764", "6.081", "8.347", "9.416"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Laughter, breathing, clicking, and coughing can be heard.", "data": [["Male speech, man speaking"], ["3.528"], ["4.157"], ["Laughter"]]}
{"captions": "A water tap is running, filling with liquid, and being turned off.", "data": [["Water tap, faucet"], ["0.0"], ["10.0"], ["tap"]]}
{"captions": "People are speaking, ticking, tapping, and making human sounds with background noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.386", "2.071", "2.899", "4.0", "4.976", "8.449"], ["1.094", "1.787", "2.661", "3.898", "4.409", "5.724", "9.606"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Dogs growl and yip, and a woman speaks and something dings.", "data": [["Female speech, woman speaking"], ["8.246"], ["10.0"], ["speaks"]]}
{"captions": "Multiple people are speaking, laughing, and making speech noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["4.498", "8.008", "8.481", "9.416"], ["5.677", "8.407", "9.198", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Dogs howl over music and television sounds.", "data": [["Dog", "Male speech, man speaking", "Dog"], ["7.52", "0.685", "7.984"], ["7.882", "1.283", "8.402"], ["howl", "howl", "howl"]]}
{"captions": "Writing and speech can be heard, with a woman speaking in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.19", "6.006", "8.46"], ["2.873", "6.437", "9.278"], ["heard", "heard", "heard"]]}
{"captions": "Surface contact and speech can be heard with background noise and clicking sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.346", "1.811", "3.197", "4.843", "6.787", "8.173"], ["0.953", "2.016", "3.89", "6.354", "7.787", "9.898"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "There is music and shouting as a door slams and a man speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["7.268", "9.094"], ["8.866", "10.0"], ["speaks", "speaks"]]}
{"captions": "People speak and clap, a child speaks and a camera clicks.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking"], ["0.0", "7.913", "8.189", "9.724"], ["3.969", "8.157", "9.654", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Tools and mechanisms are in use with a man speaking and hammering.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.244", "5.008", "7.291"], ["1.449", "5.827", "8.654"], ["speaking", "speaking", "speaking"]]}
{"captions": "A woman is singing and a fire alarm is heard while people are speaking.", "data": [["Speech", "Smoke detector, smoke alarm"], ["6.071", "8.787"], ["7.315", "10.0"], ["speaking", "fire alarm"]]}
{"captions": "Children speak, breathe, and laugh while women converse.", "data": [["Child speech, kid speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "3.877", "4.448", "6.225", "9.061"], ["3.039", "4.355", "4.88", "8.794", "10.0"], ["speak", "converse", "speak", "converse", "converse"]]}
{"captions": "People are breathing, shuffling, tapping, speaking, and making sound effects.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.638", "1.268", "2.472", "3.969", "5.039", "5.827", "6.701", "7.575"], ["1.142", "1.843", "3.126", "4.835", "5.701", "6.457", "7.346", "8.016"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A power tool and mechanisms are in use with a man speaking.", "data": [["Male speech, man speaking"], ["5.136"], ["8.548"], ["speaking"]]}
{"captions": "Music is playing as cars race, people speak, and children talk.", "data": [["Speech", "Child speech, kid speaking", "Child speech, kid speaking"], ["4.339", "6.559", "7.142"], ["5.189", "6.913", "7.512"], ["speak", "talk", "talk"]]}
{"captions": "Dogs and domestic animals whimper, bark and make noise, while people chatter in the background.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.15", "1.669", "2.37", "3.134", "4.402", "8.339", "9.142"], ["1.283", "2.228", "2.945", "3.496", "4.898", "8.819", "9.512"], ["bark", "bark", "bark", "bark", "bark", "bark", "bark"]]}
{"captions": "A group of women are speaking over a noisy background with occasional thumps and yips.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.625", "1.743", "3.047", "4.756", "5.676", "6.335", "7.709", "8.559", "9.748"], ["1.229", "2.025", "4.166", "5.148", "5.951", "6.857", "8.118", "8.898", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks as various animal sounds, such as barks, croaks, and ticks, are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.78", "5.055"], ["0.299", "4.433", "6.945"], ["speaks", "speaks", "speaks"]]}
{"captions": "People are cooking and talking over background music and mechanical sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)", "Female speech, woman speaking"], ["0.009", "5.19", "6.377", "8.337", "0.0", "9.945"], ["2.86", "5.966", "8.077", "9.763", "8.328", "10.0"], ["talking", "talking", "talking", "talking", "cooking", "talking"]]}
{"captions": "A man is speaking, snapping his fingers, and breathing, with mechanical sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.457", "2.575", "5.835", "8.654"], ["2.362", "4.858", "6.724", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Rodents move and people talk while tapping and clicking sounds occur.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking"], ["1.134", "4.197", "7.031"], ["3.772", "5.307", "8.433"], ["talk", "talk", "talk"]]}
{"captions": "A bathtub fills with water as people converse and a child speaks.", "data": [["Conversation", "Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Bathtub (filling or washing)", "Male speech, man speaking"], ["0.514", "2.462", "5.694", "7.22", "0.0", "8.838"], ["1.619", "4.012", "6.566", "8.694", "10.0", "9.168"], ["converse", "converse", "child", "child", "fills", "converse"]]}
{"captions": "People are shouting and speaking, with music playing in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "8.16"], ["8.038", "9.729"], ["speaking", "speaking"]]}
{"captions": "People are talking, laughing, shouting, and a fire crackles.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.052", "0.96", "2.047", "2.811", "3.759", "6.397", "7.877", "8.814", "9.526"], ["0.706", "1.729", "2.632", "3.187", "4.806", "7.629", "8.514", "9.485", "9.867"], ["talking", "talking", "talking", "talking", "talking", "talking", "talking", "talking", "talking"]]}
{"captions": "Dogs are barking, howling, and making animal noises with sirens in the background.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["0.0", "1.252", "4.409", "6.906", "8.402", "9.575"], ["0.78", "3.772", "5.969", "8.016", "9.291", "10.0"], ["howling", "howling", "howling", "howling", "howling", "howling"]]}
{"captions": "A woman is speaking, dishes and pots are being used, and food is sizzling and being stirred while more dishes are heard clanging.", "data": [["Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Female speech, woman speaking", "Sizzle", "Dishes, pots, and pans"], ["2.142", "0.0", "2.409", "2.772", "3.331", "2.732", "3.677", "5.748", "6.142", "6.709", "7.228", "4.362", "7.654", "8.402", "0.0", "9.764"], ["2.283", "1.606", "2.528", "2.961", "3.559", "3.646", "4.417", "5.945", "6.567", "6.89", "7.457", "4.969", "7.866", "10.0", "10.0", "9.921"], ["clanging", "speaking", "clanging", "clanging", "clanging", "speaking", "clanging", "clanging", "clanging", "clanging", "clanging", "speaking", "clanging", "speaking", "sizzling", "clanging"]]}
{"captions": "Crowds cheer, boats splash, and men speak.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.819", "2.378", "4.213", "5.142", "6.197"], ["2.189", "3.921", "4.89", "5.764", "6.827"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A blender is running and people are speaking, clicking, and breathing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Blender", "Male speech, man speaking"], ["0.978", "4.819", "5.77", "8.261", "0.0", "9.755"], ["3.868", "5.507", "8.089", "9.556", "1.259", "10.0"], ["speaking", "speaking", "speaking", "speaking", "running", "speaking"]]}
{"captions": "A man is speaking and using power tools.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.818", "5.162", "6.665", "7.313", "8.067"], ["2.358", "5.104", "6.569", "7.196", "7.695", "9.368"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A cat is purring and making noise, and another cat is heard.", "data": [["Cat", "Cat", "Cat"], ["0.748", "1.969", "9.307"], ["1.414", "2.315", "10.0"], ["purring", "purring", "purring"]]}
{"captions": "A man is speaking, with cowbells and human sounds heard in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.157", "1.661", "4.134", "6.039", "8.157", "9.669"], ["1.15", "3.449", "5.402", "6.685", "8.866", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are talking and dogs are barking with music and mechanical sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Dog"], ["0.0", "2.278", "3.648", "4.668", "7.334", "8.922", "9.644", "3.356"], ["1.981", "3.101", "4.482", "5.486", "8.136", "9.214", "10.0", "3.739"], ["talking", "talking", "talking", "talking", "talking", "talking", "talking", "barking"]]}
{"captions": "A man is speaking while a basketball is being played with background noise and clicking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.961", "6.937", "8.189"], ["3.457", "7.882", "8.976"], ["speaking", "speaking", "speaking"]]}
{"captions": "Music, man speaking, animal and wild animal sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["3.456", "7.06"], ["4.779", "8.378"], ["man", "man"]]}
{"captions": "Speech and female voices are heard with background noise and wailing.", "data": [["Speech", "Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Male speech, man speaking", "Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.507", "1.315", "1.936", "2.565", "3.905", "5.694", "8.178", "8.93", "9.755"], ["0.204", "0.98", "1.822", "2.41", "3.652", "5.221", "7.484", "8.431", "9.208", "10.0"], ["Speech", "Speech", "voices", "voices", "Speech", "Speech", "Speech", "voices", "voices", "voices"]]}
{"captions": "Multiple male speeches are accompanied by music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.543", "3.63", "5.764", "8.969"], ["2.008", "4.244", "6.598", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Mechanisms cracking as men talk, ticking sounds and more cracking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.911", "4.547", "6.169", "9.565"], ["3.496", "5.861", "9.031", "10.0"], ["talk", "talk", "talk", "talk"]]}
{"captions": "A faucet drips, a cat meows, and men speak.", "data": [["Male speech, man speaking", "Water tap, faucet", "Male speech, man speaking"], ["3.74", "0.0", "7.016"], ["5.417", "10.0", "8.811"], ["speak", "drips", "speak"]]}
{"captions": "Telephone bells are ringing, a door opens, and a tick is heard.", "data": [["Telephone bell ringing", "Telephone bell ringing"], ["0.701", "6.661"], ["2.732", "8.693"], ["ringing", "ringing"]]}
{"captions": "A speech synthesizer, burping, clicking, and man speaking are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.488", "4.22", "6.071", "6.945"], ["4.055", "5.787", "6.441", "9.874"], ["man", "man", "man", "man"]]}
{"captions": "Men are speaking and there is a mechanical noise in the background, along with air conditioning and speech.", "data": [["Male speech, man speaking", "Speech", "Speech", "Male speech, man speaking", "Speech", "Speech", "Speech", "Male speech, man speaking"], ["0.0", "0.808", "5.207", "8.357", "9.09", "9.294", "9.6", "9.817"], ["0.312", "4.576", "7.515", "8.941", "9.213", "9.552", "9.722", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "There is male speech with crowd noise, cheering, and music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.689", "3.968", "8.034", "8.999", "9.78"], ["1.527", "3.274", "5.529", "8.947", "9.688", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Music plays as people sizzle, tap, laugh, and converse.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Sizzle", "Female speech, woman speaking"], ["1.906", "7.386", "0.0", "9.079"], ["6.969", "8.95", "10.0", "10.0"], ["converse", "converse", "sizzle", "converse"]]}
{"captions": "Birds call as water taps and music plays.", "data": [["Alarm clock", "Water tap, faucet"], ["1.134", "3.386"], ["3.087", "4.417"], ["Birds", "taps"]]}
{"captions": "Men speak and typing sounds are heard from a computer keyboard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "2.654", "4.165", "5.929", "6.701", "8.441"], ["2.346", "3.913", "5.386", "6.346", "7.386", "9.535"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Music, singing, and speech create a small room's atmosphere.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.204", "5.16", "8.092", "8.615"], ["3.551", "6.314", "8.323", "10.0"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Women speak, a doorbell rings, and there is speech.", "data": [["Female speech, woman speaking", "Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Speech"], ["0.069", "1.827", "2.786", "5.347", "8.584"], ["0.491", "2.428", "3.763", "8.439", "9.59"], ["speech", "Women", "speech", "speech", "Women"]]}
{"captions": "A woman talks while food is being cooked and dishes are being used.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Frying (food)", "Female speech, woman speaking", "Dishes, pots, and pans"], ["0.354", "1.976", "5.858", "0.0", "7.362", "9.189"], ["1.772", "5.528", "7.094", "7.378", "10.0", "9.622"], ["talks", "talks", "talks", "cooked", "talks", "dishes"]]}
{"captions": "Water and a waterfall are heard, with men speaking and a hubbub of speech noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.009", "1.206", "2.596", "4.572", "5.568", "7.684", "9.073"], ["0.455", "2.01", "3.094", "5.096", "6.11", "8.462", "9.51"], ["hubbub", "hubbub", "hubbub", "hubbub", "hubbub", "hubbub", "hubbub"]]}
{"captions": "Music and various sounds are heard in a public space with water and female speech.", "data": [["Female speech, woman speaking"], ["2.89"], ["4.001"], ["female"]]}
{"captions": "People are speaking and making buzzing sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.48", "3.087", "3.433", "8.52"], ["2.85", "3.291", "6.701", "9.748"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Fowl, turkeys, and women make sounds, including shouting and applause.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.685", "6.354", "9.165"], ["4.913", "6.693", "10.0"], ["shouting", "shouting", "shouting"]]}
{"captions": "An electric shaver hums in a small room.", "data": [["Electric shaver, electric razor"], ["0.0"], ["10.0"], ["hums"]]}
{"captions": "Music is playing and a helicopter can be heard, followed by multiple men speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.197", "1.031", "1.724", "1.961", "4.567", "4.827", "5.213", "6.913", "8.362", "8.961"], ["0.89", "1.417", "1.835", "3.323", "4.717", "4.992", "6.638", "7.961", "8.756", "10.0"], ["men", "men", "men", "men", "men", "men", "men", "men", "men", "men"]]}
{"captions": "A man speaks and sings, while wails and beeps sound in the background.", "data": [["Male speech, man speaking"], ["0.0"], ["0.228"], ["speaks"]]}
{"captions": "A man speaks and shuffles cards with breathing and shuffling sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.901", "2.382", "4.53", "5.429", "5.903"], ["0.707", "2.244", "3.933", "4.818", "5.8", "6.273"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Women are speaking, with camera clicks and speech also heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.013", "4.694", "5.791", "8.138"], ["3.01", "5.268", "7.742", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A woman sobs, breathes heavily, and speaks.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.74", "1.528", "2.504", "6.921", "8.079"], ["0.417", "1.15", "2.386", "3.622", "7.591", "9.039"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Electric shavers, speech, and laughter can be heard.", "data": [["Electric shaver, electric razor", "Speech", "Speech", "Speech", "Electric shaver, electric razor", "Speech"], ["0.0", "0.008", "4.472", "6.031", "4.457", "6.921"], ["3.559", "2.173", "5.394", "6.252", "7.567", "10.0"], ["shavers", "laughter", "laughter", "laughter", "shavers", "laughter"]]}
{"captions": "Radios, vehicles, male speech, clicking, and crying sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking"], ["0.472", "2.512", "4.598", "6.531", "9.142"], ["2.26", "4.406", "5.984", "7.217", "10.0"], ["speech", "speech", "speech", "speech", "crying"]]}
{"captions": "Women speak, music plays, people converse, and beeps and bird calls are heard.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "1.992", "4.008", "5.236", "6.827", "8.685"], ["1.551", "3.945", "4.929", "6.394", "7.11", "9.449"], ["Women", "converse", "Women", "converse", "converse", "Women"]]}
{"captions": "Men and women are speaking and honking horns in a busy, noisy setting.", "data": [["Male speech, man speaking", "Conversation", "Female speech, woman speaking", "Male speech, man speaking"], ["0.0", "3.701", "5.031", "7.173"], ["1.417", "4.669", "5.535", "8.654"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are speaking, singing and breathing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.039", "4.819", "8.774"], ["3.812", "4.5", "6.381", "9.286"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Clicking noises and female speech are heard in background noise.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.008", "0.654", "4.094", "7.378"], ["0.433", "2.354", "6.85", "7.955"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Various mechanisms and male and female speeches are heard.", "data": [["Speech", "Speech", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.335", "3.499", "6.885"], ["0.498", "3.225", "4.281", "8.008"], ["speeches", "speeches", "mechanisms", "mechanisms"]]}
{"captions": "Men are speaking, clicking, and making surface contact sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.519", "3.184", "6.558", "8.228"], ["1.12", "3.021", "4.107", "7.176", "8.595"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Crying babies and sobbing women mixed with rattling sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["5.861", "7.457"], ["6.624", "7.942"], ["sobbing", "sobbing"]]}
{"captions": "Alarms sound in a small room.", "data": [["Alarm", "Alarm", "Alarm clock", "Alarm clock", "Alarm clock", "Alarm", "Alarm clock", "Alarm", "Alarm", "Alarm clock", "Alarm clock"], ["0.0", "0.731", "1.74", "2.736", "3.733", "4.716", "5.725", "6.721", "7.718", "8.752", "9.748"], ["0.416", "1.362", "2.371", "3.354", "4.363", "5.359", "6.356", "7.364", "8.361", "9.357", "10.0"], ["Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms"]]}
{"captions": "Male and female speech, mouse and rodent sounds, and more speech are heard in a small room.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Speech", "Male speech, man speaking"], ["0.0", "1.055", "1.724", "5.984", "6.976", "7.709", "8.252", "9.244"], ["0.102", "1.535", "2.551", "6.496", "7.126", "7.803", "8.74", "10.0"], ["Male", "female", "female", "Male", "Male", "speech", "speech", "Male"]]}
{"captions": "Telephones ring and people speak.", "data": [["Speech", "Speech", "Male speech, man speaking", "Speech", "Telephone bell ringing", "Speech"], ["3.528", "4.575", "5.819", "6.52", "0.465", "8.984"], ["4.339", "5.181", "6.37", "8.803", "1.48", "9.701"], ["speak", "speak", "speak", "speak", "ring", "speak"]]}
{"captions": "People are talking, mechanisms are ticking and spraying, and a woman is heard.", "data": [["Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.457", "1.894", "3.598", "4.26", "5.394", "7.039", "7.748"], ["1.758", "3.279", "4.008", "4.677", "6.198", "7.425", "8.244"], ["woman", "talking", "talking", "woman", "woman", "woman", "woman"]]}
{"captions": "A fire crackles outside with the sounds of engines and sirens in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.01", "2.464", "5.859", "8.832"], ["2.274", "3.775", "6.402", "10.0"], ["engines", "engines", "engines", "engines"]]}
{"captions": "Men are speaking and spraying water in the wind.", "data": [["Male speech, man speaking", "Speech", "Speech", "Male speech, man speaking", "Speech"], ["0.749", "1.152", "2.847", "4.689", "6.474"], ["0.929", "2.756", "4.36", "5.528", "7.812"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking, music is playing, and sizzling is heard.", "data": [["Male speech, man speaking", "Sizzle"], ["0.0", "0.0"], ["0.228", "10.0"], ["speaking", "sizzling"]]}
{"captions": "A man speaks with a ticking sound, stomach rumbling, and dripping water in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.962", "6.934", "8.54"], ["0.589", "5.551", "7.899", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Male speech, rain, a woman speaking, sound effects, explosions are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["1.661", "2.425", "3.189", "3.961", "4.717", "6.874"], ["2.157", "2.906", "3.701", "4.48", "5.228", "7.52"], ["Male", "Male", "Male", "Male", "Male", "woman"]]}
{"captions": "People cheer, shout battle cries, clap, speak, and make noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.562", "1.995", "2.429", "3.048", "4.072", "4.523", "6.935"], ["1.747", "2.29", "2.741", "3.956", "4.396", "5.547", "8.386"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Air conditioning hums while men speak and tap.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.448", "1.942", "4.144", "7.444", "9.053", "9.519"], ["1.841", "2.312", "4.596", "8.887", "9.323", "10.0"], ["hums", "hums", "hums", "hums", "hums", "hums"]]}
{"captions": "A vehicle and fire alarm sound while people speak and clap.", "data": [["Fire alarm", "Male speech, man speaking", "Fire alarm", "Male speech, man speaking", "Fire alarm", "Fire alarm", "Male speech, man speaking", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Male speech, man speaking"], ["0.416", "0.594", "1.407", "1.785", "3.312", "4.294", "4.111", "5.309", "7.3", "8.324", "9.488", "6.397"], ["0.749", "1.183", "2.266", "2.312", "4.13", "5.066", "5.066", "5.998", "7.89", "8.639", "10.0", "6.658"], ["fire", "speak", "fire", "speak", "fire", "fire", "speak", "fire", "fire", "fire", "fire", "speak"]]}
{"captions": "A man is speaking, humming, breathing, laughing, snapping his fingers, scraping, and speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.446", "6.027", "9.607"], ["0.816", "3.713", "9.427", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Telephones ring and produce ringtones.", "data": [["Telephone bell ringing", "Telephone bell ringing", "Telephone bell ringing", "Telephone bell ringing", "Telephone bell ringing"], ["0.0", "1.11", "4.559", "6.346", "7.457"], ["0.48", "3.535", "5.504", "7.11", "9.661"], ["ringtones", "ringtones", "ringtones", "ringtones", "ringtones"]]}
{"captions": "Men and children are speaking, music is playing, and there is television and laughter in a room.", "data": [["Male speech, man speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.0", "3.039", "4.575", "6.654"], ["1.283", "4.102", "5.236", "7.709"], ["Men", "children", "children", "children"]]}
{"captions": "A man is cooking and washing dishes while talking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Dishes, pots, and pans", "Male speech, man speaking", "Dishes, pots, and pans", "Male speech, man speaking", "Male speech, man speaking", "Dishes, pots, and pans"], ["0.0", "0.714", "4.858", "2.488", "5.252", "5.614", "7.819", "6.761"], ["0.291", "2.283", "4.984", "4.433", "5.378", "6.535", "10.0", "7.049"], ["talking", "talking", "washing", "talking", "washing", "talking", "talking", "washing"]]}
{"captions": "A woman is speaking with background noise, breathing, and ticking sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.268", "3.244", "5.031", "8.315"], ["3.008", "4.205", "6.685", "8.858"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A group of men are having a conversation, with camera sounds and background noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.102", "2.063", "4.126", "5.354", "6.134", "7.74", "9.803"], ["0.598", "1.748", "4.087", "5.307", "6.071", "7.701", "9.441", "10.0"], ["conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation", "conversation"]]}
{"captions": "Men are speaking, a heartbeat and dripping water is heard in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.168", "4.031", "6.796", "7.82"], ["2.857", "5.963", "7.571", "9.497"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking in a car with rumbling noise, tapping sounds, and other car sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.145", "5.344", "5.709", "7.108", "8.184"], ["2.921", "5.604", "6.368", "7.322", "8.797"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking while a blender runs.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Blender", "Male speech, man speaking"], ["0.157", "3.402", "5.638", "0.0", "8.024"], ["2.787", "5.252", "7.646", "10.0", "10.0"], ["blender", "blender", "blender", "blender", "blender"]]}
{"captions": "Men speak while a rowboat is in the water with wind noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.299", "1.638", "4.047", "5.819", "7.701", "8.654"], ["1.055", "2.795", "4.795", "6.496", "8.378", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A woman speaks while using an electric toothbrush.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.858", "9.11"], ["0.102", "1.638", "9.52"], ["speaks", "speaks", "speaks"]]}
{"captions": "A man is speaking while machines hum and papers rustle.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.189", "6.545", "8.811"], ["1.709", "5.73", "8.527", "9.674"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A dog is barking, a man and women are speaking.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.646", "3.252", "6.252", "9.118"], ["2.283", "4.094", "6.827", "9.583"], ["man", "women", "women", "women"]]}
{"captions": "Crackling, rustling paper, tapping, and speech combine with female voices and human sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.701", "7.157"], ["0.858", "5.291", "9.378"], ["speech", "speech", "speech"]]}
{"captions": "Music is playing, followed by a child speaking, heartbeats, and sound effects.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["2.205", "3.134", "3.992", "6.331", "6.929", "7.945", "9.528"], ["2.677", "3.606", "5.843", "6.661", "7.654", "9.323", "10.0"], ["child", "child", "child", "child", "child", "child", "child"]]}
{"captions": "Filling and washing sounds from a sink, bathtub, and tap, as well as child speech, are heard.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Sink (filling or washing)", "Child speech, kid speaking"], ["0.382", "1.803", "2.827", "3.734", "4.116", "5.087", "6.445", "7.347", "8.133", "0.0", "9.197"], ["1.214", "1.971", "3.503", "3.96", "4.705", "5.416", "6.642", "7.815", "8.983", "10.0", "9.416"], ["child", "child", "child", "child", "child", "child", "child", "child", "child", "Filling", "child"]]}
{"captions": "Male speech alternates with chirps and tweets.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.205", "7.543"], ["1.323", "10.0"], ["speech", "speech"]]}
{"captions": "A rooster crows while a man speaks and clucks.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.528", "5.82", "7.716"], ["1.184", "2.974", "7.143", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A sequence of speeches, music, and singing in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Female speech, woman speaking"], ["0.0", "2.394", "4.181", "5.276", "7.89", "9.236"], ["1.504", "3.205", "5.055", "7.008", "8.354", "10.0"], ["speeches", "speeches", "speeches", "speeches", "speeches", "speeches"]]}
{"captions": "A series of telephone ringing sounds, speech synthesizer, groans, and ringtones.", "data": [["Telephone bell ringing", "Ringtone", "Ringtone", "Telephone bell ringing"], ["3.472", "5.441", "7.291", "9.409"], ["5.094", "7.094", "9.047", "10.0"], ["ringtones", "ringtones", "ringtones", "ringtones"]]}
{"captions": "Music plays while speech and rain sounds occur.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.168", "5.558", "5.923", "7.172", "7.924", "8.196", "9.601"], ["5.2", "5.818", "6.287", "7.426", "8.103", "9.526", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A telephone rings repeatedly.", "data": [["Telephone bell ringing", "Telephone bell ringing", "Telephone bell ringing"], ["0.0", "2.568", "8.473"], ["2.221", "8.022", "10.0"], ["repeatedly", "repeatedly", "repeatedly"]]}
{"captions": "Beeping and speech intermix with cricket sounds and female speech.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.898", "4.094", "4.756", "5.85", "7.386", "7.78", "8.559", "9.039", "9.402", "9.646", "9.898"], ["2.669", "4.52", "5.685", "7.213", "7.661", "8.417", "8.898", "9.283", "9.583", "9.724", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "A man is speaking, walking and breathing while another man is shouting and glass is shattering.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.175", "3.754", "4.118", "4.91", "5.46", "7.085", "7.808"], ["0.185", "3.534", "4.06", "4.795", "5.356", "6.113", "7.715", "9.179"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music plays with urban background noise and a man speaking.", "data": [["Male speech, man speaking"], ["9.661"], ["10.0"], ["speaking"]]}
{"captions": "A woman and child speak while a speech synthesizer talks.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.0", "1.49", "4.307", "4.934", "6.613", "8.311"], ["1.262", "2.77", "4.877", "6.442", "7.903", "9.839"], ["woman", "woman", "child", "child", "child", "child"]]}
{"captions": "A goat is bleating and people are speaking in the wind.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.118", "5.024"], ["0.882", "6.37"], ["speaking", "speaking"]]}
{"captions": "Women speak with music, creaking, and door sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.382", "1.104", "3.503", "4.202", "8.96"], ["0.26", "0.977", "2.052", "3.844", "6.514", "9.601"], ["music", "music", "music", "music", "music", "music"]]}
{"captions": "Children speak, clap, and breathe.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.0", "4.102", "6.551", "7.378", "9.087"], ["3.843", "6.236", "7.244", "8.709", "10.0"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "An alarm is ringing and men are speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Alarm", "Male speech, man speaking"], ["3.165", "5.019", "0.0", "9.793"], ["4.567", "9.092", "10.0", "10.0"], ["speaking", "speaking", "ringing", "speaking"]]}
{"captions": "A rowboat and male speech are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.142", "2.252", "3.654", "5.291", "9.016", "9.906"], ["2.134", "2.394", "4.307", "8.803", "9.701", "10.0"], ["speech", "speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Food is frying and a man is speaking and stirring.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Frying (food)", "Male speech, man speaking"], ["0.183", "3.203", "4.011", "5.021", "6.076", "0.0", "7.899"], ["1.224", "3.842", "4.897", "5.966", "7.383", "10.0", "8.79"], ["speaking", "speaking", "speaking", "speaking", "speaking", "frying", "speaking"]]}
{"captions": "A man is speaking, spraying, and crumpling, with music in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.441", "2.827", "4.307"], ["0.102", "2.488", "3.882", "5.512"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Water flows and dishes clatter with child speech and laughter.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Child speech, kid speaking", "Child speech, kid speaking", "Male speech, man speaking", "Water tap, faucet", "Dishes, pots, and pans"], ["0.0", "1.732", "1.983", "3.175", "4.774", "5.711", "6.076", "2.942", "7.803", "8.547", "0.0", "6.423"], ["1.503", "2.12", "2.156", "3.298", "5.076", "5.834", "6.24", "3.541", "8.493", "9.557", "10.0", "7.012"], ["laughter", "laughter", "clatter", "clatter", "clatter", "clatter", "clatter", "laughter", "laughter", "laughter", "flows", "clatter"]]}
{"captions": "Background noise is heard, tapping and dial tones occur, a man and a woman speak.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["8.213", "9.496"], ["9.465", "10.0"], ["speak", "speak"]]}
{"captions": "Male speech, clicking, and sine wave sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["2.094", "3.307"], ["3.181", "5.142"], ["speech", "speech"]]}
{"captions": "A man speaks amid music and other men's voices.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.937", "5.457", "9.386"], ["1.764", "4.661", "9.236", "10.0"], ["speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Men are speaking on a ship while the ocean, wind, and waves are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.669", "3.677", "5.205"], ["1.276", "3.339", "4.047", "6.638"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Women talk and a waterfall can be heard with crickets and men talking.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.803", "7.276", "8.756"], ["0.685", "4.843", "7.756", "9.016"], ["Women", "Women", "men", "men"]]}
{"captions": "Male and female speech mix with laughter and conversation, and mechanisms make noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.394", "2.189", "3.118", "6.181", "7.709"], ["1.591", "2.882", "5.661", "7.252", "9.691"], ["Male", "Male", "Male", "Male", "Male"]]}
{"captions": "Men speak among a noisy babble while music plays and someone chuckles and taps.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.898", "2.559", "4.315", "9.567"], ["1.205", "2.362", "4.189", "7.094", "10.0"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A man is speaking, hitting something, and making clicking noises.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.74", "6.583", "8.197", "9.323"], ["2.228", "6.441", "7.606", "9.15", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are chewing, speaking, and making sounds while a hum is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.135", "3.418", "4.338", "6.42", "8.485", "9.439", "9.832"], ["3.303", "4.158", "5.356", "7.091", "9.057", "9.636", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Fire alarms are heard.", "data": [["Fire alarm", "Alarm", "Fire alarm"], ["0.228", "2.843", "6.795"], ["2.551", "6.567", "10.0"], ["heard", "heard", "heard"]]}
{"captions": "A man is speaking and using a power tool.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.327", "4.214", "5.333", "8.401"], ["0.899", "2.944", "4.688", "6.122", "9.575"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are speaking and a dog is barking, growling, and making various noises in a small room.", "data": [["Dog", "Speech", "Dog", "Dog", "Dog", "Female speech, woman speaking", "Dog"], ["2.717", "0.008", "3.945", "5.016", "7.449", "4.638", "8.929"], ["3.843", "1.622", "4.354", "7.118", "8.291", "5.252", "9.843"], ["barking", "speaking", "barking", "barking", "barking", "speaking", "barking"]]}
{"captions": "People talk and a speedboat moves with waves and wind noise.", "data": [["Conversation"], ["0.0"], ["2.984"], ["talk"]]}
{"captions": "Wind noise, tapping, and speech can be heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.136", "1.703", "3.071", "6.812"], ["1.223", "2.364", "3.551", "8.279"], ["speech", "speech", "speech", "speech"]]}
{"captions": "Clocks and alarms are ticking and ringing.", "data": [["Alarm clock", "Alarm clock"], ["0.0", "3.605"], ["2.655", "10.0"], ["ringing", "ringing"]]}
{"captions": "People are speaking, singing, and making various sounds with music playing.", "data": [["Speech", "Speech", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.299", "4.551", "7.622", "8.803"], ["2.724", "4.441", "7.354", "8.402", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music is playing with men speaking and singing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.543", "1.189", "2.583", "3.496", "4.317"], ["0.889", "1.953", "3.346", "4.11", "5.213"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "An emergency vehicle and a motor vehicle (road) are heard, followed by tapping and male speech on a radio.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.756", "2.362", "5.321"], ["2.157", "3.008", "5.72"], ["radio", "radio", "radio"]]}
{"captions": "A man speaks and mechanisms click, with breathing heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "3.22", "4.535", "7.016", "8.488"], ["2.685", "4.008", "6.78", "8.315", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Men are speaking, birds are flying, and various mechanisms create noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.868", "2.892", "7.056"], ["2.487", "4.517", "7.987"], ["speaking", "speaking", "speaking"]]}
{"captions": "A telephone rings, people are speaking and eating, with chirping birds in the background.", "data": [["Telephone bell ringing", "Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Telephone bell ringing", "Telephone bell ringing", "Male speech, man speaking"], ["0.0", "0.354", "5.045", "5.738", "4.592", "9.533", "7.748"], ["0.659", "3.833", "5.518", "5.906", "6.479", "10.0", "8.945"], ["rings", "speaking", "speaking", "speaking", "rings", "rings", "speaking"]]}
{"captions": "People are speaking, ticking, and tapping with specific impacts sounds.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.102", "2.488", "3.504", "4.835", "6.795", "8.457", "9.693"], ["2.016", "2.346", "3.394", "4.693", "5.874", "7.913", "8.945", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men speak repeatedly with music playing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.116", "0.629", "1.504", "2.355", "4.739", "5.024", "7.667", "8.472"], ["0.381", "1.252", "2.19", "3.006", "4.909", "7.315", "8.334", "9.465"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Women speak and cut with scissors in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.795", "2.079", "2.795", "3.189", "6.354", "6.63", "8.047", "8.512", "9.118"], ["1.795", "2.504", "3.071", "3.693", "6.551", "7.709", "8.378", "8.984", "9.732"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Mechanisms, women are speaking, and laughter can be heard with a blender in the background.", "data": [["Female speech, woman speaking", "Speech", "Blender"], ["0.102", "1.606", "7.992"], ["1.26", "3.433", "9.866"], ["women", "speaking", "blender"]]}
{"captions": "Music, men speaking, and water sounds are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.378", "5.05", "9.788"], ["4.891", "6.835", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "People are cheering, shouting, laughing and whistling in a crowd.", "data": [["Speech", "Speech"], ["1.079", "3.252"], ["2.756", "4.819"], ["laughing", "laughing"]]}
{"captions": "A woman speaks while clicking noises occur.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.165", "3.638", "5.283", "7.197", "8.15", "9.378"], ["2.205", "4.717", "6.953", "7.677", "8.874", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man speaks, wind noise and sirens are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.724", "8.622"], ["0.441", "7.181", "10.0"], ["speaks", "speaks", "speaks"]]}
{"captions": "A ringtone and sound effect can be heard.", "data": [["Ringtone"], ["0.0"], ["10.0"], ["ringtone"]]}
{"captions": "People are speaking, shouting, laughing, clapping, screaming, and music is playing over thumping sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "1.723", "3.384", "5.758"], ["0.268", "3.274", "5.278", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Various tones, ringtones, and male/female speech are heard in a conversation.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Ringtone", "Male speech, man speaking", "Child speech, kid speaking", "Ringtone", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Ringtone"], ["2.225", "4.18", "4.822", "4.519", "5.481", "5.146", "6.012", "6.442", "7.377", "7.982", "8.471", "9.241", "9.511", "9.756", "6.314"], ["3.611", "4.355", "5.093", "4.806", "5.773", "5.868", "6.187", "6.883", "7.886", "8.311", "9.071", "9.448", "9.665", "10.0", "7.164"], ["speech", "speech", "tones", "speech", "tones", "tones", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "speech", "tones"]]}
{"captions": "Static and noise are heard, followed by a man speaking.", "data": [["Male speech, man speaking"], ["6.319"], ["10.0"], ["speaking"]]}
{"captions": "Continuous buzzes accompany multiple male speeches.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.362", "5.457", "7.307", "9.409"], ["4.811", "6.646", "9.15", "10.0"], ["speeches", "speeches", "speeches", "speeches"]]}
{"captions": "Mechanisms, a cat, and tapping sounds are heard.", "data": [["Cat"], ["2.362"], ["3.37"], ["tapping"]]}
{"captions": "Women are whispering and speaking, with mechanisms and zippers in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.361", "0.676", "3.12", "3.49", "4.121", "4.568", "4.806", "5.162", "5.583", "5.998", "6.547", "7.081"], ["0.251", "0.484", "0.882", "3.294", "3.828", "4.445", "4.692", "5.048", "5.418", "5.642", "6.254", "6.706", "7.268"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man is speaking, breathing, and answering the phone while other sounds like clicking and thumping occur.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Telephone bell ringing", "Telephone bell ringing", "Male speech, man speaking", "Telephone bell ringing"], ["0.0", "0.896", "1.399", "4.595", "3.185", "7.711"], ["0.197", "1.619", "2.78", "5.965", "4.636", "8.59"], ["speaking", "speaking", "answering", "answering", "speaking", "answering"]]}
{"captions": "Various mechanisms click and tap while a woman speaks.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["0.071", "0.268"], ["0.197", "0.441"], ["speaks", "speaks"]]}
{"captions": "Men are speaking, taking photos, and setting off car alarms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.102", "4.638", "6.488", "9.008"], ["1.228", "5.472", "7.236", "9.913"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking and making mechanisms sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.948", "4.701", "5.933", "7.328", "8.225", "9.33"], ["1.449", "4.556", "5.0", "7.174", "8.034", "9.067", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men talk, breathe, and make noise.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.338", "3.541", "4.491", "5.354", "7.584", "9.479"], ["3.422", "4.134", "5.076", "5.985", "9.178", "10.0"], ["talk", "talk", "talk", "talk", "talk", "talk"]]}
{"captions": "A fixed-wing aircraft flies while a man speaks.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["9.055", "9.795"], ["9.614", "10.0"], ["man", "man"]]}
{"captions": "Various conversations and speeches with the sound of a low-frequency engine in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.22", "2.766", "3.384", "5.677", "9.15", "9.795"], ["1.008", "2.19", "3.171", "3.562", "6.843", "9.638", "10.0"], ["conversations", "conversations", "conversations", "conversations", "conversations", "conversations", "conversations"]]}
{"captions": "Women are speaking and clicking sounds are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["0.43", "2.348"], ["1.759", "3.655"], ["speaking", "speaking"]]}
{"captions": "A man speaks to a cheering crowd with applause and children shouting.", "data": [["Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "5.756"], ["0.417", "9.094"], ["speaks", "speaks"]]}
{"captions": "Alarm clocks and female speech alternate with speech and music.", "data": [["Alarm clock", "Female speech, woman speaking", "Female speech, woman speaking", "Alarm clock", "Female speech, woman speaking"], ["0.0", "3.103", "5.777", "5.438", "7.705"], ["4.44", "4.019", "6.483", "10.0", "8.934"], ["Alarm", "Alarm", "Alarm", "Alarm", "Alarm"]]}
{"captions": "Dogs bark, pant, and walk as men speak and wind blows.", "data": [["Dog", "Male speech, man speaking", "Dog", "Male speech, man speaking", "Male speech, man speaking", "Dog", "Dog", "Male speech, man speaking"], ["0.008", "0.449", "2.173", "2.606", "5.945", "5.638", "7.992", "7.299"], ["1.543", "1.228", "4.929", "3.732", "6.449", "7.315", "9.496", "7.858"], ["bark", "bark", "bark", "bark", "bark", "bark", "bark", "bark"]]}
{"captions": "Women speak and whisper in a small room.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Female speech, woman speaking"], ["0.0", "1.575", "4.866", "8.835"], ["0.811", "3.598", "8.638", "9.787"], ["whisper", "whisper", "whisper", "whisper"]]}
{"captions": "A man is speaking with occasional crinkling and tapping sounds, and ticking in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.559", "2.701", "5.512"], ["0.26", "2.252", "4.189", "6.78"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Children play, shout, and speak.", "data": [["Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking", "Child speech, kid speaking"], ["0.0", "3.598", "4.304", "6.762", "8.629"], ["3.333", "4.141", "5.255", "8.384", "10.0"], ["shout", "shout", "shout", "shout", "shout"]]}
{"captions": "A man speaks, mechanisms click, hands move, a tuning fork is struck, and background noise is present.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["0.0", "6.063"], ["5.142", "6.669"], ["speaks", "speaks"]]}
{"captions": "An alarm sounds repeatedly over background noise.", "data": [["Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm"], ["0.0", "0.611", "1.737", "2.841", "3.909", "5.013", "6.113", "7.217", "8.285", "9.368"], ["0.212", "1.402", "2.507", "3.457", "4.61", "5.672", "6.819", "7.966", "9.007", "10.0"], ["repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly", "repeatedly"]]}
{"captions": "Music, sewing machine, scissors, and women speaking with occasional tapping sounds.", "data": [["Female speech, woman speaking"], ["6.488"], ["9.709"], ["women"]]}
{"captions": "Men speak and breathe to music.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.553", "1.183", "2.321", "3.809", "4.873", "6.021", "8.187", "9.913"], ["1.01", "1.608", "3.075", "4.784", "5.607", "6.789", "9.297", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "A man speaks and jangling keys can be heard with an alarm.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Alarm"], ["0.0", "3.78", "5.661", "5.063"], ["3.244", "5.173", "10.0", "6.165"], ["speaks", "speaks", "speaks", "jangling"]]}
{"captions": "Mechanisms and human sounds can be heard, with people talking and kids speaking.", "data": [["Female speech, woman speaking", "Child speech, kid speaking", "Female speech, woman speaking", "Child speech, kid speaking", "Speech"], ["0.449", "6.094", "6.654", "7.74", "9.898"], ["2.055", "6.488", "7.102", "8.165", "10.0"], ["talking", "kids", "talking", "kids", "talking"]]}
{"captions": "Dogs and humans make noises, and mechanisms click, in a room.", "data": [["Dog", "Dog", "Dog", "Dog", "Dog", "Dog", "Dog"], ["1.323", "3.614", "4.638", "7.756", "8.15", "8.528", "8.976"], ["1.661", "4.291", "6.913", "8.055", "8.37", "8.85", "9.181"], ["noises", "noises", "noises", "noises", "noises", "noises", "noises"]]}
{"captions": "Male singing, clapping, and speech occur.", "data": [["Speech"], ["1.568"], ["2.75"], ["speech"]]}
{"captions": "An engine runs with male speech and clicking sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["2.528", "9.197", "9.591"], ["3.52", "9.378", "9.811"], ["speech", "speech", "speech"]]}
{"captions": "Dogs are yipping while a man is speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.803", "2.22", "2.425", "2.606", "2.772", "2.89", "4.268"], ["1.882", "2.346", "2.504", "2.661", "2.811", "2.984", "4.827"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music plays and people speak and click.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.26", "1.917", "2.698", "4.328", "8.396"], ["1.275", "2.618", "3.271", "8.136", "9.172"], ["speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Various human sounds, including speech and laughter, are interspersed with burping.", "data": [["Speech", "Speech", "Speech", "Speech", "Female speech, woman speaking"], ["0.978", "6.364", "6.967", "8.118", "9.15"], ["1.814", "6.528", "7.108", "9.05", "9.63"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "Men are speaking and music is playing with ticking sounds in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "0.281", "2.819", "7.992", "9.528"], ["0.196", "1.515", "3.503", "8.094", "9.858"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "A man speaks, breathes, writes, and clicks.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.724", "2.346", "3.331", "5.047", "6.11", "9.551"], ["0.433", "2.173", "2.929", "3.961", "5.913", "7.346", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "Alarms sound with sliding door and tapping sounds.", "data": [["Alarm", "Alarm", "Alarm", "Alarm", "Alarm"], ["0.0", "2.323", "4.457", "6.669", "8.843"], ["1.039", "3.244", "5.346", "7.543", "9.78"], ["Alarm", "Alarm", "Alarm", "Alarm", "Alarm"]]}
{"captions": "Music, video game sounds, and explosions are heard while people are speaking.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.202", "3.187", "5.654", "7.047"], ["1.07", "5.032", "6.457", "7.811"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Various sounds, including music, wind, and laughter, accompany female and male speech and a child's speech.", "data": [["Female speech, woman speaking", "Child speech, kid speaking", "Male speech, man speaking"], ["0.449", "4.047", "6.803"], ["1.102", "6.323", "7.819"], ["laughter", "laughter", "speech"]]}
{"captions": "Women are speaking and narration is heard over speech.", "data": [["Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Female speech, woman speaking"], ["0.512", "2.63", "4.496", "6.409", "8.921"], ["2.425", "3.394", "5.346", "7.291", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Women speak, and ships and speeches are heard with unmodified recordings.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Speech", "Speech", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "1.976", "2.96", "4.888", "6.055", "7.89", "9.717"], ["1.908", "2.811", "3.523", "5.587", "6.307", "8.323", "10.0"], ["Women", "Women", "recordings", "recordings", "Women", "Women", "Women"]]}
{"captions": "Sewing machine and mechanisms make sounds.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.563", "3.061", "6.788"], ["1.936", "3.679", "7.351"], ["machine", "machine", "machine"]]}
{"captions": "Conversations are had while an engine runs.", "data": [["Conversation", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["1.053", "1.787", "3.678", "4.916", "5.529"], ["1.51", "3.216", "4.685", "5.419", "6.599"], ["Conversations", "Conversations", "Conversations", "Conversations", "Conversations"]]}
{"captions": "A man is speaking and clicking, with chirping birds heard in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.898", "2.795", "3.055", "4.039", "5.031", "9.945"], ["1.189", "2.732", "2.953", "3.268", "4.921", "6.276", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Music accompanies male speech with a boing sound in the background.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "5.755", "7.753"], ["1.989", "7.311", "9.696"], ["speech", "speech", "speech"]]}
{"captions": "Pots and pans clink and clank.", "data": [["Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans", "Dishes, pots, and pans"], ["1.885", "2.246", "3.298", "5.433", "7.876"], ["2.002", "2.363", "3.542", "5.932", "8.184"], ["clink", "clink", "clink", "clink", "clink"]]}
{"captions": "Music with sound effects, pouring, blending, door sounds, and human voices.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking", "Blender", "Female speech, woman speaking"], ["7.307", "7.598", "8.058", "2.504", "8.37"], ["7.557", "7.898", "8.202", "3.39", "8.799"], ["voices", "voices", "voices", "blending", "voices"]]}
{"captions": "Women speak and tap while a man sings with music playing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.827", "5.26", "5.835", "9.126", "9.835"], ["0.126", "3.772", "5.591", "6.039", "9.646", "10.0"], ["speak", "speak", "speak", "speak", "speak", "speak"]]}
{"captions": "Gears turn as tools are used and men speak.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["5.504", "6.89", "9.819"], ["6.614", "8.008", "10.0"], ["speak", "speak", "speak"]]}
{"captions": "People speak while engine and wind noises are heard, followed by the sound of a skateboard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.008", "1.701", "6.701", "8.331"], ["0.882", "5.535", "7.843", "9.031"], ["speak", "speak", "speak", "speak"]]}
{"captions": "Music plays with female speech and breathing sounds.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["5.819", "9.472"], ["8.953", "9.921"], ["speech", "speech"]]}
{"captions": "Sewing machine and female speech sounds are heard.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["3.843", "5.777"], ["5.676", "6.382"], ["speech", "speech"]]}
{"captions": "Bells are ringing, people are speaking, a clock is ticking and an alarm is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Alarm clock"], ["1.188", "2.874", "7.186"], ["2.627", "3.705", "10.0"], ["speaking", "speaking", "ticking"]]}
{"captions": "A man speaks amidst the sound of a drill and mechanisms.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.006", "1.821", "2.832", "5.775", "7.96"], ["1.11", "2.671", "3.318", "7.491", "9.821"], ["speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man is brushing his teeth and using the sink in a small room.", "data": [["Male speech, man speaking", "Water tap, faucet"], ["1.559", "3.724"], ["2.008", "5.591"], ["man", "sink"]]}
{"captions": "A telephone rings and crunching sounds occur intermittently.", "data": [["Female speech, woman speaking", "Telephone bell ringing", "Female speech, woman speaking"], ["6.165", "0.748", "7.055"], ["6.654", "2.795", "8.457"], ["telephone", "rings", "telephone"]]}
{"captions": "Sounds of chewing, howling wind, crickets, dogs, and bird vocalizations are heard.", "data": [["Dog", "Dog"], ["3.024", "6.472"], ["3.562", "8.15"], ["dogs", "dogs"]]}
{"captions": "Men speaking, liquids pouring, laughter, and caterwauling are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "4.951", "5.963", "6.545", "7.137"], ["4.043", "5.876", "6.356", "7.013", "7.368"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "People are singing, speaking, making keys jangle, and dropping coins, while a child is speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Child speech, kid speaking", "Male speech, man speaking"], ["0.622", "1.425", "3.654", "4.898", "6.937"], ["1.268", "3.0", "4.835", "6.709", "8.339"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Men are speaking, music is playing, and narration is heard in a small room.", "data": [["Speech", "Speech", "Speech"], ["0.0", "2.402", "6.551"], ["0.213", "3.906", "7.827"], ["Men", "Men", "Men"]]}
{"captions": "People are speaking, with wind, rain and white noise in the background.", "data": [["Conversation", "Male speech, man speaking"], ["0.165", "4.811"], ["4.472", "6.543"], ["speaking", "speaking"]]}
{"captions": "People are talking, clapping, and shouting in a crowded environment.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking", "Male speech, man speaking"], ["0.669", "2.26", "4.11", "4.992", "5.898"], ["2.118", "3.78", "4.819", "5.646", "6.787"], ["talking", "talking", "talking", "talking", "talking"]]}
{"captions": "Men are speaking and music is playing while a telephone rings and conversation is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Telephone bell ringing"], ["0.0", "3.713", "4.349", "4.835", "8.959", "3.441"], ["3.615", "4.147", "4.714", "7.172", "9.196", "9.774"], ["speaking", "speaking", "speaking", "speaking", "speaking", "telephone"]]}
{"captions": "A man is speaking while sounds of pouring and trickling are heard.", "data": [["Male speech, man speaking", "Speech", "Speech"], ["0.11", "5.142", "7.496"], ["4.472", "6.031", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "A man speaks while water fills, dishes clatter, and gurgling water is heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Dishes, pots, and pans", "Male speech, man speaking", "Sink (filling or washing)", "Dishes, pots, and pans"], ["0.008", "1.598", "4.331", "6.567", "4.315", "8.717", "0.559", "8.772"], ["0.409", "3.213", "5.252", "8.457", "6.575", "10.0", "4.0", "10.0"], ["speaks", "speaks", "speaks", "speaks", "clatter", "speaks", "dishes", "clatter"]]}
{"captions": "A male voice is narrating a speech in a small room while water is being turned on and off.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Speech", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Speech", "Water tap, faucet"], ["0.016", "1.307", "2.598", "4.449", "6.331", "7.11", "8.441", "9.504", "8.252"], ["1.039", "2.346", "4.173", "6.157", "6.992", "7.803", "9.339", "9.882", "10.0"], ["narrating", "narrating", "narrating", "narrating", "narrating", "narrating", "narrating", "narrating", "water"]]}
{"captions": "Male speech is interrupted by a ringing telephone.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Telephone bell ringing"], ["0.015", "6.951", "9.058", "1.868"], ["1.898", "7.803", "10.0", "3.931"], ["speech", "speech", "speech", "ringing"]]}
{"captions": "A toilet is flushed and water is splashing in a small room.", "data": [["Toilet flush"], ["0.0"], ["10.0"], ["splashing"]]}
{"captions": "Mechanisms, male speech, squeaking, finger snapping, and more are heard.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.446", "2.107", "3.02", "8.806", "9.657"], ["0.783", "2.903", "3.384", "9.451", "10.0"], ["speech", "speech", "speech", "speech", "speech"]]}
{"captions": "There are sounds of dishes, pots, and pans, female speech, and frying food.", "data": [["Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Female speech, woman speaking", "Dishes, pots, and pans", "Female speech, woman speaking", "Frying (food)"], ["0.0", "2.846", "1.365", "4.014", "4.968", "5.067", "6.113"], ["0.376", "3.742", "1.966", "4.575", "5.09", "6.05", "10.0"], ["dishes", "speech", "dishes", "speech", "dishes", "speech", "frying"]]}
{"captions": "A woman is speaking and clicking sounds are present.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "2.165", "7.866", "9.394"], ["1.906", "3.543", "9.276", "10.0"], ["speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Silence is followed by a ringtone, an explosion, music, and male speech.", "data": [["Ringtone", "Male speech, man speaking"], ["0.482", "5.967"], ["4.284", "6.293"], ["ringtone", "speech"]]}
{"captions": "A woman speaks while using a sewing machine and tapping.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.571", "0.786", "1.019", "1.311", "6.204", "6.843", "9.109"], ["0.32", "0.663", "0.914", "1.188", "1.485", "6.748", "8.013", "10.0"], ["speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks", "speaks"]]}
{"captions": "A man is speaking and a train whistle blows.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "2.205", "7.244"], ["1.858", "7.094", "8.803"], ["speaking", "speaking", "speaking"]]}
{"captions": "Fire alarms are ringing.", "data": [["Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm", "Fire alarm"], ["0.0", "1.277", "2.286", "3.288", "5.285", "6.287", "7.303", "8.003", "9.334"], ["0.094", "1.961", "2.953", "3.89", "5.953", "6.961", "7.906", "8.181", "10.0"], ["ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing", "ringing"]]}
{"captions": "A woman is speaking, sizzling and frying food, crumpling and stirring, and more.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Dishes, pots, and pans", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Sizzle", "Dishes, pots, and pans"], ["0.0", "2.087", "2.764", "3.567", "7.031", "8.898", "0.0", "5.874"], ["0.441", "3.299", "3.307", "6.646", "8.598", "10.0", "10.0", "6.646"], ["speaking", "speaking", "sizzling", "speaking", "speaking", "speaking", "frying", "sizzling"]]}
{"captions": "People and birds are speaking and singing.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Female speech, woman speaking"], ["0.0", "1.497", "2.116", "5.093", "8.289"], ["1.093", "1.936", "2.699", "8.214", "9.33"], ["speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Barking, crushing, and car sounds are heard with music and speaking.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["5.943", "8.352", "9.836"], ["7.074", "9.533", "10.0"], ["speaking", "speaking", "speaking"]]}
{"captions": "A speech synthesizer, conversation, and singing is heard with occasional silence.", "data": [["Speech", "Male speech, man speaking", "Conversation"], ["0.685", "3.441", "6.976"], ["1.394", "5.48", "10.0"], ["synthesizer", "synthesizer", "conversation"]]}
{"captions": "Crickets and insects chirp in a small room with men speaking and breathing.", "data": [["Male speech, man speaking", "Speech"], ["1.535", "9.441"], ["6.449", "10.0"], ["speaking", "speaking"]]}
{"captions": "Music plays and people sneeze and breathe over background noise.", "data": [["Female speech, woman speaking", "Female speech, woman speaking"], ["1.866", "7.732"], ["2.339", "8.181"], ["Music", "Music"]]}
{"captions": "Yodeling and singing are accompanied by speeches from a man.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["3.465", "7.323", "8.819"], ["6.819", "8.559", "10.0"], ["speeches", "speeches", "speeches"]]}
{"captions": "Taps and mechanisms create rhythms as a woman speaks and a sink runs, followed by coughing.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Sink (filling or washing)"], ["0.181", "3.898", "7.528"], ["1.315", "4.772", "10.0"], ["speaks", "speaks", "runs"]]}
{"captions": "Men are speaking and a whirring sound is present.", "data": [["Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking", "Male speech, man speaking"], ["0.0", "1.404", "3.306", "4.067", "5.435", "6.875", "9.565"], ["0.797", "2.88", "3.922", "4.42", "6.721", "9.158", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "There are various alarms and howls.", "data": [["Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm"], ["0.0", "0.746", "2.238", "4.725", "7.224", "7.733"], ["0.607", "2.111", "4.639", "7.068", "7.634", "10.0"], ["howls", "howls", "howls", "howls", "howls", "howls"]]}
{"captions": "Cars rev and pass by as a man and woman speak.", "data": [["Male speech, man speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["1.594", "7.227", "8.438"], ["1.791", "7.721", "8.63"], ["man", "woman", "woman"]]}
{"captions": "Women are speaking, tapping, and breathing in the background.", "data": [["Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking", "Female speech, woman speaking"], ["0.0", "0.567", "2.37", "4.764", "7.181", "9.315"], ["0.205", "2.268", "4.661", "6.811", "9.079", "10.0"], ["speaking", "speaking", "speaking", "speaking", "speaking", "speaking"]]}
{"captions": "Alarms and bird calls fill the air amidst male speech and more alarms.", "data": [["Male speech, man speaking", "Alarm", "Alarm", "Alarm", "Alarm", "Male speech, man speaking", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Alarm", "Male speech, man speaking", "Alarm", "Alarm", "Alarm", "Male speech, man speaking"], ["2.811", "0.0", "5.732", "6.087", "6.512", "3.984", "6.882", "7.26", "7.669", "8.047", "8.441", "8.74", "6.921", "9.11", "9.496", "9.898", "9.11"], ["3.654", "1.764", "5.981", "6.371", "6.747", "5.402", "7.137", "7.527", "7.89", "8.32", "8.642", "8.978", "7.126", "9.328", "9.745", "10.0", "9.504"], ["speech", "Alarms", "Alarms", "Alarms", "Alarms", "speech", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "Alarms", "speech", "Alarms", "Alarms", "Alarms", "speech"]]}
{"captions": "Cars and trucks are honking and people are speaking on a busy road.", "data": [["Male speech, man speaking", "Male speech, man speaking"], ["5.016", "7.843"], ["6.614", "9.535"], ["speaking", "speaking"]]}