Skip to content

Commit

Permalink
subcen+topk, mqmha (#70)
Browse files Browse the repository at this point in the history
* update some tricks(subcen + topk, mqmha)

Co-authored-by: YANOrange <[email protected]>
  • Loading branch information
wangers and YanOrange1 authored Dec 29, 2022
1 parent bf3e055 commit 141f904
Show file tree
Hide file tree
Showing 7 changed files with 1,073 additions and 13 deletions.
81 changes: 81 additions & 0 deletions conf/egs_pre_sre_ecapa1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright xmuspeech (Author: Leo 2022-01-23)
feat_dim: 80  # the num_mel_bins of fbank and the num_ceps of mfcc
data_type: 'shard'  # shard or raw
# NOTE(review): the nesting below was rebuilt from the key names because the
# original indentation was lost; confirm it against the consuming loader.
# feature extraction
dataset_conf:
  # asv_target: true
  filter: true
  filter_conf:
    max_length: 4.0
    min_length: 0.2
    max_cut: true
  # resample
  resample: false
  resample_conf:
    resample_rate: 16000

  # pre speed_perturb
  pre_speed_perturb: true
  perturb_conf:
    speeds: [90, 100, 110]  # larger->slower
    sample_rate: 16000
  # random_chunk
  random_chunk: true
  random_chunk_size: 2.015
  # waveform-level config
  speech_aug: true
  speech_aug_conf: subtools/conf/speech_aug_ecapa.yaml
  csv_aug_folder: ''
  # There seems to be a bug: do NOT set dither and use_energy together.
  feature_extraction_conf:
    # feature_type: 'mfcc'
    # kaldi_featset:
    #   num_ceps: 23
    #   num_mel_bins: 23
    #   frame_shift: 10
    #   frame_length: 25
    #   low_freq: 40.0
    #   high_freq: -200
    #   energy_floor: 0.0
    #   dither: 0.0  # conflicts with use_energy=true.
    #   use_energy: true  # set it to true if you want energy-based vad.

    feature_type: 'fbank'
    kaldi_featset:
      num_mel_bins: 80
      frame_shift: 10
      frame_length: 25
      low_freq: 40
      high_freq: -200
      energy_floor: 0.0
      use_energy: false

    mean_var_conf:
      mean_norm: true
      std_norm: false

  # spec-level config
  spec_aug: false
  spec_aug_conf:
    aug: specaugment  # None or specaugment
    aug_params:
      frequency: 0.2
      frame: 0.2
      rows: 4
      cols: 4
      random_rows: true
      random_cols: true

  shuffle: true
  shuffle_conf:
    shuffle_size: 3000
  batch_conf:
    batch_type: 'static'  # static or dynamic
    batch_size: 128

# attention: Do not specify batch size in dataloader.
data_loader_conf:
  num_workers: 8
  pin_memory: false
  # pf(50) * bs(128) = 6400 samples, about 2 shards of 3000 samples each.
  prefetch_factor: 50
83 changes: 83 additions & 0 deletions conf/speech_aug_ecapa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# NOTE(review): list/mapping nesting was rebuilt after the original
# indentation was lost; confirm it against the augmentation loader.
speechaug:
  mod: random  # chain,concat,random
  aug_classes:
    - aug_name: add_noise  # Define the speech augmentation name
      aug_type: Env  # Env or Time
      random_mod_weight: 0.5
      reverb_prob: 0.0
      noise_prob: 1.0
      noise_snr_low: 5
      noise_snr_high: 15
      noise_csv: exp/aug_csv/combine_music_noise.csv
      noise_num_workers: 0

    - aug_name: add_babble_noise
      aug_type: Env
      random_mod_weight: 0.2
      reverb_prob: 0.0
      noise_prob: 0.0
      babble_prob: 1.0
      babble_speaker_count: 4
      babble_snr_low: 13
      babble_snr_high: 20
      babble_csv: exp/aug_csv/musan_speech.csv
      babble_noise_max_len: 3.015
      noise_num_workers: 0

    - aug_name: add_rev
      aug_type: Env
      random_mod_weight: 0.3
      reverb_prob: 1.0
      noise_prob: 0.0
      babble_prob: 0.0
      reverb_csv: exp/aug_csv/combine_sim_small_medium_rev.csv
      rir_scale_factor: 1.0

    - aug_name: add_rev_noise
      aug_type: Env
      random_mod_weight: 0.2
      reverb_prob: 1.0
      noise_prob: 0.5
      noise_snr_low: 0
      noise_snr_high: 15
      noise_csv: exp/aug_csv/pointsrc_noise.csv
      reverb_csv: exp/aug_csv/real_reverb.csv
      noise_num_workers: 0
      rir_scale_factor: 1.0

    # - aug_name: augment_wavedrop
    #   aug_type: Time
    #   random_mod_weight: 0.5
    #   perturb_prob: 0.0
    #   drop_freq_prob: 1.0
    #   drop_chunk_prob: 1.0
    #   drop_freq_count_low: 0
    #   drop_freq_count_high: 3
    #   drop_chunk_count_low: 0
    #   drop_chunk_count_high: 4
    #   drop_chunk_length_low: 1000
    #   drop_chunk_length_high: 2000
    #   sample_rate: 16000
    #   speeds: [100]


# You can define more augmentation strategies here.
tail_speechaug:
  mod: chain
  aug_classes:
    - aug_name: augment_speed
      aug_type: Time
      perturb_type: sox_tempo
      perturb_prob: 1.0
      drop_freq_prob: 0.0
      drop_chunk_prob: 0.0
      sample_rate: 16000
      speeds: [95, 100, 105]
      keep_shape: true
Loading

0 comments on commit 141f904

Please sign in to comment.