
Commit

fix bug

Snowdar committed Apr 25, 2020
1 parent e9b0c50 commit b66f070
Showing 11 changed files with 116 additions and 65 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
/BeDeprecated
/challenge
/ToDo
/develop
__pycache__
*.pyc
5 changes: 4 additions & 1 deletion augmentDataByNoise.sh
@@ -18,7 +18,7 @@ sampling_rate=16000
frame_shift=0.01
factor=1 # The ratio of augmented data with origin data. In this case, 4 means using all augmented data if aug-data-dir is provided.
nj=20 # Num-jobs
force_clear=true
force_clear=false

. subtools/parse_options.sh
. subtools/path.sh
@@ -157,6 +157,9 @@ if [ $num -gt 1 ];then
subtools/kaldi/utils/combine_data.sh $additive_aug_data $all_data
fi

bc_path=$(command -v bc)
[ "$bc_path" == "" ] && echo -e "[exit] No bc in ($PATH)\nPlease install bc by 'yum install bc'." && exit 1

num_origin_utts=$(wc -l $data/reco2dur | awk '{print $1}')
[ $(echo "$factor - $num" | bc) -gt 0 ] && factor=$num # Get min
num_additive_utts=$(echo "$num_origin_utts * $factor / 1" | bc)
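The new lines guard against a missing `bc` binary and clamp `factor` to the number of available augmented copies before computing the utterance count. A rough Python restatement of that arithmetic (not part of the repo; numbers hypothetical):

```python
# Sketch of the clamp-and-count logic above (illustration only).
def count_additive_utts(num_origin_utts: int, factor: float, num_aug_copies: int) -> int:
    factor = min(factor, num_aug_copies)   # the bc-based "get min" check
    return int(num_origin_utts * factor)   # "* factor / 1" in bc truncates, like int()

print(count_additive_utts(1000, 4, 2))  # 2000: only two augmented copies exist
```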
4 changes: 3 additions & 1 deletion newCopyData.sh
@@ -27,7 +27,9 @@ target=$topdir/${pre}/$name
rm -rf $target
mkdir -p $target

for x in wav.scp utt2spk spk2utt feats.scp vad.scp utt2num_frames utt2dur reco2dur text utt2gender spk2gender trials;do
trials=$(find $src -name "*trials" | xargs -n 1 basename)

for x in wav.scp utt2spk spk2utt feats.scp vad.scp utt2num_frames utt2dur reco2dur text utt2gender spk2gender $trials;do
[ -f $src/$x ] && cp $src/$x $target/ && echo "[ $x ] copy done"
done
echo "[note] Your new datadir is $target"
15 changes: 10 additions & 5 deletions pytorch/launcher/runResnetXvector.py
@@ -166,10 +166,11 @@
"s":30, "mhe_loss":False, "mhe_w":0.01},

"use_step":False,
"step_params":{
"t":False, "s":False, "m":False,
"T":None, "record_T":0, "t_tuple":(0.5, 1.2),
"s_tuple":(30, 12), "m_tuple":(0, 0.2)}
"step_params":{"T":None,
"m":False, "lambda_0":0, "lambda_b":1000, "alpha":5, "gamma":1e-4,
"s":False, "s_tuple":(30, 12), "s_list":None,
"t":False, "t_tuple":(0.5, 1.2),
"p":False, "p_tuple":(0.5, 0.1)}
}

optimizer_params = {
@@ -211,6 +212,10 @@
model_dir="exp/resnet_xv_baseline_warmR_voxceleb1"
##--------------------------------------------------##
##
#### Auto-config params
if lr_scheduler_params["name"] == "warmR" and model_params["use_step"]:
model_params["step_params"]["T"]=(lr_scheduler_params["warmR.T_max"], lr_scheduler_params["warmR.T_mult"])

#### Set seed
utils.set_all_seed(1024)

@@ -269,7 +274,7 @@
if utils.is_main_training(): logger.info("Init a simple trainer.")
# Package(Elements:dict, Params:dict}. It is a key parameter's package to trainer and model_dir/config/.
package = ({"data":bunch, "model":model, "optimizer":optimizer, "lr_scheduler":lr_scheduler},
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":"",
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":exist_model,
"start_epoch":train_stage, "epochs":epochs, "use_gpu":use_gpu, "gpu_id":gpu_id,
"benchmark":benchmark, "suffix":suffix, "report_times_every_epoch":report_times_every_epoch,
"report_interval_iters":report_interval_iters, "record_file":"train.csv"})
30 changes: 18 additions & 12 deletions pytorch/launcher/runSnowdarXvector-voxceleb1.py
@@ -142,7 +142,7 @@
loader_params = {
"use_fast_loader":True, # It is a queue loader to prefetch batch and storage.
"max_prefetch":10,
"batch_size":128,
"batch_size":512,
"shuffle":True,
"num_workers":2,
"pin_memory":False,
@@ -152,7 +152,7 @@
# Difine model_params by model_blueprint w.r.t your model's __init__(model_params).
model_params = {
"extend":False,
"aug_dropout":0.2, "hidden_dropout":0.,
"aug_dropout":0., "hidden_dropout":0.,
"dropout_params":{"type":"default", "start_p":0., "dim":2, "method":"uniform",
"continuous":False, "inplace":True},
"training":True, "extracted_embedding":"far", "SE":False, "se_ratio":4,
@@ -165,18 +165,20 @@
"margin_loss_params":{"method":"am", "m":0.2, "feature_normalize":True,
"s":30, "mhe_loss":False, "mhe_w":0.01},
"use_step":False,
"step_params":{"t":False, "s":False, "record_T":0, "T":[], "t_tuple":(0.5, 1.2),
"s_tuple":(30, 12),
"m":False, "lambda_0":0, "lambda_b":1000, "alpha":5, "gamma":1e-4}
"step_params":{"T":None,
"m":False, "lambda_0":0, "lambda_b":1000, "alpha":5, "gamma":1e-4,
"s":False, "s_tuple":(30, 12), "s_list":None,
"t":False, "t_tuple":(0.5, 1.2),
"p":False, "p_tuple":(0.5, 0.1)}
}

optimizer_params = {
"name":"adam",
"name":"adamW",
"learn_rate":0.001,
"beta1":0.9,
"beta2":0.999,
"beta3":0.999,
"weight_decay":3e-4, # Should be large for decouped weight decay (adamW) and small for L2 regularization (sgd, adam).
"weight_decay":3e-1, # Should be large for decouped weight decay (adamW) and small for L2 regularization (sgd, adam).
"lookahead.k":5,
"lookahead.alpha":0., # 0 means not using lookahead and if used, suggest to set it as 0.5.
"gc":False # If true, use gradient centralization.
@@ -185,8 +187,8 @@
lr_scheduler_params = {
"name":"warmR",
"warmR.lr_decay_step":0, # 0 means decay after every epoch and 1 means every iter.
"warmR.T_max":7,
"warmR.T_mult":1,
"warmR.T_max":3,
"warmR.T_mult":2,
"warmR.factor":1.0, # The max_lr_decay_factor.
"warmR.eta_min":4e-8,
"warmR.log_decay":False
@@ -210,6 +212,10 @@
model_dir="exp/standard_xv_warmR_voxceleb1"
##--------------------------------------------------##
##
#### Auto-config params
if lr_scheduler_params["name"] == "warmR" and model_params["use_step"]:
model_params["step_params"]["T"]=(lr_scheduler_params["warmR.T_max"], lr_scheduler_params["warmR.T_mult"])

#### Set seed
utils.set_all_seed(1024)

@@ -267,7 +273,7 @@
if utils.is_main_training(): logger.info("Init a simple trainer.")
# Package(Elements:dict, Params:dict}. It is a key parameter's package to trainer and model_dir/config/.
package = ({"data":bunch, "model":model, "optimizer":optimizer, "lr_scheduler":lr_scheduler},
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":"",
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":exist_model,
"start_epoch":train_stage, "epochs":epochs, "use_gpu":use_gpu, "gpu_id":gpu_id,
"benchmark":benchmark, "suffix":suffix, "report_times_every_epoch":report_times_every_epoch,
"report_interval_iters":report_interval_iters, "record_file":"train.csv"})
@@ -287,9 +293,9 @@
data_root = "data" # It contains all dataset just like Kaldi recipe.
prefix = "mfcc_23_pitch" # For to_extracted_data.

to_extracted_positions = ["far"] # Define this w.r.t model_blueprint.
to_extracted_positions = ["far","near"] # Define this w.r.t model_blueprint.
to_extracted_data = ["voxceleb1_train_aug", "voxceleb1_test"] # All dataset should be in dataroot/prefix.
to_extracted_epochs = ["7", "14", "21"] # It is model's name, such as 10.params or final.params (suffix is w.r.t package).
to_extracted_epochs = ["21"] # It is model's name, such as 10.params or final.params (suffix is w.r.t package).

nj = 10
force = False
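The optimizer moves from adam to adamW and weight_decay jumps from 3e-4 to 3e-1, in line with the comment in the config: AdamW applies decoupled weight decay directly to the weights instead of folding an L2 term into the gradient, so a much larger value is appropriate. A minimal sketch of the distinction (plain Python, not the repo's optimizer code; `adam_step` stands in for the adaptive gradient update):

```python
def l2_regularized_update(param, grad, lr=1e-3, weight_decay=3e-4):
    # L2 regularization: the decay term enters the gradient and is then rescaled
    # by whatever adaptive machinery the optimizer applies to gradients.
    grad = grad + weight_decay * param
    return param - lr * grad

def decoupled_update(param, adam_step, lr=1e-3, weight_decay=3e-1):
    # Decoupled weight decay (AdamW-style): the decay shrinks the weight directly,
    # independent of the gradient statistics.
    return param - lr * adam_step - lr * weight_decay * param

print(l2_regularized_update(1.0, 0.1))  # ~0.9999
print(decoupled_update(1.0, 0.1))       # ~0.9996
```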
23 changes: 16 additions & 7 deletions pytorch/launcher/runSnowdarXvector-voxceleb2.py
@@ -147,7 +147,7 @@
# Difine model_params by model_blueprint w.r.t your model's __init__(model_params).
model_params = {
"extend":False,
"aug_dropout":0.2, "dropout_params":{"type":"default", "dim":2, "start_p":0., "inplace":True},
"aug_dropout":0., "dropout_params":{"type":"default", "dim":2, "start_p":0., "inplace":True},
"training":True, "extracted_embedding":"far", "SE":False, "se_ratio":4,
"tdnn_layer_params":{"momentum":0.5, "nonlinearity":'relu'},
"tdnn6":True, "tdnn7_params":{"nonlinearity":"default", "bn":True},
@@ -157,25 +157,30 @@
"margin_loss":False,
"margin_loss_params":{"method":"am", "m":0.2, "feature_normalize":True,
"s":30, "mhe_loss":False, "mhe_w":0.01},
"use_step":False, "step_params":{}
"use_step":False,
"step_params":{"T":None,
"m":False, "lambda_0":0, "lambda_b":1000, "alpha":5, "gamma":1e-4,
"s":False, "s_tuple":(30, 12), "s_list":None,
"t":False, "t_tuple":(0.5, 1.2),
"p":False, "p_tuple":(0.5, 0.1)}
}

optimizer_params = {
"name":"adam",
"name":"adamW",
"learn_rate":0.001,
"beta1":0.9,
"beta2":0.999,
"beta3":0.999,
"weight_decay":3e-4, # Should be large for decouped weight decay (adamW) and small for L2 regularization (sgd, adam).
"weight_decay":3e-1, # Should be large for decouped weight decay (adamW) and small for L2 regularization (sgd, adam).
"lookahead.k":5,
"lookahead.alpha":0, # 0 means not using lookahead and if used, suggest to set it as 0.5.
"gc":False # If true, use gradient centralization.
}

lr_scheduler_params = {
"name":"warmR",
"warmR.lr_decay_step":2000, # 0 means decay after every epoch and 1 means every iter.
"warmR.T_max":1,
"warmR.lr_decay_step":4000, # 0 means decay after every epoch and 1 means every iter.
"warmR.T_max":5,
"warmR.T_mult":2,
"warmR.factor":1.0, # The max_lr_decay_factor.
"warmR.eta_min":4e-8
@@ -199,6 +204,10 @@
model_dir="exp/standard_xv_warmR_voxceleb2"
##--------------------------------------------------##
##
#### Auto-config params
if lr_scheduler_params["name"] == "warmR" and model_params["use_step"]:
model_params["step_params"]["T"]=(lr_scheduler_params["warmR.T_max"], lr_scheduler_params["warmR.T_mult"])

#### Set seed
utils.set_all_seed(1024)

@@ -257,7 +266,7 @@
if utils.is_main_training(): logger.info("Init a simple trainer.")
# Package(Elements:dict, Params:dict}. It is a key parameter's package to trainer and model_dir/config/.
package = ({"data":bunch, "model":model, "optimizer":optimizer, "lr_scheduler":lr_scheduler},
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":"",
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":exist_model,
"start_epoch":train_stage, "epochs":epochs, "use_gpu":use_gpu, "gpu_id":gpu_id,
"benchmark":benchmark, "suffix":suffix, "report_times_every_epoch":report_times_every_epoch,
"report_interval_iters":report_interval_iters, "record_file":"train.csv"})
2 changes: 1 addition & 1 deletion pytorch/launcher/runXvector.py
@@ -236,7 +236,7 @@
logger.info("Init a simple trainer.")
# Package(Elements:dict, Params:dict}. It is a key parameter's package to trainer and model_dir/config/.
package = ({"data":bunch, "model":model, "optimizer":optimizer, "lr_scheduler":lr_scheduler},
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":"",
{"model_dir":model_dir, "model_blueprint":model_blueprint, "exist_model":exist_model,
"start_epoch":train_stage, "epochs":epochs, "use_gpu":use_gpu, "gpu_id":gpu_id,
"benchmark":benchmark, "suffix":suffix, "report_times_every_epoch":report_times_every_epoch,
"report_interval_iters":report_interval_iters, "record_file":"train.csv"})
4 changes: 2 additions & 2 deletions pytorch/libs/training/reporter.py
@@ -102,7 +102,8 @@ def _update(self):
try:
res = self.queue.get()
if res is None:
if self.record_file is not None:break
self.bar.finish()
break

snapshot, training_point, current_lr = res
current_epoch, current_iter, num_batchs_train = training_point
@@ -128,7 +129,6 @@ def finish(self):
self.queue.put(None)
# Wait process completed.
self.process.join()
self.bar.finish()


class LRFinderReporter():
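The fix moves `self.bar.finish()` out of the parent's `finish()` and into the background loop's `None` branch, so the progress bar is closed by the same process that owns it before that process exits; `finish()` now only sends the sentinel and joins. A simplified sketch of this sentinel-based shutdown pattern (not the repo's actual Reporter class):

```python
import multiprocessing as mp

def report_loop(queue):
    while True:
        res = queue.get()
        if res is None:              # sentinel from the producer: we are done
            # close resources owned by this process (the progress bar in reporter.py)
            break
        print("snapshot:", res)      # stand-in for updating the bar / train.csv

if __name__ == "__main__":
    q = mp.Queue()
    p = mp.Process(target=report_loop, args=(q,))
    p.start()
    q.put({"train_loss": 0.1})
    q.put(None)                      # like Reporter.finish(): send the sentinel...
    p.join()                         # ...then wait for the worker to drain and exit
```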
1 change: 1 addition & 0 deletions pytorch/libs/training/trainer.py
@@ -122,6 +122,7 @@ def init_training(self):

## Select device
self.select_device()
#self.elements["model"]=torch.nn.parallel.DistributedDataParallel(model)

def save_model(self, from_epoch=True):
if from_epoch:
47 changes: 32 additions & 15 deletions pytorch/model/resnet-xvector.py
@@ -45,9 +45,11 @@ def init(self, inputs_dim, num_targets, aug_dropout=0.2, tail_dropout=0., traini
}

default_step_params = {
"t":False, "s":False, "m":False,
"T":None, "record_T":0, "t_tuple":(0.5, 1.2),
"s_tuple":(30, 12), "m_tuple":(0, 0.2)
"T":None,
"m":False, "lambda_0":0, "lambda_b":1000, "alpha":5, "gamma":1e-4,
"s":False, "s_tuple":(30, 12), "s_list":None,
"t":False, "t_tuple":(0.5, 1.2),
"p":False, "p_tuple":(0.5, 0.1)
}

resnet_params = utils.assign_params_dict(default_resnet_params, resnet_params)
@@ -162,26 +164,41 @@ def extract_embedding(self, inputs):
return xvector


def compute_decay_value(self, start, end, current_postion, T):
return start - (start - end)/(T-1) * (current_postion%T)
def get_warmR_T(T_0, T_mult, epoch):
n = int(math.log(max(0.05, (epoch / T_0 * (T_mult - 1) + 1)), T_mult))
T_cur = epoch - T_0 * (T_mult ** n - 1) / (T_mult - 1)
T_i = T_0 * T_mult ** (n)
return T_cur, T_i


def compute_decay_value(self, start, end, T_cur, T_i):
# Linear decay in every cycle time.
return start - (start - end)/(T_i-1) * (T_cur%T_i)


def step(self, epoch, this_iter, epoch_batchs):
# heated up for t, s, m
# Heated up for t and s.
# Decay for margin and dropout p.
if self.use_step:
if self.step_params["record_T"] < self.step_params["T"][epoch]:
self.current_epoch = epoch*epoch_batchs
self.T = self.step_params["T"][epoch] * epoch_batchs
self.step_params["record_T"] = self.step_params["T"][epoch]
if self.step_params["m"]:
current_postion = epoch*epoch_batchs + this_iter
lambda_factor = max(self.step_params["lambda_0"],
self.step_params["lambda_b"]*(1+self.step_params["gamma"]*current_postion)**(-self.step_params["alpha"]))
self.loss.step(lambda_factor)

current_postion = self.current_epoch + this_iter
if self.step_params["T"] is not None and (self.step_params["t"] or self.step_params["p"]):
T_cur, T_i = get_warmR_T(*self.step_params["T"], epoch)
T_cur = T_cur*epoch_batchs + this_iter
T_i = T_i * epoch_batchs

if self.step_params["t"]:
self.loss.t = self.compute_decay_value(*self.step_params["t_tuple"], current_postion, self.T)
self.loss.t = self.compute_decay_value(*self.step_params["t_tuple"], T_cur, T_i)

if self.step_params["p"]:
self.aug_dropout.p = self.compute_decay_value(*self.step_params["p_tuple"], T_cur, T_i)

if self.step_params["s"]:
self.loss.s = self.compute_decay_value(*self.step_params["s_tuple"], current_postion, self.T)
if self.step_params["m"]:
self.loss.m = self.compute_decay_value(*self.step_params["m_tuple"], current_postion, self.T)
self.loss.s = self.step_params["s_tuple"][self.step_params["s_list"][epoch]]


# Test.
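The reworked `step()` splits the schedules: the margin branch (`m`) follows the polynomial lambda decay `max(lambda_0, lambda_b*(1 + gamma*t)^(-alpha))`, the scale `s` is read per epoch from `s_list`, and `t` and the dropout `p` decay linearly within each warmR cycle, whose position `get_warmR_T` recovers from the `(T_max, T_mult)` tuple. A stand-alone copy of that cycle bookkeeping, run for `T = (3, 2)` to show what it returns (illustration only):

```python
import math

def get_warmR_T(T_0, T_mult, epoch):
    # Which cycle the given epoch falls in (T_i) and how far into it we are (T_cur).
    n = int(math.log(max(0.05, (epoch / T_0 * (T_mult - 1) + 1)), T_mult))
    T_cur = epoch - T_0 * (T_mult ** n - 1) / (T_mult - 1)
    T_i = T_0 * T_mult ** n
    return T_cur, T_i

for epoch in range(10):
    print(epoch, get_warmR_T(3, 2, epoch))
# Epochs 0-2 sit in a cycle of length 3 (T_cur = 0, 1, 2), epochs 3-8 in the next
# cycle of length 6, and epoch 9 starts a cycle of length 12; step() then rescales
# T_cur and T_i by epoch_batchs so compute_decay_value walks linearly per iteration.
```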