fix bug, jit test before synbn (#64)
* fix data processing

* add margin_warm, support chunksize to extract xv

* add conformer-xv

* add large-margin fine-tune

* fix bug, jit test before synbn

Co-authored-by: YANOrange <[email protected]>
wangers and YanOrange1 authored Nov 28, 2022
1 parent 5558452 commit 353a662
Showing 11 changed files with 44 additions and 159 deletions.
5 changes: 3 additions & 2 deletions doc/papers/conformer.md
@@ -2,12 +2,13 @@

#### Baseline ASV conformer experiments are conducted on VoxCeleb and CNCeleb.
* VoxCeleb: `subtools/recipe/voxcelebSRC/runVoxcelebSRC_online.sh`
* CNCeleb: To be released.
* CNCeleb: To be released; see
[Towards A Unified Conformer Structure: from ASR to ASV Task](https://arxiv.org/abs/2211.07201)

#### ASR transfer is conducted on CNCeleb
</br>
<div align='center'>
<img src="./trans.jpg" width=40% height=100 align=center />
<img src="./trans.jpg" width=40% height=40% align=center />
</div>
</br>
* The pretrained ASR encoder can either be an open-source pretrained model or be trained from scratch.
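
A rough, self-contained illustration of that transfer idea (toy modules, not this repo's conformer classes): the ASR encoder's weights initialise the ASV model's encoder via `load_state_dict`, while the ASV-specific head stays randomly initialised.

```python
import torch.nn as nn

class Encoder(nn.Module):
    # Toy stand-in for a conformer encoder shared between ASR and ASV.
    def __init__(self, feat_dim: int = 80, dim: int = 256):
        super().__init__()
        self.proj = nn.Linear(feat_dim, dim)
        self.layers = nn.Sequential(nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim))

    def forward(self, x):
        return self.layers(self.proj(x))

class ConformerXvector(nn.Module):
    # Toy stand-in for the ASV model: shared encoder plus a speaker classification head.
    def __init__(self, num_targets: int = 1000):
        super().__init__()
        self.encoder = Encoder()
        self.head = nn.Linear(256, num_targets)

    def forward(self, x):
        return self.head(self.encoder(x).mean(dim=1))

asr_encoder = Encoder()          # pretend this was pretrained on an ASR task
asv_model = ConformerXvector()
missing, unexpected = asv_model.encoder.load_state_dict(asr_encoder.state_dict(), strict=False)
print(len(missing), len(unexpected))  # 0 0 when parameter names line up
```
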
12 changes: 6 additions & 6 deletions pytorch/launcher/runEcapaXvector_online.py
@@ -397,11 +397,6 @@
if hasattr(model,'margin_warm'):
model.margin_warm.update_step_range(epoch_iters)

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

if utils.is_main_training():
print(model)
p1=sum(p.numel() for p in model.parameters())
@@ -414,7 +409,12 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(
optimizer, lr_scheduler_params)
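
The same relocation appears in each launcher below: the TorchScript check is run on the plain model first, and the SyncBatchNorm conversion happens only afterwards, right before the optimizer is built. A minimal sketch of that ordering, assuming `utils.convert_synchronized_batchnorm` is roughly a guarded wrapper around `torch.nn.SyncBatchNorm.convert_sync_batchnorm` (an assumption, not the repo's verified implementation):

```python
import torch
import torch.distributed as dist
import torch.nn as nn

def convert_synchronized_batchnorm(model: nn.Module) -> nn.Module:
    # Presumed behaviour of utils.convert_synchronized_batchnorm: convert only when a
    # distributed process group is active, so single-GPU training is left untouched.
    if dist.is_available() and dist.is_initialized():
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    return model

# Toy model standing in for the x-vector network.
model = nn.Sequential(nn.Conv1d(80, 128, 3), nn.BatchNorm1d(128), nn.ReLU())

# 1) JIT test first, on the plain (unconverted) model.
script_model = torch.jit.script(model)
del script_model

# 2) Only then convert batchnorm for multi-GPU runs and build the optimizer.
model = convert_synchronized_batchnorm(model)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
```

Scripting the already-converted model instead could fail or script a graph that is never deployed, which is presumably the bug the commit title refers to.
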
10 changes: 5 additions & 5 deletions pytorch/launcher/runRepvggXvector.py
@@ -400,11 +400,6 @@
# Give your model class name here w.r.t the model.py.
model = model_py.RepVggXvector(info["feat_dim"], info["num_targets"], **model_params)

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
model.margin_warm.update_step_range(epoch_iters)
@@ -421,6 +416,11 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(optimizer, lr_scheduler_params)

9 changes: 5 additions & 4 deletions pytorch/launcher/runResnetXvector_online.py
@@ -405,10 +405,6 @@
# Give your model class name here w.r.t the model.py.
model = model_py.ResNetXvector(info["feat_dim"], info["num_targets"], **model_params)

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
@@ -426,6 +422,11 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(optimizer, lr_scheduler_params)

11 changes: 5 additions & 6 deletions pytorch/launcher/runTransformerXvector.py
@@ -431,11 +431,6 @@
model = model_py.TransformerXvector(
info["feat_dim"], info["num_targets"], **model_params)


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
# print(model)
epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
@@ -454,7 +449,11 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(
optimizer, lr_scheduler_params)
9 changes: 4 additions & 5 deletions pytorch/launcher/runTransformerXvector_LM.py
@@ -381,11 +381,6 @@
model = model_py.TransformerXvector(
info["feat_dim"], info["num_targets"], **model_params)


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
# print(model)
epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
@@ -405,6 +400,10 @@
logger.info("Define optimizer and lr_scheduler.")
del script_model

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(
optimizer, lr_scheduler_params)
8 changes: 8 additions & 0 deletions pytorch/libs/egs/speech_augment.py
@@ -342,6 +342,10 @@ def _load_noise(self, lengths, max_length):
left_padding = torch.randint(high = pad+1, size=(1,))[0]
padding = (left_padding,pad-left_padding)
noise_batch = torch.nn.functional.pad(noise_batch, padding)
# pad = max_length - noise_batch.size(1)
# left_padding = torch.randint(high = pad+1, size=(1,))[0]
# padding = (left_padding,pad-left_padding)
# noise_batch = torch.nn.functional.pad(noise_batch, padding)

# Select a random starting location in the waveform
start_index = self.start_index
@@ -714,6 +718,10 @@ def _load_noise(self, lengths, max_length):
left_padding = torch.randint(high = pad+1, size=(1,))[0]
padding = (left_padding,pad-left_padding)
noise_batch = torch.nn.functional.pad(noise_batch, padding)
# pad = max_length - noise_batch.size(1)
# left_padding = torch.randint(high = pad+1, size=(1,))[0]
# padding = (left_padding,pad-left_padding)
# noise_batch = torch.nn.functional.pad(noise_batch, padding)

# Select a random starting location in the waveform
start_index = self.start_index
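
Both `_load_noise` hunks pad a too-short noise batch up to the target length by splitting the deficit into a random left/right zero-pad. A self-contained sketch of just that step (the surrounding class, noise loading, and start-index selection are omitted):

```python
import torch
import torch.nn.functional as F

def pad_noise_to_length(noise_batch: torch.Tensor, max_length: int) -> torch.Tensor:
    """Zero-pad a [batch, time] noise tensor to max_length with a random left/right split."""
    pad = max_length - noise_batch.size(1)
    if pad > 0:
        left_padding = torch.randint(high=pad + 1, size=(1,)).item()
        padding = (left_padding, pad - left_padding)  # (pad_left, pad_right) on the last dim
        noise_batch = F.pad(noise_batch, padding)
    return noise_batch

# Example: a 2 x 16000 noise batch padded to 24000 samples.
noise = torch.randn(2, 16000)
padded = pad_noise_to_length(noise, 24000)
assert padded.shape == (2, 24000)
```
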
102 changes: 0 additions & 102 deletions pytorch/libs/nnet/transformer/TransformerEncoder.py

This file was deleted.

25 changes: 0 additions & 25 deletions pytorch/libs/nnet/transformer/repeat.py

This file was deleted.

6 changes: 4 additions & 2 deletions pytorch/libs/training/trainer_online.py
@@ -476,8 +476,10 @@ def run(self):

for _, batch in enumerate(data.train_loader, 0):
# It is important for reporter.
dist.barrier()
if utils.use_ddp():dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)

if utils.use_ddp():
dist.barrier()
dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)
if stop_training:
break
self.training_point[1] +=1
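
The identical guard is applied in `trainer_online_sam.py` below. A reduced sketch of the pattern, assuming `utils.use_ddp()` amounts to checking that a distributed process group is initialised; `dist.barrier()` raises without one, which is presumably what the guard avoids on single-process runs:

```python
import torch
import torch.distributed as dist

def use_ddp() -> bool:
    # Presumed equivalent of utils.use_ddp().
    return dist.is_available() and dist.is_initialized()

stop_training = torch.zeros(1, dtype=torch.int64)

for step in range(100):  # stand-in for iterating data.train_loader
    if use_ddp():
        dist.barrier()                                        # keep ranks aligned
        dist.all_reduce(stop_training, op=dist.ReduceOp.SUM)  # any rank stopping stops all
    if int(stop_training):
        break
    # ... forward / backward / optimizer step would go here ...
```
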
6 changes: 4 additions & 2 deletions pytorch/libs/training/trainer_online_sam.py
@@ -532,8 +532,10 @@ def run(self):

for _, batch in enumerate(data.train_loader, 0):
# It is important for reporter.
dist.barrier()
if utils.use_ddp():dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)

if utils.use_ddp():
dist.barrier()
dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)
if stop_training:
break
self.training_point[1] +=1
