fix bug, jit test before synbn (#64)
* fix data processing

* add margin_warm, support chunksize to extract xv

* add conformer-xv

* add large-margin fine-tune

* fix bug, jit test before synbn

Co-authored-by: YANOrange <[email protected]>
wangers and YanOrange1 authored Nov 28, 2022
1 parent 5558452 commit 353a662
Showing 11 changed files with 44 additions and 159 deletions.
5 changes: 3 additions & 2 deletions doc/papers/conformer.md
@@ -2,12 +2,13 @@

#### Baseline ASV conformer experiments are conducted on VoxCeleb and CNCeleb.
* VoxCeleb: `subtools/recipe/voxcelebSRC/runVoxcelebSRC_online.sh`
* CNCeleb: To be released.
* CNCeleb: To be released; see
[Towards A Unified Conformer Structure: from ASR to ASV Task](https://arxiv.org/abs/2211.07201)

#### ASR transfer is conducted on CNCeleb
</br>
<div align='center'>
<img src="./trans.jpg" width=40% height=100 align=center />
<img src="./trans.jpg" width=40% height=40% align=center />
</div>
</br>
* The pretrained ASR encoder can either be an open-source pretrained model or be trained from scratch.
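
A rough, self-contained illustration of that transfer idea (toy modules, not this repo's conformer classes): the ASR encoder's weights initialise the ASV model's encoder via `load_state_dict`, while the ASV-specific head stays randomly initialised.

```python
import torch.nn as nn

class Encoder(nn.Module):
    # Toy stand-in for a conformer encoder shared between ASR and ASV.
    def __init__(self, feat_dim: int = 80, dim: int = 256):
        super().__init__()
        self.proj = nn.Linear(feat_dim, dim)
        self.layers = nn.Sequential(nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim))

    def forward(self, x):
        return self.layers(self.proj(x))

class ConformerXvector(nn.Module):
    # Toy stand-in for the ASV model: shared encoder plus a speaker classification head.
    def __init__(self, num_targets: int = 1000):
        super().__init__()
        self.encoder = Encoder()
        self.head = nn.Linear(256, num_targets)

    def forward(self, x):
        return self.head(self.encoder(x).mean(dim=1))

asr_encoder = Encoder()          # pretend this was pretrained on an ASR task
asv_model = ConformerXvector()
missing, unexpected = asv_model.encoder.load_state_dict(asr_encoder.state_dict(), strict=False)
print(len(missing), len(unexpected))  # 0 0 when parameter names line up
```
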
12 changes: 6 additions & 6 deletions pytorch/launcher/runEcapaXvector_online.py
@@ -397,11 +397,6 @@
if hasattr(model,'margin_warm'):
model.margin_warm.update_step_range(epoch_iters)

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

if utils.is_main_training():
print(model)
p1=sum(p.numel() for p in model.parameters())
@@ -414,7 +409,12 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(
optimizer, lr_scheduler_params)
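
The same relocation appears in each launcher below: the TorchScript check is run on the plain model first, and the SyncBatchNorm conversion happens only afterwards, right before the optimizer is built. A minimal sketch of that ordering, assuming `utils.convert_synchronized_batchnorm` is roughly a guarded wrapper around `torch.nn.SyncBatchNorm.convert_sync_batchnorm` (an assumption, not the repo's verified implementation):

```python
import torch
import torch.distributed as dist
import torch.nn as nn

def convert_synchronized_batchnorm(model: nn.Module) -> nn.Module:
    # Presumed behaviour of utils.convert_synchronized_batchnorm: convert only when a
    # distributed process group is active, so single-GPU training is left untouched.
    if dist.is_available() and dist.is_initialized():
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    return model

# Toy model standing in for the x-vector network.
model = nn.Sequential(nn.Conv1d(80, 128, 3), nn.BatchNorm1d(128), nn.ReLU())

# 1) JIT test first, on the plain (unconverted) model.
script_model = torch.jit.script(model)
del script_model

# 2) Only then convert batchnorm for multi-GPU runs and build the optimizer.
model = convert_synchronized_batchnorm(model)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
```

Scripting the already-converted model instead could fail or script a graph that is never deployed, which is presumably the bug the commit title refers to.
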
10 changes: 5 additions & 5 deletions pytorch/launcher/runRepvggXvector.py
@@ -400,11 +400,6 @@
# Give your model class name here w.r.t the model.py.
model = model_py.RepVggXvector(info["feat_dim"], info["num_targets"], **model_params)

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
model.margin_warm.update_step_range(epoch_iters)
@@ -421,6 +416,11 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(optimizer, lr_scheduler_params)

9 changes: 5 additions & 4 deletions pytorch/launcher/runResnetXvector_online.py
@@ -405,10 +405,6 @@
# Give your model class name here w.r.t the model.py.
model = model_py.ResNetXvector(info["feat_dim"], info["num_targets"], **model_params)

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)

epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
@@ -426,6 +422,11 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(optimizer, lr_scheduler_params)

11 changes: 5 additions & 6 deletions pytorch/launcher/runTransformerXvector.py
@@ -431,11 +431,6 @@
model = model_py.TransformerXvector(
info["feat_dim"], info["num_targets"], **model_params)


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
# print(model)
epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
@@ -454,7 +449,11 @@
logger.info("The number of steps per epoch is about {}.".format(epoch_iters))
logger.info("Define optimizer and lr_scheduler.")
del script_model


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(
optimizer, lr_scheduler_params)
9 changes: 4 additions & 5 deletions pytorch/launcher/runTransformerXvector_LM.py
@@ -381,11 +381,6 @@
model = model_py.TransformerXvector(
info["feat_dim"], info["num_targets"], **model_params)


# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
# print(model)
epoch_iters = (info['epoch_iters']//accum_grad)
if hasattr(model,'margin_warm'):
@@ -405,6 +400,10 @@
logger.info("Define optimizer and lr_scheduler.")
del script_model

# If multi-GPU is used, then batchnorm will be converted to synchronized batchnorm, which is important
# to make performance stable.
# It will change nothing for single-GPU training.
model = utils.convert_synchronized_batchnorm(model)
optimizer = optim.get_optimizer(model, optimizer_params)
lr_scheduler = learn_rate_scheduler.LRSchedulerWrapper(
optimizer, lr_scheduler_params)
8 changes: 8 additions & 0 deletions pytorch/libs/egs/speech_augment.py
@@ -342,6 +342,10 @@ def _load_noise(self, lengths, max_length):
left_padding = torch.randint(high = pad+1, size=(1,))[0]
padding = (left_padding,pad-left_padding)
noise_batch = torch.nn.functional.pad(noise_batch, padding)
# pad = max_length - noise_batch.size(1)
# left_padding = torch.randint(high = pad+1, size=(1,))[0]
# padding = (left_padding,pad-left_padding)
# noise_batch = torch.nn.functional.pad(noise_batch, padding)

# Select a random starting location in the waveform
start_index = self.start_index
@@ -714,6 +718,10 @@ def _load_noise(self, lengths, max_length):
left_padding = torch.randint(high = pad+1, size=(1,))[0]
padding = (left_padding,pad-left_padding)
noise_batch = torch.nn.functional.pad(noise_batch, padding)
# pad = max_length - noise_batch.size(1)
# left_padding = torch.randint(high = pad+1, size=(1,))[0]
# padding = (left_padding,pad-left_padding)
# noise_batch = torch.nn.functional.pad(noise_batch, padding)

# Select a random starting location in the waveform
start_index = self.start_index
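
Both `_load_noise` hunks pad a too-short noise batch up to the target length by splitting the deficit into a random left/right zero-pad. A self-contained sketch of just that step (the surrounding class, noise loading, and start-index selection are omitted):

```python
import torch
import torch.nn.functional as F

def pad_noise_to_length(noise_batch: torch.Tensor, max_length: int) -> torch.Tensor:
    """Zero-pad a [batch, time] noise tensor to max_length with a random left/right split."""
    pad = max_length - noise_batch.size(1)
    if pad > 0:
        left_padding = torch.randint(high=pad + 1, size=(1,)).item()
        padding = (left_padding, pad - left_padding)  # (pad_left, pad_right) on the last dim
        noise_batch = F.pad(noise_batch, padding)
    return noise_batch

# Example: a 2 x 16000 noise batch padded to 24000 samples.
noise = torch.randn(2, 16000)
padded = pad_noise_to_length(noise, 24000)
assert padded.shape == (2, 24000)
```
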
102 changes: 0 additions & 102 deletions pytorch/libs/nnet/transformer/TransformerEncoder.py

This file was deleted.

25 changes: 0 additions & 25 deletions pytorch/libs/nnet/transformer/repeat.py

This file was deleted.

6 changes: 4 additions & 2 deletions pytorch/libs/training/trainer_online.py
@@ -476,8 +476,10 @@ def run(self):

for _, batch in enumerate(data.train_loader, 0):
# It is important for reporter.
dist.barrier()
if utils.use_ddp():dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)

if utils.use_ddp():
dist.barrier()
dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)
if stop_training:
break
self.training_point[1] +=1
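
The identical guard is applied in `trainer_online_sam.py` below. A reduced sketch of the pattern, assuming `utils.use_ddp()` amounts to checking that a distributed process group is initialised; `dist.barrier()` raises without one, which is presumably what the guard avoids on single-process runs:

```python
import torch
import torch.distributed as dist

def use_ddp() -> bool:
    # Presumed equivalent of utils.use_ddp().
    return dist.is_available() and dist.is_initialized()

stop_training = torch.zeros(1, dtype=torch.int64)

for step in range(100):  # stand-in for iterating data.train_loader
    if use_ddp():
        dist.barrier()                                        # keep ranks aligned
        dist.all_reduce(stop_training, op=dist.ReduceOp.SUM)  # any rank stopping stops all
    if int(stop_training):
        break
    # ... forward / backward / optimizer step would go here ...
```
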
6 changes: 4 additions & 2 deletions pytorch/libs/training/trainer_online_sam.py
@@ -532,8 +532,10 @@ def run(self):

for _, batch in enumerate(data.train_loader, 0):
# It is important for reporter.
dist.barrier()
if utils.use_ddp():dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)

if utils.use_ddp():
dist.barrier()
dist.all_reduce(stop_training,op=dist.ReduceOp.SUM)
if stop_training:
break
self.training_point[1] +=1
