Please cite the following if you make use of the code.
@inproceedings{chung2020in,
  title     = {In Defence of Metric Learning for Speaker Recognition},
  author    = {Chung, Joon Son and Huh, Jaesung and Mun, Seongkyu and Lee, Minjae and Heo, Hee Soo and Choe, Soyeon and Ham, Chiheon and Jung, Sunghwan and Lee, Bong-Jin and Han, Icksang},
  booktitle = {Interspeech},
  year      = {2020},
}
This trainer uses many models and loss functions that have been proposed in previous works. The suggested citations are as follows:
@inproceedings{nagrani2017voxceleb,
  title     = {{VoxCeleb}: A Large-Scale Speaker Identification Dataset},
  author    = {Nagrani, Arsha and Chung, Joon Son and Zisserman, Andrew},
  booktitle = {Interspeech},
  pages     = {2616--2620},
  year      = {2017},
}
@inproceedings{he2016deep,
  title     = {Deep Residual Learning for Image Recognition},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {770--778},
  year      = {2016},
}
@inproceedings{bhattacharya2017deep,
  title     = {Deep Speaker Embeddings for Short-Duration Speaker Verification},
  author    = {Bhattacharya, Gautam and Alam, Md Jahangir and Kenny, Patrick},
  booktitle = {Interspeech},
  pages     = {1517--1521},
  year      = {2017},
}
@inproceedings{okabe2018attentive,
  title     = {Attentive Statistics Pooling for Deep Speaker Embedding},
  author    = {Okabe, Koji and Koshinaka, Takafumi and Shinoda, Koichi},
  booktitle = {Interspeech},
  pages     = {2252--2256},
  year      = {2018},
}
@inproceedings{snell2017prototypical,
  title     = {Prototypical Networks for Few-Shot Learning},
  author    = {Snell, Jake and Swersky, Kevin and Zemel, Richard},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4077--4087},
  year      = {2017},
}
@inproceedings{wan2018generalized,
  title     = {Generalized End-to-End Loss for Speaker Verification},
  author    = {Wan, Li and Wang, Quan and Papir, Alan and Moreno, Ignacio Lopez},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing},
  pages     = {4879--4883},
  year      = {2018},
}
@inproceedings{schroff2015facenet,
  title     = {{FaceNet}: A Unified Embedding for Face Recognition and Clustering},
  author    = {Schroff, Florian and Kalenichenko, Dmitry and Philbin, James},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {815--823},
  year      = {2015},
}
@inproceedings{wang2018cosface,
  title     = {{CosFace}: Large Margin Cosine Loss for Deep Face Recognition},
  author    = {Wang, Hao and Wang, Yitong and Zhou, Zheng and Ji, Xing and Gong, Dihong and Zhou, Jingchao and Li, Zhifeng and Liu, Wei},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {5265--5274},
  year      = {2018},
}
@inproceedings{deng2019arcface,
  title     = {{ArcFace}: Additive Angular Margin Loss for Deep Face Recognition},
  author    = {Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4690--4699},
  year      = {2019},
}
@article{snyder2015musan,
  title         = {{MUSAN}: A Music, Speech, and Noise Corpus},
  author        = {Snyder, David and Chen, Guoguo and Povey, Daniel},
  journal       = {arXiv preprint arXiv:1510.08484},
  eprint        = {1510.08484},
  archiveprefix = {arXiv},
  year          = {2015},
}
@inproceedings{ko2017study,
  title     = {A Study on Data Augmentation of Reverberant Speech for Robust Speech Recognition},
  author    = {Ko, Tom and Peddinti, Vijayaditya and Povey, Daniel and Seltzer, Michael L. and Khudanpur, Sanjeev},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing},
  pages     = {5220--5224},
  year      = {2017},
}