diff --git a/DatasetLoader.py b/DatasetLoader.py index 9ec2672..56ec73e 100644 --- a/DatasetLoader.py +++ b/DatasetLoader.py @@ -10,6 +10,7 @@ import time import math import glob +import pathlib import soundfile from scipy import signal from scipy.io import wavfile @@ -50,7 +51,7 @@ def loadWAV(filename, max_frames, evalmode=True, num_eval=10): for asf in startframe: feats.append(audio[int(asf):int(asf)+max_audio]) - feat = numpy.stack(feats,axis=0).astype(numpy.float) + feat = numpy.stack(feats,axis=0).astype(numpy.float64) return feat; @@ -65,19 +66,25 @@ def __init__(self, musan_path, rir_path, max_frames): self.noisesnr = {'noise':[0,15],'speech':[13,20],'music':[5,15]} self.numnoise = {'noise':[1,1], 'speech':[3,7], 'music':[1,1] } + # Something is wrong with this ... noise, speech, and music file names should be assigned here self.noiselist = {} - augment_files = glob.glob(os.path.join(musan_path,'*/*/*/*.wav')); + currWorkDir = os.getcwd() + musanPath = pathlib.Path(currWorkDir).joinpath(musan_path) + augmentFilesPaths = list(musanPath.glob("**/*.wav")) + augment_files = [str(af) for af in augmentFilesPaths] - for file in augment_files: - if not file.split('/')[-4] in self.noiselist: - self.noiselist[file.split('/')[-4]] = [] - self.noiselist[file.split('/')[-4]].append(file) + for file in augmentFilesPaths: + mainParent = str(file.parts[-3]) + if mainParent not in self.noiselist.keys(): + self.noiselist[mainParent] = [] + self.noiselist[mainParent].append(str(file)) - self.rir_files = glob.glob(os.path.join(rir_path,'*/*/*.wav')); + rirPath = pathlib.Path(currWorkDir).joinpath(rir_path) + rirFilesPaths = list(rirPath.glob("**/*.wav")) + self.rir_files = [str(rf) for rf in rirFilesPaths] def additive_noise(self, noisecat, audio): - clean_db = 10 * numpy.log10(numpy.mean(audio ** 2)+1e-4) numnoise = self.numnoise[noisecat] @@ -99,7 +106,7 @@ def reverberate(self, audio): rir_file = random.choice(self.rir_files) rir, fs = soundfile.read(rir_file) - rir = 
numpy.expand_dims(rir.astype(numpy.float),0) + rir = numpy.expand_dims(rir.astype(numpy.float64),0) rir = rir / numpy.sqrt(numpy.sum(rir**2)) return signal.convolve(audio, rir, mode='full')[:,:self.max_audio] @@ -176,9 +183,17 @@ def __init__(self, test_list, test_path, eval_frames, num_eval, **kwargs): self.test_list = test_list def __getitem__(self, index): - audio = loadWAV(os.path.join(self.test_path,self.test_list[index]), self.max_frames, evalmode=True, num_eval=self.num_eval) + filePath = pathlib.Path(self.test_path).joinpath(self.test_list[index]) + audio = loadWAV(str(filePath), self.max_frames, evalmode=True, num_eval=self.num_eval) return torch.FloatTensor(audio), self.test_list[index] + # def __getitems__(self, indexList): + # sampleList = [] + # for index in indexList: + # sample = self.__getitem__(index) + # sampleList.append(sample) + # return sampleList + def __len__(self): return len(self.test_list) diff --git a/SpeakerNet.py b/SpeakerNet.py index 4e64e7d..ad8d712 100644 --- a/SpeakerNet.py +++ b/SpeakerNet.py @@ -157,17 +157,23 @@ def evaluateFromList(self, test_list, test_path, nDataLoaderThread, distributed, setfiles.sort() ## Define test data loader + # print(f" - {setfiles}") + test_dataset = test_dataset_loader(setfiles, test_path, num_eval=num_eval, **kwargs) if distributed: + print(f" - Evaluating in 'Distributed' mode ... ") sampler = torch.utils.data.distributed.DistributedSampler(test_dataset, shuffle=False) else: + print(f" - Evaluating in 'Serial' mode ... ") sampler = None + print(f" - No. 
Workers = {nDataLoaderThread}") test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=nDataLoaderThread, drop_last=False, sampler=sampler) ## Extract features for every image - for idx, data in enumerate(test_loader): + print(f" - Extracting features ...") + for idx, data in enumerate(test_dataset): inp1 = data[0][0].cuda() with torch.no_grad(): ref_feat = self.__model__(inp1).detach().cpu() diff --git a/configs/ResNetSE34L_AP.yaml b/configs/ResNetSE34L_AP.yaml index d4b1e6b..3d19181 100644 --- a/configs/ResNetSE34L_AP.yaml +++ b/configs/ResNetSE34L_AP.yaml @@ -4,4 +4,5 @@ encoder_type: SAP trainfunc: angleproto save_path: exps/ResNetSE34L_AP nPerSpeaker: 2 -batch_size: 200 \ No newline at end of file +batch_size: 200 +augment: True diff --git a/trainSpeakerNet.py b/trainSpeakerNet.py index 2219d3c..a49b3bf 100644 --- a/trainSpeakerNet.py +++ b/trainSpeakerNet.py @@ -37,6 +37,7 @@ parser.add_argument('--test_interval', type=int, default=10, help='Test and save every [test_interval] epochs') parser.add_argument('--max_epoch', type=int, default=500, help='Maximum number of epochs') parser.add_argument('--trainfunc', type=str, default="", help='Loss function') +parser.add_argument('--find_unused_parameters', dest='findunusedparams', action='store_true', help='Find unused parameters') ## Optimizer parser.add_argument('--optimizer', type=str, default="adam", help='sgd or adam') @@ -114,6 +115,14 @@ def main_worker(gpu, ngpus_per_node, args): args.gpu = gpu + print(" ================================== ") + print(" Model Configurations") + print(" ================================== ") + for arg in vars(args): + varg = getattr(args, arg) + print(f" - {arg:32s} : {varg}") + print(" ---------------------------------- \n") + ## Load models s = SpeakerNet(**vars(args)) @@ -126,7 +135,7 @@ def main_worker(gpu, ngpus_per_node, args): torch.cuda.set_device(args.gpu) s.cuda(args.gpu) - s = torch.nn.parallel.DistributedDataParallel(s, 
device_ids=[args.gpu], find_unused_parameters=True) + s = torch.nn.parallel.DistributedDataParallel(s, device_ids=[args.gpu], find_unused_parameters=args.findunusedparams) print('Loaded the model on GPU {:d}'.format(args.gpu)) @@ -145,6 +154,13 @@ def main_worker(gpu, ngpus_per_node, args): train_sampler = train_dataset_sampler(train_dataset, **vars(args)) + + # print(" =========================================") + # print(" Training Data set ") + # print(" =========================================") + # for i, dataLabel in enumerate(train_dataset.data_label): + # print(f" - {dataLabel} --> {train_dataset.data_list[i]}") + train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, @@ -220,7 +236,7 @@ def main_worker(gpu, ngpus_per_node, args): scorefile.write("Epoch {:d}, TEER/TAcc {:2.2f}, TLOSS {:f}, LR {:f} \n".format(it, traineer, loss, max(clr))) if it % args.test_interval == 0: - + print(f" - Testing trained network every {args.test_interval} Epochs ...") sc, lab, _ = trainer.evaluateFromList(**vars(args)) if args.gpu == 0: