From 6f5d3bdaabdb8fadf78aadc1ab29edc4fd920cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Tue, 27 Jun 2023 16:49:08 +0200 Subject: [PATCH 1/2] wip: re-assign training embedding to their original cluster --- pyannote/audio/pipelines/clustering.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyannote/audio/pipelines/clustering.py b/pyannote/audio/pipelines/clustering.py index 3c2786232..095624d54 100644 --- a/pyannote/audio/pipelines/clustering.py +++ b/pyannote/audio/pipelines/clustering.py @@ -189,9 +189,12 @@ def assign_embeddings( hard_clusters = self.constrained_argmax(soft_clusters) else: hard_clusters = np.argmax(soft_clusters, axis=2) - - # TODO: add a flag to revert argmax for trainign subset - # hard_clusters[train_chunk_idx, train_speaker_idx] = train_clusters + # revert to actual cluster for training subset + reassigned_ratio = np.mean( + hard_clusters[train_chunk_idx, train_speaker_idx] != train_clusters + ) + print("Reassigned ratio:", reassigned_ratio, flush=True) + hard_clusters[train_chunk_idx, train_speaker_idx] = train_clusters return hard_clusters, soft_clusters, centroids From 16e562841d776b604bd8d24835c463b276f294e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Wed, 28 Jun 2023 09:06:00 +0200 Subject: [PATCH 2/2] fix: revert change and add note for future self --- pyannote/audio/pipelines/clustering.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pyannote/audio/pipelines/clustering.py b/pyannote/audio/pipelines/clustering.py index 095624d54..a779016cb 100644 --- a/pyannote/audio/pipelines/clustering.py +++ b/pyannote/audio/pipelines/clustering.py @@ -189,12 +189,10 @@ def assign_embeddings( hard_clusters = self.constrained_argmax(soft_clusters) else: hard_clusters = np.argmax(soft_clusters, axis=2) - # revert to actual cluster for training subset - reassigned_ratio = np.mean( - hard_clusters[train_chunk_idx, train_speaker_idx] != 
train_clusters - ) - print("Reassigned ratio:", reassigned_ratio, flush=True) - hard_clusters[train_chunk_idx, train_speaker_idx] = train_clusters + + # NOTE: train_embeddings might be reassigned to a different cluster + # in the process. Based on experiments, this seems to lead to better + # results than sticking to the original assignment. return hard_clusters, soft_clusters, centroids