From b569c3b6bcbc57b887378e8e37265b8ee45397f5 Mon Sep 17 00:00:00 2001 From: Razi Taj Mazinani Date: Tue, 26 Nov 2024 22:05:40 +0330 Subject: [PATCH 1/6] Update speech_separation.py --- pyannote/audio/pipelines/speech_separation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py index 0ffe42a0d..fd330d6d0 100644 --- a/pyannote/audio/pipelines/speech_separation.py +++ b/pyannote/audio/pipelines/speech_separation.py @@ -70,7 +70,7 @@ class SpeechSeparation(SpeakerDiarizationMixin, Pipeline): `segmentation_step` controls the step of this window, provided as a ratio of its duration. Defaults to 0.1 (i.e. 90% overlap between two consecuive windows). embedding : Model, str, or dict, optional - Pretrained embedding model. Defaults to "pyannote/embedding@2022.07". + Pretrained embedding model. Defaults to "speechbrain/spkrec-ecapa-voxceleb@5c0be3875fda05e81f3c004ed8c7c06be308de1e". See pyannote.audio.pipelines.utils.get_model for supported format. embedding_exclude_overlap : bool, optional Exclude overlapping speech regions when extracting embeddings. From 2f1c92e33793a9038213ff1071ea4aee6c5b4e3c Mon Sep 17 00:00:00 2001 From: Razi Taj Mazinani Date: Tue, 26 Nov 2024 22:07:26 +0330 Subject: [PATCH 2/6] Update speaker_diarization.py --- pyannote/audio/pipelines/speaker_diarization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyannote/audio/pipelines/speaker_diarization.py b/pyannote/audio/pipelines/speaker_diarization.py index e0d43e30c..9242b1ab9 100644 --- a/pyannote/audio/pipelines/speaker_diarization.py +++ b/pyannote/audio/pipelines/speaker_diarization.py @@ -69,7 +69,7 @@ class SpeakerDiarization(SpeakerDiarizationMixin, Pipeline): `segmentation_step` controls the step of this window, provided as a ratio of its duration. Defaults to 0.1 (i.e. 90% overlap between two consecuive windows). embedding : Model, str, or dict, optional - Pretrained embedding model. Defaults to "pyannote/embedding@2022.07". + Pretrained embedding model. Defaults to "speechbrain/spkrec-ecapa-voxceleb@5c0be3875fda05e81f3c004ed8c7c06be308de1e". See pyannote.audio.pipelines.utils.get_model for supported format. embedding_exclude_overlap : bool, optional Exclude overlapping speech regions when extracting embeddings. From 0cbfe9716a8a4682ecfec9f1a6317d4ce4b0e833 Mon Sep 17 00:00:00 2001 From: Razi Taj Mazinani Date: Wed, 27 Nov 2024 18:28:13 +0330 Subject: [PATCH 3/6] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fbf63f8b7..5436422c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Clipping and speaker/source alignment issues in speech separation pipeline have - fix(separation): fix `PixIT` training with manual optimization ([@clement-pages](https://github.com/clement-pages/)) - fix(doc): fix link to pytorch ([@emmanuel-ferdman](https://github.com/emmanuel-ferdman/)) - fix(task): fix corner case with small (<9) number of validation samples ([@antoinelaurent](https://github.com/antoinelaurent/)) +- fix(documentation): Changed the default embedding model from "pyannote/embedding@2022.07" to "speechbrain/spkrec-ecapa-voxcele" in documentations of speech_separation.py and speaker_diarization.py. ## Version 3.3.2 (2024-09-11) From 09ff5e8a43de9c7bf9a9ef0ea90185384f747f49 Mon Sep 17 00:00:00 2001 From: Razi Taj Mazinani Date: Thu, 28 Nov 2024 23:52:51 +0330 Subject: [PATCH 4/6] Update pyannote/audio/pipelines/speaker_diarization.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hervé BREDIN --- pyannote/audio/pipelines/speaker_diarization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyannote/audio/pipelines/speaker_diarization.py b/pyannote/audio/pipelines/speaker_diarization.py index 9242b1ab9..3548628e4 100644 --- a/pyannote/audio/pipelines/speaker_diarization.py +++ b/pyannote/audio/pipelines/speaker_diarization.py @@ -69,7 +69,7 @@ class SpeakerDiarization(SpeakerDiarizationMixin, Pipeline): `segmentation_step` controls the step of this window, provided as a ratio of its duration. Defaults to 0.1 (i.e. 90% overlap between two consecuive windows). embedding : Model, str, or dict, optional - Pretrained embedding model. Defaults to "speechbrain/spkrec-ecapa-voxceleb@5c0be3875fda05e81f3c004ed8c7c06be308de1e". + Pretrained embedding model. Defaults to "speechbrain/spkrec-ecapa-voxceleb@5c0be38". See pyannote.audio.pipelines.utils.get_model for supported format. embedding_exclude_overlap : bool, optional Exclude overlapping speech regions when extracting embeddings. From 42fa46415512b1bba24cfa8bf8bdd3a6b0d5fa52 Mon Sep 17 00:00:00 2001 From: Razi Taj Mazinani Date: Thu, 28 Nov 2024 23:53:00 +0330 Subject: [PATCH 5/6] Update pyannote/audio/pipelines/speech_separation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hervé BREDIN --- pyannote/audio/pipelines/speech_separation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py index b9cbe46c0..4172601c3 100644 --- a/pyannote/audio/pipelines/speech_separation.py +++ b/pyannote/audio/pipelines/speech_separation.py @@ -70,7 +70,7 @@ class SpeechSeparation(SpeakerDiarizationMixin, Pipeline): `segmentation_step` controls the step of this window, provided as a ratio of its duration. Defaults to 0.1 (i.e. 90% overlap between two consecuive windows). embedding : Model, str, or dict, optional - Pretrained embedding model. Defaults to "speechbrain/spkrec-ecapa-voxceleb@5c0be3875fda05e81f3c004ed8c7c06be308de1e". + Pretrained embedding model. Defaults to "speechbrain/spkrec-ecapa-voxceleb@5c0be38". See pyannote.audio.pipelines.utils.get_model for supported format. embedding_exclude_overlap : bool, optional Exclude overlapping speech regions when extracting embeddings. From fba1543b694c30970f8ce4caa1e748696d23d917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Thu, 28 Nov 2024 22:19:11 +0100 Subject: [PATCH 6/6] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fe6c139b..1a0e40fbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,7 +47,7 @@ Clipping and speaker/source alignment issues in speech separation pipeline have - fix(separation): fix `PixIT` training with manual optimization ([@clement-pages](https://github.com/clement-pages/)) - fix(doc): fix link to pytorch ([@emmanuel-ferdman](https://github.com/emmanuel-ferdman/)) - fix(task): fix corner case with small (<9) number of validation samples ([@antoinelaurent](https://github.com/antoinelaurent/)) -- fix(documentation): Changed the default embedding model from "pyannote/embedding@2022.07" to "speechbrain/spkrec-ecapa-voxcele" in documentations of speech_separation.py and speaker_diarization.py. +- fix(doc): fix default embedding in `SpeechSeparation` and `SpeakerDiarization` docstring ([@razi-tm](https://github.com/razi-tm/)). ## Version 3.3.2 (2024-09-11)