From 299ffcce02a3be745801ac11432e5199e2c2ad0e Mon Sep 17 00:00:00 2001 From: Juan Coria Date: Fri, 28 Jun 2024 23:50:29 +0200 Subject: [PATCH] Apply suggestions from code review --- README.md | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 58e0368c..3a3f3f11 100644 --- a/README.md +++ b/README.md @@ -287,33 +287,30 @@ import diart.operators as dops from diart.sources import MicrophoneAudioSource, FileAudioSource from diart.blocks import SpeakerSegmentation, OverlapAwareSpeakerEmbedding -segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation", use_hf_token="") -embedding = OverlapAwareSpeakerEmbedding.from_pretrained("pyannote/embedding", use_hf_token="") +segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation") +embedding = OverlapAwareSpeakerEmbedding.from_pretrained("pyannote/embedding") -mic = MicrophoneAudioSource() +source = MicrophoneAudioSource() # To take input from file: -# mic = FileAudioSource("", sample_rate=16000) +# source = FileAudioSource("", sample_rate=16000) -stream = mic.stream.pipe( +stream = source.stream.pipe( # Reformat stream to 5s duration and 500ms shift - dops.rearrange_audio_stream(sample_rate=16000), + dops.rearrange_audio_stream(sample_rate=source.sample_rate), ops.map(lambda wav: (wav, segmentation(wav))), ops.starmap(embedding) -).subscribe(on_next=lambda emb: print(emb)) #emb.shape to display shape +).subscribe(on_next=lambda emb: print(emb.shape)) -mic.read() +source.read() ``` Output: ``` -# Displaying embeds: -tensor([[[-0.0442, -0.0327, -0.0910, ..., 0.0134, 0.0209, 0.0050], - [-0.0404, -0.0342, -0.0780, ..., 0.0395, 0.0334, -0.0140], - [-0.0404, -0.0342, -0.0780, ..., 0.0395, 0.0334, -0.0140]]]) -tensor([[[-0.0724, 0.0049, -0.0660, ..., 0.0359, 0.0247, -0.0256], - [-0.0462, -0.0256, -0.0642, ..., 0.0417, 0.0273, -0.0135], - [-0.0459, -0.0263, -0.0639, ..., 0.0412, 0.0269, -0.0131]]]) +# Shape is (batch_size, num_speakers, embedding_dim) 
+torch.Size([1, 3, 512]) +torch.Size([1, 3, 512]) +torch.Size([1, 3, 512]) ... ```