From f5ba74b3e9404fbda9dcd7950a5e15990e48f447 Mon Sep 17 00:00:00 2001 From: Flying Fish <525942103@qq.com> Date: Fri, 16 Sep 2022 18:22:54 +0800 Subject: [PATCH] Update README.md --- README.md | 93 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 3d508bc..a65f233 100644 --- a/README.md +++ b/README.md @@ -6,77 +6,78 @@ https://github.com/mozilla/TTS/tree/master/TTS/speaker_encoder pip install coqpit -# download model,or get it at **release** +# download model, https://github.com/mozilla/TTS/wiki/Released-Models Speaker-Encoder by @mueller91 LibriTTS + VCTK + VoxCeleb + CommonVoice -# please read the config https://drive.google.com/drive/folders/15oeBYf6Qn1edONkVLXe82MzdIi3O_9m3 +Or get it at release **saved_models.zip** + # use python vi_speaker_single.py ./saved_models/best_model.pth.tar ./saved_models/config.json -s TEST.wav -t TEST.npy # batch use python vi_speaker_batch.py ./saved_models/best_model.pth.tar ./saved_models/config.json ./data/waves ./speaker_embedding -data/ -└── waves + data/ + └── waves + ├── spk1 + │   ├── 000002.wav + │   ├── 000006.wav + │   └── 000038.wav + └── spk2 + ├── 000040.wav + ├── 000044.wav + └── 000077.wav + + speaker_embedding/ ├── spk1 - │   ├── 000002.wav - │   ├── 000006.wav - │   └── 000038.wav + │   ├── 000002.npy + │   ├── 000006.npy + │   └── 000038.npy └── spk2 - ├── 000040.wav - ├── 000044.wav - └── 000077.wav - -speaker_embedding/ -├── spk1 -│   ├── 000002.npy -│   ├── 000006.npy -│   └── 000038.npy -└── spk2 - ├── 000040.npy - ├── 000044.npy - └── 000077.npy + ├── 000040.npy + ├── 000044.npy + └── 000077.npy # compute speaker center input path = speaker_embedding, output path = speaker_embedding_center python vi_speaker_center.py -speaker_embedding_center/ -├── spk1.npy -└── spk2.npy + speaker_embedding_center/ + ├── spk1.npy + └── spk2.npy # for VI-SVC mv speaker_embedding_center data/spkid -data/ -├── waves -│   ├── 10001 -│   ├── 20400 -│   │   ├── 20400_001.wav -│   │   ├── 20456_019.wav -│   │   -├── phone -│   ├── 10001 -│   ├── 20400 -│   │   ├── 20400_001.npy -│   │   ├── 20456_019.npy -│   │   -├── lable -│   ├── 10001 -│   ├── 20400 -│   │   ├── 20400_001.npy -│   │   ├── 20456_019.npy -│   │   -├── spkid -│   ├── 10001.npy -│   ├── 20400.npy -│   │   + data/ + ├── waves + │   ├── 10001 + │   ├── 20400 + │   │   ├── 20400_001.wav + │   │   ├── 20456_019.wav + │   │   + ├── phone + │   ├── 10001 + │   ├── 20400 + │   │   ├── 20400_001.npy + │   │   ├── 20456_019.npy + │   │   + ├── lable + │   ├── 10001 + │   ├── 20400 + │   │   ├── 20400_001.npy + │   │   ├── 20456_019.npy + │   │   + ├── spkid + │   ├── 10001.npy + │   ├── 20400.npy + │   │