From 1ee2be0965d3a9b94b71244047fd11888b6111aa Mon Sep 17 00:00:00 2001 From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com> Date: Tue, 10 Sep 2024 16:04:10 +0800 Subject: [PATCH] diffsinger phonemizers: Check if phoneme is supported by voicebanks --- OpenUtau.Core/Api/G2pDictionary.cs | 20 +++++++-------- .../DiffSinger/DiffSingerBasePhonemizer.cs | 10 ++++---- .../Phonemizers/DiffSingerG2pPhonemizer.cs | 25 ++++++++++++------- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/OpenUtau.Core/Api/G2pDictionary.cs b/OpenUtau.Core/Api/G2pDictionary.cs index 8ff0b8aae..fd9f89592 100644 --- a/OpenUtau.Core/Api/G2pDictionary.cs +++ b/OpenUtau.Core/Api/G2pDictionary.cs @@ -120,6 +120,15 @@ void BuildTrie(TrieNode node, string grapheme, int index, IEnumerable sy public Builder Load(string input) { var data = Core.Yaml.DefaultDeserializer.Deserialize(input); + return Load(data); + } + + public Builder Load(TextReader textReader) { + var data = Core.Yaml.DefaultDeserializer.Deserialize(textReader); + return Load(data); + } + + public Builder Load(G2pDictionaryData data){ if (data.symbols != null) { foreach (var symbolData in data.symbols) { AddSymbol(symbolData.symbol, symbolData.type); @@ -133,17 +142,6 @@ public Builder Load(string input) { return this; } - public Builder Load(TextReader textReader) { - var data = Core.Yaml.DefaultDeserializer.Deserialize(textReader); - foreach (var symbolData in data.symbols) { - AddSymbol(symbolData.symbol, symbolData.type); - } - foreach (var entry in data.entries) { - AddEntry(entry.grapheme, entry.phonemes); - } - return this; - } - public G2pDictionary Build() { return new G2pDictionary(root, phonemeSymbols, glideSymbols); } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs index 132212db4..644f3d0b0 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs @@ -83,7 
+83,7 @@ private bool _executeSetSinger(USinger singer) { } this.frameMs = dsConfig.frameMs(); //Load g2p - g2p = LoadG2p(rootPath); + g2p = LoadG2p(rootPath, dsConfig.use_lang_id); //Load phonemes list string phonemesPath = Path.Combine(rootPath, dsConfig.phonemes); phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath); @@ -109,7 +109,7 @@ private bool _executeSetSinger(USinger singer) { return true; } - protected virtual IG2p LoadG2p(string rootPath) { + protected virtual IG2p LoadG2p(string rootPath, bool useLangId = false) { //Each phonemizer has a delicated dictionary name, such as dsdict-en.yaml, dsdict-ru.yaml. //If this dictionary exists, load it. //If not, load dsdict.yaml. @@ -138,13 +138,13 @@ protected virtual IG2p LoadG2p(string rootPath) { //Check if the phoneme is supported. If unsupported, return an empty string. //And apply language prefix to phoneme string ValidatePhoneme(string phoneme){ - if(g2p.IsValidSymbol(phoneme)){ + if(g2p.IsValidSymbol(phoneme) && phonemeTokens.ContainsKey(phoneme)){ return phoneme; } var langCode = GetLangCode(); if(langCode != String.Empty){ var phonemeWithLanguage = langCode + "/" + phoneme; - if(g2p.IsValidSymbol(phonemeWithLanguage)){ + if(g2p.IsValidSymbol(phonemeWithLanguage) && phonemeTokens.ContainsKey(phonemeWithLanguage)){ return phonemeWithLanguage; } } @@ -306,7 +306,7 @@ protected override void ProcessPart(Note[][] phrase) { var wordFound = new bool[phrase.Length]; foreach (int wordIndex in Enumerable.Range(0, phrase.Length)) { Note[] word = phrase[wordIndex]; - var symbols = GetSymbols(word[0]); + var symbols = GetSymbols(word[0]).Where(s => phonemeTokens.ContainsKey(s)).ToArray(); if (symbols == null || symbols.Length == 0) { symbols = new string[] { defaultPause }; wordFound[wordIndex] = false; diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerG2pPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerG2pPhonemizer.cs index e3265ef18..c46c8ddd3 100644 --- 
a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerG2pPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerG2pPhonemizer.cs @@ -2,23 +2,20 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using OpenUtau.Api; namespace OpenUtau.Core.DiffSinger { - public class G2pReplacementsData{ + class DiffSingerG2pDictionaryData : G2pDictionaryData{ public struct Replacement{ public string from; public string to; } public Replacement[]? replacements; - - public static G2pReplacementsData Load(string text){ - return OpenUtau.Core.Yaml.DefaultDeserializer.Deserialize(text); - } - public Dictionary toDict(){ + public Dictionary replacementsDict(){ var dict = new Dictionary(); if(replacements!=null){ foreach(var r in replacements){ @@ -39,7 +36,7 @@ public abstract class DiffSingerG2pPhonemizer : DiffSingerBasePhonemizer protected virtual string[] GetBaseG2pVowels()=>new string[]{}; protected virtual string[] GetBaseG2pConsonants()=>new string[]{}; - protected override IG2p LoadG2p(string rootPath) { + protected override IG2p LoadG2p(string rootPath, bool useLangId = false) { //Each phonemizer has a delicated dictionary name, such as dsdict-en.yaml, dsdict-ru.yaml. //If this dictionary exists, load it. //If not, load dsdict.yaml. 
@@ -54,8 +51,9 @@ protected override IG2p LoadG2p(string rootPath) { if (File.Exists(dictionaryPath)) { try { string dictText = File.ReadAllText(dictionaryPath); - replacements = G2pReplacementsData.Load(dictText).toDict(); - g2pBuilder.Load(dictText); + var dictData = Yaml.DefaultDeserializer.Deserialize(dictText); + g2pBuilder.Load(dictData); + replacements = dictData.replacementsDict(); } catch (Exception e) { Log.Error(e, $"Failed to load {dictionaryPath}"); } @@ -79,6 +77,15 @@ protected override IG2p LoadG2p(string rootPath) { foreach(var c in GetBaseG2pConsonants()){ phonemeSymbols[c]=false; } + if(useLangId){ + //For diffsinger multi dict voicebanks, the replacements of g2p phonemes default to <langcode>/<phoneme> + var langCode = GetLangCode(); + foreach(var ph in GetBaseG2pVowels().Concat(GetBaseG2pConsonants())){ + if(!replacements.ContainsKey(ph)){ + replacements[ph]=langCode + "/" + ph; + } + } + } foreach(var from in replacements.Keys){ var to = replacements[from]; if(baseG2p.IsValidSymbol(to)){