Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Diffsinger phonemizers: G2p results add langcode by default; Check if phoneme is supported by duration model #1280

Merged
merged 1 commit into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions OpenUtau.Core/Api/G2pDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ void BuildTrie(TrieNode node, string grapheme, int index, IEnumerable<string> sy

/// <summary>
/// Deserializes <paramref name="input"/> into a <see cref="G2pDictionaryData"/>
/// and forwards it to the <see cref="G2pDictionaryData"/> overload of Load.
/// </summary>
/// <param name="input">Serialized dictionary text.</param>
/// <returns>Whatever the data-based overload returns.</returns>
public Builder Load(string input) =>
    Load(Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(input));

/// <summary>
/// Deserializes a <see cref="G2pDictionaryData"/> from <paramref name="textReader"/>
/// and forwards it to the <see cref="G2pDictionaryData"/> overload of Load.
/// </summary>
/// <param name="textReader">Reader positioned at the serialized dictionary text.</param>
/// <returns>Whatever the data-based overload returns.</returns>
public Builder Load(TextReader textReader) =>
    Load(Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(textReader));

public Builder Load(G2pDictionaryData data){
if (data.symbols != null) {
foreach (var symbolData in data.symbols) {
AddSymbol(symbolData.symbol, symbolData.type);
Expand All @@ -133,17 +142,6 @@ public Builder Load(string input) {
return this;
}

/// <summary>
/// Deserializes a <see cref="G2pDictionaryData"/> from <paramref name="textReader"/>
/// and registers every symbol and entry it contains with this builder.
/// </summary>
/// <param name="textReader">Reader positioned at the serialized dictionary text.</param>
/// <returns>This builder, so calls can be chained.</returns>
public Builder Load(TextReader textReader) {
    var data = Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(textReader);
    // A dictionary may omit the symbols and/or entries section (and the
    // deserializer may yield no object at all for empty input), so guard
    // both collections instead of dereferencing them unconditionally.
    if (data?.symbols != null) {
        foreach (var symbolData in data.symbols) {
            AddSymbol(symbolData.symbol, symbolData.type);
        }
    }
    if (data?.entries != null) {
        foreach (var entry in data.entries) {
            AddEntry(entry.grapheme, entry.phonemes);
        }
    }
    return this;
}

/// <summary>
/// Creates a <see cref="G2pDictionary"/> from the builder's current trie root,
/// phoneme symbols and glide symbols.
/// </summary>
public G2pDictionary Build() =>
    new G2pDictionary(root, phonemeSymbols, glideSymbols);
Expand Down
10 changes: 5 additions & 5 deletions OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ private bool _executeSetSinger(USinger singer) {
}
this.frameMs = dsConfig.frameMs();
//Load g2p
g2p = LoadG2p(rootPath);
g2p = LoadG2p(rootPath, dsConfig.use_lang_id);
//Load phonemes list
string phonemesPath = Path.Combine(rootPath, dsConfig.phonemes);
phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath);
Expand All @@ -109,7 +109,7 @@ private bool _executeSetSinger(USinger singer) {
return true;
}

protected virtual IG2p LoadG2p(string rootPath) {
protected virtual IG2p LoadG2p(string rootPath, bool useLangId = false) {
//Each phonemizer has a dedicated dictionary name, such as dsdict-en.yaml, dsdict-ru.yaml.
//If this dictionary exists, load it.
//If not, load dsdict.yaml.
Expand Down Expand Up @@ -138,13 +138,13 @@ protected virtual IG2p LoadG2p(string rootPath) {
//Check if the phoneme is supported. If unsupported, return an empty string.
//And apply language prefix to phoneme
string ValidatePhoneme(string phoneme){
if(g2p.IsValidSymbol(phoneme)){
if(g2p.IsValidSymbol(phoneme) && phonemeTokens.ContainsKey(phoneme)){
return phoneme;
}
var langCode = GetLangCode();
if(langCode != String.Empty){
var phonemeWithLanguage = langCode + "/" + phoneme;
if(g2p.IsValidSymbol(phonemeWithLanguage)){
if(g2p.IsValidSymbol(phonemeWithLanguage) && phonemeTokens.ContainsKey(phonemeWithLanguage)){
return phonemeWithLanguage;
}
}
Expand Down Expand Up @@ -306,7 +306,7 @@ protected override void ProcessPart(Note[][] phrase) {
var wordFound = new bool[phrase.Length];
foreach (int wordIndex in Enumerable.Range(0, phrase.Length)) {
Note[] word = phrase[wordIndex];
var symbols = GetSymbols(word[0]);
var symbols = GetSymbols(word[0]).Where(s => phonemeTokens.ContainsKey(s)).ToArray();
if (symbols == null || symbols.Length == 0) {
symbols = new string[] { defaultPause };
wordFound[wordIndex] = false;
Expand Down
25 changes: 16 additions & 9 deletions OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerG2pPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,20 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

using OpenUtau.Api;

namespace OpenUtau.Core.DiffSinger
{
public class G2pReplacementsData{
class DiffSingerG2pDictionaryData : G2pDictionaryData{
public struct Replacement{
public string from;
public string to;
}
public Replacement[]? replacements;

public static G2pReplacementsData Load(string text){
return OpenUtau.Core.Yaml.DefaultDeserializer.Deserialize<G2pReplacementsData>(text);
}

public Dictionary<string, string> toDict(){
public Dictionary<string, string> replacementsDict(){
var dict = new Dictionary<string, string>();
if(replacements!=null){
foreach(var r in replacements){
Expand All @@ -39,7 +36,7 @@ public abstract class DiffSingerG2pPhonemizer : DiffSingerBasePhonemizer
/// <summary>
/// Vowel symbols of the base G2p. The default is an empty array;
/// being virtual, it can be overridden to supply the actual symbols.
/// </summary>
protected virtual string[] GetBaseG2pVowels() => new string[0];
/// <summary>
/// Consonant symbols of the base G2p. The default is an empty array;
/// being virtual, it can be overridden to supply the actual symbols.
/// </summary>
protected virtual string[] GetBaseG2pConsonants() => new string[0];

protected override IG2p LoadG2p(string rootPath) {
protected override IG2p LoadG2p(string rootPath, bool useLangId = false) {
//Each phonemizer has a dedicated dictionary name, such as dsdict-en.yaml, dsdict-ru.yaml.
//If this dictionary exists, load it.
//If not, load dsdict.yaml.
Expand All @@ -54,8 +51,9 @@ protected override IG2p LoadG2p(string rootPath) {
if (File.Exists(dictionaryPath)) {
try {
string dictText = File.ReadAllText(dictionaryPath);
replacements = G2pReplacementsData.Load(dictText).toDict();
g2pBuilder.Load(dictText);
var dictData = Yaml.DefaultDeserializer.Deserialize<DiffSingerG2pDictionaryData>(dictText);
g2pBuilder.Load(dictData);
replacements = dictData.replacementsDict();
} catch (Exception e) {
Log.Error(e, $"Failed to load {dictionaryPath}");
}
Expand All @@ -79,6 +77,15 @@ protected override IG2p LoadG2p(string rootPath) {
foreach(var c in GetBaseG2pConsonants()){
phonemeSymbols[c]=false;
}
if(useLangId){
//For DiffSinger multi-dict voicebanks, any base g2p phoneme without an explicit replacement defaults to <langcode>/<phoneme>
var langCode = GetLangCode();
foreach(var ph in GetBaseG2pVowels().Concat(GetBaseG2pConsonants())){
if(!replacements.ContainsKey(ph)){
replacements[ph]=langCode + "/" + ph;
}
}
}
foreach(var from in replacements.Keys){
var to = replacements[from];
if(baseG2p.IsValidSymbol(to)){
Expand Down
Loading