Skip to content

Commit

Permalink
Merge pull request #1147 from lottev1991/JaMonoG2p
Browse files Browse the repository at this point in the history
Add Japanese monophone G2P (tailored to AI voicebanks/phonemizers) + add support to Diffsinger Japanese Phonemizer
  • Loading branch information
stakira authored Jun 9, 2024
2 parents 304faa8 + 3c63ca7 commit a265891
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -1,10 +1,40 @@
using System;
using System.Collections.Generic;

using System.Linq;
using OpenUtau.Api;
using OpenUtau.Core.G2p;

namespace OpenUtau.Core.DiffSinger {
[Phonemizer("DiffSinger Japanese Phonemizer", "DIFFS JA", language: "JA")]
public class DiffSingerJapanesePhonemizer : DiffSingerBasePhonemizer {
public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer {
/// <summary>Returns the dictionary name this phonemizer reports: "dsdict-ja.yaml".</summary>
protected override string GetDictionaryName() {
    return "dsdict-ja.yaml";
}
/// <summary>Creates the base G2P for this phonemizer: a new <see cref="JapaneseMonophoneG2p"/>.</summary>
protected override IG2p LoadBaseG2p() {
    return new JapaneseMonophoneG2p();
}
/// <summary>
/// Returns the base G2P symbols this phonemizer classifies as vowels.
/// A fresh array is produced on every call.
/// </summary>
protected override string[] GetBaseG2pVowels() {
    string[] vowelSymbols = {
        "A", "AP", "E", "I", "N", "O", "SP", "U",
        "a", "e", "i", "o", "u"
    };
    return vowelSymbols;
}

/// <summary>
/// Returns the base G2P symbols this phonemizer classifies as consonants.
/// A fresh array is produced on every call.
/// </summary>
protected override string[] GetBaseG2pConsonants() {
    string[] consonantSymbols = {
        "b", "by", "ch", "cl", "d", "dy", "f", "g", "gw", "gy", "h", "hy",
        "j", "k", "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py",
        "r", "ry", "s", "sh", "t", "ts", "ty", "v", "w", "y", "z"
    };
    return consonantSymbols;
}

/// <summary>
/// Produces the phonemes for a group of notes.
/// </summary>
/// <remarks>
/// Only the first note of <paramref name="notes"/> is inspected. A lyric of "-" yields the
/// simple result "SP"; otherwise the phonemes stored in partResult under the first note's
/// position are returned.
/// </remarks>
/// <exception cref="Exception">partResult has no entry for the first note's position.</exception>
public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
    var head = notes[0];
    if (head.lyric == "-") {
        return MakeSimpleResult("SP");
    }

    if (partResult.TryGetValue(head.position, out var timedPhonemes)) {
        // Each stored pair is (phoneme symbol, position).
        var converted = timedPhonemes
            .Select(pair => new Phoneme { phoneme = pair.Item1, position = pair.Item2 })
            .ToArray();
        return new Result { phonemes = converted };
    }

    throw new Exception("Part result not found");
}
}
}
26 changes: 18 additions & 8 deletions OpenUtau.Core/G2p/Data/Resources.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions OpenUtau.Core/G2p/Data/Resources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@
<data name="g2p-it" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>g2p-it.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="g2p-ja-mono" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>g2p-ja-mono.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="g2p-jyutping" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>g2p-jyutping.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
Expand Down
Binary file added OpenUtau.Core/G2p/Data/g2p-ja-mono.zip
Binary file not shown.
126 changes: 126 additions & 0 deletions OpenUtau.Core/G2p/JapaneseMonophoneG2p.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using OpenUtau.Api;
using OpenUtau.Core.Util;

namespace OpenUtau.Core.G2p {
public class JapaneseMonophoneG2p : G2pPack {
// Grapheme table. The constructor maps every entry from position 4 onward to its
// position in this array (Skip(4), then index + 4) to build graphemeIndexes, so the
// order of the entries is significant and must not be changed casually.
// The first four slots are empty strings and are left out of that map -
// presumably reserved ids for special tokens; TODO confirm.
private static readonly string[] graphemes = new string[] {
"", "", "", "", "a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "m", "n", "o", "p", "r", "s",
"t", "u", "v", "w", "y", "z", "あ", "い", "う", "え",
"お", "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "か", "き", "く",
"け", "こ", "さ", "し", "す", "せ", "そ", "ざ", "じ", "ず",
"ぜ", "ぞ", "た", "ち", "つ", "て", "と", "だ", "ぢ", "づ", "で",
"ど", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ",
"ば", "び", "ぶ", "べ", "ぼ", "ぱ", "ぴ", "ぷ", "ぺ", "ぽ", "ま",
"み", "む", "め", "も", "や", "ゆ", "よ", "ゃ", "ゅ", "ょ", "ら",
"り", "る", "れ", "ろ", "わ", "を", "ん", "っ", "ヴ", "ゔ","゜",
"ゐ", "ゑ", "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ",
"ォ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ",
"ザ", "ジ", "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ",
"ヂ", "ヅ", "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ",
"フ", "ヘ", "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ",
"ペ", "ポ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ",
"ュ", "ョ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ",
"ヰ", "ヱ", "息", "吸", "-", "R"
};

// Phoneme symbol table; the constructor assigns it to Phonemes.
// Like the grapheme table it begins with four empty slots - presumably reserved
// ids for special tokens; TODO confirm.
// NOTE(review): how Phonemes is consumed is defined outside this file, so whether the
// order of these entries matters cannot be confirmed from here - treat it as significant.
private static readonly string[] phonemes = new string[] {
"", "", "", "", "A", "AP", "E", "I", "N", "O", "U",
"SP", "a", "b", "ch", "cl", "d", "dy", "e", "f", "g", "gw",
"gy", "h", "hy", "i", "j", "k", "kw", "ky", "m", "my", "n",
"ng", "ny", "o", "p", "py", "r", "ry", "s", "sh", "t", "ts",
"ty", "u", "v", "w", "y", "z"
};

// Guards the one-time initialisation of the static state below.
// readonly: a lock target must never be swapped out from under waiting threads.
private static readonly object lockObj = new object();
// Grapheme -> position in the graphemes table; null until the first instance is constructed.
private static Dictionary<string, int> graphemeIndexes;
// The constructor assigns the same dictionary instance (LoadPack's first tuple item)
// to all four of these fields.
private static IG2p hiragana;
private static IG2p katakana;
private static IG2p romaji;
private static IG2p special;
// Assigned from LoadPack's second tuple item. LoadPack returns this field's own
// current value rather than creating a session, so as written it stays null.
private static InferenceSession session;
// Single cache instance shared by every JapaneseMonophoneG2p (assigned to PredCache
// in the constructor); never reassigned, hence readonly.
private static readonly Dictionary<string, string[]> predCache = new Dictionary<string, string[]>();

/// <summary>
/// Builds this pack's lookup dictionary from a zip archive.
/// </summary>
/// <remarks>
/// "phones.txt" supplies the symbol table. "hiragana.txt", "katakana.txt", "romaji.txt"
/// and "special.txt" are then read, in that order, into one shared dictionary.
/// No inference session is created here: the second tuple item is simply the current
/// value of the static <c>session</c> field.
/// </remarks>
/// <param name="data">Bytes of the zip archive containing the text files listed above.</param>
/// <param name="prepGrapheme">Optional transform applied to each grapheme key; identity when null.</param>
/// <param name="prepPhoneme">Optional transform applied to each phoneme symbol; identity when null.</param>
/// <returns>The built dictionary paired with the current static session.</returns>
protected Tuple<IG2p, InferenceSession> LoadPack(
    byte[] data,
    Func<string, string> prepGrapheme = null,
    Func<string, string> prepPhoneme = null) {
    prepGrapheme = prepGrapheme ?? ((string s) => s);
    prepPhoneme = prepPhoneme ?? ((string s) => s);

    // Extract every file up front, in the original order, so a missing file fails
    // before anything is added to the builder.
    string[][] entryFiles = {
        Zip.ExtractText(data, "hiragana.txt"),
        Zip.ExtractText(data, "katakana.txt"),
        Zip.ExtractText(data, "romaji.txt"),
        Zip.ExtractText(data, "special.txt"),
    };
    string[] phonesTxt = Zip.ExtractText(data, "phones.txt");

    var builder = G2pDictionary.NewBuilder();

    // Symbol table: whitespace-separated pairs; any other line shape is ignored.
    foreach (string line in phonesTxt) {
        string[] parts = line.Trim().Split();
        if (parts.Length == 2) {
            builder.AddSymbol(prepPhoneme(parts[0]), parts[1]);
        }
    }

    // NOTE(review): a line becomes an entry only if splitting on this exact separator
    // yields exactly two fields (grapheme, phoneme list) - verify the separator against
    // the text files inside the pack.
    string[] fieldSeparator = { " " };
    foreach (string[] lines in entryFiles) {
        foreach (string line in lines) {
            // ";;;" marks a comment line. Ordinal comparison keeps the check independent
            // of the current culture.
            if (line.StartsWith(";;;", StringComparison.Ordinal)) {
                continue;
            }
            string[] parts = line.Trim().Split(fieldSeparator, StringSplitOptions.None);
            if (parts.Length != 2) {
                continue;
            }
            builder.AddEntry(
                prepGrapheme(parts[0]),
                parts[1].Split().Select(symbol => prepPhoneme(symbol)));
        }
    }

    return Tuple.Create((IG2p) builder.Build(), session);
}

/// <summary>
/// Creates the G2P, loading the shared static tables the first time an instance is built.
/// </summary>
/// <remarks>
/// The static state is initialised once under <c>lockObj</c>; every instance then points
/// at the same grapheme index, phoneme table, session, dictionary and prediction cache.
/// </remarks>
public JapaneseMonophoneG2p() {
    lock (lockObj) {
        if (graphemeIndexes == null) {
            // Grapheme ids are positions in the graphemes table; the first four slots
            // are skipped. Add throws on a duplicate grapheme.
            var indexes = new Dictionary<string, int>();
            for (int i = 4; i < graphemes.Length; i++) {
                indexes.Add(graphemes[i], i);
            }

            var tuple = LoadPack(Data.Resources.g2p_ja_mono);
            hiragana = tuple.Item1;
            katakana = tuple.Item1;
            romaji = tuple.Item1;
            special = tuple.Item1;
            session = tuple.Item2;

            // graphemeIndexes doubles as the "initialised" flag, so publish it last:
            // if LoadPack throws, the next construction retries instead of silently
            // running with null dictionaries.
            graphemeIndexes = indexes;
        }
    }
    GraphemeIndexes = graphemeIndexes;
    Phonemes = phonemes;
    Session = session;
    // hiragana/katakana/romaji/special all reference the same dictionary, so one
    // assignment replaces the four successive ones (the last was "special").
    // NOTE(review): assumes the Dict setter is a plain store - confirm in the base class.
    Dict = special;
    PredCache = predCache;
}
}
}

0 comments on commit a265891

Please sign in to comment.