-
Notifications
You must be signed in to change notification settings - Fork 311
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1147 from lottev1991/JaMonoG2p
Add Japanese monophone G2P (tailored to AI voicebanks/phonemizers) + add support to Diffsinger Japanese Phonemizer
- Loading branch information
Showing 5 changed files with 179 additions and 10 deletions.
There are no files selected for viewing
34 changes: 32 additions & 2 deletions
34
OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,40 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
|
||
using System.Linq; | ||
using OpenUtau.Api; | ||
using OpenUtau.Core.G2p; | ||
|
||
namespace OpenUtau.Core.DiffSinger { | ||
/// <summary>
/// DiffSinger phonemizer for Japanese. It supplies the dictionary file name,
/// the base G2P (<see cref="JapaneseMonophoneG2p"/>) and the vowel/consonant
/// symbol lists to the G2P-based DiffSinger phonemizer it derives from.
/// </summary>
[Phonemizer("DiffSinger Japanese Phonemizer", "DIFFS JA", language: "JA")]
public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer {
    /// <summary>Name of the dictionary file used by this phonemizer.</summary>
    protected override string GetDictionaryName() => "dsdict-ja.yaml";

    /// <summary>Creates the base G2P: a new <see cref="JapaneseMonophoneG2p"/>.</summary>
    protected override IG2p LoadBaseG2p() => new JapaneseMonophoneG2p();

    /// <summary>Symbols of the base G2P that are reported as vowels.</summary>
    protected override string[] GetBaseG2pVowels() => new string[] {
        "A", "AP", "E", "I", "N", "O", "SP", "U",
        "a", "e", "i", "o", "u"
    };

    /// <summary>Symbols of the base G2P that are reported as consonants.</summary>
    protected override string[] GetBaseG2pConsonants() => new string[] {
        "b", "by", "ch", "cl", "d", "dy", "f", "g", "gw", "gy", "h", "hy",
        "j", "k", "kw", "ky", "m", "my", "n", "ng", "ngy", "ny", "p", "py",
        "r", "ry", "s", "sh", "t", "ts", "ty", "v", "w", "y", "z"
    };

    /// <summary>
    /// Produces the phonemes for one note group.
    /// </summary>
    /// <remarks>
    /// A group whose first note has the lyric "-" yields the simple result "SP".
    /// Otherwise the phonemes are read from <c>partResult</c>, keyed by the
    /// position of the first note, and copied into the result one by one.
    /// </remarks>
    /// <exception cref="Exception">
    /// Thrown when <c>partResult</c> has no entry for the first note's position.
    /// </exception>
    public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
        var head = notes[0];
        if (head.lyric == "-") {
            return MakeSimpleResult("SP");
        }
        if (!partResult.TryGetValue(head.position, out var phonemes)) {
            throw new Exception("Part result not found");
        }
        // Each stored tuple is (symbol, position); map it field by field.
        var mapped = phonemes
            .Select(entry => new Phoneme() {
                phoneme = entry.Item1,
                position = entry.Item2,
            })
            .ToArray();
        return new Result {
            phonemes = mapped,
        };
    }
}
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.OnnxRuntime; | ||
using OpenUtau.Api; | ||
using OpenUtau.Core.Util; | ||
|
||
namespace OpenUtau.Core.G2p { | ||
/// <summary>
/// Japanese monophone G2P built from the embedded <c>g2p_ja_mono</c> resource.
/// The resource is read as a zip containing <c>hiragana.txt</c>,
/// <c>katakana.txt</c>, <c>romaji.txt</c>, <c>special.txt</c> and
/// <c>phones.txt</c>; all four entry lists are merged into one dictionary.
/// Loaded state is static and shared by every instance.
/// </summary>
public class JapaneseMonophoneG2p : G2pPack {
    // Grapheme table. The constructor skips the first four (empty) slots and
    // maps every remaining entry to its index in this array, so the order of
    // the entries is significant and must not be changed.
    private static readonly string[] graphemes = new string[] {
        "", "", "", "", "a", "b", "c", "d", "e", "f", "g",
        "h", "i", "j", "k", "m", "n", "o", "p", "r", "s",
        "t", "u", "v", "w", "y", "z", "あ", "い", "う", "え",
        "お", "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "か", "き", "く",
        "け", "こ", "さ", "し", "す", "せ", "そ", "ざ", "じ", "ず",
        "ぜ", "ぞ", "た", "ち", "つ", "て", "と", "だ", "ぢ", "づ", "で",
        "ど", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ",
        "ば", "び", "ぶ", "べ", "ぼ", "ぱ", "ぴ", "ぷ", "ぺ", "ぽ", "ま",
        "み", "む", "め", "も", "や", "ゆ", "よ", "ゃ", "ゅ", "ょ", "ら",
        "り", "る", "れ", "ろ", "わ", "を", "ん", "っ", "ヴ", "ゔ", "゜",
        "ゐ", "ゑ", "ア", "イ", "ウ", "エ", "オ", "ァ", "ィ", "ゥ", "ェ",
        "ォ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ",
        "ザ", "ジ", "ズ", "ゼ", "ゾ", "タ", "チ", "ツ", "テ", "ト", "ダ",
        "ヂ", "ヅ", "デ", "ド", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ",
        "フ", "ヘ", "ホ", "バ", "ビ", "ブ", "ベ", "ボ", "パ", "ピ", "プ",
        "ペ", "ポ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ャ",
        "ュ", "ョ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ヲ", "ン", "ッ",
        "ヰ", "ヱ", "息", "吸", "-", "R"
    };

    // Phoneme table handed to the base class through Phonemes.
    private static readonly string[] phonemes = new string[] {
        "", "", "", "", "A", "AP", "E", "I", "N", "O", "U",
        "SP", "a", "b", "ch", "cl", "d", "dy", "e", "f", "g", "gw",
        "gy", "h", "hy", "i", "j", "k", "kw", "ky", "m", "my", "n",
        "ng", "ny", "o", "p", "py", "r", "ry", "s", "sh", "t", "ts",
        "ty", "u", "v", "w", "y", "z"
    };

    private static readonly object lockObj = new object();
    // Also serves as the "already loaded" marker; it is assigned last.
    private static Dictionary<string, int> graphemeIndexes;
    // Single dictionary holding the hiragana, katakana, romaji and special entries.
    private static IG2p dict;
    // NOTE(review): nothing in this class creates an InferenceSession. LoadPack
    // returns this field as-is, so it stays null unless it is set elsewhere.
    private static InferenceSession session;
    private static readonly Dictionary<string, string[]> predCache = new Dictionary<string, string[]>();

    /// <summary>
    /// Builds the lookup dictionary from the zipped pack in <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Zip archive bytes containing the five text files.</param>
    /// <param name="prepGrapheme">Optional transform applied to every grapheme; identity when null.</param>
    /// <param name="prepPhoneme">Optional transform applied to every phoneme symbol; identity when null.</param>
    /// <returns>
    /// The built dictionary paired with the current value of the static
    /// <c>session</c> field (this method does not create a session).
    /// </returns>
    protected Tuple<IG2p, InferenceSession> LoadPack(
        byte[] data,
        Func<string, string> prepGrapheme = null,
        Func<string, string> prepPhoneme = null) {
        prepGrapheme = prepGrapheme ?? ((string s) => s);
        prepPhoneme = prepPhoneme ?? ((string s) => s);

        // Extract everything up front so a missing file fails before any parsing.
        string[] hiraganaTxt = Zip.ExtractText(data, "hiragana.txt");
        string[] katakanaTxt = Zip.ExtractText(data, "katakana.txt");
        string[] romajiTxt = Zip.ExtractText(data, "romaji.txt");
        string[] specialTxt = Zip.ExtractText(data, "special.txt");
        string[] phonesTxt = Zip.ExtractText(data, "phones.txt");

        var builder = G2pDictionary.NewBuilder();

        // phones.txt: two whitespace-separated columns per line; other lines are ignored.
        var symbolRows = phonesTxt
            .Select(line => line.Trim().Split())
            .Where(parts => parts.Length == 2);
        foreach (var parts in symbolRows) {
            builder.AddSymbol(prepPhoneme(parts[0]), parts[1]);
        }

        // The four entry files share one format and are added in this fixed order.
        var entryFiles = new[] { hiraganaTxt, katakanaTxt, romajiTxt, specialTxt };
        foreach (string[] lines in entryFiles) {
            // Lines starting with ";;;" are comments. Ordinal comparison keeps the
            // check independent of the current culture.
            // NOTE(review): the separator below is a single space as it appears in
            // this source; since the second column is split on whitespace again, a
            // two-space separator may have been intended - confirm against the
            // data files.
            var entryRows = lines
                .Where(line => !line.StartsWith(";;;", StringComparison.Ordinal))
                .Select(line => line.Trim().Split(new string[] { " " }, StringSplitOptions.None))
                .Where(parts => parts.Length == 2);
            foreach (var parts in entryRows) {
                builder.AddEntry(
                    prepGrapheme(parts[0]),
                    parts[1].Split().Select(symbol => prepPhoneme(symbol)));
            }
        }

        return Tuple.Create((IG2p) builder.Build(), session);
    }

    /// <summary>
    /// Loads the shared pack on first construction (under a lock) and wires the
    /// shared tables, dictionary, session and prediction cache into the base class.
    /// </summary>
    public JapaneseMonophoneG2p() {
        lock (lockObj) {
            if (graphemeIndexes == null) {
                // Map each non-empty grapheme to its index in the full table
                // (offset by the four skipped leading slots).
                var indexes = graphemes
                    .Skip(4)
                    .Select((g, i) => Tuple.Create(g, i))
                    .ToDictionary(t => t.Item1, t => t.Item2 + 4);
                var tuple = LoadPack(Data.Resources.g2p_ja_mono);
                dict = tuple.Item1;
                session = tuple.Item2;
                // Publish the marker only after loading succeeded, so a failed
                // load is retried instead of leaving a null dictionary behind.
                graphemeIndexes = indexes;
            }
        }
        GraphemeIndexes = graphemeIndexes;
        Phonemes = phonemes;
        Session = session;
        Dict = dict;
        PredCache = predCache;
    }
}
} |