Skip to content

Commit

Permalink
Merge pull request #1165 from lottev1991/MoreEnXSampaFixes
Browse files: browse the repository at this point in the history
[EN X-SAMPA Phonemizer] Multiple fixes
  • Loading branch information
stakira authored Jun 9, 2024
2 parents f68e800 + 8a2181b commit c58665d
Showing 1 changed file with 40 additions and 4 deletions.
44 changes: 40 additions & 4 deletions OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace OpenUtau.Plugin.Builtin {
/// </summary>
[Phonemizer("English X-SAMPA phonemizer", "EN X-SAMPA", "Lotte V", language: "EN")]
public class EnXSampaPhonemizer : SyllableBasedPhonemizer {
private readonly string[] vowels = "a,A,@,{,V,O,aU,aI,E,3,eI,I,i,oU,OI,U,u,Q,Ol,Ql,aUn,e@,eN,IN,e,o,Ar,Qr,Er,Ir,Or,Ur,ir,ur,aIr,aUr,A@,Q@,E@,I@,O@,U@,i@,u@,aI@,aU@,@r,@l,@m,@n,@N,1,e@m,e@n,y,I\\,M,U\\,Y,@\\,@`,3`,A`,Q`,E`,I`,O`,U`,i`,u`,aI`,aU`,},2,3\\,6,7,8,9,&,{~,I~,aU~,VI,VU,@U,ai,ei,Oi,au,ou,Ou,@u,i:,u:,O:,e@0,E~,e~,3r,ar,or,{l,Al,al,El,Il,il,ul,Ul,mm,nn,ll,NN".Split(',');
private readonly string[] vowels = "a,A,@,{,V,O,aU,aI,E,3,eI,I,i,oU,OI,U,u,Q,Ol,Ql,aUn,e@,eN,IN,e,o,Ar,Qr,Er,Ir,Or,Ur,ir,ur,aIr,aUr,A@,Q@,E@,I@,O@,U@,i@,u@,aI@,aU@,@r,@l,@m,@n,@N,1,e@m,e@n,y,I\\,M,U\\,Y,@\\,@`,3`,A`,Q`,E`,I`,O`,U`,i`,u`,aI`,aU`,},2,3\\,6,7,8,9,&,{~,I~,aU~,VI,VU,@U,ai,ei,Oi,au,ou,Ou,@u,i:,u:,O:,e@0,E~,e~,3r,ar,or,{l,Al,al,El,Il,il,ol,ul,Ul,oUl,mm,nn,ll,NN".Split(',');
private readonly string[] consonants = "b,tS,d,D,4,f,g,h,dZ,k,l,m,n,N,p,r,s,S,t,T,v,w,W,j,z,Z,t_},・,_".Split(',');
private readonly string[] affricates = "tS,dZ".Split(',');
private readonly string[] shortConsonants = "4".Split(",");
Expand Down Expand Up @@ -108,6 +108,15 @@ public class EnXSampaPhonemizer : SyllableBasedPhonemizer {

private bool isVelarNasalFallback = false;

// Alias substitution table for Kasane Teto's missing sample: maps "V" to "@".
// The table is parsed from a ';'-separated list of "key=value" entries;
// entries that do not split into exactly two parts, or whose key equals
// its value, are dropped before the dictionary is built.
private readonly Dictionary<string, string> tetoException = "V=@".Split(';')
.Select(entry => entry.Split('='))
.Where(parts => parts.Length == 2)
.Where(parts => parts[0] != parts[1])
.ToDictionary(parts => parts[0], parts => parts[1]);

// Enables the tetoException substitutions. ProcessSyllable sets this to true
// when HasOto finds neither "- bV" nor "bV" at the syllable's vowel tone;
// ValidateAlias then replaces every occurrence of each key in the alias
// with its mapped value.
private bool isTetoException = false;

private readonly Dictionary<string, string> vvExceptions =
new Dictionary<string, string>() {
{"aI","j"},
Expand Down Expand Up @@ -227,10 +236,14 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
isMissingCanadianRaising = true;
}

if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone) || (!HasOto($"- bV", syllable.vowelTone) && !HasOto($"bV", syllable.vowelTone))) {
if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone)) {
isSimpleDelta = true;
}

if (!HasOto($"- bV", syllable.vowelTone) && !HasOto($"bV", syllable.vowelTone)) {
isTetoException = true;
}

if ((!HasOto($"- I", syllable.vowelTone) && !HasOto($"I", syllable.vowelTone)) || (!HasOto($"- U", syllable.vowelTone) && !HasOto($"U", syllable.vowelTone))) {
isMiniDelta = true;
}
Expand Down Expand Up @@ -370,9 +383,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
lastC = 0;
} else {
var cv = cc.Last() + v;
basePhoneme = cv;
if ((!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) {
if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
basePhoneme = crv;
} else {
basePhoneme = cv;
}
// try CCV
if (cc.Length - firstC > 1) {
Expand Down Expand Up @@ -424,6 +438,8 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
var cc1 = $"{string.Join("", cc.Skip(i))}";
var ccv = string.Join("", cc.Skip(i)) + v;
var ucv = $"_{cc.Last()}{v}";
var crv = $"{cc.Last()} {v}";
var cv = $"{cc.Last()}{v}";
// Use [C1C2...] when current word starts with 2 consonants or more
if (CurrentWordCc.Length >= 2) {
cc1 = $"{string.Join("", cc.Skip(i))}";
Expand Down Expand Up @@ -452,6 +468,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
// Use _CV if it exists
} else if ((HasOto(ucv, syllable.vowelTone) || HasOto(ValidateAlias(ucv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !cc1.Contains($"{cc[i]} {cc[i + 1]}")) {
basePhoneme = ucv;
} else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
basePhoneme = crv;
} else {
basePhoneme = cv;
}
if (i + 1 < lastC) {
var cc2 = $"{string.Join("", cc.Skip(i))}";
Expand Down Expand Up @@ -483,6 +503,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
// Use _CV if it exists
} else if ((HasOto(ucv, syllable.vowelTone) || HasOto(ValidateAlias(ucv), syllable.vowelTone)) && (HasOto(cc2, syllable.vowelTone) || HasOto(ValidateAlias(cc2), syllable.vowelTone)) && !cc2.Contains($"{cc[i + 1]} {cc[i + 2]}") && !PreviousWordCc.Contains(ucv)) {
basePhoneme = ucv;
} else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
basePhoneme = crv;
} else {
basePhoneme = cv;
}
if (HasOto(cc1, syllable.tone) && HasOto(cc2, syllable.tone) && !cc1.Contains($"{string.Join("", cc.Skip(i))}")) {
// like [V C1] [C1 C2] [C2 C3] [C3 ..]
Expand Down Expand Up @@ -707,6 +731,18 @@ protected override string ValidateAlias(string alias) {
}
}

if (isTetoException) {
foreach (var syllable in tetoException) {
alias = alias.Replace(syllable.Key, syllable.Value);
}
}

if (isMissingCanadianRaising) {
foreach (var syllable in CanadianRaising) {
alias = alias.Replace(syllable.Key, syllable.Value);
}
}

// Split diphthongs adjuster
if (alias.Contains("U^")) {
alias = alias.Replace("U^", "U");
Expand Down

0 comments on commit c58665d

Please sign in to comment.