Skip to content

Commit 57f66b6

Browse files
authored
Merge pull request #959 from lottev1991/ZhYueSyoPhonemizer
Add Cantonese "Syo-style" Phonemizer
2 parents 343d861 + ea1ceb2 commit 57f66b6

File tree

1 file changed

+368
-0
lines changed

1 file changed

+368
-0
lines changed
Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
using OpenUtau.Api;
2+
using OpenUtau.Core.G2p;
3+
using OpenUtau.Core.Ustx;
4+
using System.Collections.Generic;
5+
using System.Linq;
6+
7+
namespace OpenUtau.Plugin.Builtin {
8+
/// <summary>
9+
/// Cantonese phonemizer for Syo-style banks.
10+
/// Supports both full jyutping syllables as well as syllable fallbacks without a final consonant or falling diphthong.
11+
/// Supports hanzi and jyutping input.
12+
/// </summary>
13+
[Phonemizer("Cantonese Syo-Style Phonemizer", "ZH-YUE SYO", "Lotte V", language: "ZH-YUE")]
14+
public class CantoneseSyoPhonemizer : Phonemizer {
15+
16+
/// <summary>
/// The consonant table: comma-separated initials that may be stripped from the front of a lyric.
/// </summary>
static readonly string consonants = "b,p,m,f,d,t,n,l,g,k,ng,h,gw,kw,w,z,c,s,j";

/// <summary>
/// The vowel split table. Each comma-separated entry is "final=tail alias",
/// e.g. "yut=yu t": the final "yut" is sung with a trailing "yu t" phoneme.
/// </summary>
// NOTE(review): the "um" entry used to read "um=um", the only entry whose tail was not
// written as "<nucleus> <coda>"; it is now "um=u m" like its siblings ("am=a m", "im=i m").
// Confirm against a Syo-style voicebank.
static readonly string vowels = "aap=aa p,aat=aa t,aak=aa k,aam=aa m,aan=aa n,aang=aa ng,aai=aa i,aau=aa u,ap=a p,at=a t,ak=a k,am=a m,an=a n,ang=a ng,ai=a i,au=a u,op=o p,ot=o t,ok=o k,om=o m,on=o n,ong=o ng,oi=o i,ou=o u,oet=oe t,oek=oe k,oeng=oe ng,oei=oe i,eot=eo t,eon=eo n,eoi=eo i,ep=e p,et=e t,ek=e k,em=e m,en=e n,eng=e ng,ei=e i,eu=e u,up=u p,ut=u t,uk=uu k,um=u m,un=u n,ung=uu ng,ui=u i,yut=yu t,yun=yu n,ip=i p,it=i t,ik=ii k,im=i m,in=i n,ing=ii ng,iu=i u";

/// <summary>
/// Vowel substitutes. Each line is "final1,final2,...=nucleus": when a full syllable alias is
/// missing, the final may be replaced by the bare nucleus (e.g. "jyut" -> "jyu").
/// </summary>
static readonly string[] substitution = new string[] {
    "aap,aat,aak,aam,aan,aang,aai,aau=aa", "ap,at,ak,am,an,ang,ai,au=a", "op,ot,ok,om,on,ong,oi,ou=o", "oet,oek,oen,oeng,oei=oe", "eot,eon,eoi=eo","ep,et,ek,em,en,eng,ei,eu=e", "uk,ung=uu", "up,ut,um,un,ui=u", "yut,yun=yu","ik,ing=ii", "ip,it,im,in,iu=i"
};

/// <summary>
/// Substitutes for tail aliases. Each line is "tail=fallback tail", tried when the
/// primary tail alias is not found.
/// </summary>
static readonly string[] finalSub = new string[] {
    "ii ng=i ng", "ii k=i k", "uu k=u k", "uu ng=u ng", "oe t=eo t", "oe i=eo i"
};

// Lookup structures parsed from the tables above by the static constructor.
// All four are assigned exactly once there, hence readonly.
static readonly HashSet<string> cSet;
static readonly Dictionary<string, string> vDict;
static readonly Dictionary<string, string> substituteLookup;
static readonly Dictionary<string, string> finalSubLookup;
44+
45+
// Parses the comma/equals-delimited string tables of this class into their lookup structures.
static CantoneseSyoPhonemizer() {
    // Expands lines of the form "k1,k2,...=v" into one (key -> v) entry per key.
    // Dictionary.Add throws on a duplicate key, so a malformed table fails at type load.
    Dictionary<string, string> ExpandGroups(string[] lines) {
        var lookup = new Dictionary<string, string>();
        foreach (var line in lines) {
            var sides = line.Split('=');
            foreach (var key in sides[0].Split(',')) {
                lookup.Add(key, sides[1]);
            }
        }
        return lookup;
    }

    cSet = new HashSet<string>(consonants.Split(','));

    // Each vowel entry is a single "final=tail" pair.
    var splitTable = new Dictionary<string, string>();
    foreach (var entry in vowels.Split(',')) {
        var pair = entry.Split('=');
        splitTable.Add(pair[0], pair[1]);
    }
    vDict = splitTable;

    substituteLookup = ExpandGroups(substitution);
    finalSubLookup = ExpandGroups(finalSub);
}
63+
64+
// Singer whose oto entries are queried when resolving aliases.
private USinger singer;

// Keeps a reference to the given singer; no other work is done here.
public override void SetSinger(USinger singer) {
    this.singer = singer;
}
68+
69+
/// <summary>
/// Romanizes the leading lyric of every note group, turning hanzi into jyutping.
/// </summary>
/// <param name="groups">Note groups; the first note of each group carries the lyric.</param>
public override void SetUp(Note[][] groups) => JyutpingConversion.RomanizeNotes(groups);
76+
/// <summary>
/// Turns one jyutping syllable into one or two phonemes.
/// </summary>
/// <remarks>
/// The overall logic is:
/// 1. Remove the consonant: "jyut" -> "yut".
/// 2. Look up the trailing sound in the vowel table: "yut" -> "yu t".
/// 3. Return the head ("jyut", or the fallback "jyu") followed by the tail ("yu t"),
///    or by a vowel ending ("yut -") when nothing follows and that alias exists.
/// Syllables without a table entry get a "{vowel} -" ending when nothing follows,
/// and are otherwise returned as a single phoneme.
/// </remarks>
/// <param name="notes">Notes of the current group; the first one carries the lyric and tone.</param>
/// <param name="prev">Unused.</param>
/// <param name="next">Unused.</param>
/// <param name="prevNeighbour">Directly preceding note, or null.</param>
/// <param name="nextNeighbour">Directly following note, or null.</param>
/// <param name="prevNeighbours">Unused.</param>
/// <returns>The phonemes for this note group.</returns>
public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
    var note = notes[0];
    var lyric = note.lyric;
    string consonant = string.Empty;
    string vowel;

    if (lyric.Length > 2 && cSet.Contains(lyric.Substring(0, 2))) {
        // First try the two-letter consonants "gw", "kw", "ng".
        consonant = lyric.Substring(0, 2);
        vowel = lyric.Substring(2);
    } else if (lyric.Length > 1 && cSet.Contains(lyric.Substring(0, 1)) && lyric != "ng") {
        // Then single-letter consonants. A bare "ng" lyric is not split into "n" + "g".
        consonant = lyric.Substring(0, 1);
        vowel = lyric.Substring(1);
    } else {
        // Otherwise the whole lyric is treated as the vowel part.
        vowel = lyric;
    }

    string phoneme0 = lyric;
    string fin = $"{vowel} -";
    // Tail phonemes are placed relative to the end of the whole group.
    int totalDuration = notes.Sum(n => n.duration);
    int halfDuration = totalDuration / 2;
    bool phraseStart = FollowsSilenceOrStop(prevNeighbour);

    // Look up the vowel split table. For example, "yut" will match "yu t".
    if (vDict.TryGetValue(vowel, out var phoneme1) && !string.IsNullOrEmpty(phoneme1)) {
        // Head candidates: the full syllable, then the consonant plus substitute nucleus.
        var heads = new List<string> { lyric };
        if (substituteLookup.TryGetValue(vowel, out var sub)) {
            heads.Add(consonant + sub);
        }

        // At a phrase start the "- " prefixed aliases are tried before the plain ones.
        // The list is derived from heads, so a missing substitute cannot cause an index error.
        var headTests = phraseStart
            ? heads.Select(h => $"- {h}").Concat(heads).ToList()
            : heads;
        if (checkOtoUntilHit(headTests, note, out var otoHead)) {
            phoneme0 = otoHead.Alias;
        }

        if (nextNeighbour == null && singer.TryGetMappedOto(fin, note.tone, out _)) {
            // Nothing follows and a vowel ending exists: prefer it over the split tail.
            // If the colour-aware lookup misses, the split tail from the table is kept.
            if (checkOtoUntilHitFinal(new List<string> { fin }, note, out var otoFin)) {
                phoneme1 = otoFin.Alias;
            }
            // The ending gets 60 ticks, but no more than half of the total duration.
            return MakeHeadTailResult(phoneme0, phoneme1, totalDuration - System.Math.Min(60, halfDuration));
        }

        var tails = new List<string> { phoneme1 };
        if (finalSubLookup.TryGetValue(phoneme1, out var finSub)) {
            tails.Add(finSub);
        }
        if (!checkOtoUntilHitFinal(tails, note, out var otoTail)) {
            return MakeSimpleResult(phoneme0);
        }
        // The tail gets 120 ticks, but no more than half of the total duration.
        return MakeHeadTailResult(phoneme0, otoTail.Alias, totalDuration - System.Math.Min(120, halfDuration));
    }

    // From here on the vowel has no entry in the split table.
    var plainTests = phraseStart
        ? new List<string> { $"- {lyric}", lyric }
        : new List<string> { lyric };

    // Vowel ending on open syllables. If the ending alias does not exist, it is not inserted.
    if (nextNeighbour == null) {
        if (checkOtoUntilHit(plainTests, note, out var otoOpen)) {
            phoneme0 = otoOpen.Alias;
        } else if (!phraseStart) {
            // Only the non-phrase-start case gives up on a head miss; at a phrase start
            // the raw lyric is kept and the ending is still attempted.
            return MakeSimpleResult(phoneme0);
        }

        if (!checkOtoUntilHitFinal(new List<string> { fin }, note, out var otoEnd)) {
            return MakeSimpleResult(phoneme0);
        }
        // The ending gets 60 ticks, but no more than half of the total duration.
        return MakeHeadTailResult(phoneme0, otoEnd.Alias, totalDuration - System.Math.Min(60, halfDuration));
    }

    // No splitting is needed. Return a single phoneme.
    if (!checkOtoUntilHit(plainTests, note, out var otoPlain)) {
        return MakeSimpleResult(phoneme0);
    }
    return new Result {
        phonemes = new Phoneme[] {
            new Phoneme() {
                phoneme = otoPlain.Alias,
            }
        },
    };
}

// True when there is no previous neighbour, or its lyric ends in "p", "t" or "k";
// in both cases the "- " prefixed aliases are tried first.
private static bool FollowsSilenceOrStop(Note? prevNeighbour) {
    if (prevNeighbour == null) {
        return true;
    }
    var prevLyric = prevNeighbour.Value.lyric;
    // Ordinal: these are romanization letters, not linguistic text.
    return prevLyric.EndsWith("p", System.StringComparison.Ordinal)
        || prevLyric.EndsWith("t", System.StringComparison.Ordinal)
        || prevLyric.EndsWith("k", System.StringComparison.Ordinal);
}

// Builds a two-phoneme result: head at the default position, tail at tailPosition.
private static Result MakeHeadTailResult(string head, string tail, int tailPosition) {
    return new Result {
        phonemes = new Phoneme[] {
            new Phoneme() {
                phoneme = head,
            },
            new Phoneme() {
                phoneme = tail,
                position = tailPosition,
            }
        },
    };
}
268+
269+
/// <summary>
/// Converts hanzi to jyutping, based on G2P.
/// </summary>
public class JyutpingConversion {
    /// <summary>
    /// Replaces the first note of <paramref name="group"/> with a copy carrying
    /// <paramref name="lyric"/>; all other copied fields come from the old first note.
    /// </summary>
    /// <param name="group">Note group, modified in place.</param>
    /// <param name="lyric">New lyric for the first note.</param>
    /// <returns>The same array instance that was passed in.</returns>
    public static Note[] ChangeLyric(Note[] group, string lyric) {
        var oldNote = group[0];
        group[0] = new Note {
            lyric = lyric,
            phoneticHint = oldNote.phoneticHint,
            tone = oldNote.tone,
            position = oldNote.position,
            duration = oldNote.duration,
            phonemeAttributes = oldNote.phonemeAttributes,
        };
        return group;
    }

    /// <summary>
    /// Replaces every single-character hanzi lyric by its jyutping reading;
    /// all other lyrics are returned unchanged.
    /// </summary>
    /// <param name="lyrics">Lyrics in note order.</param>
    /// <returns>A new array of the same length as <paramref name="lyrics"/>.</returns>
    public static string[] Romanize(IEnumerable<string> lyrics) {
        var lyricsArray = lyrics.ToArray();
        var hanziLyrics = lyricsArray
            .Where(ZhG2p.CantoneseInstance.IsHanzi)
            .ToList();
        // Invariant lowering: jyutping is ASCII data and must not depend on the UI culture
        // (a Turkish culture would lower "I" to a dotless i). The "?." makes the null check
        // below meaningful; Split() itself never returns null.
        var jyutpingResult = ZhG2p.CantoneseInstance.Convert(hanziLyrics, false, false)?.ToLowerInvariant().Split();
        if (jyutpingResult == null) {
            return lyricsArray;
        }
        var jyutpingIndex = 0;
        // Stop once the converted syllables run out instead of indexing past the end.
        for (int i = 0; i < lyricsArray.Length && jyutpingIndex < jyutpingResult.Length; i++) {
            if (lyricsArray[i].Length == 1 && ZhG2p.CantoneseInstance.IsHanzi(lyricsArray[i])) {
                lyricsArray[i] = jyutpingResult[jyutpingIndex];
                jyutpingIndex++;
            }
        }
        return lyricsArray;
    }

    /// <summary>
    /// Romanizes the first lyric of every group and writes it back into the group.
    /// </summary>
    /// <param name="groups">Note groups, modified in place; may be empty.</param>
    public static void RomanizeNotes(Note[][] groups) {
        var resultLyrics = Romanize(groups.Select(group => group[0].lyric));
        // Plain loop: Zip(...).Last() only forced the side effect and threw on an empty input.
        for (int i = 0; i < groups.Length; i++) {
            ChangeLyric(groups[i], resultLyrics[i]);
        }
    }

    /// <summary>
    /// Instance wrapper around <see cref="RomanizeNotes"/>.
    /// </summary>
    /// <param name="groups">Note groups, modified in place.</param>
    public void SetUp(Note[][] groups) {
        RomanizeNotes(groups);
    }
}
314+
315+
// Looks up every alias in input against the singer, using the attributes (alternate,
// tone shift, voice colour) stored for the phoneme at phonemeIndex. For each alias the
// alternate-suffixed form is tried first, then the plain alias; every hit is collected
// in input order. color receives the requested voice colour, or "" if none is set.
private List<UOto> CollectOtoCandidates(List<string> input, Note note, int phonemeIndex, out string color) {
    var attr = note.phonemeAttributes?.FirstOrDefault(attrCheck => attrCheck.index == phonemeIndex) ?? default;
    color = attr.voiceColor ?? "";

    var otos = new List<UOto>();
    foreach (string test in input) {
        if (singer.TryGetMappedOto(test + attr.alternate, note.tone + attr.toneShift, attr.voiceColor, out var otoAlt)) {
            otos.Add(otoAlt);
        } else if (singer.TryGetMappedOto(test, note.tone + attr.toneShift, attr.voiceColor, out var otoCandidacy)) {
            otos.Add(otoCandidacy);
        }
    }
    return otos;
}

// Checks several aliases for the head phoneme (attribute index 0) in one call instead of
// chaining if/else lookups. Prefers the first hit whose colour matches the requested
// colour; if none matches, falls back to the first hit. Returns false only when no alias
// resolves at all, in which case oto is left at its default.
private bool checkOtoUntilHit(List<string> input, Note note, out UOto oto) {
    var otos = CollectOtoCandidates(input, note, 0, out var color);
    int match = otos.FindIndex(otoCheck => (otoCheck.Color ?? string.Empty) == color);
    if (match >= 0) {
        oto = otos[match];
        return true;
    }
    if (otos.Count > 0) {
        oto = otos[0];
        return true;
    }
    oto = default;
    return false;
}

// Checks aliases for a final consonant or vowel ending (attribute index 1).
// Unlike checkOtoUntilHit there is no fallback: only a hit whose colour matches the
// requested colour counts, otherwise false is returned and oto is left at its default.
private bool checkOtoUntilHitFinal(List<string> input, Note note, out UOto oto) {
    var otos = CollectOtoCandidates(input, note, 1, out var color);
    int match = otos.FindIndex(otoCheck => (otoCheck.Color ?? string.Empty) == color);
    if (match < 0) {
        oto = default;
        return false;
    }
    oto = otos[match];
    return true;
}
367+
}
368+
}

0 commit comments

Comments
 (0)