Skip to content

Commit f23b171

Browse files
authored
Merge pull request #961 from lottev1991/ZhYueCvvcPhonemizer
Add Cantonese CVVC Phonemizer
2 parents 57f66b6 + f9e5e96 commit f23b171

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using OpenUtau.Api;
6+
using OpenUtau.Classic;
7+
using OpenUtau.Core.G2p;
8+
using OpenUtau.Core.Ustx;
9+
using Serilog;
10+
11+
namespace OpenUtau.Plugin.Builtin {
12+
/// <summary>
13+
/// Cantonese CVVC phonemizer.
14+
/// It works similarly to the Chinese CVVC phonemizer, including the presamp.ini requirement.
15+
/// The big difference is that it converts hanzi to jyutping instead of pinyin.
16+
/// </summary>
17+
[Phonemizer("Cantonese CVVC Phonemizer", "ZH-YUE CVVC", "Lotte V", language: "ZH-YUE")]
18+
public class CantoneseCVVCPhonemizer : Phonemizer {
19+
private Dictionary<string, string> vowels = new Dictionary<string, string>();
20+
private Dictionary<string, string> consonants = new Dictionary<string, string>();
21+
private USinger singer;
22+
/// <summary>
/// Chooses the phoneme aliases for a single note.
/// Lookup order: a rest/"R" lyric, then a whole "{previous vowel} {lyric}" alias,
/// then a "{previous vowel} {consonant}" transition placed before the note followed by
/// the lyric itself. When the note has no following neighbour, a "{vowel} R" ending is
/// appended at five sixths of the note length if the singer provides one.
/// </summary>
/// <param name="notes">Notes of the current group; the first one carries the lyric, tone and attributes.</param>
/// <param name="prev">Not used by this phonemizer.</param>
/// <param name="next">Not used by this phonemizer.</param>
/// <param name="prevNeighbour">Preceding neighbour note, or null when there is none.</param>
/// <param name="nextNeighbour">Following neighbour note, or null when there is none.</param>
/// <param name="prevNeighbours">Not used by this phonemizer.</param>
/// <returns>The phonemes to render for this note.</returns>
public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
    var note = notes[0];
    var lyric = note.lyric;

    // Consonant of the lyric according to presamp.ini; falls back to the lyric itself.
    if (!consonants.TryGetValue(lyric, out var consonant)) {
        consonant = lyric;
    }

    // Vowel the previous neighbour ends on; "-" when there is no neighbour or no mapping.
    var prevVowel = "-";
    if (prevNeighbour != null && vowels.TryGetValue(prevNeighbour.Value.lyric, out var mappedVowel)) {
        prevVowel = mappedVowel;
    }

    // Per-phoneme attributes for phoneme slots 0, 1 and 2 (default values when absent).
    var attr0 = note.phonemeAttributes?.FirstOrDefault(attr => attr.index == 0) ?? default;
    var attr1 = note.phonemeAttributes?.FirstOrDefault(attr => attr.index == 1) ?? default;
    var attr2 = note.phonemeAttributes?.FirstOrDefault(attr => attr.index == 2) ?? default;
    var tone0 = note.tone + attr0.toneShift;
    var tone1 = note.tone + attr1.toneShift;
    var tone2 = note.tone + attr2.toneShift;

    // A "-" or "r"/"R" lyric becomes the "{previous vowel} R" alias.
    if (lyric == "-" || lyric.ToLowerInvariant() == "r") {
        var restAlias = $"{prevVowel} R";
        if (singer.TryGetMappedOto(restAlias, tone0, attr0.voiceColor, out var restOto)) {
            return MakeSimpleResult(restOto.Alias);
        }
        return MakeSimpleResult(restAlias);
    }

    if (!vowels.TryGetValue(lyric, out var currVowel)) {
        currVowel = lyric;
    }
    int totalDuration = notes.Sum(n => n.duration); // length of the whole note group
    int endingPosition = totalDuration - (totalDuration / 6);
    var endingAlias = $"{currVowel} R";

    // Small builders so every return below reads as a list of phonemes.
    Phoneme Plain(string alias) => new Phoneme { phoneme = alias };
    Phoneme Shifted(string alias, int tick) => new Phoneme { phoneme = alias, position = tick };
    Result Sequence(params Phoneme[] parts) => new Result { phonemes = parts };

    // 1. A whole "{previous vowel} {lyric}" alias exists: use it, plus an ending on the last note.
    if (singer.TryGetMappedOto($"{prevVowel} {lyric}", tone0, attr0.voiceColor, out var oto)) {
        if (nextNeighbour == null && singer.TryGetMappedOto(endingAlias, tone1, attr1.voiceColor, out var tailOto)) {
            return Sequence(Plain(oto.Alias), Shifted(tailOto.Alias, endingPosition));
        }
        return MakeSimpleResult(oto.Alias);
    }

    // 2. Derive the transition length from the lyric's own oto entry (120 ticks when not found).
    int vcLen = 120;
    if (singer.TryGetMappedOto(lyric, tone1, attr1.voiceColor, out var cvOto)) {
        vcLen = MsToTick(cvOto.Preutter);
        if (cvOto.Overlap == 0 && vcLen < 120) {
            vcLen = Math.Min(120, vcLen * 2); // explosive consonant with short preutter.
        }
        if (cvOto.Overlap < 0) {
            vcLen = MsToTick(cvOto.Preutter - cvOto.Overlap);
        }
    }

    // From here on the lyric is replaced by its mapped alias when one exists for slot 0.
    if (singer.TryGetMappedOto(lyric, tone0, attr0.voiceColor, out var cvOtoSimple)) {
        lyric = cvOtoSimple.Alias;
    }

    // 3. Resolve the transition alias at the previous neighbour's tone when there is one,
    //    otherwise at this note's tone.
    var vcPhoneme = $"{prevVowel} {consonant}";
    var vcTone = prevNeighbour != null ? prevNeighbour.Value.tone + attr0.toneShift : tone0;
    if (singer.TryGetMappedOto(vcPhoneme, vcTone, attr0.voiceColor, out var vcOto)) {
        vcPhoneme = vcOto.Alias;
    }

    // Scale by the slot-1 consonant stretch ratio with a floor of 30 ticks, then cap it:
    // by previous duration / 1.5 when a neighbour exists, otherwise by twice the unscaled length.
    var stretchedLen = Math.Max(30, vcLen * (attr1.consonantStretchRatio ?? 1));
    vcLen = prevNeighbour != null
        ? Convert.ToInt32(Math.Min(prevNeighbour.Value.duration / 1.5, stretchedLen))
        : Convert.ToInt32(Math.Min(vcLen * 2, stretchedLen));

    // 4. Last note of a phrase: try to append a "{vowel} R" ending.
    if (nextNeighbour == null) {
        if (singer.TryGetMappedOto($"{prevVowel} {lyric}", tone0, attr0.voiceColor, out var wholeOto)) {
            if (singer.TryGetMappedOto(endingAlias, tone1, attr1.voiceColor, out var wholeEnd)) {
                return Sequence(Plain(wholeOto.Alias), Shifted(wholeEnd.Alias, endingPosition));
            }
        } else {
            // Transition + lyric + ending; the ending is looked up with the slot-2 attributes.
            if (singer.TryGetMappedOto(vcPhoneme, vcTone, attr0.voiceColor, out var vcOtoAgain)) {
                vcPhoneme = vcOtoAgain.Alias;
                if (singer.TryGetMappedOto(endingAlias, tone2, attr2.voiceColor, out var vcEnd)) {
                    return Sequence(
                        Shifted(vcPhoneme, -vcLen),
                        Plain(cvOto?.Alias ?? lyric),
                        Shifted(vcEnd.Alias, endingPosition));
                }
            }
            // Just the lyric and the ending.
            if (singer.TryGetMappedOto(endingAlias, tone1, attr1.voiceColor, out var cvEnd)) {
                return Sequence(Plain(cvOtoSimple?.Alias ?? lyric), Shifted(cvEnd.Alias, endingPosition));
            }
        }
    }

    // 5. No ending applies: transition + lyric when the transition exists, else the lyric alone.
    if (singer.TryGetMappedOto(vcPhoneme, tone0, attr0.voiceColor, out oto)) {
        return Sequence(Shifted(vcPhoneme, -vcLen), Plain(cvOto?.Alias ?? lyric));
    }
    return MakeSimpleResult(cvOtoSimple?.Alias ?? lyric);
}
183+
184+
/// <summary>
/// Stores the singer and rebuilds the vowel and consonant lookup tables from the
/// "presamp.ini" file in the singer's folder. Does nothing when the same singer is
/// set again. A failure to read or parse the file is logged and leaves the tables
/// with whatever was loaded up to that point.
/// </summary>
/// <param name="singer">The singer to use, or null to clear the tables.</param>
public override void SetSinger(USinger singer) {
    if (this.singer == singer) {
        return;
    }
    this.singer = singer;
    vowels.Clear();
    consonants.Clear();
    if (singer == null) {
        return;
    }
    try {
        string file = Path.Combine(singer.Location, "presamp.ini");
        using (var reader = new StreamReader(file, singer.TextFileEncoding)) {
            var blocks = Ini.ReadBlocks(reader, file, @"\[\w+\]");

            // Reads the first block with the given header, if any. Each usable line has at
            // least three '='-separated fields; field 0 is the value stored for every
            // comma-separated sound found in field `soundsField`.
            // A missing section is skipped instead of aborting the whole load.
            void LoadSection(string header, int soundsField, Dictionary<string, string> target) {
                foreach (var block in blocks.Where(b => b.header == header).Take(1)) {
                    foreach (var iniLine in block.lines) {
                        var parts = iniLine.line.Split('=');
                        if (parts.Length < 3) {
                            continue;
                        }
                        foreach (var sound in parts[soundsField].Split(',')) {
                            target[sound] = parts[0];
                        }
                    }
                }
            }

            LoadSection("[VOWEL]", 2, vowels);         // vowel=<unused>=sound,sound,...
            LoadSection("[CONSONANT]", 1, consonants); // consonant=sound,sound,...=<unused>
        }
    } catch (Exception e) {
        Log.Error(e, "failed to load presamp.ini");
    }
}
229+
230+
/// <summary>
231+
/// Converts hanzi notes to jyutping phonemes.
232+
/// </summary>
233+
/// <param name="groups"></param>
234+
public override void SetUp(Note[][] groups) =>
    // Hand the groups to the nested converter, which relabels each group's first note in place.
    JyutpingConversion.RomanizeNotes(groups);
237+
238+
/// <summary>
239+
/// Converts hanzi to jyutping, based on G2P.
240+
/// </summary>
241+
public class JyutpingConversion {
242+
/// <summary>
/// Replaces the first note of <paramref name="group"/> with a new note that carries
/// <paramref name="lyric"/>. Only the fields listed in the initializer are copied
/// from the old note.
/// </summary>
/// <param name="group">Note group to modify in place.</param>
/// <param name="lyric">Lyric for the new first note.</param>
/// <returns>The same array instance that was passed in.</returns>
public static Note[] ChangeLyric(Note[] group, string lyric) {
    var original = group[0];
    var relabelled = new Note {
        position = original.position,
        duration = original.duration,
        tone = original.tone,
        phoneticHint = original.phoneticHint,
        phonemeAttributes = original.phonemeAttributes,
        lyric = lyric,
    };
    group[0] = relabelled;
    return group;
}
254+
255+
/// <summary>
/// Converts every single-character hanzi lyric to lower-case jyutping and leaves all
/// other lyrics untouched.
/// </summary>
/// <param name="lyrics">Lyrics in note order.</param>
/// <returns>A new array of the same length with the hanzi entries replaced.</returns>
public static string[] Romanize(IEnumerable<string> lyrics) {
    var lyricsArray = lyrics.ToArray();

    // Pick the entries to convert with ONE predicate and remember where they came from,
    // so the converter output can never be written back to the wrong note.
    var hanziIndices = Enumerable.Range(0, lyricsArray.Length)
        .Where(i => lyricsArray[i]?.Length == 1 && ZhG2p.CantoneseInstance.IsHanzi(lyricsArray[i]))
        .ToList();
    if (hanziIndices.Count == 0) {
        return lyricsArray;
    }

    var hanziLyrics = hanziIndices.Select(i => lyricsArray[i]).ToList();
    // Invariant lower-casing: the romanization must not depend on the UI culture.
    var jyutpingResult = ZhG2p.CantoneseInstance.Convert(hanziLyrics, false, false)
        .ToLowerInvariant()
        .Split();

    // Guard against the converter returning fewer syllables than characters;
    // any hanzi without a reading keeps its original lyric.
    var convertible = Math.Min(hanziIndices.Count, jyutpingResult.Length);
    for (int k = 0; k < convertible; k++) {
        lyricsArray[hanziIndices[k]] = jyutpingResult[k];
    }
    return lyricsArray;
}
273+
274+
/// <summary>
/// Romanizes the first lyric of every group and writes the result back into the group.
/// An empty <paramref name="groups"/> array is a no-op.
/// </summary>
/// <param name="groups">Note groups to update in place.</param>
public static void RomanizeNotes(Note[][] groups) {
    var resultLyrics = Romanize(groups.Select(group => group[0].lyric));
    // Plain loop instead of Zip(...).Last(): the call is made for its side effect only,
    // and Last() throws on an empty sequence.
    var count = Math.Min(groups.Length, resultLyrics.Length);
    for (int i = 0; i < count; i++) {
        ChangeLyric(groups[i], resultLyrics[i]);
    }
}
278+
279+
/// <summary>
/// Instance-level entry point that forwards to <see cref="RomanizeNotes"/>.
/// </summary>
/// <param name="groups">Note groups to update in place.</param>
public void SetUp(Note[][] groups) => RomanizeNotes(groups);
282+
}
283+
}
284+
}

0 commit comments

Comments
 (0)