|
| 1 | +using OpenUtau.Api; |
| 2 | +using OpenUtau.Core.G2p; |
| 3 | +using OpenUtau.Core.Ustx; |
| 4 | +using System.Collections.Generic; |
| 5 | +using System.Linq; |
| 6 | + |
| 7 | +namespace OpenUtau.Plugin.Builtin { |
| 8 | + /// <summary> |
| 9 | + /// Cantonese phonemizer for Syo-style banks. |
| 10 | + /// Supports both full jyutping syllables as well as syllable fallbacks without a final consonant or falling diphthong. |
| 11 | + /// Supports hanzi and jyutping input. |
| 12 | + /// </summary> |
| 13 | + [Phonemizer("Cantonese Syo-Style Phonemizer", "ZH-YUE SYO", "Lotte V", language: "ZH-YUE")] |
| 14 | + public class CantoneseSyoPhonemizer : Phonemizer { |
| 15 | + |
/// <summary>
/// The consonant table: comma-separated syllable onsets that <c>Process</c> strips from the
/// front of a lyric (two-letter onsets are tried before one-letter onsets).
/// </summary>
static readonly string consonants = "b,p,m,f,d,t,n,l,g,k,ng,h,gw,kw,w,z,c,s,j";

/// <summary>
/// The vowel split table: comma-separated "final=tail" pairs, e.g. "yut=yu t".
/// The key is the part of the lyric left after the onset is removed; the value is the alias
/// looked up for the second (tail) phoneme.
/// NOTE(review): "um=um" is the only entry whose tail contains no space; it may have been
/// meant as "u m". Confirm against the target voicebanks before changing it.
/// </summary>
static readonly string vowels = "aap=aa p,aat=aa t,aak=aa k,aam=aa m,aan=aa n,aang=aa ng,aai=aa i,aau=aa u,ap=a p,at=a t,ak=a k,am=a m,an=a n,ang=a ng,ai=a i,au=a u,op=o p,ot=o t,ok=o k,om=o m,on=o n,ong=o ng,oi=o i,ou=o u,oet=oe t,oek=oe k,oeng=oe ng,oei=oe i,eot=eo t,eon=eo n,eoi=eo i,ep=e p,et=e t,ek=e k,em=e m,en=e n,eng=e ng,ei=e i,eu=e u,up=u p,ut=u t,uk=uu k,um=um,un=u n,ung=uu ng,ui=u i,yut=yu t,yun=yu n,ip=i p,it=i t,ik=ii k,im=i m,in=i n,ing=ii ng,iu=i u";

/// <summary>
/// Vowel substitutes. Each entry is "final,final,...=nucleus": every listed final may fall
/// back to a syllable written with the bare nucleus (onset + nucleus).
/// Note that "oen" appears here but has no entry in the vowel split table.
/// </summary>
static readonly string[] substitution = new string[] {
    "aap,aat,aak,aam,aan,aang,aai,aau=aa", "ap,at,ak,am,an,ang,ai,au=a", "op,ot,ok,om,on,ong,oi,ou=o", "oet,oek,oen,oeng,oei=oe", "eot,eon,eoi=eo","ep,et,ek,em,en,eng,ei,eu=e", "uk,ung=uu", "up,ut,um,un,ui=u", "yut,yun=yu","ik,ing=ii", "ip,it,im,in,iu=i"
};

/// <summary>
/// Substitutes for tail aliases. Each entry is "tail=alternative tail" and is tried when the
/// primary tail alias from the vowel split table is looked up.
/// </summary>
static readonly string[] finalSub = new string[] {
    "ii ng=i ng", "ii k=i k", "uu k=u k", "uu ng=u ng", "oe t=eo t", "oe i=eo i"
};

// Lookup structures parsed once from the tables above; never reassigned afterwards.
static readonly HashSet<string> cSet;
static readonly Dictionary<string, string> vDict;
static readonly Dictionary<string, string> substituteLookup;
static readonly Dictionary<string, string> finalSubLookup;

/// <summary>
/// Parses the string tables into their lookup structures.
/// </summary>
static CantoneseSyoPhonemizer() {
    cSet = new HashSet<string>(consonants.Split(','));
    vDict = vowels.Split(',')
        .Select(entry => entry.Split('='))
        .ToDictionary(pair => pair[0], pair => pair[1]);
    substituteLookup = BuildSubstituteLookup(substitution);
    finalSubLookup = BuildSubstituteLookup(finalSub);
}

/// <summary>
/// Expands "a,b,c=x" lines into a dictionary mapping each of a, b and c to x.
/// A key that occurs twice raises <c>ArgumentException</c>, exactly as the previous
/// <c>ToDictionary</c>-based parsing did.
/// </summary>
/// <param name="lines">Table lines in "key,key,...=value" form.</param>
/// <returns>One dictionary entry per key on the left-hand side of each line.</returns>
static Dictionary<string, string> BuildSubstituteLookup(IEnumerable<string> lines) {
    var lookup = new Dictionary<string, string>();
    foreach (var line in lines) {
        var parts = line.Split('=');
        foreach (var original in parts[0].Split(',')) {
            lookup.Add(original, parts[1]);
        }
    }
    return lookup;
}
| 63 | + |
// The singer whose otos are queried when aliases are looked up.
private USinger singer;

/// <summary>
/// Remembers the given singer in the <c>singer</c> field; no other work is done here.
/// </summary>
/// <param name="singer">The singer to keep for later alias lookups.</param>
public override void SetSinger(USinger singer) {
    this.singer = singer;
}
| 68 | + |
/// <summary>
/// Romanizes the lyrics of the note groups by delegating to
/// <c>JyutpingConversion.RomanizeNotes</c>, which rewrites the first note of each group in place.
/// </summary>
/// <param name="groups">The note groups whose leading notes carry the lyrics to convert.</param>
public override void SetUp(Note[][] groups) => JyutpingConversion.RomanizeNotes(groups);
/// <summary>
/// Phonemizes one note group.
/// The overall logic is:
/// 1. Split off the onset consonant: "jyut" -> "j" + "yut".
/// 2. Look up the remainder in the vowel split table: "yut" -> "yu t".
/// 3. Emit the head alias (the lyric or its substitute, e.g. "jyut"/"jyu") at the start of the
///    note and the tail alias near its end.
/// Three outcomes are possible:
/// - closed syllable (remainder found in the split table): head + tail; the tail gets 120 ticks,
///   or 60 ticks when the note ends a phrase and a "{remainder} -" ending is mapped, capped at
///   half of the total duration in both cases;
/// - open syllable with no following neighbour: head + "{remainder} -" ending (60 ticks, capped
///   at half of the total duration) when that ending can be resolved;
/// - otherwise: a single phoneme.
/// </summary>
/// <param name="notes">The note group; the first note carries the lyric and tone.</param>
/// <param name="prev">Unused.</param>
/// <param name="next">Unused.</param>
/// <param name="prevNeighbour">The directly preceding note, or null when there is none.</param>
/// <param name="nextNeighbour">The directly following note, or null when there is none.</param>
/// <param name="prevNeighbours">Unused.</param>
/// <returns>One or two phonemes for the note group.</returns>
public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) {
    var note = notes[0];
    var lyric = note.lyric;
    string consonant = string.Empty;
    string vowel;

    if (lyric.Length > 2 && cSet.Contains(lyric.Substring(0, 2))) {
        // First try the two-letter consonants "gw", "kw", "ng".
        consonant = lyric.Substring(0, 2);
        vowel = lyric.Substring(2);
    } else if (lyric.Length > 1 && cSet.Contains(lyric.Substring(0, 1)) && lyric != "ng") {
        // Then the single-letter consonants. A bare "ng" is kept whole as a syllable.
        consonant = lyric.Substring(0, 1);
        vowel = lyric.Substring(1);
    } else {
        // Otherwise the whole lyric is treated as the vowel part.
        vowel = lyric;
    }

    string phoneme0 = lyric;
    string fin = $"{vowel} -";
    // The tail phoneme is positioned relative to the end of the whole group.
    int totalDuration = notes.Sum(n => n.duration);
    bool phraseStart = FollowsSilenceOrStop(prevNeighbour);

    // Closed syllable: the vowel part has a split entry, e.g. "yut" -> "yu t".
    if (vDict.TryGetValue(vowel, out var phoneme1) && !string.IsNullOrEmpty(phoneme1)) {
        var lyrics = new List<string> { lyric };
        // Add the fallback spelling without the final, e.g. "jyut" -> "jyu".
        if (substituteLookup.TryGetValue(vowel, out var sub)) {
            lyrics.Add($"{consonant}{sub}");
        }

        // Candidates are derived from the list, so a final with no substitute entry
        // cannot cause an out-of-range index.
        if (checkOtoUntilHit(HeadCandidates(lyrics, phraseStart), note, out var otoHead)) {
            phoneme0 = otoHead.Alias;
        }

        if (nextNeighbour == null && singer.TryGetMappedOto(fin, note.tone, out _)) {
            // Phrase-final syllable with a mapped "{vowel} -" ending: prefer that ending.
            // If the attribute-aware lookup misses, the split-table tail is emitted as is.
            if (checkOtoUntilHitFinal(new List<string> { fin }, note, out var otoFin)) {
                phoneme1 = otoFin.Alias;
            }
            return TwoPhonemes(phoneme0, phoneme1, totalDuration - ClampTail(60, totalDuration));
        }

        var tails = new List<string> { phoneme1 };
        if (finalSubLookup.TryGetValue(phoneme1, out var finSub)) {
            tails.Add(finSub);
        }
        if (!checkOtoUntilHitFinal(tails, note, out var otoTail)) {
            // No usable tail alias: fall back to the head alone.
            return MakeSimpleResult(phoneme0);
        }
        return TwoPhonemes(phoneme0, otoTail.Alias, totalDuration - ClampTail(120, totalDuration));
    }

    // Open syllable at the end of a phrase: try to append a "{vowel} -" ending.
    // (No split entry matched above, and fin always contains " -", so only the neighbour matters.)
    if (nextNeighbour == null) {
        bool headFound = checkOtoUntilHit(HeadCandidates(new List<string> { lyric }, phraseStart), note, out var otoOpenHead);
        if (headFound) {
            phoneme0 = otoOpenHead.Alias;
        } else if (!phraseStart) {
            // Preserved from the original: only the non-initial case gives up here; at a
            // phrase start the raw lyric is kept and the ending is still attempted.
            return MakeSimpleResult(phoneme0);
        }

        // If a vowel ending does not exist, it is not inserted.
        if (!checkOtoUntilHitFinal(new List<string> { fin }, note, out var otoEnding)) {
            return MakeSimpleResult(phoneme0);
        }
        return TwoPhonemes(phoneme0, otoEnding.Alias, totalDuration - ClampTail(60, totalDuration));
    }

    // No splitting is needed: a single phoneme, mapped if possible.
    if (!checkOtoUntilHit(HeadCandidates(new List<string> { lyric }, phraseStart), note, out var otoWhole)) {
        return MakeSimpleResult(phoneme0);
    }
    return new Result {
        phonemes = new Phoneme[] {
            new Phoneme() {
                phoneme = otoWhole.Alias,
            }
        },
    };
}

/// <summary>
/// True when there is no preceding neighbour, or when the preceding neighbour's lyric ends in
/// "p", "t" or "k"; in both cases the "- {lyric}" aliases are tried first.
/// The comparison is ordinal so the result does not depend on the current culture.
/// </summary>
private static bool FollowsSilenceOrStop(Note? prevNeighbour) {
    if (prevNeighbour == null) {
        return true;
    }
    var prevLyric = prevNeighbour.Value.lyric;
    return prevLyric.EndsWith("p", System.StringComparison.Ordinal)
        || prevLyric.EndsWith("t", System.StringComparison.Ordinal)
        || prevLyric.EndsWith("k", System.StringComparison.Ordinal);
}

/// <summary>
/// Builds the ordered alias candidates for the head phoneme: at a phrase start every
/// "- {lyric}" form comes first, followed by the plain lyrics; otherwise just the plain lyrics.
/// </summary>
private static List<string> HeadCandidates(List<string> lyrics, bool phraseStart) {
    if (!phraseStart) {
        return lyrics;
    }
    return lyrics.Select(l => $"- {l}").Concat(lyrics).ToList();
}

/// <summary>
/// Returns the preferred tail length, capped at half of the total duration.
/// </summary>
private static int ClampTail(int preferred, int totalDuration) {
    int half = totalDuration / 2;
    return preferred > half ? half : preferred;
}

/// <summary>
/// Builds a result with a head phoneme at the note start and a tail phoneme at the given position.
/// </summary>
private static Result TwoPhonemes(string head, string tail, int tailPosition) {
    return new Result {
        phonemes = new Phoneme[] {
            new Phoneme() {
                phoneme = head,
            },
            new Phoneme() {
                phoneme = tail,
                position = tailPosition,
            }
        },
    };
}
| 268 | + |
/// <summary>
/// Converts hanzi to jyutping, based on G2P.
/// </summary>
public class JyutpingConversion {
    /// <summary>
    /// Replaces the first note of the group with a copy that carries the given lyric.
    /// The phonetic hint, tone, position, duration and phoneme attributes are copied over.
    /// </summary>
    /// <param name="group">The note group to modify in place.</param>
    /// <param name="lyric">The new lyric for the first note.</param>
    /// <returns>The same array instance that was passed in.</returns>
    public static Note[] ChangeLyric(Note[] group, string lyric) {
        var oldNote = group[0];
        group[0] = new Note {
            lyric = lyric,
            phoneticHint = oldNote.phoneticHint,
            tone = oldNote.tone,
            position = oldNote.position,
            duration = oldNote.duration,
            phonemeAttributes = oldNote.phonemeAttributes,
        };
        return group;
    }

    /// <summary>
    /// Replaces every single-character hanzi lyric with its lower-cased G2P conversion and
    /// leaves all other lyrics untouched.
    /// </summary>
    /// <param name="lyrics">The lyrics to romanize.</param>
    /// <returns>A new array with one entry per input lyric.</returns>
    public static string[] Romanize(IEnumerable<string> lyrics) {
        var lyricsArray = lyrics.ToArray();
        var g2p = ZhG2p.CantoneseInstance;
        // Use one predicate both for what is sent to the converter and for what is written
        // back, so converted tokens cannot drift out of step with the lyrics they belong to.
        var hanziIndices = Enumerable.Range(0, lyricsArray.Length)
            .Where(i => lyricsArray[i].Length == 1 && g2p.IsHanzi(lyricsArray[i]))
            .ToList();
        if (hanziIndices.Count == 0) {
            return lyricsArray;
        }
        var hanziLyrics = hanziIndices.Select(i => lyricsArray[i]).ToList();
        // Invariant lower-casing keeps the output independent of the current culture.
        var jyutpingResult = g2p.Convert(hanziLyrics, false, false).ToLowerInvariant().Split();
        // Stop at whichever runs out first so a short conversion result cannot cause an
        // out-of-range index.
        for (int k = 0; k < hanziIndices.Count && k < jyutpingResult.Length; k++) {
            lyricsArray[hanziIndices[k]] = jyutpingResult[k];
        }
        return lyricsArray;
    }

    /// <summary>
    /// Romanizes the lyric of the first note of every group, in place.
    /// An empty <paramref name="groups"/> array is a no-op.
    /// </summary>
    /// <param name="groups">The note groups to update.</param>
    public static void RomanizeNotes(Note[][] groups) {
        var resultLyrics = Romanize(groups.Select(group => group[0].lyric));
        // A plain loop instead of Zip(...).Last(): Last() throws on an empty sequence.
        for (int i = 0; i < groups.Length; i++) {
            ChangeLyric(groups[i], resultLyrics[i]);
        }
    }

    /// <summary>
    /// Instance-level convenience wrapper around <see cref="RomanizeNotes"/>.
    /// </summary>
    /// <param name="groups">The note groups to update.</param>
    public void SetUp(Note[][] groups) {
        RomanizeNotes(groups);
    }
}
| 314 | + |
/// <summary>
/// Checks several candidate aliases in one call instead of a chain of if/else lookups.
/// Uses the phoneme attributes with index 0. For each candidate, the alias with the alternate
/// value appended is tried first, then the plain alias; every hit is collected.
/// </summary>
/// <param name="input">Candidate aliases in priority order.</param>
/// <param name="note">The note supplying the tone and phoneme attributes.</param>
/// <param name="oto">
/// The first collected oto whose color equals the requested voice color, or else the first
/// collected oto; left at its default value when nothing was found.
/// </param>
/// <returns>True when at least one candidate resolved to an oto.</returns>
private bool checkOtoUntilHit(List<string> input, Note note, out UOto oto) {
    oto = default;
    var attr = note.phonemeAttributes?.FirstOrDefault(attrCheck => attrCheck.index == 0) ?? default;
    int shiftedTone = note.tone + attr.toneShift;

    var hits = new List<UOto>();
    foreach (string candidate in input) {
        // The second lookup runs only when the first one fails.
        if (singer.TryGetMappedOto(candidate + attr.alternate, shiftedTone, attr.voiceColor, out var found)
            || singer.TryGetMappedOto(candidate, shiftedTone, attr.voiceColor, out found)) {
            hits.Add(found);
        }
    }

    if (hits.Count == 0) {
        return false;
    }

    string wantedColor = attr.voiceColor ?? "";
    int preferred = hits.FindIndex(hit => (hit.Color ?? string.Empty) == wantedColor);
    // Fall back to the first hit when no hit carries the requested color.
    oto = hits[preferred < 0 ? 0 : preferred];
    return true;
}
| 341 | + |
/// <summary>
/// Checks candidate aliases for a final consonant or vowel ending.
/// Uses the phoneme attributes with index 1. For each candidate, the alias with the alternate
/// value appended is tried first, then the plain alias; every hit is collected.
/// Unlike <c>checkOtoUntilHit</c>, a hit only counts when its color equals the requested voice
/// color; there is no fallback to the first hit.
/// </summary>
/// <param name="input">Candidate aliases in priority order.</param>
/// <param name="note">The note supplying the tone and phoneme attributes.</param>
/// <param name="oto">
/// The first collected oto whose color equals the requested voice color; left at its default
/// value otherwise.
/// </param>
/// <returns>True only when a collected oto matches the requested voice color.</returns>
private bool checkOtoUntilHitFinal(List<string> input, Note note, out UOto oto) {
    oto = default;
    var attr = note.phonemeAttributes?.FirstOrDefault(attrCheck => attrCheck.index == 1) ?? default;
    int shiftedTone = note.tone + attr.toneShift;

    var hits = new List<UOto>();
    foreach (string candidate in input) {
        // The second lookup runs only when the first one fails.
        if (singer.TryGetMappedOto(candidate + attr.alternate, shiftedTone, attr.voiceColor, out var found)
            || singer.TryGetMappedOto(candidate, shiftedTone, attr.voiceColor, out found)) {
            hits.Add(found);
        }
    }

    string wantedColor = attr.voiceColor ?? "";
    int matching = hits.FindIndex(hit => (hit.Color ?? string.Empty) == wantedColor);
    if (matching < 0) {
        // Covers both "nothing found" and "found, but in another color".
        return false;
    }
    oto = hits[matching];
    return true;
}
| 367 | + } |
| 368 | +} |
0 commit comments