diff --git a/scriptshifter/tables/data/_cyrillic_base.yml b/scriptshifter/tables/data/_cyrillic_base.yml index 7e3cb0e..ccf074e 100644 --- a/scriptshifter/tables/data/_cyrillic_base.yml +++ b/scriptshifter/tables/data/_cyrillic_base.yml @@ -8,85 +8,426 @@ general: has been left here on purpose, assuming it's valid for all child languages. +# COMMON COMBINING CHARACTERS (always follow a base letter): +# combining grave \u0300 +# combining acute \u0301 +# combining circumflex \u0302 +# combining tilde \u0303 +# combining macron \u0304 +# combining breve \u0306 +# combining dot above \u0307 +# combining diaeresis \u0308 +# combining ring above \u030A +# combining double acute \u030B +# combining caron (hachek) \u030C +# combining candrabindu \u0310 +# combining dot below \u0323 +# combining dieresis below \u0324 +# combining comma below \u0326 (Romanian, Latvian, Livonian) +# combining cedilla \u0327 (French, Turkish, Azeri) +# combining ogonek (hook) \u0328 (Polish, Lithuanian) +# combining low line \u0332 +# combining double low line \u0333 +# combining left ligature \uFE20 (Cyrillic transliteration) +# combining right ligature \uFE21 (Cyrillic transliteration) +# soft sign/prime (spacing) \u02B9(Cyrillic transliteration) +# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration) +# ayn(spacing) \u02BB (Semitic and Caucasian languages) +# alif (spacing) \u02BC (Semitic languages) +# middle dot (space) \u00B7) (Catalan) + roman_to_script: map: - "A": "\u0410" - "a": "\u0430" + "A\uFE20E\uFE21": "\u04D4" + "A\uFE20e\uFE21": "\u04D4" + "a\uFE20e\uFE21": "\u04D5" + "A\u0306\u0323": "\u04D0" + "a\u0306\u0323": "\u04D1" + "\u00C6": "\u04D4" + "\u00E6": "\u04D5" + "A\u0306": "\u04D8" + "a\u0306": "\u04D9" + "A\u030B": "\u04DA" + "a\u030B": "\u04DB" + "A\u0308": "\u04D2" + "a\u0308": "\u04D3" + "A\u0310": "\u0518" + "a\u0310": "\u0519" + "B": "\u0411" "b": "\u0431" - "V": "\u0412" - "v": "\u0432" + + "C\u0301h\u0301": "\u04BE" + "c\u0301h\u0301": "\u04BF" + 
"C\u0301h": "\u04BC" + "c\u0301h": "\u04BD" + "C\u0301": "\u040B" + "c\u0301": "\u045B" + "C\u0308h": "\u04F4" + "c\u0308h": "\u04F5" + "C\u0323h": "\u04CB" + "c\u0323h": "\u04CC" + + "D\u0301": "\u0502" + "d\u0301": "\u0503" + "D\u0307": "\u0500" + "d\u0307": "\u0501" + "D\uFE20c\uFE21h": "\u052C" + "d\uFE20c\uFE21h": "\u052D" + "D\uFE20z\uFE21h": "\u052A" + "d\uFE20z\uFE21h": "\u052B" + "D\uFE20Z\uFE21": "\u04E0" + "d\uFE20z\uFE21": "\u04E1" + "Dz\u030C": "\u040F" + "dz\u030C": "\u045F" "D": "\u0414" "d": "\u0434" + + "E\u0300": "\u0400" + "e\u0300": "\u0450" + "E\u0304": "\u0404" + "e\u0304": "\u0454" + "E\u0306": "\u04D6" + "e\u0306": "\u04D7" + "E\u0306\u0323": "\u048C" + "e\u0306\u0323": "\u048D" + "E\u0307": "\u042D" + "e\u0307": "\u044D" + "E\u0308\u0323": "\u04EC" + "e\u0308\u0323": "\u04ED" + "E\u0308": "\u0401" + "e\u0308": "\u0451" + "E\u0328": "\u0466" + "e\u0328": "\u0467" + + "F\u0307": "\u0472" + "f\u0307": "\u0473" + "F": "\u0424" + "f": "\u0444" + + "Gh\u0327": "\u04FA" + "gh\u0327": "\u04FB" + "Gh": "\u0492" + "gh": "\u0493" + "G\u0301": "\u0403" + "g\u0301": "\u0453" + "G\u0306": "\u0490" + "g\u0306": "\u0491" + "G\u0307": "\u049C" + "g\u0307": "\u049D" + "G\u0323": "\u04F6" + "g\u0323": "\u04F7" + "G\u0327": "\u0494" + "g\u0327": "\u0495" + + "H\u0304": "\u04FE" + "h\u0304": "\u04FF" + "H\u0327": "\u04FC" + "h\u0327": "\u04FD" + "H\u0307": "\u04BA" + "h\u0307": "\u04BB" + "H\u0308": "\u04C0" + "h\u0308": "\u04CF" + + "I\u0300": "\u040D" + "i\u0300": "\u045D" + "I\u0304\u0323": "\u04E2" + "i\u0304\u0323": "\u04E3" + "I\u0304": "\u0406" + "i\u0304": "\u0456" + "I\u0306\u0323": "\u048A" + "i\u0306\u0323": "\u048B" + "I\u0306": "\u0419" + "i\u0306": "\u0439" + "I\u0308\u0323": "\u04E4" + "i\u0308\u0323": "\u04E5" + "I\u0308": "\u0407" + "i\u0308": "\u0457" + "I\u0310": "\u0408" + "i\u0310": "\u0458" + + "I\uFE20A\uFE21": "\u042F" + "i\uFE20a\uFE21": "\u044F" + "A": "\u0410" + "a": "\u0430" + + "I\uFE20E\uFE21\u0304": "\u0464" + 
"i\uFE20e\uFE21\u0304": "\u0465" + "I\uFE20E\uFE21\u0328": "\u0468" + "i\uFE20e\uFE21\u0328": "\u0469" + "I\uFE20E\uFE21": "\u0462" + "i\uFE20e\uFE21": "\u0463" "E": "\u0415" "e": "\u0435" - # this conversion shouldn't be needed, but does no harm - "Z": "\u0417" - "z": "\u0437" - "I\u0306": "\u0419" - # this conversion shouldn't be needed, but does no harm + + "I\uFE20O\uFE21\u0328": "\u046C" + "i\uFE20o\uFE21\u0328": "\u046D" "I\uFE20U\uFE21": "\u042E" - # this conversion shouldn't be needed, but does no harm - "I\uFE20u\uFE21": "\u042E" - "I\uFE20A\uFE21": "\u042F" - # this conversion shouldn't be needed, but does no harm - "I\uFE20a\uFE21": "\u042F" - "i\u0306": "\u0439" "i\uFE20u\uFE21": "\u044E" - "i\uFE20a\uFE21": "\u044F" - # this conversion shouldn't be needed, but does no harm - "KH": "\u0425" + "I": "\u0418" + "i": "\u0438" + + "J\u0304": "\u04B8" + "j\u0304": "\u04B9" + "J\u0306": "\u04C1" + "j\u0306": "\u04C2" + "J\u0302": "\u04B6" + "j\u0302": "\u04B7" + "J\u0308": "\u04DC" + "j\u0308": "\u04DD" + + "K\u0300": "\u051E" + "k\u0300": "\u051F" + "K\u0301": "\u040C" + "k\u0301": "\u045C" + "K\uFE20H\uFE21": "\u04B2" + "k\uFE20h\uFE21": "\u04B3" "Kh": "\u0425" - "K": "\u041A" "kh": "\u0445" + "K\uFE20S\uFE21": "\u046E" + "k\uFE20s\uFE21": "\u046F" + "K": "\u041A" "k": "\u043A" + + "Lj": "\u0409" + "lj": "\u0459" + "Lkh\u0307": "\u0514" + "lkh\u0307": "\u0515" + "L\u0301": "\u0508" + "l\u0301": "\u0509" + "L\u0321": "\u04C5" + "l\u0326": "\u04C6" + "L\u0323": "\u052E" + "l\u0323": "\u052F" + "L\u0327": "\u0512" + "l\u0327": "\u0513" + "L\u0324": "\u0520" + "l\u0324": "\u0521" "L": "\u041B" "l": "\u043B" + + "M\u0323": "\u04CD" + "m\u0323": "\u04CE" "M": "\u041C" "m": "\u043C" + + "Nj": "\u040A" + "nj": "\u045A" + "N\u0301G\u0300": "\u04A4" + "n\u0301g\u0300": "\u04A5" + "N\u0301": "\u050A" + "n\u0301": "\u050B" + "N\u0326": "\u0528" + "n\u0326": "\u0529" + "N\u0327": "\u0522" + "n\u0327": "\u0523" + "N\uFE20\u0323G\uFE21": "\u04C9" + "n\uFE20\u0323g\uFE21": 
"\u04CA" + "N\uFE20\u0327G\uFE21": "\u04C7" + "n\uFE20\u0327g\uFE21": "\u04C8" + "N\uFE20G\uFE21": "\u04A2" + "n\uFE20g\uFE21": "\u04A3" + "No\u0332": "\u2116" "N": "\u041D" "n": "\u043D" + + "G": "\u0413" + "g": "\u0433" + + "J": "\u0496" + "j": "\u0497" + + "O\u0303": "\u047C" + "o\u0303": "\u047D" + "O\u0304\u0323": "\u047A" + "o\u0304\u0323": "\u047B" + "O\u0304\uFE20T\uFE21": "\u047E" + "o\u0304\uFE20t\uFE21": "\u047F" + "O\u0304\u0324": "\u0460" + "o\u0304\u0324": "\u0461" + "O\u0304": "\u04EA" + "o\u0304": "\u04EB" + "O\u0307": "\u04E8" + "o\u0307": "\u04E9" + "O\u0308": "\u04E6" + "o\u0308": "\u04E7" + "O\u0328": "\u046A" + "o\u0328": "\u046B" + "O\uFE20u\uFE21": "\u0478" + "o\uFE20u\uFE21": "\u0479" "O": "\u041E" "o": "\u043E" + + "Ph": "\u04A6" + "ph": "\u04A7" + "P\u0323": "\u0524" + "p\u0323": "\u0525" + "P\uFE20S\uFE21": "\u0470" + "p\uFE20s\uFE21": "\u0471" "P": "\u041F" "p": "\u043F" + + "Q\u0300": "\u04A0" + "q\u0300": "\u04A1" + "Q\u0302": "\u0480" + "q\u0302": "\u0481" + "Q\u0304": "\u049E" + "q\u0304": "\u049F" + "Q\u0307": "\u04C3" + "q\u0307": "\u04C4" + "Q\u0308": "\u051A" + "q\u0308": "\u051B" + "Q": "\u049A" + "q": "\u049B" + + "Rkh\u0307": "\u0516" + "rkh\u0307": "\u0517" + "R\u0306": "\u048E" + "r\u0306": "\u048F" "R": "\u0420" "r": "\u0440" - # this conversion shouldn't be needed, but does no harm - # this conversion shouldn't be needed, but does no harm - "SH": "\u0428" + + "Shch": "\u0429" + "shch": "\u0449" + "Sh\u0323": "\u0526" + "sh\u0323": "\u0527" "Sh": "\u0428" - "S": "\u0421" "sh": "\u0448" + "S\u0301": "\u050C" + "s\u0301": "\u050D" + "S\u0307": "\u0405" + "s\u0307": "\u0455" + + "Ch": "\u0427" + "ch": "\u0447" + "C": "\u0426" + "c": "\u0446" + + "Th": "\u04AA" + "th": "\u04AB" + "T\u0301": "\u050E" + "t\u0301": "\u050F" + "T\u0327": "\u04AC" + "t\u0327": "\u04AD" + "T\uFE20H\uFE21": "\u0498" + "t\uFE20h\uFE21": "\u0499" + "T\uFE20S\uFE21": "\u0426" + "t\uFE20s\uFE21": "\u0446" + "T\uFE20S\uFE21\u0307": "\u04B4" + 
"t\uFE20s\uFE21\u0307": "\u04B5" + + "S": "\u0421" "s": "\u0441" - # this conversion shouldn't be needed, but does no harm + "T": "\u0422" "t": "\u0442" + + "U\u0302": "\u04B0" + "u\u0302": "\u04B1" + "U\u0304": "\u04EE" + "u\u0304": "\u04EF" + "U\u0306": "\u040E" + "u\u0306": "\u045E" + "U\u0307": "\u04AE" + "u\u0307": "\u04AF" + "U\u0308": "\u04F0" + "u\u0308": "\u04F1" + "U\u030B": "\u04F2" + "u\u030B": "\u04F3" "U": "\u0423" "u": "\u0443" - "F": "\u0424" - "f": "\u0444" - # this conversion shouldn't be needed, but does no harm - "CH": "\u0427" - "Ch": "\u0427" - "ch": "\u0447" - # this conversion shouldn't be needed, but does no harm - "\uFE20": "" - # this conversion shouldn't be needed, but does no harm - "\uFE21": "" - # this conversion is ambiguous - \u042C is also theoretically possible + + "V\u0307": "\u0474" + "v\u0307": "\u0475" + "V\u0308": "\u0476" + "v\u0308": "\u0477" + "V": "\u0412" + "v": "\u0432" + + "W\u0308": "\u051C" + "w\u0308": "\u051D" + "W": "\u04A8" + "w": "\u04A9" + + "X": "\u0058" + "x": "\u0078" + + "Y\u0307": "\u0474" + "y\u0307": "\u0475" + "Y\u0308": "\u04F8" + "y\u0308": "\u04F9" + "Y": "\u042B" + "y": "\u044B" + + "Zh": "\u0416" + "zh": "\u0436" + "Z\u0301": "\u0504" + "z\u0301": "\u0505" + "Z\u0307": "\u0510" + "z\u0307": "\u0511" + "Z\u0308": "\u04DE" + "z\u0308": "\u04DF" + "Z\u0327": "\u0506" + "z\u0327": "\u0507" + "Z": "\u0417" + "z": "\u0437" + + "H": "\u0413" + "h": "\u0433" + + "\u0110": "\u0402" + "\u0111": "\u0452" + "\u02B9\u0333": "\u042C" "\u02B9": "\u044C" + "\u02BA\u0333": "\u042A" + "\u02BA": "\u044A" + "\u0303": "\u0487" + "\u0311": "\u0484" + "\u0313": "\u0486" + "\u0314": "\u0485" + "\u007E": "\u0483" + "(|)": "\u0482" + "(^)": "\u0488" + "(')": "\u0489" + + # Two Less-than signs mapped to Left-pointing double angle quotation mark + "\u003C\u003C": "\u00AB" + # Two Greater-than signs mapped to Right-pointing double angle quotation mark + "\u003E\u003E": "\u00BB" script_to_roman: map: - "\u0404": 
"I\uFE20E\uFE21" + + # Left-pointing double angle quotation mark mapped to Two Less-than signs + "\u00AB": "\u003C\u003C" + # Right-pointing double angle quotation mark mapped to Two Greater-than signs + "\u00BB": "\u003E\u003E" + "\u2116": "No\u0332" + "\u0400": "E\u0300" + "\u0401": "E\u0308" + "\u0402": "\u0110" + "\u0403": "G\u0301" + "\u0404": "E\u0304" + "\u0405": "S\u0307" + "\u0406": "I\u0304" "\u0407": "I\u0308" + "\u0408": "I\u0310" + "\u0409": "Lj" + "\u040A": "Nj" + "\u040B": "C\u0301" + "\u040C": "K\u0301" + "\u040D": "I\u0300" + "\u040E": "U\u0306" + "\u040F": "Dz\u030C" "\u0410": "A" "\u0411": "B" "\u0412": "V" + "\u0413": "G" "\u0414": "D" "\u0415": "E" + "\u0416": "Zh" "\u0417": "Z" + "\u0418": "I" "\u0419": "I\u0306" "\u041A": "K" "\u041B": "L" @@ -100,18 +441,25 @@ script_to_roman: "\u0423": "U" "\u0424": "F" "\u0425": "Kh" + "\u0426": "T\uFE20S\uFE21" "\u0427": "Ch" "\u0428": "Sh" "\u0429": "Shch" - "\u042C": "\u02B9" + "\u042A": "\u02BA\u0333" + "\u042B": "Y" + "\u042C": "\u02B9\u0333" + "\u042D": "E\u0307" "\u042E": "I\uFE20U\uFE21" "\u042F": "I\uFE20A\uFE21" "\u0430": "a" "\u0431": "b" "\u0432": "v" + "\u0433": "g" "\u0434": "d" "\u0435": "e" + "\u0436": "zh" "\u0437": "z" + "\u0438": "i" "\u0439": "i\u0306" "\u043A": "k" "\u043B": "l" @@ -125,9 +473,237 @@ script_to_roman: "\u0443": "u" "\u0444": "f" "\u0445": "kh" + "\u0446": "t\uFE20s\uFE21" "\u0447": "ch" "\u0448": "sh" "\u0449": "shch" + "\u044A": "\u02BA" + "\u044B": "y" "\u044C": "\u02B9" + "\u044D": "e\u0307" "\u044E": "i\uFE20u\uFE21" "\u044F": "i\uFE20a\uFE21" + "\u0450": "e\u0300" + "\u0451": "e\u0308" + "\u0452": "\u0111" + "\u0453": "g\u0301" + "\u0454": "e\u0304" + "\u0455": "s\u0307" + "\u0456": "i\u0304" + "\u0457": "i\u0308" + "\u0458": "i\u0310" + "\u0459": "lj" + "\u045A": "nj" + "\u045B": "c\u0301" + "\u045C": "k\u0301" + "\u045D": "i\u0300" + "\u045E": "u\u0306" + "\u045F": "dz\u030C" + "\u0460": "O\u0304\u0324" + "\u0461": "o\u0304\u0324" + "\u0462": "I\uFE20E\uFE21" + 
"\u0463": "i\uFE20e\uFE21" + "\u0464": "I\uFE20E\uFE21\u0304" + "\u0465": "i\uFE20e\uFE21\u0304" + "\u0466": "E\u0328" + "\u0467": "e\u0328" + "\u0468": "I\uFE20E\uFE21\u0328" + "\u0469": "i\uFE20e\uFE21\u0328" + "\u046A": "O\u0328" + "\u046B": "o\u0328" + "\u046C": "I\uFE20O\uFE21\u0328" + "\u046D": "i\uFE20o\uFE21\u0328" + "\u046E": "K\uFE20S\uFE21" + "\u046F": "k\uFE20s\uFE21" + "\u0470": "P\uFE20S\uFE21" + "\u0471": "p\uFE20s\uFE21" + "\u0472": "F\u0307" + "\u0473": "f\u0307" + "\u0474": "V\u0307" + "\u0475": "v\u0307" + "\u0476": "V\u0308" + "\u0477": "v\u0308" + "\u0478": "O\uFE20u\uFE21" + "\u0479": "o\uFE20u\uFE21" + "\u047A": "O\u0304\u0323" + "\u047B": "o\u0304\u0323" + "\u047C": "O\u0303" + "\u047D": "o\u0303" + "\u047E": "O\u0304\uFE20T\uFE21" + "\u047F": "o\u0304\uFE20t\uFE21" + "\u0480": "Q\u0302" + "\u0481": "q\u0302" + "\u0482": "(|)" + "\u0483": "\u007E" + "\u0484": "\u0311" + "\u0485": "\u0314" + "\u0486": "\u0313" + "\u0487": "\u0303" + "\u0488": "(^)" + "\u0489": "(')" + "\u048A": "I\u0306\u0323" + "\u048B": "i\u0306\u0323" + "\u048C": "E\u0306\u0323" + "\u048D": "e\u0306\u0323" + "\u048E": "R\u0306" + "\u048F": "r\u0306" + "\u0490": "G\u0306" + "\u0491": "g\u0306" + "\u0492": "Gh" + "\u0493": "gh" + "\u0494": "G\u0327" + "\u0495": "g\u0327" + "\u0496": "J" + "\u0497": "j" + "\u0498": "T\uFE20H\uFE21" + "\u0499": "t\uFE20h\uFE21" + "\u049A": "Q" + "\u049B": "q" + "\u049C": "G\u0307" + "\u049D": "g\u0307" + "\u049E": "Q\u0304" + "\u049F": "q\u0304" + "\u04A0": "Q\u0300" + "\u04A1": "q\u0300" + "\u04A2": "N\uFE20G\uFE21" + "\u04A3": "n\uFE20g\uFE21" + "\u04A4": "N\u0301G\u0300" + "\u04A5": "n\u0301g\u0300" + "\u04A6": "Ph" + "\u04A7": "ph" + "\u04A8": "W" + "\u04A9": "w" + "\u04AA": "Th" + "\u04AB": "th" + "\u04AC": "T\u0327" + "\u04AD": "t\u0327" + "\u04AE": "U\u0307" + "\u04AF": "u\u0307" + "\u04B0": "U\u0302" + "\u04B1": "u\u0302" + "\u04B2": "K\uFE20H\uFE21" + "\u04B3": "k\uFE20h\uFE21" + "\u04B4": "T\uFE20S\uFE21\u0307" + "\u04B5": 
"t\uFE20s\uFE21\u0307" + "\u04B6": "J\u0302" + "\u04B7": "j\u0302" + "\u04B8": "J\u0304" + "\u04B9": "j\u0304" + "\u04BA": "H\u0307" + "\u04BB": "h\u0307" + "\u04BC": "C\u0301h" + "\u04BD": "c\u0301h" + "\u04BE": "C\u0301h\u0301" + "\u04BF": "c\u0301h\u0301" + "\u04C0": "H\u0308" + "\u04C1": "J\u0306" + "\u04C2": "j\u0306" + "\u04C3": "Q\u0307" + "\u04C4": "q\u0307" + "\u04C5": "L\u0326" + "\u04C6": "l\u0326" + "\u04C7": "N\uFE20\u0327G\uFE21" + "\u04C8": "n\uFE20\u0327g\uFE21" + "\u04C9": "N\uFE20\u0323G\uFE21" + "\u04CA": "n\uFE20\u0323g\uFE21" + "\u04CB": "C\u0323h" + "\u04CC": "c\u0323h" + "\u04CD": "M\u0323" + "\u04CE": "m\u0323" + "\u04CF": "h\u0308" + "\u04D0": "A\u0306\u0323" + "\u04D1": "a\u0306\u0323" + "\u04D2": "A\u0308" + "\u04D3": "a\u0308" + "\u04D4": "\u00C6" + "\u04D5": "\u00E6" + "\u04D6": "E\u0306" + "\u04D7": "e\u0306" + "\u04D8": "A\u0306" + "\u04D9": "a\u0306" + "\u04DA": "A\u030B" + "\u04DB": "a\u030B" + "\u04DC": "J\u0308" + "\u04DD": "j\u0308" + "\u04DE": "Z\u0308" + "\u04DF": "z\u0308" + "\u04E0": "D\uFE20Z\uFE21" + "\u04E1": "d\uFE20z\uFE21" + "\u04E2": "I\u0304\u0323" + "\u04E3": "i\u0304\u0323" + "\u04E4": "I\u0308\u0323" + "\u04E5": "i\u0308\u0323" + "\u04E6": "O\u0308" + "\u04E7": "o\u0308" + "\u04E8": "O\u0307" + "\u04E9": "o\u0307" + "\u04EA": "O\u0304" + "\u04EB": "o\u0304" + "\u04EC": "E\u0308\u0323" + "\u04ED": "e\u0308\u0323" + "\u04EE": "U\u0304" + "\u04EF": "u\u0304" + "\u04F0": "U\u0308" + "\u04F1": "u\u0308" + "\u04F2": "U\u030B" + "\u04F3": "u\u030B" + "\u04F4": "C\u0308h" + "\u04F5": "c\u0308h" + "\u04F6": "G\u0323" + "\u04F7": "g\u0323" + "\u04F8": "Y\u0308" + "\u04F9": "y\u0308" + "\u04FA": "Gh\u0327" + "\u04FB": "gh\u0327" + "\u04FC": "H\u0327" + "\u04FD": "h\u0327" + "\u04FE": "H\u0304" + "\u04FF": "h\u0304" + "\u0500": "D\u0307" + "\u0501": "d\u0307" + "\u0502": "D\u0301" + "\u0503": "d\u0301" + "\u0504": "Z\u0301" + "\u0505": "z\u0301" + "\u0506": "Z\u0327" + "\u0507": "z\u0327" + "\u0508": "L\u0301" + "\u0509": 
"l\u0301" + "\u050A": "N\u0301" + "\u050B": "n\u0301" + "\u050C": "S\u0301" + "\u050D": "s\u0301" + "\u050E": "T\u0301" + "\u050F": "t\u0301" + "\u0510": "Z\u0307" + "\u0511": "z\u0307" + "\u0512": "L\u0327" + "\u0513": "l\u0327" + "\u0514": "Lkh\u0307" + "\u0515": "lkh\u0307" + "\u0516": "Rkh\u0307" + "\u0517": "rkh\u0307" + "\u0518": "A\u0310" + "\u0519": "a\u0310" + "\u051A": "Q\u0308" + "\u051B": "q\u0308" + "\u051C": "W\u0308" + "\u051D": "w\u0308" + "\u051E": "K\u0300" + "\u051F": "k\u0300" + "\u0520": "L\u0324" + "\u0521": "l\u0324" + "\u0522": "N\u0327" + "\u0523": "n\u0327" + "\u0524": "P\u0323" + "\u0525": "p\u0323" + "\u0526": "Sh\u0323" + "\u0527": "sh\u0323" + "\u0528": "N\u0326" + "\u0529": "n\u0326" + "\u052A": "D\uFE20z\uFE21h" + "\u052B": "d\uFE20z\uFE21h" + "\u052C": "D\uFE20c\uFE21h" + "\u052D": "d\uFE20c\uFE21h" + "\u052E": "L\u0323" + "\u052F": "l\u0323" diff --git a/scriptshifter/tables/data/asian_cyrillic.yml b/scriptshifter/tables/data/asian_cyrillic.yml index b58ebec..4cfb251 100644 --- a/scriptshifter/tables/data/asian_cyrillic.yml +++ b/scriptshifter/tables/data/asian_cyrillic.yml @@ -1,5 +1,5 @@ general: - name: Asian (Cyrillic) + name: Cyrillic (Generic) parents: - _cyrillic_base @@ -384,15 +384,19 @@ roman_to_script: "(|)": "\u0482" "(^)": "\u0488" "(')": "\u0489" - + + # Two Less-than signs mapped to Left-pointing double angle quotation mark "\u003C\u003C": "\u00AB" + # Two Greater-than signs mapped to Right-pointing double angle quotation mark "\u003E\u003E": "\u00BB" script_to_roman: map: - "\u00AB": "\"" - "\u00BB": "\"" + # Left-pointing double angle quotation mark mapped to Two Less-than signs + "\u00AB": "\u003C\u003C" + # Right-pointing double angle quotation mark mapped to Two Greater-than signs + "\u00BB": "\u003E\u003E" "\u2116": "No\u0332" "\u0400": "E\u0300" "\u0401": "E\u0308" diff --git a/scriptshifter/tables/data/bulgarian.yml b/scriptshifter/tables/data/bulgarian.yml index 147f571..c3a207c 100644 --- 
a/scriptshifter/tables/data/bulgarian.yml +++ b/scriptshifter/tables/data/bulgarian.yml @@ -5,54 +5,34 @@ general: roman_to_script: map: - "G": "\u0413" - "g": "\u0433" - # this conversion shouldn't be needed, but does no harm - "ZH": "\u0416" - "Zh": "\u0416" - "zh": "\u0436" - "I\uFE20E\uFE21": "\u0462" - # this conversion shouldn't be needed, but does no harm - "I\uFE20e\uFE21": "\u0462" - # this conversion shouldn't be needed, but does no harm - # this conversion shouldn't be needed, but does no harm - "I": "\u0418" - "i\uFE20e\uFE21": "\u0463" - "i": "\u0438" - # this conversion shouldn't be needed, but does no harm "SHT": "\u0429" "Sht": "\u0429" "sht": "\u0449" - "T\uFE20S\uFE21": "\u0426" - # this conversion shouldn't be needed, but does no harm - "T\uFE20s\uFE21": "\u0426" - "t\uFE20s\uFE21": "\u0446" - "U\u0310": "\u046A" + "U\u0306": "\u042A" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u016C": "\u042A" "u\u0306": "\u044A" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u016D": "\u044A" + "U\u0310": "\u046A" "u\u0310": "\u046B" # this conversion is ambiguous - \u042A is also theoretically possible "\u02BA": "\u044A" + # upper case hard sign is unlikely to occur + "\u02BA\u0332": "\u042A" script_to_roman: map: - "\u044C": "" - "\u042C": "" - "\u044A": "" - "\u042A%": "u\u0306" - "\u042A": "" - "\u0413": "G" - "\u0433": "g" - "\u0416": "Zh" - "\u0436": "zh" - "\u0462": "I\uFE20E\uFE21" - "\u0418": "I" - "\u0463": "i\uFE20e\uFE21" - "\u0438": "i" "\u0429": "Sht" + "\u042A": "U\u0306" + # Capital letter hard sign at the end of a word (rare) + "%\u042A": "\u02BA\u0332" + "\u042C": "\u02B9\u0332" "\u0449": "sht" - "\u0426": "T\uFE20S\uFE21" - "\u0446": "t\uFE20s\uFE21" + "\u044A": "u\u0306" + # Small letter hard sign at the end of a word (rare) + "%\u044A": "\u02BA" + "\u044C": "\u02B9" "\u046A": "U\u0310" "\u046B": "u\u0310" - "\u042A": "u\u016C" - "\u044A": "u\u016D" + diff --git a/scriptshifter/tables/data/cyrillic_generic.yml 
b/scriptshifter/tables/data/cyrillic_generic.yml new file mode 100644 index 0000000..4cfb251 --- /dev/null +++ b/scriptshifter/tables/data/cyrillic_generic.yml @@ -0,0 +1,704 @@ +general: + name: Cyrillic (Generic) + parents: + - _cyrillic_base + +# COMMON COMBINING CHARACTERS (always follow a base letter): +# combining grave \u0300 +# combining acute \u0301 +# combining circumflex \u0302 +# combining tilde \u0303 +# combining macron \u0304 +# combining breve \u0306 +# combining dot above \u0307 +# combining diaeresis \u0308 +# combining ring above \u030A +# combining double acute \u030B +# combining caron (hachek) \u030C +# combining candrabindu \u0310 +# combining dot below \u0323 +# combining dieresis below \u0324 +# combining comma below \u0326 (Romanian, Latvian, Livonian) +# combining cedilla \u0327 (French, Turkish, Azeri) +# combining ogonek (hook) \u0328 (Polish, Lithuanian) +# combining low line \u0332 +# combining double low line \u0333 +# combining left ligature \uFE20 (Cyrillic transliteration) +# combining right ligature \uFE21 (Cyrillic transliteration) +# soft sign/prime (spacing) \u02B9(Cyrillic transliteration) +# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration) +# ayn(spacing) \u02BB (Semitic and Caucasian languages) +# alif (spacing) \u02BC (Semitic languages) +# middle dot (space) \u00B7) (Catalan) + +roman_to_script: + map: + "A\uFE20E\uFE21": "\u04D4" + "A\uFE20e\uFE21": "\u04D4" + "a\uFE20e\uFE21": "\u04D5" + "A\u0306\u0323": "\u04D0" + "a\u0306\u0323": "\u04D1" + "\u00C6": "\u04D4" + "\u00E6": "\u04D5" + "A\u0306": "\u04D8" + "a\u0306": "\u04D9" + "A\u030B": "\u04DA" + "a\u030B": "\u04DB" + "A\u0308": "\u04D2" + "a\u0308": "\u04D3" + "A\u0310": "\u0518" + "a\u0310": "\u0519" + + "B": "\u0411" + "b": "\u0431" + + "C\u0301h\u0301": "\u04BE" + "c\u0301h\u0301": "\u04BF" + "C\u0301h": "\u04BC" + "c\u0301h": "\u04BD" + "C\u0301": "\u040B" + "c\u0301": "\u045B" + "C\u0308h": "\u04F4" + "c\u0308h": "\u04F5" + "C\u0323h": "\u04CB" 
+ "c\u0323h": "\u04CC" + + "D\u0301": "\u0502" + "d\u0301": "\u0503" + "D\u0307": "\u0500" + "d\u0307": "\u0501" + "D\uFE20c\uFE21h": "\u052C" + "d\uFE20c\uFE21h": "\u052D" + "D\uFE20z\uFE21h": "\u052A" + "d\uFE20z\uFE21h": "\u052B" + "D\uFE20Z\uFE21": "\u04E0" + "d\uFE20z\uFE21": "\u04E1" + "Dz\u030C": "\u040F" + "dz\u030C": "\u045F" + "D": "\u0414" + "d": "\u0434" + + "E\u0300": "\u0400" + "e\u0300": "\u0450" + "E\u0304": "\u0404" + "e\u0304": "\u0454" + "E\u0306": "\u04D6" + "e\u0306": "\u04D7" + "E\u0306\u0323": "\u048C" + "e\u0306\u0323": "\u048D" + "E\u0307": "\u042D" + "e\u0307": "\u044D" + "E\u0308\u0323": "\u04EC" + "e\u0308\u0323": "\u04ED" + "E\u0308": "\u0401" + "e\u0308": "\u0451" + "E\u0328": "\u0466" + "e\u0328": "\u0467" + + "F\u0307": "\u0472" + "f\u0307": "\u0473" + "F": "\u0424" + "f": "\u0444" + + "Gh\u0327": "\u04FA" + "gh\u0327": "\u04FB" + "Gh": "\u0492" + "gh": "\u0493" + "G\u0301": "\u0403" + "g\u0301": "\u0453" + "G\u0306": "\u0490" + "g\u0306": "\u0491" + "G\u0307": "\u049C" + "g\u0307": "\u049D" + "G\u0323": "\u04F6" + "g\u0323": "\u04F7" + "G\u0327": "\u0494" + "g\u0327": "\u0495" + + "H\u0304": "\u04FE" + "h\u0304": "\u04FF" + "H\u0327": "\u04FC" + "h\u0327": "\u04FD" + "H\u0307": "\u04BA" + "h\u0307": "\u04BB" + "H\u0308": "\u04C0" + "h\u0308": "\u04CF" + + "I\u0300": "\u040D" + "i\u0300": "\u045D" + "I\u0304\u0323": "\u04E2" + "i\u0304\u0323": "\u04E3" + "I\u0304": "\u0406" + "i\u0304": "\u0456" + "I\u0306\u0323": "\u048A" + "i\u0306\u0323": "\u048B" + "I\u0306": "\u0419" + "i\u0306": "\u0439" + "I\u0308\u0323": "\u04E4" + "i\u0308\u0323": "\u04E5" + "I\u0308": "\u0407" + "i\u0308": "\u0457" + "I\u0310": "\u0408" + "i\u0310": "\u0458" + + "I\uFE20A\uFE21": "\u042F" + "i\uFE20a\uFE21": "\u044F" + "A": "\u0410" + "a": "\u0430" + + "I\uFE20E\uFE21\u0304": "\u0464" + "i\uFE20e\uFE21\u0304": "\u0465" + "I\uFE20E\uFE21\u0328": "\u0468" + "i\uFE20e\uFE21\u0328": "\u0469" + "I\uFE20E\uFE21": "\u0462" + "i\uFE20e\uFE21": "\u0463" + "E": 
"\u0415" + "e": "\u0435" + + "I\uFE20O\uFE21\u0328": "\u046C" + "i\uFE20o\uFE21\u0328": "\u046D" + "I\uFE20U\uFE21": "\u042E" + "i\uFE20u\uFE21": "\u044E" + "I": "\u0418" + "i": "\u0438" + + "J\u0304": "\u04B8" + "j\u0304": "\u04B9" + "J\u0306": "\u04C1" + "j\u0306": "\u04C2" + "J\u0302": "\u04B6" + "j\u0302": "\u04B7" + "J\u0308": "\u04DC" + "j\u0308": "\u04DD" + + "K\u0300": "\u051E" + "k\u0300": "\u051F" + "K\u0301": "\u040C" + "k\u0301": "\u045C" + "K\uFE20H\uFE21": "\u04B2" + "k\uFE20h\uFE21": "\u04B3" + "Kh": "\u0425" + "kh": "\u0445" + "K\uFE20S\uFE21": "\u046E" + "k\uFE20s\uFE21": "\u046F" + "K": "\u041A" + "k": "\u043A" + + "Lj": "\u0409" + "lj": "\u0459" + "Lkh\u0307": "\u0514" + "lkh\u0307": "\u0515" + "L\u0301": "\u0508" + "l\u0301": "\u0509" + "L\u0321": "\u04C5" + "l\u0326": "\u04C6" + "L\u0323": "\u052E" + "l\u0323": "\u052F" + "L\u0327": "\u0512" + "l\u0327": "\u0513" + "L\u0324": "\u0520" + "l\u0324": "\u0521" + "L": "\u041B" + "l": "\u043B" + + "M\u0323": "\u04CD" + "m\u0323": "\u04CE" + "M": "\u041C" + "m": "\u043C" + + "Nj": "\u040A" + "nj": "\u045A" + "N\u0301G\u0300": "\u04A4" + "n\u0301g\u0300": "\u04A5" + "N\u0301": "\u050A" + "n\u0301": "\u050B" + "N\u0326": "\u0528" + "n\u0326": "\u0529" + "N\u0327": "\u0522" + "n\u0327": "\u0523" + "N\uFE20\u0323G\uFE21": "\u04C9" + "n\uFE20\u0323g\uFE21": "\u04CA" + "N\uFE20\u0327G\uFE21": "\u04C7" + "n\uFE20\u0327g\uFE21": "\u04C8" + "N\uFE20G\uFE21": "\u04A2" + "n\uFE20g\uFE21": "\u04A3" + "No\u0332": "\u2116" + "N": "\u041D" + "n": "\u043D" + + "G": "\u0413" + "g": "\u0433" + + "J": "\u0496" + "j": "\u0497" + + "O\u0303": "\u047C" + "o\u0303": "\u047D" + "O\u0304\u0323": "\u047A" + "o\u0304\u0323": "\u047B" + "O\u0304\uFE20T\uFE21": "\u047E" + "o\u0304\uFE20t\uFE21": "\u047F" + "O\u0304\u0324": "\u0460" + "o\u0304\u0324": "\u0461" + "O\u0304": "\u04EA" + "o\u0304": "\u04EB" + "O\u0307": "\u04E8" + "o\u0307": "\u04E9" + "O\u0308": "\u04E6" + "o\u0308": "\u04E7" + "O\u0328": "\u046A" + "o\u0328": 
"\u046B" + "O\uFE20u\uFE21": "\u0478" + "o\uFE20u\uFE21": "\u0479" + "O": "\u041E" + "o": "\u043E" + + "Ph": "\u04A6" + "ph": "\u04A7" + "P\u0323": "\u0524" + "p\u0323": "\u0525" + "P\uFE20S\uFE21": "\u0470" + "p\uFE20s\uFE21": "\u0471" + "P": "\u041F" + "p": "\u043F" + + "Q\u0300": "\u04A0" + "q\u0300": "\u04A1" + "Q\u0302": "\u0480" + "q\u0302": "\u0481" + "Q\u0304": "\u049E" + "q\u0304": "\u049F" + "Q\u0307": "\u04C3" + "q\u0307": "\u04C4" + "Q\u0308": "\u051A" + "q\u0308": "\u051B" + "Q": "\u049A" + "q": "\u049B" + + "Rkh\u0307": "\u0516" + "rkh\u0307": "\u0517" + "R\u0306": "\u048E" + "r\u0306": "\u048F" + "R": "\u0420" + "r": "\u0440" + + "Shch": "\u0429" + "shch": "\u0449" + "Sh\u0323": "\u0526" + "sh\u0323": "\u0527" + "Sh": "\u0428" + "sh": "\u0448" + "S\u0301": "\u050C" + "s\u0301": "\u050D" + "S\u0307": "\u0405" + "s\u0307": "\u0455" + + "Ch": "\u0427" + "ch": "\u0447" + "C": "\u0426" + "c": "\u0446" + + "Th": "\u04AA" + "th": "\u04AB" + "T\u0301": "\u050E" + "t\u0301": "\u050F" + "T\u0327": "\u04AC" + "t\u0327": "\u04AD" + "T\uFE20H\uFE21": "\u0498" + "t\uFE20h\uFE21": "\u0499" + "T\uFE20S\uFE21": "\u0426" + "t\uFE20s\uFE21": "\u0446" + "T\uFE20S\uFE21\u0307": "\u04B4" + "t\uFE20s\uFE21\u0307": "\u04B5" + + "S": "\u0421" + "s": "\u0441" + + "T": "\u0422" + "t": "\u0442" + + "U\u0302": "\u04B0" + "u\u0302": "\u04B1" + "U\u0304": "\u04EE" + "u\u0304": "\u04EF" + "U\u0306": "\u040E" + "u\u0306": "\u045E" + "U\u0307": "\u04AE" + "u\u0307": "\u04AF" + "U\u0308": "\u04F0" + "u\u0308": "\u04F1" + "U\u030B": "\u04F2" + "u\u030B": "\u04F3" + "U": "\u0423" + "u": "\u0443" + + "V\u0307": "\u0474" + "v\u0307": "\u0475" + "V\u0308": "\u0476" + "v\u0308": "\u0477" + "V": "\u0412" + "v": "\u0432" + + "W\u0308": "\u051C" + "w\u0308": "\u051D" + "W": "\u04A8" + "w": "\u04A9" + + "X": "\u0058" + "x": "\u0078" + + "Y\u0307": "\u0474" + "y\u0307": "\u0475" + "Y\u0308": "\u04F8" + "y\u0308": "\u04F9" + "Y": "\u042B" + "y": "\u044B" + + "Zh": "\u0416" + "zh": "\u0436" + 
"Z\u0301": "\u0504" + "z\u0301": "\u0505" + "Z\u0307": "\u0510" + "z\u0307": "\u0511" + "Z\u0308": "\u04DE" + "z\u0308": "\u04DF" + "Z\u0327": "\u0506" + "z\u0327": "\u0507" + "Z": "\u0417" + "z": "\u0437" + + "H": "\u0413" + "h": "\u0433" + + "\u0110": "\u0402" + "\u0111": "\u0452" + "\u02B9\u0333": "\u042C" + "\u02B9": "\u044C" + "\u02BA\u0333": "\u042A" + "\u02BA": "\u044A" + "\u0303": "\u0487" + "\u0311": "\u0484" + "\u0313": "\u0486" + "\u0314": "\u0485" + "\u007E": "\u0483" + "(|)": "\u0482" + "(^)": "\u0488" + "(')": "\u0489" + + # Two Less-than signs mapped to Left-pointing double angle quotation mark + "\u003C\u003C": "\u00AB" + # Two Greater-than signs mapped to Right-pointing double angle quotation mark + "\u003E\u003E": "\u00BB" + +script_to_roman: + map: + + # Left-pointing double angle quotation mark mapped to Two Less-than signs + "\u00AB": "\u003C\u003C" + # Right-pointing double angle quotation mark mapped to Two Greater-than signs + "\u00BB": "\u003E\u003E" + "\u2116": "No\u0332" + "\u0400": "E\u0300" + "\u0401": "E\u0308" + "\u0402": "\u0110" + "\u0403": "G\u0301" + "\u0404": "E\u0304" + "\u0405": "S\u0307" + "\u0406": "I\u0304" + "\u0407": "I\u0308" + "\u0408": "I\u0310" + "\u0409": "Lj" + "\u040A": "Nj" + "\u040B": "C\u0301" + "\u040C": "K\u0301" + "\u040D": "I\u0300" + "\u040E": "U\u0306" + "\u040F": "Dz\u030C" + "\u0410": "A" + "\u0411": "B" + "\u0412": "V" + "\u0413": "G" + "\u0414": "D" + "\u0415": "E" + "\u0416": "Zh" + "\u0417": "Z" + "\u0418": "I" + "\u0419": "I\u0306" + "\u041A": "K" + "\u041B": "L" + "\u041C": "M" + "\u041D": "N" + "\u041E": "O" + "\u041F": "P" + "\u0420": "R" + "\u0421": "S" + "\u0422": "T" + "\u0423": "U" + "\u0424": "F" + "\u0425": "Kh" + "\u0426": "T\uFE20S\uFE21" + "\u0427": "Ch" + "\u0428": "Sh" + "\u0429": "Shch" + "\u042A": "\u02BA\u0333" + "\u042B": "Y" + "\u042C": "\u02B9\u0333" + "\u042D": "E\u0307" + "\u042E": "I\uFE20U\uFE21" + "\u042F": "I\uFE20A\uFE21" + "\u0430": "a" + "\u0431": "b" + "\u0432": "v" + 
"\u0433": "g" + "\u0434": "d" + "\u0435": "e" + "\u0436": "zh" + "\u0437": "z" + "\u0438": "i" + "\u0439": "i\u0306" + "\u043A": "k" + "\u043B": "l" + "\u043C": "m" + "\u043D": "n" + "\u043E": "o" + "\u043F": "p" + "\u0440": "r" + "\u0441": "s" + "\u0442": "t" + "\u0443": "u" + "\u0444": "f" + "\u0445": "kh" + "\u0446": "t\uFE20s\uFE21" + "\u0447": "ch" + "\u0448": "sh" + "\u0449": "shch" + "\u044A": "\u02BA" + "\u044B": "y" + "\u044C": "\u02B9" + "\u044D": "e\u0307" + "\u044E": "i\uFE20u\uFE21" + "\u044F": "i\uFE20a\uFE21" + "\u0450": "e\u0300" + "\u0451": "e\u0308" + "\u0452": "\u0111" + "\u0453": "g\u0301" + "\u0454": "e\u0304" + "\u0455": "s\u0307" + "\u0456": "i\u0304" + "\u0457": "i\u0308" + "\u0458": "i\u0310" + "\u0459": "lj" + "\u045A": "nj" + "\u045B": "c\u0301" + "\u045C": "k\u0301" + "\u045D": "i\u0300" + "\u045E": "u\u0306" + "\u045F": "dz\u030C" + "\u0460": "O\u0304\u0324" + "\u0461": "o\u0304\u0324" + "\u0462": "I\uFE20E\uFE21" + "\u0463": "i\uFE20e\uFE21" + "\u0464": "I\uFE20E\uFE21\u0304" + "\u0465": "i\uFE20e\uFE21\u0304" + "\u0466": "E\u0328" + "\u0467": "e\u0328" + "\u0468": "I\uFE20E\uFE21\u0328" + "\u0469": "i\uFE20e\uFE21\u0328" + "\u046A": "O\u0328" + "\u046B": "o\u0328" + "\u046C": "I\uFE20O\uFE21\u0328" + "\u046D": "i\uFE20o\uFE21\u0328" + "\u046E": "K\uFE20S\uFE21" + "\u046F": "k\uFE20s\uFE21" + "\u0470": "P\uFE20S\uFE21" + "\u0471": "p\uFE20s\uFE21" + "\u0472": "F\u0307" + "\u0473": "f\u0307" + "\u0474": "V\u0307" + "\u0475": "v\u0307" + "\u0476": "V\u0308" + "\u0477": "v\u0308" + "\u0478": "O\uFE20u\uFE21" + "\u0479": "o\uFE20u\uFE21" + "\u047A": "O\u0304\u0323" + "\u047B": "o\u0304\u0323" + "\u047C": "O\u0303" + "\u047D": "o\u0303" + "\u047E": "O\u0304\uFE20T\uFE21" + "\u047F": "o\u0304\uFE20t\uFE21" + "\u0480": "Q\u0302" + "\u0481": "q\u0302" + "\u0482": "(|)" + "\u0483": "\u007E" + "\u0484": "\u0311" + "\u0485": "\u0314" + "\u0486": "\u0313" + "\u0487": "\u0303" + "\u0488": "(^)" + "\u0489": "(')" + "\u048A": "I\u0306\u0323" + 
"\u048B": "i\u0306\u0323" + "\u048C": "E\u0306\u0323" + "\u048D": "e\u0306\u0323" + "\u048E": "R\u0306" + "\u048F": "r\u0306" + "\u0490": "G\u0306" + "\u0491": "g\u0306" + "\u0492": "Gh" + "\u0493": "gh" + "\u0494": "G\u0327" + "\u0495": "g\u0327" + "\u0496": "J" + "\u0497": "j" + "\u0498": "T\uFE20H\uFE21" + "\u0499": "t\uFE20h\uFE21" + "\u049A": "Q" + "\u049B": "q" + "\u049C": "G\u0307" + "\u049D": "g\u0307" + "\u049E": "Q\u0304" + "\u049F": "q\u0304" + "\u04A0": "Q\u0300" + "\u04A1": "q\u0300" + "\u04A2": "N\uFE20G\uFE21" + "\u04A3": "n\uFE20g\uFE21" + "\u04A4": "N\u0301G\u0300" + "\u04A5": "n\u0301g\u0300" + "\u04A6": "Ph" + "\u04A7": "ph" + "\u04A8": "W" + "\u04A9": "w" + "\u04AA": "Th" + "\u04AB": "th" + "\u04AC": "T\u0327" + "\u04AD": "t\u0327" + "\u04AE": "U\u0307" + "\u04AF": "u\u0307" + "\u04B0": "U\u0302" + "\u04B1": "u\u0302" + "\u04B2": "K\uFE20H\uFE21" + "\u04B3": "k\uFE20h\uFE21" + "\u04B4": "T\uFE20S\uFE21\u0307" + "\u04B5": "t\uFE20s\uFE21\u0307" + "\u04B6": "J\u0302" + "\u04B7": "j\u0302" + "\u04B8": "J\u0304" + "\u04B9": "j\u0304" + "\u04BA": "H\u0307" + "\u04BB": "h\u0307" + "\u04BC": "C\u0301h" + "\u04BD": "c\u0301h" + "\u04BE": "C\u0301h\u0301" + "\u04BF": "c\u0301h\u0301" + "\u04C0": "H\u0308" + "\u04C1": "J\u0306" + "\u04C2": "j\u0306" + "\u04C3": "Q\u0307" + "\u04C4": "q\u0307" + "\u04C5": "L\u0326" + "\u04C6": "l\u0326" + "\u04C7": "N\uFE20\u0327G\uFE21" + "\u04C8": "n\uFE20\u0327g\uFE21" + "\u04C9": "N\uFE20\u0323G\uFE21" + "\u04CA": "n\uFE20\u0323g\uFE21" + "\u04CB": "C\u0323h" + "\u04CC": "c\u0323h" + "\u04CD": "M\u0323" + "\u04CE": "m\u0323" + "\u04CF": "h\u0308" + "\u04D0": "A\u0306\u0323" + "\u04D1": "a\u0306\u0323" + "\u04D2": "A\u0308" + "\u04D3": "a\u0308" + "\u04D4": "\u00C6" + "\u04D5": "\u00E6" + "\u04D6": "E\u0306" + "\u04D7": "e\u0306" + "\u04D8": "A\u0306" + "\u04D9": "a\u0306" + "\u04DA": "A\u030B" + "\u04DB": "a\u030B" + "\u04DC": "J\u0308" + "\u04DD": "j\u0308" + "\u04DE": "Z\u0308" + "\u04DF": "z\u0308" + "\u04E0": 
"D\uFE20Z\uFE21" + "\u04E1": "d\uFE20z\uFE21" + "\u04E2": "I\u0304\u0323" + "\u04E3": "i\u0304\u0323" + "\u04E4": "I\u0308\u0323" + "\u04E5": "i\u0308\u0323" + "\u04E6": "O\u0308" + "\u04E7": "o\u0308" + "\u04E8": "O\u0307" + "\u04E9": "o\u0307" + "\u04EA": "O\u0304" + "\u04EB": "o\u0304" + "\u04EC": "E\u0308\u0323" + "\u04ED": "e\u0308\u0323" + "\u04EE": "U\u0304" + "\u04EF": "u\u0304" + "\u04F0": "U\u0308" + "\u04F1": "u\u0308" + "\u04F2": "U\u030B" + "\u04F3": "u\u030B" + "\u04F4": "C\u0308h" + "\u04F5": "c\u0308h" + "\u04F6": "G\u0323" + "\u04F7": "g\u0323" + "\u04F8": "Y\u0308" + "\u04F9": "y\u0308" + "\u04FA": "Gh\u0327" + "\u04FB": "gh\u0327" + "\u04FC": "H\u0327" + "\u04FD": "h\u0327" + "\u04FE": "H\u0304" + "\u04FF": "h\u0304" + "\u0500": "D\u0307" + "\u0501": "d\u0307" + "\u0502": "D\u0301" + "\u0503": "d\u0301" + "\u0504": "Z\u0301" + "\u0505": "z\u0301" + "\u0506": "Z\u0327" + "\u0507": "z\u0327" + "\u0508": "L\u0301" + "\u0509": "l\u0301" + "\u050A": "N\u0301" + "\u050B": "n\u0301" + "\u050C": "S\u0301" + "\u050D": "s\u0301" + "\u050E": "T\u0301" + "\u050F": "t\u0301" + "\u0510": "Z\u0307" + "\u0511": "z\u0307" + "\u0512": "L\u0327" + "\u0513": "l\u0327" + "\u0514": "Lkh\u0307" + "\u0515": "lkh\u0307" + "\u0516": "Rkh\u0307" + "\u0517": "rkh\u0307" + "\u0518": "A\u0310" + "\u0519": "a\u0310" + "\u051A": "Q\u0308" + "\u051B": "q\u0308" + "\u051C": "W\u0308" + "\u051D": "w\u0308" + "\u051E": "K\u0300" + "\u051F": "k\u0300" + "\u0520": "L\u0324" + "\u0521": "l\u0324" + "\u0522": "N\u0327" + "\u0523": "n\u0327" + "\u0524": "P\u0323" + "\u0525": "p\u0323" + "\u0526": "Sh\u0323" + "\u0527": "sh\u0323" + "\u0528": "N\u0326" + "\u0529": "n\u0326" + "\u052A": "D\uFE20z\uFE21h" + "\u052B": "d\uFE20z\uFE21h" + "\u052C": "D\uFE20c\uFE21h" + "\u052D": "d\uFE20c\uFE21h" + "\u052E": "L\u0323" + "\u052F": "l\u0323" diff --git a/scriptshifter/tables/data/index.yml b/scriptshifter/tables/data/index.yml index 9b3be81..75c8970 100644 --- 
a/scriptshifter/tables/data/index.yml +++ b/scriptshifter/tables/data/index.yml @@ -21,17 +21,6 @@ arabic: armenian: marc_code: arm name: Armenian -asian_cyrillic: - description: 'Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza, - Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat, Chechen, Chukchi, - Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush, Inuit, Kabardian, - Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty, Komi, - Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Mansi, Mari, Moldovan, - Molodstov, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, - Selkup, Shor, Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.' - marc_code: abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa, - krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah - name: Asian Cyrillic azerbaijani_cyrillic: marc_code: aze name: Azerbaijani (Cyrillic) @@ -64,6 +53,17 @@ church_slavonic: chuvash_cyrillic: marc_code: chv name: Chuvash (Cyrillic) +cyrillic_generic: + description: 'Multi-purpose transliteration for most languages that use the Cyrillic script: + Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Belarusian, Bulgarian, + Buryat, Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush, + Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty, + Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Macedonian, Mansi, Mari, + Moldovan, Molodstov, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, + Russian, Selkup, Serbian, Shor, Tabasaran, Tat, Tuva, Udekhe, Udmurt, Ukrainian, Yakut.' 
+ marc_code: abk, ady, alt, ava, bak, bel, bul, che, chm, chv, dar, ale, esk, kbd, xal, krc, kaa, + krl, kom, kum, lez, lit, mac, nog, oss, rum, rom, sah, sel, srp, udm, ukr + name: Cyrillic (Generic) devanagari: marc_code: hin, san name: Devanagari @@ -261,6 +261,9 @@ thai_alt: tibetan: marc_code: tib name: Tibetan +tod_mongolian: + marc_code: xal + name: Tod Mongolian turkmen_cyrillic: marc_code: tuk name: Turkmen (Cyrillic) @@ -293,3 +296,4 @@ yiddish: name: Yiddish yuit_cyrillic: name: Yuit (Cyrillic) + diff --git a/scriptshifter/tables/data/macedonian.yml b/scriptshifter/tables/data/macedonian.yml index 49728d0..1ee7168 100644 --- a/scriptshifter/tables/data/macedonian.yml +++ b/scriptshifter/tables/data/macedonian.yml @@ -6,45 +6,94 @@ general: roman_to_script: map: "G\u0301": "\u0403" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01F4": "\u0403" "G": "\u0413" "g\u0301": "\u0453" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01F5": "\u0453" "g": "\u0433" "\u0110": "\u0402" - # this conversion shouldn't be needed, but does no harm - "DZ\u030C": "\u040F" - # this conversion shouldn't be needed, but does no harm - "DZ": "\u0405" - "Dz\u030C": "\u040F" - "Dz": "\u0405" + "D\uFE20Z\u030C\uFE21": "\u040F" + "D\uFE20z\u030C\uFE21": "\u040F" + "d\uFE20Z\u030C\uFE21": "\u040F" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01C4": "\u040F" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01C5": "\u040F" + "d\uFE20z\u030C\uFE21": "\u045F" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01C6": "\u045F" + "D\uFE20Z\uFE21": "\u0405" + "D\uFE20z\uFE21": "\u0405" + "d\uFE20Z\uFE21": "\u0405" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01F1": "\u0405" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01F2": "\u0405" + "d\uFE20z\uFE21": "\u0455" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01F3": "\u0455" "\u0111": "\u0452" "dz\u030C": "\u045F" "dz": "\u0455"
"Z\u030C": "\u0416" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u017D": "\u0416" "z\u030C": "\u0436" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u017E": "\u0436" "z": "\u0437" "I": "\u0418" "i": "\u0438" "J": "\u0408" "j": "\u0458" "K\u0301": "\u040C" - "H": "\u0425" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u1E30": "\u040C" "k\u0301": "\u045C" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u1E31": "\u045C" + "H": "\u0425" "h": "\u0445" - # this conversion shouldn't be needed, but does no harm "LJ": "\u0409" "Lj": "\u0409" + "lJ": "\u0409" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01C7": "\u0409" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01C8": "\u0409" "lj": "\u0459" - # this conversion shouldn't be needed, but does no harm + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01C9": "\u0459" "NJ": "\u040A" "Nj": "\u040A" + "nJ": "\u040A" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01CA": "\u040A" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01CB": "\u040A" "nj": "\u045A" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u01CC": "\u045A" "S\u030C": "\u0428" + "\u0160": "\u0428" "s\u030C": "\u0448" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u0161": "\u0448" "C\u0301": "\u040B" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u0106": "\u040B" "C\u030C": "\u0427" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u010C": "\u0427" "C": "\u0426" "c\u0301": "\u045B" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u0107": "\u045B" "c\u030C": "\u0447" + # Mapping from precomposed non-MARC-8 Latin equivalent + "\u010D": "\u0447" "c": "\u0446" script_to_roman: @@ -57,8 +106,8 @@ script_to_roman: "\u0452": "\u0111" "\u0416": "Z\u030C" "\u0436": "z\u030C" - "\u0405": "Dz" - "\u0455": "dz" + "\u0405": "D\uFE20Z\uFE21" + "\u0455": "d\uFE20z\uFE21" "\u0418": "I"
"\u0438": "i" "\u0408": "J" @@ -79,9 +128,6 @@ script_to_roman: "\u0446": "c" "\u0427": "C\u030C" "\u0447": "c\u030C" - "\u040F": "Dz\u030C" - "\u045F": "dz\u030C" - "\u1029": "D\uFE20Z\uFE21" - "\u0455": "d\uFE20z\uFE21" "\u040F": "D\uFE20Z\u030C\uFE21" "\u045F": "d\uFE20z\u030C\uFE21" + diff --git a/scriptshifter/tables/data/manchu.yml b/scriptshifter/tables/data/manchu.yml new file mode 100644 index 0000000..55edddf --- /dev/null +++ b/scriptshifter/tables/data/manchu.yml @@ -0,0 +1,191 @@ +--- +general: + name: Manchu + parents: + - _ignore_base + case_sensitive: false + +roman_to_script: + + map: + # Generates Narrow No-Break Space + "\u002D": "\u202F" + "Ai": "\u1820\u1873" + "ai": "\u1820\u1873" + "A": "\u1820" + "a": "\u1820" + "E": "\u185D" + "e": "\u185D" + "O": "\u1823" + "o": "\u1823" + "U\u0304": "\u1861" + "u\u0304": "\u1861" + "U\u0308": "\u1861" + "u\u0308": "\u1861" + "U": "\u1860" + "u": "\u1860" + "I": "\u1873" + "i": "\u1873" + "B": "\u182A" + "b": "\u182A" + "Cy": "\u1871\u1873" + "cy": "\u1871\u1873" + "C": "\u1834" + "c": "\u1834" + "DZ": "\u186F" + "Dz": "\u186F" + "dz": "\u186F" + "D": "\u1869" + "d": "\u1869" + "Fa": "\u1876\u1820" + "fa": "\u1876\u1820" + "Fe": "\u1876\u185D" + "fe": "\u1876\u185D" + "Fi": "\u1838\u185E" + "fi": "\u1838\u185E" + "Fo": "\u1838\u1823" + "fo": "\u1838\u1823" + "Fu\u0304": "\u1838\u1861" + "fu\u0304": "\u1838\u1861" + "Fu\u0308": "\u1838\u1861" + "fu\u0308": "\u1838\u1861" + "Fu": "\u1838\u1860" + "fu": "\u1838\u1860" + "F": "\u1838" + "f": "\u1838" + "G\u0027": "\u186C" + "g\u0027": "\u186C" + "G": "\u1864" + "g": "\u1864" + "H\u0027": "\u186D" + "h\u0027": "\u186D" + "H": "\u1865" + "h": "\u1865" + "Jy": "\u1877\u1873" + "jy": "\u1877\u1873" + "J": "\u1835" + "j": "\u1835" + "K\u0027": "\u183A" + "k\u0027": "\u183A" + "K": "\u1874" + "k": "\u1874" + "L": "\u182F" + "l": "\u182F" + "M": "\u182E" + "m": "\u182E" + "NG": "\u1829" + "ng": "\u1829" + "N": "\u1828" + "n": "\u1828" + "P": "\u1866" + "p": "\u1866"
+ "R": "\u1875" + "r": "\u1875" + "Sy": "\u186E\u185F" + "sy": "\u186E\u185F" + "S\u030C": "\u1867" + "s\u030C": "\u1867" + "S": "\u1830" + "s": "\u1830" + "TS": "\u186E" + "Ts": "\u186E" + "ts": "\u186E" + "T": "\u1868" + "t": "\u1868" + "W": "\u1838" + "w": "\u1838" + "Y": "\u1836" + "y": "\u1836" + "ZH": "\u1877" + "Zh": "\u1877" + "zh": "\u1877" + "Z\u030C": "\u1870" + "z\u030C": "\u1870" + "Z": "\u1853" + "z": "\u1853" + "...": "\u1801" + "..": "\u1803" + ".": "\u180A" + ",": "\u1802" + ":": "\u1804" + # Left pointing double angle quotation mark + "\u003C\u003C": "\u300A" + # Right pointing double angle quotation mark + "\u003E\u003E": "\u300B" + "0": "\u1810" + "1": "\u1811" + "2": "\u1812" + "3": "\u1813" + "4": "\u1814" + "5": "\u1815" + "6": "\u1816" + "7": "\u1817" + "8": "\u1818" + "9": "\u1819" + +script_to_roman: + + map: + # Generates Narrow No-Break Space + "\u202F": "\u002D" + "\u1801": "..." + "\u1802": "," + "\u1803": ".." + "\u1804": ":" + "\u180A": "." + "\u180E": "-" + "\u1810": "0" + "\u1811": "1" + "\u1812": "2" + "\u1813": "3" + "\u1814": "4" + "\u1815": "5" + "\u1816": "6" + "\u1817": "7" + "\u1818": "8" + "\u1819": "9" + "\u1820": "a" + "\u1822\u1822": "i" + "\u1822": "i" + "\u1823": "o" + "\u1828": "n" + "\u1829": "ng" + "\u182A": "b" + "\u182E": "m" + "\u182F": "l" + "\u1830": "s" + "\u1834": "c" + "\u1835": "j" + "\u1836": "y" + "\u1838": "w" + "\u183A": "k\u0027" + "\u1853": "z" + "\u1856": "v" + "\u185D": "e" + "\u1860": "u" + "\u1861": "u\u0304" + "\u1862": "ng" + "\u1864": "g" + "\u1865": "h" + "\u1866": "p" + "\u1867": "s\u030C" + "\u1868": "t" + "\u1869": "d" + "\u186A": "j" + "\u186C": "g\u0027" + "\u186D": "h\u0027" + "\u186E\u185F": "sy" + "\u186E": "ts" + "\u186F": "dz" + "\u1870": "z\u030C" + "\u1871\u1873": "cy" + "\u1877\u1873": "jy" + "\u1873": "i" + "\u1874": "k" + "\u1875": "r" + "\u1876": "f" + "\u202F": "\u002D" + # Left pointing double angle quotation mark + "\u300A": "\u003C\u003C" + # Right pointing double angle 
quotation mark + "\u300B": "\u003E\u003E" diff --git a/scriptshifter/tables/data/mongolian_mongol_bichig.yml b/scriptshifter/tables/data/mongolian_mongol_bichig.yml index 2cf7839..6b81519 100644 --- a/scriptshifter/tables/data/mongolian_mongol_bichig.yml +++ b/scriptshifter/tables/data/mongolian_mongol_bichig.yml @@ -8,6 +8,10 @@ general: roman_to_script: map: + "\u0020Latin": "\u0020\u182F\u1820\u1832\u180B\u1822\u1828" + "\u0020latin": "\u0020\u182F\u1820\u1832\u180B\u1822\u1828" + "Mate\u0307riyal": "\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F" + "mate\u0307riyal": "\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F" "\u002Daca": "\u202F\u1820\u1834\u1820" "\u002DA": "\u180E\u1820" "\u002Da": "\u180E\u1820" @@ -33,6 +37,9 @@ roman_to_script: "u\u0307": "\u1826" "U": "\u1824" "u": "\u1824" + # Feminine g control when followed by l + "nggl": "\u1829\u182D\u180D\u182F" + "ng\u0307": "\u1828\u182D" "NG": "\u1829" "nG": "\u1829" "ng": "\u1829" @@ -72,8 +79,12 @@ roman_to_script: "s\u0301": "\u1831" "S": "\u1830" "s": "\u1830" + "T'": "\u1832\u180B" + "t'": "\u1832\u180B" "T": "\u1832" "t": "\u1832" + "D'": "\u1833\u180B" + "d'": "\u1833\u180B" "D": "\u1833" "d": "\u1833" "J": "\u1835" @@ -109,10 +120,38 @@ roman_to_script: "c": "\u1834" "H": "\u183E" "h": "\u183E" - "-": "\u180E" + "...": "\u1801" + "..": "\u1803" + ".": "\u180A" + ",": "\u1802" + ":": "\u1804" + "-": "\u202F" + "&": "\u180A" + # Spacing grave to Mongolian Free Variation Selector Three + "`": "\u180D" + "'": "\u180B" + "*": "\u00B7" + # hyphen minus to Narrow no-break space + "\u002D": "\u202F" + # low line to Mongolian vowel separator + "\u005F": "\u180E" + "\u003C\u003C": "\u300A" + "\u003E\u003E": "\u300B" + "0": "\u1810" + "1": "\u1811" + "2": "\u1812" + "3": "\u1813" + "4": "\u1814" + "5": "\u1815" + "6": "\u1816" + "7": "\u1817" + "8": "\u1818" + "9": "\u1819" script_to_roman: map: + "\u0020\u182F\u1820\u1832\u180B\u1822\u1828": "\u0020latin" +
"\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F": "mate\u0307riyal" # ga "\u182D\u1820": "g\u0307a" # go @@ -137,8 +176,6 @@ script_to_roman: "\u182D\u1827": "ge\u0307" # eg "\u1821\u182D": "eg" - # ig - "\u1822\u182D": "ig" # oeg "\u1825\u182D": "o\u0307g" # ueg @@ -146,17 +183,11 @@ script_to_roman: # eeg "\u1827\u182D": "e\u0307g" # qa - "\u182C\u1820": "q\u0307a" + "\u182C\u1820": "qa" # qo - "\u182C\u1823": "q\u0307o" + "\u182C\u1823": "qo" # qu - "\u182C\u1824": "q\u0307u" - # aq (should not occur) - "\u1820\u182C": "aq" - # oq (should not occur) - "\u1823\u182C": "oq" - # uq (should not occur) - "\u1824\u182C": "uq" + "\u182C\u1824": "qu" # ke "\u182C\u1821": "ke" # ki @@ -167,37 +198,29 @@ script_to_roman: "\u182C\u1826": "ku\u0307" # kee "\u182C\u1827": "ke\u0307" - # ek (should not occur) - "\u1821\u182C": "ek" - # ik should not occur) - "\u1822\u182C": "ik" - # oek (should not occur) - "\u1825\u182C": "o\u0307k" - # uek (should not occur) - "\u1826\u182C": "o\u0307k" - # eek should not occur) - "\u1827\u182C": "e\u0307k" # non-connecting vowel a "\u180E\u1820": "\u002Da" + "\u202F\u1820": "\u002Da" # non-connecting vowel e "\u180E\u1821": "\u002De" - # non-connectubg vowel i + "\u202F\u1821": "\u002De" + # non-connecting vowel i "\u180E\u1822": "\u002Di" + "\u202F\u1822": "\u002Di" # Other Mongolian vowel separators to hyphen - "\u180E": "\u002De" + "\u180E": "\u002D" # Narrow no-break space to hyphen "\u202F": "\u002D" - # Other Mongolian vowel NOT associated with g or k/q "\u1801": "..." "\u1802": "," - "\u1803": "." + "\u1803": ".." "\u1804": ":" "\u1805": "*" "\u1806": "-" "\u1807": "\u0020" "\u1808": "," "\u1809": "." - "\u180A": "-" + "\u180A": "."
"\u1810": "0" "\u1811": "1" "\u1812": "2" @@ -217,6 +240,8 @@ script_to_roman: "\u1825": "o\u0307" "\u1826": "u\u0307" "\u1827": "e\u0307" + # Feminine g control when followed by l + "\u1829\u182D\u180D\u182F": "nggl" "\u1828": "n" "\u1829": "ng" "\u182A": "b" @@ -245,3 +270,6 @@ script_to_roman: "\u1841": "zh" "\u1842": "ch" "\u1878": "c\u0307" + # Double angle brackets to doubled less-than / greater-than signs + "\u300A": "\u003C\u003C" + "\u300B": "\u003E\u003E" diff --git a/scriptshifter/tables/data/serbian.yml b/scriptshifter/tables/data/serbian.yml index 67ad28c..bc92df9 100644 --- a/scriptshifter/tables/data/serbian.yml +++ b/scriptshifter/tables/data/serbian.yml @@ -6,45 +6,61 @@ general: roman_to_script: map: "G\u0301": "\u0403" + "\u01F4": "\u0403" "G": "\u0413" "g\u0301": "\u0453" + "\u01f5": "\u0453" "g": "\u0433" "\u0110": "\u0402" - # this conversion shouldn't be needed, but does no harm + # Mapping to non-MARC-8 Capital Latin letter African D: should not occur + "\u0189": "\u0402" "DZ\u030C": "\u040F" - # this conversion shouldn't be needed, but does no harm - "DZ": "\u0405" "Dz\u030C": "\u040F" - "Dz": "\u0405" - "\u0111": "\u0452" + "dZ\u030C": "\u040F" + "\u01C5": "\u040F" "dz\u030C": "\u045F" + "\u01C6": "\u045F" + "\u0111": "\u0452" "dz": "\u0455" "Z\u030C": "\u0416" + "\u017D": "\u0416" "z\u030C": "\u0436" + "\u017E": "\u0436" "z": "\u0437" "I": "\u0418" "i": "\u0438" - "J": "\u0408" - "j": "\u0458" "K\u0301": "\u040C" "H": "\u0425" "k\u0301": "\u045C" "h": "\u0445" - # this conversion shouldn't be needed, but does no harm "LJ": "\u0409" "Lj": "\u0409" + "lJ": "\u0409" + "\u01C8": "\u0409" "lj": "\u0459" - # this conversion shouldn't be needed, but does no harm + "\u01C9": "\u0459" "NJ": "\u040A" "Nj": "\u040A" + "nJ": "\u040A" + "\u01CA": "\u040A" + "\u01CB": "\u040A" "nj": "\u045A" + "\u01CC": "\u045A" + "J": "\u0408" + "j": "\u0458" "S\u030C": "\u0428" + "\u0160": "\u0428" "s\u030C": "\u0448" + "\u0161": "\u0448" "C\u0301": "\u040B" + "\u0106": "\u040B" "C\u030C":
"\u0427" + "\u010C": "\u0427" "C": "\u0426" "c\u0301": "\u045B" + "\u0107": "\u045B" "c\u030C": "\u0447" + "\u010D": "\u0447" "c": "\u0446" script_to_roman: diff --git a/scriptshifter/tables/data/tod_mongolian.yml b/scriptshifter/tables/data/tod_mongolian.yml new file mode 100644 index 0000000..ebf4dd0 --- /dev/null +++ b/scriptshifter/tables/data/tod_mongolian.yml @@ -0,0 +1,305 @@ +--- +general: + name: Tod Mongolian + parents: + - _ignore_base + case_sensitive: false + +roman_to_script: + + map: + # Generates Narrow No-Break Space + "\u002D": "\u202F" + "\u002Daca": "\u202F\u1820\u1854\u1820" + "\u002DA": "\u180E\u1820" + "\u002Da": "\u180E\u1820" + "A": "\u1820" + "a": "\u1820" + "\u002Dece": "\u202F\u1844\u1854\u1844" + "\u002DE": "\u180E\u1844" + "\u002De": "\u180E\u1844" + # Generates Narrow No-Break Space + "\u002D": "\u202F" + "E": "\u1844" + "e": "\u1844" + "\u002DI": "\u180E\u1845" + "\u002Di": "\u180E\u1845" + "I": "\u1845" + "i": "\u1845" + "O\u0308": "\u1848" + "o\u0308": "\u1848" + "O": "\u1846" + "o": "\u1846" + "U\u0308": "\u1849" + "u\u0308": "\u1849" + "U": "\u1847" + "u": "\u1847" + # n followed by a g with dot + "ng\u0307": "\u1828\u184E" + # this conversion shouldn't be needed, but does no harm + "nG": "\u184A" + "ng": "\u184A" + "N\u0303": "\u185B" + "n\u0303": "\u185B" + "N": "\u1828" + "n": "\u1828" + "B": "\u184B" + "b": "\u184B" + "P": "\u184C" + "p": "\u184C" + "Q": "\u184E" + "q": "\u184E" + "KH": "\u183B" + "Kh": "\u183B" + # this conversion shouldn't be needed, but does no harm + "kH": "\u183B" + "kh": "\u183B" + "K\u0307": "\u1857" + "k\u0307": "\u1857" + "Ka": "\u1857\u1820" + "ka": "\u1857\u1820" + "Ke": "\u184D\u1844" + "ke": "\u184D\u1844" + "Ki": "\u184D\u1845" + "ki": "\u184D\u1845" + "Ko\u0308": "\u184D\u1848" + "ko\u0308": "\u184D\u1848" + "Ko": "\u1857\u1846" + "ko": "\u1857\u1846" + "Ku\u0308": "\u184D\u1849" + "ku\u0308": "\u184D\u1849" + "Ku": "\u1857\u1847" + "ku": "\u1857\u1847" + "K": "\u1857" + "k": "\u1857" + "G\u0307":
"\u184E" + "g\u0307": "\u184E" + "G": "\u184E" + "g": "\u184E" + "M": "\u184F" + "m": "\u184F" + "LH": "\u1840" + "Lh": "\u1840" + # this conversion shouldn't be needed, but does no harm + "lH": "\u1840" + "lh": "\u1840" + "L": "\u182F" + "l": "\u182F" + "TS\u0307": "\u1854" + # this conversion shouldn't be needed, but does no harm + "Ts\u0307": "\u1854" + # this conversion shouldn't be needed, but does no harm + "tS\u0307": "\u1854" + "ts\u0307": "\u1854" + "S\u0301": "\u1831" + "s\u0301": "\u1831" + "S": "\u1830" + "s": "\u1830" + "T": "\u1850" + "t": "\u1850" + "D": "\u1851" + "d": "\u1851" + "J\u0301": "\u185A" + "j\u0301": "\u185A" + "J": "\u1853" + "j": "\u1853" + "Y": "\u1855" + "y": "\u1855" + "V": "\u1856" + "v": "\u1856" + "W": "\u1856" + "w": "\u1856" + "F": "\u1839" + "f": "\u1839" + "Xa": "\u184D\u1820" + "xa": "\u184D\u1820" + "Xe": "\u184D\u1844" + "xe": "\u184D\u1844" + "Xi": "\u184D\u1845" + "xi": "\u184D\u1845" + "Xo\u0308": "\u184D\u1848" + "xo\u0308": "\u184D\u1848" + "Xo": "\u184D\u1846" + "xo": "\u184D\u1846" + "Xu\u0308": "\u184D\u1849" + "xu\u0308": "\u184D\u1849" + "Xu": "\u184D\u1847" + "xu": "\u184D\u1847" + "X": "\u184D" + "x": "\u184D" + "Z\u0301": "\u183F" + "z\u0301": "\u183F" + "ZR": "\u183F" + # this conversion shouldn't be needed, but does no harm + "Zr": "\u183F" + # this conversion shouldn't be needed, but does no harm + "zR": "\u183F" + "zr": "\u183F" + "R": "\u1837" + "r": "\u1837" + "ZH": "\u1841" + "Zh": "\u1841" + # this conversion shouldn't be needed, but does no harm + "zH": "\u1841" + "zh": "\u1841" + "CH": "\u1842" + "Ch": "\u1842" + # this conversion shouldn't be needed, but does no harm + "cH": "\u1842" + "ch": "\u1842" + "C\u0307": "\u1878" + "c\u0307": "\u1878" + "C\u0301": "\u183C" + "c\u0301": "\u183C" + "C": "\u1852" + "c": "\u1852" + "H": "\u183E" + "h": "\u183E" + "Z": "\u1834" + "z": "\u1834" + "-": "\u180E" + "...": "\u1801" + "..": "\u1803" + ".": "\u180A" + ",": "\u1802" + ":": "\u1804" + # Left pointing 
double angle quotation mark + "\u003C\u003C": "\u300A" + # Right pointing double angle quotation mark + "\u003E\u003E": "\u300B" + "0": "\u1810" + "1": "\u1811" + "2": "\u1812" + "3": "\u1813" + "4": "\u1814" + "5": "\u1815" + "6": "\u1816" + "7": "\u1817" + "8": "\u1818" + "9": "\u1819" + "\u0304": "\u1843" + +script_to_roman: + + map: + # ga + "\u184E\u1820": "g\u0307a" + # go + "\u184E\u1846": "g\u0307o" + # gu + "\u184E\u1847": "g\u0307u" + # aq + "\u1820\u184E": "aq" + # oq + "\u1846\u184E": "oq" + # uq + "\u1847\u184E": "uq" + # ge + "\u184E\u1844": "ge" + # gi + "\u184E\u1845": "gi" + # goe + "\u184E\u1848": "go\u0308" + # gue + "\u184E\u1849": "gu\u0308" + # eq + "\u1844\u184E": "eq" + # iq + "\u1845\u184E": "iq" + # oeq + "\u1848\u184E": "o\u0308q" + # ueq + "\u1849\u184E": "u\u0308q" + # xa + "\u184D\u1820": "xa" + # xo + "\u184D\u1846": "xo" + # xu + "\u184D\u1847": "xu" + # ke + "\u184D\u1844": "ke" + # ki + "\u184D\u1845": "ki" + # koe + "\u184D\u1848": "ko\u0308" + # kue + "\u184D\u1849": "ku\u0308" + # non-connecting vowel a + "\u180E\u1820": "\u002Da" + # non-connecting vowel e + "\u180E\u1844": "\u002De" + # non-connecting vowel i + "\u180E\u1845": "\u002Di" + # Other Mongolian vowel separators to hyphen + "\u180E": "\u002D" + # Narrow no-break space to hyphen + "\u202F": "\u002D" + "\u1801": "..." + "\u1803": ".."
+ "\u1802": "," + "\u1804": ":" + "\u1810": "0" + "\u1811": "1" + "\u1812": "2" + "\u1813": "3" + "\u1814": "4" + "\u1815": "5" + "\u1816": "6" + "\u1817": "7" + "\u1818": "8" + "\u1819": "9" + "\u1820": "a" + "\u1828": "n" + "\u182F": "l" + "\u1830": "s" + "\u1831": "s\u0301" + "\u1834": "z" + "\u1837": "r" + "\u1839": "f" + "\u183C": "c\u0301" + "\u183E": "h" + "\u183F": "z\u0301" + "\u1843": "\u0304" + "\u1844": "e" + "\u1845": "i" + "\u1846": "o" + "\u1847": "u" + "\u1848": "o\u0308" + "\u1849": "u\u0308" + "\u184A": "ng" + "\u184B": "b" + "\u184C": "p" + "\u184D\u1820": "xa" + "\u184D\u1844": "xe" + "\u184D\u1845": "xi" + "\u184D\u1848": "xo\u0308" + "\u184D\u1846": "xo" + "\u184D\u1849": "xu\u0308" + "\u184D\u1847": "xu" + "\u184D": "q" + "\u184E": "g" + "\u184F": "m" + "\u1850": "t" + "\u1851": "d" + "\u1852": "c" + "\u1853": "j" + "\u1854": "ts" + "\u1855": "y" + "\u1856": "v" + "\u1857\u1820": "ka" + "\u1857\u1844": "ke" + "\u1857\u1845": "ki" + "\u1857\u1846": "ko" + "\u1857\u1847": "ku" + "\u1857\u1848": "ko\u0308" + "\u1857\u1849": "ku\u0308" + "\u1857": "k" + "\u1858": "g" + "\u1859": "h" + "\u185A": "j\u0301" + "\u185B": "k\u0307" + "\u185C": "j" + "\u00AB": "\u003C\u003C" + "\u00BB": "\u003E\u003E" + "\u300A": "\u0022" + "\u300B": "\u0022"