Skip to content

Commit

Permalink
Added tone sandhi for 仔
Browse files Browse the repository at this point in the history
  • Loading branch information
andreihar committed Apr 26, 2024
1 parent c460667 commit 0d231dc
Showing 1 changed file with 19 additions and 4 deletions.
23 changes: 19 additions & 4 deletions taibun/taibun.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,26 @@ def __get_mark_tone(self, input, placement, tones):
# Helper to apply tone sandhi to the syllables of a word
def __tone_sandhi(self, words, last):
    """Return the syllables of ``words`` with tone sandhi applied.

    Args:
        words: Sequence of syllable strings. Each syllable that has to
            change is handed to ``self.__replacement_tool`` together with
            the sandhi table to use. (Presumably numeric-tone
            romanisation, e.g. a trailing ``h4`` or ``5`` -- confirm
            against the caller.)
        last: Selects how the trailing syllables are treated:

            * falsy -- the final syllable is kept untouched, every
              earlier syllable goes through the regular table;
            * ``'a suff'`` -- the final syllable is kept untouched, the
              syllable right before it goes through the dedicated
              ``a_sandhi`` table, and all earlier ones through the
              regular table;
            * any other truthy value -- every syllable, including the
              final one, goes through the regular table.

    Returns:
        A new list with exactly one entry per input syllable.
    """
    # Nothing to convert; also avoids indexing words[-1] on an empty input.
    if not words:
        return []

    sandhi = {'1':'7', '7':'3', '3':'2', '2':'1', '5':'7', 'p4':'p8', 't4':'t8', 'k4':'k8', 'h4':'2', 'p8':'p4', 't8':'t4', 'k8':'k4', 'h8':'3'}
    a_sandhi = {'1':'7', '2':'1', '3':'1', '5':'7', 'p4':'p8', 't4':'t8', 'k4':'k8', 'h4':'1', 'p8':'p4', 't8':'t4', 'k8':'k4', 'h8':'7'}
    if self.dialect == 'north':
        # Only the regular table has a dialect-specific entry.
        sandhi['5'] = '3'

    convert = self.__replacement_tool

    if last == 'a suff':
        if len(words) == 1:
            # A lone syllable has no predecessor to modify. Return it once,
            # unchanged, instead of emitting it twice (converted + raw).
            return [words[0]]
        leading = [convert(sandhi, word) for word in words[:-2]]
        return leading + [convert(a_sandhi, words[-2]), words[-1]]

    if last:
        return [convert(sandhi, word) for word in words]

    return [convert(sandhi, word) for word in words[:-1]] + [words[-1]]


Expand All @@ -197,6 +211,7 @@ def __tone_sandhi_position(self, input):
'incl_last': [(char, True) for char in input],
}
result_list = sandhi_logic.get(self.sandhi, [(char, False if char in self.__no_sandhi else (i < len(input) - 1 and is_cjk(input[i+1]))) for i, char in enumerate(input)])
result_list = sandhi_logic.get(self.sandhi, [(char, "a suff" if len(char) > 1 and char[-1] == "仔" else (False if char in self.__no_sandhi else (i < len(input) - 1 and is_cjk(input[i+1])))) for i, char in enumerate(input)])
for i in range(len(result_list) - 2, -1, -1):
if result_list[i+1][0] in self.__suffixes:
result_list[i] = (result_list[i][0], False)
Expand Down

0 comments on commit 0d231dc

Please sign in to comment.