We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 41558bb, commit d1b64a7. Copy full SHA for d1b64a7
pythainlp/tokenize/crfcut.py
@@ -204,7 +204,7 @@ def segment(text: str) -> List[str]:
204
if toks[idx].strip().endswith(("!", ".", "?")):
205
labs[idx] = "E"
206
# Spaces or empty strings would no longer be treated as end of sentence.
207
- elif toks[idx].strip() == "":
+ elif (idx == 0 or labs[idx-1] == "E") and toks[idx].strip() == "":
208
labs[idx] = "I"
209
210
sentences = []
0 commit comments