Skip to content

Commit

Permalink
Merge pull request #671 from PyThaiNLP/fixed-#666-2
Browse files: browse the repository at this point in the history
Fixed #666 again
  • Loading branch information
wannaphong authored May 16, 2022
2 parents: bcc596e + 5138b75; commit 34e2e01
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pythainlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__version__ = "3.0.7"
__version__ = "3.0.8"

thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars

Expand Down
13 changes: 5 additions & 8 deletions pythainlp/tokenize/nercut.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,15 @@ def segment(
words.append(combining_word)
combining_word = ""
words.append(curr_word)
else:
else: # if tag is O
combining_word = ""
words.append(curr_word)
if idx + 1 == len(tagged_words):
if (
curr_tag.startswith("B-") or curr_tag == "O"
) and combining_word != "":
if curr_tag.startswith("B-") and combining_word != "":
words.append(combining_word)
combining_word = ""
words.append(curr_word)
else: # if tag is O
combining_word += curr_word
elif curr_tag.startswith("I-") and combining_word != "":
words.append(combining_word)
else:
pass

return words
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 3.0.7
current_version = 3.0.8
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@

setup(
name="pythainlp",
version="3.0.7",
version="3.0.8",
description="Thai Natural Language Processing library",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down
11 changes: 8 additions & 3 deletions tests/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,9 +610,14 @@ def test_nercut(self):
self.assertEqual(nercut.segment(None), [])
self.assertEqual(nercut.segment(""), [])
self.assertIsNotNone(nercut.segment("ทดสอบ"))
self.assertIsNotNone(nercut.segment("ทุ๊กกโคนน"))
self.assertIsNotNone(nercut.segment("อือหือ"))
self.assertIsNotNone(nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"))
self.assertEqual(nercut.segment("ทันแน่ๆ"), ['ทัน', 'แน่ๆ'])
self.assertEqual(nercut.segment("%1ครั้ง"), ['%', '1', 'ครั้ง'])
self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ['ทุ๊กกโคนน'])
self.assertEqual(nercut.segment("อือหือ"), ['อือหือ'])
self.assertEqual(
nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"),
['อย่าลืมอัพการ์ดนะจ๊ะ']
)
self.assertIsNotNone(word_tokenize("ทดสอบ", engine="nercut"))

def test_ssg(self):
Expand Down

0 comments on commit 34e2e01

Please sign in to comment.