Skip to content

Commit

Permalink
Merge pull request #1027 from PyThaiNLP/release-5.0.5
Browse files Browse the repository at this point in the history
PyThaiNLP v5.0.5
  • Loading branch information
wannaphong authored Dec 14, 2024
2 parents 79e5d58 + b804d41 commit 0c20956
Show file tree
Hide file tree
Showing 16 changed files with 175 additions and 70 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ authors:
given-names: "Pattarawat"
orcid: "https://orcid.org/0000-0000-0000-0000"
title: "PyThaiNLP: Thai Natural Language Processing in Python"
version: v5.0.4
version: v5.0.5
license: Apache-2.0
date-released: 2024-06-02
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร
| Version | Description | Status |
|:------:|:--:|:------:|
| [5.0.4](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
| [5.0.5](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) |

## Getting Started
Expand Down
2 changes: 1 addition & 1 deletion README_TH.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร
| รุ่น | คำอธิบาย | สถานะ |
|:------:|:--:|:------:|
| [5.0.4](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
| [5.0.5](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) |

ติดตามพวกเราบน [PyThaiNLP Facebook page](https://www.facebook.com/pythainlp/) เพื่อรับข่าวสารเพิ่มเติม
Expand Down
2 changes: 1 addition & 1 deletion pythainlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
__version__ = "5.0.4"
__version__ = "5.0.5"

thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars

Expand Down
6 changes: 2 additions & 4 deletions pythainlp/cls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
pythainlp.cls
Deprecated. Use pythainlp.classify instead.
"""
import warnings

__all__ = ["GzipModel"]

from pythainlp.classify.param_free import GzipModel
from pythainlp.tools import warn_deprecation

warnings.warn(
"Deprecated: Use pythainlp.classify instead.", DeprecationWarning
)
warn_deprecation("pythainlp.cls", "pythainlp.classify", "5.1", "5.2")
15 changes: 10 additions & 5 deletions pythainlp/corpus/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
]

from typing import FrozenSet, List, Union
import warnings

from pythainlp.corpus import get_corpus, get_corpus_as_is, get_corpus_path
from pythainlp.tools import warn_deprecation

_THAI_COUNTRIES: FrozenSet[str] = frozenset()
_THAI_COUNTRIES_FILENAME = "countries_th.txt"
Expand Down Expand Up @@ -56,9 +56,9 @@

_THAI_ORST_WORDS: FrozenSet[str] = frozenset()

_THAI_DICT = {}
_THAI_WSD_DICT = {}
_THAI_SYNONYMS = {}
_THAI_DICT: dict[str, list] = {}
_THAI_WSD_DICT: dict[str, list] = {}
_THAI_SYNONYMS: dict[str, list] = {}


def countries() -> FrozenSet[str]:
Expand Down Expand Up @@ -336,7 +336,12 @@ def thai_synonyms() -> dict:


def thai_synonym() -> dict:
warnings.warn("Deprecated: Use thai_synonyms() instead.", DeprecationWarning)
warn_deprecation(
"pythainlp.corpus.thai_synonym",
"pythainlp.corpus.thai_synonyms",
"5.1",
"5.2",
)
return thai_synonyms()


Expand Down
2 changes: 1 addition & 1 deletion pythainlp/phayathaibert/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def get_ner(
if pos:
warnings.warn(
"This model doesn't support output \
postag and It doesn't output the postag."
postag and it doesn't output the postag."
)

sample_output = []
Expand Down
17 changes: 8 additions & 9 deletions pythainlp/tokenize/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"""
Generic functions of tokenizers
"""

import re
import warnings
from typing import Iterable, List, Union

from pythainlp.tokenize import (
Expand All @@ -21,6 +21,7 @@
rejoin_formatted_num,
strip_whitespace,
)
from pythainlp.tools import warn_deprecation
from pythainlp.util.trie import Trie, dict_trie


Expand All @@ -45,13 +46,9 @@ def clause_tokenize(doc: List[str]) -> List[List[str]]:
# ['และ', 'คุณ', 'เล่น', 'มือถือ'],
# ['ส่วน', 'น้อง', 'เขียน', 'โปรแกรม']]
"""
warn_deprecation("pythainlp.util.clause_tokenize", "", "5.0.5", "5.1")
from pythainlp.tokenize.crfcls import segment

warnings.warn(
"""
clause_tokenize is no longer supported \
and will be removed in version 5.1.
""", DeprecationWarning)
return segment(doc)


Expand All @@ -71,6 +68,7 @@ def word_detokenize(
::
from pythainlp.tokenize import word_detokenize
print(word_detokenize(["เรา", "เล่น"]))
# output: เราเล่น
"""
Expand Down Expand Up @@ -299,18 +297,19 @@ def word_tokenize(
segments = segment(text)
elif engine == "nlpo3":
from pythainlp.tokenize.nlpo3 import segment

# Currently cannot handle custom_dict from inside word_tokenize(),
# due to difference in type.
#if isinstance(custom_dict, str):
# if isinstance(custom_dict, str):
# segments = segment(text, custom_dict=custom_dict)
#elif not isinstance(custom_dict, str) and not custom_dict:
# elif not isinstance(custom_dict, str) and not custom_dict:
# raise ValueError(
# f"""Tokenizer \"{engine}\":
# custom_dict must be a str.
# It is a dictionary name as assigned with load_dict().
# See pythainlp.tokenize.nlpo3.load_dict()"""
# )
#else:
# else:
# segments = segment(text)
segments = segment(text)
else:
Expand Down
4 changes: 4 additions & 0 deletions pythainlp/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
"get_full_data_path",
"get_pythainlp_data_path",
"get_pythainlp_path",
"safe_print",
"warn_deprecation",
]

from pythainlp.tools.core import safe_print, warn_deprecation

from pythainlp.tools.path import (
PYTHAINLP_DEFAULT_DATA_DIR,
get_full_data_path,
Expand Down
49 changes: 49 additions & 0 deletions pythainlp/tools/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
"""
Generic support functions for PyThaiNLP.
"""

import sys
import warnings


def warn_deprecation(
    deprecated_func: str,
    replacing_func: str = "",
    deprecated_version: str = "",
    removal_version: str = "",
):
    """Emit a ``DeprecationWarning`` for a deprecated function.

    The message always names the deprecated function and says when it will
    be removed; the "since" clause and the replacement hint are included
    only when the corresponding argument is non-empty.

    :param str deprecated_func: Name of the deprecated function.
    :param str replacing_func: Name of the function to use instead
        (optional).
    :param str deprecated_version: Version since which the function is
        deprecated (optional).
    :param str removal_version: Version in which the function will be
        removed (optional); "a future release" is used when empty.
    """
    since_clause = f" since {deprecated_version}" if deprecated_version else ""
    removed_in = removal_version or "a future release"
    replacement_hint = (
        f" Please use '{replacing_func}' instead." if replacing_func else ""
    )
    # stacklevel=2 attributes the warning to the code that called
    # warn_deprecation() rather than to this helper.
    warnings.warn(
        f"The '{deprecated_func}' function is deprecated{since_clause}"
        f" and will be removed in {removed_in}.{replacement_hint}",
        DeprecationWarning,
        stacklevel=2,
    )


def safe_print(text: str):
    """Print text to console, handling UnicodeEncodeError.

    The text is first printed as-is. If the output stream cannot encode it,
    the text is re-encoded with ``errors="replace"`` in the stream's
    encoding, so unencodable characters are printed as ``?`` instead of
    raising.

    :param text: Text to print.
    :type text: str
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # sys.stdout may have been replaced by an object whose ``encoding``
        # attribute is None or missing; passing that to str.encode() would
        # raise from inside this handler. ASCII is the safe fallback, since
        # every stream that accepts text can represent it.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(text.encode(encoding, errors="replace").decode(encoding))
5 changes: 4 additions & 1 deletion pythainlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
"display_thai_char",
"emoji_to_thai",
"eng_to_thai",
"expand_maiyamok",
"find_keyword",
"ipa_to_rtgs",
"is_native_thai",
"isthai",
"isthaichar",
"maiyamok",
"nectec_to_ipa",
"normalize",
"now_reign_year",
Expand Down Expand Up @@ -85,8 +87,9 @@
from pythainlp.util.emojiconv import emoji_to_thai
from pythainlp.util.keywords import find_keyword, rank
from pythainlp.util.normalize import (
normalize,
expand_maiyamok,
maiyamok,
normalize,
remove_dangling,
remove_dup_spaces,
remove_repeat_vowels,
Expand Down
Loading

0 comments on commit 0c20956

Please sign in to comment.