Skip to content

Commit

Permalink
Revise documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Nov 4, 2021
1 parent f02ac9b commit 439711e
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 10 deletions.
4 changes: 2 additions & 2 deletions docs/api/restful_java.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ Add the following dependency into the `pom.xml` file of your project.
<dependency>
<groupId>com.hankcs.hanlp.restful</groupId>
<artifactId>hanlp-restful</artifactId>
<version>0.0.4</version>
<version>0.0.7</version>
</dependency>
```

Obtain an `auth` from any compatible service provider like our [free service](https://bbs.hankcs.com/t/apply-for-free-hanlp-restful-apis/3178), then instantiate a `HanLPClient` and call its `parse` interface.

```java
HanLPClient client = new HanLPClient("https://hanlp.hankcs.com/api", null); // Replace null with your auth
System.out.println(client.parse("2021年HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。英首相与特朗普通电话讨论华为与苹果公司"));
System.out.println(client.parse("2021年HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。阿婆主来到北京立方庭参观自然语义科技公司"));
```

Refer to our [testcases](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_restful_java/src/test/java/com/hankcs/hanlp/restful/HanLPClientTest.java) and [data format](../data_format) for more details.
Expand Down
3 changes: 1 addition & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -- Project information -----------------------------------------------------
import os
import sys
import os

Expand Down Expand Up @@ -74,7 +73,7 @@
# a list of builtin themes.
#
html_theme = "sphinx_book_theme"
html_title = "HanLP documentation"
html_title = "HanLP Documentation"
html_logo = "_static/logo.png"
html_favicon = "_static/favicon.png"
html_copy_source = True
Expand Down
2 changes: 1 addition & 1 deletion docs/data_format.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ For example, the following RESTful codes will output such an instance.
:tags: [output_scroll]
from hanlp_restful import HanLPClient
HanLP = HanLPClient('https://hanlp.hankcs.com/api', auth=None) # Fill in your auth
print(HanLP('2021年HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。英首相与特朗普通电话讨论华为与苹果公司。'))
print(HanLP('2021年HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。阿婆主来到北京立方庭参观自然语义科技公司。'))
```

The output above is represented as a `json` dictionary where each key is a task name and its value is
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ which offers visualization in any mono-width text environment.

````{margin} **Non-ASCII**
```{note}
Non-ASCII text might screw in which case copying it into a `.tsv` editor will align it correctly.
Non-ASCII text might be misaligned in terminals, but in Jupyter Notebook it will align correctly.
You can also use our [live demo](https://hanlp.hankcs.com/).
```
````
Expand Down
2 changes: 1 addition & 1 deletion hanlp/components/mtl/tasks/pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(self,
**kwargs) -> None:
"""A simple tagger using a linear layer with an optional CRF (:cite:`lafferty2001conditional`) layer for
any tagging tasks including PoS tagging and many others. It also features a custom dictionary ``dict_tags``
to perform ``longest-prefix-matching`` and replaced matched tokens with given tags.
to perform ``longest-prefix-matching`` which replaces matched tokens with given tags.
.. Note:: For algorithm beginners, longest-prefix-matching is the prerequisite to understand what dictionary can
Expand Down
18 changes: 16 additions & 2 deletions plugins/hanlp_restful/hanlp_restful/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,15 @@ def coreference_resolution(self, text: Optional[str] = None, tokens: Optional[Li
'language': language or self._language})
return response

def tokenize(self, text: Union[str, List[str]], coarse: Optional[bool] = None) -> List[List[str]]:
def tokenize(self, text: Union[str, List[str]], coarse: Optional[bool] = None, language=None) -> List[List[str]]:
""" Split a document into sentences and tokenize them. Note that it is always faster to tokenize a whole
document than to tokenize each sentence one by one. So avoid calling this method sentence by sentence but put
sentences into a ``list`` and pass them to the ``text`` argument.
Args:
text: A document (``str``), or a list of sentences (``List[str]``).
coarse: Whether to perform coarse-grained or fine-grained tokenization.
language: The language of input text. ``None`` to use the default language.
Examples::
Expand All @@ -274,8 +275,21 @@ def tokenize(self, text: Union[str, List[str]], coarse: Optional[bool] = None) -
[['商品', '和', '服务', '。'],
['当', '下雨天', '地面', '积水', '分', '外', '严重']]
# Multilingual tokenization by specifying language='mul':
HanLP.tokenize(
['In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques '
'to production environment.',
'2021年、HanLPv2.1は次世代の最先端多言語NLP技術を本番環境に導入します。',
'2021年 HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。'], language='mul')
[['In', '2021', ',', 'HanLPv2.1', 'delivers', 'state-of-the-art', 'multilingual',
'NLP', 'techniques', 'to', 'production', 'environment', '.'],
['2021', '年', '、', 'HanLPv2.1', 'は', '次', '世代', 'の', '最', '先端', '多',
'言語', 'NLP', '技術', 'を', '本番', '環境', 'に', '導入', 'します', '。'],
['2021', '年', 'HanLPv2.1', '为', '生产', '环境', '带来', '次世代', '最', '先进的',
'多', '语种', 'NLP', '技术', '。']]
Returns:
A list of tokenized sentences.
"""
doc = self.parse(text=text, tasks='tok/coarse' if coarse is True else 'tok')
doc = self.parse(text=text, tasks='tok/coarse' if coarse is True else 'tok', language=language)
return next(iter(doc.values()))
2 changes: 1 addition & 1 deletion plugins/hanlp_restful/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name='hanlp_restful',
version='0.0.8',
version='0.0.9',
description='HanLP: Han Language Processing',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
4 changes: 4 additions & 0 deletions plugins/hanlp_restful/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def test_tokenize(self):
print(self.HanLP.tokenize('商品和服务。阿婆主来到北京立方庭参观自然语义科技公司'))
print(self.HanLP.tokenize('商品和服务。阿婆主来到北京立方庭参观自然语义科技公司', coarse=True))
print(self.HanLP.tokenize(['商品和服务。', '当下雨天地面积水分外严重']))
print(self.HanLP.tokenize(
['In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environment.',
'2021年、HanLPv2.1は次世代の最先端多言語NLP技術を本番環境に導入します。',
'2021年 HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。'], language='mul'))

def test_coreference_resolution(self):
print(self.HanLP.coreference_resolution('我姐送我她的猫。我很喜欢它。'))
Expand Down

0 comments on commit 439711e

Please sign in to comment.