Skip to content

Commit 6554eb9

Browse files
committed
Merge branch 'pr/5' into dev
2 parents d88ceed + 61e5f5c commit 6554eb9

File tree

5 files changed

+99
-21
lines changed

5 files changed

+99
-21
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
# Image for serving the PyThaiNLP API (uvicorn listens on 8000).
FROM python:3.7

# MAINTAINER is deprecated in modern Docker; LABEL is the supported form.
LABEL maintainer="Wannaphong Phatthiyaphaibun <[email protected]>"

# All subsequent commands (and the runtime CWD) use /app.
WORKDIR /app

# Documents the port the API server binds to; publish it via compose/run -p.
EXPOSE 8000

app.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

docker-compose.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
version: "3"
services:
  api:
    build: .
    volumes:
      # Mount the project at the image's WORKDIR (/app) so local edits are
      # visible without rebuilding. The previous target, /usr/src/app, did
      # not match the Dockerfile's WORKDIR and the mount was never used.
      - .:/app
    ports:
      - "8000:8000"

main.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""Entry point for the PyThaiNLP web API.

Builds the FastAPI application, enables permissive CORS (the API is meant
to be publicly consumable from browsers), and mounts the tag and tokenize
routers under /tag and /tokenize respectively.
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from routers import tag, tokenize
import uvicorn
import pythainlp


DESC_TEXT = "Pythainlp API"


app = FastAPI(
    title='Pythainlp API',
    description=DESC_TEXT,
    version='0.1',
)

# Wide-open CORS: any origin, method, and header may call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def index():
    """Liveness/info endpoint: report the installed PyThaiNLP version."""
    return {"Pythainlp Version": pythainlp.__version__}


app.include_router(tag.router, prefix="/tag", tags=["Tag"])
app.include_router(tokenize.router, prefix="/tokenize", tags=["Tokenize"])


if __name__ == "__main__":
    # Bind to all interfaces so the server is reachable from outside the
    # container (the Dockerfile EXPOSEs the same port).
    uvicorn.run(app, host="0.0.0.0", port=8000)

routers/tokenize.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,62 @@
# -*- coding: utf-8 -*-
"""Tokenization endpoints: sentence, word, syllable and subword splitting.

Each GET endpoint takes the text in query parameter ``q`` and, where the
underlying pythainlp function supports it, an ``engine`` choice constrained
by an Enum so FastAPI validates it and documents the allowed values.
"""
from fastapi import APIRouter
from pythainlp import tokenize
from enum import Enum
from typing import List
from pydantic import BaseModel


router = APIRouter()


class SentTokenizeEngine(str, Enum):
    """Engines accepted by pythainlp.tokenize.sent_tokenize."""
    whitespace = "whitespace"
    whitespace_newline = "whitespace+newline"


class WordTokenizeEngine(str, Enum):
    """Engines accepted by pythainlp.tokenize.word_tokenize."""
    newmm = "newmm"
    longest = "longest"
    deepcut = "deepcut"
    icu = "icu"
    ulmfit = "ulmfit"


class SubwordTokenizeEngine(str, Enum):
    """Engines accepted by pythainlp.tokenize.subword_tokenize."""
    tcc = "tcc"
    etcc = "etcc"


class SentTokenizeResponse(BaseModel):
    # List of sentence strings produced from the input text.
    sents: List[str] = []


class WordTokenizeResponse(BaseModel):
    # List of word tokens produced from the input text.
    words: List[str] = []


class SyllableTokenizeResponse(BaseModel):
    # List of syllable tokens produced from the input text.
    syllables: List[str] = []


class SubwordTokenizeResponse(BaseModel):
    # List of subword units produced from the input text.
    subwords: List[str] = []


@router.get('/sent', response_model=SentTokenizeResponse)
def sent_tokenize(q: str, engine: SentTokenizeEngine = SentTokenizeEngine.whitespace):
    """Split *q* into sentences with the selected engine."""
    # .value passes the plain string pythainlp expects rather than the Enum.
    return {"sents": tokenize.sent_tokenize(q, engine=engine.value)}


@router.get('/word', response_model=WordTokenizeResponse)
def word_tokenize(q: str, engine: WordTokenizeEngine = WordTokenizeEngine.newmm):
    """Split *q* into words with the selected engine (default: newmm)."""
    return {"words": tokenize.word_tokenize(q, engine=engine.value)}


@router.get('/syllable', response_model=SyllableTokenizeResponse)
def syllable_tokenize(q: str):
    """Split *q* into syllables (no engine choice exposed by pythainlp)."""
    return {"syllables": tokenize.syllable_tokenize(q)}


@router.get('/subword', response_model=SubwordTokenizeResponse)
def subword_tokenize(q: str, engine: SubwordTokenizeEngine = SubwordTokenizeEngine.tcc):
    """Split *q* into subword units with the selected engine (default: tcc)."""
    return {"subwords": tokenize.subword_tokenize(q, engine=engine.value)}

0 commit comments

Comments
 (0)