
Commit a85752d

Authored by MooTong123 (Mu Tong) and w5688414
enhancement: add simpleServing code for sentence transformers (PaddlePaddle#4795)
* enhancement: add simpleServing code for sentence transformers
* Update export_model.py
* enhancement: update simpleServing for sentence transformers
* pre-commit
* checkout code format

---------

Co-authored-by: Mu Tong <[email protected]>
Co-authored-by: w5688414 <[email protected]>
Co-authored-by: w5688414 <[email protected]>
1 parent 3ae3ea5 commit a85752d

4 files changed: +319 -0 lines changed
README.md (new file)
@@ -0,0 +1,39 @@
# Service Deployment with PaddleNLP SimpleServing

## Contents
- [Environment Setup](#environment-setup)
- [Starting the Server](#starting-the-server)
- [Other Parameter Settings](#other-parameter-settings)

## Environment Setup
Use a PaddleNLP version that includes the SimpleServing feature:
```shell
pip install "paddlenlp>=2.4.4"
```

## Starting the Server
### Starting the Text Matching Task
#### Start the text matching Server
```bash
paddlenlp server server:app --host 0.0.0.0 --port 8189
```

#### Start the text matching Client
```bash
python client.py
```

## Other Parameter Settings
The `max_seq_len`, `batch_size`, and `prob_limit` parameters can be set on the client side:
```python
data = {
    'data': {
        'text': texts,
        'text_pair': text_pairs,
    },
    'parameters': {
        'max_seq_len': args.max_seq_len,
        'batch_size': args.batch_size,
        'prob_limit': args.prob_limit
    }
}
```
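For reference, the service's post handler (defined in `server.py` below) returns one predicted label and one similarity score per input pair, so a successful request yields a response shaped roughly like the following (values are illustrative, not actual model output):

```python
# Illustrative response shape; "label" is 1 when the pair's probability of
# being similar exceeds prob_limit, and "similarity" is that probability.
{"label": [1, 0], "similarity": [0.93, 0.12]}
```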
client.py (new file)
@@ -0,0 +1,44 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json

import requests

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max_seq_len", default=128, type=int, help="The maximum total input sequence length after tokenization."
)
parser.add_argument("--batch_size", default=1, type=int, help="Batch size per GPU/CPU for predicting.")
parser.add_argument(
    "--prob_limit", default=0.5, type=float, help="The probability threshold for labeling a pair as similar."
)
args = parser.parse_args()

url = "http://0.0.0.0:8189/models/text_matching"
headers = {"Content-Type": "application/json"}

if __name__ == "__main__":
    texts = ["三亚是一个美丽的城市", "北京烤鸭怎么样"]
    text_pair = ["三亚是个漂亮的城市", "北京烤鸭多少钱"]

    data = {
        "data": {
            "text": texts,
            "text_pair": text_pair,
        },
        "parameters": {"max_seq_len": args.max_seq_len, "batch_size": args.batch_size, "prob_limit": args.prob_limit},
    }
    r = requests.post(url=url, headers=headers, data=json.dumps(data))
    result_json = json.loads(r.text)
    print(result_json)
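The client assumes the server started in the README is already listening on port 8189. The `--prob_limit` flag is the decision threshold on the similarity probability, so, for example, `python client.py --prob_limit 0.7` labels a pair as similar only when the model assigns it at least 0.7 probability.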
server.py (new file)
@@ -0,0 +1,135 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from scipy.special import softmax

from paddlenlp import SimpleServer
from paddlenlp.data import Pad, Tuple
from paddlenlp.server import BaseModelHandler, BasePostHandler


class TextMatchingModelHandler(BaseModelHandler):
    def __init__(self):
        super().__init__()

    @classmethod
    def process(cls, predictor, tokenizer, data, parameters):
        max_seq_len = 128
        batch_size = 1
        if "max_seq_len" in parameters:
            max_seq_len = parameters["max_seq_len"]
        if "batch_size" in parameters:
            batch_size = parameters["batch_size"]
        text = None
        if "text" in data:
            text = data["text"]
        if text is None:
            return {}
        if isinstance(text, str):
            text = [text]
        has_pair = False
        if "text_pair" in data and data["text_pair"] is not None:
            text_pair = data["text_pair"]
            if isinstance(text_pair, str):
                text_pair = [text_pair]
            if len(text) != len(text_pair):
                raise ValueError("The length of text and text_pair must be the same.")
            has_pair = True
        if not has_pair:
            # Text matching needs a sentence pair for every input sentence.
            raise ValueError("Text matching requires 'text_pair' to be provided along with 'text'.")

        # Tokenize each side of the pair separately
        examples = []
        for idx, _ in enumerate(text):
            text_a = tokenizer(text=text[idx], max_length=max_seq_len)
            text_b = tokenizer(text=text_pair[idx], max_length=max_seq_len)
            examples.append((text_a["input_ids"], text_b["input_ids"]))

        # Separate the data into batches.
        batches = [examples[i : i + batch_size] for i in range(0, len(examples), batch_size)]

        def batchify_fn(samples):
            return Tuple(
                Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),
                Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),
            )(samples)

        # One result list per model output (a list comprehension avoids
        # aliasing a single shared list)
        results = [[] for _ in range(predictor._output_num)]
        for batch in batches:
            query_input_ids, title_input_ids = batchify_fn(batch)
            if predictor._predictor_type == "paddle_inference":
                predictor._input_handles[0].copy_from_cpu(query_input_ids)
                predictor._input_handles[1].copy_from_cpu(title_input_ids)
                predictor._predictor.run()
                output = [output_handle.copy_to_cpu() for output_handle in predictor._output_handles]
                for i, out in enumerate(output):
                    results[i].append(out)

        # Concatenate the per-batch logits back into a single array
        results_concat = []
        for i in range(0, len(results)):
            results_concat.append(np.concatenate(results[i], axis=0))

        out_dict = {"logits": results_concat[0].tolist(), "data": data}

        return out_dict


class TextMatchingPostHandler(BasePostHandler):
    def __init__(self):
        super().__init__()

    @classmethod
    def process(cls, data, parameters):
        if "logits" not in data:
            raise ValueError(
                "The output of the model handler does not include 'logits'; "
                "please check the model handler output. The model handler output:\n{}".format(data)
            )

        prob_limit = 0.5
        if "prob_limit" in parameters:
            prob_limit = parameters["prob_limit"]
        logits = data["logits"]
        # Convert logits to probabilities with softmax
        logits = softmax(logits, axis=-1)

        labels = []
        probs = []
        for logit in logits:
            # logit[1] is the probability that the pair is similar
            if logit[1] > prob_limit:
                labels.append(1)
            else:
                labels.append(0)
            probs.append(logit[1])

        out_dict = {"label": labels, "similarity": probs}
        return out_dict


app = SimpleServer()
app.register(
    task_name="models/text_matching",
    model_path="../../export_model",
    tokenizer_name="ernie-3.0-medium-zh",
    model_handler=TextMatchingModelHandler,
    post_handler=TextMatchingPostHandler,
    precision="fp32",
    device_id=0,
)
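A note on the registration above: `model_path="../../export_model"` assumes the static graph model has already been produced by the `export_model.py` script that follows, with its `--output_path` pointed at that directory, and `tokenizer_name` should match the pretrained model the export was based on.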
export_model.py (new file)
@@ -0,0 +1,101 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os

import paddle
import paddle.nn as nn

from paddlenlp.transformers import AutoModel, AutoTokenizer

parser = argparse.ArgumentParser()
parser.add_argument("--params_path", type=str, default="ernie-1.0", help="The path to model parameters to be loaded.")
parser.add_argument(
    "--output_path", type=str, default="./export", help="The directory where the static graph model will be saved."
)
args = parser.parse_args()


class SentenceTransformer(nn.Layer):
    def __init__(self, pretrained_model, dropout=None):
        super().__init__()
        self.ptm = pretrained_model
        self.dropout = nn.Dropout(dropout if dropout is not None else 0.1)
        # num_labels = 2 (similar or dissimilar)
        self.classifier = nn.Linear(self.ptm.config["hidden_size"] * 3, 2)

    def forward(
        self,
        query_input_ids,
        title_input_ids,
        query_token_type_ids=None,
        query_position_ids=None,
        query_attention_mask=None,
        title_token_type_ids=None,
        title_position_ids=None,
        title_attention_mask=None,
    ):
        query_token_embedding, _ = self.ptm(
            query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask
        )
        query_token_embedding = self.dropout(query_token_embedding)
        query_attention_mask = paddle.unsqueeze(
            (query_input_ids != self.ptm.pad_token_id).astype(self.ptm.pooler.dense.weight.dtype), axis=2
        )
        # Set token embeddings to 0 for padding tokens
        query_token_embedding = query_token_embedding * query_attention_mask
        query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
        query_sum_mask = paddle.sum(query_attention_mask, axis=1)
        query_mean = query_sum_embedding / query_sum_mask

        title_token_embedding, _ = self.ptm(
            title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask
        )
        title_token_embedding = self.dropout(title_token_embedding)
        title_attention_mask = paddle.unsqueeze(
            (title_input_ids != self.ptm.pad_token_id).astype(self.ptm.pooler.dense.weight.dtype), axis=2
        )
        # Set token embeddings to 0 for padding tokens
        title_token_embedding = title_token_embedding * title_attention_mask
        title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
        title_sum_mask = paddle.sum(title_attention_mask, axis=1)
        title_mean = title_sum_embedding / title_sum_mask

        # Classify on [query_mean; title_mean; |query_mean - title_mean|]
        sub = paddle.abs(paddle.subtract(query_mean, title_mean))
        projection = paddle.concat([query_mean, title_mean, sub], axis=-1)

        logits = self.classifier(projection)

        return logits


if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained(args.params_path)
    pretrained_model = AutoModel.from_pretrained(args.params_path)

    model = SentenceTransformer(pretrained_model)
    model.eval()

    input_spec = [
        paddle.static.InputSpec(shape=[None, None], dtype="int64", name="query_input_ids"),
        paddle.static.InputSpec(shape=[None, None], dtype="int64", name="title_input_ids"),
    ]
    # Convert to static graph with specific input description
    model = paddle.jit.to_static(model, input_spec=input_spec)

    # Save the static graph model.
    save_path = os.path.join(args.output_path, "float32")
    paddle.jit.save(model, save_path)
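`paddle.jit.save` writes the exported files with the `float32` prefix under `--output_path` (a `float32.pdmodel` file plus the parameter files), which is presumably the layout `server.py` expects at its `model_path`. An illustrative invocation matching the server's defaults might be `python export_model.py --params_path ernie-3.0-medium-zh --output_path ../../export_model`, though the exact paths depend on your checkout layout.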
