Commit 1a54970

Add TRT deploy (#520)
* Add TRT deploy
* code clean
* doc refine
1 parent 9f43bbc commit 1a54970

10 files changed: +739 lines, -0 lines

Diff for: README.md (+1)
@@ -237,6 +237,7 @@ pip install paddleslim==1.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
 - [SlimFaceNet](demo/slimfacenet/README.md)
 - [OCR model compression (based on PaddleOCR)](demo/ocr/README.md)
 - [Detection model compression (based on PaddleDetection)](demo/detection/README.md)
+- [TensorRT deployment](demo/quant/deploy/TensorRT): how to deploy models quantized by PaddleSlim with TensorRT.

 ## Performance of selected compression strategies

Diff for: README_en.md (+1)
@@ -90,6 +90,7 @@ pip install paddleslim==1.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
 - [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/master/slim): How to use PaddleSlim in the PaddleDetection library.
 - [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/slim): How to use PaddleSlim in the PaddleSeg library.
 - [PaddleLite](https://paddlepaddle.github.io/Paddle-Lite/): How to use PaddleLite to deploy models generated by PaddleSlim.
+- [TensorRT Deploy](demo/quant/deploy/TensorRT): How to use TensorRT to deploy models generated by PaddleSlim.

 ## Performance

Diff for: demo/quant/deploy/TensorRT/CMakeLists.txt (+104)
@@ -0,0 +1,104 @@
cmake_minimum_required(VERSION 3.0)
project(inference_test CXX C)
option(WITH_MKL        "Use MKL when ON, OpenBlas when OFF."                       OFF)
option(WITH_GPU        "Compile demo with GPU support (CPU only when OFF)."        OFF)
option(WITH_STATIC_LIB "Link the static Paddle library when ON, shared when OFF."  OFF)
option(USE_TENSORRT    "Compile demo with TensorRT."                               OFF)

if(NOT DEFINED PADDLE_LIB)
  message(FATAL_ERROR "Please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()

# detect the Linux distribution
file(READ "/etc/issue" ETC_ISSUE)
string(REGEX MATCH "Debian|Ubuntu|CentOS" DIST ${ETC_ISSUE})

if(DIST STREQUAL "Debian")
  message(STATUS ">>>> Found Debian <<<<")
elseif(DIST STREQUAL "Ubuntu")
  message(STATUS ">>>> Found Ubuntu <<<<")
elseif(DIST STREQUAL "CentOS")
  message(STATUS ">>>> Found CentOS <<<<")
else()
  message(STATUS ">>>> Found unknown distribution <<<<")
endif()

include_directories("${PADDLE_LIB}/")
set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}zlib/include")
include_directories("${PADDLE_LIB}/third_party/boost")
include_directories("${PADDLE_LIB}/third_party/eigen3")
include_directories("${PADDLE_LIB}/paddle/include")
include_directories("${PADDLE_LIB}/paddle")

link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}zlib/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_LIB}/paddle/lib")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -std=c++11")
message(STATUS "flags: ${CMAKE_CXX_FLAGS}")

if(USE_TENSORRT AND WITH_GPU)
  message(STATUS "=====> TENSORRT_INCLUDE_DIR is ${TENSORRT_INCLUDE_DIR}")
  message(STATUS "=====> TENSORRT_LIB_DIR is ${TENSORRT_LIB_DIR}")
  include_directories("${TENSORRT_INCLUDE_DIR}")
  link_directories("${TENSORRT_LIB_DIR}")
endif()

if(WITH_MKL)
  set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml")
  include_directories("${MATH_LIB_PATH}/include")
  set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
               ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn")
  if(EXISTS ${MKLDNN_PATH})
    include_directories("${MKLDNN_PATH}/include")
    set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
  endif()
else()
  set(MATH_LIB ${PADDLE_LIB_THIRD_PARTY_PATH}openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()

# Note: libpaddle_inference_api.so/.a must come before libpaddle_fluid.so/.a
if(WITH_STATIC_LIB)
  set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

set(EXTERNAL_LIB "-lrt -ldl -lpthread -lprotobuf")
set(DEPS ${DEPS}
    ${MATH_LIB} ${MKLDNN_LIB}
    glog gflags protobuf z xxhash
    ${EXTERNAL_LIB})

if(WITH_GPU)
  if(USE_TENSORRT)
    set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
    set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
  set(DEPS ${DEPS} $ENV{CUDA_LIB})
endif()

set(TestFiles "trt_clas.cc"
              "trt_gen_calib_table_test.cc"
              "test_acc.cc")

foreach(testsourcefile ${TestFiles})
  message(STATUS "====> ${testsourcefile} will be compiled")
  # build one executable per test source file
  string(REPLACE ".cc" "" testname ${testsourcefile})
  add_executable(${testname} ${testsourcefile})
  # link the Paddle/TensorRT dependencies
  target_link_libraries(${testname} ${DEPS})
endforeach()

Diff for: demo/quant/deploy/TensorRT/README.md (+230)
@@ -0,0 +1,230 @@
# TensorRT inference for PaddleSlim quantized models

This tutorial walks through deploying a model quantized by PaddleSlim using TensorRT.


## 1. Prepare the environment

* There are two ways to obtain the Paddle inference library; both are described below.

### 1.1 Download a prebuilt library

* The [Paddle inference library page](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html) provides prebuilt Linux inference libraries for several CUDA versions; pick a build that ships with TensorRT.

* After downloading, extract it:

```
tar -xf fluid_inference.tgz
```

This creates a `fluid_inference/` subdirectory in the current directory.


### 1.2 Build the inference library from source
* To get the latest inference library features, clone the latest Paddle code from GitHub and build the library from source.
* Follow the instructions on the [Paddle inference library page](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html) to fetch the Paddle source from GitHub and build it. Get the code with git:

```shell
git clone https://github.com/PaddlePaddle/Paddle.git
```
* Download TensorRT from the [NVIDIA website](https://developer.nvidia.com/TensorRT) and extract it; this example uses TensorRT 6.0.

* Enter the Paddle directory and build as follows.

```shell
rm -rf build
mkdir build
cd build

cmake .. \
      -DWITH_MKL=ON \
      -DWITH_MKLDNN=ON \
      -DCMAKE_BUILD_TYPE=Release \
      -DWITH_INFERENCE_API_TEST=OFF \
      -DTENSORRT_ROOT=TensorRT-6.0.1.5 \
      -DFLUID_INFERENCE_INSTALL_DIR=LIB_ROOT \
      -DON_INFER=ON \
      -DWITH_PYTHON=ON
make -j
make inference_lib_dist
```

Here `FLUID_INFERENCE_INSTALL_DIR` is the directory the inference library is installed to after the build, and `TENSORRT_ROOT` is the path of the downloaded and extracted TensorRT.

More build options are documented on the Paddle C++ inference library page: [https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)


* After the build finishes, `LIB_ROOT` contains the following files and directories.

```
LIB_ROOT/
|-- CMakeCache.txt
|-- paddle
|-- third_party
|-- version.txt
```

`paddle` holds the Paddle library needed for the TensorRT inference below, and `version.txt` records the version of the inference library.


## 2. Run the demo

### 2.1 Export the model as an inference model

* Follow the [quantization-aware training tutorial](https://paddleslim.readthedocs.io/zh_CN/latest/quick_start/quant_aware_tutorial.html#id9) to export an inference model once training finishes:

```
inference/
|-- model
|-- params
```


### 2.2 Build the TensorRT inference demo

* Build with the command below, replacing the Paddle and TensorRT paths with the actual paths on your machine.


```shell
sh tools/build.sh
```

Concretely, `tools/build.sh` contains the following.

```shell
PADDLE_LIB_PATH=trt_inference # change to your path
USE_GPU=ON
USE_MKL=ON
USE_TRT=ON
TENSORRT_INCLUDE_DIR=TensorRT-6.0.1.5/include # change to your path
TENSORRT_LIB_DIR=TensorRT-6.0.1.5/lib # change to your path

if [ "$USE_GPU" = "ON" ]; then
  export CUDA_LIB=`find /usr/local -name libcudart.so`
fi
BUILD=build
mkdir -p $BUILD
cd $BUILD
cmake .. \
      -DPADDLE_LIB=${PADDLE_LIB_PATH} \
      -DWITH_GPU=${USE_GPU} \
      -DWITH_MKL=${USE_MKL} \
      -DCUDA_LIB=${CUDA_LIB} \
      -DUSE_TENSORRT=${USE_TRT} \
      -DTENSORRT_INCLUDE_DIR=${TENSORRT_INCLUDE_DIR} \
      -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR}
make -j4
```

`PADDLE_LIB_PATH` is the Paddle inference library path, either downloaded (the `fluid_inference` directory) or built from source (the `build/fluid_inference_install_dir` directory); `TENSORRT_INCLUDE_DIR` and `TENSORRT_LIB_DIR` are the TensorRT include and lib directory paths.


* After the build, the executables are generated under the `build` directory.


### 2.3 Convert and preprocess the data

Accuracy and performance testing both require the data to be converted to a binary file first. The script below converts the full ILSVRC2012 val dataset; pass `--local` to convert your own data instead. Run it from the directory that contains Paddle. Upstream, the script is located at [full_ILSVRC2012_val_preprocess.py](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py).
```
python Paddle/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py --local --data_dir=/PATH/TO/USER/DATASET/ --output_file=/PATH/TO/SAVE/BINARY/FILE
```

Optional arguments:
- With no arguments set, the script downloads the ILSVRC2012_img_val dataset and converts it to a binary file.
- **local:** set to true when you supply your own data.
- **data_dir:** your data directory.
- **label_list:** a file of image-path/image-class pairs, similar to `val_list.txt`.
- **output_file:** path of the generated binary file.
- **data_dim:** height and width the images are preprocessed to. Default 224.

Your dataset directory should be laid out as follows:
```
imagenet_user
├── val
│   ├── ILSVRC2012_val_00000001.jpg
│   ├── ILSVRC2012_val_00000002.jpg
|   |── ...
└── val_list.txt
```
where `val_list.txt` contains lines such as:
```
val/ILSVRC2012_val_00000001.jpg 0
val/ILSVRC2012_val_00000002.jpg 0
```

Note:
- Why convert the dataset to a binary file? Paddle's data preprocessing (resize, crop, and so on) is done with the Python Image module (PIL), so trained models expect images preprocessed in Python; however, we found that preprocessing in Python is expensive at test time and drags down inference performance. To get good numbers, the quantized model has to be benchmarked from C++, which supports libraries such as OpenCV, and Paddle prefers not to depend on external libraries. We therefore preprocess the images in Python, write them into a binary file, and read that file back in the C++ tests. If you wish, you can change the C++ tests to read and preprocess raw data directly; accuracy should not drop much.
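Purely as an illustration, reading such a binary file back in C++ is plain `fstream` work. The layout assumed below (an int64 image count, then all float32 image tensors, then all int64 labels) is a guess made for this sketch; the authoritative layout is whatever `full_ILSVRC2012_val_preprocess.py` writes, so check the script before relying on it.

```c++
#include <cstdint>
#include <fstream>
#include <vector>

// ASSUMED layout: [int64 n][n * 3*224*224 float32 images][n int64 labels].
// Verify against full_ILSVRC2012_val_preprocess.py before use.
bool ReadSample(std::ifstream& file, int64_t index, int64_t n,
                std::vector<float>* image, int64_t* label) {
  const int64_t kImageElems = 3 * 224 * 224;
  const std::streamoff kHeader = sizeof(int64_t);  // the leading image count
  image->resize(kImageElems);
  file.seekg(kHeader + index * kImageElems * sizeof(float));
  file.read(reinterpret_cast<char*>(image->data()), kImageElems * sizeof(float));
  file.seekg(kHeader + n * kImageElems * sizeof(float) + index * sizeof(int64_t));
  file.read(reinterpret_cast<char*>(label), sizeof(int64_t));
  return static_cast<bool>(file);
}
```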
### 2.4 Deploy and predict


#### Run the demo
* Run the following command to perform TensorRT inference with a classification model.

```shell
sh tools/run.sh
```
`MODEL_DIR` and `DATA_FILE` in the script are the model directory and the data file; replace them with the actual paths before running.

You should see a prediction result similar to the following:

```shell
I1123 11:30:49.160024 10999 trt_clas.cc:103] finish prediction
I1123 11:30:49.160050 10999 trt_clas.cc:136] pred image class is : 65, ground truth label is : 65
```
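For reference, the heart of such a demo is a predictor created with the TensorRT engine enabled. The sketch below uses the Paddle 1.x C++ inference API (`AnalysisConfig`); the model paths, input shape, and the 1000-class output are illustrative assumptions, not an excerpt of the actual `trt_clas.cc`.

```c++
#include <vector>
#include "paddle_inference_api.h"

int main() {
  // Point the config at the inference model exported in section 2.1.
  paddle::AnalysisConfig config;
  config.SetModel("inference/model", "inference/params");  // assumed paths
  config.EnableUseGpu(100 /* initial GPU memory pool, MB */, 0 /* device id */);
  config.SwitchUseFeedFetchOps(false);  // required for zero-copy tensors
  // Run the quantized model through TensorRT in INT8 mode.
  config.EnableTensorRtEngine(1 << 20 /* workspace */, 1 /* max batch */,
                              3 /* min subgraph size */,
                              paddle::AnalysisConfig::Precision::kInt8,
                              false /* use_static */, false /* use_calib_mode */);
  auto predictor = paddle::CreatePaddlePredictor(config);

  // Feed one preprocessed 3x224x224 float image (all zeros here).
  std::vector<float> input(3 * 224 * 224, 0.f);
  auto input_t = predictor->GetInputTensor(predictor->GetInputNames()[0]);
  input_t->Reshape({1, 3, 224, 224});
  input_t->copy_from_cpu(input.data());

  predictor->ZeroCopyRun();

  // Fetch the class scores; 1000 classes is an assumption (ImageNet).
  std::vector<float> scores(1000);
  auto output_t = predictor->GetOutputTensor(predictor->GetOutputNames()[0]);
  output_t->copy_to_cpu(scores.data());
  return 0;
}
```

With `use_calib_mode` set to false, TensorRT uses the scales stored in the PaddleSlim-quantized model rather than running its own calibration; the separate `trt_gen_calib_table_test.cc` target presumably exercises the calibration path.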

* Set `repeat_times` in `tools/run.sh` to a value greater than 1 to benchmark a model's TensorRT speed by averaging over repeated predictions.

```shell
sh tools/run.sh
```

You should see benchmark output similar to the following:

```shell
I1123 11:40:30.936796 11681 trt_clas.cc:83] finish warm up 10 times
I1123 11:40:30.947906 11681 trt_clas.cc:101] total predict cost is : 11.042 ms, repeat 10 times
I1123 11:40:30.947947 11681 trt_clas.cc:102] average predict cost is : 1.1042 ms
```
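The timing pattern behind this log is the usual warm-up-then-average loop. A sketch (the `warmup` and `repeat_times` values mirror the log above; `predictor` is a predictor like the one sketched earlier, and this is not an excerpt of `trt_clas.cc`):

```c++
#include <chrono>
#include <iostream>
#include "paddle_inference_api.h"

// Warm up first so one-off costs (TensorRT engine build, allocations)
// are excluded, then average the cost of `repeat_times` predictions.
void Benchmark(paddle::PaddlePredictor* predictor, int warmup, int repeat_times) {
  for (int i = 0; i < warmup; ++i) predictor->ZeroCopyRun();

  auto start = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < repeat_times; ++i) predictor->ZeroCopyRun();
  auto end = std::chrono::high_resolution_clock::now();

  double total_ms = std::chrono::duration<double, std::milli>(end - start).count();
  std::cout << "total predict cost is : " << total_ms << " ms, repeat "
            << repeat_times << " times" << std::endl;
  std::cout << "average predict cost is : " << total_ms / repeat_times
            << " ms" << std::endl;
}
```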


* Run the following command to evaluate a model's accuracy under TensorRT.

```shell
sh tools/test_acc.sh
```

As above, replace the paths in the script with the actual paths before running.

You should see evaluation output similar to the following:

```shell
I1123 11:23:11.856046 10913 test_acc.cc:64] 5000
I1123 11:23:50.318663 10913 test_acc.cc:64] 10000
I1123 11:24:28.793603 10913 test_acc.cc:64] 15000
I1123 11:25:07.277580 10913 test_acc.cc:64] 20000
I1123 11:25:45.698241 10913 test_acc.cc:64] 25000
I1123 11:26:24.195798 10913 test_acc.cc:64] 30000
I1123 11:27:02.625052 10913 test_acc.cc:64] 35000
I1123 11:27:41.178545 10913 test_acc.cc:64] 40000
I1123 11:28:19.798691 10913 test_acc.cc:64] 45000
I1123 11:28:58.457620 10913 test_acc.cc:107] final result:
I1123 11:28:58.457688 10913 test_acc.cc:108] top1 acc:0.70664
I1123 11:28:58.457712 10913 test_acc.cc:109] top5 acc:0.89494
```
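For reference, top-1/top-5 accuracy reduces to checking whether the ground-truth label is among the k highest-scoring classes. A sketch, not an excerpt of the actual `test_acc.cc`:

```c++
#include <algorithm>
#include <numeric>
#include <vector>

// True if `label` is among the k highest scores in `scores`.
bool InTopK(const std::vector<float>& scores, int label, int k) {
  std::vector<int> idx(scores.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&](int a, int b) { return scores[a] > scores[b]; });
  return std::find(idx.begin(), idx.begin() + k, label) != idx.begin() + k;
}
// Accumulated over the whole val set:
//   top1 acc = count(InTopK(scores_i, label_i, 1)) / N
//   top5 acc = count(InTopK(scores_i, label_i, 5)) / N
```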


## 3. Benchmark

GPU: NVIDIA® Tesla® P4

Dataset: ImageNet-2012

Inference engine: Paddle-TensorRT


| Model       | FP32 accuracy (Top1/Top5) | INT8 accuracy (Top1/Top5) | FP32 latency (ms) | INT8 latency (ms) | Latency reduction |
| :---------: | :-----------------------: | :-----------------------: | :---------------: | :---------------: | :---------------: |
| MobileNetV1 | 71.00%/89.69%             | 70.66%/89.27%             | 1.083             | 0.568             | 47.55%            |
| MobileNetV2 | 72.16%/90.65%             | 71.09%/90.16%             | 1.821             | 0.980             | 46.19%            |
| ResNet50    | 76.50%/93.00%             | 76.27%/92.95%             | 4.960             | 2.014             | 59.39%            |

The last column is the relative latency reduction from quantization, computed as (FP32 latency - INT8 latency) / FP32 latency; for MobileNetV1, (1.083 - 0.568) / 1.083 ≈ 47.55%.
