Skip to content

Commit 5de09da

Browse files
Author: Wish
Commit message: fix FP32
1 parent 7ff2f71, commit 5de09da

File tree

11 files changed: +20 -19 lines changed

src/application/app_alphapose.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int app_alphapose(){
4040
return 0;
4141

4242
string onnx_file = iLogger::format("%s.onnx", name);
43-
string model_file = iLogger::format("%s.fp32.trtmodel", name);
43+
string model_file = iLogger::format("%s.FP32.trtmodel", name);
4444
int test_batch_size = 16;
4545

4646
if(!iLogger::exists(model_file)){

src/application/app_arcface.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ static bool compile_models(){
3232
return false;
3333

3434
string onnx_file = iLogger::format("%s.onnx", name);
35-
string model_file = iLogger::format("%s.fp32.trtmodel", name);
35+
string model_file = iLogger::format("%s.FP32.trtmodel", name);
3636
int test_batch_size = 1;
3737

3838
if(not iLogger::exists(model_file)){
@@ -116,7 +116,7 @@ int app_arcface(){
116116

117117
auto detector = Scrfd::create_infer("scrfd_2.5g_bnkps.640x480.FP32.trtmodel", 0, 0.6f);
118118
//auto detector = RetinaFace::create_infer("mb_retinaface.640x480.FP32.trtmodel", 0, 0.5f);
119-
auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
119+
auto arcface = Arcface::create_infer("arcface_iresnet50.FP32.trtmodel", 0);
120120
auto library = build_library(detector, arcface);
121121

122122
auto files = iLogger::find_files("face/recognize");
@@ -180,7 +180,7 @@ int app_arcface_video(){
180180

181181
auto detector = Scrfd::create_infer("scrfd_2.5g_bnkps.640x480.FP32.trtmodel", 0, 0.6f);
182182
//auto detector = RetinaFace::create_infer("mb_retinaface.640x480.FP32.trtmodel", 0, 0.5f);
183-
auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
183+
auto arcface = Arcface::create_infer("arcface_iresnet50.FP32.trtmodel", 0);
184184
auto library = build_library(detector, arcface);
185185
//auto remote_show = create_zmq_remote_show();
186186
INFO("Use tools/show.py to remote show");
@@ -277,7 +277,7 @@ int app_arcface_tracker(){
277277

278278
auto detector = Scrfd::create_infer("scrfd_2.5g_bnkps.640x480.FP32.trtmodel", 0, 0.6f);
279279
//auto detector = RetinaFace::create_infer("mb_retinaface.640x480.FP32.trtmodel", 0, 0.6f);
280-
auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
280+
auto arcface = Arcface::create_infer("arcface_iresnet50.FP32.trtmodel", 0);
281281
//auto library = build_library(detector, arcface);
282282

283283
//tools/show.py connect to remote show

src/application/app_arcface/arcface.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,8 @@ namespace Arcface{
179179
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
180180
// speed up
181181
memcpy(image_host, image.data, size_image);
182+
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
182183
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
183-
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
184184
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));
185185

186186
CUDAKernel::warp_affine_bilinear_and_normalize(

src/application/app_fall_recognize.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static bool compile_models(){
2626
return false;
2727

2828
string onnx_file = iLogger::format("%s.onnx", name);
29-
string model_file = iLogger::format("%s.fp32.trtmodel", name);
29+
string model_file = iLogger::format("%s.FP32.trtmodel", name);
3030
int test_batch_size = 1;
3131

3232
if(not iLogger::exists(model_file)){
@@ -46,13 +46,13 @@ static bool compile_models(){
4646
int app_fall_recognize(){
4747
cv::setNumThreads(0);
4848

49-
INFO("===================== test alphapose fp32 ==================================");
49+
INFO("===================== test alphapose FP32 ==================================");
5050
if(!compile_models())
5151
return 0;
5252

53-
auto pose_model_file = "sppe.fp32.trtmodel";
54-
auto detector_model_file = "yolox_m.fp32.trtmodel";
55-
auto gcn_model_file = "fall_bp.fp32.trtmodel";
53+
auto pose_model_file = "sppe.FP32.trtmodel";
54+
auto detector_model_file = "yolox_m.FP32.trtmodel";
55+
auto gcn_model_file = "fall_bp.FP32.trtmodel";
5656

5757
auto pose_model = AlphaPose::create_infer(pose_model_file, 0);
5858
auto detector_model = Yolo::create_infer(detector_model_file, Yolo::Type::X, 0, 0.4f);

src/application/app_high_performance/yolo_high_perf.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,10 @@ namespace YoloHighPerf{
241241
float* affine_matrix_host = (float*)cpu_workspace;
242242
uint8_t* image_host = size_matrix + cpu_workspace;
243243

244-
checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
244+
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
245+
memcpy(image_host, image.data, size_image);
246+
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
245247
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
246-
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
247248
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));
248249

249250
CUDAKernel::warp_affine_bilinear_and_normalize(

src/application/app_retinaface/retinaface.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,8 @@ namespace RetinaFace{
249249
// checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
250250
// speed up
251251
memcpy(image_host, image.data, size_image);
252+
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
252253
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
253-
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
254254
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));
255255

256256
CUDAKernel::warp_affine_bilinear_and_normalize(

src/application/app_scrfd.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ static void scrfd_performance(shared_ptr<Scrfd::Infer> infer){
8181
int app_scrfd(){
8282

8383
TRT::set_device(0);
84-
INFO("===================== test scrfd fp32 ==================================");
84+
INFO("===================== test scrfd FP32 ==================================");
8585

8686
string model_file;
8787
if(!compile_scrfd(640, 640, model_file))

src/application/app_scrfd/scrfd.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,8 @@ namespace Scrfd{
251251
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
252252
// speed up
253253
memcpy(image_host, image.data, size_image);
254+
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
254255
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
255-
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
256256
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));
257257

258258
CUDAKernel::warp_affine_bilinear_and_normalize(

src/application/app_yolo/yolo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,8 @@ namespace Yolo{
246246
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
247247
// speed up
248248
memcpy(image_host, image.data, size_image);
249+
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
249250
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
250-
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
251251
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));
252252

253253
CUDAKernel::warp_affine_bilinear_and_normalize(

src/main.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ void test_all(){
2525
app_arcface_video();
2626
app_arcface_tracker();
2727
app_scrfd();
28-
app_plugin();
2928
INFO("test done.");
3029
}
3130

src/tensorRT/common/trt_tensor.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ namespace TRT{
263263
if(head_ == DataHead::Device){
264264
checkCudaRuntime(cudaMemcpyAsync((char*)data_->gpu() + offset_location, src, copyed_bytes, cudaMemcpyHostToDevice, stream_));
265265
}else if(head_ == DataHead::Host){
266-
checkCudaRuntime(cudaMemcpyAsync((char*)data_->cpu() + offset_location, src, copyed_bytes, cudaMemcpyHostToHost, stream_));
266+
//checkCudaRuntime(cudaMemcpyAsync((char*)data_->cpu() + offset_location, src, copyed_bytes, cudaMemcpyHostToHost, stream_));
267+
memcpy((char*)data_->cpu() + offset_location, src, copyed_bytes);
267268
}else{
268269
INFOE("Unsupport head type %d", head_);
269270
}

0 commit comments

Comments
 (0)