Skip to content

Commit 301fab8

Browse files
committed
New TensorRT Utilities
0 parents  commit 301fab8

9 files changed

+542
-0
lines changed

bak_convert_tf_to_trt.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
# Convert a MoveNet SavedModel (singlepose_lightning) to a TF-TRT FP16
# model and save it alongside the original with an "_fp16" suffix.
#
# BUG FIX: TF2's TrtConversionParams has no `max_batch_size` field (that was
# a TF1 TrtGraphConverter option), so passing it raises TypeError before any
# conversion happens.  Removed here — convert_tf_to_trt.py in this same
# commit already comments it out for the same reason.

#from helper import ModelOptimizer
#import tensorrt as trt
from tensorflow.python.compiler.tensorrt import trt_convert
from time import perf_counter

PRECISION = "FP16"
# TF-TRT workspace sizes in bytes for different GPU RAM budgets.
GPU_RAM_4G = 4000000000
GPU_RAM_6G = 6000000000
GPU_RAM_8G = 8000000000
# SavedModel directories for the MoveNet variants.
MPL = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/multipose_lightning"
SPL = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_lightning"
SPT = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_thunder"

model_dir = SPL
model_out_dir = model_dir + "_fp16"

# dotw: uses helper but error, helper not found...
#opt_model = ModelOptimizer(model_dir)
#model_fp16 = opt_model.convert(model_dir + "_fp16", precision=PRECISION)

# dotw: error, create_inference_graph() missing 2 required positional arguments:
# 'input_graph_def' and 'outputs'
#trt_convert.create_inference_graph(
#    input_saved_model_dir = model_dir,
#    output_saved_model_dir = model_out_dir
#)

conv_parms = trt_convert.TrtConversionParams(
    precision_mode=trt_convert.TrtPrecisionMode.FP16,
    max_workspace_size_bytes=GPU_RAM_4G,
)
converter = trt_convert.TrtGraphConverterV2(
    input_saved_model_dir=model_dir,
    conversion_params=conv_parms,
)

print("converting model...")
st = perf_counter()
converter.convert()
#converter.build(input_fn = self.my_input_fn)
et = perf_counter()
print(f"conversion time = {et - st:.2f} sec")

print("saving model...")
st = perf_counter()
converter.save(model_out_dir)
et = perf_counter()
print(f"model saving time = {et - st:.2f} sec")

bak_tf2trt_v1.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
"""Convert the MoveNet singlepose_lightning SavedModel to a TF-TRT FP16 model.

dotw 2021-01-12 observations on the converted model:
- FPS increased greatly
- accuracy untested
- model file size larger
- very slow startup time
- lots of TF/TRT warnings

Earlier attempts, kept for the record:
- helper.ModelOptimizer: module not found
- trt_convert.create_inference_graph(): missing 2 required positional
  arguments 'input_graph_def' and 'outputs'
"""
import tensorrt as trt
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert
from time import perf_counter

print(f"tensorflow version={tf.__version__}")
print(f"tensorrt version={trt.__version__}")

PRECISION = "FP16"
# TF-TRT workspace sizes in bytes for different GPU RAM budgets.
GPU_RAM_4G = 4000000000
GPU_RAM_6G = 6000000000
GPU_RAM_8G = 8000000000
# SavedModel directories for the MoveNet variants.
MPL = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/multipose_lightning"
SPL = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_lightning"
SPT = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_thunder"

model_dir = SPL
model_out_dir = model_dir + "_fp16"

# FP16 conversion sized for a 4 GB GPU workspace.
conv_parms = trt_convert.TrtConversionParams(
    precision_mode=trt_convert.TrtPrecisionMode.FP16,
    max_workspace_size_bytes=GPU_RAM_4G,
)
converter = trt_convert.TrtGraphConverterV2(
    input_saved_model_dir=model_dir,
    conversion_params=conv_parms,
)

print(f"generating {model_out_dir}")
print("converting original model...")
st0 = perf_counter()
converter.convert()
et0 = perf_counter()
print(f"conversion time = {et0 - st0:.2f} sec")

print("saving generated model...")
st1 = perf_counter()
converter.save(model_out_dir)
et1 = perf_counter()
print(f"save time = {et1 - st1:.2f} sec")

print(f"Total conversion time = {et1 - st0:.2f} sec")

check_tensor_gpu.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
from tensorflow.python.client import device_lib


def check_tensor_core_gpu_present() -> bool:
    """Return True iff a local GPU with compute capability >= 7.0 is present.

    Tensor Cores first appeared at compute capability 7.0 (Volta), so any
    device at or above that threshold qualifies.

    BUG FIX: the original fell off the end and implicitly returned None
    when no qualifying device was found; now returns False explicitly.
    """
    for device in device_lib.list_local_devices():
        if "compute capability" in str(device):
            # physical_device_desc ends with "... compute capability: X.Y";
            # take the trailing number after the label.
            compute_capability = float(
                device.physical_device_desc.split("compute capability: ")[-1]
            )
            if compute_capability >= 7.0:
                return True
    return False


tensor_core_gpu = check_tensor_core_gpu_present()
print(f"Tensor Core GPU Present: {tensor_core_gpu}")

convert_tf_to_trt.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
"""Convert the MoveNet singlepose_thunder SavedModel to a TF-TRT FP16 model.

Earlier attempts, kept for the record:
- helper.ModelOptimizer: module not found
- trt_convert.create_inference_graph(): missing 2 required positional
  arguments 'input_graph_def' and 'outputs'
- TrtConversionParams(max_batch_size=...): not accepted, left disabled
"""
import tensorrt as trt
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert
from time import perf_counter

print(f"tensorflow version={tf.__version__}")
print(f"tensorrt version={trt.__version__}")

PRECISION = "FP16"
# TF-TRT workspace sizes in bytes for different GPU RAM budgets.
GPU_RAM_4G = 4000000000
GPU_RAM_6G = 6000000000
GPU_RAM_8G = 8000000000
# SavedModel directories for the MoveNet variants.
MPL = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/multipose_lightning"
SPL = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_lightning"
SPT = "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_thunder"

model_dir = SPT
model_out_dir = model_dir + "_fp16"

# FP16 conversion sized for a 4 GB GPU workspace.
conv_parms = trt_convert.TrtConversionParams(
    precision_mode=trt_convert.TrtPrecisionMode.FP16,
    max_workspace_size_bytes=GPU_RAM_4G,
)
converter = trt_convert.TrtGraphConverterV2(
    input_saved_model_dir=model_dir,
    conversion_params=conv_parms,
)

print(f"generating {model_out_dir}")
print("converting original model...")
st = perf_counter()
converter.convert()
et = perf_counter()
print(f"conversion time = {et - st:.2f} sec")

print("saving generated model...")
st = perf_counter()
converter.save(model_out_dir)
et = perf_counter()
print(f"save time = {et - st:.2f} sec")

mem_usage.cu

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
#include <stdio.h>
#include "cuda.h"

/* Print total/free/used GPU memory in MB, once, then exit.
 *
 * BUG FIX: the original ignored the cudaMemGetInfo() return status and
 * would print uninitialized values on failure; ram_usage.cu in this same
 * commit already checks the status, so do the same here.
 */
int main()
{
    float free_m, total_m, used_m;
    size_t free_t, total_t;

    cudaError_t cuda_status = cudaMemGetInfo(&free_t, &total_t);
    if (cudaSuccess != cuda_status)
    {
        printf("Error: cudaMemGetInfo fails, %s\n",
               cudaGetErrorString(cuda_status));
        return 1;
    }

    /* bytes -> MB (1048576 = 1024 * 1024) */
    total_m = total_t / 1048576.0;
    free_m = free_t / 1048576.0;
    used_m = total_m - free_m;

    printf("mem total %.2f MB\n", total_m);
    printf("mem free %.2f MB\n", free_m);
    printf("mem used %.2f MB\n", used_m);

    return 0;
}

ram_usage.cu

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
#include <iostream>
#include <unistd.h>
#include "cuda.h"

// Report GPU memory usage (used/free/total, in MB) via cudaMemGetInfo.
//
// NOTE(review): the trailing `break` makes the while(true) body run exactly
// once, so the sleep(1) is effectively a one-second pause before exit.
// Removing the break would turn this into a once-per-second live monitor.
int main()
{
    size_t free_byte;
    size_t total_byte;

    while (true)
    {
        const cudaError_t cuda_status = cudaMemGetInfo(&free_byte, &total_byte);

        if (cudaSuccess != cuda_status)
        {
            std::cout << "Error: cudaMemGetInfo fails, "
                      << cudaGetErrorString(cuda_status) << std::endl;
            exit(1);
        }

        const double free_db = static_cast<double>(free_byte);
        const double total_db = static_cast<double>(total_byte);
        const double used_db = total_db - free_db;

        std::cout << "GPU memory usage: used = " << used_db/1024.0/1024.0 << ", free = "
                  << free_db/1024.0/1024.0 << " MB, total = " << total_db/1024.0/1024.0 << " MB"
                  << std::endl;
        sleep(1);
        break;
    }

    return 0;
}

tf2trt_v1.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""TF to TRT Converter v2: batch-convert registered MoveNet SavedModels."""
import numpy as np
#import tensorrt as trt
#import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert
from time import perf_counter

print("TF to TRT Converter v2")
#print(f"tensorflow version={tf.__version__}")
#print(f"tensorrt version={trt.__version__}")

#
# Generator functions
# Since cannot pass params into generators (Python Error: 'generator' object
# is not callable), each input size gets its own dedicated generator.
#
16+
def my_input_gen_192():
    """Yield a single zero-filled int32 batch of shape (1, 192, 192, 3)."""
    dummy = np.zeros((1, 192, 192, 3), dtype=np.int32)
    yield (dummy,)
19+
20+
def my_input_gen_256():
    """Yield a single zero-filled int32 batch of shape (1, 256, 256, 3)."""
    dummy = np.zeros((1, 256, 256, 3), dtype=np.int32)
    yield (dummy,)
23+
24+
# Target precision code and GPU RAM defaults for the conversion runs.
PRECISION = "FP16"
# TF-TRT workspace sizes in bytes for different GPU RAM budgets.
GPU_RAM_2G = 2000000000
GPU_RAM_4G = 4000000000
GPU_RAM_6G = 6000000000
GPU_RAM_8G = 8000000000
GPU_RAM = "4G"
# Map precision code -> TF-TRT precision mode enum.
MODEL_PRECISION = {
    "INT8": trt_convert.TrtPrecisionMode.INT8,
    "FP16": trt_convert.TrtPrecisionMode.FP16,
    "FP32": trt_convert.TrtPrecisionMode.FP32,
}
# Map GPU RAM in GB -> max workspace size in bytes.
# CONSISTENCY FIX: derived from the GPU_RAM_*G constants above instead of
# repeating the same magic numbers a second time.
MODEL_RAM = {
    2: GPU_RAM_2G,
    4: GPU_RAM_4G,
    6: GPU_RAM_6G,
    8: GPU_RAM_8G,
}
# Main data structure to store model code and model info:
# model code -> {"dir": SavedModel directory, "gen": input generator fn}.
MODEL_MAP = {
    "SPL": {
        "dir": "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_lightning",
        "gen": my_input_gen_192,
    },
    "SPT": {
        "dir": "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/singlepose_thunder",
        "gen": my_input_gen_256,
    },
    "MPL": {
        "dir": "/home/aisg/src/ongtw/PeekingDuck/peekingduck_weights/movenet/multipose_lightning",
        "gen": my_input_gen_256,
    },
}
56+
57+
58+
#
59+
# Model attribute queries
60+
#
61+
def get_model_dir(model_code: str) -> str:
    """Return the SavedModel directory registered for ``model_code``."""
    return MODEL_MAP[model_code]["dir"]
64+
65+
def get_model_gen(model_code: str):
    """Return the input-generator function registered for ``model_code``."""
    return MODEL_MAP[model_code]["gen"]
68+
69+
def get_model_save_filepath(model_code: str, prec: str, gpu_ram: int) -> str:
    """Return the output path encoding model, precision, and GPU RAM in GB.

    e.g. ``<model_dir>_v1_fp16_4GB``.
    """
    base_dir = get_model_dir(model_code)
    return f"{base_dir}_v1_{prec.lower()}_{gpu_ram}GB"
73+
74+
75+
#
# Main program
#
def convert_model(model_code: str, prec: str, gpu_ram: int):
    """Convert one registered SavedModel to TF-TRT and save the result.

    Args:
        model_code: key into MODEL_MAP ("SPL", "SPT", "MPL").
        prec: precision code, a key of MODEL_PRECISION.
        gpu_ram: GPU RAM in GB, a key of MODEL_RAM.
    """
    model_dir = get_model_dir(model_code)
    model_save_path = get_model_save_filepath(model_code, prec, gpu_ram)
    print(f"generating {model_save_path}")
    # Configure the converter for the requested precision and workspace.
    conv_parms = trt_convert.TrtConversionParams(
        precision_mode=MODEL_PRECISION[prec],
        max_workspace_size_bytes=MODEL_RAM[gpu_ram],
    )
    converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=model_dir,
        conversion_params=conv_parms,
    )
    # Convert the original base model to a TF-TRT model.
    print("converting original model...")
    t_start = perf_counter()
    converter.convert()
    conv_dur = perf_counter() - t_start
    print(f"conversion time = {conv_dur:.2f} sec")
    # Persist the converted model.
    print("saving generated model...")
    t_save = perf_counter()
    converter.save(model_save_path)
    save_dur = perf_counter() - t_save
    print(f"save time = {save_dur:.2f} sec")
    # Timing summary.
    total_dur = perf_counter() - t_start
    print(f"{model_save_path}:")
    print(f"Conversion time = {conv_dur:.2f} sec")
    print(f"Save time = {save_dur:.2f} sec")
    print(f"Total time = {total_dur:.2f} sec")
109+
110+
111+
if __name__ == "__main__":
    # Convert every registered model at FP16 with a 4 GB workspace.
    # Idiom fix: a dict iterates its keys directly; `.keys()` was redundant.
    for model_code in MODEL_MAP:
        convert_model(model_code, "FP16", 4)
114+
115+

0 commit comments

Comments
 (0)